Hi,

Currently an HVM domain has separate vtlb and vhpt structures.
This patch unifies them: a vtlb entry is now recorded in the
vhpt collision chain area. The benefits are:
- Improved flexibility. The vtlb size is currently fixed, but some
  applications (e.g. ia32el) consume a large number of vtlb entries.
- Better utilization of the vhpt collision chain area, which is
  otherwise sparsely populated.
- Fewer TLB misses when accessing a vtlb entry, since the vhpt is
  mapped by a TR.
- Slightly faster ptc.e emulation.
On the other hand, there is a slight overhead when searching for
a TLB entry. In my testing, no performance degradation was observed.
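The key trick is that each vtlb entry is tagged with a reserved bit
of its itir word (ITIR_VTLB below), so vtlb and vhpt entries can share
one hash table and still be told apart. As a rough illustration, here
is a minimal C sketch of the matching logic; entry_t and match_vtlb
are hypothetical stand-ins for thash_data_t and the THASH_MATCH_VTLB
macro added by this patch:

#include <stdint.h>

#define ITIR_PS_MASK   (((1UL << 6) - 1) << 2)  /* page-size field, itir bits 7:2 */
#define ITIR_VTLB_BIT  1                        /* reserved itir bit, ignored by hw */
#define ITIR_VTLB      (1UL << ITIR_VTLB_BIT)

/* Simplified stand-in for thash_data_t. */
typedef struct entry {
    uint64_t itir;       /* page size plus the vtlb marker bit */
    uint64_t etag;       /* hash tag; bit 63 set marks an invalid entry */
    struct entry *next;  /* collision chain */
} entry_t;

/* An entry satisfies a vtlb lookup iff the tag and page size match
 * and the reserved ITIR_VTLB bit is set. */
static int match_vtlb(const entry_t *e, uint64_t tag, uint64_t ps)
{
    return e->etag == tag &&
           (e->itir & (ITIR_PS_MASK | ITIR_VTLB)) == ((ps << 2) | ITIR_VTLB);
}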
Thanks,
Kouya
Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
diff -r daf39fc8038a xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmmu.c Thu Feb 28 15:50:36 2008 +0900
@@ -24,20 +24,7 @@
#include <xen/sched-if.h>
#include <asm/vhpt.h>
-static int default_vtlb_sz = DEFAULT_VTLB_SZ;
static int default_vhpt_sz = DEFAULT_VHPT_SZ;
-
-static void __init parse_vtlb_size(char *s)
-{
- int sz = parse_size_and_unit(s, NULL);
-
- if (sz > 0) {
- default_vtlb_sz = fls(sz - 1);
- /* minimum 16KB (for tag uniqueness) */
- if (default_vtlb_sz < 14)
- default_vtlb_sz = 14;
- }
-}
static void __init parse_vhpt_size(char *s)
{
@@ -48,7 +35,6 @@ static void __init parse_vhpt_size(char
}
}
-custom_param("vti_vtlb_size", parse_vtlb_size);
custom_param("vti_vhpt_size", parse_vhpt_size);
@@ -82,7 +68,2 @@ int init_domain_tlb(struct vcpu *v)
if (rc)
return rc;
- rc = thash_alloc(&(v->arch.vtlb), default_vtlb_sz, "vtlb");
- if (rc) {
- free_domain_vhpt(v);
- return rc;
- }
@@ -94,9 +75,6 @@ void free_domain_tlb(struct vcpu *v)
void free_domain_tlb(struct vcpu *v)
{
- if (v->arch.vtlb.hash)
- thash_free(&(v->arch.vtlb));
-
free_domain_vhpt(v);
}
@@ -164,8 +146,6 @@ fetch_code(VCPU *vcpu, u64 gip, IA64_BUN
}
else {
tlb = vtlb_lookup(vcpu, gip, ISIDE_TLB);
-// if( tlb == NULL )
-// tlb = vtlb_lookup(vcpu, gip, DSIDE_TLB );
if (tlb)
gpip = (tlb->ppn >>(tlb->ps-12)<<tlb->ps) | ( gip & (PSIZE(tlb->ps)-1) );
}
diff -r daf39fc8038a xen/arch/ia64/vmx/vmx_fault.c
--- a/xen/arch/ia64/vmx/vmx_fault.c Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmx_fault.c Thu Feb 28 14:17:10 2008 +0900
@@ -392,7 +392,8 @@ try_again:
return IA64_FAULT;
}
}
- thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+ thash_vhpt_insert(v, data->page_flags, data->itir & ~ITIR_VTLB,
+ vadr, type);
return IA64_NO_FAULT;
}
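The "data->itir & ~ITIR_VTLB" above is half of an invariant the rest
of the patch relies on: vhpt-side inserts clear the reserved bit, while
vtlb-side inserts (in vtlb.c below) set it, so the hardware walker only
ever consumes an architecturally clean itir. In sketch form, reusing
the hypothetical entry_t stand-in from above:

/* The two insert paths differ only in the software-only marker bit. */
static void set_vhpt_itir(entry_t *e, uint64_t itir)
{
    e->itir = itir & ~ITIR_VTLB;  /* hardware-consumable vhpt entry */
}

static void set_vtlb_itir(entry_t *e, uint64_t itir)
{
    e->itir = itir | ITIR_VTLB;   /* software-only vtlb entry */
}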
diff -r daf39fc8038a xen/arch/ia64/vmx/vmx_ivt.S
--- a/xen/arch/ia64/vmx/vmx_ivt.S Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmx_ivt.S Thu Feb 28 16:19:06 2008 +0900
@@ -58,6 +58,7 @@
#include <asm/thread_info.h>
#include <asm/unistd.h>
#include <asm/vhpt.h>
+#include <asm/vmmu.h>
#include <asm/virt_event.h>
#include <asm/vmx_phy_mode.h>
#include <xen/errno.h>
@@ -169,13 +170,15 @@ vmx_itlb_loop:
adds r16 = VLE_TITAG_OFFSET, r17
adds r19 = VLE_CCHAIN_OFFSET, r17
;;
- ld8 r24 = [r16] // Read tag
+ ld8 r24 = [r16],VLE_ITIR_OFFSET-VLE_TITAG_OFFSET // Read tag
ld8 r23 = [r19] // Read chain
;;
+ ld8 r19 = [r16],VLE_TITAG_OFFSET-VLE_ITIR_OFFSET // Read itir
lfetch [r23]
- cmp.eq p6,p7 = r20, r24 // does tag match ?
+ cmp.eq p0,p7 = r20, r24 // does tag match ?
;;
(p7)mov r17 = r23; // No: entry = chain
+ tbit.nz p6,p0 = r19, ITIR_VTLB_BIT // vtlb?
(p7)br.sptk vmx_itlb_loop // again
;;
// Swap the first entry with the entry found in the collision chain
@@ -183,6 +186,8 @@ vmx_itlb_loop:
// In comments 1 stands for the first entry and 2 for the found entry.
ld8 r29 = [r28] // Read tag of 1
dep r22 = -1,r24,63,1 // set ti=1 of 2 (to disable it during the swap)
+(p6)mov r17 = r23 // vtlb entry: advance to next in chain
+(p6)br.sptk vmx_itlb_loop // again
;;
ld8 r25 = [r17] // Read value of 2
ld8 r27 = [r18] // Read value of 1
@@ -190,13 +195,12 @@ vmx_itlb_loop:
st8 [r28] = r22, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET // Write tag of 1
mf
;;
- ld8 r29 = [r16] // read itir of 2
ld8 r22 = [r28] // read itir of 1
st8 [r18] = r25 // Write value of 1
st8 [r17] = r27 // Write value of 2
;;
st8 [r16] = r22 // Write itir of 2
- st8 [r28] = r29, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET // write itir of 1
+ st8 [r28] = r19, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET // write itir of 1
;;
st8.rel [r28] = r24 // Write tag of 1 (with ti=0)
// Insert the translation entry
@@ -252,17 +256,21 @@ vmx_dtlb_loop:
adds r16 = VLE_TITAG_OFFSET, r17
adds r19 = VLE_CCHAIN_OFFSET, r17
;;
- ld8 r24 = [r16]
- ld8 r23 = [r19]
- ;;
+ ld8 r24 = [r16],VLE_ITIR_OFFSET-VLE_TITAG_OFFSET // Read tag
+ ld8 r23 = [r19] // Read chain
+ ;;
+ ld8 r19 = [r16],VLE_TITAG_OFFSET-VLE_ITIR_OFFSET // Read itir
lfetch [r23]
- cmp.eq p6,p7 = r20, r24
+ cmp.eq p0,p7 = r20, r24
;;
(p7)mov r17 = r23;
+ tbit.nz p6,p0 = r19, ITIR_VTLB_BIT // vtlb?
(p7)br.sptk vmx_dtlb_loop
;;
ld8 r29 = [r28]
dep r22 = -1,r24,63,1 //set ti=1
+(p6)mov r17 = r23 // vtlb entry: advance to next in chain
+(p6)br.sptk vmx_dtlb_loop
;;
ld8 r25 = [r17]
ld8 r27 = [r18]
@@ -270,13 +278,12 @@ vmx_dtlb_loop:
st8 [r28] = r22, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET
mf
;;
- ld8 r29 = [r16]
ld8 r22 = [r28]
st8 [r18] = r25
st8 [r17] = r27
;;
st8 [r16] = r22
- st8 [r28] = r29, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET
+ st8 [r28] = r19, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET
;;
st8.rel [r28] = r24
itc.d r25
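In C terms, the modified miss-handler loop now behaves roughly like
the following (a hypothetical rendering of vmx_itlb_loop/vmx_dtlb_loop
reusing the entry_t stand-in from above; the real code additionally
swaps the found entry to the head of the chain, which is omitted here):

/* Walk the collision chain for a hardware-insertable translation,
 * skipping both non-matching tags and vtlb-only entries. */
static entry_t *miss_handler_walk(entry_t *e, uint64_t tag)
{
    for (; e != NULL; e = e->next) {
        if (e->etag == tag && !(e->itir & ITIR_VTLB))
            return e;   /* a real vhpt entry: safe to itc into the TLB */
    }
    return NULL;        /* not found: fall through to the slow path */
}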
diff -r daf39fc8038a xen/arch/ia64/vmx/vmx_virt.c
--- a/xen/arch/ia64/vmx/vmx_virt.c Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmx_virt.c Thu Feb 28 11:57:11 2008 +0900
@@ -1418,10 +1418,6 @@ vmx_emulate(VCPU *vcpu, REGS *regs)
cause = VMX(vcpu,cause);
opcode = VMX(vcpu,opcode);
-#ifdef VTLB_DEBUG
- check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu));
- dump_vtlb(vmx_vcpu_get_vtlb(vcpu));
-#endif
#if 0
if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) {
printk ("VMAL decode error: cause - %lx; op - %lx\n",
diff -r daf39fc8038a xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vtlb.c Thu Feb 28 15:20:01 2008 +0900
@@ -178,7 +178,7 @@ void thash_vhpt_insert(VCPU *v, u64 pte,
mrr.rrval = ia64_get_rr(va);
if (itir_ps(itir) >= mrr.ps && VMX_MMU_MODE(v) != VMX_MMU_PHY_D) {
- vmx_vhpt_insert(vcpu_get_vhpt(v), phy_pte, itir, va);
+ vmx_vhpt_insert(&v->arch.vhpt, phy_pte, itir, va);
} else {
if (VMX_MMU_MODE(v) == VMX_MMU_PHY_D)
itir = (itir & ~RR_PS_MASK) | (mrr.rrval & RR_PS_MASK);
@@ -309,7 +309,7 @@ static void vtlb_purge(VCPU *v, u64 va,
thash_data_t *cur;
u64 start, curadr, size, psbits, tag, rr_ps, num;
ia64_rr vrr;
- thash_cb_t *hcb = &v->arch.vtlb;
+ thash_cb_t *hcb = &v->arch.vhpt;
vcpu_get_rr(v, va, &vrr.rrval);
psbits = VMX(v, psbits[(va >> 61)]);
@@ -323,10 +323,9 @@ static void vtlb_purge(VCPU *v, u64 va,
vrr.ps = rr_ps;
while (num) {
cur = vtlb_thash(hcb->pta, curadr, vrr.rrval, &tag);
- while (cur) {
- if (cur->etag == tag && cur->ps == rr_ps)
+ for (cur = cur->next; cur; cur = cur->next) {
+ if (THASH_MATCH_VTLB(cur, tag, rr_ps))
cur->etag = 1UL << 63;
- cur = cur->next;
}
curadr += size;
num--;
@@ -353,7 +352,7 @@ static void vhpt_purge(VCPU *v, u64 va,
cur = (thash_data_t *)ia64_thash(start);
tag = ia64_ttag(start);
while (cur) {
- if (cur->etag == tag)
+ if (cur->etag == tag) // && (cur->itir & ITIR_VTLB) == 0
cur->etag = 1UL << 63;
cur = cur->next;
}
@@ -407,11 +406,9 @@ static void vtlb_insert(VCPU *v, u64 pte
static void vtlb_insert(VCPU *v, u64 pte, u64 itir, u64 va)
{
thash_data_t *hash_table, *cch, *tail;
- /* int flag; */
ia64_rr vrr;
- /* u64 gppn, ppns, ppne; */
u64 tag, len;
- thash_cb_t *hcb = &v->arch.vtlb;
+ thash_cb_t *hcb = &v->arch.vhpt;
vcpu_quick_region_set(PSCBX(v, tc_regions), va);
@@ -420,18 +417,16 @@ static void vtlb_insert(VCPU *v, u64 pte
VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
hash_table = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
len = 0;
- cch = hash_table;
- do {
+ for (cch = hash_table->next; cch; cch = cch->next) {
if (INVALID_TLB(cch)) {
cch->page_flags = pte;
- cch->itir = itir;
+ cch->itir = itir | ITIR_VTLB;
cch->etag = tag;
return;
}
++len;
tail = cch;
- cch = cch->next;
- } while(cch);
+ }
if (len >= MAX_CCN_DEPTH) {
thash_recycle_cch(hcb, hash_table, tail);
cch = cch_alloc(hcb);
@@ -440,7 +435,7 @@ static void vtlb_insert(VCPU *v, u64 pte
cch = __alloc_chain(hcb);
}
cch->page_flags = pte;
- cch->itir = itir;
+ cch->itir = itir | ITIR_VTLB;
cch->etag = tag;
cch->next = hash_table->next;
wmb();
@@ -587,24 +582,10 @@ void thash_purge_all(VCPU *v)
{
int num;
thash_data_t *head;
- thash_cb_t *vtlb,*vhpt;
- vtlb = &v->arch.vtlb;
- vhpt = &v->arch.vhpt;
+ thash_cb_t *vhpt = &v->arch.vhpt;
for (num = 0; num < 8; num++)
VMX(v, psbits[num]) = 0;
-
- head = vtlb->hash;
- num = (vtlb->hash_sz/sizeof(thash_data_t));
- do{
- head->page_flags = 0;
- head->etag = 1UL<<63;
- head->itir = 0;
- head->next = 0;
- head++;
- num--;
- } while(num);
- cch_mem_init(vtlb);
head = vhpt->hash;
num = (vhpt->hash_sz/sizeof(thash_data_t));
@@ -633,7 +614,7 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v
thash_data_t *cch;
u64 psbits, ps, tag;
ia64_rr vrr;
- thash_cb_t *hcb = &v->arch.vtlb;
+ thash_cb_t *hcb = &v->arch.vhpt;
cch = __vtr_lookup(v, va, is_data);
if (cch)
@@ -648,11 +629,10 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v
psbits &= ~(1UL << ps);
vrr.ps = ps;
cch = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
- do {
- if (cch->etag == tag && cch->ps == ps)
+ for (cch = cch->next; cch != NULL; cch = cch->next) {
+ if (THASH_MATCH_VTLB(cch, tag, ps))
return cch;
- cch = cch->next;
- } while(cch);
+ }
}
return NULL;
}
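Note that vtlb_insert() and vtlb_lookup() now iterate from
hash_table->next: the head slot of each bucket remains reserved for
hardware-walkable vhpt entries, and vtlb entries live only in the
collision chain. A sketch of the unified insert path, again with the
hypothetical entry_t stand-in (the recycling of over-long chains and
the write barrier are elided):

/* Record a vtlb entry in the collision chain of its bucket,
 * tagged with ITIR_VTLB; the bucket head itself is never used. */
static void vtlb_chain_insert(entry_t *head, entry_t *fresh,
                              uint64_t itir, uint64_t tag)
{
    entry_t *e;

    for (e = head->next; e != NULL; e = e->next) {
        if (e->etag & (1UL << 63)) {    /* reuse an invalidated slot */
            e->itir = itir | ITIR_VTLB;
            e->etag = tag;
            return;
        }
    }
    fresh->itir = itir | ITIR_VTLB;     /* else publish a new node */
    fresh->etag = tag;
    fresh->next = head->next;
    head->next = fresh;
}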
diff -r daf39fc8038a xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/include/asm-ia64/domain.h Thu Feb 28 11:57:11 2008 +0900
@@ -273,7 +273,6 @@ struct arch_vcpu {
struct thread_struct _thread; // this must be last
- thash_cb_t vtlb;
thash_cb_t vhpt;
char irq_new_pending;
char irq_new_condition; // vpsr.i/vtpr change, check for pending VHPI
diff -r daf39fc8038a xen/include/asm-ia64/vmmu.h
--- a/xen/include/asm-ia64/vmmu.h Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/include/asm-ia64/vmmu.h Thu Feb 28 17:08:09 2008 +0900
@@ -24,9 +24,7 @@
#define XEN_TLBthash_H
#define MAX_CCN_DEPTH (15) // collision chain depth
-#define DEFAULT_VTLB_SZ (14) // 16K hash + 16K c-chain for VTLB
#define DEFAULT_VHPT_SZ (23) // 8M hash + 8M c-chain for VHPT
-#define VTLB(v,_x) (v->arch.vtlb._x)
#define VHPT(v,_x) (v->arch.vhpt._x)
#ifndef __ASSEMBLY__
@@ -51,6 +49,10 @@ enum {
#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT)
#define ITIR_RV_MASK (((1UL<<32)-1)<<32 | 0x3)
+#define ITIR_PS_MASK (((1<<6)-1)<<2)
+#define ITIR_VTLB_BIT 1 // NB. reserved field
+#define ITIR_VTLB (1UL<<ITIR_VTLB_BIT)
+
#define PAGE_FLAGS_RV_MASK (0x2 | (0x3UL<<50)|(((1UL<<11)-1)<<53))
#define PAGE_FLAGS_AR_PL_MASK ((0x7UL<<9)|(0x3UL<<7))
@@ -103,21 +105,12 @@ typedef struct thash_data {
};
} thash_data_t;
-#define INVALIDATE_VHPT_HEADER(hdata) \
-{ ((hdata)->page_flags)=0; \
- ((hdata)->itir)=PAGE_SHIFT<<2; \
- ((hdata)->etag)=1UL<<63; \
- ((hdata)->next)=0;}
-
-#define INVALIDATE_TLB_HEADER(hash) INVALIDATE_VHPT_HEADER(hash)
-
-#define INVALIDATE_HASH_HEADER(hcb,hash) INVALIDATE_VHPT_HEADER(hash)
-
#define INVALID_VHPT(hdata) ((hdata)->ti)
#define INVALID_TLB(hdata) ((hdata)->ti)
-#define INVALID_TR(hdata) (!(hdata)->p)
-#define INVALID_ENTRY(hcb, hdata) INVALID_VHPT(hdata)
-
+
+#define THASH_MATCH_VTLB(hdata, tag, ps) \
+ ((hdata)->etag == (tag) && \
+ (((hdata)->itir & (ITIR_PS_MASK|ITIR_VTLB)) == (((ps)<<2)|ITIR_VTLB)))
typedef struct thash_cb {
/* THASH base information */
diff -r daf39fc8038a xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/include/asm-ia64/vmx_vcpu.h Thu Feb 28 11:57:11 2008 +0900
@@ -380,15 +380,6 @@ static inline unsigned long vrrtomrr(VCP
#endif
}
-static inline thash_cb_t *vmx_vcpu_get_vtlb(VCPU * vcpu)
-{
- return &vcpu->arch.vtlb;
-}
-
-static inline thash_cb_t *vcpu_get_vhpt(VCPU * vcpu)
-{
- return &vcpu->arch.vhpt;
-}
/**************************************************************************
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel