Hi,
I measured the usage ratio of the vtlb using the attached
instrumentation (test_vtlb.diff).
The ratio is very low: the vtlb holds at most 21 entries.
The attached patch shrinks the default vtlb size from 512KB to 16KB
to save memory. It also speeds up ptc_e emulation.
Thanks to the improved hash function, the frequency of collisions
is essentially unchanged and there is no performance degradation.
The following are the results of `xm dmesg | sort | uniq -c`:
=============================================================
w/o patch (vtlb:512KB)
Linux:
1 (XEN) thash_purge_all entries:6 collisions:0
4 (XEN) thash_purge_all entries:7 collisions:0
10 (XEN) thash_purge_all entries:8 collisions:0
9 (XEN) thash_purge_all entries:9 collisions:0
4 (XEN) thash_purge_all entries:10 collisions:0
1 (XEN) thash_purge_all entries:11 collisions:0
1 (XEN) thash_purge_all entries:17 collisions:0
1 (XEN) thash_purge_all entries:20 collisions:0
1 (XEN) thash_purge_all entries:20 collisions:3
Windows:
75 (XEN) thash_purge_all entries:1 collisions:0
228 (XEN) thash_purge_all entries:2 collisions:0
19 (XEN) thash_purge_all entries:3 collisions:0
18 (XEN) thash_purge_all entries:4 collisions:0
5 (XEN) thash_purge_all entries:5 collisions:0
4 (XEN) thash_purge_all entries:6 collisions:0
2 (XEN) thash_purge_all entries:7 collisions:0
9 (XEN) thash_purge_all entries:8 collisions:0
8 (XEN) thash_purge_all entries:9 collisions:0
4 (XEN) thash_purge_all entries:10 collisions:0
2 (XEN) thash_purge_all entries:13 collisions:0
1 (XEN) thash_purge_all entries:14 collisions:0
1 (XEN) thash_purge_all entries:16 collisions:0
1 (XEN) thash_purge_all entries:21 collisions:0
=============================================================
w patch (vtlb:16KB)
Linux:
4 (XEN) thash_purge_all entries:7 collisions:0
6 (XEN) thash_purge_all entries:8 collisions:0
3 (XEN) thash_purge_all entries:9 collisions:0
10 (XEN) thash_purge_all entries:10 collisions:0
1 (XEN) thash_purge_all entries:11 collisions:0
3 (XEN) thash_purge_all entries:11 collisions:1
2 (XEN) thash_purge_all entries:12 collisions:0
1 (XEN) thash_purge_all entries:17 collisions:1
1 (XEN) thash_purge_all entries:19 collisions:1
1 (XEN) thash_purge_all entries:21 collisions:0
Windows:
90 (XEN) thash_purge_all entries:1 collisions:0
230 (XEN) thash_purge_all entries:2 collisions:0
19 (XEN) thash_purge_all entries:3 collisions:0
9 (XEN) thash_purge_all entries:4 collisions:0
6 (XEN) thash_purge_all entries:6 collisions:0
2 (XEN) thash_purge_all entries:7 collisions:0
10 (XEN) thash_purge_all entries:8 collisions:0
7 (XEN) thash_purge_all entries:9 collisions:0
4 (XEN) thash_purge_all entries:10 collisions:0
2 (XEN) thash_purge_all entries:13 collisions:0
1 (XEN) thash_purge_all entries:15 collisions:0
1 (XEN) thash_purge_all entries:16 collisions:0
1 (XEN) thash_purge_all entries:21 collisions:0
Thanks,
Kouya
Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
diff -r cd51fa91956b xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c Sun Aug 12 14:50:02 2007 -0600
+++ b/xen/arch/ia64/vmx/vtlb.c Wed Aug 15 14:58:16 2007 +0900
@@ -381,6 +381,9 @@ thash_data_t *__alloc_chain(thash_cb_t *
return cch;
}
+#if 1
+u64 vtlb_ents, vtlb_colls;
+#endif
/*
* Insert an entry into hash TLB or VHPT.
* NOTES:
@@ -398,6 +401,9 @@ void vtlb_insert(VCPU *v, u64 pte, u64 i
/* u64 gppn, ppns, ppne; */
u64 tag, len;
thash_cb_t *hcb = &v->arch.vtlb;
+#if 1
+ ++vtlb_ents;
+#endif
vcpu_get_rr(v, va, &vrr.rrval);
vrr.ps = itir_ps(itir);
VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
@@ -428,6 +434,9 @@ void vtlb_insert(VCPU *v, u64 pte, u64 i
wmb();
hash_table->next = cch;
hash_table->len += 1;
+#if 1
+ ++vtlb_colls;
+#endif
return;
}
@@ -581,6 +590,10 @@ void thash_purge_all(VCPU *v)
int num;
thash_data_t *head;
thash_cb_t *vtlb,*vhpt;
+#if 1
+ printk("%s entries:%ld collisions:%ld\n", __func__, vtlb_ents, vtlb_colls);
+ vtlb_ents = vtlb_colls = 0;
+#endif
vtlb =&v->arch.vtlb;
vhpt =&v->arch.vhpt;
diff -r cd51fa91956b xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c Sun Aug 12 14:50:02 2007 -0600
+++ b/xen/arch/ia64/vmx/vmmu.c Wed Aug 15 14:38:37 2007 +0900
@@ -32,9 +32,9 @@ static void __init parse_vtlb_size(char
if (sz > 0) {
default_vtlb_sz = fls(sz - 1);
- /* minimum 256KB (since calculated tag might be broken) */
- if (default_vtlb_sz < 18)
- default_vtlb_sz = 18;
+ /* minimum 16KB (for tag uniqueness) */
+ if (default_vtlb_sz < 14)
+ default_vtlb_sz = 14;
}
}
@@ -240,40 +240,8 @@ void machine_tlb_insert(struct vcpu *v,
*/
void machine_tlb_purge(u64 va, u64 ps)
{
-// u64 psr;
-// psr = ia64_clear_ic();
ia64_ptcl(va, ps << 2);
-// ia64_set_psr(psr);
-// ia64_srlz_i();
-// return;
-}
-/*
-u64 machine_thash(u64 va)
-{
- return ia64_thash(va);
-}
-
-u64 machine_ttag(u64 va)
-{
- return ia64_ttag(va);
-}
-*/
-thash_data_t * vsa_thash(PTA vpta, u64 va, u64 vrr, u64 *tag)
-{
- u64 index,pfn,rid,pfn_bits;
- pfn_bits = vpta.size-5-8;
- pfn = REGION_OFFSET(va)>>_REGION_PAGE_SIZE(vrr);
- rid = _REGION_ID(vrr);
- index = ((rid&0xff)<<pfn_bits)|(pfn&((1UL<<pfn_bits)-1));
- *tag = ((rid>>8)&0xffff) | ((pfn >>pfn_bits)<<16);
- return (thash_data_t *)((vpta.base<<PTA_BASE_SHIFT)+(index<<5));
-// return ia64_call_vsa(PAL_VPS_THASH,va,vrr,vpta,0,0,0,0);
-}
-
-//u64 vsa_ttag(u64 va, u64 vrr)
-//{
-// return ia64_call_vsa(PAL_VPS_TTAG,va,vrr,0,0,0,0,0);
-//}
+}
int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref)
{
diff -r cd51fa91956b xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c Sun Aug 12 14:50:02 2007 -0600
+++ b/xen/arch/ia64/vmx/vtlb.c Wed Aug 15 14:38:37 2007 +0900
@@ -286,6 +286,16 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
return ret;
}
+static thash_data_t * vtlb_thash(PTA vpta, u64 va, u64 vrr, u64 *tag)
+{
+ u64 index,pfn,rid;
+ pfn = REGION_OFFSET(va)>>_REGION_PAGE_SIZE(vrr);
+ rid = _REGION_ID(vrr);
+ index = (pfn^rid)&((1UL<<(vpta.size-5))-1);
+ *tag = pfn^(rid<<39);
+ return (thash_data_t *)((vpta.base<<PTA_BASE_SHIFT)+(index<<5));
+}
+
/*
* purge software guest tlb
*/
@@ -308,7 +318,7 @@ static void vtlb_purge(VCPU *v, u64 va,
size = PSIZE(rr_ps);
vrr.ps = rr_ps;
while (num) {
- cur = vsa_thash(hcb->pta, curadr, vrr.rrval, &tag);
+ cur = vtlb_thash(hcb->pta, curadr, vrr.rrval, &tag);
while (cur) {
if (cur->etag == tag && cur->ps == rr_ps)
cur->etag = 1UL << 63;
@@ -401,7 +411,7 @@ void vtlb_insert(VCPU *v, u64 pte, u64 i
vcpu_get_rr(v, va, &vrr.rrval);
vrr.ps = itir_ps(itir);
VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
- hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
+ hash_table = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
cch = hash_table;
while (cch) {
if (INVALID_TLB(cch)) {
@@ -639,7 +649,7 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v
ps = __ffs(psbits);
psbits &= ~(1UL << ps);
vrr.ps = ps;
- cch = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
+ cch = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
do {
if (cch->etag == tag && cch->ps == ps)
return cch;
diff -r cd51fa91956b xen/include/asm-ia64/vmmu.h
--- a/xen/include/asm-ia64/vmmu.h Sun Aug 12 14:50:02 2007 -0600
+++ b/xen/include/asm-ia64/vmmu.h Wed Aug 15 14:38:37 2007 +0900
@@ -24,7 +24,7 @@
#define XEN_TLBthash_H
#define MAX_CCN_DEPTH (15) // collision chain depth
-#define DEFAULT_VTLB_SZ (19) // 512K hash + 512K c-chain for VTLB
+#define DEFAULT_VTLB_SZ (14) // 16K hash + 16K c-chain for VTLB
#define DEFAULT_VHPT_SZ (23) // 8M hash + 8M c-chain for VHPT
#define VTLB(v,_x) (v->arch.vtlb._x)
#define VHPT(v,_x) (v->arch.vhpt._x)
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|