This patch implements a simple, non-complete reverse map of writable
mappings for out-of-sync (OOS) pages, to avoid the brute-force search
of the shadows on resyncs.
Signed-off-by: Gianluca Guida <gianluca.guida@xxxxxxxxxxxxx>
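
To make the idea concrete, here is a rough, self-contained sketch of what
the fixup table does, in plain C with made-up names and simplified
indexing; it is illustrative only, not Xen code. Each vcpu gets a small,
lossy hash keyed by the guest frame number: whenever we install a writable
sl1e for a page that may go out of sync, we record the (shadow frame,
entry offset) pair, so a later resync can clear _PAGE_RW at exactly that
shadow entry instead of walking the whole shadow hash. If a bucket is
full, the add simply fails and the resync falls back to the brute-force
path; that is why the map can afford to be non-complete. For sizing,
SHADOW_OOS_FT_ENTRIES works out to (PAGE_SIZE << SHADOW_OOS_FT_ORDER) /
(SHADOW_OOS_FT_HASH * sizeof(struct oos_fixup)) slots per bucket, i.e. 26
on a 64-bit build if the three-field struct is 24 bytes.

/* Toy model of the OOS fixup table (illustrative only; all names here
 * are made up and the indexing is simplified). */
#include <stdio.h>

#define FT_HASH     13          /* buckets, like SHADOW_OOS_FT_HASH */
#define FT_ENTRIES  26          /* slots per bucket */
#define NO_FRAME    (~0UL)      /* empty-slot marker */

struct fixup { unsigned long gmfn, smfn, off; };
static struct fixup table[FT_HASH * FT_ENTRIES];

#define IDX(b, i) ((b) * FT_ENTRIES + (i))

/* Remember "shadow page smfn maps gmfn writably at entry off".
 * Returns 0 if the bucket is full (the hint is simply dropped). */
static int fixup_add(unsigned long gmfn, unsigned long smfn,
                     unsigned long off)
{
    int b = gmfn % FT_HASH, i, free_slot = -1;

    for ( i = 0; i < FT_ENTRIES; i++ )
    {
        struct fixup *f = &table[IDX(b, i)];
        if ( f->gmfn == NO_FRAME && free_slot < 0 )
            free_slot = IDX(b, i);              /* first empty slot */
        else if ( f->gmfn == gmfn && f->smfn == smfn && f->off == off )
            return 1;                           /* already recorded */
    }
    if ( free_slot < 0 )
        return 0;          /* lossy: caller falls back to brute force */
    table[free_slot] = (struct fixup){ gmfn, smfn, off };
    return 1;
}

/* On resync, visit every recorded mapping of gmfn and forget it; the
 * real code calls sh_remove_write_access_from_sl1p() at this point. */
static void fixup_flush_gmfn(unsigned long gmfn)
{
    int b = gmfn % FT_HASH, i;

    for ( i = 0; i < FT_ENTRIES; i++ )
        if ( table[IDX(b, i)].gmfn == gmfn )
        {
            printf("unhook rw: smfn=%#lx slot=%lu\n",
                   table[IDX(b, i)].smfn, table[IDX(b, i)].off);
            table[IDX(b, i)].gmfn = NO_FRAME;
        }
}

int main(void)
{
    int i;

    for ( i = 0; i < FT_HASH * FT_ENTRIES; i++ )
        table[i].gmfn = NO_FRAME;

    fixup_add(0x1000, 0x2000, 5);    /* writable mapping recorded */
    fixup_add(0x1000, 0x2000, 5);    /* duplicate: no extra slot  */
    fixup_flush_gmfn(0x1000);        /* resync: one unhook printed */
    return 0;
}

Built standalone, the demo records one mapping, ignores the duplicate add,
and prints a single unhook when the frame is flushed.
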
diff -r c8aa06014ac0 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/arch/x86/mm/shadow/common.c Fri Jun 20 15:10:53 2008 +0100
@@ -580,6 +580,153 @@ static inline void _sh_resync_l1(struct
 #endif
 }
 
+#define _FIXUP_IDX(_b, _i) ((_b) * SHADOW_OOS_FT_HASH + (_i))
+
+void oos_fixup_add(struct vcpu *v, mfn_t gmfn,
+                   mfn_t smfn, unsigned long off)
+{
+    int idx, i, free = 0, free_slot = 0;
+    struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+    {
+        if ( !mfn_valid(fixups[_FIXUP_IDX(idx, i)].gmfn)
+             || !mfn_is_out_of_sync(fixups[_FIXUP_IDX(idx, i)].gmfn) )
+        {
+            free = 1;
+            free_slot = _FIXUP_IDX(idx, i);
+        }
+        else if ( (mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) == mfn_x(gmfn))
+                  && (mfn_x(fixups[_FIXUP_IDX(idx, i)].smfn) == mfn_x(smfn))
+                  && (fixups[_FIXUP_IDX(idx, i)].off == off) )
+        {
+            perfc_incr(shadow_oos_fixup_no_add);
+            return;
+        }
+    }
+
+    if ( free )
+    {
+        if ( !v->arch.paging.shadow.oos_fixup_used )
+            v->arch.paging.shadow.oos_fixup_used = 1;
+        fixups[free_slot].gmfn = gmfn;
+        fixups[free_slot].smfn = smfn;
+        fixups[free_slot].off = off;
+        perfc_incr(shadow_oos_fixup_add_ok);
+        return;
+    }
+
+    perfc_incr(shadow_oos_fixup_add_fail);
+}
+
+void oos_fixup_remove(struct vcpu *v, mfn_t gmfn)
+{
+    int idx, i;
+    struct domain *d = v->domain;
+
+    perfc_incr(shadow_oos_fixup_remove);
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for_each_vcpu(d, v)
+    {
+        struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+        for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+            if ( mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) == mfn_x(gmfn) )
+                fixups[_FIXUP_IDX(idx, i)].gmfn = _mfn(INVALID_MFN);
+    }
+}
+
+int oos_fixup_flush(struct vcpu *v)
+{
+    int i, rc = 0;
+    struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+    perfc_incr(shadow_oos_fixup_flush);
+
+    if ( !v->arch.paging.shadow.oos_fixup_used )
+        return 0;
+
+    for ( i = 0; i < SHADOW_OOS_FT_HASH * SHADOW_OOS_FT_ENTRIES; i++ )
+    {
+        if ( mfn_valid(fixups[i].gmfn) )
+        {
+            if ( mfn_is_out_of_sync(fixups[i].gmfn) )
+                rc |= sh_remove_write_access_from_sl1p(v, fixups[i].gmfn,
+                                                       fixups[i].smfn,
+                                                       fixups[i].off);
+            fixups[i].gmfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    v->arch.paging.shadow.oos_fixup_used = 0;
+
+    return rc;
+}
+
+int oos_fixup_flush_gmfn(struct vcpu *v, mfn_t gmfn)
+{
+    int idx, i, rc = 0;
+    struct domain *d = v->domain;
+
+    perfc_incr(shadow_oos_fixup_flush_gmfn);
+
+    idx = mfn_x(gmfn) % SHADOW_OOS_FT_HASH;
+    for_each_vcpu(d, v)
+    {
+        struct oos_fixup *fixups = v->arch.paging.shadow.oos_fixups;
+
+        for ( i = 0; i < SHADOW_OOS_FT_ENTRIES; i++ )
+        {
+            if ( mfn_x(fixups[_FIXUP_IDX(idx, i)].gmfn) != mfn_x(gmfn) )
+                continue;
+
+            rc |= sh_remove_write_access_from_sl1p(v,
+                                                   fixups[_FIXUP_IDX(idx, i)].gmfn,
+                                                   fixups[_FIXUP_IDX(idx, i)].smfn,
+                                                   fixups[_FIXUP_IDX(idx, i)].off);
+
+            fixups[_FIXUP_IDX(idx, i)].gmfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    return rc;
+}
+
+static int oos_remove_write_access(struct vcpu *v, mfn_t gmfn,
+                                   unsigned long va)
+{
+    int ftlb = 0;
+
+    ftlb |= oos_fixup_flush_gmfn(v, gmfn);
+
+    switch ( sh_remove_write_access(v, gmfn, 0, va) )
+    {
+    default:
+    case 0:
+        break;
+
+    case 1:
+        ftlb |= 1;
+        break;
+
+    case -1:
+        /* An unfindable writeable typecount has appeared, probably via a
+         * grant table entry: can't shoot the mapping, so try to unshadow
+         * the page.  If that doesn't work either, the guest is granting
+         * his pagetables and must be killed after all.
+         * This will flush the tlb, so we can return with no worries. */
+        sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
+        return 1;
+    }
+
+    if ( ftlb )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
+    return 0;
+}
+
+
 /* Pull all the entries on an out-of-sync page back into sync. */
 static void _sh_resync(struct vcpu *v, mfn_t gmfn, unsigned long va)
 {
@@ -595,26 +742,10 @@ static void _sh_resync(struct vcpu *v, m
     SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
                   v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
 
-    /* Need to pull write access so the page *stays* in sync.
-     * This might be rather slow but we hope that in the common case
-     * we're handling this pagetable after a guest walk has pulled
-     * write access the fast way. */
-    switch ( sh_remove_write_access(v, gmfn, 0, va) )
+    /* Need to pull write access so the page *stays* in sync. */
+    if ( oos_remove_write_access(v, gmfn, va) )
     {
-    default:
-    case 0:
-        break;
-
-    case 1:
-        flush_tlb_mask(v->domain->domain_dirty_cpumask);
-        break;
-
-    case -1:
-        /* An unfindable writeable typecount has appeared, probably via a
-         * grant table entry: can't shoot the mapping, so try to unshadow
-         * the page.  If that doesn't work either, the guest is granting
-         * his pagetables and must be killed after all. */
-        sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
+        /* Page has been unshadowed. */
         return;
     }
@@ -753,6 +884,9 @@ void sh_resync_all(struct vcpu *v, int s
     if ( do_locking )
         shadow_lock(v->domain);
 
+    if ( oos_fixup_flush(v) )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
     /* First: resync all of this vcpu's oos pages */
     for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ )
         if ( mfn_valid(oos[idx]) )
@@ -882,7 +1016,10 @@ void shadow_demote(struct vcpu *v, mfn_t
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Was the page out of sync? */
     if ( page_is_out_of_sync(page) )
+    {
         oos_hash_remove(v, gmfn);
+        oos_fixup_remove(v, gmfn);
+    }
 #endif
 
     clear_bit(_PGC_page_table, &page->count_info);
 }
@@ -2224,7 +2361,10 @@ int sh_remove_write_access(struct vcpu *
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
 
     /* Brute-force search of all the shadows, by walking the hash */
-    perfc_incr(shadow_writeable_bf);
+    if ( level == 0 )
+        perfc_incr(shadow_writeable_bf_1);
+    else
+        perfc_incr(shadow_writeable_bf);
     hash_foreach(v, callback_mask, callbacks, gmfn);
 
     /* If that didn't catch the mapping, then there's some non-pagetable
@@ -2244,7 +2384,34 @@ int sh_remove_write_access(struct vcpu *
     return 1;
 }
-
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                     mfn_t smfn, unsigned long off)
+{
+    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+
+    ASSERT(mfn_valid(smfn));
+    ASSERT(mfn_valid(gmfn));
+
+    if ( sp->type == SH_type_l1_32_shadow )
+    {
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
+            (v, gmfn, smfn, off);
+    }
+#if CONFIG_PAGING_LEVELS >= 3
+    else if ( sp->type == SH_type_l1_pae_shadow )
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
+            (v, gmfn, smfn, off);
+#if CONFIG_PAGING_LEVELS >= 4
+    else if ( sp->type == SH_type_l1_64_shadow )
+        return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
+            (v, gmfn, smfn, off);
+#endif
+#endif
+
+    return 0;
+}
+#endif
 
 /**************************************************************************/
 /* Remove all mappings of a guest frame from the shadow tables.
@@ -2580,6 +2747,25 @@ static void sh_update_paging_modes(struc
         spin_lock_init(&v->arch.paging.vtlb_lock);
     }
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+    if ( v->arch.paging.shadow.oos_fixups == NULL )
+    {
+        int i;
+        v->arch.paging.shadow.oos_fixups =
+            alloc_xenheap_pages(SHADOW_OOS_FT_ORDER);
+        if ( v->arch.paging.shadow.oos_fixups == NULL )
+        {
+            SHADOW_ERROR("Could not allocate OOS fixup table"
+                         " for dom %u vcpu %u\n",
+                         v->domain->domain_id, v->vcpu_id);
+            domain_crash(v->domain);
+            return;
+        }
+        for ( i = 0; i < SHADOW_OOS_FT_HASH * SHADOW_OOS_FT_ENTRIES; i++ )
+            v->arch.paging.shadow.oos_fixups[i].gmfn = _mfn(INVALID_MFN);
+    }
+#endif /* OOS */
 
     // Valid transitions handled by this function:
     // - For PV guests:
@@ -2908,17 +3094,27 @@ void shadow_teardown(struct domain *d)
         }
     }
 
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
+#if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC))
     /* Free the virtual-TLB array attached to each vcpu */
     for_each_vcpu(d, v)
     {
+#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
         if ( v->arch.paging.vtlb )
         {
             xfree(v->arch.paging.vtlb);
             v->arch.paging.vtlb = NULL;
         }
+#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+        if ( v->arch.paging.shadow.oos_fixups )
+        {
+            free_xenheap_pages(v->arch.paging.shadow.oos_fixups,
+                               SHADOW_OOS_FT_ORDER);
+        }
+#endif /* OOS */
     }
-#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
+#endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
 
     list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
     {
diff -r c8aa06014ac0 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Jun 20 15:10:53 2008 +0100
@@ -1409,6 +1409,9 @@ static int shadow_set_l1e(struct vcpu *v
     int flags = 0;
     struct domain *d = v->domain;
     shadow_l1e_t old_sl1e;
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+    mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
+#endif
     ASSERT(sl1e != NULL);
 
     old_sl1e = *sl1e;
@@ -1425,8 +1428,18 @@ static int shadow_set_l1e(struct vcpu *v
             /* Doesn't look like a pagetable. */
             flags |= SHADOW_SET_ERROR;
             new_sl1e = shadow_l1e_empty();
-        } else {
+        }
+        else
+        {
             shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+            if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn)
+                 && (shadow_l1e_get_flags(new_sl1e) & _PAGE_RW) )
+            {
+                oos_fixup_add(v, new_gmfn, sl1mfn,
+                              pgentry_ptr_to_slot(sl1e));
+            }
+#endif
+
         }
     }
 }
@@ -4238,6 +4251,56 @@ sh_update_cr3(struct vcpu *v, int do_loc
 /**************************************************************************/
 /* Functions to revoke guest rights */
 
+#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
+int sh_rm_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                 mfn_t smfn, unsigned long off)
+{
+    int r;
+    shadow_l1e_t *sl1p, sl1e;
+    struct shadow_page_info *sp;
+
+    ASSERT(mfn_valid(gmfn));
+    ASSERT(mfn_valid(smfn));
+
+    sp = mfn_to_shadow_page(smfn);
+
+    if ( sp->mbz != 0 ||
+#if GUEST_PAGING_LEVELS == 4
+         (sp->type != SH_type_l1_64_shadow)
+#elif GUEST_PAGING_LEVELS == 3
+         (sp->type != SH_type_l1_pae_shadow)
+#elif GUEST_PAGING_LEVELS == 2
+         (sp->type != SH_type_l1_32_shadow)
+#endif
+       )
+        goto fail;
+
+    sl1p = sh_map_domain_page(smfn);
+    sl1p += off;
+    sl1e = *sl1p;
+    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
+          != (_PAGE_PRESENT|_PAGE_RW))
+         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
+    {
+        sh_unmap_domain_page(sl1p);
+        goto fail;
+    }
+
+    /* Found it!  Need to remove its write permissions. */
+    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
+    r = shadow_set_l1e(v, sl1p, sl1e, smfn);
+    ASSERT( !(r & SHADOW_SET_ERROR) );
+
+    sh_unmap_domain_page(sl1p);
+    perfc_incr(shadow_writeable_h_7);
+    return 1;
+
+ fail:
+    perfc_incr(shadow_writeable_h_8);
+    return 0;
+}
+#endif /* OOS */
+
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
 static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
 /* Look up this vaddr in the current shadow and see if it's a writeable
diff -r c8aa06014ac0 xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h Fri Jun 20 15:10:53 2008 +0100
@@ -124,4 +124,8 @@ extern int
 extern int
 SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, GUEST_LEVELS)
     (struct vcpu*v, mfn_t gmfn);
+
+extern int
+SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off);
 #endif
diff -r c8aa06014ac0 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/arch/x86/mm/shadow/private.h Fri Jun 20 15:10:53 2008 +0100
@@ -321,6 +321,16 @@ static inline int sh_type_is_pinnable(st
  */
 #define SHF_out_of_sync (1u<<30)
 #define SHF_oos_may_write (1u<<29)
+
+/* Fixup tables are a non-complete reverse map of writable mappings
+ * for OOS pages.  This lets us quickly resync pages (avoiding a
+ * brute-force search of the shadows) when the va hint is not
+ * sufficient, i.e. when the pagetable is mapped in multiple places
+ * and in multiple shadows. */
+#define SHADOW_OOS_FT_ENTRIES                           \
+    ((PAGE_SIZE << SHADOW_OOS_FT_ORDER)                 \
+     / (SHADOW_OOS_FT_HASH * sizeof(struct oos_fixup)))
+
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
 
 static inline int sh_page_has_multiple_shadows(struct page_info *pg)
@@ -414,6 +424,11 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
 /* Pull an out-of-sync page back into sync. */
 void sh_resync(struct vcpu *v, mfn_t gmfn);
+
+void oos_fixup_add(struct vcpu *v, mfn_t gmfn, mfn_t smfn, unsigned long off);
+
+int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
+                                     mfn_t smfn, unsigned long offset);
 
 /* Pull all out-of-sync shadows back into sync.  If skip != 0, we try
  * to avoid resyncing where we think we can get away with it. */
diff -r c8aa06014ac0 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/arch/x86/mm/shadow/types.h Fri Jun 20 15:10:53 2008 +0100
@@ -441,6 +441,7 @@ struct shadow_walk_t
 #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
 #define sh_resync_l1 INTERNAL_NAME(sh_resync_l1)
 #define sh_safe_not_to_sync INTERNAL_NAME(sh_safe_not_to_sync)
+#define sh_rm_write_access_from_sl1p INTERNAL_NAME(sh_rm_write_access_from_sl1p)
 #endif
 
 /* The sh_guest_(map|get)_* functions depends on Xen's paging levels */
diff -r c8aa06014ac0 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/include/asm-x86/domain.h Fri Jun 20 15:10:53 2008 +0100
@@ -129,6 +129,12 @@ struct shadow_vcpu {
     /* Shadow out-of-sync: pages that this vcpu has let go out of sync */
     mfn_t oos[SHADOW_OOS_PAGES];
     unsigned long oos_va[SHADOW_OOS_PAGES];
+    struct oos_fixup {
+        mfn_t gmfn;
+        mfn_t smfn;
+        unsigned long off;
+    } *oos_fixups;
+    int oos_fixup_used;
 };
 
 /************************************************/
diff -r c8aa06014ac0 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/include/asm-x86/mm.h Fri Jun 20 15:10:53 2008 +0100
@@ -131,7 +131,12 @@ static inline u32 pickle_domptr(struct d
 #define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
 
 /* The number of out-of-sync shadows we allow per vcpu (prime, please) */
-#define SHADOW_OOS_PAGES 7
+#define SHADOW_OOS_PAGES 3
+
+/* The order of the per-vcpu OOS fixup table allocation */
+#define SHADOW_OOS_FT_ORDER 1
+/* The number of hash buckets in the OOS fixup tables */
+#define SHADOW_OOS_FT_HASH 13
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff -r c8aa06014ac0 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h Fri Jun 20 15:10:08 2008 +0100
+++ b/xen/include/asm-x86/perfc_defn.h Fri Jun 20 15:10:53 2008 +0100
@@ -81,7 +81,10 @@ PERFCOUNTER(shadow_writeable_h_4, "shad
 PERFCOUNTER(shadow_writeable_h_4,  "shadow writeable: linux low/solaris")
 PERFCOUNTER(shadow_writeable_h_5,  "shadow writeable: linux high")
 PERFCOUNTER(shadow_writeable_h_6,  "shadow writeable: unsync va")
+PERFCOUNTER(shadow_writeable_h_7,  "shadow writeable: sl1p")
+PERFCOUNTER(shadow_writeable_h_8,  "shadow writeable: sl1p failed")
 PERFCOUNTER(shadow_writeable_bf,   "shadow writeable brute-force")
+PERFCOUNTER(shadow_writeable_bf_1, "shadow writeable resync bf")
 PERFCOUNTER(shadow_mappings,       "shadow removes all mappings")
 PERFCOUNTER(shadow_mappings_bf,    "shadow rm-mappings brute-force")
 PERFCOUNTER(shadow_early_unshadow, "shadow unshadows for fork/exit")
@@ -102,6 +105,13 @@ PERFCOUNTER(shadow_em_ex_non_pt, "shad
 PERFCOUNTER(shadow_em_ex_non_pt,   "shadow extra non-pt-write op")
 PERFCOUNTER(shadow_em_ex_fail,     "shadow extra emulation failed")
 
+PERFCOUNTER(shadow_oos_fixup_add_ok,    "shadow OOS fixup adds")
+PERFCOUNTER(shadow_oos_fixup_no_add,    "shadow OOS fixup no-op adds")
+PERFCOUNTER(shadow_oos_fixup_add_fail,  "shadow OOS fixup failed adds")
+PERFCOUNTER(shadow_oos_fixup_remove,    "shadow OOS fixup removes")
+PERFCOUNTER(shadow_oos_fixup_flush,     "shadow OOS fixup flushes")
+PERFCOUNTER(shadow_oos_fixup_flush_gmfn,"shadow OOS fixup gmfn flushes")
+
 PERFCOUNTER(shadow_unsync,         "shadow OOS unsyncs")
 PERFCOUNTER(shadow_unsync_evict,   "shadow OOS evictions")
 PERFCOUNTER(shadow_resync,         "shadow OOS resyncs")