This is only a first step, the use of the domain lock in do_mmuext_op()
and do_set_gdt() still need looking at, as do the assertions of the
lock being held in create_grant_{pte,va}_mapping().
This change results in a 5% performance improvement for kernel builds
on dual-socket quad-core systems (which is what I used for reference
for both 32- and 64-bit). Along with that, the amount of time reported
as spent in the kernel gets reduced by almost 25% (the fraction of time
spent in the kernel is generally reported significantly higher under
Xen than with a native kernel).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: 2008-04-15/xen/arch/x86/mm.c
===================================================================
--- 2008-04-15.orig/xen/arch/x86/mm.c 2008-04-15 08:48:15.000000000 +0200
+++ 2008-04-15/xen/arch/x86/mm.c 2008-04-15 16:21:12.000000000 +0200
@@ -1371,6 +1371,37 @@ static void free_l4_table(struct page_in
#endif
+static void lock_page(struct page_info *page)
+{
+ unsigned long x, y;
+
+ y = page->u.inuse.type_info;
+ do {
+ for ( ; ; )
+ {
+ ASSERT(y & PGT_count_mask);
+ if (likely(y & PGT_validated) && likely(!(y & PGT_locked)))
+ break;
+ cpu_relax();
+ y = page->u.inuse.type_info;
+ }
+ x = y;
+ }
+ while ( (y = cmpxchg(&page->u.inuse.type_info, x, x | PGT_locked)) != x );
+}
+
+static void unlock_page(struct page_info *page)
+{
+ unsigned long x, y;
+
+ y = page->u.inuse.type_info;
+ do {
+ ASSERT(y & PGT_locked);
+ ASSERT(y & PGT_count_mask);
+ x = y;
+ }
+ while ( (y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_locked)) != x );
+}
/* How to write an entry to the guest pagetables.
* Returns 0 for failure (pointer not valid), 1 for success. */
@@ -1432,24 +1463,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
struct vcpu *curr = current;
struct domain *d = curr->domain;
unsigned long mfn;
+ struct page_info *l1pg = mfn_to_page(gl1mfn);
+ int rc = 1;
+
+ lock_page(l1pg);
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
- return 0;
+ return unlock_page(l1pg), 0;
if ( unlikely(paging_mode_refcounts(d)) )
- return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+ {
+ rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+ unlock_page(l1pg);
+ return rc;
+ }
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
/* Translate foreign guest addresses. */
mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
if ( unlikely(mfn == INVALID_MFN) )
- return 0;
+ return unlock_page(l1pg), 0;
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
{
+ unlock_page(l1pg);
MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & l1_disallow_mask(d));
return 0;
@@ -1459,30 +1499,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
{
adjust_guest_l1e(nl1e, d);
- return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
- preserve_ad);
+ rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+ preserve_ad);
+ unlock_page(l1pg);
+ return rc;
}
if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
- return 0;
+ return unlock_page(l1pg), 0;
adjust_guest_l1e(nl1e, d);
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
preserve_ad)) )
{
- put_page_from_l1e(nl1e, d);
- return 0;
+ ol1e = nl1e;
+ rc = 0;
}
}
- else
+ else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+ preserve_ad)) )
{
- if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
- preserve_ad)) )
- return 0;
+ unlock_page(l1pg);
+ return 0;
}
+ unlock_page(l1pg);
put_page_from_l1e(ol1e, d);
- return 1;
+ return rc;
}
@@ -1496,6 +1539,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
l2_pgentry_t ol2e;
struct vcpu *curr = current;
struct domain *d = curr->domain;
+ struct page_info *l2pg = mfn_to_page(pfn);
+ int rc = 1;
if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
{
@@ -1503,13 +1548,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
return 0;
}
+ lock_page(l2pg);
+
if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
- return 0;
+ return unlock_page(l2pg), 0;
if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
+ unlock_page(l2pg);
MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
return 0;
@@ -1519,28 +1567,32 @@ static int mod_l2_entry(l2_pgentry_t *pl
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
{
adjust_guest_l2e(nl2e, d);
- return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+ rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+ unlock_page(l2pg);
+ return rc;
}
if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
- return 0;
+ return unlock_page(l2pg), 0;
adjust_guest_l2e(nl2e, d);
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
preserve_ad)) )
{
- put_page_from_l2e(nl2e, pfn);
- return 0;
+ ol2e = nl2e;
+ rc = 0;
}
}
else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
preserve_ad)) )
{
+ unlock_page(l2pg);
return 0;
}
+ unlock_page(l2pg);
put_page_from_l2e(ol2e, pfn);
- return 1;
+ return rc;
}
#if CONFIG_PAGING_LEVELS >= 3
@@ -1554,7 +1606,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
l3_pgentry_t ol3e;
struct vcpu *curr = current;
struct domain *d = curr->domain;
- int okay;
+ struct page_info *l3pg = mfn_to_page(pfn);
+ int okay, rc = 1;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
{
@@ -1571,13 +1624,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
return 0;
#endif
+ lock_page(l3pg);
+
if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
- return 0;
+ return unlock_page(l3pg), 0;
if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
{
if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
{
+ unlock_page(l3pg);
MEM_LOG("Bad L3 flags %x",
l3e_get_flags(nl3e) & l3_disallow_mask(d));
return 0;
@@ -1587,23 +1643,26 @@ static int mod_l3_entry(l3_pgentry_t *pl
if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
{
adjust_guest_l3e(nl3e, d);
- return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+ rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+ unlock_page(l3pg);
+ return rc;
}
if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
- return 0;
+ return unlock_page(l3pg), 0;
adjust_guest_l3e(nl3e, d);
if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
preserve_ad)) )
{
- put_page_from_l3e(nl3e, pfn);
- return 0;
+ ol3e = nl3e;
+ rc = 0;
}
}
else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
preserve_ad)) )
{
+ unlock_page(l3pg);
return 0;
}
@@ -1612,8 +1671,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+ unlock_page(l3pg);
put_page_from_l3e(ol3e, pfn);
- return 1;
+ return rc;
}
#endif
@@ -1629,6 +1689,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
struct vcpu *curr = current;
struct domain *d = curr->domain;
l4_pgentry_t ol4e;
+ struct page_info *l4pg = mfn_to_page(pfn);
+ int rc = 1;
if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
{
@@ -1636,13 +1698,16 @@ static int mod_l4_entry(l4_pgentry_t *pl
return 0;
}
+ lock_page(l4pg);
+
if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
- return 0;
+ return unlock_page(l4pg), 0;
if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
{
if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
{
+ unlock_page(l4pg);
MEM_LOG("Bad L4 flags %x",
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
return 0;
@@ -1652,28 +1717,32 @@ static int mod_l4_entry(l4_pgentry_t *pl
if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
{
adjust_guest_l4e(nl4e, d);
- return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+ rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+ unlock_page(l4pg);
+ return rc;
}
if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
- return 0;
+ return unlock_page(l4pg), 0;
adjust_guest_l4e(nl4e, d);
if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
preserve_ad)) )
{
- put_page_from_l4e(nl4e, pfn);
- return 0;
+ ol4e = nl4e;
+ rc = 0;
}
}
else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
preserve_ad)) )
{
+ unlock_page(l4pg);
return 0;
}
+ unlock_page(l4pg);
put_page_from_l4e(ol4e, pfn);
- return 1;
+ return rc;
}
#endif
@@ -2492,8 +2561,6 @@ int do_mmu_update(
domain_mmap_cache_init(&mapcache);
- domain_lock(d);
-
for ( i = 0; i < count; i++ )
{
if ( hypercall_preempt_check() )
@@ -2664,8 +2731,6 @@ int do_mmu_update(
process_deferred_ops();
- domain_unlock(d);
-
domain_mmap_cache_destroy(&mapcache);
perfc_add(num_page_updates, i);
@@ -2718,14 +2783,19 @@ static int create_grant_pte_mapping(
goto failed;
}
+ lock_page(page);
+
ol1e = *(l1_pgentry_t *)va;
if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
{
+ unlock_page(page);
put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
+ unlock_page(page);
+
if ( !paging_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
@@ -2769,16 +2839,14 @@ static int destroy_grant_pte_mapping(
goto failed;
}
- if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
- {
- put_page_type(page);
- rc = GNTST_general_error;
- goto failed;
- }
+ lock_page(page);
+
+ ol1e = *(l1_pgentry_t *)va;
/* Check that the virtual address supplied is actually mapped to frame. */
if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
{
+ unlock_page(page);
MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
(unsigned long)l1e_get_intpte(ol1e), addr, frame);
put_page_type(page);
@@ -2793,12 +2861,14 @@ static int destroy_grant_pte_mapping(
d->vcpu[0] /* Change if we go to per-vcpu shadows. */,
0)) )
{
+ unlock_page(page);
MEM_LOG("Cannot delete PTE entry at %p", va);
put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
+ unlock_page(page);
put_page_type(page);
failed:
@@ -2814,6 +2884,7 @@ static int create_grant_va_mapping(
l1_pgentry_t *pl1e, ol1e;
struct domain *d = v->domain;
unsigned long gl1mfn;
+ struct page_info *l1pg;
int okay;
ASSERT(domain_is_locked(d));
@@ -2826,8 +2897,11 @@ static int create_grant_va_mapping(
MEM_LOG("Could not find L1 PTE for address %lx", va);
return GNTST_general_error;
}
+ l1pg = mfn_to_page(gl1mfn);
+ lock_page(l1pg);
ol1e = *pl1e;
okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+ unlock_page(l1pg);
guest_unmap_l1e(v, pl1e);
pl1e = NULL;
@@ -2845,6 +2919,7 @@ static int replace_grant_va_mapping(
{
l1_pgentry_t *pl1e, ol1e;
unsigned long gl1mfn;
+ struct page_info *l1pg;
int rc = 0;
pl1e = guest_map_l1e(v, addr, &gl1mfn);
@@ -2853,11 +2928,15 @@ static int replace_grant_va_mapping(
MEM_LOG("Could not find L1 PTE for address %lx", addr);
return GNTST_general_error;
}
+
+ l1pg = mfn_to_page(gl1mfn);
+ lock_page(l1pg);
ol1e = *pl1e;
/* Check that the virtual address supplied is actually mapped to frame. */
if ( unlikely(l1e_get_pfn(ol1e) != frame) )
{
+ unlock_page(l1pg);
MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
l1e_get_pfn(ol1e), addr, frame);
rc = GNTST_general_error;
@@ -2867,11 +2946,14 @@ static int replace_grant_va_mapping(
/* Delete pagetable entry. */
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
{
+ unlock_page(l1pg);
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
rc = GNTST_general_error;
goto out;
}
+ unlock_page(l1pg);
+
out:
guest_unmap_l1e(v, pl1e);
return rc;
@@ -2906,6 +2988,7 @@ int replace_grant_host_mapping(
struct vcpu *curr = current;
l1_pgentry_t *pl1e, ol1e;
unsigned long gl1mfn;
+ struct page_info *l1pg;
int rc;
if ( flags & GNTMAP_contains_pte )
@@ -2927,16 +3010,21 @@ int replace_grant_host_mapping(
(unsigned long)new_addr);
return GNTST_general_error;
}
+
+ l1pg = mfn_to_page(gl1mfn);
+ lock_page(l1pg);
ol1e = *pl1e;
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
gl1mfn, curr, 0)) )
{
+ unlock_page(l1pg);
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
guest_unmap_l1e(curr, pl1e);
return GNTST_general_error;
}
+ unlock_page(l1pg);
guest_unmap_l1e(curr, pl1e);
rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3014,8 +3102,6 @@ int do_update_va_mapping(unsigned long v
if ( rc )
return rc;
- domain_lock(d);
-
pl1e = guest_map_l1e(v, va, &gl1mfn);
if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
@@ -3027,8 +3113,6 @@ int do_update_va_mapping(unsigned long v
process_deferred_ops();
- domain_unlock(d);
-
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
case UVMF_TLB_FLUSH:
@@ -3669,8 +3753,6 @@ int ptwr_do_page_fault(struct vcpu *v, u
struct ptwr_emulate_ctxt ptwr_ctxt;
int rc;
- domain_lock(d);
-
/* Attempt to read the PTE that maps the VA being accessed. */
guest_get_eff_l1e(v, addr, &pte);
page = l1e_get_page(pte);
@@ -3690,16 +3772,16 @@ int ptwr_do_page_fault(struct vcpu *v, u
ptwr_ctxt.cr2 = addr;
ptwr_ctxt.pte = pte;
+ lock_page(page);
rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+ unlock_page(page);
if ( rc == X86EMUL_UNHANDLEABLE )
goto bail;
- domain_unlock(d);
perfc_incr(ptwr_emulations);
return EXCRET_fault_fixed;
bail:
- domain_unlock(d);
return 0;
}
Index: 2008-04-15/xen/include/asm-x86/mm.h
===================================================================
--- 2008-04-15.orig/xen/include/asm-x86/mm.h 2008-04-15 08:48:06.000000000
+0200
+++ 2008-04-15/xen/include/asm-x86/mm.h 2008-04-15 15:16:03.000000000 +0200
@@ -82,9 +82,12 @@ struct page_info
/* PAE only: is this an L2 page directory containing Xen-private mappings? */
#define _PGT_pae_xen_l2 26
#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
+ /* The page is currently locked for modification. */
+#define _PGT_locked 25
+#define PGT_locked (1U<<_PGT_locked)
- /* 26-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<26)-1)
+ /* 25-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1U<<25)-1)
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|