I like this better now I realise I can turn the unlock_page() into a
byte-sized write on x86/64 at least, by turning page->shadow_flags into a
u32. That means only one LOCK prefix per critical section.
I will look into applying this patch plus this optimisation for x86/64.
-- Keir
On 12/6/08 16:11, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote:
> This is a re-send of an earlier sent patch that was deemed to be too
> complex, yet a simpler mechanism didn't find its way into the code base
> so far. This version has two bugs fixed and removes the lock also from
> do_mmuext_op(), but continues to not be a complete implementation: At
> least the use of the domain lock in do_set_gdt() still needs looking
> at, as do the assertions of the lock being held in
> create_grant_{pte,va}_mapping().
>
> This change results in a 5% performance improvement for kernel builds
> on dual-socket quad-core systems (which is what I used for reference
> for both 32- and 64-bit). Along with that, the amount of time reported
> as spent in the kernel gets reduced by almost 25% (the fraction of time
> spent in the kernel is generally reported significantly higher under
> Xen than with a native kernel).
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
>
> Index: 2008-06-12/xen/arch/x86/domain.c
> ===================================================================
> --- 2008-06-12.orig/xen/arch/x86/domain.c 2008-06-12 09:07:02.000000000 +0200
> +++ 2008-06-12/xen/arch/x86/domain.c 2008-06-12 09:09:12.000000000 +0200
> @@ -262,7 +262,7 @@ static int setup_compat_l4(struct vcpu *
> return -ENOMEM;
>
> /* This page needs to look like a pagetable so that it can be shadowed */
> - pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
> + pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
>
> l4tab = copy_page(page_to_virt(pg), idle_pg_table);
> l4tab[0] = l4e_empty();
> Index: 2008-06-12/xen/arch/x86/domain_build.c
> ===================================================================
> --- 2008-06-12.orig/xen/arch/x86/domain_build.c 2008-06-12 09:08:54.000000000
> +0200
> +++ 2008-06-12/xen/arch/x86/domain_build.c 2008-06-12 09:09:12.000000000 +0200
> @@ -575,6 +575,7 @@ int __init construct_dom0(
> page = alloc_domheap_page(NULL, 0);
> if ( !page )
> panic("Not enough RAM for domain 0 PML4.\n");
> + page->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
> l4start = l4tab = page_to_virt(page);
> }
> copy_page(l4tab, idle_pg_table);
> Index: 2008-06-12/xen/arch/x86/mm.c
> ===================================================================
> --- 2008-06-12.orig/xen/arch/x86/mm.c 2008-06-12 09:08:42.000000000 +0200
> +++ 2008-06-12/xen/arch/x86/mm.c 2008-06-12 09:09:12.000000000 +0200
> @@ -1360,6 +1360,37 @@ static void free_l4_table(struct page_in
>
> #endif
>
> +static void lock_page(struct page_info *page)
> +{
> + unsigned long x, y;
> +
> + y = page->u.inuse.type_info;
> + do {
> + for ( ; ; )
> + {
> + ASSERT(y & PGT_count_mask);
> + if (likely(y & PGT_validated) && likely(!(y & PGT_locked)))
> + break;
> + cpu_relax();
> + y = page->u.inuse.type_info;
> + }
> + x = y;
> + }
> + while ( (y = cmpxchg(&page->u.inuse.type_info, x, x | PGT_locked)) != x
> );
> +}
> +
> +static void unlock_page(struct page_info *page)
> +{
> + unsigned long x, y;
> +
> + y = page->u.inuse.type_info;
> + do {
> + ASSERT(y & PGT_locked);
> + ASSERT(y & PGT_count_mask);
> + x = y;
> + }
> + while ( (y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_locked)) != x
> );
> +}
>
> /* How to write an entry to the guest pagetables.
> * Returns 0 for failure (pointer not valid), 1 for success. */
> @@ -1421,24 +1452,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
> struct vcpu *curr = current;
> struct domain *d = curr->domain;
> unsigned long mfn;
> + struct page_info *l1pg = mfn_to_page(gl1mfn);
> + int rc = 1;
> +
> + lock_page(l1pg);
>
> if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
> - return 0;
> + return unlock_page(l1pg), 0;
>
> if ( unlikely(paging_mode_refcounts(d)) )
> - return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
> + {
> + rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
> + unlock_page(l1pg);
> + return rc;
> + }
>
> if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
> {
> /* Translate foreign guest addresses. */
> mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
> if ( unlikely(mfn == INVALID_MFN) )
> - return 0;
> + return unlock_page(l1pg), 0;
> ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
> nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
>
> if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
> {
> + unlock_page(l1pg);
> MEM_LOG("Bad L1 flags %x",
> l1e_get_flags(nl1e) & l1_disallow_mask(d));
> return 0;
> @@ -1448,30 +1488,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
> if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
> {
> adjust_guest_l1e(nl1e, d);
> - return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
> - preserve_ad);
> + rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
> + preserve_ad);
> + unlock_page(l1pg);
> + return rc;
> }
>
> if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
> - return 0;
> + return unlock_page(l1pg), 0;
>
> adjust_guest_l1e(nl1e, d);
> if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
> preserve_ad)) )
> {
> - put_page_from_l1e(nl1e, d);
> - return 0;
> + ol1e = nl1e;
> + rc = 0;
> }
> }
> - else
> + else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
> + preserve_ad)) )
> {
> - if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
> - preserve_ad)) )
> - return 0;
> + unlock_page(l1pg);
> + return 0;
> }
>
> + unlock_page(l1pg);
> put_page_from_l1e(ol1e, d);
> - return 1;
> + return rc;
> }
>
>
> @@ -1485,6 +1528,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
> l2_pgentry_t ol2e;
> struct vcpu *curr = current;
> struct domain *d = curr->domain;
> + struct page_info *l2pg = mfn_to_page(pfn);
> + int rc = 1;
>
> if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
> {
> @@ -1492,13 +1537,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
> return 0;
> }
>
> + lock_page(l2pg);
> +
> if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
> - return 0;
> + return unlock_page(l2pg), 0;
>
> if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
> {
> if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
> {
> + unlock_page(l2pg);
> MEM_LOG("Bad L2 flags %x",
> l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
> return 0;
> @@ -1508,28 +1556,32 @@ static int mod_l2_entry(l2_pgentry_t *pl
> if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
> {
> adjust_guest_l2e(nl2e, d);
> - return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
> preserve_ad);
> + rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
> + unlock_page(l2pg);
> + return rc;
> }
>
> if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
> - return 0;
> + return unlock_page(l2pg), 0;
>
> adjust_guest_l2e(nl2e, d);
> if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
> preserve_ad)) )
> {
> - put_page_from_l2e(nl2e, pfn);
> - return 0;
> + ol2e = nl2e;
> + rc = 0;
> }
> }
> else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
> preserve_ad)) )
> {
> + unlock_page(l2pg);
> return 0;
> }
>
> + unlock_page(l2pg);
> put_page_from_l2e(ol2e, pfn);
> - return 1;
> + return rc;
> }
>
> #if CONFIG_PAGING_LEVELS >= 3
> @@ -1543,7 +1595,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
> l3_pgentry_t ol3e;
> struct vcpu *curr = current;
> struct domain *d = curr->domain;
> - int okay;
> + struct page_info *l3pg = mfn_to_page(pfn);
> + int okay, rc = 1;
>
> if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
> {
> @@ -1558,13 +1611,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
> if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
> return 0;
>
> + lock_page(l3pg);
> +
> if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
> - return 0;
> + return unlock_page(l3pg), 0;
>
> if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
> {
> if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
> {
> + unlock_page(l3pg);
> MEM_LOG("Bad L3 flags %x",
> l3e_get_flags(nl3e) & l3_disallow_mask(d));
> return 0;
> @@ -1574,23 +1630,26 @@ static int mod_l3_entry(l3_pgentry_t *pl
> if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
> {
> adjust_guest_l3e(nl3e, d);
> - return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
> preserve_ad);
> + rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
> + unlock_page(l3pg);
> + return rc;
> }
>
> if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
> - return 0;
> + return unlock_page(l3pg), 0;
>
> adjust_guest_l3e(nl3e, d);
> if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
> preserve_ad)) )
> {
> - put_page_from_l3e(nl3e, pfn);
> - return 0;
> + ol3e = nl3e;
> + rc = 0;
> }
> }
> else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
> preserve_ad)) )
> {
> + unlock_page(l3pg);
> return 0;
> }
>
> @@ -1599,8 +1658,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
>
> pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
>
> + unlock_page(l3pg);
> put_page_from_l3e(ol3e, pfn);
> - return 1;
> + return rc;
> }
>
> #endif
> @@ -1616,6 +1676,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
> struct vcpu *curr = current;
> struct domain *d = curr->domain;
> l4_pgentry_t ol4e;
> + struct page_info *l4pg = mfn_to_page(pfn);
> + int rc = 1;
>
> if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
> {
> @@ -1623,13 +1685,16 @@ static int mod_l4_entry(l4_pgentry_t *pl
> return 0;
> }
>
> + lock_page(l4pg);
> +
> if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
> - return 0;
> + return unlock_page(l4pg), 0;
>
> if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
> {
> if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
> {
> + unlock_page(l4pg);
> MEM_LOG("Bad L4 flags %x",
> l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
> return 0;
> @@ -1639,28 +1704,32 @@ static int mod_l4_entry(l4_pgentry_t *pl
> if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
> {
> adjust_guest_l4e(nl4e, d);
> - return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
> preserve_ad);
> + rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
> + unlock_page(l4pg);
> + return rc;
> }
>
> if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
> - return 0;
> + return unlock_page(l4pg), 0;
>
> adjust_guest_l4e(nl4e, d);
> if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
> preserve_ad)) )
> {
> - put_page_from_l4e(nl4e, pfn);
> - return 0;
> + ol4e = nl4e;
> + rc = 0;
> }
> }
> else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
> preserve_ad)) )
> {
> + unlock_page(l4pg);
> return 0;
> }
>
> + unlock_page(l4pg);
> put_page_from_l4e(ol4e, pfn);
> - return 1;
> + return rc;
> }
>
> #endif
> @@ -2189,8 +2258,6 @@ int do_mmuext_op(
> goto out;
> }
>
> - domain_lock(d);
> -
> for ( i = 0; i < count; i++ )
> {
> if ( hypercall_preempt_check() )
> @@ -2438,8 +2505,6 @@ int do_mmuext_op(
>
> process_deferred_ops();
>
> - domain_unlock(d);
> -
> perfc_add(num_mmuext_ops, i);
>
> out:
> @@ -2493,8 +2558,6 @@ int do_mmu_update(
>
> domain_mmap_cache_init(&mapcache);
>
> - domain_lock(d);
> -
> for ( i = 0; i < count; i++ )
> {
> if ( hypercall_preempt_check() )
> @@ -2667,8 +2730,6 @@ int do_mmu_update(
>
> process_deferred_ops();
>
> - domain_unlock(d);
> -
> domain_mmap_cache_destroy(&mapcache);
>
> perfc_add(num_page_updates, i);
> @@ -2721,14 +2782,19 @@ static int create_grant_pte_mapping(
> goto failed;
> }
>
> + lock_page(page);
> +
> ol1e = *(l1_pgentry_t *)va;
> if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
> {
> + unlock_page(page);
> put_page_type(page);
> rc = GNTST_general_error;
> goto failed;
> }
>
> + unlock_page(page);
> +
> if ( !paging_mode_refcounts(d) )
> put_page_from_l1e(ol1e, d);
>
> @@ -2772,16 +2838,14 @@ static int destroy_grant_pte_mapping(
> goto failed;
> }
>
> - if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
> - {
> - put_page_type(page);
> - rc = GNTST_general_error;
> - goto failed;
> - }
> + lock_page(page);
> +
> + ol1e = *(l1_pgentry_t *)va;
>
> /* Check that the virtual address supplied is actually mapped to frame.
> */
> if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
> {
> + unlock_page(page);
> MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame
> %lx",
> (unsigned long)l1e_get_intpte(ol1e), addr, frame);
> put_page_type(page);
> @@ -2796,12 +2860,14 @@ static int destroy_grant_pte_mapping(
> d->vcpu[0] /* Change if we go to per-vcpu shadows. */,
> 0)) )
> {
> + unlock_page(page);
> MEM_LOG("Cannot delete PTE entry at %p", va);
> put_page_type(page);
> rc = GNTST_general_error;
> goto failed;
> }
>
> + unlock_page(page);
> put_page_type(page);
>
> failed:
> @@ -2817,6 +2883,7 @@ static int create_grant_va_mapping(
> l1_pgentry_t *pl1e, ol1e;
> struct domain *d = v->domain;
> unsigned long gl1mfn;
> + struct page_info *l1pg;
> int okay;
>
> ASSERT(domain_is_locked(d));
> @@ -2829,8 +2896,11 @@ static int create_grant_va_mapping(
> MEM_LOG("Could not find L1 PTE for address %lx", va);
> return GNTST_general_error;
> }
> + l1pg = mfn_to_page(gl1mfn);
> + lock_page(l1pg);
> ol1e = *pl1e;
> okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
> + unlock_page(l1pg);
> guest_unmap_l1e(v, pl1e);
> pl1e = NULL;
>
> @@ -2848,6 +2918,7 @@ static int replace_grant_va_mapping(
> {
> l1_pgentry_t *pl1e, ol1e;
> unsigned long gl1mfn;
> + struct page_info *l1pg;
> int rc = 0;
>
> pl1e = guest_map_l1e(v, addr, &gl1mfn);
> @@ -2856,11 +2927,15 @@ static int replace_grant_va_mapping(
> MEM_LOG("Could not find L1 PTE for address %lx", addr);
> return GNTST_general_error;
> }
> +
> + l1pg = mfn_to_page(gl1mfn);
> + lock_page(l1pg);
> ol1e = *pl1e;
>
> /* Check that the virtual address supplied is actually mapped to frame.
> */
> if ( unlikely(l1e_get_pfn(ol1e) != frame) )
> {
> + unlock_page(l1pg);
> MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
> l1e_get_pfn(ol1e), addr, frame);
> rc = GNTST_general_error;
> @@ -2870,11 +2945,14 @@ static int replace_grant_va_mapping(
> /* Delete pagetable entry. */
> if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
> {
> + unlock_page(l1pg);
> MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
> rc = GNTST_general_error;
> goto out;
> }
>
> + unlock_page(l1pg);
> +
> out:
> guest_unmap_l1e(v, pl1e);
> return rc;
> @@ -2909,6 +2987,7 @@ int replace_grant_host_mapping(
> struct vcpu *curr = current;
> l1_pgentry_t *pl1e, ol1e;
> unsigned long gl1mfn;
> + struct page_info *l1pg;
> int rc;
>
> if ( flags & GNTMAP_contains_pte )
> @@ -2930,16 +3009,21 @@ int replace_grant_host_mapping(
> (unsigned long)new_addr);
> return GNTST_general_error;
> }
> +
> + l1pg = mfn_to_page(gl1mfn);
> + lock_page(l1pg);
> ol1e = *pl1e;
>
> if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
> gl1mfn, curr, 0)) )
> {
> + unlock_page(l1pg);
> MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
> guest_unmap_l1e(curr, pl1e);
> return GNTST_general_error;
> }
>
> + unlock_page(l1pg);
> guest_unmap_l1e(curr, pl1e);
>
> rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
> @@ -3017,8 +3101,6 @@ int do_update_va_mapping(unsigned long v
> if ( rc )
> return rc;
>
> - domain_lock(d);
> -
> pl1e = guest_map_l1e(v, va, &gl1mfn);
>
> if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
> @@ -3030,8 +3112,6 @@ int do_update_va_mapping(unsigned long v
>
> process_deferred_ops();
>
> - domain_unlock(d);
> -
> switch ( flags & UVMF_FLUSHTYPE_MASK )
> {
> case UVMF_TLB_FLUSH:
> @@ -3651,8 +3731,6 @@ int ptwr_do_page_fault(struct vcpu *v, u
> struct ptwr_emulate_ctxt ptwr_ctxt;
> int rc;
>
> - domain_lock(d);
> -
> /* Attempt to read the PTE that maps the VA being accessed. */
> guest_get_eff_l1e(v, addr, &pte);
> page = l1e_get_page(pte);
> @@ -3672,16 +3750,16 @@ int ptwr_do_page_fault(struct vcpu *v, u
> ptwr_ctxt.cr2 = addr;
> ptwr_ctxt.pte = pte;
>
> + lock_page(page);
> rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
> + unlock_page(page);
> if ( rc == X86EMUL_UNHANDLEABLE )
> goto bail;
>
> - domain_unlock(d);
> perfc_incr(ptwr_emulations);
> return EXCRET_fault_fixed;
>
> bail:
> - domain_unlock(d);
> return 0;
> }
>
> Index: 2008-06-12/xen/include/asm-x86/mm.h
> ===================================================================
> --- 2008-06-12.orig/xen/include/asm-x86/mm.h 2008-06-12 09:06:20.000000000
> +0200
> +++ 2008-06-12/xen/include/asm-x86/mm.h 2008-06-12 09:09:12.000000000 +0200
> @@ -82,9 +82,12 @@ struct page_info
> /* PAE only: is this an L2 page directory containing Xen-private mappings?
> */
> #define _PGT_pae_xen_l2 26
> #define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
> + /* The page is currently locked for modification. */
> +#define _PGT_locked 25
> +#define PGT_locked (1U<<_PGT_locked)
>
> - /* 26-bit count of uses of this frame as its current type. */
> -#define PGT_count_mask ((1U<<26)-1)
> + /* 25-bit count of uses of this frame as its current type. */
> +#define PGT_count_mask ((1U<<25)-1)
>
> /* Cleared when the owning guest 'frees' this page. */
> #define _PGC_allocated 31
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|