To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86: remove use of per-domain lock from page table entry handling
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Tue, 22 Apr 2008 14:01:44 +0100
Delivery-date: Tue, 22 Apr 2008 06:01:58 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This is only a first step: the use of the domain lock in do_mmuext_op()
and do_set_gdt() still needs looking at, as do the assertions that the
lock is held in create_grant_{pte,va}_mapping().

This change yields a 5% performance improvement for kernel builds on
dual-socket quad-core systems (the reference hardware for both the 32-
and 64-bit measurements). Along with that, the time reported as spent in
the kernel drops by almost 25% (the fraction of time spent in the kernel
is generally reported as significantly higher under Xen than with a
native kernel).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
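
For anyone who wants to experiment with the locking scheme outside of
Xen: the new lock_page()/unlock_page() pair below implements a per-page
spin lock by atomically setting a flag bit in page->u.inuse.type_info
with a cmpxchg loop. Here is a minimal user-space sketch of the same
technique; all names (fake_page, page_bitlock, ...) are hypothetical
stand-ins, and GCC's __sync_val_compare_and_swap() fills in for Xen's
cmpxchg().

#include <assert.h>
#include <sched.h>

#define PGT_LOCKED      (1UL << 25)        /* lock flag, as in the patch */
#define PGT_COUNT_MASK  ((1UL << 25) - 1)  /* 25-bit type-use count */

struct fake_page {
    unsigned long type_info;
};

static void page_bitlock(struct fake_page *pg)
{
    unsigned long x, y;

    y = pg->type_info;
    do {
        /* Spin until the lock bit is observed clear. */
        while ( y & PGT_LOCKED )
        {
            sched_yield();       /* user-space stand-in for cpu_relax() */
            y = pg->type_info;
        }
        x = y;
        /* Try to set the bit; if another thread raced us, y != x and
         * we go around again. */
    } while ( (y = __sync_val_compare_and_swap(&pg->type_info,
                                               x, x | PGT_LOCKED)) != x );
}

static void page_bitunlock(struct fake_page *pg)
{
    unsigned long x, y;

    y = pg->type_info;
    do {
        assert(y & PGT_LOCKED);  /* must hold the lock to drop it */
        x = y;
    } while ( (y = __sync_val_compare_and_swap(&pg->type_info,
                                               x, x & ~PGT_LOCKED)) != x );
}

Under contention the acquire path simply spins; that is acceptable here
because page table entry updates are short critical sections, which is
also why the patch pairs the wait loop with cpu_relax().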

Index: 2008-04-15/xen/arch/x86/mm.c
===================================================================
--- 2008-04-15.orig/xen/arch/x86/mm.c   2008-04-15 08:48:15.000000000 +0200
+++ 2008-04-15/xen/arch/x86/mm.c        2008-04-15 16:21:12.000000000 +0200
@@ -1371,6 +1371,37 @@ static void free_l4_table(struct page_in
 
 #endif
 
+static void lock_page(struct page_info *page)
+{
+    unsigned long x, y;
+
+    y = page->u.inuse.type_info;
+    do {
+        for ( ; ; )
+        {
+            ASSERT(y & PGT_count_mask);
+            if (likely(y & PGT_validated) && likely(!(y & PGT_locked)))
+                break;
+            cpu_relax();
+            y = page->u.inuse.type_info;
+        }
+        x = y;
+    }
+    while ( (y = cmpxchg(&page->u.inuse.type_info, x, x | PGT_locked)) != x );
+}
+
+static void unlock_page(struct page_info *page)
+{
+    unsigned long x, y;
+
+    y = page->u.inuse.type_info;
+    do {
+        ASSERT(y & PGT_locked);
+        ASSERT(y & PGT_count_mask);
+        x = y;
+    }
+    while ( (y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_locked)) != x );
+}
 
 /* How to write an entry to the guest pagetables.
  * Returns 0 for failure (pointer not valid), 1 for success. */
@@ -1432,24 +1463,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     unsigned long mfn;
+    struct page_info *l1pg = mfn_to_page(gl1mfn);
+    int rc = 1;
+
+    lock_page(l1pg);
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        return 0;
+        return unlock_page(l1pg), 0;
 
     if ( unlikely(paging_mode_refcounts(d)) )
-        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+    {
+        rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+        unlock_page(l1pg);
+        return rc;
+    }
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
         /* Translate foreign guest addresses. */
         mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
         if ( unlikely(mfn == INVALID_MFN) )
-            return 0;
+            return unlock_page(l1pg), 0;
         ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
         nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
 
         if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
         {
+            unlock_page(l1pg);
             MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & l1_disallow_mask(d));
             return 0;
@@ -1459,30 +1499,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
         if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
         {
             adjust_guest_l1e(nl1e, d);
-            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
-                                preserve_ad);
+            rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+                              preserve_ad);
+            unlock_page(l1pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
-            return 0;
+            return unlock_page(l1pg), 0;
         
         adjust_guest_l1e(nl1e, d);
         if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l1e(nl1e, d);
-            return 0;
+            ol1e = nl1e;
+            rc = 0;
         }
     }
-    else
+    else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+                                     preserve_ad)) )
     {
-        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
-                                    preserve_ad)) )
-            return 0;
+        unlock_page(l1pg);
+        return 0;
     }
 
+    unlock_page(l1pg);
     put_page_from_l1e(ol1e, d);
-    return 1;
+    return rc;
 }
 
 
@@ -1496,6 +1539,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
     l2_pgentry_t ol2e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
+    struct page_info *l2pg = mfn_to_page(pfn);
+    int rc = 1;
 
     if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
     {
@@ -1503,13 +1548,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
         return 0;
     }
 
+    lock_page(l2pg);
+
     if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
-        return 0;
+        return unlock_page(l2pg), 0;
 
     if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
+            unlock_page(l2pg);
             MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
@@ -1519,28 +1567,32 @@ static int mod_l2_entry(l2_pgentry_t *pl
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
         {
             adjust_guest_l2e(nl2e, d);
-            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+            unlock_page(l2pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
-            return 0;
+            return unlock_page(l2pg), 0;
 
         adjust_guest_l2e(nl2e, d);
         if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l2e(nl2e, pfn);
-            return 0;
+            ol2e = nl2e;
+            rc = 0;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l2pg);
         return 0;
     }
 
+    unlock_page(l2pg);
     put_page_from_l2e(ol2e, pfn);
-    return 1;
+    return rc;
 }
 
 #if CONFIG_PAGING_LEVELS >= 3
@@ -1554,7 +1606,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
     l3_pgentry_t ol3e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
-    int okay;
+    struct page_info *l3pg = mfn_to_page(pfn);
+    int okay, rc = 1;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
     {
@@ -1571,13 +1624,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
         return 0;
 #endif 
 
+    lock_page(l3pg);
+
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return 0;
+        return unlock_page(l3pg), 0;
 
     if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
     {
         if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
         {
+            unlock_page(l3pg);
             MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & l3_disallow_mask(d));
             return 0;
@@ -1587,23 +1643,26 @@ static int mod_l3_entry(l3_pgentry_t *pl
         if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
         {
             adjust_guest_l3e(nl3e, d);
-            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+            unlock_page(l3pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
-            return 0;
+            return unlock_page(l3pg), 0;
 
         adjust_guest_l3e(nl3e, d);
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l3e(nl3e, pfn);
-            return 0;
+            ol3e = nl3e;
+            rc = 0;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l3pg);
         return 0;
     }
 
@@ -1612,8 +1671,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
+    unlock_page(l3pg);
     put_page_from_l3e(ol3e, pfn);
-    return 1;
+    return rc;
 }
 
 #endif
@@ -1629,6 +1689,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     l4_pgentry_t ol4e;
+    struct page_info *l4pg = mfn_to_page(pfn);
+    int rc = 1;
 
     if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
     {
@@ -1636,13 +1698,16 @@ static int mod_l4_entry(l4_pgentry_t *pl
         return 0;
     }
 
+    lock_page(l4pg);
+
     if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return 0;
+        return unlock_page(l4pg), 0;
 
     if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
+            unlock_page(l4pg);
             MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return 0;
@@ -1652,28 +1717,32 @@ static int mod_l4_entry(l4_pgentry_t *pl
         if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
         {
             adjust_guest_l4e(nl4e, d);
-            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+            unlock_page(l4pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
-            return 0;
+            return unlock_page(l4pg), 0;
 
         adjust_guest_l4e(nl4e, d);
         if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l4e(nl4e, pfn);
-            return 0;
+            ol4e = nl4e;
+            rc = 0;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l4pg);
         return 0;
     }
 
+    unlock_page(l4pg);
     put_page_from_l4e(ol4e, pfn);
-    return 1;
+    return rc;
 }
 
 #endif
@@ -2492,8 +2561,6 @@ int do_mmu_update(
 
     domain_mmap_cache_init(&mapcache);
 
-    domain_lock(d);
-
     for ( i = 0; i < count; i++ )
     {
         if ( hypercall_preempt_check() )
@@ -2664,8 +2731,6 @@ int do_mmu_update(
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     domain_mmap_cache_destroy(&mapcache);
 
     perfc_add(num_page_updates, i);
@@ -2718,14 +2783,19 @@ static int create_grant_pte_mapping(
         goto failed;
     }
 
+    lock_page(page);
+
     ol1e = *(l1_pgentry_t *)va;
     if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
     {
+        unlock_page(page);
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     } 
 
+    unlock_page(page);
+
     if ( !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
@@ -2769,16 +2839,14 @@ static int destroy_grant_pte_mapping(
         goto failed;
     }
 
-    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
-    {
-        put_page_type(page);
-        rc = GNTST_general_error;
-        goto failed;
-    }
+    lock_page(page);
+
+    ol1e = *(l1_pgentry_t *)va;
     
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
     {
+        unlock_page(page);
         MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
                 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
         put_page_type(page);
@@ -2793,12 +2861,14 @@ static int destroy_grant_pte_mapping(
                    d->vcpu[0] /* Change if we go to per-vcpu shadows. */,
                    0)) )
     {
+        unlock_page(page);
         MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
 
+    unlock_page(page);
     put_page_type(page);
 
  failed:
@@ -2814,6 +2884,7 @@ static int create_grant_va_mapping(
     l1_pgentry_t *pl1e, ol1e;
     struct domain *d = v->domain;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int okay;
     
     ASSERT(domain_is_locked(d));
@@ -2826,8 +2897,11 @@ static int create_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", va);
         return GNTST_general_error;
     }
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
     okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+    unlock_page(l1pg);
     guest_unmap_l1e(v, pl1e);
     pl1e = NULL;
 
@@ -2845,6 +2919,7 @@ static int replace_grant_va_mapping(
 {
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int rc = 0;
     
     pl1e = guest_map_l1e(v, addr, &gl1mfn);
@@ -2853,11 +2928,15 @@ static int replace_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", addr);
         return GNTST_general_error;
     }
+
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely(l1e_get_pfn(ol1e) != frame) )
     {
+        unlock_page(l1pg);
         MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 l1e_get_pfn(ol1e), addr, frame);
         rc = GNTST_general_error;
@@ -2867,11 +2946,14 @@ static int replace_grant_va_mapping(
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
     {
+        unlock_page(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
         goto out;
     }
 
+    unlock_page(l1pg);
+
  out:
     guest_unmap_l1e(v, pl1e);
     return rc;
@@ -2906,6 +2988,7 @@ int replace_grant_host_mapping(
     struct vcpu *curr = current;
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int rc;
     
     if ( flags & GNTMAP_contains_pte )
@@ -2927,16 +3010,21 @@ int replace_grant_host_mapping(
                 (unsigned long)new_addr);
         return GNTST_general_error;
     }
+
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
 
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
                                 gl1mfn, curr, 0)) )
     {
+        unlock_page(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         guest_unmap_l1e(curr, pl1e);
         return GNTST_general_error;
     }
 
+    unlock_page(l1pg);
     guest_unmap_l1e(curr, pl1e);
 
     rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3014,8 +3102,6 @@ int do_update_va_mapping(unsigned long v
     if ( rc )
         return rc;
 
-    domain_lock(d);
-
     pl1e = guest_map_l1e(v, va, &gl1mfn);
 
     if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
@@ -3027,8 +3113,6 @@ int do_update_va_mapping(unsigned long v
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
@@ -3669,8 +3753,6 @@ int ptwr_do_page_fault(struct vcpu *v, u
     struct ptwr_emulate_ctxt ptwr_ctxt;
     int rc;
 
-    domain_lock(d);
-
     /* Attempt to read the PTE that maps the VA being accessed. */
     guest_get_eff_l1e(v, addr, &pte);
     page = l1e_get_page(pte);
@@ -3690,16 +3772,16 @@ int ptwr_do_page_fault(struct vcpu *v, u
     ptwr_ctxt.cr2 = addr;
     ptwr_ctxt.pte = pte;
 
+    lock_page(page);
     rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+    unlock_page(page);
     if ( rc == X86EMUL_UNHANDLEABLE )
         goto bail;
 
-    domain_unlock(d);
     perfc_incr(ptwr_emulations);
     return EXCRET_fault_fixed;
 
  bail:
-    domain_unlock(d);
     return 0;
 }
 
Index: 2008-04-15/xen/include/asm-x86/mm.h
===================================================================
--- 2008-04-15.orig/xen/include/asm-x86/mm.h    2008-04-15 08:48:06.000000000 +0200
+++ 2008-04-15/xen/include/asm-x86/mm.h 2008-04-15 15:16:03.000000000 +0200
@@ -82,9 +82,12 @@ struct page_info
  /* PAE only: is this an L2 page directory containing Xen-private mappings? */
 #define _PGT_pae_xen_l2     26
 #define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
+ /* The page is currently locked for modification. */
+#define _PGT_locked         25
+#define PGT_locked          (1U<<_PGT_locked)
 
- /* 26-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<26)-1)
+ /* 25-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<25)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
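
One property of the mm.h change worth making explicit: PGT_locked takes
over bit 25, directly below _PGT_pae_xen_l2 at bit 26, and the type-use
count shrinks from 26 to 25 bits, so a large reference count can never
alias the lock bit. A compile-time check along the following lines
(hypothetical, not part of the patch) would enforce that invariant:

/* Values as defined in the patched asm-x86/mm.h. */
#define _PGT_pae_xen_l2 26
#define PGT_pae_xen_l2  (1U<<_PGT_pae_xen_l2)
#define _PGT_locked     25
#define PGT_locked      (1U<<_PGT_locked)
#define PGT_count_mask  ((1U<<25)-1)

/* Fails to compile (negative array size) if the count overlaps a flag. */
typedef char pgt_layout_check[(PGT_count_mask &
                               (PGT_locked | PGT_pae_xen_l2)) ? -1 : 1];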



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel