To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 8 of 9] Modify all internal p2m functions to use the new fine-grained locking
From: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Date: Thu, 27 Oct 2011 00:33:53 -0400
Cc: andres@xxxxxxxxxxxxxx, keir.xen@xxxxxxxxx, tim@xxxxxxx, olaf@xxxxxxxxx, adin@xxxxxxxxxxxxxx
Delivery-date: Thu, 27 Oct 2011 05:36:23 -0700
In-reply-to: <patchbomb.1319690025@xxxxxxxxxxxxxxxxxxx>
References: <patchbomb.1319690025@xxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mercurial-patchbomb/1.8.4
 xen/arch/x86/mm/hap/hap.c        |    2 +-
 xen/arch/x86/mm/hap/nested_hap.c |   21 ++-
 xen/arch/x86/mm/p2m-ept.c        |   26 +----
 xen/arch/x86/mm/p2m-pod.c        |   42 +++++--
 xen/arch/x86/mm/p2m-pt.c         |   20 +---
 xen/arch/x86/mm/p2m.c            |  185 ++++++++++++++++++++++++--------------
 xen/include/asm-ia64/mm.h        |    5 +
 xen/include/asm-x86/p2m.h        |   45 +++++++++-
 8 files changed, 217 insertions(+), 129 deletions(-)


This patch only modifies code internal to the p2m, adding convenience
macros, etc. It yields a code base that compiles, but an incorrect
hypervisor: external callers of p2m queries do not yet unlock. The
next patch takes care of external callers; the split is done purely
for the sake of conciseness.
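
As an illustrative aside (not part of the patch): the pattern external
callers will have to adopt in the next patch, using the helpers this
series introduces (gfn_to_mfn_query, drop_p2m_gfn_domain,
gfn_to_mfn_query_unlocked), looks roughly like the sketch below. The
enclosing function and its arguments are hypothetical.

static void example_locked_query(struct domain *d, unsigned long gfn)
{
    p2m_type_t t;
    mfn_t mfn;

    /* Locking query: returns with the per-gfn p2m lock held and, if the
     * mfn is valid, an extra page reference taken. */
    mfn = gfn_to_mfn_query(d, gfn, &t);

    /* ... use mfn; the entry cannot change underneath us ... */

    /* Drop with the mfn obtained above, so the matching ref is put. */
    drop_p2m_gfn_domain(d, gfn, mfn_x(mfn));

    /* For one-shot uses (e.g. a printk), the _unlocked wrappers drop
     * immediately; the returned value has no stability guarantee. */
    (void)gfn_to_mfn_query_unlocked(d, gfn, &t);
}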

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -861,7 +861,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
     old_flags = l1e_get_flags(*p);
 
     if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) 
-         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
+         && !atomic_read(&(p2m_get_hostp2m(d)->defer_nested_flush)) ) {
         /* We are replacing a valid entry so we need to flush nested p2ms,
          * unless the only change is an increase in access rights. */
         mfn_t omfn = _mfn(l1e_get_pfn(*p));
diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c
+++ b/xen/arch/x86/mm/hap/nested_hap.c
@@ -105,8 +105,6 @@ nestedhap_fix_p2m(struct vcpu *v, struct
     ASSERT(p2m);
     ASSERT(p2m->set_entry);
 
-    p2m_lock(p2m);
-
     /* If this p2m table has been flushed or recycled under our feet, 
      * leave it alone.  We'll pick up the right one as we try to 
      * vmenter the guest. */
@@ -122,11 +120,13 @@ nestedhap_fix_p2m(struct vcpu *v, struct
         gfn = (L2_gpa >> PAGE_SHIFT) & mask;
         mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);
 
+        /* Not bumping refcount of pages underneath because we're getting
+         * rid of whatever was there */
+        get_p2m(p2m, gfn, page_order);
         rv = set_p2m_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
+        put_p2m(p2m, gfn, page_order);
     }
 
-    p2m_unlock(p2m);
-
     if (rv == 0) {
         gdprintk(XENLOG_ERR,
                "failed to set entry for 0x%"PRIx64" -> 0x%"PRIx64"\n",
@@ -146,19 +146,26 @@ nestedhap_walk_L0_p2m(struct p2m_domain 
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
+    int rc;
 
     /* walk L0 P2M table */
     mfn = gfn_to_mfn_type_p2m(p2m, L1_gpa >> PAGE_SHIFT, &p2mt, &p2ma, 
                               p2m_query, page_order);
 
+    rc = NESTEDHVM_PAGEFAULT_ERROR;
     if ( p2m_is_paging(p2mt) || p2m_is_shared(p2mt) || !p2m_is_ram(p2mt) )
-        return NESTEDHVM_PAGEFAULT_ERROR;
+        goto out;
 
+    rc = NESTEDHVM_PAGEFAULT_ERROR;
     if ( !mfn_valid(mfn) )
-        return NESTEDHVM_PAGEFAULT_ERROR;
+        goto out;
 
     *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
-    return NESTEDHVM_PAGEFAULT_DONE;
+    rc = NESTEDHVM_PAGEFAULT_DONE;
+
+out:
+    drop_p2m_gfn(p2m, L1_gpa >> PAGE_SHIFT, mfn_x(mfn));
+    return rc;
 }
 
 /* This function uses L2_gpa to walk the P2M page table in L1. If the 
diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -43,29 +43,16 @@
 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
 
-/* Non-ept "lock-and-check" wrapper */
+/* Ept-specific check wrapper */
 static int ept_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
                                       ept_entry_t *entry, int order,
                                       p2m_query_t q)
 {
-    int r;
-
-    /* This is called from the p2m lookups, which can happen with or 
-     * without the lock hed. */
-    p2m_lock_recursive(p2m);
-
     /* Check to make sure this is still PoD */
     if ( entry->sa_p2mt != p2m_populate_on_demand )
-    {
-        p2m_unlock(p2m);
         return 0;
-    }
 
-    r = p2m_pod_demand_populate(p2m, gfn, order, q);
-
-    p2m_unlock(p2m);
-
-    return r;
+    return p2m_pod_demand_populate(p2m, gfn, order, q);
 }
 
 static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
@@ -265,9 +252,9 @@ static int ept_next_level(struct p2m_dom
 
     ept_entry = (*table) + index;
 
-    /* ept_next_level() is called (sometimes) without a lock.  Read
+    /* ept_next_level() is called (never) without a lock.  Read
      * the entry once, and act on the "cached" entry after that to
-     * avoid races. */
+     * avoid races. */
     e = atomic_read_ept_entry(ept_entry);
 
     if ( !is_epte_present(&e) )
@@ -733,7 +720,8 @@ void ept_change_entry_emt_with_range(str
     int order = 0;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    p2m_lock(p2m);
+    /* This is a global operation, essentially */
+    get_p2m_global(p2m);
     for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
     {
         int level = 0;
@@ -773,7 +761,7 @@ void ept_change_entry_emt_with_range(str
                 ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access);
         }
     }
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
 }
 
 /*
diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -102,8 +102,6 @@ p2m_pod_cache_add(struct p2m_domain *p2m
     }
 #endif
 
-    ASSERT(p2m_locked_by_me(p2m));
-
     /*
      * Pages from domain_alloc and returned by the balloon driver aren't
      * guaranteed to be zero; but by reclaiming zero pages, we implicitly
@@ -536,7 +534,7 @@ p2m_pod_decrease_reservation(struct doma
     {
         p2m_type_t t;
 
-        gfn_to_mfn_query(d, gpfn + i, &t);
+        gfn_to_mfn_query_unlocked(d, gpfn + i, &t);
 
         if ( t == p2m_populate_on_demand )
             pod++;
@@ -602,6 +600,7 @@ p2m_pod_decrease_reservation(struct doma
             nonpod--;
             ram--;
         }
+        drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));
     }    
 
     /* If there are no more non-PoD entries, tell decrease_reservation() that
@@ -661,12 +660,15 @@ p2m_pod_zero_check_superpage(struct p2m_
     for ( i=0; i<SUPERPAGE_PAGES; i++ )
     {
         
-        mfn = gfn_to_mfn_query(d, gfn + i, &type);
-
         if ( i == 0 )
         {
+            /* Only lock the p2m entry the first time; that lock will
+             * serve for the whole superpage */
+            mfn = gfn_to_mfn_query(d, gfn + i, &type);
             mfn0 = mfn;
             type0 = type;
+        } else {
+            mfn = gfn_to_mfn_query_unlocked(d, gfn + i, &type);
         }
 
         /* Conditions that must be met for superpage-superpage:
@@ -773,6 +775,10 @@ out:
         p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
         p2m->pod.entry_count += SUPERPAGE_PAGES;
     }
+    
+    /* We took the p2m lock once for the whole superpage, against the
+     * original mfn0. We drop it here. */
+    drop_p2m_gfn(p2m, gfn, mfn_x(mfn0));    
 }
 
 /* On entry, PoD lock is held */
@@ -894,6 +900,12 @@ p2m_pod_zero_check(struct p2m_domain *p2
             p2m->pod.entry_count++;
         }
     }
+
+    /* Drop all p2m locks and references */
+    for ( i=0; i<count; i++ )
+    {
+        drop_p2m_gfn(p2m, gfns[i], mfn_x(mfns[i]));
+    }
     
 }
 
@@ -928,7 +940,9 @@ p2m_pod_emergency_sweep_super(struct p2m
     p2m->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;
 }
 
-#define POD_SWEEP_STRIDE  16
+/* Note that spinlock recursion counters have 4 bits, so 16 or higher
+ * will overflow a single 2M spinlock in a zero check. */
+#define POD_SWEEP_STRIDE  15
 static void
 p2m_pod_emergency_sweep(struct p2m_domain *p2m)
 {
@@ -946,7 +960,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
     /* FIXME: Figure out how to avoid superpages */
     for ( i=p2m->pod.reclaim_single; i > 0 ; i-- )
     {
-        gfn_to_mfn_query(p2m->domain, i, &t );
+        gfn_to_mfn_query_unlocked(p2m->domain, i, &t );
         if ( p2m_is_ram(t) )
         {
             gfns[j] = i;
@@ -974,6 +988,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
 
 }
 
+/* The p2m range covered by gfn and order must be locked before calling here */
 int
 p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
                         unsigned int order,
@@ -985,8 +1000,6 @@ p2m_pod_demand_populate(struct p2m_domai
     mfn_t mfn;
     int i;
 
-    ASSERT(p2m_locked_by_me(p2m));
-
     pod_lock(p2m);
     /* This check is done with the pod lock held.  This will make sure that
      * even if d->is_dying changes under our feet, p2m_pod_empty_cache() 
@@ -1008,8 +1021,6 @@ p2m_pod_demand_populate(struct p2m_domai
         set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
                       p2m_populate_on_demand, p2m->default_access);
         audit_p2m(p2m, 1);
-        /* This is because the ept/pt caller locks the p2m recursively */
-        p2m_unlock(p2m);
         return 0;
     }
 
@@ -1132,7 +1143,9 @@ guest_physmap_mark_populate_on_demand(st
     if ( rc != 0 )
         return rc;
 
-    p2m_lock(p2m);
+    /* Pre-lock all the p2m entries. We don't take refs to the
+     * pages, because there shouldn't be any pages underneath. */
+    get_p2m(p2m, gfn, order);
     audit_p2m(p2m, 1);
 
     P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
@@ -1140,7 +1153,8 @@ guest_physmap_mark_populate_on_demand(st
     /* Make sure all gpfns are unused */
     for ( i = 0; i < (1UL << order); i++ )
     {
-        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
+        p2m_access_t a;
+        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
         if ( p2m_is_ram(ot) )
         {
             printk("%s: gfn_to_mfn returned type %d!\n",
@@ -1169,9 +1183,9 @@ guest_physmap_mark_populate_on_demand(st
     }
 
     audit_p2m(p2m, 1);
-    p2m_unlock(p2m);
 
 out:
+    put_p2m(p2m, gfn, order);
     return rc;
 }
 
diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -487,31 +487,16 @@ out:
 }
 
 
-/* Non-ept "lock-and-check" wrapper */
+/* PT-specific check wrapper */
 static int p2m_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
                                       l1_pgentry_t *p2m_entry, int order,
                                       p2m_query_t q)
 {
-    int r;
-
-    /* This is called from the p2m lookups, which can happen with or 
-     * without the lock hed. */
-    p2m_lock_recursive(p2m);
-    audit_p2m(p2m, 1);
-
     /* Check to make sure this is still PoD */
     if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
-    {
-        p2m_unlock(p2m);
         return 0;
-    }
 
-    r = p2m_pod_demand_populate(p2m, gfn, order, q);
-
-    audit_p2m(p2m, 1);
-    p2m_unlock(p2m);
-
-    return r;
+    return p2m_pod_demand_populate(p2m, gfn, order, q);
 }
 
 /* Read the current domain's p2m table (through the linear mapping). */
@@ -894,6 +879,7 @@ static void p2m_change_type_global(struc
     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
         return;
 
+    /* Checks for exclusive lock */
     ASSERT(p2m_locked_by_me(p2m));
 
 #if CONFIG_PAGING_LEVELS == 4
diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -143,9 +143,9 @@ void p2m_change_entry_type_global(struct
                                   p2m_type_t ot, p2m_type_t nt)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    p2m_lock(p2m);
+    get_p2m_global(p2m);
     p2m->change_entry_type_global(p2m, ot, nt);
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
 }
 
 mfn_t gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn,
@@ -162,12 +162,17 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
         return _mfn(gfn);
     }
 
+    /* We take the lock for this single gfn. The caller has to put this lock */
+    get_p2m_gfn(p2m, gfn);
+
     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
 
 #ifdef __x86_64__
     if ( q == p2m_unshare && p2m_is_shared(*t) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
+        /* p2m locking is recursive, so we won't deadlock going
+         * into the sharing code */
         mem_sharing_unshare_page(p2m->domain, gfn, 0);
         mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
     }
@@ -179,13 +184,28 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
         /* Return invalid_mfn to avoid caller's access */
         mfn = _mfn(INVALID_MFN);
         if (q == p2m_guest)
+        {
+            put_p2m_gfn(p2m, gfn);
             domain_crash(p2m->domain);
+        }
     }
 #endif
 
+    /* Take an extra reference to the page. It won't disappear beneath us */
+    if ( mfn_valid(mfn) )
+    {
+        /* Use this because we don't necessarily know who owns the page */
+        if ( !page_get_owner_and_reference(mfn_to_page(mfn)) )
+        {
+            mfn = _mfn(INVALID_MFN);
+        }
+    }
+
+    /* We leave holding the p2m lock for this gfn */
     return mfn;
 }
 
+/* Appropriate locks held on entry */
 int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, 
                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
 {
@@ -194,8 +214,6 @@ int set_p2m_entry(struct p2m_domain *p2m
     unsigned int order;
     int rc = 1;
 
-    ASSERT(p2m_locked_by_me(p2m));
-
     while ( todo )
     {
         if ( hap_enabled(d) )
@@ -217,6 +235,18 @@ int set_p2m_entry(struct p2m_domain *p2m
     return rc;
 }
 
+void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn, 
+                    unsigned long frame)
+{
+    mfn_t mfn = _mfn(frame);
+    /* For non-translated domains, locks are never taken */
+    if ( !p2m || !paging_mode_translate(p2m->domain) )
+        return;
+    if ( mfn_valid(mfn) )
+        put_page(mfn_to_page(mfn));
+    put_p2m_gfn(p2m, gfn);
+}
+
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
 {
     struct page_info *pg;
@@ -262,12 +292,12 @@ int p2m_alloc_table(struct p2m_domain *p
     unsigned long gfn = -1UL;
     struct domain *d = p2m->domain;
 
-    p2m_lock(p2m);
+    get_p2m_global(p2m);
 
     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
     {
         P2M_ERROR("p2m already allocated for this domain\n");
-        p2m_unlock(p2m);
+        put_p2m_global(p2m);
         return -EINVAL;
     }
 
@@ -283,7 +313,7 @@ int p2m_alloc_table(struct p2m_domain *p
 
     if ( p2m_top == NULL )
     {
-        p2m_unlock(p2m);
+        put_p2m_global(p2m);
         return -ENOMEM;
     }
 
@@ -295,7 +325,7 @@ int p2m_alloc_table(struct p2m_domain *p
     P2M_PRINTK("populating p2m table\n");
 
     /* Initialise physmap tables for slot zero. Other code assumes this. */
-    p2m->defer_nested_flush = 1;
+    atomic_set(&p2m->defer_nested_flush, 1);
     if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), 0,
                         p2m_invalid, p2m->default_access) )
         goto error;
@@ -323,10 +353,10 @@ int p2m_alloc_table(struct p2m_domain *p
         }
         spin_unlock(&p2m->domain->page_alloc_lock);
     }
-    p2m->defer_nested_flush = 0;
+    atomic_set(&p2m->defer_nested_flush, 0);
 
     P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
     return 0;
 
 error_unlock:
@@ -334,7 +364,7 @@ error_unlock:
  error:
     P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
                PRI_mfn "\n", gfn, mfn_x(mfn));
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
     return -ENOMEM;
 }
 
@@ -354,26 +384,28 @@ void p2m_teardown(struct p2m_domain *p2m
     if (p2m == NULL)
         return;
 
+    get_p2m_global(p2m);
+
 #ifdef __x86_64__
     for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ )
     {
-        mfn = gfn_to_mfn_type_p2m(p2m, gfn, &t, &a, p2m_query, NULL);
+        mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
         {
             ASSERT(!p2m_is_nestedp2m(p2m));
+            /* The p2m allows an exclusive global holder to recursively
+             * lock sub-ranges, which is what the unshare below relies on. */
             BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));
         }
 
     }
 #endif
 
-    p2m_lock(p2m);
-
     p2m->phys_table = pagetable_null();
 
     while ( (pg = page_list_remove_head(&p2m->pages)) )
         d->arch.paging.free_page(d, pg);
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
 }
 
 static void p2m_teardown_nestedp2m(struct domain *d)
@@ -401,6 +433,7 @@ void p2m_final_teardown(struct domain *d
 }
 
 
+/* Locks held on entry */
 static void
 p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
                 unsigned int page_order)
@@ -438,11 +471,11 @@ guest_physmap_remove_page(struct domain 
                           unsigned long mfn, unsigned int page_order)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    p2m_lock(p2m);
+    get_p2m(p2m, gfn, page_order);
     audit_p2m(p2m, 1);
     p2m_remove_page(p2m, gfn, mfn, page_order);
     audit_p2m(p2m, 1);
-    p2m_unlock(p2m);
+    put_p2m(p2m, gfn, page_order);
 }
 
 int
@@ -480,7 +513,7 @@ guest_physmap_add_entry(struct domain *d
     if ( rc != 0 )
         return rc;
 
-    p2m_lock(p2m);
+    get_p2m(p2m, gfn, page_order);
     audit_p2m(p2m, 0);
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
@@ -488,12 +521,13 @@ guest_physmap_add_entry(struct domain *d
     /* First, remove m->p mappings for existing p->m mappings */
     for ( i = 0; i < (1UL << page_order); i++ )
     {
-        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
+        p2m_access_t a;
+        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
         if ( p2m_is_grant(ot) )
         {
             /* Really shouldn't be unmapping grant maps this way */
+            put_p2m(p2m, gfn, page_order);
             domain_crash(d);
-            p2m_unlock(p2m);
             return -EINVAL;
         }
         else if ( p2m_is_ram(ot) )
@@ -523,11 +557,12 @@ guest_physmap_add_entry(struct domain *d
             && (ogfn != INVALID_M2P_ENTRY)
             && (ogfn != gfn + i) )
         {
+            p2m_access_t a;
             /* This machine frame is already mapped at another physical
              * address */
             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                       mfn + i, ogfn, gfn + i);
-            omfn = gfn_to_mfn_query(d, ogfn, &ot);
+            omfn = p2m->get_entry(p2m, ogfn, &ot, &a, p2m_query, NULL);
             if ( p2m_is_ram(ot) )
             {
                 ASSERT(mfn_valid(omfn));
@@ -567,7 +602,7 @@ guest_physmap_add_entry(struct domain *d
     }
 
     audit_p2m(p2m, 1);
-    p2m_unlock(p2m);
+    put_p2m(p2m, gfn, page_order);
 
     return rc;
 }
@@ -579,18 +614,19 @@ p2m_type_t p2m_change_type(struct domain
                            p2m_type_t ot, p2m_type_t nt)
 {
     p2m_type_t pt;
+    p2m_access_t a;
     mfn_t mfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
 
-    p2m_lock(p2m);
+    get_p2m_gfn(p2m, gfn);
 
-    mfn = gfn_to_mfn_query(d, gfn, &pt);
+    mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
     if ( pt == ot )
         set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
 
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
 
     return pt;
 }
@@ -608,20 +644,23 @@ void p2m_change_type_range(struct domain
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
 
-    p2m_lock(p2m);
-    p2m->defer_nested_flush = 1;
+    atomic_set(&p2m->defer_nested_flush, 1);
 
+    /* We've been given a number instead of an order, so lock each
+     * gfn individually */
     for ( gfn = start; gfn < end; gfn++ )
     {
-        mfn = gfn_to_mfn_query(d, gfn, &pt);
+        p2m_access_t a;
+        get_p2m_gfn(p2m, gfn);
+        mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
         if ( pt == ot )
             set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
+        put_p2m_gfn(p2m, gfn);
     }
 
-    p2m->defer_nested_flush = 0;
+    atomic_set(&p2m->defer_nested_flush, 0);
     if ( nestedhvm_enabled(d) )
         p2m_flush_nestedp2m(d);
-    p2m_unlock(p2m);
 }
 
 
@@ -631,17 +670,18 @@ set_mmio_p2m_entry(struct domain *d, uns
 {
     int rc = 0;
     p2m_type_t ot;
+    p2m_access_t a;
     mfn_t omfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     if ( !paging_mode_translate(d) )
         return 0;
 
-    p2m_lock(p2m);
-    omfn = gfn_to_mfn_query(d, gfn, &ot);
+    get_p2m_gfn(p2m, gfn);
+    omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
     if ( p2m_is_grant(ot) )
     {
-        p2m_unlock(p2m);
+        put_p2m_gfn(p2m, gfn);
         domain_crash(d);
         return 0;
     }
@@ -654,11 +694,11 @@ set_mmio_p2m_entry(struct domain *d, uns
     P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
     rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct, p2m->default_access);
     audit_p2m(p2m, 1);
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
     if ( 0 == rc )
         gdprintk(XENLOG_ERR,
             "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
-            mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
+            mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
     return rc;
 }
 
@@ -668,13 +708,14 @@ clear_mmio_p2m_entry(struct domain *d, u
     int rc = 0;
     mfn_t mfn;
     p2m_type_t t;
+    p2m_access_t a;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     if ( !paging_mode_translate(d) )
         return 0;
 
-    p2m_lock(p2m);
-    mfn = gfn_to_mfn_query(d, gfn, &t);
+    get_p2m_gfn(p2m, gfn);
+    mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
 
     /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
     if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
@@ -687,8 +728,7 @@ clear_mmio_p2m_entry(struct domain *d, u
     audit_p2m(p2m, 1);
 
 out:
-    p2m_unlock(p2m);
-
+    put_p2m_gfn(p2m, gfn);
     return rc;
 }
 
@@ -698,13 +738,14 @@ set_shared_p2m_entry(struct domain *d, u
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int rc = 0;
     p2m_type_t ot;
+    p2m_access_t a;
     mfn_t omfn;
 
     if ( !paging_mode_translate(p2m->domain) )
         return 0;
 
-    p2m_lock(p2m);
-    omfn = gfn_to_mfn_query(p2m->domain, gfn, &ot);
+    get_p2m_gfn(p2m, gfn);
+    omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
     /* At the moment we only allow p2m change if gfn has already been made
      * sharable first */
     ASSERT(p2m_is_shared(ot));
@@ -714,11 +755,11 @@ set_shared_p2m_entry(struct domain *d, u
 
     P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
     rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
     if ( 0 == rc )
         gdprintk(XENLOG_ERR,
             "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
-            mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
+            mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
     return rc;
 }
 
@@ -732,7 +773,7 @@ int p2m_mem_paging_nominate(struct domai
     mfn_t mfn;
     int ret;
 
-    p2m_lock(p2m);
+    get_p2m_gfn(p2m, gfn);
 
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
 
@@ -765,7 +806,7 @@ int p2m_mem_paging_nominate(struct domai
     ret = 0;
 
  out:
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
     return ret;
 }
 
@@ -778,7 +819,7 @@ int p2m_mem_paging_evict(struct domain *
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int ret = -EINVAL;
 
-    p2m_lock(p2m);
+    get_p2m_gfn(p2m, gfn);
 
     /* Get mfn */
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
@@ -824,7 +865,7 @@ int p2m_mem_paging_evict(struct domain *
     put_page(page);
 
  out:
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
     return ret;
 }
 
@@ -863,7 +904,7 @@ void p2m_mem_paging_populate(struct doma
     req.type = MEM_EVENT_TYPE_PAGING;
 
     /* Fix p2m mapping */
-    p2m_lock(p2m);
+    get_p2m_gfn(p2m, gfn);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
     /* Allow only nominated or evicted pages to enter page-in path */
     if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
@@ -875,7 +916,7 @@ void p2m_mem_paging_populate(struct doma
         set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
         audit_p2m(p2m, 1);
     }
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
 
     /* Pause domain if request came from guest and gfn has paging type */
     if (  p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
@@ -908,7 +949,7 @@ int p2m_mem_paging_prep(struct domain *d
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int ret = -ENOMEM;
 
-    p2m_lock(p2m);
+    get_p2m_gfn(p2m, gfn);
 
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
 
@@ -931,7 +972,7 @@ int p2m_mem_paging_prep(struct domain *d
     ret = 0;
 
  out:
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
     return ret;
 }
 
@@ -949,12 +990,12 @@ void p2m_mem_paging_resume(struct domain
     /* Fix p2m entry if the page was not dropped */
     if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
     {
-        p2m_lock(p2m);
+        get_p2m_gfn(p2m, rsp.gfn);
         mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL);
         set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
         set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
         audit_p2m(p2m, 1);
-        p2m_unlock(p2m);
+        put_p2m_gfn(p2m, rsp.gfn);
     }
 
     /* Unpause domain */
@@ -979,16 +1020,16 @@ void p2m_mem_access_check(unsigned long 
     p2m_access_t p2ma;
     
     /* First, handle rx2rw conversion automatically */
-    p2m_lock(p2m);
+    get_p2m_gfn(p2m, gfn);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
 
     if ( access_w && p2ma == p2m_access_rx2rw ) 
     {
         p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw);
-        p2m_unlock(p2m);
+        put_p2m_gfn(p2m, gfn);
         return;
     }
-    p2m_unlock(p2m);
+    put_p2m_gfn(p2m, gfn);
 
     /* Otherwise, check if there is a memory event listener, and send the message along */
     res = mem_event_check_ring(d, &d->mem_access);
@@ -1006,9 +1047,9 @@ void p2m_mem_access_check(unsigned long 
         else
         {
             /* A listener is not required, so clear the access restrictions */
-            p2m_lock(p2m);
+            get_p2m_gfn(p2m, gfn);
             p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx);
-            p2m_unlock(p2m);
+            put_p2m_gfn(p2m, gfn);
         }
 
         return;
@@ -1064,7 +1105,7 @@ int p2m_set_mem_access(struct domain *d,
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     unsigned long pfn;
-    p2m_access_t a;
+    p2m_access_t a, _a;
     p2m_type_t t;
     mfn_t mfn;
     int rc = 0;
@@ -1095,17 +1136,20 @@ int p2m_set_mem_access(struct domain *d,
         return 0;
     }
 
-    p2m_lock(p2m);
+    /* Because we are given a count rather than an order, we need to lock
+     * each entry individually */
     for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
     {
-        mfn = gfn_to_mfn_query(d, pfn, &t);
+        get_p2m_gfn(p2m, pfn);
+        mfn = p2m->get_entry(p2m, pfn, &t, &_a, p2m_query, NULL);
         if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
         {
+            put_p2m_gfn(p2m, pfn);
             rc = -ENOMEM;
             break;
         }
+        put_p2m_gfn(p2m, pfn);
     }
-    p2m_unlock(p2m);
     return rc;
 }
 
@@ -1138,7 +1182,10 @@ int p2m_get_mem_access(struct domain *d,
         return 0;
     }
 
+    get_p2m_gfn(p2m, pfn);
     mfn = p2m->get_entry(p2m, pfn, &t, &a, p2m_query, NULL);
+    put_p2m_gfn(p2m, pfn);
+
     if ( mfn_x(mfn) == INVALID_MFN )
         return -ESRCH;
     
@@ -1175,7 +1222,7 @@ p2m_flush_table(struct p2m_domain *p2m)
     struct domain *d = p2m->domain;
     void *p;
 
-    p2m_lock(p2m);
+    get_p2m_global(p2m);
 
     /* "Host" p2m tables can have shared entries &c that need a bit more 
      * care when discarding them */
@@ -1203,7 +1250,7 @@ p2m_flush_table(struct p2m_domain *p2m)
             d->arch.paging.free_page(d, pg);
     page_list_add(top, &p2m->pages);
 
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
 }
 
 void
@@ -1245,7 +1292,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
     p2m = nv->nv_p2m;
     if ( p2m ) 
     {
-        p2m_lock(p2m);
+        get_p2m_global(p2m);
         if ( p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR )
         {
             nv->nv_flushp2m = 0;
@@ -1255,24 +1302,24 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
                 hvm_asid_flush_vcpu(v);
             p2m->cr3 = cr3;
             cpu_set(v->processor, p2m->p2m_dirty_cpumask);
-            p2m_unlock(p2m);
+            put_p2m_global(p2m);
             nestedp2m_unlock(d);
             return p2m;
         }
-        p2m_unlock(p2m);
+        put_p2m_global(p2m);
     }
 
     /* All p2m's are or were in use. Take the least recent used one,
      * flush it and reuse. */
     p2m = p2m_getlru_nestedp2m(d, NULL);
     p2m_flush_table(p2m);
-    p2m_lock(p2m);
+    get_p2m_global(p2m);
     nv->nv_p2m = p2m;
     p2m->cr3 = cr3;
     nv->nv_flushp2m = 0;
     hvm_asid_flush_vcpu(v);
     cpu_set(v->processor, p2m->p2m_dirty_cpumask);
-    p2m_unlock(p2m);
+    put_p2m_global(p2m);
     nestedp2m_unlock(d);
 
     return p2m;
diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h
+++ b/xen/include/asm-ia64/mm.h
@@ -561,6 +561,11 @@ extern u64 translate_domain_pte(u64 ptev
     ((get_gpfn_from_mfn((madr) >> PAGE_SHIFT) << PAGE_SHIFT) | \
     ((madr) & ~PAGE_MASK))
 
+/* Because x86-specific p2m fine-grained lock releases are called from common
+ * code, we put a dummy placeholder here */
+#define drop_p2m_gfn(p, g, m)           ((void)0)
+#define drop_p2m_gfn_domain(p, g, m)    ((void)0)
+
 /* Internal use only: returns 0 in case of bad address.  */
 extern unsigned long paddr_to_maddr(unsigned long paddr);
 
diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -220,7 +220,7 @@ struct p2m_domain {
      * tables on every host-p2m change.  The setter of this flag 
      * is responsible for performing the full flush before releasing the
      * host p2m's lock. */
-    int                defer_nested_flush;
+    atomic_t           defer_nested_flush;
 
     /* Pages used to construct the p2m */
     struct page_list_head pages;
@@ -298,6 +298,15 @@ struct p2m_domain *p2m_get_p2m(struct vc
 #define p2m_get_pagetable(p2m)  ((p2m)->phys_table)
 
 
+/* No matter what value you get out of a query, the p2m has been locked for
+ * that range, and you must drop those locks when done.
+ * Pass back the mfn obtained when locking, not any value the entry may have
+ * changed to since, as it is the original mfn whose refcount was bumped. */
+void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn, 
+                        unsigned long mfn);
+#define drop_p2m_gfn_domain(d, g, m)    \
+        drop_p2m_gfn(p2m_get_hostp2m((d)), (g), (m))
+
 /* Read a particular P2M table, mapping pages as we go.  Most callers
  * should _not_ call this directly; use the other gfn_to_mfn_* functions
  * below unless you know you want to walk a p2m that isn't a domain's
@@ -327,6 +336,28 @@ static inline mfn_t gfn_to_mfn_type(stru
 #define gfn_to_mfn_guest(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_guest)
 #define gfn_to_mfn_unshare(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_unshare)
 
+/* This one applies to very specific situations in which you're querying
+ * a p2m entry and will be done "immediately" (such as a printk or computing a 
+ * return value). Use this only if there are no expectations of the p2m entry
+ * holding steady. */
+static inline mfn_t gfn_to_mfn_type_unlocked(struct domain *d,
+                                        unsigned long gfn, p2m_type_t *t,
+                                        p2m_query_t q)
+{
+    mfn_t mfn = gfn_to_mfn_type(d, gfn, t, q);
+    drop_p2m_gfn_domain(d, gfn, mfn_x(mfn));
+    return mfn;
+}
+
+#define gfn_to_mfn_unlocked(d, g, t)            \
+    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_alloc)
+#define gfn_to_mfn_query_unlocked(d, g, t)    \
+    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_query)
+#define gfn_to_mfn_guest_unlocked(d, g, t)    \
+    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_guest)
+#define gfn_to_mfn_unshare_unlocked(d, g, t)    \
+    gfn_to_mfn_type_unlocked((d), (g), (t), p2m_unshare)
+
 /* Compatibility function exporting the old untyped interface */
 static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
 {
@@ -338,6 +369,15 @@ static inline unsigned long gmfn_to_mfn(
     return INVALID_MFN;
 }
 
+/* Same comments apply re unlocking */
+static inline unsigned long gmfn_to_mfn_unlocked(struct domain *d,
+                                                 unsigned long gpfn)
+{
+    unsigned long mfn = gmfn_to_mfn(d, gpfn);
+    drop_p2m_gfn_domain(d, gpfn, mfn);
+    return mfn;
+}
+
 /* General conversion function from mfn to gfn */
 static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn)
 {
@@ -521,7 +561,8 @@ static inline int p2m_gfn_check_limit(
 #define p2m_gfn_check_limit(d, g, o) 0
 #endif
 
-/* Directly set a p2m entry: only for use by p2m code */
+/* Directly set a p2m entry: only for use by p2m code. It expects locks to 
+ * be held on entry */
 int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, 
                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma);
 

