[Xen-devel] [PATCH 6 of 9] Protect superpage splitting in implementation-dependent traversals

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 6 of 9] Protect superpage splitting in implementation-dependent traversals
From: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Date: Thu, 27 Oct 2011 00:33:51 -0400
Cc: andres@xxxxxxxxxxxxxx, keir.xen@xxxxxxxxx, tim@xxxxxxx, olaf@xxxxxxxxx, adin@xxxxxxxxxxxxxx
Delivery-date: Thu, 27 Oct 2011 05:33:05 -0700
In-reply-to: <patchbomb.1319690025@xxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <patchbomb.1319690025@xxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mercurial-patchbomb/1.8.4
 xen/arch/x86/mm/p2m-ept.c  |  15 +++++++-
 xen/arch/x86/mm/p2m-lock.h |  11 +++++-
 xen/arch/x86/mm/p2m-pt.c   |  82 ++++++++++++++++++++++++++++++++++-----------
 3 files changed, 84 insertions(+), 24 deletions(-)


In both pt and ept modes, the p2m trees can map 1GB superpages.
Because our locks work on a 2MB superpage basis, without additional
protection two simultaneous traversals of two different 2MB ranges
could race to split the same 1GB superpage.
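
To make the window concrete, here is one possible interleaving, sketched
against the p2m_next_level() path patched below (the call chain is
abbreviated):

    CPU0 (2MB range A)                    CPU1 (2MB range B, same 1GB entry)
    ------------------                    -----------------------------------
    sees _PAGE_PSE set in the L3 entry
                                          sees _PAGE_PSE set in the same entry
    pg = p2m_alloc_ptp(...)
                                          pg = p2m_alloc_ptp(...)
    write_p2m_entry() installs its L2
                                          write_p2m_entry() overwrites it

The second write silently leaks CPU0's freshly allocated L2 table, and
any updates CPU0 goes on to make through it are lost.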

Fix this with the existing alloc_lock in the fine-grained lock
structure. The new lock_p2m_1G/unlock_p2m_1G hooks take a gfn argument
so that a future implementation can lock at 1GB granularity; for now
they fall back to a single per-p2m lock in all cases.
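
Concretely, writers that may touch a 1GB entry bracket the traversal
with the new hooks. A condensed sketch of the discipline, following the
ept_set_entry() hunks below (error paths must drop the lock before
bailing out):

    p2m_lock_t *p2ml = p2m->lock;

    if ( target == 2 )        /* installing or replacing an L3 (1GB) entry */
        lock_p2m_1G(p2ml, index);

    /* ... read the entry, split the superpage if necessary, rewrite ... */

    if ( target == 2 )
        unlock_p2m_1G(p2ml, index);

Under the trivial flavour the hooks compile to no-ops, since the global
p2m lock already serializes all traversals; under fine-grained locking
they take the per-p2m alloc_lock, and the gfn argument is ignored for
now.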

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r a23e1262b124 -r 0a97d62c2d41 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -163,6 +163,7 @@ static int ept_set_middle_entry(struct p
 }
 
 /* free ept sub tree behind an entry */
+/* Lock on this superpage (if any) held on entry */
 static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
     /* End if the entry is a leaf entry. */
@@ -181,6 +182,7 @@ static void ept_free_entry(struct p2m_do
     p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
 }
 
+/* Lock on this superpage held on entry */
 static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
                                 int level, int target)
 {
@@ -315,6 +317,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     int needs_sync = 1;
     struct domain *d = p2m->domain;
     ept_entry_t old_entry = { .epte = 0 };
+    p2m_lock_t *p2ml = p2m->lock;
 
     /*
      * the caller must make sure:
@@ -361,6 +364,8 @@ ept_set_entry(struct p2m_domain *p2m, un
      * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
      */
 
+    if ( target == 2 )
+        lock_p2m_1G(p2ml, index);
     if ( i == target )
     {
         /* We reached the target level. */
@@ -373,7 +378,7 @@ ept_set_entry(struct p2m_domain *p2m, un
         /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
          * the intermediate tables will be freed below after the ept flush
          *
-         * Read-then-write is OK because we hold the p2m lock. */
+         * Read-then-write is OK because we hold the 1G or 2M lock. */
         old_entry = *ept_entry;
 
         if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
@@ -412,6 +417,8 @@ ept_set_entry(struct p2m_domain *p2m, un
         if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
         {
             ept_free_entry(p2m, &split_ept_entry, i);
+            if ( target == 2 )
+                unlock_p2m_1G(p2ml, index);
             goto out;
         }
 
@@ -440,7 +447,7 @@ ept_set_entry(struct p2m_domain *p2m, un
         /* the caller should take care of the previous page */
         new_entry.mfn = mfn_x(mfn);
 
-        /* Safe to read-then-write because we hold the p2m lock */
+        /* Safe to read-then-write because we hold the 1G or 2M lock */
         if ( ept_entry->mfn == new_entry.mfn )
              need_modify_vtd_table = 0;
 
@@ -448,6 +455,8 @@ ept_set_entry(struct p2m_domain *p2m, un
 
         atomic_write_ept_entry(ept_entry, new_entry);
     }
+    if ( target == 2 )
+        unlock_p2m_1G(p2ml, index);
 
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( mfn_valid(mfn_x(mfn)) &&
@@ -642,6 +651,8 @@ static ept_entry_t ept_get_entry_content
     return content;
 }
 
+/* This is called before crashing a domain, so we're not particularly 
+ * concerned with locking */
 void ept_walk_table(struct domain *d, unsigned long gfn)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
diff -r a23e1262b124 -r 0a97d62c2d41 xen/arch/x86/mm/p2m-lock.h
--- a/xen/arch/x86/mm/p2m-lock.h
+++ b/xen/arch/x86/mm/p2m-lock.h
@@ -109,6 +109,11 @@ typedef struct __p2m_lock {
     mm_lock_t lock;
 } p2m_lock_t;
 
+/* We do not need sub-locking on 1G superpages because we have a 
+ * global lock */
+#define lock_p2m_1G(l, gfn)     ((void)l) 
+#define unlock_p2m_1G(l, gfn)   ((void)l)
+
 static inline int p2m_lock_init(struct p2m_domain *p2m)
 {
     p2m_lock_t *p2ml = xmalloc(p2m_lock_t);
@@ -271,7 +276,8 @@ typedef struct __p2m_lock
     /* To enforce ordering in mm-locks */
     int unlock_level;
     /* To protect on-demand allocation of locks 
-     * (yeah you heard that right) */
+     * (yeah you heard that right) 
+     * Also protects 1GB superpage splitting. */
     spinlock_t alloc_lock;
     /* Global lock */
     p2m_inner_lock_t global;
@@ -429,6 +435,9 @@ static inline void put_p2m_2m(struct p2m
     unlock_p2m_leaf(__get_2m_lock(p2m->lock, gfn_1g, gfn_2m));
 }
 
+#define lock_p2m_1G(l, gfn)     spin_lock(&l->alloc_lock)
+#define unlock_p2m_1G(l, gfn)   spin_unlock(&l->alloc_lock)
+
 /* Allocate 2M locks we may not have allocated yet for this 1G superpage */
 static inline int alloc_locks_2m(struct p2m_domain *p2m, unsigned long gfn_1g)
 {
diff -r a23e1262b124 -r 0a97d62c2d41 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -159,6 +159,7 @@ p2m_next_level(struct p2m_domain *p2m, m
                unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
                u32 max, unsigned long type)
 {
+    p2m_lock_t *p2ml = p2m->lock;
     l1_pgentry_t *l1_entry;
     l1_pgentry_t *p2m_entry;
     l1_pgentry_t new_entry;
@@ -207,6 +208,7 @@ p2m_next_level(struct p2m_domain *p2m, m
     ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
 
     /* split 1GB pages into 2MB pages */
+    lock_p2m_1G(p2ml, *gfn_remainder >> shift);
     if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -214,7 +216,10 @@ p2m_next_level(struct p2m_domain *p2m, m
 
         pg = p2m_alloc_ptp(p2m, PGT_l2_page_table);
         if ( pg == NULL )
+        {
+            unlock_p2m_1G(p2ml, *gfn_remainder >> shift);
             return 0;
+        }
 
         flags = l1e_get_flags(*p2m_entry);
         pfn = l1e_get_pfn(*p2m_entry);
@@ -233,9 +238,11 @@ p2m_next_level(struct p2m_domain *p2m, m
         p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable);
         p2m->write_p2m_entry(p2m, gfn, p2m_entry, *table_mfn, new_entry, 3);
     }
-
+    unlock_p2m_1G(p2ml, *gfn_remainder >> shift);
 
     /* split single 2MB large page into 4KB page in P2M table */
+    /* This does not necessitate locking because 2MB regions are locked
+     * exclusively */
     if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -297,6 +304,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
                                    IOMMUF_readable|IOMMUF_writable:
                                    0; 
     unsigned long old_mfn = 0;
+    p2m_lock_t *p2ml = p2m->lock;
 
     if ( tb_init_done )
     {
@@ -326,7 +334,10 @@ p2m_set_entry(struct p2m_domain *p2m, un
      */
     if ( page_order == PAGE_ORDER_1G )
     {
-        l1_pgentry_t old_entry = l1e_empty();
+        l1_pgentry_t old_entry;
+        lock_p2m_1G(p2ml, l3_table_offset(gfn));
+
+        old_entry = l1e_empty();
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L3_PAGETABLE_ENTRIES);
@@ -358,7 +369,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
         /* Free old intermediate tables if necessary */
         if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
             p2m_free_entry(p2m, &old_entry, page_order);
+        unlock_p2m_1G(p2ml, l3_table_offset(gfn));
     }
+
     /*
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
      * address in translated guests (i.e. 8 GBytes).  This restriction
@@ -515,6 +528,7 @@ static mfn_t p2m_gfn_to_mfn_current(stru
      * XXX Once we start explicitly registering MMIO regions in the p2m 
      * XXX we will return p2m_invalid for unmapped gfns */
 
+    p2m_lock_t *p2ml = p2m->lock;
     l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
     l2_pgentry_t l2e = l2e_empty();
     int ret;
@@ -543,6 +557,8 @@ pod_retry_l3:
             /* The read has succeeded, so we know that mapping exists */
             if ( q != p2m_query )
             {
+                /* We do not need to lock the 1G superpage here because PoD 
+                 * will do it by splitting */
                 if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                     goto pod_retry_l3;
                 p2mt = p2m_invalid;
@@ -558,6 +574,7 @@ pod_retry_l3:
         goto pod_retry_l2;
     }
 
+    lock_p2m_1G(p2ml, l3_table_offset(addr));
     if ( l3e_get_flags(l3e) & _PAGE_PSE )
     {
         p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
@@ -571,8 +588,12 @@ pod_retry_l3:
             
         if ( page_order )
             *page_order = PAGE_ORDER_1G;
+        unlock_p2m_1G(p2ml, l3_table_offset(addr));
         goto out;
     }
+    unlock_p2m_1G(p2ml, l3_table_offset(addr));
+#else
+    (void)p2ml; /* gcc ... */
 #endif
     /*
      * Read & process L2
@@ -691,6 +712,7 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
     paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
+    p2m_lock_t *p2ml = p2m->lock;
 
     ASSERT(paging_mode_translate(p2m->domain));
 
@@ -744,6 +766,8 @@ pod_retry_l3:
             {
                 if ( q != p2m_query )
                 {
+                    /* See previous comments on why there is no need to lock
+                     * the 1GB superpage here */
                     if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                         goto pod_retry_l3;
                 }
@@ -755,16 +779,23 @@ pod_retry_l3:
         }
         else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
         {
-            mfn = _mfn(l3e_get_pfn(*l3e) +
-                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
-                       l1_table_offset(addr));
-            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
-            unmap_domain_page(l3e);
+            lock_p2m_1G(p2ml, l3_table_offset(addr));
+            /* Recheck: a concurrent split may have cleared _PAGE_PSE */
+            if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
+            {
+                mfn = _mfn(l3e_get_pfn(*l3e) +
+                           l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
+                           l1_table_offset(addr));
+                *t = p2m_flags_to_type(l3e_get_flags(*l3e));
+                unmap_domain_page(l3e);
 
-            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
-            if ( page_order )
-                *page_order = PAGE_ORDER_1G;
-            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+                ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+                if ( page_order )
+                    *page_order = PAGE_ORDER_1G;
+                unlock_p2m_1G(p2ml, l3_table_offset(addr));
+                return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+            }
+            unlock_p2m_1G(p2ml, l3_table_offset(addr));
         }
 
         mfn = _mfn(l3e_get_pfn(*l3e));
@@ -852,6 +883,7 @@ static void p2m_change_type_global(struc
     l4_pgentry_t *l4e;
     unsigned long i4;
 #endif /* CONFIG_PAGING_LEVELS == 4 */
+    p2m_lock_t *p2ml = p2m->lock;
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
     BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct));
@@ -891,17 +923,25 @@ static void p2m_change_type_global(struc
             }
             if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
             {
-                flags = l3e_get_flags(l3e[i3]);
-                if ( p2m_flags_to_type(flags) != ot )
+                lock_p2m_1G(p2ml, i3);
+                if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
+                {
+                    flags = l3e_get_flags(l3e[i3]);
+                    if ( p2m_flags_to_type(flags) != ot )
+                    {
+                        unlock_p2m_1G(p2ml, i3);
+                        continue;
+                    }
+                    mfn = l3e_get_pfn(l3e[i3]);
+                    gfn = get_gpfn_from_mfn(mfn);
+                    flags = p2m_type_to_flags(nt, _mfn(mfn));
+                    l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+                    p2m->write_p2m_entry(p2m, gfn,
+                                         (l1_pgentry_t *)&l3e[i3],
+                                         l3mfn, l1e_content, 3);
+                    unlock_p2m_1G(p2ml, i3);
                     continue;
-                mfn = l3e_get_pfn(l3e[i3]);
-                gfn = get_gpfn_from_mfn(mfn);
-                flags = p2m_type_to_flags(nt, _mfn(mfn));
-                l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
-                p2m->write_p2m_entry(p2m, gfn,
-                                     (l1_pgentry_t *)&l3e[i3],
-                                     l3mfn, l1e_content, 3);
-                continue;
+                }
             }
 
             l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
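
Note that the lookup-side changes (p2m_gfn_to_mfn() and
p2m_change_type_global() above) follow a check/lock/recheck pattern: the
unlocked _PAGE_PSE test is repeated once the 1G lock is held, because a
concurrent traversal may have split the superpage while we were waiting.
Condensed (a sketch, not verbatim):

    if ( l3e_get_flags(*l3e) & _PAGE_PSE )
    {
        lock_p2m_1G(p2ml, l3_table_offset(addr));
        /* Recheck: a concurrent split may have cleared _PAGE_PSE */
        if ( l3e_get_flags(*l3e) & _PAGE_PSE )
        {
            /* ... handle the still-intact 1GB mapping ... */
        }
        unlock_p2m_1G(p2ml, l3_table_offset(addr));
    }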

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
