WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] VMX: fix ept pages free up when ept superpage split fails.

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] VMX: fix ept pages free up when ept superpage split fails.
From: "Li, Xin" <xin.li@xxxxxxxxx>
Date: Sat, 3 Jul 2010 13:37:01 +0800
Accept-language: zh-CN, en-US
Acceptlanguage: zh-CN, en-US
Delivery-date: Fri, 02 Jul 2010 22:38:16 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcsaccLPVpfp/45HQ1SYN6gRMYJaRg==
Thread-topic: [PATCH] VMX: fix ept pages free up when ept superpage split fails.
VMX: fix ept pages free up when ept superpage split fails:
1) implement ept super page split in a recursive way to
   form an ept sub tree before real installation;
2) free an ept sub tree also in a recursive way;
3) change ept_next_level last input parameter from shift
   bits # to next walk level.

Signed-off-by: Xin Li <xin.li@xxxxxxxxx>

diff -r f483b5ce7be2 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Fri Jul 02 19:04:57 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Sat Jul 03 21:29:56 2010 +0800
@@ -118,6 +118,74 @@
     return 1;
 }
 
+/* free ept sub tree behind an entry */
+void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level)
+{
+    /* End if the entry is a leaf entry. */
+    if ( level == 0 || !is_epte_present(ept_entry) ||
+         is_epte_superpage(ept_entry) )
+        return;
+
+    if ( level > 1 )
+    {
+        ept_entry_t *epte = map_domain_page(ept_entry->mfn);
+        for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+            ept_free_entry(d, epte + i, level - 1);
+        unmap_domain_page(epte);
+    }
+
+    d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn));
+}
+
+static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry,
+                                int level, int target)
+{
+    ept_entry_t new_ept, *table;
+    uint64_t trunk;
+    int rv = 1;
+
+    /* End if the entry is a leaf entry or reaches the target level. */
+    if ( level == 0 || level == target )
+        return rv;
+
+    ASSERT(is_epte_superpage(ept_entry));
+
+    if ( !ept_set_middle_entry(d, &new_ept) )
+        return 0;
+
+    table = map_domain_page(new_ept.mfn);
+    trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
+
+    for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+    {
+        ept_entry_t *epte = table + i;
+
+        epte->emt = ept_entry->emt;
+        epte->ipat = ept_entry->ipat;
+        epte->sp = (level > 1) ? 1 : 0;
+        epte->avail1 = ept_entry->avail1;
+        epte->avail2 = 0;
+        epte->mfn = ept_entry->mfn + i * trunk;
+
+        ept_p2m_type_to_flags(epte, epte->avail1);
+
+        if ( (level - 1) == target )
+            continue;
+
+        ASSERT(is_epte_superpage(epte));
+
+        if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) )
+            break;
+    }
+
+    unmap_domain_page(table);
+
+    /* Even on failure, install the newly allocated ept page. */
+    *ept_entry = new_ept;
+
+    return rv;
+}
+
 /* Take the currently mapped table, find the corresponding gfn entry,
  * and map the next table, if available.  If the entry is empty
  * and read_only is set, 
@@ -134,14 +202,18 @@
  */
 static int ept_next_level(struct domain *d, bool_t read_only,
                           ept_entry_t **table, unsigned long *gfn_remainder,
-                          u32 shift)
+                          int next_level)
 {
     ept_entry_t *ept_entry;
-    ept_entry_t *next;
-    u32 index;
+    u32 shift, index;
+
+    shift = next_level * EPT_TABLE_ORDER;
 
     index = *gfn_remainder >> shift;
 
+    /* index must be falling into the page */
+    ASSERT(index < EPT_PAGETABLE_ENTRIES);
+
     ept_entry = (*table) + index;
 
     if ( !is_epte_present(ept_entry) )
@@ -161,69 +233,15 @@
         return GUEST_TABLE_SUPER_PAGE;
     else
     {
+        unsigned long mfn = ept_entry->mfn;
+
+        unmap_domain_page(*table);
+        *table = map_domain_page(mfn);
         *gfn_remainder &= (1UL << shift) - 1;
-        next = map_domain_page(ept_entry->mfn);
-        unmap_domain_page(*table);
-        *table = next;
         return GUEST_TABLE_NORMAL_PAGE;
     }
 }
 
-/* It's super page before and we should break down it now. */
-static int ept_split_large_page(struct domain *d,
-                                ept_entry_t **table, u32 *index,
-                                unsigned long gfn, int level)
-{
-    ept_entry_t *prev_table = *table;
-    ept_entry_t *split_table = NULL;
-    ept_entry_t *split_entry = NULL;
-    ept_entry_t *ept_entry = (*table) + (*index);
-    ept_entry_t temp_ept_entry;
-    unsigned long s_gfn, s_mfn;
-    unsigned long offset, trunk;
-    int i;
-
-    /* alloc new page for new ept middle level entry which is
-     * before a leaf super entry
-     */
-
-    if ( !ept_set_middle_entry(d, &temp_ept_entry) )
-        return 0;
-
-    /* split the super page to small next level pages */
-    split_table = map_domain_page(temp_ept_entry.mfn);
-    offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
-    trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
-
-    for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
-    {
-        s_gfn = gfn - offset + i * trunk;
-        s_mfn = ept_entry->mfn + i * trunk;
-
-        split_entry = split_table + i;
-        split_entry->emt = ept_entry->emt;
-        split_entry->ipat = ept_entry->ipat;
-
-        split_entry->sp = (level > 1) ? 1 : 0;
-
-        split_entry->mfn = s_mfn;
-
-        split_entry->avail1 = ept_entry->avail1;
-        split_entry->avail2 = 0;
-        /* last step */
-        split_entry->r = split_entry->w = split_entry->x = 1;
-        ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
-    }
-
-    *ept_entry = temp_ept_entry;
-    
-    *index = offset / trunk;
-    *table = split_table;
-    unmap_domain_page(prev_table);
-
-    return 1;
-}
-
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
@@ -265,7 +283,7 @@
 
     for ( i = ept_get_wl(d); i > target; i-- )
     {
-        ret = ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 0, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret != GUEST_TABLE_NORMAL_PAGE )
@@ -275,12 +293,10 @@
     ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
 
     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
-    gfn_remainder &= (1UL << (i * EPT_TABLE_ORDER)) - 1;
+    offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
 
     ept_entry = table + index;
 
-    offset = gfn_remainder;
-
     /*
      * When we are here, we must be on a leaf ept entry
      * with i == target or i > target.
@@ -301,15 +317,14 @@
                                                 direct_mmio);
             ept_entry->ipat = ipat;
             ept_entry->sp = order ? 1 : 0;
+            ept_entry->avail1 = p2mt;
+            ept_entry->avail2 = 0;
 
             if ( ept_entry->mfn == mfn_x(mfn) )
                 need_modify_vtd_table = 0;
             else
                 ept_entry->mfn = mfn_x(mfn);
 
-            ept_entry->avail1 = p2mt;
-            ept_entry->avail2 = 0;
-
             ept_p2m_type_to_flags(ept_entry, p2mt);
         }
         else
@@ -318,33 +333,50 @@
     else
     {
         /* We need to split the original page. */
-        ept_entry_t *split_ept_entry;
+        ept_entry_t split_ept_entry;
 
         ASSERT(is_epte_superpage(ept_entry));
 
-        for ( ; i > target; i-- )
+        split_ept_entry = *ept_entry;
+
+        if ( !ept_split_super_page(d, &split_ept_entry, i, target) )
         {
-            rv = ept_split_large_page(d, &table, &index, gfn, i);
-            if ( !rv )
-                goto out;
+            ept_free_entry(d, &split_ept_entry, i);
+            goto out;
         }
 
-        split_ept_entry = table + index;
-        split_ept_entry->avail1 = p2mt;
-        ept_p2m_type_to_flags(split_ept_entry, p2mt);
-        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
-                                                  direct_mmio);
-        split_ept_entry->ipat = ipat;
+        /* now install the newly split ept sub-tree */
+        /* NB: please make sure domain is paused and no in-flight VT-d DMA. */
+        *ept_entry = split_ept_entry;
 
-        if ( split_ept_entry->mfn == mfn_x(mfn) )
-            need_modify_vtd_table = 0;
-        else
-            split_ept_entry->mfn = mfn_x(mfn);
+        /* then move to the level we want to make real changes */
+        for ( ; i > target; i-- )
+            ept_next_level(d, 0, &table, &gfn_remainder, i);
+
+        ASSERT(i == target);
+
+        index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+        offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
+
+        ept_entry = table + index;
+
+        ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
+        ept_entry->ipat = ipat;
+        ept_entry->sp = i ? 1 : 0;
+        ept_entry->avail1 = p2mt;
+        ept_entry->avail2 = 0;
+
+        if ( ept_entry->mfn == mfn_x(mfn) )
+             need_modify_vtd_table = 0;
+        else /* the caller should take care of the previous page */
+            ept_entry->mfn = mfn_x(mfn);
+
+        ept_p2m_type_to_flags(ept_entry, p2mt);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn_x(mfn))
-         && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+    if ( mfn_valid(mfn_x(mfn)) &&
+         (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
         d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
     /* Success */
@@ -366,11 +398,11 @@
                 for ( i = 0; i < (1 << order); i++ )
                     iommu_map_page(
                         d, gfn - offset + i, mfn_x(mfn) - offset + i,
-                        IOMMUF_readable|IOMMUF_writable);
+                        IOMMUF_readable | IOMMUF_writable);
             }
             else if ( !order )
                 iommu_map_page(
-                    d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable);
+                    d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable);
         }
         else
         {
@@ -410,8 +442,7 @@
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
     retry:
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret == GUEST_TABLE_POD_PAGE )
@@ -498,8 +529,7 @@
 
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret || ret == GUEST_TABLE_POD_PAGE )
             goto out;
         else if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -722,8 +752,7 @@
 
             for ( i = ept_get_wl(d); i > 0; i-- )
             {
-                ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                                     i * EPT_TABLE_ORDER);
+                ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
                     break;
             }

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] VMX: fix ept pages free up when ept superpage split fails., Li, Xin <=