[Xen-devel] [PATCH 1/1] Xen PV support for hugepages

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 1/1] Xen PV support for hugepages
From: dcm@xxxxxxxx
Date: Tue, 04 Nov 2008 09:41:49 -0600
Delivery-date: Tue, 04 Nov 2008 07:42:13 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This is the latest version of a patch that adds hugepage support to the Xen
hypervisor in a PV environment.  It applies against the latest xen-unstable
tree on xenbits.xensource.com.  I believe this version addresses the
comments made on the previous version of the patch.

Hugepage support must be enabled via the hypervisor command-line option
"allowhugepage"; it is off by default.

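For example, with a GRUB-style boot entry the option is appended to the
hypervisor line.  A minimal sketch (the paths and module lines are
illustrative, not taken from this patch):

    title Xen (PV hugepage support)
        kernel /boot/xen.gz allowhugepage
        module /boot/vmlinuz-xen root=/dev/sda1 ro

As with other Xen boolean parameters, passing the bare name enables the
option.
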
The hypervisor does not verify this itself: it assumes the guest guarantees
that each hugepage is physically contiguous and starts on a
hugepage-aligned machine frame.
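
Since get_page_from_l2e() does not check alignment, a careful guest would
verify it before handing Xen a PSE entry.  A minimal guest-side sketch of
such a check (illustrative only; mfn_ok_for_hugepage() is not part of this
patch, and L1_PAGETABLE_ENTRIES is Xen's name for the number of 4k frames
covered by one hugepage):

    /* The first machine frame of a hugepage must be aligned to
     * L1_PAGETABLE_ENTRIES frames, and the whole run behind it must be
     * physically contiguous (which the guest must arrange separately). */
    static int mfn_ok_for_hugepage(unsigned long mfn)
    {
        return (mfn & (L1_PAGETABLE_ENTRIES - 1)) == 0;
    }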

There is currently no support in the tools for saving, restoring, or
migrating guests that use hugepages.

Note to the maintainers:  What else needs to be done for this patch to be
considered for inclusion into the tree?

Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>

----

--- xen-unstable//./xen/include/asm-x86/mm.h    2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/mm.h        2008-11-04 08:24:35.000000000 -0600
@@ -240,6 +240,8 @@ pae_copy_root(struct vcpu *v, l3_pgentry
 int check_descriptor(const struct domain *, struct desc_struct *d);
 
 
+extern int opt_allow_hugepage;
+
 /******************************************************************************
  * With shadow pagetables, the different kinds of address start 
  * to get get confusing.
--- xen-unstable//./xen/include/asm-x86/x86_32/page.h   2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h       2008-11-04 08:24:35.000000000 -0600
@@ -115,7 +115,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
 #define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
 #define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
 
 #endif /* __X86_32_PAGE_H__ */
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h   2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h       2008-11-04 08:24:35.000000000 -0600
@@ -115,7 +115,7 @@ typedef l4_pgentry_t root_pgentry_t;
 #define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
 #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
 #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
 
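
A note on the two page.h hunks above: an L2 update is refused whenever it
sets a bit in L2_DISALLOW_MASK, so clearing _PAGE_PSE from the mask on
both architectures is what lets a superpage entry reach the new
validation code in mm.c at all.  Roughly, simplified from the check
there:

    /* From get_page_from_l2e(): any disallowed flag fails the update.
     * With _PAGE_PSE removed from L2_DISALLOW_MASK, a PSE entry now
     * falls through to the hugepage refcounting path instead. */
    if ( unlikely(l2e_get_flags(l2e) & L2_DISALLOW_MASK) )
        return -EINVAL;
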
--- xen-unstable//./xen/arch/x86/mm.c   2008-11-04 08:22:40.000000000 -0600
+++ xen-hpage/./xen/arch/x86/mm.c       2008-11-04 08:24:35.000000000 -0600
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+int opt_allow_hugepage = 0;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -584,6 +587,26 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int get_data_page(struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if ( writeable )
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void put_data_page(struct page_info *page, int writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -656,6 +679,7 @@ get_page_from_l1e(
     struct vcpu *curr = current;
     struct domain *owner;
     int okay;
+    int writeable;
 
     if ( !(l1f & _PAGE_PRESENT) )
         return 1;
@@ -698,10 +722,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) && 
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !( unlikely(paging_mode_external(d) && (d != curr->domain)) );
+    okay = get_data_page(page, d, writeable);
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +782,43 @@ get_page_from_l2e(
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return -EINVAL;
     }
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+        if ( !opt_allow_hugepage )
+            return -EINVAL;
+
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if ( unlikely(!rc) )
+            return -EINVAL;
+
+        for ( m = mfn+1, me = mfn + (L1_PAGETABLE_ENTRIES-1); m <= me; m++ )
+        {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if ( unlikely(!rc) )
+            {
+                for ( --m; m > mfn; --m )
+                    put_data_page(mfn_to_page(m), writeable);
+                put_data_page(page, writeable);
+                return -EINVAL;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
 
     return rc;
 }
@@ -955,7 +1010,18 @@ static int put_page_from_l2e(l2_pgentry_
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
          (l2e_get_pfn(l2e) != pfn) )
     {
-        put_page_and_type(l2e_get_page(l2e));
+        if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+            for ( m = mfn+1, me = mfn + (L1_PAGETABLE_ENTRIES-1); m <= me; m++ )
+                put_data_page(mfn_to_page(m), writeable);
+            put_data_page(page, writeable);
+        } else
+            put_page_and_type(l2e_get_page(l2e));
         return 0;
     }
     return 1;
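
For context on how the mm.c changes get exercised: a PV guest installs
such a mapping through the ordinary mmu_update path, with _PAGE_PSE set
in the new L2 entry.  A hedged guest-side sketch (l2_slot_maddr and
fall_back_to_4k() are assumed names, not part of this patch):

    /* Illustrative only: ask Xen to install a PSE (superpage) L2 entry.
     * 'mfn' must honor the alignment/contiguity guarantee described
     * above.  The low two bits of ptr select MMU_NORMAL_PT_UPDATE (0). */
    mmu_update_t u;
    u.ptr = l2_slot_maddr;    /* machine address of the L2 slot */
    u.val = ((uint64_t)mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW |
            _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
    if ( HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0 )
        fall_back_to_4k();    /* allowhugepage off, or refused above */

On the hypervisor side this lands in get_page_from_l2e(), which takes a
reference on every one of the L1_PAGETABLE_ENTRIES constituent frames;
put_page_from_l2e() drops exactly the same set of references when the
entry is torn down.
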
--- xen-unstable//./xen/arch/x86/traps.c        2008-11-04 08:22:40.000000000 -0600
+++ xen-hpage/./xen/arch/x86/traps.c    2008-11-04 08:24:35.000000000 -0600
@@ -723,7 +723,8 @@ static void pv_cpuid(struct cpu_user_reg
     {
         /* Modify Feature Information. */
         __clear_bit(X86_FEATURE_VME, &d);
-        __clear_bit(X86_FEATURE_PSE, &d);
+        if ( !opt_allow_hugepage )
+            __clear_bit(X86_FEATURE_PSE, &d);
         __clear_bit(X86_FEATURE_PGE, &d);
         __clear_bit(X86_FEATURE_MCE, &d);
         __clear_bit(X86_FEATURE_MCA, &d);
@@ -2002,9 +2003,12 @@ static int emulate_privileged_op(struct 
         case 4: /* Read CR4 */
             /*
              * Guests can read CR4 to see what features Xen has enabled. We
-             * therefore lie about PGE & PSE as they are unavailable to guests.
+             * therefore lie about PGE as it is unavailable to guests.
+             * Also disallow PSE if hugepages are not enabled.
              */
-            *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
+            *reg = read_cr4() & ~(X86_CR4_PGE);
+            if ( !opt_allow_hugepage )
+                *reg &= ~(X86_CR4_PSE);
             break;
 
         default:
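
With these two changes a guest can use its native PSE detection
unmodified: CPUID leaf 1 (filtered by pv_cpuid() above) now reports PSE
when hugepages are enabled, and a CR4 read, which traps into
emulate_privileged_op(), shows X86_CR4_PSE.  A minimal guest-side sketch
(the cpuid() and read_cr4() helpers are assumed to exist guest-side):

    unsigned int eax, ebx, ecx, edx;
    int use_hugepages = 0;

    cpuid(1, &eax, &ebx, &ecx, &edx);
    if ( (edx & (1u << 3)) &&           /* CPUID.1:EDX bit 3 = PSE */
         (read_cr4() & X86_CR4_PSE) )   /* emulated by Xen per above */
        use_hugepages = 1;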

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel