WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] EPT: 1GB large page support.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] EPT: 1GB large page support.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 05 Apr 2010 23:20:47 -0700
Delivery-date: Mon, 05 Apr 2010 23:23:28 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1270534496 -3600
# Node ID d7370232060a31d17cd27c9d40a4a6cf2f09935d
# Parent  b20f897d6010457ec507138d450a332eba5147ea
EPT: 1GB large page support.

Allocate a 1GB large page for EPT when possible. This patch also contains the
logic to split a large page into smaller ones (2M or 4K).

Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
Acked-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c             |    5 
 xen/arch/x86/hvm/vmx/vmcs.c        |   16 ++
 xen/arch/x86/hvm/vmx/vmx.c         |    3 
 xen/arch/x86/mm/hap/p2m-ept.c      |  199 +++++++++++++++++++------------------
 xen/include/asm-x86/hvm/vmx/vmcs.h |    7 +
 xen/include/asm-x86/msr-index.h    |    1 
 6 files changed, 134 insertions(+), 97 deletions(-)

diff -r b20f897d6010 -r d7370232060a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Tue Apr 06 07:14:56 2010 +0100
@@ -966,6 +966,11 @@ bool_t hvm_hap_nested_page_fault(unsigne
     /* Spurious fault? PoD and log-dirty also take this path. */
     if ( p2m_is_ram(p2mt) )
     {
+        /*
+         * Page log dirty is always done with order 0. If this mfn resides in
+         * a large page, we do not change other pages type within that large
+         * page.
+         */
         paging_mark_dirty(current->domain, mfn_x(mfn));
         p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
         return 1;
diff -r b20f897d6010 -r d7370232060a xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Tue Apr 06 07:14:56 2010 +0100
@@ -64,6 +64,7 @@ u32 vmx_secondary_exec_control __read_mo
 u32 vmx_secondary_exec_control __read_mostly;
 u32 vmx_vmexit_control __read_mostly;
 u32 vmx_vmentry_control __read_mostly;
+u8 vmx_ept_super_page_level_limit __read_mostly;
 bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, host_vmcs);
@@ -183,6 +184,21 @@ static void vmx_init_vmcs_config(void)
             _vmx_secondary_exec_control &=
                 ~(SECONDARY_EXEC_ENABLE_EPT |
                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
+        if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
+        {
+            uint64_t cap;
+            rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, cap);
+            if ( cap & VMX_EPT_SUPER_PAGE_1G )
+            {
+                vmx_ept_super_page_level_limit = 2;
+                printk("EPT support 1G super page.\n");
+            }
+            else if ( cap & VMX_EPT_SUPER_PAGE_2M )
+            {
+                vmx_ept_super_page_level_limit = 1; 
+                printk("EPT support 2M super page.\n");
+            }
+        }
     }
 
     if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
diff -r b20f897d6010 -r d7370232060a xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Apr 06 07:14:56 2010 +0100
@@ -1446,7 +1446,8 @@ void start_vmx(void)
     if ( cpu_has_vmx_ept )
         vmx_function_table.hap_supported = 1;
     
-    vmx_function_table.hap_1gb_pgtb = 0;
+    vmx_function_table.hap_1gb_pgtb = ( vmx_ept_super_page_level_limit == 2 ) ?
+                                        1 : 0;
 
     setup_vmcs_dump();
 
diff -r b20f897d6010 -r d7370232060a xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Tue Apr 06 07:14:56 2010 +0100
@@ -25,6 +25,7 @@
 #include <asm/domain.h>
 #include <asm/p2m.h>
 #include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
 #include <xen/iommu.h>
 #include <asm/mtrr.h>
 #include <asm/hvm/cacheattr.h>
@@ -167,6 +168,61 @@ static int ept_next_level(struct domain 
     }
 }
 
+/* It's super page before and we should break down it now. */
+static int ept_split_large_page(struct domain *d,
+                                ept_entry_t **table, u32 *index,
+                                unsigned long gfn, int level)
+{
+    ept_entry_t *prev_table = *table;
+    ept_entry_t *split_table = NULL;
+    ept_entry_t *split_entry = NULL;
+    ept_entry_t *ept_entry = (*table) + (*index);
+    ept_entry_t temp_ept_entry;
+    unsigned long s_gfn, s_mfn;
+    unsigned long offset, trunk;
+    int i;
+
+    /* alloc new page for new ept middle level entry which is
+     * before a leaf super entry
+     */
+
+    if ( !ept_set_middle_entry(d, &temp_ept_entry) )
+        return 0;
+
+    /* split the super page to small next level pages */
+    split_table = map_domain_page(temp_ept_entry.mfn);
+    offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
+    trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
+
+    for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
+    {
+        s_gfn = gfn - offset + i * trunk;
+        s_mfn = ept_entry->mfn + i * trunk;
+
+        split_entry = split_table + i;
+        split_entry->emt = ept_entry->emt;
+        split_entry->ipat = ept_entry->ipat;
+
+        split_entry->sp_avail =  (level > 1) ? 1 : 0;
+
+        split_entry->mfn = s_mfn;
+
+        split_entry->avail1 = ept_entry->avail1;
+        split_entry->avail2 = 0;
+        /* last step */
+        split_entry->r = split_entry->w = split_entry->x = 1;
+        ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
+    }
+
+    *ept_entry = temp_ept_entry;
+    
+    *index = offset / trunk;
+    *table = split_table;
+    unmap_domain_page(prev_table);
+
+    return 1;
+}
+
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
@@ -183,13 +239,11 @@ ept_set_entry(struct domain *d, unsigned
     int i;
     int rv = 0;
     int ret = 0;
+    int split_level = 0;
     int walk_level = order / EPT_TABLE_ORDER;
     int direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
     int need_modify_vtd_table = 1;
-
-    /* We only support 4k and 2m pages now */
-    BUG_ON(order && order != EPT_TABLE_ORDER);
 
     if (  order != 0 )
         if ( (gfn & ((1UL << order) - 1)) )
@@ -208,15 +262,15 @@ ept_set_entry(struct domain *d, unsigned
             break;
     }
 
-    /* If order == 9, we should never get SUPERPAGE or PoD.
-     * If order == 0, we should only get POD if we have a POD superpage.
+    /* If order == 0, we should only get POD if we have a POD superpage.
      * If i > walk_level, we need to split the page; otherwise,
      * just behave as normal. */
-    ASSERT(order == 0 || ret == GUEST_TABLE_NORMAL_PAGE);
     ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level);
 
     index = gfn_remainder >> ( i ?  (i * EPT_TABLE_ORDER): order);
     offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
+    split_level = i;
 
     ept_entry = table + index;
 
@@ -231,25 +285,10 @@ ept_set_entry(struct domain *d, unsigned
             ept_entry->ipat = ipat;
             ept_entry->sp_avail = order ? 1 : 0;
 
-            if ( ret == GUEST_TABLE_SUPER_PAGE )
-            {
-                if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
-                    need_modify_vtd_table = 0;  
-                else                  
-                    ept_entry->mfn = mfn_x(mfn) - offset;
-
-                if ( (ept_entry->avail1 == p2m_ram_logdirty)
-                     && (p2mt == p2m_ram_rw) )
-                    for ( i = 0; i < 512; i++ )
-                        paging_mark_dirty(d, mfn_x(mfn) - offset + i);
-            }
+            if ( ept_entry->mfn == mfn_x(mfn) )
+                need_modify_vtd_table = 0;
             else
-            {
-                if ( ept_entry->mfn == mfn_x(mfn) )
-                    need_modify_vtd_table = 0;
-                else
-                    ept_entry->mfn = mfn_x(mfn);
-            }
+                ept_entry->mfn = mfn_x(mfn);
 
             ept_entry->avail1 = p2mt;
             ept_entry->avail2 = 0;
@@ -261,51 +300,22 @@ ept_set_entry(struct domain *d, unsigned
     }
     else
     {
-        /* 
-         * It's super page before, now set one of the 4k pages, so
-         * we should split the 2m page to 4k pages now.
-         */
-        /* Pointers to / into new (split) middle-level table */
-        ept_entry_t *split_table = NULL;
-        ept_entry_t *split_ept_entry = NULL;
-        /* Info about old (superpage) table */
-        unsigned long super_mfn = ept_entry->mfn;
-        p2m_type_t super_p2mt = ept_entry->avail1;
-        /* The new l2 entry which we'll write after we've build the new l1 
table */
-        ept_entry_t l2_ept_entry;
-
-        /* 
-         * Allocate new page for new ept middle level entry which is
-         * before a leaf super entry
-         */
-        if ( !ept_set_middle_entry(d, &l2_ept_entry) )
-            goto out;
-
-        /* Split the super page before to 4k pages */
-        split_table = map_domain_page(l2_ept_entry.mfn);
-        offset = gfn & ((1 << EPT_TABLE_ORDER) - 1);
-
-        for ( i = 0; i < 512; i++ )
-        {
-            split_ept_entry = split_table + i;
-            split_ept_entry->emt = epte_get_entry_emt(d, gfn - offset + i,
-                                                      _mfn(super_mfn + i),
-                                                      &ipat, direct_mmio);
-            split_ept_entry->ipat = ipat;
-            split_ept_entry->sp_avail =  0;
-            /* Don't increment mfn if it's a PoD mfn */
-            if ( super_p2mt != p2m_populate_on_demand )
-                split_ept_entry->mfn = super_mfn + i;
-            else
-                split_ept_entry->mfn = super_mfn; 
-            split_ept_entry->avail1 = super_p2mt;
-            split_ept_entry->avail2 = 0;
-
-            ept_p2m_type_to_flags(split_ept_entry, super_p2mt);
-        }
-
-        /* Set the destinated 4k page as normal */
-        split_ept_entry = split_table + offset;
+        int num = order / EPT_TABLE_ORDER;
+        int level;
+        ept_entry_t *split_ept_entry;
+    
+        if ( num >= cpu_vmx_ept_super_page_level_limit )
+            num = cpu_vmx_ept_super_page_level_limit;
+        for ( level = split_level; level > num ; level-- )
+        {
+            rv = ept_split_large_page(d, &table, &index, gfn, level);
+            if ( !rv )
+                goto out;
+        }
+
+        split_ept_entry = table + index;
+        split_ept_entry->avail1 = p2mt;
+        ept_p2m_type_to_flags(split_ept_entry, p2mt);
         split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
                                                   direct_mmio);
         split_ept_entry->ipat = ipat;
@@ -314,12 +324,6 @@ ept_set_entry(struct domain *d, unsigned
             need_modify_vtd_table = 0;
         else
             split_ept_entry->mfn = mfn_x(mfn);
-
-        split_ept_entry->avail1 = p2mt;
-        ept_p2m_type_to_flags(split_ept_entry, p2mt);
-
-        unmap_domain_page(split_table);
-        *ept_entry = l2_ept_entry;
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
@@ -336,7 +340,7 @@ out:
     ept_sync_domain(d);
 
     /* Now the p2m table is not shared with vt-d page table */
-    if ( iommu_enabled && need_iommu(d) && need_modify_vtd_table )
+    if ( rv && iommu_enabled && need_iommu(d) && need_modify_vtd_table )
     {
         if ( p2mt == p2m_ram_rw )
         {
@@ -459,7 +463,7 @@ out:
 /* WARNING: Only caller doesn't care about PoD pages.  So this function will
  * always return 0 for PoD pages, not populate them.  If that becomes 
necessary,
  * pass a p2m_query_t type along to distinguish. */
-static ept_entry_t ept_get_entry_content(struct domain *d, unsigned long gfn)
+static ept_entry_t ept_get_entry_content(struct domain *d, unsigned long gfn, 
int *level)
 {
     ept_entry_t *table =
         map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
@@ -487,6 +491,7 @@ static ept_entry_t ept_get_entry_content
     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
     ept_entry = table + index;
     content = *ept_entry;
+    *level = i;
 
  out:
     unmap_domain_page(table);
@@ -579,7 +584,10 @@ void ept_change_entry_emt_with_range(str
     p2m_lock(d->arch.p2m);
     for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
     {
-        e = ept_get_entry_content(d, gfn);
+        int level = 0;
+        uint64_t trunk = 0;
+
+        e = ept_get_entry_content(d, gfn, &level);
         if ( !p2m_has_emt(e.avail1) )
             continue;
 
@@ -588,25 +596,24 @@ void ept_change_entry_emt_with_range(str
 
         if ( e.sp_avail )
         {
-            if ( !(gfn & ((1 << EPT_TABLE_ORDER) - 1)) &&
-                 ((gfn + 0x1FF) <= end_gfn) )
+            while ( level )
             {
-                /* 
-                 * gfn assigned with 2M, and the end covers more than 2m areas.
-                 * Set emt for super page.
-                 */
-                order = EPT_TABLE_ORDER;
-                if ( need_modify_ept_entry(d, gfn, mfn, e.ipat, e.emt, 
e.avail1) )
-                    ept_set_entry(d, gfn, mfn, order, e.avail1);
-                gfn += 0x1FF;
-            }
-            else
-            {
-                /* Change emt for partial entries of the 2m area. */
-                if ( need_modify_ept_entry(d, gfn, mfn, e.ipat, e.emt, 
e.avail1) )
-                    ept_set_entry(d, gfn, mfn, order, e.avail1);
-                gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
-            }
+                trunk = (1UL << (level * EPT_TABLE_ORDER)) - 1;
+                if ( !(gfn & trunk) && (gfn + trunk <= end_gfn) )
+                {
+                    /* gfn assigned with 2M or 1G, and the end covers more than
+                     * the super page areas.
+                     * Set emt for super page.
+                     */
+                    order = level * EPT_TABLE_ORDER;
+                    if ( need_modify_ept_entry(d, gfn, mfn, 
+                          e.ipat, e.emt, e.avail1) )
+                        ept_set_entry(d, gfn, mfn, order, e.avail1);
+                    gfn += trunk;
+                    break;
+                }
+                level--;
+             }
         }
         else /* gfn assigned with 4k */
         {
diff -r b20f897d6010 -r d7370232060a xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Apr 06 07:14:56 2010 +0100
@@ -175,6 +175,11 @@ extern u32 vmx_secondary_exec_control;
 extern u32 vmx_secondary_exec_control;
 
 extern bool_t cpu_has_vmx_ins_outs_instr_info;
+
+extern u8 vmx_ept_super_page_level_limit;
+
+#define VMX_EPT_SUPER_PAGE_2M              0x00010000
+#define VMX_EPT_SUPER_PAGE_1G              0x00020000
 
 #define cpu_has_wbinvd_exiting \
     (vmx_secondary_exec_control & SECONDARY_EXEC_WBINVD_EXITING)
@@ -203,6 +208,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
      SECONDARY_EXEC_UNRESTRICTED_GUEST)
 #define cpu_has_vmx_ple \
     (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define cpu_vmx_ept_super_page_level_limit  \
+    vmx_ept_super_page_level_limit
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
diff -r b20f897d6010 -r d7370232060a xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h   Tue Apr 06 07:14:56 2010 +0100
@@ -166,6 +166,7 @@
 #define MSR_IA32_VMX_CR4_FIXED0                 0x488
 #define MSR_IA32_VMX_CR4_FIXED1                 0x489
 #define MSR_IA32_VMX_PROCBASED_CTLS2            0x48b
+#define MSR_IA32_VMX_EPT_VPID_CAP               0x48c
 #define MSR_IA32_VMX_TRUE_PINBASED_CTLS         0x48d
 #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS        0x48e
 #define MSR_IA32_VMX_TRUE_EXIT_CTLS             0x48f

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] EPT: 1GB large page support., Xen patchbot-unstable <=