To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: map M2P table sparsely
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 22 Sep 2009 01:25:27 -0700
Delivery-date: Tue, 22 Sep 2009 01:26:26 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1253603956 -3600
# Node ID 51152e4f995f383eccc7c686afc3ab67d626327d
# Parent  d9b50ae2bf18fdeae57159bc56d414057ade502a
x86: map M2P table sparsely

Avoid backing M2P table holes with memory when those holes are large
enough to cover an exact multiple of large pages.
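
In code terms, the hole test boils down to a per-2Mb-slot check like
the following (an illustrative sketch only, not part of the patch;
the helper name m2p_slot_is_hole() is invented here, while MFN(), CNT,
PDX_GROUP_COUNT and mfn_valid() are used as in the hunks below):

    /* Sketch: a 2Mb M2P slot needs backing memory only if at least one
     * machine page number it would translate is valid. */
    static int m2p_slot_is_hole(unsigned long i)
    {
        unsigned int n;

        for ( n = 0; n < CNT; ++n )
            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
                return 0; /* some valid MFN in the slot: must map it */
        return 1;         /* no valid MFN anywhere: leave it unmapped */
    }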

For the sake of saving and migrating guests, XENMEM_machphys_mfn_list
fills the holes in the array it returns with the MFN of the previous
range returned (thanks to Keir for pointing out that it really doesn't
matter *what* MFN gets returned for invalid ranges). Using the most
recently encountered MFN (rather than, e.g., always the first one) is
an attempt to cut down on the number of references these pages will
get when they are mapped into a privileged domain's address space.
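
A minimal sketch of that fill loop, mirroring the x86_32 hunk below
(illustration only, not itself part of the patch):

    for ( i = 0, last_mfn = 0; i < max; i++ )
    {
        l2e = idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START +
                                                (i << 21))];
        if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
            mfn = l2e_get_pfn(l2e); /* slot is backed: report real MFN */
        else
            mfn = last_mfn;         /* hole: repeat the previous MFN */
        if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
            return -EFAULT;
        last_mfn = mfn;
    }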

This also allows saving a couple of 2M pages even on certain
"normal" systems.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/x86_32/mm.c        |   47 ++++++++++----
 xen/arch/x86/x86_64/compat/mm.c |   23 +++++--
 xen/arch/x86/x86_64/mm.c        |  130 +++++++++++++++++++++++++++-------------
 3 files changed, 141 insertions(+), 59 deletions(-)

diff -r d9b50ae2bf18 -r 51152e4f995f xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Tue Sep 22 08:18:19 2009 +0100
+++ b/xen/arch/x86/x86_32/mm.c  Tue Sep 22 08:19:16 2009 +0100
@@ -72,7 +72,7 @@ void __init paging_init(void)
 {
     unsigned long v;
     struct page_info *pg;
-    int i;
+    unsigned int i, n;
 
     if ( cpu_has_pge )
     {
@@ -96,8 +96,18 @@ void __init paging_init(void)
      */
     mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*machine_to_phys_mapping));
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            continue;
         if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
             panic("Not enough memory to bootstrap Xen.\n");
         l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
@@ -106,11 +116,12 @@ void __init paging_init(void)
         l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
                   l2e_from_page(
                       pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
-    }
-
-    /* Fill with an obvious debug pattern. */
-    for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++)
-        set_gpfn_from_mfn(i, 0x55555555);
+        /* Fill with an obvious debug pattern. */
+        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
+               1UL << L2_PAGETABLE_SHIFT);
+    }
+#undef CNT
+#undef MFN
 
     /* Create page tables for ioremap()/map_domain_page_global(). */
     for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
@@ -163,14 +174,17 @@ void __init subarch_init_memory(void)
 {
     unsigned long m2p_start_mfn;
     unsigned int i, j;
+    l2_pgentry_t l2e;
 
     BUILD_BUG_ON(sizeof(struct page_info) != 24);
 
     /* M2P table is mappable read-only by privileged domains. */
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
-        m2p_start_mfn = l2e_get_pfn(
-            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
+        l2e = idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i];
+        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+            continue;
+        m2p_start_mfn = l2e_get_pfn(l2e);
         for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
         {
             struct page_info *page = mfn_to_page(m2p_start_mfn + j);
@@ -191,8 +205,9 @@ long subarch_memory_op(int op, XEN_GUEST
 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
     struct xen_machphys_mfn_list xmml;
-    unsigned long mfn;
+    unsigned long mfn, last_mfn;
     unsigned int i, max;
+    l2_pgentry_t l2e;
     long rc = 0;
 
     switch ( op )
@@ -203,12 +218,18 @@ long subarch_memory_op(int op, XEN_GUEST
 
         max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);
 
-        for ( i = 0; i < max; i++ )
-        {
-            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
-                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
+        for ( i = 0, last_mfn = 0; i < max; i++ )
+        {
+            l2e = idle_pg_table_l2[l2_linear_offset(
+                RDWR_MPT_VIRT_START + (i << 21))];
+            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                mfn = l2e_get_pfn(l2e);
+            else
+                mfn = last_mfn;
+            ASSERT(mfn);
             if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
diff -r d9b50ae2bf18 -r 51152e4f995f xen/arch/x86/x86_64/compat/mm.c
--- a/xen/arch/x86/x86_64/compat/mm.c   Tue Sep 22 08:18:19 2009 +0100
+++ b/xen/arch/x86/x86_64/compat/mm.c   Tue Sep 22 08:19:16 2009 +0100
@@ -153,19 +153,31 @@ int compat_arch_memory_op(int op, XEN_GU
     }
 
     case XENMEM_machphys_mfn_list:
+    {
+        unsigned long limit;
+        compat_pfn_t last_mfn;
+
         if ( copy_from_guest(&xmml, arg, 1) )
             return -EFAULT;
 
-        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START;
-              (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END);
+        limit = (unsigned long)(compat_machine_to_phys_mapping +
+            min_t(unsigned long, max_page,
+                  MACH2PHYS_COMPAT_NR_ENTRIES(current->domain)));
+        if ( limit > RDWR_COMPAT_MPT_VIRT_END )
+            limit = RDWR_COMPAT_MPT_VIRT_END;
+        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START, last_mfn = 0;
+              (i != xmml.max_extents) && (v < limit);
               i++, v += 1 << L2_PAGETABLE_SHIFT )
         {
             l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
-            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-                break;
-            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                mfn = l2e_get_pfn(l2e);
+            else
+                mfn = last_mfn;
+            ASSERT(mfn);
             if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
@@ -173,6 +185,7 @@ int compat_arch_memory_op(int op, XEN_GU
             rc = -EFAULT;
 
         break;
+    }
 
     default:
         rc = -ENOSYS;
diff -r d9b50ae2bf18 -r 51152e4f995f xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Tue Sep 22 08:18:19 2009 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Tue Sep 22 08:19:16 2009 +0100
@@ -194,7 +194,7 @@ void __init paging_init(void)
 void __init paging_init(void)
 {
     unsigned long i, mpt_size, va;
-    unsigned int memflags;
+    unsigned int n, memflags;
     l3_pgentry_t *l3_ro_mpt;
     l2_pgentry_t *l2_ro_mpt = NULL;
     struct page_info *l1_pg, *l2_pg, *l3_pg;
@@ -213,6 +213,11 @@ void __init paging_init(void)
      */
     mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*machine_to_phys_mapping));
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
         BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
@@ -222,37 +227,63 @@ void __init paging_init(void)
 
         if ( cpu_has_page1gb &&
              !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
-             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) &&
-             (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
-                                          memflags)) != NULL )
+             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) )
+        {
+            unsigned int k, holes;
+
+            for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k)
+            {
+                for ( n = 0; n < CNT; ++n)
+                    if ( mfn_valid(MFN(i + k) + n * PDX_GROUP_COUNT) )
+                        break;
+                if ( n == CNT )
+                    ++holes;
+            }
+            if ( k == holes )
+            {
+                i += (1UL << PAGETABLE_ORDER) - 1;
+                continue;
+            }
+            if ( holes == 0 &&
+                 (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
+                                              memflags)) != NULL )
+            {
+                map_pages_to_xen(
+                    RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
+                    page_to_mfn(l1_pg),
+                    1UL << (2 * PAGETABLE_ORDER),
+                    PAGE_HYPERVISOR);
+                memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
+                       0x77, 1UL << L3_PAGETABLE_SHIFT);
+
+                ASSERT(!l2_table_offset(va));
+                /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+                l3e_write(&l3_ro_mpt[l3_table_offset(va)],
+                    l3e_from_page(l1_pg,
+                        /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+                i += (1UL << PAGETABLE_ORDER) - 1;
+                continue;
+            }
+        }
+
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            l1_pg = NULL;
+        else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+                                               memflags)) == NULL )
+            goto nomem;
+        else
         {
             map_pages_to_xen(
                 RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
                 page_to_mfn(l1_pg),
-                1UL << (2 * PAGETABLE_ORDER),
+                1UL << PAGETABLE_ORDER,
                 PAGE_HYPERVISOR);
             memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
-                   0x77, 1UL << L3_PAGETABLE_SHIFT);
-
-            ASSERT(!l2_table_offset(va));
-            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
-            l3e_write(&l3_ro_mpt[l3_table_offset(va)],
-                l3e_from_page(l1_pg,
-                    /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
-            i += (1UL << PAGETABLE_ORDER) - 1;
-            continue;
-        }
-
-        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
-                                          memflags)) == NULL )
-            goto nomem;
-        map_pages_to_xen(
-            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
-            page_to_mfn(l1_pg), 
-            1UL << PAGETABLE_ORDER,
-            PAGE_HYPERVISOR);
-        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
-               1UL << L2_PAGETABLE_SHIFT);
+                   0x55, 1UL << L2_PAGETABLE_SHIFT);
+        }
         if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
         {
             if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL )
@@ -264,10 +295,13 @@ void __init paging_init(void)
             ASSERT(!l2_table_offset(va));
         }
         /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
-        l2e_write(l2_ro_mpt, l2e_from_page(
-            l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+        if ( l1_pg )
+            l2e_write(l2_ro_mpt, l2e_from_page(
+                l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
         l2_ro_mpt++;
     }
+#undef CNT
+#undef MFN
 
     /* Create user-accessible L2 directory to map the MPT for compat guests. */
     BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
@@ -288,12 +322,22 @@ void __init paging_init(void)
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
     if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
         m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
-    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*compat_machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*compat_machine_to_phys_mapping));
+    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ )
     {
         memflags = MEMF_node(phys_to_nid(i <<
             (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            continue;
         if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
-                                          memflags)) == NULL )
+                                               memflags)) == NULL )
             goto nomem;
         map_pages_to_xen(
             RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
@@ -306,8 +350,9 @@ void __init paging_init(void)
                1UL << L2_PAGETABLE_SHIFT);
         /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
         l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
-        l2_ro_mpt++;
-    }
+    }
+#undef CNT
+#undef MFN
 
     /* Set up linear page table mapping. */
     l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
@@ -428,7 +473,7 @@ long subarch_memory_op(int op, XEN_GUEST
     l3_pgentry_t l3e;
     l2_pgentry_t l2e;
     unsigned long v;
-    xen_pfn_t mfn;
+    xen_pfn_t mfn, last_mfn;
     unsigned int i;
     long rc = 0;
 
@@ -440,29 +485,32 @@ long subarch_memory_op(int op, XEN_GUEST
 
         BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
         BUILD_BUG_ON(RDWR_MPT_VIRT_END   & ((1UL << L3_PAGETABLE_SHIFT) - 1));
-        for ( i = 0, v = RDWR_MPT_VIRT_START;
-              (i != xmml.max_extents) && (v != RDWR_MPT_VIRT_END);
+        for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0;
+              (i != xmml.max_extents) &&
+              (v < (unsigned long)(machine_to_phys_mapping + max_page));
               i++, v += 1UL << L2_PAGETABLE_SHIFT )
         {
             l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
                 l3_table_offset(v)];
             if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-                break;
-            if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
+                mfn = last_mfn;
+            else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
             {
                 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
-                if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-                    break;
-                mfn = l2e_get_pfn(l2e);
+                if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                    mfn = l2e_get_pfn(l2e);
+                else
+                    mfn = last_mfn;
             }
             else
             {
                 mfn = l3e_get_pfn(l3e)
                     + (l2_table_offset(v) << PAGETABLE_ORDER);
             }
-            ASSERT(!l1_table_offset(v));
+            ASSERT(mfn);
             if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;

