WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] x86-64: use 1Gb pages in 1:1 mapping if available

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86-64: use 1Gb pages in 1:1 mapping if available
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Thu, 24 Jan 2008 15:04:47 +0000
Delivery-date: Thu, 24 Jan 2008 07:04:52 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
At once adjust the 2/4Mb page handling slightly in a few places (to
match the newly added code):
- when re-creating a large page mapping after finding that all small
  page mappings in the respective area are using identical flags and
  suitable MFNs, the virtual address was already incremented past the
  area to be dealt with, which needs to be accounted for in the
  invocation of flush_area() in that path
- don't or-in/and-out _PAGE_PSE on non-present pages
- when comparing flags, try to minimise the number of l1f_to_lNf()/
  lNf_to_l1f() instances used
- instead of skipping a single page when encountering a big page
  mapping equalling to what a small page mapping would establish, skip
  to the next larger page boundary

This patch won't apply cleanly without the previously sent patch
adjusting show_page_walk().

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2008-01-18/xen/arch/x86/mm.c
===================================================================
--- 2008-01-18.orig/xen/arch/x86/mm.c   2008-01-23 15:39:18.000000000 +0100
+++ 2008-01-18/xen/arch/x86/mm.c        2008-01-23 16:22:01.000000000 +0100
@@ -113,6 +113,8 @@
 #include <xsm/xsm.h>
 #include <xen/trace.h>
 
+extern int early_boot;
+
 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
 
 /*
@@ -3659,7 +3661,13 @@ int ptwr_do_page_fault(struct vcpu *v, u
 
 void free_xen_pagetable(void *v)
 {
-    extern int early_boot;
+#ifdef __x86_64__
+    unsigned long ma = virt_to_maddr(v);
+    unsigned long l2_ident_ma = virt_to_maddr(l2_identmap);
+
+    if ( ma >= l2_ident_ma && ma < l2_ident_ma + sizeof(l2_identmap) )
+        return;
+#endif
 
     BUG_ON(early_boot);
     
@@ -3670,8 +3678,8 @@ void free_xen_pagetable(void *v)
 }
 
 /* Convert to from superpage-mapping flags for map_pages_to_xen(). */
-#define l1f_to_l2f(f) ((f) | _PAGE_PSE)
-#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE)
+#define l1f_to_lNf(f) ((f) & _PAGE_PRESENT ? (f) | _PAGE_PSE : (f))
+#define lNf_to_l1f(f) ((f) & _PAGE_PRESENT ? (f) & ~_PAGE_PSE : (f))
 
 /*
  * map_pages_to_xen() can be called with interrupts disabled:
@@ -3697,6 +3705,126 @@ int map_pages_to_xen(
 
     while ( nr_mfns != 0 )
     {
+#ifdef __x86_64__
+        l3_pgentry_t *pl3e = virt_to_xen_l3e(virt);
+        l3_pgentry_t ol3e = *pl3e;
+
+        if ( cpu_has_page1gb &&
+             !(((virt >> PAGE_SHIFT) | mfn) &
+               ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) &&
+             nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) &&
+             !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) )
+        {
+            /* 1Gb-page mapping. */
+            l3e_write_atomic(pl3e, l3e_from_pfn(mfn, l1f_to_lNf(flags)));
+
+            if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) )
+            {
+                unsigned int flush_flags =
+                    FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER);
+
+                if ( l3e_get_flags(ol3e) & _PAGE_PSE )
+                {
+                    if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
+                        flush_flags |= FLUSH_TLB_GLOBAL;
+                    if ( (l1f_to_lNf(l3e_get_flags(ol3e)) ^ flags) &
+                         PAGE_CACHE_ATTRS )
+                        flush_flags |= FLUSH_CACHE;
+                    flush_area(virt, flush_flags);
+                }
+                else
+                {
+                    pl2e = l3e_to_l2e(ol3e);
+                    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+                    {
+                        ol2e = pl2e[i];
+                        if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) )
+                            continue;
+                        if ( l2e_get_flags(ol2e) & _PAGE_PSE )
+                        {
+                            if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
+                                flush_flags |= FLUSH_TLB_GLOBAL;
+                            if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) &
+                                 PAGE_CACHE_ATTRS )
+                                flush_flags |= FLUSH_CACHE;
+                        }
+                        else
+                        {
+                            unsigned int j;
+
+                            pl1e = l2e_to_l1e(ol2e);
+                            for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ )
+                            {
+                                ol1e = pl1e[j];
+                                if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL )
+                                    flush_flags |= FLUSH_TLB_GLOBAL;
+                                if ( (l1e_get_flags(ol1e) ^ flags) &
+                                     PAGE_CACHE_ATTRS )
+                                    flush_flags |= FLUSH_CACHE;
+                            }
+                        }
+                    }
+                    flush_area(virt, flush_flags);
+                    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+                    {
+                        ol2e = pl2e[i];
+                        if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) &&
+                             !(l2e_get_flags(ol2e) & _PAGE_PSE) )
+                            free_xen_pagetable(l2e_to_l1e(ol2e));
+                    }
+                    free_xen_pagetable(pl2e);
+                }
+            }
+
+            virt    += 1UL << L3_PAGETABLE_SHIFT;
+            mfn     += 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+            nr_mfns -= 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+            continue;
+        }
+
+        if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) &&
+             (l3e_get_flags(ol3e) & _PAGE_PSE) )
+        {
+            unsigned int flush_flags =
+                FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER);
+
+            /* Skip this PTE if there is no change. */
+            if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES *
+                                         L1_PAGETABLE_ENTRIES - 1)) +
+                  (l2_table_offset(virt) << PAGETABLE_ORDER) +
+                  l1_table_offset(virt) == mfn) &&
+                 ((lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) &
+                  ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0 )
+            {
+                i = (1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) -
+                    (mfn & ((1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1));
+                if ( i > nr_mfns )
+                    i = nr_mfns;
+                virt    += i << PAGE_SHIFT;
+                mfn     += i;
+                nr_mfns -= i;
+                continue;
+            }
+
+            pl2e = alloc_xen_pagetable();
+            if ( pl2e == NULL )
+                return -ENOMEM;
+
+            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+                l2e_write(pl2e + i,
+                          l2e_from_pfn(l3e_get_pfn(ol3e) +
+                                       (i << PAGETABLE_ORDER),
+                                       l3e_get_flags(ol3e)));
+
+            if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
+                flush_flags |= FLUSH_TLB_GLOBAL;
+
+            l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e),
+                                                __PAGE_HYPERVISOR));
+            flush_area(virt, flush_flags);
+        }
+#endif
+
         pl2e = virt_to_xen_l2e(virt);
 
         if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
@@ -3705,7 +3833,7 @@ int map_pages_to_xen(
         {
             /* Super-page mapping. */
             ol2e = *pl2e;
-            l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags)));
+            l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_lNf(flags)));
 
             if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
             {
@@ -3716,8 +3844,8 @@ int map_pages_to_xen(
                 {
                     if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
                         flush_flags |= FLUSH_TLB_GLOBAL;
-                    if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) &
-                         l1f_to_l2f(PAGE_CACHE_ATTRS) )
+                    if ( (lNf_to_l1f(l2e_get_flags(ol2e)) ^ flags) &
+                         PAGE_CACHE_ATTRS )
                         flush_flags |= FLUSH_CACHE;
                     flush_area(virt, flush_flags);
                 }
@@ -3761,13 +3889,17 @@ int map_pages_to_xen(
                 /* Skip this PTE if there is no change. */
                 if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) +
                        l1_table_offset(virt)) == mfn) &&
-                     (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
+                     (((lNf_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
                        ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) )
                 {
-                    virt    += 1UL << L1_PAGETABLE_SHIFT;
-                    mfn     += 1UL;
-                    nr_mfns -= 1UL;
-                    continue;
+                    i = (1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) -
+                        (mfn & ((1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1));
+                    if ( i > nr_mfns )
+                        i = nr_mfns;
+                    virt    += i << L1_PAGETABLE_SHIFT;
+                    mfn     += i;
+                    nr_mfns -= i;
+                    goto check_l3;
                 }
 
                 pl1e = alloc_xen_pagetable();
@@ -3777,7 +3909,7 @@ int map_pages_to_xen(
                 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
                     l1e_write(&pl1e[i],
                               l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
-                                           l2f_to_l1f(l2e_get_flags(*pl2e))));
+                                           lNf_to_l1f(l2e_get_flags(*pl2e))));
 
                 if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
                     flush_flags |= FLUSH_TLB_GLOBAL;
@@ -3820,13 +3952,43 @@ int map_pages_to_xen(
                 {
                     ol2e = *pl2e;
                     l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn,
-                                                        l1f_to_l2f(flags)));
-                    flush_area(virt, (FLUSH_TLB_GLOBAL |
-                                      FLUSH_ORDER(PAGETABLE_ORDER)));
+                                                        l1f_to_lNf(flags)));
+                    flush_area(virt - PAGE_SIZE,
+                               FLUSH_TLB_GLOBAL | FLUSH_ORDER(PAGETABLE_ORDER));
                     free_xen_pagetable(l2e_to_l1e(ol2e));
                 }
             }
         }
+
+ check_l3: ;
+#ifdef __x86_64__
+        if ( cpu_has_page1gb &&
+             !early_boot &&
+             flags == PAGE_HYPERVISOR &&
+             (nr_mfns == 0 ||
+              !(((virt >> PAGE_SHIFT) | mfn) &
+                ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))) )
+        {
+            unsigned long base_mfn;
+
+            ol3e = *pl3e;
+            pl2e = l3e_to_l2e(ol3e);
+            base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES *
+                                              L1_PAGETABLE_ENTRIES - 1);
+            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
+                if ( l2e_get_pfn(*pl2e) != base_mfn + (i << PAGETABLE_ORDER) ||
+                     l2e_get_flags(*pl2e) != l1f_to_lNf(flags) )
+                    break;
+            if ( i == L2_PAGETABLE_ENTRIES )
+            {
+                l3e_write_atomic(pl3e, l3e_from_pfn(base_mfn,
+                                                    l1f_to_lNf(flags)));
+                flush_area(virt - PAGE_SIZE,
+                           FLUSH_TLB_GLOBAL | FLUSH_ORDER(2*PAGETABLE_ORDER));
+                free_xen_pagetable(l3e_to_l2e(ol3e));
+            }
+        }
+#endif
     }
 
     return 0;
@@ -3844,6 +4006,40 @@ void destroy_xen_mappings(unsigned long 
 
     while ( v < e )
     {
+#ifdef __x86_64__
+        l3_pgentry_t *pl3e = virt_to_xen_l3e(v);
+
+        if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+        {
+            v += 1UL << L3_PAGETABLE_SHIFT;
+            v &= ~((1UL << L3_PAGETABLE_SHIFT) - 1);
+            continue;
+        }
+
+        if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
+        {
+            if ( l2_table_offset(v) == 0 &&
+                 l1_table_offset(v) == 0 &&
+                 ((e - v) >= (1UL << L3_PAGETABLE_SHIFT)) )
+            {
+                /* PAGE1GB: whole superpage is destroyed. */
+                l3e_write_atomic(pl3e, l3e_empty());
+                v += 1UL << L3_PAGETABLE_SHIFT;
+                continue;
+            }
+
+            /* PAGE1GB: shatter the superpage and fall through. */
+            pl2e = alloc_xen_pagetable();
+            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+                l2e_write(pl2e + i,
+                          l2e_from_pfn(l3e_get_pfn(*pl3e) +
+                                       (i << PAGETABLE_ORDER),
+                                       l3e_get_flags(*pl3e)));
+            l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e),
+                                                __PAGE_HYPERVISOR));
+        }
+#endif
+
         pl2e = virt_to_xen_l2e(v);
 
         if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
@@ -3896,6 +4092,23 @@ void destroy_xen_mappings(unsigned long 
                 free_xen_pagetable(pl1e);
             }
         }
+
+#ifdef __x86_64__
+        /* If we are done with the L3E, check if it is now empty. */
+        if ( (v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0) )
+            continue;
+        pl2e = l3e_to_l2e(*pl3e);
+        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+            if ( l2e_get_intpte(pl2e[i]) != 0 )
+                break;
+        if ( i == L2_PAGETABLE_ENTRIES )
+        {
+            /* Empty: zap the L3E and free the L2 page. */
+            l3e_write_atomic(pl3e, l3e_empty());
+            flush_all(FLUSH_TLB_GLOBAL); /* flush before free */
+            free_xen_pagetable(pl2e);
+        }
+#endif
     }
 
     flush_all(FLUSH_TLB_GLOBAL);
Index: 2008-01-18/xen/arch/x86/setup.c
===================================================================
--- 2008-01-18.orig/xen/arch/x86/setup.c        2008-01-23 15:39:18.000000000 +0100
+++ 2008-01-18/xen/arch/x86/setup.c     2008-01-23 16:51:48.000000000 +0100
@@ -672,8 +672,9 @@ void __init __start_xen(unsigned long mb
                 pl3e = l4e_to_l3e(*pl4e);
                 for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
                 {
-                    /* Not present or already relocated? */
+                    /* Not present, 1Gb mapping, or already relocated? */
                     if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
+                         (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
                          (l3e_get_pfn(*pl3e) > 0x1000) )
                         continue;
                     *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
Index: 2008-01-18/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2008-01-18.orig/xen/arch/x86/x86_64/mm.c    2008-01-23 15:39:18.000000000 +0100
+++ 2008-01-18/xen/arch/x86/x86_64/mm.c 2008-01-23 11:56:42.000000000 +0100
@@ -70,30 +70,36 @@ void *alloc_xen_pagetable(void)
     return mfn_to_virt(mfn);
 }
 
-l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
+l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
 {
     l4_pgentry_t *pl4e;
-    l3_pgentry_t *pl3e;
-    l2_pgentry_t *pl2e;
 
     pl4e = &idle_pg_table[l4_table_offset(v)];
     if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
     {
-        pl3e = alloc_xen_pagetable();
+        l3_pgentry_t *pl3e = alloc_xen_pagetable();
+
         clear_page(pl3e);
         l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
     }
     
-    pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
+    return l4e_to_l3e(*pl4e) + l3_table_offset(v);
+}
+
+l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
+{
+    l3_pgentry_t *pl3e;
+
+    pl3e = virt_to_xen_l3e(v);
     if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
     {
-        pl2e = alloc_xen_pagetable();
+        l2_pgentry_t *pl2e = alloc_xen_pagetable();
+
         clear_page(pl2e);
         l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
     }
     
-    pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
-    return pl2e;
+    return l3e_to_l2e(*pl3e) + l2_table_offset(v);
 }
 
 void __init paging_init(void)
Index: 2008-01-18/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2008-01-18.orig/xen/arch/x86/x86_64/traps.c 2008-01-23 15:39:18.000000000 +0100
+++ 2008-01-18/xen/arch/x86/x86_64/traps.c      2008-01-23 11:58:58.000000000 +0100
@@ -148,9 +148,11 @@ void show_page_walk(unsigned long addr)
     mfn = l3e_get_pfn(l3e);
     pfn = mfn_valid(mfn) && mpt_valid ?
           get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
-    printk(" L3[0x%03lx] = %"PRIpte" %016lx\n",
-           l3_table_offset(addr), l3e_get_intpte(l3e), pfn);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+    printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
+           l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
+           (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
+         (l3e_get_flags(l3e) & _PAGE_PSE) )
         return;
 
     l2t = mfn_to_virt(mfn);
Index: 2008-01-18/xen/include/asm-x86/page.h
===================================================================
--- 2008-01-18.orig/xen/include/asm-x86/page.h  2008-01-23 15:39:18.000000000 +0100
+++ 2008-01-18/xen/include/asm-x86/page.h       2008-01-22 15:35:32.000000000 +0100
@@ -350,6 +350,9 @@ static inline int get_order_from_pages(u
 void *alloc_xen_pagetable(void);
 void free_xen_pagetable(void *v);
 l2_pgentry_t *virt_to_xen_l2e(unsigned long v);
+#ifdef __x86_64__
+l3_pgentry_t *virt_to_xen_l3e(unsigned long v);
+#endif
 
 /* Map machine page range in Xen virtual address space. */
 #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */
Index: 2008-01-18/xen/include/asm-x86/x86_64/page.h
===================================================================
--- 2008-01-18.orig/xen/include/asm-x86/x86_64/page.h   2008-01-23 15:39:18.000000000 +0100
+++ 2008-01-18/xen/include/asm-x86/x86_64/page.h        2008-01-23 11:14:54.000000000 +0100
@@ -59,6 +59,8 @@ typedef struct { intpte_t l3; } l3_pgent
 typedef struct { intpte_t l4; } l4_pgentry_t;
 typedef l4_pgentry_t root_pgentry_t;
 
+extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
+
 #endif /* !__ASSEMBLY__ */
 
 #define pte_read_atomic(ptep)       (*(ptep))



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>