To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86-64: reduce range spanned by 1:1 mapping and frame table indexes
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 22 Sep 2009 01:25:24 -0700
Delivery-date: Tue, 22 Sep 2009 01:26:18 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1253603809 -3600
# Node ID d6f4089f0f8ce86863314c65fca957d15854a814
# Parent  e3eb0a6ed463f84623ed07e5a5f2820c6b502a61
x86-64: reduce range spanned by 1:1 mapping and frame table indexes

Introduces a virtual-space-conserving transformation on the MFNs that
have so far been used directly to index the 1:1 mapping and the frame
table: the largest range of contiguous bits (below the most
significant one) that are zero for all valid MFNs is removed from the
MFN representation before it is used to index those arrays. Each bit
removed roughly halves the virtual range these tables must cover.
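
The conversions themselves are a pair of mask-and-shift operations.
A minimal standalone sketch, with an invented hole position and width
(the real masks are computed at boot by the pfn_pdx_hole_setup()
added below; 64-bit unsigned long assumed):

#include <assert.h>
#include <stdio.h>

/* Assume bits 34..39 are zero for every valid MFN (invented values). */
#define BOTTOM_SHIFT 34                   /* bits kept below the hole */
#define HOLE_SHIFT   6                    /* width of the removed range */
#define BOTTOM_MASK  ((1UL << BOTTOM_SHIFT) - 1)
#define HOLE_MASK    (((1UL << HOLE_SHIFT) - 1) << BOTTOM_SHIFT)
#define TOP_MASK     (~(BOTTOM_MASK | HOLE_MASK))

static unsigned long pfn_to_pdx(unsigned long pfn)
{
    /* Drop the hole: keep the low bits, shift the high bits down. */
    return (pfn & BOTTOM_MASK) | ((pfn & TOP_MASK) >> HOLE_SHIFT);
}

static unsigned long pdx_to_pfn(unsigned long pdx)
{
    /* Inverse: shift the high bits back up across the (zero) hole. */
    return (pdx & BOTTOM_MASK) | ((pdx << HOLE_SHIFT) & TOP_MASK);
}

int main(void)
{
    unsigned long pfn = 1UL << 40;        /* first frame above the hole */
    unsigned long pdx = pfn_to_pdx(pfn);  /* compresses to 1UL << 34 */

    printf("pfn %#lx -> pdx %#lx\n", pfn, pdx);
    assert(pdx_to_pfn(pdx) == pfn);       /* round-trips for valid MFNs */
    return 0;
}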

Since this should account for hotpluggable memory (so that no rewrite
is needed once that gets supported), the determination of which bits
are candidates for removal must not be based on the E820 information,
but has to use the SRAT instead. That in turn requires a change to
the ordering of steps done during early boot.
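
Each SRAT memory region contributes its base address plus a mask
spanning its address bits; zero bits of the accumulated mask, below
its top set bit, are the candidates for removal. A small sketch with
invented region addresses (mirroring the fill_mask() /
srat_parse_region() logic added below):

#include <stdio.h>

/* Widen mask to the next (2^n - 1) value covering it. */
static unsigned long fill_mask(unsigned long mask)
{
    while (mask & (mask + 1))
        mask |= mask + 1;
    return mask;
}

int main(void)
{
    /* Hypothetical RAM layout: 16GB at address 0, 16GB more at 1TB. */
    struct { unsigned long base, size; } r[] = {
        { 0UL,       1UL << 34 },
        { 1UL << 40, 1UL << 34 },
    };
    unsigned long mask = 0;
    unsigned int i;

    for ( i = 0; i < sizeof(r) / sizeof(r[0]); i++ )
        mask |= r[i].base |
                fill_mask(r[i].base ^ (r[i].base + r[i].size - 1));

    /* mask == (1UL << 40) | ((1UL << 34) - 1): bits 34..39 are zero in
     * every region, so six bits could be removed from the index. */
    printf("region mask: %#lx\n", mask);
    return 0;
}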

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/e820.c               |   17 ++-------
 xen/arch/x86/mm.c                 |   10 +++--
 xen/arch/x86/mm/shadow/common.c   |   33 ++++++++++++------
 xen/arch/x86/mm/shadow/multi.c    |   28 +++++++--------
 xen/arch/x86/mm/shadow/private.h  |   17 ++++++++-
 xen/arch/x86/setup.c              |   67 ++++++++++++++++++++++++++++++++++----
 xen/arch/x86/srat.c               |   64 ++++++++++++++++++++++++++++++++++++
 xen/arch/x86/tboot.c              |    8 +++-
 xen/arch/x86/x86_64/mm.c          |   39 ++++++++++++++++++++++
 xen/drivers/acpi/numa.c           |    2 -
 xen/include/asm-x86/config.h      |   19 +++++-----
 xen/include/asm-x86/mm.h          |   46 ++++++++++++++++++++------
 xen/include/asm-x86/numa.h        |    1 
 xen/include/asm-x86/page.h        |   16 +++------
 xen/include/asm-x86/x86_32/page.h |   12 ++++--
 xen/include/asm-x86/x86_64/page.h |   54 ++++++++++++++++++++++++++----
 xen/include/xen/acpi.h            |    3 +
 xen/include/xen/mm.h              |   36 +++++++++++++-------
 18 files changed, 367 insertions(+), 105 deletions(-)

diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c       Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/e820.c       Tue Sep 22 08:16:49 2009 +0100
@@ -500,22 +500,15 @@ static void __init machine_specific_memo
                   "can be accessed by Xen in 32-bit mode.");
 #else
     {
-        unsigned long limit, mpt_limit, ro_mpt_limit, pft_limit;
-
-        limit = DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START;
+        unsigned long mpt_limit, ro_mpt_limit;
+
         mpt_limit = ((RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START)
                      / sizeof(unsigned long)) << PAGE_SHIFT;
         ro_mpt_limit = ((RO_MPT_VIRT_END - RO_MPT_VIRT_START)
                         / sizeof(unsigned long)) << PAGE_SHIFT;
-        pft_limit = ((FRAMETABLE_VIRT_END - FRAMETABLE_VIRT_START)
-                     / sizeof(struct page_info)) << PAGE_SHIFT;
-        if ( limit > mpt_limit )
-            limit = mpt_limit;
-        if ( limit > ro_mpt_limit )
-            limit = ro_mpt_limit;
-        if ( limit > pft_limit )
-            limit = pft_limit;
-        clip_to_limit(limit,
+        if ( mpt_limit > ro_mpt_limit )
+            mpt_limit = ro_mpt_limit;
+        clip_to_limit(mpt_limit,
                       "Only the first %lu GB of the physical "
                       "memory map can be accessed by Xen.");
     }
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/mm.c Tue Sep 22 08:16:49 2009 +0100
@@ -173,7 +173,7 @@ void __init init_frametable(void)
     BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
 #endif
 
-    nr_pages  = PFN_UP(max_page * sizeof(*frame_table));
+    nr_pages  = PFN_UP(max_pdx * sizeof(*frame_table));
     page_step = 1 << (cpu_has_page1gb ? L3_PAGETABLE_SHIFT - PAGE_SHIFT
                                       : L2_PAGETABLE_SHIFT - PAGE_SHIFT);
 
@@ -248,10 +248,11 @@ void __init arch_init_memory(void)
          * the statically-initialised 1-16MB mapping area.
          */
         iostart_pfn = max_t(unsigned long, pfn, 1UL << (20 - PAGE_SHIFT));
-        ioend_pfn = rstart_pfn;
 #if defined(CONFIG_X86_32)
-        ioend_pfn = min_t(unsigned long, ioend_pfn,
+        ioend_pfn = min_t(unsigned long, rstart_pfn,
                           DIRECTMAP_MBYTES << (20 - PAGE_SHIFT));
+#else
+        ioend_pfn = min(rstart_pfn, 16UL << (20 - PAGE_SHIFT));
 #endif
         if ( iostart_pfn < ioend_pfn )            
             destroy_xen_mappings((unsigned long)mfn_to_virt(iostart_pfn),
@@ -260,7 +261,8 @@ void __init arch_init_memory(void)
         /* Mark as I/O up to next RAM region. */
         for ( ; pfn < rstart_pfn; pfn++ )
         {
-            BUG_ON(!mfn_valid(pfn));
+            if ( !mfn_valid(pfn) )
+                continue;
             share_xen_page_with_guest(
                 mfn_to_page(pfn), dom_io, XENSHARE_writable);
         }
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Tue Sep 22 08:16:49 2009 +0100
@@ -1340,7 +1340,7 @@ static inline void trace_shadow_prealloc
         /* Convert smfn to gfn */
         unsigned long gfn;
         ASSERT(mfn_valid(smfn));
-        gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back));
+        gfn = mfn_to_gfn(d, backpointer(mfn_to_page(smfn)));
         __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
                     sizeof(gfn), (unsigned char*)&gfn);
     }
@@ -1502,13 +1502,13 @@ static inline struct page_info *
 static inline struct page_info *
 next_shadow(const struct page_info *sp)
 {
-    return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL;
+    return sp->next_shadow ? pdx_to_page(sp->next_shadow) : NULL;
 }
 
 static inline void
 set_next_shadow(struct page_info *sp, struct page_info *next)
 {
-    sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0;
+    sp->next_shadow = next ? page_to_pdx(next) : 0;
 }
 
 /* Allocate another shadow's worth of (contiguous, aligned) pages,
@@ -1553,6 +1553,17 @@ mfn_t shadow_alloc(struct domain *d,
         sp += 1 << i;
     }
     d->arch.paging.shadow.free_pages -= 1 << order;
+
+    switch (shadow_type)
+    {
+    case SH_type_fl1_32_shadow:
+    case SH_type_fl1_pae_shadow:
+    case SH_type_fl1_64_shadow:
+        break;
+    default:
+        backpointer = pfn_to_pdx(backpointer);
+        break;
+    }
 
     /* Init page info fields and clear the pages */
     for ( i = 0; i < 1<<order ; i++ ) 
@@ -1911,7 +1922,7 @@ static void sh_hash_audit_bucket(struct 
         BUG_ON( sp->u.sh.type == 0 );
         BUG_ON( sp->u.sh.type > SH_type_max_shadow );
         /* Wrong bucket? */
-        BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket );
+        BUG_ON( sh_hash(__backpointer(sp), sp->u.sh.type) != bucket );
         /* Duplicate entry? */
         for ( x = next_shadow(sp); x; x = next_shadow(x) )
             BUG_ON( x->v.sh.back == sp->v.sh.back &&
@@ -1921,7 +1932,7 @@ static void sh_hash_audit_bucket(struct 
              && sp->u.sh.type != SH_type_fl1_pae_shadow
              && sp->u.sh.type != SH_type_fl1_64_shadow )
         {
-            struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back));
+            struct page_info *gpg = mfn_to_page(backpointer(sp));
             /* Bad shadow flags on guest page? */
             BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) );
             /* Bad type count on guest page? */
@@ -1935,9 +1946,9 @@ static void sh_hash_audit_bucket(struct 
                 {
                     if ( !page_is_out_of_sync(gpg) )
                     {
-                        SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
+                        SHADOW_ERROR("MFN %#"PRI_mfn" shadowed (by %#"PRI_mfn")"
                                      " and not OOS but has typecount %#lx\n",
-                                     sp->v.sh.back,
+                                     __backpointer(sp),
                                      mfn_x(page_to_mfn(sp)), 
                                      gpg->u.inuse.type_info);
                         BUG();
@@ -1949,9 +1960,9 @@ static void sh_hash_audit_bucket(struct 
             if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 
                  && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
             {
-                SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
+                SHADOW_ERROR("MFN %#"PRI_mfn" shadowed (by %#"PRI_mfn")"
                              " but has typecount %#lx\n",
-                             sp->v.sh.back, mfn_x(page_to_mfn(sp)),
+                             __backpointer(sp), mfn_x(page_to_mfn(sp)),
                              gpg->u.inuse.type_info);
                 BUG();
             }
@@ -2037,7 +2048,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
     prev = NULL;
     while(sp)
     {
-        if ( sp->v.sh.back == n && sp->u.sh.type == t )
+        if ( __backpointer(sp) == n && sp->u.sh.type == t )
         {
             /* Pull-to-front if 'sp' isn't already the head item */
             if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
@@ -2204,7 +2215,7 @@ void sh_destroy_shadow(struct vcpu *v, m
            t == SH_type_fl1_64_shadow  || 
            t == SH_type_monitor_table  || 
            (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
-           (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back)))
+           (page_get_owner(mfn_to_page(backpointer(sp)))
             == v->domain)); 
 
     /* The down-shifts here are so that the switch statement is on nice
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Tue Sep 22 08:16:49 2009 +0100
@@ -1010,7 +1010,7 @@ static int shadow_set_l2e(struct vcpu *v
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         {
             struct page_info *sp = mfn_to_page(sl1mfn);
-            mfn_t gl1mfn = _mfn(sp->v.sh.back);
+            mfn_t gl1mfn = backpointer(sp);
 
             /* If the shadow is a fl1 then the backpointer contains
                the GFN instead of the GMFN, and it's definitely not
@@ -1974,7 +1974,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
     ASSERT(t == SH_type_l4_shadow);
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+    gmfn = backpointer(mfn_to_page(smfn));
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
     /* Decrement refcounts of all the old entries */
@@ -2003,7 +2003,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
     ASSERT(t == SH_type_l3_shadow);
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+    gmfn = backpointer(mfn_to_page(smfn));
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
 
@@ -2038,7 +2038,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
 #endif
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+    gmfn = backpointer(mfn_to_page(smfn));
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
 
@@ -2073,7 +2073,7 @@ void sh_destroy_l1_shadow(struct vcpu *v
     }
     else 
     {
-        mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+        mfn_t gmfn = backpointer(mfn_to_page(smfn));
         delete_shadow_status(v, gmfn, t, smfn);
         shadow_demote(v, gmfn, t);
     }
@@ -2397,7 +2397,7 @@ static int validate_gl1e(struct vcpu *v,
     result |= shadow_set_l1e(v, sl1p, new_sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
-    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
+    gl1mfn = backpointer(mfn_to_page(sl1mfn));
     if ( mfn_valid(gl1mfn) 
          && mfn_is_out_of_sync(gl1mfn) )
     {
@@ -3006,8 +3006,8 @@ static int sh_page_fault(struct vcpu *v,
                                     + shadow_l2_linear_offset(va)),
                                    sizeof(sl2e)) != 0)
                  || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
-                 || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
-                                  shadow_l2e_get_mfn(sl2e))->v.sh.back))
+                 || !mfn_valid(gl1mfn = backpointer(mfn_to_page(
+                                  shadow_l2e_get_mfn(sl2e))))
                  || unlikely(mfn_is_out_of_sync(gl1mfn)) )
             {
                 /* Hit the slow path as if there had been no 
@@ -3582,7 +3582,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Check to see if the SL1 is out of sync. */
     {
-        mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
+        mfn_t gl1mfn = backpointer(mfn_to_page(sl1mfn));
         struct page_info *pg = mfn_to_page(gl1mfn);
         if ( mfn_valid(gl1mfn) 
              && page_is_out_of_sync(pg) )
@@ -3612,7 +3612,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
             }
 
             sl1mfn = shadow_l2e_get_mfn(sl2e);
-            gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
+            gl1mfn = backpointer(mfn_to_page(sl1mfn));
             pg = mfn_to_page(gl1mfn);
             
             if ( likely(sh_mfn_is_a_page_table(gl1mfn)
@@ -4949,7 +4949,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
     int done = 0;
     
     /* Follow the backpointer */
-    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
+    gl1mfn = backpointer(mfn_to_page(sl1mfn));
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
@@ -5042,7 +5042,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back);
+    gl2mfn = backpointer(mfn_to_page(sl2mfn));
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Only L1's may be out of sync. */
@@ -5091,7 +5091,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back);
+    gl3mfn = backpointer(mfn_to_page(sl3mfn));
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Only L1's may be out of sync. */
@@ -5138,7 +5138,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back);
+    gl4mfn = backpointer(mfn_to_page(sl4mfn));
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Only L1's may be out of sync. */
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Tue Sep 22 08:16:49 2009 +0100
@@ -488,6 +488,19 @@ mfn_t oos_snapshot_lookup(struct vcpu *v
 #undef pagetable_from_page
 #define pagetable_from_page(pg) pagetable_from_mfn(page_to_mfn(pg))
 
+#define backpointer(sp) _mfn(pdx_to_pfn((unsigned long)(sp)->v.sh.back))
+static inline unsigned long __backpointer(const struct page_info *sp)
+{
+    switch (sp->u.sh.type)
+    {
+    case SH_type_fl1_32_shadow:
+    case SH_type_fl1_pae_shadow:
+    case SH_type_fl1_64_shadow:
+        return sp->v.sh.back;
+    }
+    return pdx_to_pfn(sp->v.sh.back);
+}
+
 static inline int
 sh_mfn_is_a_page_table(mfn_t gmfn)
 {
@@ -610,8 +623,8 @@ static inline int sh_get_ref(struct vcpu
 
     if ( unlikely(nx >= 1U<<26) )
     {
-        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n",
-                       sp->v.sh.back, mfn_x(smfn));
+        SHADOW_PRINTK("shadow ref overflow, gmfn=%lx smfn=%lx\n",
+                       __backpointer(sp), mfn_x(smfn));
         return 0;
     }
     
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/setup.c      Tue Sep 22 08:16:49 2009 +0100
@@ -297,6 +297,21 @@ static void __init move_memory(
             src_end - src_start);
 }
 
+static void __init setup_max_pdx(void)
+{
+#ifdef __x86_64__
+    max_pdx = pfn_to_pdx(max_page - 1) + 1;
+
+    if ( max_pdx > (DIRECTMAP_SIZE >> PAGE_SHIFT) )
+        max_pdx = DIRECTMAP_SIZE >> PAGE_SHIFT;
+
+    if ( max_pdx > FRAMETABLE_SIZE / sizeof(*frame_table) )
+        max_pdx = FRAMETABLE_SIZE / sizeof(*frame_table);
+
+    max_page = pdx_to_pfn(max_pdx - 1) + 1;
+#endif
+}
+
 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
 static struct e820map __initdata boot_e820;
 
@@ -425,6 +440,7 @@ void __init __start_xen(unsigned long mb
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, modules_length, modules_headroom;
     int i, j, e820_warn = 0, bytes = 0;
+    bool_t acpi_boot_table_init_done = 0;
     struct ns16550_defaults ns16550 = {
         .data_bits = 8,
         .parity    = 'n',
@@ -777,11 +793,13 @@ void __init __start_xen(unsigned long mb
     /* Late kexec reservation (dynamic start address). */
     kexec_reserve_area(&boot_e820);
 
+    setup_max_pdx();
+
     /*
      * Walk every RAM region and map it in its entirety (on x86/64, at least)
      * and notify it to the boot allocator.
      */
-    for ( i = 0; i < boot_e820.nr_map; i++ )
+    for ( nr_pages = i = 0; i < boot_e820.nr_map; i++ )
     {
         uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
 
@@ -796,6 +814,45 @@ void __init __start_xen(unsigned long mb
         if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
             continue;
 
+#ifdef __x86_64__
+        if ( !acpi_boot_table_init_done &&
+             s >= BOOTSTRAP_DIRECTMAP_END &&
+             !acpi_boot_table_init() )
+        {
+            acpi_boot_table_init_done = 1;
+            srat_parse_regions(s);
+            setup_max_pdx();
+        }
+
+        if ( pfn_to_pdx((e - 1) >> PAGE_SHIFT) >= max_pdx )
+        {
+            if ( pfn_to_pdx(s >> PAGE_SHIFT) >= max_pdx )
+            {
+                for ( j = i - 1; ; --j )
+                {
+                    if ( boot_e820.map[j].type == E820_RAM )
+                        break;
+                    ASSERT(j);
+                }
+                map_e = boot_e820.map[j].addr + boot_e820.map[j].size;
+                if ( (map_e >> PAGE_SHIFT) < max_page )
+                {
+                    max_page = map_e >> PAGE_SHIFT;
+                    max_pdx = pfn_to_pdx(max_page - 1) + 1;
+                }
+                printk(XENLOG_WARNING "Ignoring inaccessible memory range"
+                                      " %013"PRIx64"-%013"PRIx64"\n",
+                       s, e);
+                continue;
+            }
+            map_e = e;
+            e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT;
+            printk(XENLOG_WARNING "Ignoring inaccessible memory range"
+                                  " %013"PRIx64"-%013"PRIx64"\n",
+                   e, map_e);
+        }
+#endif
+
         /* Need to create mappings above 16MB. */
         map_s = max_t(uint64_t, s, 16<<20);
         map_e = e;
@@ -815,14 +872,11 @@ void __init __start_xen(unsigned long mb
 
         /* Pass remainder of this memory chunk to the allocator. */
         init_boot_pages(map_s, e);
+        nr_pages += (e - s) >> PAGE_SHIFT;
     }
 
     memguard_init();
 
-    nr_pages = 0;
-    for ( i = 0; i < e820.nr_map; i++ )
-        if ( e820.map[i].type == E820_RAM )
-            nr_pages += e820.map[i].size >> PAGE_SHIFT;
     printk("System RAM: %luMB (%lukB)\n",
            nr_pages >> (20 - PAGE_SHIFT),
            nr_pages << (PAGE_SHIFT - 10));
@@ -857,7 +911,8 @@ void __init __start_xen(unsigned long mb
 
     init_frametable();
 
-    acpi_boot_table_init();
+    if ( !acpi_boot_table_init_done )
+        acpi_boot_table_init();
 
     acpi_numa_init();
 
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/srat.c
--- a/xen/arch/x86/srat.c       Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/srat.c       Tue Sep 22 08:16:49 2009 +0100
@@ -286,6 +286,70 @@ static void unparse_node(int node)
 
 void __init acpi_numa_arch_fixup(void) {}
 
+#ifdef __x86_64__
+
+static u64 __initdata srat_region_mask;
+
+static u64 __init fill_mask(u64 mask)
+{
+       while (mask & (mask + 1))
+               mask |= mask + 1;
+       return mask;
+}
+
+static int __init srat_parse_region(struct acpi_subtable_header *header,
+                                   const unsigned long end)
+{
+       struct acpi_srat_mem_affinity *ma;
+
+       if (!header)
+               return -EINVAL;
+
+       ma = container_of(header, struct acpi_srat_mem_affinity, header);
+
+       if (!ma->length ||
+           !(ma->flags & ACPI_SRAT_MEM_ENABLED) ||
+           (ma->flags & ACPI_SRAT_MEM_NON_VOLATILE))
+               return 0;
+
+       if (numa_off)
+               printk(KERN_INFO "SRAT: %013"PRIx64"-%013"PRIx64"\n",
+                      ma->base_address, ma->base_address + ma->length - 1);
+
+       srat_region_mask |= ma->base_address |
+                           fill_mask(ma->base_address ^
+                                     (ma->base_address + ma->length - 1));
+
+       return 0;
+}
+
+void __init srat_parse_regions(u64 addr)
+{
+       u64 mask;
+       unsigned int i;
+
+       if (acpi_disabled || acpi_numa < 0 ||
+           acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat))
+               return;
+
+       srat_region_mask = fill_mask(addr - 1);
+       acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, srat_parse_region, 0);
+
+       for (mask = srat_region_mask, i = 0; mask && i < e820.nr_map; i++) {
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+
+               if (~mask &
+                   fill_mask(e820.map[i].addr ^
+                             (e820.map[i].addr + e820.map[i].size - 1)))
+                       mask = 0;
+       }
+
+       pfn_pdx_hole_setup(mask >> PAGE_SHIFT);
+}
+
+#endif /* __x86_64__ */
+
 /* Use the information discovered above to actually set up the nodes. */
 int __init acpi_scan_nodes(u64 start, u64 end)
 {
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c      Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/tboot.c      Tue Sep 22 08:16:49 2009 +0100
@@ -175,6 +175,9 @@ static void update_pagetable_mac(vmac_ct
     for ( mfn = 0; mfn < max_page; mfn++ )
     {
         struct page_info *page = mfn_to_page(mfn);
+
+        if ( !mfn_valid(mfn) )
+            continue;
         if ( is_page_in_use(page) && !is_xen_heap_page(page) ) {
             if ( page->count_info & PGC_page_table ) {
                 void *pg = map_domain_page(mfn);
@@ -237,6 +240,9 @@ static void tboot_gen_xenheap_integrity(
     for ( mfn = 0; mfn < max_page; mfn++ )
     {
         struct page_info *page = __mfn_to_page(mfn);
+
+        if ( !mfn_valid(mfn) )
+            continue;
         if ( is_page_in_use(page) && is_xen_heap_page(page) ) {
             void *pg = mfn_to_virt(mfn);
             vmac_update((uint8_t *)pg, PAGE_SIZE, &ctx);
@@ -258,7 +264,7 @@ static void tboot_gen_frametable_integri
 
     vmac_set_key((uint8_t *)key, &ctx);
     *mac = vmac((uint8_t *)frame_table,
-                PFN_UP(max_page * sizeof(*frame_table)), nonce, NULL, &ctx);
+                PFN_UP(max_pdx * sizeof(*frame_table)), nonce, NULL, &ctx);
 
     printk("MAC for frametable is: 0x%08"PRIx64"\n", *mac);
 
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Tue Sep 22 08:16:49 2009 +0100
@@ -33,6 +33,15 @@
 #include <asm/msr.h>
 #include <public/memory.h>
 
+/* Parameters for PFN/MADDR compression. */
+unsigned long __read_mostly max_pdx;
+unsigned long __read_mostly pfn_pdx_bottom_mask = ~0UL;
+unsigned long __read_mostly ma_va_bottom_mask = ~0UL;
+unsigned long __read_mostly pfn_top_mask = 0;
+unsigned long __read_mostly ma_top_mask = 0;
+unsigned long __read_mostly pfn_hole_mask = 0;
+unsigned int __read_mostly pfn_pdx_hole_shift = 0;
+
 #ifdef CONFIG_COMPAT
 unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
 #endif
@@ -142,6 +151,36 @@ void *do_page_walk(struct vcpu *v, unsig
         return NULL;
 
     return mfn_to_virt(mfn) + (addr & ~PAGE_MASK);
+}
+
+void __init pfn_pdx_hole_setup(unsigned long mask)
+{
+    unsigned int i, j, bottom_shift, hole_shift;
+
+    for ( hole_shift = bottom_shift = j = 0; ; )
+    {
+        i = find_next_zero_bit(&mask, BITS_PER_LONG, j);
+        j = find_next_bit(&mask, BITS_PER_LONG, i);
+        if ( j >= BITS_PER_LONG )
+            break;
+        if ( j - i > hole_shift )
+        {
+            hole_shift = j - i;
+            bottom_shift = i;
+        }
+    }
+    if ( !hole_shift )
+        return;
+
+    printk(KERN_INFO "PFN compression on bits %u...%u\n",
+           bottom_shift, bottom_shift + hole_shift - 1);
+
+    pfn_pdx_hole_shift  = hole_shift;
+    pfn_pdx_bottom_mask = (1UL << bottom_shift) - 1;
+    ma_va_bottom_mask   = (PAGE_SIZE << bottom_shift) - 1;
+    pfn_hole_mask       = ((1UL << hole_shift) - 1) << bottom_shift;
+    pfn_top_mask        = ~(pfn_pdx_bottom_mask | pfn_hole_mask);
+    ma_top_mask         = pfn_top_mask << PAGE_SHIFT;
 }
 
 void __init paging_init(void)
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/drivers/acpi/numa.c
--- a/xen/drivers/acpi/numa.c   Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/drivers/acpi/numa.c   Tue Sep 22 08:16:49 2009 +0100
@@ -141,7 +141,7 @@ acpi_parse_memory_affinity(struct acpi_s
        return 0;
 }
 
-static int __init acpi_parse_srat(struct acpi_table_header *table)
+int __init acpi_parse_srat(struct acpi_table_header *table)
 {
        if (!table)
                return -EINVAL;
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/asm-x86/config.h      Tue Sep 22 08:16:49 2009 +0100
@@ -145,17 +145,17 @@ extern unsigned int video_mode, video_fl
  *    Shadow linear page table.
  *  0xffff820000000000 - 0xffff827fffffffff [512GB, 2^39 bytes, PML4:260]
  *    Per-domain mappings (e.g., GDT, LDT).
- *  0xffff828000000000 - 0xffff8283ffffffff [16GB,  2^34 bytes, PML4:261]
+ *  0xffff828000000000 - 0xffff82bfffffffff [256GB, 2^38 bytes, PML4:261]
  *    Machine-to-phys translation table.
- *  0xffff828400000000 - 0xffff8287ffffffff [16GB,  2^34 bytes, PML4:261]
+ *  0xffff82c000000000 - 0xffff82c3ffffffff [16GB,  2^34 bytes, PML4:261]
  *    ioremap()/fixmap area.
- *  0xffff828800000000 - 0xffff82883fffffff [1GB,   2^30 bytes, PML4:261]
+ *  0xffff82c400000000 - 0xffff82c43fffffff [1GB,   2^30 bytes, PML4:261]
  *    Compatibility machine-to-phys translation table.
- *  0xffff828840000000 - 0xffff82887fffffff [1GB,   2^30 bytes, PML4:261]
+ *  0xffff82c440000000 - 0xffff82c47fffffff [1GB,   2^30 bytes, PML4:261]
  *    High read-only compatibility machine-to-phys translation table.
- *  0xffff828880000000 - 0xffff8288bfffffff [1GB,   2^30 bytes, PML4:261]
+ *  0xffff82c480000000 - 0xffff82c4bfffffff [1GB,   2^30 bytes, PML4:261]
  *    Xen text, static data, bss.
- *  0xffff8288c0000000 - 0xffff82f5ffffffff [437GB,             PML4:261]
+ *  0xffff82c4c0000000 - 0xffff82f5ffffffff [197GB,             PML4:261]
  *    Reserved for future use.
  *  0xffff82f600000000 - 0xffff82ffffffffff [40GB,  2^38 bytes, PML4:261]
  *    Page-frame information array.
@@ -188,7 +188,8 @@ extern unsigned int video_mode, video_fl
 #define HYPERVISOR_VIRT_END     (HYPERVISOR_VIRT_START + PML4_ENTRY_BYTES*16)
 /* Slot 256: read-only guest-accessible machine-to-phys translation table. */
 #define RO_MPT_VIRT_START       (PML4_ADDR(256))
-#define RO_MPT_VIRT_END         (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2)
+#define MPT_VIRT_SIZE           (PML4_ENTRY_BYTES / 2)
+#define RO_MPT_VIRT_END         (RO_MPT_VIRT_START + MPT_VIRT_SIZE)
 /* Slot 257: ioremap for PCI mmconfig space for 2048 segments (512GB)
  *     - full 16-bit segment support needs 44 bits
  *     - since PML4 slot has 39 bits, we limit segments to 2048 (11-bits)
@@ -205,9 +206,9 @@ extern unsigned int video_mode, video_fl
 #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
 #define PERDOMAIN_VIRT_END      (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20))
 #define PERDOMAIN_MBYTES        (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-/* Slot 261: machine-to-phys conversion table (16GB). */
+/* Slot 261: machine-to-phys conversion table (256GB). */
 #define RDWR_MPT_VIRT_START     (PML4_ADDR(261))
-#define RDWR_MPT_VIRT_END       (RDWR_MPT_VIRT_START + GB(16))
+#define RDWR_MPT_VIRT_END       (RDWR_MPT_VIRT_START + MPT_VIRT_SIZE)
 /* Slot 261: ioremap()/fixmap area (16GB). */
 #define IOREMAP_VIRT_START      RDWR_MPT_VIRT_END
 #define IOREMAP_VIRT_END        (IOREMAP_VIRT_START + GB(16))
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/asm-x86/mm.h  Tue Sep 22 08:16:49 2009 +0100
@@ -22,13 +22,12 @@
  * wants to support more than 16TB.
  * 'unsigned long' should be used for MFNs everywhere else.
  */
-#define __mfn_t unsigned int
-#define PRpgmfn "08x"
+#define __pdx_t unsigned int
 
 #undef page_list_entry
 struct page_list_entry
 {
-    __mfn_t next, prev;
+    __pdx_t next, prev;
 };
 
 struct page_info
@@ -77,14 +76,14 @@ struct page_info
 
         /* Page is in use, but not as a shadow. */
         struct {
-            /* Owner of this page (NULL if page is anonymous). */
-            u32 _domain; /* pickled format */
+            /* Owner of this page (zero if page is anonymous). */
+            __pdx_t _domain;
         } inuse;
 
         /* Page is in use as a shadow. */
         struct {
             /* GMFN of guest page we're a shadow of. */
-            __mfn_t back;
+            __pdx_t back;
         } sh;
 
         /* Page is on a free list (including shadow code free lists). */
@@ -146,11 +145,11 @@ struct page_info
         u32 shadow_flags;
 
         /* When in use as a shadow, next shadow in this hash chain. */
-        __mfn_t next_shadow;
+        __pdx_t next_shadow;
     };
 };
 
-#undef __mfn_t
+#undef __pdx_t
 
 #define PG_shift(idx)   (BITS_PER_LONG - (idx))
 #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
@@ -245,9 +244,9 @@ struct page_info
 
 #define page_get_owner(_p)                                              \
     ((struct domain *)((_p)->v.inuse._domain ?                          \
-                       mfn_to_virt((_p)->v.inuse._domain) : NULL))
+                       pdx_to_virt((_p)->v.inuse._domain) : NULL))
 #define page_set_owner(_p,_d)                                           \
-    ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
+    ((_p)->v.inuse._domain = (_d) ? virt_to_pdx(_d) : 0)
 
 #define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
 #define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))
@@ -263,6 +262,33 @@ extern unsigned long max_page;
 extern unsigned long max_page;
 extern unsigned long total_pages;
 void init_frametable(void);
+
+/* Convert between Xen-heap virtual addresses and page-info structures. */
+static inline struct page_info *__virt_to_page(const void *v)
+{
+    unsigned long va = (unsigned long)v;
+
+#ifdef __x86_64__
+    ASSERT(va >= XEN_VIRT_START);
+    ASSERT(va < DIRECTMAP_VIRT_END);
+    if ( va < XEN_VIRT_END )
+        va += DIRECTMAP_VIRT_START - XEN_VIRT_START + xen_phys_start;
+    else
+        ASSERT(va >= DIRECTMAP_VIRT_START);
+#else
+    ASSERT(va - DIRECTMAP_VIRT_START < DIRECTMAP_VIRT_END);
+#endif
+    return frame_table + ((va - DIRECTMAP_VIRT_START) >> PAGE_SHIFT);
+}
+
+static inline void *__page_to_virt(const struct page_info *pg)
+{
+    ASSERT((unsigned long)pg - FRAMETABLE_VIRT_START < FRAMETABLE_VIRT_END);
+    return (void *)(DIRECTMAP_VIRT_START +
+                    ((unsigned long)pg - FRAMETABLE_VIRT_START) /
+                    (sizeof(*pg) / (sizeof(*pg) & -sizeof(*pg))) *
+                    (PAGE_SIZE / (sizeof(*pg) & -sizeof(*pg))));
+}
 
 int free_page_type(struct page_info *page, unsigned long type,
                    int preemptible);
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/asm-x86/numa.h        Tue Sep 22 08:16:49 2009 +0100
@@ -75,5 +75,6 @@ static inline __attribute__((pure)) int 
 #define clear_node_cpumask(cpu) do {} while (0)
 #endif
 
+void srat_parse_regions(u64 addr);
 
 #endif
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/asm-x86/page.h        Tue Sep 22 08:16:49 2009 +0100
@@ -220,8 +220,6 @@ void copy_page_sse2(void *, const void *
                              copy_page_sse2(_t, _f) :                   \
                              (void)memcpy(_t, _f, PAGE_SIZE))
 
-#define __mfn_valid(mfn)    ((mfn) < max_page)
-
 /* Convert between Xen-heap virtual addresses and machine addresses. */
 #define __pa(x)             (virt_to_maddr(x))
 #define __va(x)             (maddr_to_virt(x))
@@ -231,16 +229,12 @@ void copy_page_sse2(void *, const void *
 #define __mfn_to_virt(mfn)  (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
 
 /* Convert between machine frame numbers and page-info structures. */
-#define __mfn_to_page(mfn)  (frame_table + (mfn))
-#define __page_to_mfn(pg)   ((unsigned long)((pg) - frame_table))
+#define __mfn_to_page(mfn)  (frame_table + pfn_to_pdx(mfn))
+#define __page_to_mfn(pg)   pdx_to_pfn((unsigned long)((pg) - frame_table))
 
 /* Convert between machine addresses and page-info structures. */
-#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
-#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
-
-/* Convert between Xen-heap virtual addresses and page-info structures. */
-#define __virt_to_page(va)  (frame_table + (__pa(va) >> PAGE_SHIFT))
-#define __page_to_virt(pg)  (maddr_to_virt(page_to_maddr(pg)))
+#define __maddr_to_page(ma) __mfn_to_page((ma) >> PAGE_SHIFT)
+#define __page_to_maddr(pg) ((paddr_t)__page_to_mfn(pg) << PAGE_SHIFT)
 
 /* Convert between frame number and address formats.  */
 #define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
@@ -253,6 +247,8 @@ void copy_page_sse2(void *, const void *
 #define mfn_valid(mfn)      __mfn_valid(mfn)
 #define virt_to_mfn(va)     __virt_to_mfn(va)
 #define mfn_to_virt(mfn)    __mfn_to_virt(mfn)
+#define virt_to_maddr(va)   __virt_to_maddr((unsigned long)(va))
+#define maddr_to_virt(ma)   __maddr_to_virt((unsigned long)(ma))
 #define mfn_to_page(mfn)    __mfn_to_page(mfn)
 #define page_to_mfn(pg)     __page_to_mfn(pg)
 #define maddr_to_page(ma)   __maddr_to_page(ma)
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/asm-x86/x86_32/page.h Tue Sep 22 08:16:49 2009 +0100
@@ -41,21 +41,25 @@
 #include <xen/config.h>
 #include <asm/types.h>
 
+#define __mfn_valid(mfn)        ((mfn) < max_page)
+
+#define max_pdx                 max_page
+#define pfn_to_pdx(pfn)         (pfn)
+#define pdx_to_pfn(pdx)         (pdx)
+#define virt_to_pdx(va)         virt_to_mfn(va)
+#define pdx_to_virt(pdx)        mfn_to_virt(pdx)
+
 static inline unsigned long __virt_to_maddr(unsigned long va)
 {
     ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END);
     return va - DIRECTMAP_VIRT_START;
 }
-#define virt_to_maddr(va)       \
-    (__virt_to_maddr((unsigned long)(va)))
 
 static inline void *__maddr_to_virt(unsigned long ma)
 {
     ASSERT(ma < DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START);
     return (void *)(ma + DIRECTMAP_VIRT_START);
 }
-#define maddr_to_virt(ma)       \
-    (__maddr_to_virt((unsigned long)(ma)))
 
 /* read access (should only be used for debug printk's) */
 typedef u64 intpte_t;
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/asm-x86/x86_64/page.h Tue Sep 22 08:16:49 2009 +0100
@@ -35,25 +35,63 @@
 /* Physical address where Xen was relocated to. */
 extern unsigned long xen_phys_start;
 
+extern unsigned long max_page, max_pdx;
+extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask;
+extern unsigned int pfn_pdx_hole_shift;
+extern unsigned long pfn_hole_mask;
+extern unsigned long pfn_top_mask, ma_top_mask;
+extern void pfn_pdx_hole_setup(unsigned long);
+
+#define page_to_pdx(pg)  ((pg) - frame_table)
+#define pdx_to_page(pdx) (frame_table + (pdx))
+/*
+ * Note: These are solely for the use by page_{get,set}_owner(), and
+ *       therefore don't need to handle the XEN_VIRT_{START,END} range.
+ */
+#define virt_to_pdx(va)  (((unsigned long)(va) - DIRECTMAP_VIRT_START) >> \
+                          PAGE_SHIFT)
+#define pdx_to_virt(pdx) ((void *)(DIRECTMAP_VIRT_START + \
+                                   ((unsigned long)(pdx) << PAGE_SHIFT)))
+
+static inline int __mfn_valid(unsigned long mfn)
+{
+    return mfn < max_page && !(mfn & pfn_hole_mask);
+}
+
+static inline unsigned long pfn_to_pdx(unsigned long pfn)
+{
+    return (pfn & pfn_pdx_bottom_mask) |
+           ((pfn & pfn_top_mask) >> pfn_pdx_hole_shift);
+}
+
+static inline unsigned long pdx_to_pfn(unsigned long pdx)
+{
+    return (pdx & pfn_pdx_bottom_mask) |
+           ((pdx << pfn_pdx_hole_shift) & pfn_top_mask);
+}
+
 static inline unsigned long __virt_to_maddr(unsigned long va)
 {
     ASSERT(va >= XEN_VIRT_START);
     ASSERT(va < DIRECTMAP_VIRT_END);
-    ASSERT((va < XEN_VIRT_END) || (va >= DIRECTMAP_VIRT_START));
     if ( va >= DIRECTMAP_VIRT_START )
-        return va - DIRECTMAP_VIRT_START;
-    return va - XEN_VIRT_START + xen_phys_start;
+        va -= DIRECTMAP_VIRT_START;
+    else
+    {
+        ASSERT(va < XEN_VIRT_END);
+        va += xen_phys_start - XEN_VIRT_START;
+    }
+    return (va & ma_va_bottom_mask) |
+           ((va << pfn_pdx_hole_shift) & ma_top_mask);
 }
-#define virt_to_maddr(va)       \
-    (__virt_to_maddr((unsigned long)(va)))
 
 static inline void *__maddr_to_virt(unsigned long ma)
 {
     ASSERT(ma < DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START);
-    return (void *)(ma + DIRECTMAP_VIRT_START);
+    return (void *)(DIRECTMAP_VIRT_START +
+                    ((ma & ma_va_bottom_mask) |
+                     ((ma & ma_top_mask) >> pfn_pdx_hole_shift)));
 }
-#define maddr_to_virt(ma)       \
-    (__maddr_to_virt((unsigned long)(ma)))
 
 /* read access (should only be used for debug printk's) */
 typedef u64 intpte_t;
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/xen/acpi.h
--- a/xen/include/xen/acpi.h    Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/xen/acpi.h    Tue Sep 22 08:16:49 2009 +0100
@@ -269,6 +269,9 @@ int acpi_table_parse_entries(char *id, u
 int acpi_table_parse_entries(char *id, unsigned long table_size,
	int entry_id, acpi_table_entry_handler handler, unsigned int max_entries);
 int acpi_table_parse_madt(enum acpi_madt_type id, acpi_table_entry_handler handler, unsigned int max_entries);
+int acpi_table_parse_srat(enum acpi_srat_entry_id id,
+       acpi_madt_entry_handler handler, unsigned int max_entries);
+int acpi_parse_srat(struct acpi_table_header *);
 void acpi_table_print (struct acpi_table_header *header, unsigned long phys_addr);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 void acpi_table_print_srat_entry (struct acpi_subtable_header *srat);
diff -r e3eb0a6ed463 -r d6f4089f0f8c xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Tue Sep 22 08:14:48 2009 +0100
+++ b/xen/include/xen/mm.h      Tue Sep 22 08:16:49 2009 +0100
@@ -103,6 +103,16 @@ struct page_list_head
 
 #define PAGE_LIST_NULL (~0)
 
+# if !defined(pdx_to_page) && !defined(page_to_pdx)
+#  if defined(__page_to_mfn) || defined(__mfn_to_page)
+#   define page_to_pdx __page_to_mfn
+#   define pdx_to_page __mfn_to_page
+#  else
+#   define page_to_pdx page_to_mfn
+#   define pdx_to_page mfn_to_page
+#  endif
+# endif
+
 # define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
 # define PAGE_LIST_HEAD(name) \
     struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
@@ -123,21 +133,21 @@ page_list_next(const struct page_info *p
 page_list_next(const struct page_info *page,
                const struct page_list_head *head)
 {
-    return page != head->tail ? mfn_to_page(page->list.next) : NULL;
+    return page != head->tail ? pdx_to_page(page->list.next) : NULL;
 }
 static inline struct page_info *
 page_list_prev(const struct page_info *page,
                const struct page_list_head *head)
 {
-    return page != head->next ? mfn_to_page(page->list.prev) : NULL;
+    return page != head->next ? pdx_to_page(page->list.prev) : NULL;
 }
 static inline void
 page_list_add(struct page_info *page, struct page_list_head *head)
 {
     if ( head->next )
     {
-        page->list.next = page_to_mfn(head->next);
-        head->next->list.prev = page_to_mfn(page);
+        page->list.next = page_to_pdx(head->next);
+        head->next->list.prev = page_to_pdx(page);
     }
     else
     {
@@ -153,8 +163,8 @@ page_list_add_tail(struct page_info *pag
     page->list.next = PAGE_LIST_NULL;
     if ( head->next )
     {
-        page->list.prev = page_to_mfn(head->tail);
-        head->tail->list.next = page_to_mfn(page);
+        page->list.prev = page_to_pdx(head->tail);
+        head->tail->list.next = page_to_pdx(page);
     }
     else
     {
@@ -191,8 +201,8 @@ static inline void
 static inline void
 page_list_del(struct page_info *page, struct page_list_head *head)
 {
-    struct page_info *next = mfn_to_page(page->list.next);
-    struct page_info *prev = mfn_to_page(page->list.prev);
+    struct page_info *next = pdx_to_page(page->list.next);
+    struct page_info *prev = pdx_to_page(page->list.prev);
 
     if ( !__page_list_del_head(page, head, next, prev) )
     {
@@ -204,8 +214,8 @@ page_list_del2(struct page_info *page, s
 page_list_del2(struct page_info *page, struct page_list_head *head1,
                struct page_list_head *head2)
 {
-    struct page_info *next = mfn_to_page(page->list.next);
-    struct page_info *prev = mfn_to_page(page->list.prev);
+    struct page_info *next = pdx_to_page(page->list.next);
+    struct page_info *prev = pdx_to_page(page->list.prev);
 
     if ( !__page_list_del_head(page, head1, next, prev) &&
          !__page_list_del_head(page, head2, next, prev) )
@@ -252,11 +262,11 @@ page_list_splice(struct page_list_head *
     last = list->tail;
     at = head->next;
 
-    first->list.prev = page_to_mfn(head->next);
+    first->list.prev = page_to_pdx(head->next);
     head->next = first;
 
-    last->list.next = page_to_mfn(at);
-    at->list.prev = page_to_mfn(last);
+    last->list.next = page_to_pdx(at);
+    at->list.prev = page_to_pdx(last);
 }
 
 #define page_list_for_each(pos, head) \

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
