xen-changelog

[Xen-changelog] [xen-unstable] Merge

# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Node ID 5c029fda79dca05de04b68a54a827f874aae087a
# Parent  bd207697f0c7b3cce4593073435506f6d5b58ef8
# Parent  87fc080f555b5ddc7041bb5d5703de388cb5efa4
Merge
---
 xen/arch/x86/hvm/svm/svm.c       |   17 
 xen/arch/x86/hvm/svm/vmcb.c      |    9 
 xen/arch/x86/mm/shadow/common.c  |  189 ++++----
 xen/arch/x86/mm/shadow/multi.c   |  843 ++++++++++-----------------------------
 xen/arch/x86/mm/shadow/multi.h   |    4 
 xen/arch/x86/mm/shadow/private.h |   75 ---
 xen/arch/x86/mm/shadow/types.h   |  123 -----
 xen/include/asm-x86/domain.h     |   17 
 xen/include/asm-x86/hvm/vcpu.h   |    5 
 xen/include/asm-x86/mm.h         |   17 
 xen/include/asm-x86/perfc_defn.h |    1 
 xen/include/asm-x86/shadow.h     |    7 
 12 files changed, 364 insertions(+), 943 deletions(-)

diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Oct 18 14:36:20 2006 +0100
@@ -1739,9 +1739,6 @@ static int mov_to_cr(int gpreg, int cr, 
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
 
-            /*
-             * arch.shadow_table should now hold the next CR3 for shadow
-             */
             v->arch.hvm_svm.cpu_cr3 = value;
             update_cr3(v);
             vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
@@ -1787,10 +1784,6 @@ static int mov_to_cr(int gpreg, int cr, 
                             (unsigned long) (mfn << PAGE_SHIFT));
 
                 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
-
-                /*
-                 * arch->shadow_table should hold the next CR3 for shadow
-                 */
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, 
                             "Update CR3 value = %lx, mfn = %lx",
@@ -2355,7 +2348,7 @@ void svm_dump_regs(const char *from, str
 {
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
+    unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
 
     printf("%s: guest registers from %s:\n", __func__, from);
 #if defined (__x86_64__)
@@ -2681,11 +2674,11 @@ asmlinkage void svm_vmexit_handler(struc
         if (do_debug)
         {
             printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
-                   "shadow_table = 0x%08x\n", 
+                   "hw_cr3 = 0x%16lx\n", 
                    __func__,
                    (int) v->arch.guest_table.pfn,
                    (int) v->arch.monitor_table.pfn, 
-                   (int) v->arch.shadow_table.pfn);
+                   (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
 
             svm_dump_vmcb(__func__, vmcb);
             svm_dump_regs(__func__, regs);
@@ -2913,10 +2906,10 @@ asmlinkage void svm_vmexit_handler(struc
     if (do_debug) 
     {
         printk("vmexit_handler():- guest_table = 0x%08x, "
-               "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
+               "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
                (int)v->arch.guest_table.pfn,
                (int)v->arch.monitor_table.pfn, 
-               (int)v->arch.shadow_table.pfn);
+               (int)v->arch.hvm_vcpu.hw_cr3);
         printk("svm_vmexit_handler: Returning\n");
     }
 #endif
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Wed Oct 18 14:36:20 2006 +0100
@@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v)
     if (svm_dbg_on) 
     {
         unsigned long pt;
-        pt = pagetable_get_paddr(v->arch.shadow_table);
-        printk("%s: shadow_table = %lx\n", __func__, pt);
+        printk("%s: hw_cr3 = %llx\n", __func__, 
+               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
         pt = pagetable_get_paddr(v->arch.guest_table);
         printk("%s: guest_table  = %lx\n", __func__, pt);
         pt = pagetable_get_paddr(v->domain->arch.phys_table);
@@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v)
     {
         printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
         printk("init_guest_table: guest_table = 0x%08x, monitor_table = 
0x%08x,"
-                " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, 
-                (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
+                " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn, 
+               (int)v->arch.monitor_table.pfn, 
+               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
     }
 
     v->arch.schedule_tail = arch_svm_do_resume;
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Wed Oct 18 14:36:20 2006 +0100
@@ -283,11 +283,8 @@ __shadow_validate_guest_entry(struct vcp
     if ( page->shadow_flags & SHF_L2H_PAE ) 
         result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
             (v, gmfn, entry, size);
-    if ( page->shadow_flags & SHF_L3_PAE ) 
-        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
-            (v, gmfn, entry, size);
 #else /* 32-bit non-PAE hypervisor does not support PAE guests */
-    ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
+    ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4 
@@ -343,8 +340,11 @@ shadow_validate_guest_pt_write(struct vc
     if ( rc & SHADOW_SET_ERROR ) 
     {
         /* This page is probably not a pagetable any more: tear it out of the 
-         * shadows, along with any tables that reference it */
-        shadow_remove_all_shadows_and_parents(v, gmfn);
+         * shadows, along with any tables that reference it.  
+         * Since the validate call above will have made a "safe" (i.e. zero) 
+         * shadow entry, we can let the domain live even if we can't fully 
+         * unshadow the page. */
+        sh_remove_shadows(v, gmfn, 0, 0);
     }
 }
 
@@ -424,22 +424,16 @@ shadow_validate_guest_pt_write(struct vc
 /* Allocating shadow pages
  * -----------------------
  *
- * Most shadow pages are allocated singly, but there are two cases where we 
- * need to allocate multiple pages together.
- * 
- * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
- *    A 32-bit guest l1 table covers 4MB of virtuial address space,
- *    and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
- *    of virtual address space each).  Similarly, a 32-bit guest l2 table 
- *    (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va 
- *    each).  These multi-page shadows are contiguous and aligned; 
- *    functions for handling offsets into them are defined in shadow.c 
- *    (shadow_l1_index() etc.)
+ * Most shadow pages are allocated singly, but there is one case where
+ * we need to allocate multiple pages together: shadowing 32-bit guest
+ * tables on PAE or 64-bit shadows.  A 32-bit guest l1 table covers 4MB
+ * of virtual address space, and needs to be shadowed by two PAE/64-bit
+ * l1 tables (covering 2MB of virtual address space each).  Similarly, a
+ * 32-bit guest l2 table (4GB va) needs to be shadowed by four
+ * PAE/64-bit l2 tables (1GB va each).  These multi-page shadows are
+ * contiguous and aligned; functions for handling offsets into them are
+ * defined in shadow.c (shadow_l1_index() etc.)
  *    
- * 2: Shadowing PAE top-level pages.  Each guest page that contains
- *    any PAE top-level pages requires two shadow pages to shadow it.
- *    They contain alternating l3 tables and pae_l3_bookkeeping structs.
- *
  * This table shows the allocation behaviour of the different modes:
  *
  * Xen paging      32b  pae  pae  64b  64b  64b
@@ -449,7 +443,7 @@ shadow_validate_guest_pt_write(struct vc
  *
  * sl1 size         4k   8k   4k   8k   4k   4k
  * sl2 size         4k  16k   4k  16k   4k   4k
- * sl3 size         -    -    8k   -    8k   4k
+ * sl3 size         -    -    -    -    -    4k
  * sl4 size         -    -    -    -    -    4k
  *
  * We allocate memory from xen in four-page units and break them down
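(As a sketch of the indexing that the comment above refers to -- not part of
this changeset -- a helper along the lines of shadow_l1_index() picks the
right page of the contiguous pair and the offset within it.  The function
name and the #if condition below are assumptions for illustration; the types
and constants are the usual Xen shadow ones.)

/* Map a 32-bit guest l1 index onto its 2-page PAE/64-bit shadow l1. */
static inline u32
example_shadow_l1_index(mfn_t *smfn, u32 guest_index)
{
#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
    /* 1024 guest entries, 512 per shadow page: step to the right page... */
    *smfn = _mfn(mfn_x(*smfn) + (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
    /* ...and return the offset within that page */
    return guest_index % SHADOW_L1_PAGETABLE_ENTRIES;
#else
    /* Same-size guest and shadow l1s need no adjustment */
    return guest_index;
#endif
}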
@@ -503,7 +497,6 @@ shadow_order(u32 shadow_type)
         0, /* PGC_SH_fl1_pae_shadow */
         0, /* PGC_SH_l2_pae_shadow  */
         0, /* PGC_SH_l2h_pae_shadow */
-        1, /* PGC_SH_l3_pae_shadow  */
         0, /* PGC_SH_l1_64_shadow   */
         0, /* PGC_SH_fl1_64_shadow  */
         0, /* PGC_SH_l2_64_shadow   */
@@ -546,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu 
 #endif
         break;
 #if CONFIG_PAGING_LEVELS >= 3
-    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
         SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
         break;
 #endif
@@ -587,18 +581,8 @@ void shadow_prealloc(struct domain *d, u
         pg = list_entry(l, struct page_info, list);
         smfn = page_to_mfn(pg);
 
-#if CONFIG_PAGING_LEVELS >= 3
-        if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
-        {
-            /* For PAE, we need to unpin each subshadow on this shadow */
-            SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
-        } 
-        else 
-#endif /* 32-bit code always takes this branch */
-        {
-            /* Unpin this top-level shadow */
-            sh_unpin(v, smfn);
-        }
+        /* Unpin this top-level shadow */
+        sh_unpin(v, smfn);
 
         /* See if that freed up a chunk of appropriate size */
         if ( chunk_is_available(d, order) ) return;
@@ -620,8 +604,12 @@ void shadow_prealloc(struct domain *d, u
         shadow_unhook_mappings(v, smfn);
 
         /* Need to flush TLB if we've altered our own tables */
-        if ( !shadow_mode_external(d) 
-             && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
+        if ( !shadow_mode_external(d) &&
+             (pagetable_get_pfn(current->arch.shadow_table[0]) == mfn_x(smfn)
+              || pagetable_get_pfn(current->arch.shadow_table[1]) == mfn_x(smfn)
+              || pagetable_get_pfn(current->arch.shadow_table[2]) == mfn_x(smfn)
+              || pagetable_get_pfn(current->arch.shadow_table[3]) == mfn_x(smfn)
+                 ) )
             local_flush_tlb();
         
         /* See if that freed up a chunk of appropriate size */
@@ -732,6 +720,15 @@ void shadow_free(struct domain *d, mfn_t
 
     for ( i = 0; i < 1<<order; i++ ) 
     {
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+        struct vcpu *v;
+        for_each_vcpu(d, v) 
+        {
+            /* No longer safe to look for a writeable mapping in this shadow */
+            if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 
+                v->arch.shadow.last_writeable_pte_smfn = 0;
+        }
+#endif
         /* Strip out the type: this is now a free shadow page */
         pg[i].count_info = 0;
         /* Remember the TLB timestamp so we will know whether to flush 
@@ -920,9 +917,20 @@ p2m_next_level(struct domain *d, mfn_t *
 #if CONFIG_PAGING_LEVELS == 3
         if (type == PGT_l2_page_table)
         {
+            struct vcpu *v;
             /* We have written to the p2m l3: need to sync the per-vcpu
              * copies of it in the monitor tables */
             p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
+            /* Also, any vcpus running on shadows of the p2m need to 
+             * reload their CR3s so the change propagates to the shadow */
+            ASSERT(shadow_lock_is_acquired(d));
+            for_each_vcpu(d, v) 
+            {
+                if ( pagetable_get_pfn(v->arch.guest_table) 
+                     == pagetable_get_pfn(d->arch.phys_table) 
+                     && v->arch.shadow.mode != NULL )
+                    v->arch.shadow.mode->update_cr3(v);
+            }
         }
 #endif
         /* The P2M can be shadowed: keep the shadows synced */
@@ -1711,9 +1719,6 @@ void sh_destroy_shadow(struct vcpu *v, m
     case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
         SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
         break;
-    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
-        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
-        break;
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -1768,7 +1773,6 @@ int shadow_remove_write_access(struct vc
 #endif
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
-        NULL, /* l3_pae  */
 #if CONFIG_PAGING_LEVELS >= 4
         SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
         SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
@@ -1825,12 +1829,11 @@ int shadow_remove_write_access(struct vc
         unsigned long gfn;
         /* Heuristic: there is likely to be only one writeable mapping,
          * and that mapping is likely to be in the current pagetable,
-         * either in the guest's linear map (linux, windows) or in a
-         * magic slot used to map high memory regions (linux HIGHTPTE) */
+         * in the guest's linear map (on non-HIGHPTE linux and windows)*/
 
 #define GUESS(_a, _h) do {                                              \
-            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )          \
-                perfc_incrc(shadow_writeable_h_ ## _h);                \
+            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )      \
+                perfc_incrc(shadow_writeable_h_ ## _h);                 \
             if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
                 return 1;                                               \
         } while (0)
@@ -1880,9 +1883,35 @@ int shadow_remove_write_access(struct vc
 #endif /* CONFIG_PAGING_LEVELS >= 3 */
 
 #undef GUESS
-
-    }
-#endif
+    }
+
+    if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 1;
+
+    /* Second heuristic: on HIGHPTE linux, there are two particular PTEs
+     * (entries in the fixmap) where linux maps its pagetables.  Since
+     * we expect to hit them most of the time, we start the search for
+     * the writeable mapping by looking at the same MFN where the last
+     * brute-force search succeeded. */
+
+    if ( v->arch.shadow.last_writeable_pte_smfn != 0 )
+    {
+        unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
+        mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn);
+        int shtype = (mfn_to_page(last_smfn)->count_info & PGC_SH_type_mask) 
+            >> PGC_SH_type_shift;
+
+        if ( callbacks[shtype] ) 
+            callbacks[shtype](v, last_smfn, gmfn);
+
+        if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count )
+            perfc_incrc(shadow_writeable_h_5);
+    }
+
+    if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 1;
+
+#endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
     
     /* Brute-force search of all the shadows, by walking the hash */
     perfc_incrc(shadow_writeable_bf);
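(The hunk above shows the consuming side of this heuristic; the recording
side is not visible here.  Presumably the per-shadow write-access remover
stores the mfn of the shadow l1 in which the brute-force search last found a
writeable PTE, roughly as below -- 'sl1mfn' is an assumed name for that
shadow's mfn.)

#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
    /* Remember where the writeable PTE was found so the next search can
     * try this shadow first */
    v->arch.shadow.last_writeable_pte_smfn = mfn_x(sl1mfn);
#endif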
@@ -1932,7 +1961,6 @@ int shadow_remove_all_mappings(struct vc
 #endif
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
-        NULL, /* l3_pae  */
 #if CONFIG_PAGING_LEVELS >= 4
         SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
         SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
@@ -2005,7 +2033,8 @@ static int sh_remove_shadow_via_pointer(
     ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
     ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
     ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
-    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow);
     ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
     
     if (pg->up == 0) return 0;
@@ -2034,7 +2063,6 @@ static int sh_remove_shadow_via_pointer(
     case PGC_SH_l1_pae_shadow:
     case PGC_SH_l2_pae_shadow:
     case PGC_SH_l2h_pae_shadow:
-    case PGC_SH_l3_pae_shadow:
         SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
         break;
 #if CONFIG_PAGING_LEVELS >= 4
@@ -2058,17 +2086,20 @@ static int sh_remove_shadow_via_pointer(
     return rc;
 }
 
-void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
 /* Remove the shadows of this guest page.  
- * If all != 0, find all shadows, if necessary by walking the tables.
- * Otherwise, just try the (much faster) heuristics, which will remove 
- * at most one reference to each shadow of the page. */
+ * If fast != 0, just try the quick heuristic, which will remove 
+ * at most one reference to each shadow of the page.  Otherwise, walk
+ * all the shadow tables looking for refs to shadows of this gmfn.
+ * If all != 0, kill the domain if we can't find all the shadows.
+ * (all != 0 implies fast == 0)
+ */
 {
     struct page_info *pg;
     mfn_t smfn;
     u32 sh_flags;
     unsigned char t;
-
+    
     /* Dispatch table for getting per-type functions: each level must
      * be called with the function to remove a lower-level shadow. */
     static hash_callback_t callbacks[16] = {
@@ -2085,11 +2116,9 @@ void sh_remove_shadows(struct vcpu *v, m
 #if CONFIG_PAGING_LEVELS >= 3
         SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
         SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
-        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
 #else 
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
-        NULL, /* l3_pae  */
 #endif
         NULL, /* l1_64   */
         NULL, /* fl1_64  */
@@ -2115,9 +2144,8 @@ void sh_remove_shadows(struct vcpu *v, m
         ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
          | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
         0, /* fl1_pae */
-        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
-        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae  */
-        0, /* l3_pae  */
+        0, /* l2_pae  */
+        0, /* l2h_pae  */
         1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
         0, /* fl1_64  */
         1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
@@ -2128,6 +2156,7 @@ void sh_remove_shadows(struct vcpu *v, m
     };
 
     ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(!(all && fast));
 
     pg = mfn_to_page(gmfn);
 
@@ -2147,29 +2176,26 @@ void sh_remove_shadows(struct vcpu *v, m
      * call will remove at most one shadow, and terminate immediately when
      * it does remove it, so we never walk the hash after doing a deletion.  */
 #define DO_UNSHADOW(_type) do {                                 \
-    t = (_type) >> PGC_SH_type_shift;                          \
-    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);              \
-    if ( !sh_remove_shadow_via_pointer(v, smfn) && all )       \
+    t = (_type) >> PGC_SH_type_shift;                           \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);               \
+    if ( !sh_remove_shadow_via_pointer(v, smfn) && !fast )      \
         hash_foreach(v, masks[t], callbacks, smfn);             \
 } while (0)
 
     /* Top-level shadows need to be unpinned */
-#define DO_UNPIN(_type) do {                                             \
+#define DO_UNPIN(_type) do {                                            \
     t = (_type) >> PGC_SH_type_shift;                                   \
     smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                       \
     if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                \
         sh_unpin(v, smfn);                                              \
-    if ( (_type) == PGC_SH_l3_pae_shadow )                              \
-        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \
 } while (0)
 
     if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
     if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
 #if CONFIG_PAGING_LEVELS >= 3
     if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
-    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
-    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
-    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNPIN(PGC_SH_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow);
 #if CONFIG_PAGING_LEVELS >= 4
     if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
     if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
@@ -2181,20 +2207,14 @@ void sh_remove_shadows(struct vcpu *v, m
 #undef DO_UNSHADOW
 #undef DO_UNPIN
 
-
-#if CONFIG_PAGING_LEVELS > 2
-    /* We may have caused some PAE l3 entries to change: need to 
-     * fix up the copies of them in various places */
-    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
-        sh_pae_recopy(v->domain);
-#endif
-
     /* If that didn't catch the shadows, something is wrong */
-    if ( all && (pg->count_info & PGC_page_table) )
-    {
-        SHADOW_ERROR("can't find all shadows of mfn %05lx (shadow_flags=%08x)\n",
+    if ( !fast && (pg->count_info & PGC_page_table) )
+    {
+        SHADOW_ERROR("can't find all shadows of mfn %05lx "
+                     "(shadow_flags=%08x)\n",
                       mfn_x(gmfn), pg->shadow_flags);
-        domain_crash(v->domain);
+        if ( all ) 
+            domain_crash(v->domain);
     }
 }
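(Putting the new (fast, all) arguments together with the call sites changed
elsewhere in this patch, the usage is roughly:)

/* Early-unshadow path: heuristic only, may fail to unshadow the page */
sh_remove_shadows(v, gmfn, 1 /* fast */, 0 /* all */);

/* After a failed validate: full walk, but a safe (zero) shadow entry has
 * already been written, so leftover shadows are tolerated */
sh_remove_shadows(v, gmfn, 0 /* fast */, 0 /* all */);

/* Callers that must fully unshadow the page: full walk, and the domain is
 * crashed if some shadow of gmfn cannot be found */
sh_remove_shadows(v, gmfn, 0 /* fast */, 1 /* all */);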
 
@@ -3118,7 +3138,6 @@ void shadow_audit_tables(struct vcpu *v)
         SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
         SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
         SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
-        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
 #if CONFIG_PAGING_LEVELS >= 4
         SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
         SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
@@ -3143,7 +3162,7 @@ void shadow_audit_tables(struct vcpu *v)
         {
         case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
         case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
-                        |SHF_L2H_PAE|SHF_L3_PAE); break;
+                        |SHF_L2H_PAE); break;
         case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64  
                         |SHF_L3_64|SHF_L4_64); break;
         default: BUG();
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Oct 18 14:36:20 2006 +0100
@@ -20,20 +20,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-
-// DESIGN QUESTIONS:
-// Why use subshadows for PAE guests?
-// - reduces pressure in the hash table
-// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
-// - would need to find space in the page_info to store 7 more bits of
-//   backpointer
-// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
-//   figure out when to demote the guest page from l3 status
-//
-// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
-// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
-//   space for both PV and HVM guests.
-//
 
 #include <xen/config.h>
 #include <xen/types.h>
@@ -118,9 +104,6 @@ static char *fetch_type_names[] = {
 #endif
 
 /* XXX forward declarations */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
-#endif
 static inline void sh_update_linear_entries(struct vcpu *v);
 
 /**************************************************************************/
@@ -129,8 +112,6 @@ static inline void sh_update_linear_entr
  * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
  * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
  *              shadow L1 which maps its "splinters".
- * PAE CR3s:    maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
- *              PAE L3 info page for that CR3 value.
  */
 
 static inline mfn_t 
@@ -215,7 +196,6 @@ delete_fl1_shadow_status(struct vcpu *v,
 {
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
-
     shadow_hash_delete(v, gfn_x(gfn),
                         PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
 }
@@ -429,18 +409,16 @@ static void sh_audit_gw(struct vcpu *v, 
     if ( !(SHADOW_AUDIT_ENABLE) )
         return;
 
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
     if ( valid_mfn(gw->l4mfn)
          && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, 
                                                 PGC_SH_l4_shadow))) )
         (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* PAE or 64... */
     if ( valid_mfn(gw->l3mfn)
          && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, 
                                                 PGC_SH_l3_shadow))) )
         (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* All levels... */
+#endif /* PAE or 64... */
     if ( valid_mfn(gw->l2mfn) )
     {
         if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
@@ -498,8 +476,7 @@ static u32 guest_set_ad_bits(struct vcpu
     flags = guest_l1e_get_flags(*ep);
 
     /* PAE l3s do not have A and D bits */
-    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
-        return flags;
+    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
 
     /* Need the D bit as well for writes, in L1es and PSE L2es. */
     if ( ft == ft_demand_write  
@@ -646,37 +623,13 @@ shadow_l2_index(mfn_t *smfn, u32 guest_i
 #endif
 }
 
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
 
 static inline u32
 shadow_l3_index(mfn_t *smfn, u32 guest_index)
 {
-#if GUEST_PAGING_LEVELS == 3
-    u32 group_id;
-
-    // Because we use twice the space in L3 shadows as was consumed in guest
-    // L3s, the number of guest entries per shadow page is
-    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
-    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
-    //
-    *smfn = _mfn(mfn_x(*smfn) +
-                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
-
-    // We store PAE L3 shadows in groups of 4, alternating shadows and
-    // pae_l3_bookkeeping structs.  So the effective shadow index is
-    // the the group_id * 8 + the offset within the group.
-    //
-    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
-    group_id = guest_index / 4;
-    return (group_id * 8) + (guest_index % 4);
-#else
     return guest_index;
-#endif
-}
-
-#endif // GUEST_PAGING_LEVELS >= 3
-
-#if GUEST_PAGING_LEVELS >= 4
+}
 
 static inline u32
 shadow_l4_index(mfn_t *smfn, u32 guest_index)
@@ -722,6 +675,9 @@ do {                                    
     u32 pass_thru_flags;
     u32 sflags;
 
+    /* We don't shadow PAE l3s */
+    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
     // XXX -- might want to think about PAT support for HVM guests...
 
 #ifndef NDEBUG
@@ -757,29 +713,16 @@ do {                                    
     if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
         gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
     
-    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
-    //
-    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
-        pass_thru_flags = _PAGE_PRESENT;
-    else
-    {
-        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
-                           _PAGE_RW | _PAGE_PRESENT);
-        if ( guest_supports_nx(v) )
-            pass_thru_flags |= _PAGE_NX_BIT;
-    }
-
-    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
-    // L3e's; they are all implied.  So we emulate them here.
-    //
-    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
-        gflags = pass_thru_flags;
 
     // Propagate bits from the guest to the shadow.
     // Some of these may be overwritten, below.
     // Since we know the guest's PRESENT bit is set, we also set the shadow's
     // SHADOW_PRESENT bit.
     //
+    pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+                       _PAGE_RW | _PAGE_PRESENT);
+    if ( guest_supports_nx(v) )
+        pass_thru_flags |= _PAGE_NX_BIT;
     sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
 
     // Copy the guest's RW bit into the SHADOW_RW bit.
@@ -800,8 +743,7 @@ do {                                    
     // If the A or D bit has not yet been set in the guest, then we must
     // prevent the corresponding kind of access.
     //
-    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
-                  !(gflags & _PAGE_ACCESSED)) )
+    if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
         sflags &= ~_PAGE_PRESENT;
 
     /* D bits exist in L1es and PSE L2es */
@@ -890,9 +832,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                   fetch_type_names[ft], gl4e->l4, sl4p->l4);
     ASSERT(sflags != -1);
 }
-#endif // GUEST_PAGING_LEVELS >= 4
-
-#if GUEST_PAGING_LEVELS >= 3
+
 static void
 l3e_propagate_from_guest(struct vcpu *v,
                          guest_l3e_t *gl3e,
@@ -912,7 +852,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                   fetch_type_names[ft], gl3e->l3, sl3p->l3);
     ASSERT(sflags != -1);
 }
-#endif // GUEST_PAGING_LEVELS >= 3
+#endif // GUEST_PAGING_LEVELS >= 4
 
 static void
 l2e_propagate_from_guest(struct vcpu *v, 
@@ -1081,9 +1021,6 @@ shadow_write_entries(void *d, void *s, i
         safe_write_entry(dst++, src++);
 
     if ( map != NULL ) sh_unmap_domain_page(map);
-
-    /* XXX TODO:
-     * Update min/max field in page_info struct of this mfn */
 }
 
 static inline int
@@ -1195,9 +1132,7 @@ static int shadow_set_l4e(struct vcpu *v
     }
     return flags;
 }
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-#if GUEST_PAGING_LEVELS >= 3
+
 static int shadow_set_l3e(struct vcpu *v, 
                           shadow_l3e_t *sl3e, 
                           shadow_l3e_t new_sl3e, 
@@ -1224,28 +1159,6 @@ static int shadow_set_l3e(struct vcpu *v
     shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
     flags |= SHADOW_SET_CHANGED;
 
-#if GUEST_PAGING_LEVELS == 3 
-    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
-     * the linear pagetable entries of its l2s, and may also be copied
-     * to a low memory location to make it fit in CR3.  Report that we
-     * need to resync those copies (we can't wait for the guest to flush
-     * the TLB because it might be an increase in rights). */
-    {
-        struct vcpu *vcpu;
-
-        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
-        for_each_vcpu(v->domain, vcpu)
-        {
-            if (info->vcpus & (1 << vcpu->vcpu_id))
-            {
-                // Remember that this flip/update needs to occur.
-                vcpu->arch.shadow.pae_flip_pending = 1;
-                flags |= SHADOW_SET_L3PAE_RECOPY;
-            }
-        }
-    }
-#endif
-
     if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) 
     {
         /* We lost a reference to an old mfn. */
@@ -1260,7 +1173,7 @@ static int shadow_set_l3e(struct vcpu *v
     }
     return flags;
 }
-#endif /* GUEST_PAGING_LEVELS >= 3 */ 
+#endif /* GUEST_PAGING_LEVELS >= 4 */ 
 
 static int shadow_set_l2e(struct vcpu *v, 
                           shadow_l2e_t *sl2e, 
@@ -1535,51 +1448,7 @@ do {                                    
 
 #endif /* different kinds of l2 */
 
-#if GUEST_PAGING_LEVELS == 3
-
-/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
-#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)             \
-do {                                                                    \
-    int _i;                                                             \
-    for ( _i = 0; _i < 4; _i++ )                                        \
-    {                                                                   \
-        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
-            {_code}                                                     \
-        if ( _done ) break;                                             \
-        _sl3e++;                                                        \
-        increment_ptr_to_guest_entry(_gl3p);                            \
-    }                                                                   \
-} while (0)
-
-/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
-#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
-do {                                                                    \
-    int _i, _j, _k, __done = 0;                                         \
-    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
-           == PGC_SH_l3_pae_shadow);                                   \
-    /* The subshadows are split, 64 on each page of the shadow */       \
-    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
-    {                                                                   \
-        void *_sp = sh_map_domain_page(_sl3mfn);                       \
-        for ( _i = 0; _i < 64; _i++ )                                   \
-        {                                                               \
-            /* Every second 32-byte region is a bookkeeping entry */    \
-            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
-            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
-                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                   \
-                                        ({ __done = (_done); __done; }), \
-                                        _code);                         \
-            else                                                        \
-                for ( _k = 0 ; _k < 4 ; _k++ )                          \
-                    increment_ptr_to_guest_entry(_gl3p);                \
-            if ( __done ) break;                                        \
-        }                                                               \
-        sh_unmap_domain_page(_sp);                                     \
-        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
-    }                                                                   \
-} while (0)
-
-#elif GUEST_PAGING_LEVELS == 4
+#if GUEST_PAGING_LEVELS == 4
 
 /* 64-bit l3: touch all entries */
 #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
@@ -1711,8 +1580,8 @@ void sh_install_xen_entries_in_l2h(struc
     
     /* We don't set up a linear mapping here because we can't until this
      * l2h is installed in an l3e.  sh_update_linear_entries() handles
-     * the linear mappings when the l3 is loaded.  We zero them here, just as
-     * a safety measure.
+     * the linear mappings when CR3 (and so the fourth l3e) is loaded.  
+     * We zero them here, just as a safety measure.
      */
     for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
         sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
@@ -1739,37 +1608,6 @@ void sh_install_xen_entries_in_l2h(struc
     }
     
     sh_unmap_domain_page(sl2e);
-}
-
-void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
-{
-    shadow_l3e_t *sl3e;
-    guest_l3e_t *gl3e = v->arch.guest_vtable;
-    shadow_l3e_t new_sl3e;
-    gfn_t l2gfn;
-    mfn_t l2gmfn, l2smfn;
-    int r;
-
-    ASSERT(!shadow_mode_external(v->domain));
-    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
-    l2gfn = guest_l3e_get_gfn(gl3e[3]);
-    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
-    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
-    if ( !valid_mfn(l2smfn) )
-    {
-        /* must remove write access to this page before shadowing it */
-        // XXX -- should check to see whether this is better with level==0 or
-        // level==2...
-        if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
-            flush_tlb_mask(v->domain->domain_dirty_cpumask);
- 
-        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
-    }
-    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
-                             ft_prefetch);
-    sl3e = sh_map_domain_page(sl3mfn);
-    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
-    sh_unmap_domain_page(sl3e);
 }
 #endif
 
@@ -1827,8 +1665,6 @@ void sh_install_xen_entries_in_l2(struct
 
 
 
-
-
 /**************************************************************************/
 /* Create a shadow of a given guest page.
  */
@@ -1839,7 +1675,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
     SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
                   mfn_x(gmfn), shadow_type, mfn_x(smfn));
 
-    if ( shadow_type != PGC_SH_guest_root_type )
+    if ( shadow_type != PGC_SH_l2_32_shadow 
+         && shadow_type != PGC_SH_l2_pae_shadow 
+         && shadow_type != PGC_SH_l2h_pae_shadow 
+         && shadow_type != PGC_SH_l4_64_shadow )
         /* Lower-level shadow, not yet linked from a higher level */
         mfn_to_page(smfn)->up = 0;
 
@@ -1853,8 +1692,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
             sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
 #endif
 #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
-        case PGC_SH_l3_shadow:
-            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
         case PGC_SH_l2h_shadow:
             sh_install_xen_entries_in_l2h(v, smfn); break;
 #endif
@@ -1988,20 +1825,16 @@ static shadow_l4e_t * shadow_get_and_cre
                                                 mfn_t *sl4mfn)
 {
     /* There is always a shadow of the top level table.  Get it. */
-    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
+    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
     /* Reading the top level table is always valid. */
     return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
 }
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-
-#if GUEST_PAGING_LEVELS >= 3
+
 static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, 
                                                 walk_t *gw, 
                                                 mfn_t *sl3mfn,
                                                 fetch_type_t ft)
 {
-#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
     mfn_t sl4mfn;
     shadow_l4e_t *sl4e;
     if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
@@ -2032,19 +1865,8 @@ static shadow_l3e_t * shadow_get_and_cre
     }
     /* Now follow it down a level.  Guaranteed to succeed. */
     return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
-#else /* PAE... */
-    /* There is always a shadow of the top level table.  Get it. */
-    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
-    /* This next line is important: the shadow l3 table is in an 8k
-     * shadow and we need to return the right mfn of the pair. This call
-     * will set it for us as a side-effect. */
-    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
-    ASSERT(v->arch.shadow_vtable);
-    return ((shadow_l3e_t *)v->arch.shadow_vtable) 
-        + shadow_l3_table_offset(gw->va);
+}
 #endif /* GUEST_PAGING_LEVELS >= 4 */
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
 
 
 static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, 
@@ -2052,7 +1874,7 @@ static shadow_l2e_t * shadow_get_and_cre
                                                 mfn_t *sl2mfn,
                                                 fetch_type_t ft)
 {
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     shadow_l3e_t *sl3e;
     if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
@@ -2080,17 +1902,22 @@ static shadow_l2e_t * shadow_get_and_cre
                                  *sl2mfn, &new_sl3e, ft);
         r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
         ASSERT((r & SHADOW_SET_FLUSH) == 0);
-#if GUEST_PAGING_LEVELS == 3 
-        /* Need to sync up the linear maps, as we are about to use them */
-        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
-        sh_pae_recopy(v->domain);
-#endif
     }
     /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#elif GUEST_PAGING_LEVELS == 3 /* PAE... */
+    /* We never demand-shadow PAE l3es: they are only created in
+     * sh_update_cr3().  Check if the relevant sl3e is present. */
+    shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) 
+        + shadow_l3_linear_offset(gw->va);
+    if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) 
+        return NULL;
+    *sl2mfn = shadow_l3e_get_mfn(*sl3e);
+    ASSERT(valid_mfn(*sl2mfn));
     return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
 #else /* 32bit... */
     /* There is always a shadow of the top level table.  Get it. */
-    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
+    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
     /* This next line is important: the guest l2 has a 16k
      * shadow, we need to return the right mfn of the four. This
      * call will set it for us as a side-effect. */
@@ -2213,9 +2040,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
     /* Put the memory back in the pool */
     shadow_free(v->domain, smfn);
 }
-#endif    
-
-#if GUEST_PAGING_LEVELS >= 3
+
 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l3e_t *sl3e;
@@ -2230,10 +2055,6 @@ void sh_destroy_l3_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 3
-    /* Take this shadow off the list of root shadows */
-    list_del_init(&mfn_to_page(smfn)->list);
-#endif
 
     /* Decrement refcounts of all the old entries */
     sl3mfn = smfn; 
@@ -2247,53 +2068,8 @@ void sh_destroy_l3_shadow(struct vcpu *v
     /* Put the memory back in the pool */
     shadow_free(v->domain, smfn);
 }
-#endif    
-
-
-#if GUEST_PAGING_LEVELS == 3
-static void sh_destroy_l3_subshadow(struct vcpu *v, 
-                                     shadow_l3e_t *sl3e)
-/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
-{
-    int i;
-    mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT);
-    ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); 
-    for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) 
-        if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) 
-            shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn);
-}
-#endif
-
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
-/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
-{
-    int i, j;
-    struct pae_l3_bookkeeping *bk;
-    
-    ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
-           == PGC_SH_l3_pae_shadow);
-    /* The subshadows are split, 64 on each page of the shadow */
-    for ( i = 0; i < 2; i++ ) 
-    {
-        void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
-        for ( j = 0; j < 64; j++ )
-        {
-            /* Every second 32-byte region is a bookkeeping entry */
-            bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
-            if ( bk->pinned )
-                sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
-            /* Check whether we've just freed the whole shadow */
-            if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) 
-            {
-                sh_unmap_domain_page(p);
-                return;
-            }
-        }
-        sh_unmap_domain_page(p);
-    }
-}
-#endif
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
 
 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
 {
@@ -2311,7 +2087,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 2
+#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
     /* Take this shadow off the list of root shadows */
     list_del_init(&mfn_to_page(smfn)->list);
 #endif
@@ -2421,31 +2197,14 @@ void sh_unhook_32b_mappings(struct vcpu 
 
 #elif GUEST_PAGING_LEVELS == 3
 
-void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
-/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
-{
-    shadow_l3e_t *sl3e;
-    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
-        if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
-            mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
-            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 
-                 == PGC_SH_l2h_pae_shadow ) 
-            {
-                /* High l2: need to pick particular l2es to unhook */
-                shadow_l2e_t *sl2e;
-                SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
-                    (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
-                });
-            }
-            else
-            {
-                /* Normal l2: can safely unhook the whole l3e */
-                (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
-            }
-        }
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
+/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
+{
+    shadow_l2e_t *sl2e;
+    int xen_mappings = !shadow_mode_external(v->domain);
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
     });
-    /* We've changed PAE L3 entries: must sync up various copies of them */
-    sh_pae_recopy(v->domain);
 }
 
 #elif GUEST_PAGING_LEVELS == 4
@@ -2523,9 +2282,8 @@ static int validate_gl4e(struct vcpu *v,
     result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
     return result;
 }
-#endif // GUEST_PAGING_LEVELS >= 4
-
-#if GUEST_PAGING_LEVELS >= 3
+
+
 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
 {
     shadow_l3e_t new_sl3e;
@@ -2535,16 +2293,6 @@ static int validate_gl3e(struct vcpu *v,
     int result = 0;
 
     perfc_incrc(shadow_validate_gl3e_calls);
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-    {
-        /* If we've updated a subshadow which is unreferenced then 
-           we don't care what value is being written - bail. */
-        struct pae_l3_bookkeeping *info = sl3p_to_info(se); 
-        if(!info->refcount)
-            return result; 
-    }
-#endif
 
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
@@ -2559,16 +2307,9 @@ static int validate_gl3e(struct vcpu *v,
                              sl2mfn, &new_sl3e, ft_prefetch);
     result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
 
-#if GUEST_PAGING_LEVELS == 3
-    /* We have changed a PAE l3 entry: need to sync up the possible copies 
-     * of it */
-    if ( result & SHADOW_SET_L3PAE_RECOPY )
-        sh_pae_recopy(v->domain);
-#endif
-
     return result;
 }
-#endif // GUEST_PAGING_LEVELS >= 3
+#endif // GUEST_PAGING_LEVELS >= 4
 
 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
 {
@@ -2755,12 +2496,12 @@ sh_map_and_validate_gl3e(struct vcpu *v,
 sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
                           void *new_gl3p, u32 size)
 {
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
     return sh_map_and_validate(v, gl3mfn, new_gl3p, size, 
                                 PGC_SH_l3_shadow, 
                                 shadow_l3_index, 
                                 validate_gl3e);
-#else // ! GUEST_PAGING_LEVELS >= 3
+#else // ! GUEST_PAGING_LEVELS >= 4
     SHADOW_PRINTK("called in wrong paging mode!\n");
     BUG();
     return 0;
@@ -2822,10 +2563,10 @@ static inline void check_for_early_unsha
     {
         u32 flags = mfn_to_page(gmfn)->shadow_flags;
         mfn_t smfn;
-        if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
+        if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
         {
             perfc_incrc(shadow_early_unshadow);
-            sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
+            sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
             return;
         }
         /* SHF_unhooked_mappings is set to make sure we only unhook
@@ -2840,9 +2581,14 @@ static inline void check_for_early_unsha
                 smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
                 shadow_unhook_mappings(v, smfn);
             }
-            if ( flags & SHF_L3_PAE ) 
+            if ( flags & SHF_L2_PAE ) 
             {
-                smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l2_pae_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+            if ( flags & SHF_L2H_PAE ) 
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l2h_pae_shadow);
                 shadow_unhook_mappings(v, smfn);
             }
             if ( flags & SHF_L4_64 ) 
@@ -3134,7 +2880,6 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     reset_early_unshadow(v);
     shadow_unlock(d);
-    sh_log_mmio(v, gpa);
     handle_mmio(va, gpa);
     return EXCRET_fault_fixed;
 
@@ -3183,8 +2928,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
             return 0;
     }
 #elif SHADOW_PAGING_LEVELS == 3
-    if ( !(shadow_l3e_get_flags(
-          ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
+    if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)])
            & _PAGE_PRESENT) )
         // no need to flush anything if there's no SL2...
         return 0;
@@ -3247,34 +2991,6 @@ sh_gva_to_gpa(struct vcpu *v, unsigned l
     else
         return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
 }
-
-
-// XXX -- should this be in this file?
-//        Or should it be moved to shadow-common.c?
-//
-/* returns a lowmem machine address of the copied HVM L3 root table
- * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
- * otherwise blank out any entries with reserved bits in them.  */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long
-hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
-{
-    int i, f;
-    int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
-    l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-    memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
-    for ( i = 0; i < 4; i++ )
-    {
-        f = l3e_get_flags(l3tab[i]);
-        if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
-            new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
-        else
-            new_l3e = l3e_empty();
-        safe_write_entry(&copy[i], &new_l3e);
-    }
-    return __pa(copy);
-}
-#endif
 
 
 static inline void
@@ -3330,7 +3046,7 @@ sh_update_linear_entries(struct vcpu *v)
         if ( v == current ) 
         {
             __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
         } 
         else
@@ -3338,7 +3054,7 @@ sh_update_linear_entries(struct vcpu *v)
             l4_pgentry_t *ml4e;
             ml4e = 
sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
             ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
             sh_unmap_domain_page(ml4e);
         }
@@ -3379,13 +3095,8 @@ sh_update_linear_entries(struct vcpu *v)
             sh_unmap_domain_page(ml4e);
         }
 
-#if GUEST_PAGING_LEVELS == 2
         /* Shadow l3 tables are made up by update_cr3 */
-        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
-        /* Always safe to use shadow_vtable, because it's globally mapped */
-        sl3e = v->arch.shadow_vtable;
-#endif
+        sl3e = v->arch.shadow.l3table;
 
         for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
         {
@@ -3424,14 +3135,14 @@ sh_update_linear_entries(struct vcpu *v)
 #if GUEST_PAGING_LEVELS == 2
         /* Shadow l3 tables were built by update_cr3 */
         if ( shadow_mode_external(d) )
-            shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+            shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
         else
             BUG(); /* PV 2-on-3 is not supported yet */
         
 #else /* GUEST_PAGING_LEVELS == 3 */
         
-        /* Always safe to use *_vtable, because they're globally mapped */
-        shadow_l3e = v->arch.shadow_vtable;
+        shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
+        /* Always safe to use guest_vtable, because it's globally mapped */
         guest_l3e = v->arch.guest_vtable;
 
 #endif /* GUEST_PAGING_LEVELS */
@@ -3510,7 +3221,7 @@ sh_update_linear_entries(struct vcpu *v)
         if ( v == current ) 
         {
             __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
         } 
         else
@@ -3518,7 +3229,7 @@ sh_update_linear_entries(struct vcpu *v)
             l2_pgentry_t *ml2e;
             ml2e = 
sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
             ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
             sh_unmap_domain_page(ml2e);
         }
@@ -3530,69 +3241,7 @@ sh_update_linear_entries(struct vcpu *v)
 }
 
 
-// XXX -- should this be in this file?
-//        Or should it be moved to shadow-common.c?
-//
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh_pae_recopy(struct domain *d)
-/* Called whenever we write to the l3 entries of a PAE pagetable which 
- * is currently in use.  Each vcpu that is using the table needs to 
- * resync its copies of the l3s in linear maps and any low-memory
- * copies it might have made for fitting into 32bit CR3.
- * Since linear maps are also resynced when we change CR3, we don't
- * need to worry about changes to PAE l3es that are not currently in use.*/
-{
-    struct vcpu *v;
-    cpumask_t flush_mask = CPU_MASK_NONE;
-    ASSERT(shadow_lock_is_acquired(d));
-    
-    for_each_vcpu(d, v)
-    {
-        if ( !v->arch.shadow.pae_flip_pending ) 
-            continue;
-
-        cpu_set(v->processor, flush_mask);
-        
-        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
-
-        /* This vcpu has a copy in its linear maps */
-        sh_update_linear_entries(v);
-        if ( hvm_guest(v) )
-        {
-            /* This vcpu has a copy in its HVM PAE l3 */
-            v->arch.hvm_vcpu.hw_cr3 = 
-                hvm_pae_copy_root(v, v->arch.shadow_vtable,
-                                  !shadow_vcpu_mode_translate(v));
-        }
-#if CONFIG_PAGING_LEVELS == 3
-        else 
-        {
-            /* This vcpu might have copied the l3 to below 4GB */
-            if ( v->arch.cr3 >> PAGE_SHIFT 
-                 != pagetable_get_pfn(v->arch.shadow_table) )
-            {
-                /* Recopy to where that copy is. */
-                int i;
-                l3_pgentry_t *dst, *src;
-                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
-                src = v->arch.shadow_vtable;
-                for ( i = 0 ; i < 4 ; i++ ) 
-                    safe_write_entry(dst + i, src + i);
-            }
-        }
-#endif
-        v->arch.shadow.pae_flip_pending = 0;        
-    }
-
-    flush_tlb_mask(flush_mask);
-}
-#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
-
-
-/* removes:
- *     vcpu->arch.guest_vtable
- *     vcpu->arch.shadow_table
- *     vcpu->arch.shadow_vtable
+/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[].
  * Does all appropriate management/bookkeeping/refcounting/etc...
  */
 static void
@@ -3600,6 +3249,7 @@ sh_detach_old_tables(struct vcpu *v)
 {
     struct domain *d = v->domain;
     mfn_t smfn;
+    int i = 0;
 
     ////
     //// vcpu->arch.guest_vtable
@@ -3620,56 +3270,80 @@ sh_detach_old_tables(struct vcpu *v)
     }
 
     ////
-    //// vcpu->arch.shadow_table
+    //// vcpu->arch.shadow_table[]
     ////
-    smfn = pagetable_get_mfn(v->arch.shadow_table);
-    if ( mfn_x(smfn) )
-    {
-        ASSERT(v->arch.shadow_vtable);
+
 
 #if GUEST_PAGING_LEVELS == 3
-        // PAE guests do not (necessarily) use an entire page for their
-        // 4-entry L3s, so we have to deal with them specially.
-        //
-        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
-#else
-        sh_put_ref(v, smfn, 0);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-        {
-            struct pae_l3_bookkeeping *info =
-                sl3p_to_info(v->arch.shadow_vtable);
-            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
-            clear_bit(v->vcpu_id, &info->vcpus);
-        }
-#endif
-        v->arch.shadow_table = pagetable_null();
-    }
-
-    ////
-    //// vcpu->arch.shadow_vtable
-    ////
-    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
-         v->arch.shadow_vtable )
-    {
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        /* A: so sh_update_linear_entries can operate on other vcpus */
-        sh_unmap_domain_page_global(v->arch.shadow_vtable);
-        v->arch.shadow_vtable = NULL;
-    }
-}
+    /* PAE guests have four shadow_table entries */
+    for ( i = 0 ; i < 4 ; i++ )
+#endif
+    {
+        smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
+        if ( mfn_x(smfn) )
+            sh_put_ref(v, smfn, 0);
+        v->arch.shadow_table[i] = pagetable_null();
+    }
+}
+
+/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+static void
+sh_set_toplevel_shadow(struct vcpu *v, 
+                       int slot,
+                       mfn_t gmfn, 
+                       unsigned int root_type) 
+{
+    mfn_t smfn = get_shadow_status(v, gmfn, root_type);
+    struct domain *d = v->domain;
+    ASSERT(pagetable_is_null(v->arch.shadow_table[slot]));
+    if ( valid_mfn(smfn) )
+    {
+        /* Pull this root shadow to the front of the list of roots. */
+        list_del(&mfn_to_page(smfn)->list);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    else
+    {
+        /* This guest MFN is a pagetable.  Must revoke write access. */
+        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 )
+            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+        /* Make sure there's enough free shadow memory. */
+        shadow_prealloc(d, SHADOW_MAX_ORDER); 
+        /* Shadow the page. */
+        smfn = sh_make_shadow(v, gmfn, root_type);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    ASSERT(valid_mfn(smfn));
+    
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    /* Once again OK to unhook entries from this table if we see fork/exit */
+    ASSERT(sh_mfn_is_a_page_table(gmfn));
+    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
+#endif
+
+    /* Take a ref to this page: it will be released in sh_detach_old_tables. */
+    sh_get_ref(smfn, 0);
+    sh_pin(smfn);
+
+    /* Done.  Install it */
+    SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n",
+                  GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
+                  mfn_x(gmfn), mfn_x(smfn));
+    v->arch.shadow_table[slot] = pagetable_from_mfn(smfn);
+}
+
 
 static void
 sh_update_cr3(struct vcpu *v)
-/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
+/* Updates vcpu->arch.cr3 after the guest has changed CR3.
  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
  * if appropriate).
- * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
+ * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
+ * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
  */
 {
     struct domain *d = v->domain;
-    mfn_t gmfn, smfn;
+    mfn_t gmfn;
 #if GUEST_PAGING_LEVELS == 3
     u32 guest_idx=0;
 #endif
@@ -3770,159 +3444,102 @@ sh_update_cr3(struct vcpu *v)
 #endif
 
     ////
-    //// vcpu->arch.shadow_table
+    //// vcpu->arch.shadow_table[]
     ////
-    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
-    if ( valid_mfn(smfn) )
-    {
-        /* Pull this root shadow to the front of the list of roots. */
-        list_del(&mfn_to_page(smfn)->list);
-        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
-    }
-    else
-    {
-        /* This guest MFN is a pagetable.  Must revoke write access. */
-        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) 
-             != 0 )
-            flush_tlb_mask(d->domain_dirty_cpumask); 
-        /* Make sure there's enough free shadow memory. */
-        shadow_prealloc(d, SHADOW_MAX_ORDER); 
-        /* Shadow the page. */
-        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
-        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
-    }
-    ASSERT(valid_mfn(smfn));
-    v->arch.shadow_table = pagetable_from_mfn(smfn);
-
-#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    /* Once again OK to unhook entries from this table if we see fork/exit */
-    ASSERT(sh_mfn_is_a_page_table(gmfn));
-    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
-#endif
-
-
-    ////
-    //// vcpu->arch.shadow_vtable
-    ////
-    if ( shadow_mode_external(d) )
-    {
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-        mfn_t adjusted_smfn = smfn;
-        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        v->arch.shadow_vtable =
-            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
-            shadow_idx;
+
+#if GUEST_PAGING_LEVELS == 2
+    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l2_shadow);
+#elif GUEST_PAGING_LEVELS == 3
+    /* PAE guests have four shadow_table entries, based on the 
+     * current values of the guest's four l3es. */
+    {
+        int i;
+        guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable;
+        for ( i = 0; i < 4; i++ ) 
+        {
+            ASSERT(pagetable_is_null(v->arch.shadow_table[i]));
+            if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
+            {
+                gfn_t gl2gfn = guest_l3e_get_gfn(gl3e[i]);
+                mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
+                if ( valid_mfn(gl2mfn) )                
+                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
+                                           ? PGC_SH_l2h_shadow 
+                                           : PGC_SH_l2_shadow);
+            }
+        }
+    }
+#elif GUEST_PAGING_LEVELS == 4
+    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow);
 #else
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
-#endif
-    }
-    else
-    {
-#if SHADOW_PAGING_LEVELS == 4
-        v->arch.shadow_vtable = __sh_linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
-        // XXX - why does this need a global map?
-        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#error This should never happen 
+#endif
+
+#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+#endif
+
+    /// 
+    /// v->arch.shadow.l3table
+    ///
+#if SHADOW_PAGING_LEVELS == 3
+        {
+            mfn_t smfn;
+            int i;
+            for ( i = 0; i < 4; i++ )
+            {
+#if GUEST_PAGING_LEVELS == 2
+                /* 2-on-3: make a PAE l3 that points at the four-page l2 */
+                smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i);
 #else
-        v->arch.shadow_vtable = __sh_linear_l2_table;
-#endif
-    }
-
-#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-    // Now that shadow_vtable is in place, check that the sl3e[3] is properly
-    // shadowed and installed in PAE PV guests...
-    if ( !shadow_mode_external(d) &&
-         !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
-           _PAGE_PRESENT) )
-    {
-        sh_install_xen_entries_in_l3(v, gmfn, smfn);
-    }
-#endif
-
-    ////
-    //// Take a ref to the new shadow table, and pin it.
-    ////
-    //
-    // This ref is logically "held" by v->arch.shadow_table entry itself.
-    // Release the old ref.
-    //
-#if GUEST_PAGING_LEVELS == 3
-    // PAE guests do not (necessarily) use an entire page for their
-    // 4-entry L3s, so we have to deal with them specially.
-    //
-    // XXX - might want to revisit this if/when we do multiple compilation for
-    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
-    //       subshadows.
-    //
-    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
-    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
-#else
-    sh_get_ref(smfn, 0);
-    sh_pin(smfn);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-    // PAE 3-on-3 shadows have to keep track of which vcpu's are using
-    // which l3 subshadow, in order handle the SHADOW_SET_L3PAE_RECOPY
-    // case from validate_gl3e().  Search for SHADOW_SET_L3PAE_RECOPY
-    // in the code for more info.
-    //
-    {
-        struct pae_l3_bookkeeping *info =
-            sl3p_to_info(v->arch.shadow_vtable);
-        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
-        set_bit(v->vcpu_id, &info->vcpus);
-    }
-#endif
-
-    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
-                      __func__, gmfn, smfn);
+                /* 3-on-3: make a PAE l3 that points at the four l2 pages */
+                smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
+#endif
+                v->arch.shadow.l3table[i] = 
+                    (mfn_x(smfn) == 0) 
+                    ? shadow_l3e_empty()
+                    : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
+            }
+        }
+#endif /* SHADOW_PAGING_LEVELS == 3 */
+
 
     ///
-    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
+    /// v->arch.cr3
     ///
     if ( shadow_mode_external(d) )
     {
-        ASSERT(hvm_guest(v));
         make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
-
-#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
-#if SHADOW_PAGING_LEVELS != 3
-#error unexpected combination of GUEST and SHADOW paging levels
-#endif
-        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
-        {
-            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
-            int i;
-
-            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
-                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
-            for (i = 0; i < 4; i++)
-            {
-                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
-                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
-            }
-        }
-#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
-         * If paging is disabled, clear l3e reserved bits; otherwise 
-         * remove entries that have reserved bits set. */
-        v->arch.hvm_vcpu.hw_cr3 =
-            hvm_pae_copy_root(v, v->arch.shadow_vtable, 
-                              !shadow_vcpu_mode_translate(v));
-#else
-        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
-        v->arch.hvm_vcpu.hw_cr3 =
-            pagetable_get_paddr(v->arch.shadow_table);
-#endif
     }
     else // not shadow_mode_external...
     {
         /* We don't support PV except guest == shadow == config levels */
         BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
-        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
+#if SHADOW_PAGING_LEVELS == 3
+        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated.
+         * Don't use make_cr3 because (a) we know it's below 4GB, and
+         * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */
+        ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL);
+        v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table);
+#else
+        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
+        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0]));
+#endif
+    }
+
+
+    ///
+    /// v->arch.hvm_vcpu.hw_cr3
+    ///
+    if ( shadow_mode_external(d) )
+    {
+        ASSERT(hvm_guest(v));
+#if SHADOW_PAGING_LEVELS == 3
+        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
+        v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table);
+#else
+        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
+        v->arch.hvm_vcpu.hw_cr3 = pagetable_get_paddr(v->arch.shadow_table[0]);
+#endif
     }
 
     /* Fix up the linear pagetable mappings */
@@ -3950,7 +3567,6 @@ static int sh_guess_wrmap(struct vcpu *v
 
 
     /* Carefully look in the shadow linear map for the l1e we expect */
-    if ( v->arch.shadow_vtable == NULL ) return 0;
 #if GUEST_PAGING_LEVELS >= 4
     sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
     if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
@@ -3959,7 +3575,7 @@ static int sh_guess_wrmap(struct vcpu *v
     if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
         return 0;
 #elif GUEST_PAGING_LEVELS == 3
-    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) 
+    sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) 
         + shadow_l3_linear_offset(vaddr);
     if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
         return 0;
@@ -3988,6 +3604,7 @@ int sh_remove_write_access(struct vcpu *
     shadow_l1e_t *sl1e;
     int done = 0;
     int flags;
+    mfn_t base_sl1mfn = sl1mfn; /* Because sl1mfn changes in the foreach */
     
     SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
     {
@@ -3997,6 +3614,10 @@ int sh_remove_write_access(struct vcpu *
              && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
         {
             shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 
+            /* Remember the last shadow that we shot a writeable mapping in */
+            v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
+#endif
             if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
                   & PGT_count_mask) == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
@@ -4044,13 +3665,11 @@ void sh_clear_shadow_entry(struct vcpu *
     case PGC_SH_l2h_shadow:
 #endif
         shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
     case PGC_SH_l3_shadow:
         shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 4
     case PGC_SH_l4_shadow:
         shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
-#endif
 #endif
     default: BUG(); /* Called with the wrong kind of shadow. */
     }
@@ -4081,7 +3700,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
     return done;
 }
 
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
 int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
 /* Remove all mappings of this l2 shadow from this l3 shadow */
 {
@@ -4104,7 +3723,6 @@ int sh_remove_l2_shadow(struct vcpu *v, 
     return done;
 }
 
-#if GUEST_PAGING_LEVELS >= 4
 int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
 /* Remove all mappings of this l3 shadow from this l4 shadow */
 {
@@ -4127,7 +3745,6 @@ int sh_remove_l3_shadow(struct vcpu *v, 
     return done;
 }
 #endif /* 64bit guest */ 
-#endif /* PAE guest */
 
 /**************************************************************************/
 /* Handling HVM guest writes to pagetables  */
@@ -4448,7 +4065,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
     return 0;
 }
 
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
 int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
 {
     guest_l3e_t *gl3e, *gp;
@@ -4486,9 +4103,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
     sh_unmap_domain_page(gp);
     return 0;
 }
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-#if GUEST_PAGING_LEVELS >= 4
+
 int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
 {
     guest_l4e_t *gl4e, *gp;
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h    Wed Oct 18 14:36:20 2006 +0100
@@ -49,10 +49,6 @@ extern void
 extern void 
 SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
     struct vcpu *v, mfn_t smfn);
-
-extern void
-SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
-    (struct vcpu *v, mfn_t smfn);
 
 extern void 
 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Wed Oct 18 14:36:20 2006 +0100
@@ -178,77 +178,6 @@ extern void shadow_audit_p2m(struct doma
 
 
 /******************************************************************************
- * Mechanism for double-checking the optimized pagefault path: this
- * structure contains a record of actions taken by the fault handling
- * code.  In paranoid mode, the fast-path code fills out one of these
- * structures (but doesn't take any actual action) and then the normal 
- * path fills in another.  When the fault handler finishes, the 
- * two are compared */
-
-#ifdef SHADOW_OPTIMIZATION_PARANOIA
-
-typedef struct shadow_action_log sh_log_t;
-struct shadow_action_log {
-    paddr_t ad[CONFIG_PAGING_LEVELS];  /* A & D bits propagated here */
-    paddr_t mmio;                      /* Address of an mmio operation */
-    int rv;                            /* Result of the fault handler */
-};
-
-/* There are two logs, one for the fast path, one for the normal path */
-enum sh_log_type { log_slow = 0, log_fast= 1 };
-
-/* Alloc and zero the logs */
-static inline void sh_init_log(struct vcpu *v) 
-{
-    if ( unlikely(!v->arch.shadow.action_log) ) 
-        v->arch.shadow.action_log = xmalloc_array(sh_log_t, 2);
-    ASSERT(v->arch.shadow.action_log);
-    memset(v->arch.shadow.action_log, 0, 2 * sizeof (sh_log_t));
-}
-
-/* Log an A&D-bit update */
-static inline void sh_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
-{
-    v->arch.shadow.action_log[v->arch.shadow.action_index].ad[level] = e;
-}
-
-/* Log an MMIO address */
-static inline void sh_log_mmio(struct vcpu *v, paddr_t m)
-{
-    v->arch.shadow.action_log[v->arch.shadow.action_index].mmio = m;
-}
-
-/* Log the result */
-static inline void sh_log_rv(struct vcpu *v, int rv)
-{
-    v->arch.shadow.action_log[v->arch.shadow.action_index].rv = rv;
-}
-
-/* Set which mode we're in */
-static inline void sh_set_log_mode(struct vcpu *v, enum sh_log_type t) 
-{
-    v->arch.shadow.action_index = t;
-}
-
-/* Know not to take action, because we're only checking the mechanism */
-static inline int sh_take_no_action(struct vcpu *v) 
-{
-    return (v->arch.shadow.action_index == log_fast);
-}
-
-#else /* Non-paranoid mode: these logs do not exist */
-
-#define sh_init_log(_v) do { (void)(_v); } while(0)
-#define sh_set_log_mode(_v,_t) do { (void)(_v); } while(0)
-#define sh_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
-#define sh_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
-#define sh_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
-#define sh_take_no_action(_v) (((void)(_v)), 0)
-
-#endif /* SHADOW_OPTIMIZATION_PARANOIA */
-
-
-/******************************************************************************
  * Macro for dealing with the naming of the internal names of the
  * shadow code's external entry points.
  */
@@ -336,13 +265,9 @@ void shadow_convert_to_log_dirty(struct 
  * non-Xen mappings in this top-level shadow mfn */
 void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);
 
-/* Re-sync copies of PAE shadow L3 tables if they have been changed */
-void sh_pae_recopy(struct domain *d);
-
 /* Install the xen mappings in various flavours of shadow */
 void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
 void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
-void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
 
 
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/types.h    Wed Oct 18 14:36:20 2006 +0100
@@ -215,8 +215,7 @@ static inline shadow_l4e_t shadow_l4e_fr
      shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
 })
 
-// shadow linear L3 and L4 tables only exist in 4 level paging...
-#if SHADOW_PAGING_LEVELS == 4
+#if SHADOW_PAGING_LEVELS >= 4
 #define sh_linear_l3_table(v) ({ \
     ASSERT(current == (v)); \
     ((shadow_l3e_t *) \
@@ -386,7 +385,6 @@ static inline guest_l4e_t guest_l4e_from
 #define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
 #define PGC_SH_l2_shadow  PGC_SH_l2_pae_shadow
 #define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
-#define PGC_SH_l3_shadow  PGC_SH_l3_pae_shadow
 #else
 #define PGC_SH_l1_shadow  PGC_SH_l1_64_shadow
 #define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
@@ -404,14 +402,6 @@ valid_gfn(gfn_t m)
 {
     return VALID_GFN(gfn_x(m));
 }
-
-#if GUEST_PAGING_LEVELS == 2
-#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
-#elif GUEST_PAGING_LEVELS == 3
-#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
-#else
-#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
-#endif
 
 /* Translation between mfns and gfns */
 static inline mfn_t
@@ -490,8 +480,6 @@ struct shadow_walk_t
 #define sh_map_and_validate_gl1e   INTERNAL_NAME(sh_map_and_validate_gl1e)
 #define sh_destroy_l4_shadow       INTERNAL_NAME(sh_destroy_l4_shadow)
 #define sh_destroy_l3_shadow       INTERNAL_NAME(sh_destroy_l3_shadow)
-#define sh_destroy_l3_subshadow    INTERNAL_NAME(sh_destroy_l3_subshadow)
-#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
 #define sh_destroy_l2_shadow       INTERNAL_NAME(sh_destroy_l2_shadow)
 #define sh_destroy_l1_shadow       INTERNAL_NAME(sh_destroy_l1_shadow)
 #define sh_unhook_32b_mappings     INTERNAL_NAME(sh_unhook_32b_mappings)
@@ -533,115 +521,6 @@ struct shadow_walk_t
                               SHADOW_PAGING_LEVELS)
 
 
-#if GUEST_PAGING_LEVELS == 3
-/*
- * Accounting information stored in the shadow of PAE Guest L3 pages.
- * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
- * various refcounts, etc., on the page_info of their page.  We provide extra
- * bookkeeping space in the shadow itself, and this is the structure
- * definition for that bookkeeping information.
- */
-struct pae_l3_bookkeeping {
-    u32 vcpus;                  /* bitmap of which vcpus are currently storing
-                                 * copies of this 32-byte page */
-    u32 refcount;               /* refcount for this 32-byte page */
-    u8 pinned;                  /* is this 32-byte page pinned or not? */
-};
-
-// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
-#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *)         \
-                            (((unsigned long)(_ptr) & ~31) + 32))
-
-static void sh_destroy_l3_subshadow(struct vcpu *v, 
-                                     shadow_l3e_t *sl3e);
-
-/* Increment a subshadow ref
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-    /* First ref to the subshadow takes a ref to the full shadow */
-    if ( bk->refcount == 0 ) 
-        sh_get_ref(smfn, 0);
-    if ( unlikely(++(bk->refcount) == 0) )
-    {
-        SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n",
-                       mfn_x(smfn), sl3e);
-        domain_crash_synchronous();
-    }
-}
-
-/* Decrement a subshadow ref.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow.  Calling this may cause the 
- * entire shadow to disappear, so the caller must immediately unmap 
- * the pointer after calling. */ 
-static inline void sh_put_ref_l3_subshadow(struct vcpu *v, 
-                                            shadow_l3e_t *sl3e,
-                                            mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk;
-
-    bk = sl3p_to_info(sl3e);
-
-    ASSERT(bk->refcount > 0);
-    if ( --(bk->refcount) == 0 )
-    {
-        /* Need to destroy this subshadow */
-        sh_destroy_l3_subshadow(v, sl3e);
-        /* Last ref to the subshadow had a ref to the full shadow */
-        sh_put_ref(v, smfn, 0);
-    }
-}
-
-/* Pin a subshadow 
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
-    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
-                      __func__, mfn_x(smfn),
-                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
-    if ( !bk->pinned )
-    {
-        bk->pinned = 1;
-        sh_get_ref_l3_subshadow(sl3e, smfn);
-    }
-}
-
-/* Unpin a sub-shadow. 
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow.  Calling this may cause the 
- * entire shadow to disappear, so the caller must immediately unmap 
- * the pointer after calling. */ 
-static inline void sh_unpin_l3_subshadow(struct vcpu *v, 
-                                          shadow_l3e_t *sl3e,
-                                          mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
-    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
-                      __func__, mfn_x(smfn),
-                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
-    if ( bk->pinned )
-    {
-        bk->pinned = 0;
-        sh_put_ref_l3_subshadow(v, sl3e, smfn);
-    }
-}
-
-#endif /* GUEST_PAGING_LEVELS == 3 */
-
 #if SHADOW_PAGING_LEVELS == 3
 #define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
 #endif
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/domain.h      Wed Oct 18 14:36:20 2006 +0100
@@ -134,18 +134,20 @@ struct pae_l3_cache { };
 #endif
 
 struct shadow_vcpu {
+#if CONFIG_PAGING_LEVELS >= 3
+    /* PAE guests: per-vcpu shadow top-level table */
+    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
+#endif
     /* Pointers to mode-specific entry points. */
     struct shadow_paging_mode *mode;
     /* Last MFN that we emulated a write to. */
     unsigned long last_emulated_mfn;
+    /* MFN of the last shadow that we shot a writeable mapping in */
+    unsigned long last_writeable_pte_smfn;
     /* HVM guest: paging enabled (CR0.PG)?  */
     unsigned int translate_enabled:1;
     /* Emulated fault needs to be propagated to guest? */
     unsigned int propagate_fault:1;
-#if CONFIG_PAGING_LEVELS >= 3
-    /* Shadow update requires this PAE cpu to recopy/install its L3 table. */
-    unsigned int pae_flip_pending:1;
-#endif
 };
 
 struct arch_vcpu
@@ -190,13 +192,12 @@ struct arch_vcpu
     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
     /* guest_table holds a ref to the page, and also a type-count unless
      * shadow refcounts are in use */
-    pagetable_t shadow_table;           /* (MFN) shadow of guest */
+    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                     /* (MA) value to install in HW CR3 */
 
-    void *guest_vtable;                 /* virtual address of pagetable */
-    void *shadow_vtable;                /* virtual address of shadow_table */
-    root_pgentry_t *monitor_vtable;            /* virtual address of monitor_table */
+    void *guest_vtable;                 /* virtual addr of pagetable */
+    root_pgentry_t *monitor_vtable;            /* virtual addr of monitor_table */
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Oct 18 14:36:20 2006 +0100
@@ -41,11 +41,6 @@ struct hvm_vcpu {
 
     int                 xen_port;
 
-#if CONFIG_PAGING_LEVELS >= 3
-    l3_pgentry_t hvm_lowmem_l3tab[4]
-    __attribute__((__aligned__(32)));
-#endif
-
     /* Flags */
     int                 flag_dr_dirty;
 
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/mm.h  Wed Oct 18 14:36:20 2006 +0100
@@ -114,15 +114,14 @@ struct page_info
 #define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
 #define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
 #define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH_max_shadow    (13U<<28)
-#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
-#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_l1_64_shadow   (8U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow  (9U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow  (10U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow  (11U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow  (12U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow    (12U<<28)
+#define PGC_SH_p2m_table     (13U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (14U<<28) /* in use as a monitor table */
 #define PGC_SH_unused        (15U<<28)
 
 #define PGC_SH_type_mask     (15U<<28)
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/perfc_defn.h  Wed Oct 18 14:36:20 2006 +0100
@@ -71,6 +71,7 @@ PERFCOUNTER_CPU(shadow_writeable_h_2,  "
 PERFCOUNTER_CPU(shadow_writeable_h_2,  "shadow writeable: 32pae w2k3")
 PERFCOUNTER_CPU(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
 PERFCOUNTER_CPU(shadow_writeable_h_4,  "shadow writeable: 32b linux low")
+PERFCOUNTER_CPU(shadow_writeable_h_5,  "shadow writeable: 32b linux high")
 PERFCOUNTER_CPU(shadow_writeable_bf,   "shadow writeable brute-force")
 PERFCOUNTER_CPU(shadow_mappings,       "shadow removes all mappings")
 PERFCOUNTER_CPU(shadow_mappings_bf,    "shadow rm-mappings brute-force")
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/shadow.h      Wed Oct 18 14:36:20 2006 +0100
@@ -72,7 +72,6 @@
 #define SHADOW_SET_CHANGED            0x1
 #define SHADOW_SET_FLUSH              0x2
 #define SHADOW_SET_ERROR              0x4
-#define SHADOW_SET_L3PAE_RECOPY       0x8
 
 // How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
 #ifdef __x86_64__
@@ -406,7 +405,6 @@ shadow_update_cr3(struct vcpu *v)
  * for HVM guests, arch.monitor_table and hvm's guest CR3.
  *
  * Update ref counts to shadow tables appropriately.
- * For PAE, relocate L3 entries, if necessary, into low memory.
  */
 static inline void update_cr3(struct vcpu *v)
 {
@@ -549,13 +547,13 @@ shadow_remove_all_shadows_and_parents(st
  * Unshadow it, and recursively unshadow pages that reference it. */
 
 /* Remove all shadows of the guest mfn. */
-extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
 static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
 {
     int was_locked = shadow_lock_is_acquired(v->domain);
     if ( !was_locked )
         shadow_lock(v->domain);
-    sh_remove_shadows(v, gmfn, 1);
+    sh_remove_shadows(v, gmfn, 0, 1);
     if ( !was_locked )
         shadow_unlock(v->domain);
 }
@@ -587,7 +585,6 @@ shadow_guest_physmap_remove_page(struct 
 #define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
 #define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
 #define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
-#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
 #define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
 #define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
 #define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
