To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [XEN] Support lightweight shadow-translate PV guests, for paravirt-ops.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 28 Sep 2006 17:50:31 +0000
Delivery-date: Thu, 28 Sep 2006 10:53:04 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Tim Deegan <tim.deegan@xxxxxxxxxxxxx>
# Node ID b6ee084892dad84750bb8aa3fb89056c3aa21633
# Parent  5f42b4824e455c6350a06c4e3061f663e2d2f39e
[XEN] Support lightweight shadow-translate PV guests, for paravirt-ops.
This is a modified subset of Michael Fetterman's shadow-translate work.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/domain.c              |   11 
 xen/arch/x86/mm.c                  |  189 ++++++++--------
 xen/arch/x86/mm/shadow/common.c    |  161 +++++--------
 xen/arch/x86/mm/shadow/multi.c     |  426 +++++++++++++++++++++++--------------
 xen/arch/x86/mm/shadow/multi.h     |    7 
 xen/arch/x86/mm/shadow/private.h   |   49 ----
 xen/arch/x86/mm/shadow/types.h     |   31 ++
 xen/arch/x86/traps.c               |    2 
 xen/include/asm-x86/domain.h       |    2 
 xen/include/asm-x86/guest_access.h |   20 -
 xen/include/asm-x86/mm.h           |    2 
 xen/include/asm-x86/shadow.h       |   82 ++++++-
 12 files changed, 573 insertions(+), 409 deletions(-)
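
The bulk of this patch replaces direct reads of guest L1 entries through
linear_pg_table with a per-paging-mode accessor interface (guest_map_l1e,
guest_unmap_l1e, guest_get_eff_l1e, guest_get_eff_kern_l1e), so the same PV
pagetable code works whether or not the guest is shadow-translated.  The
sketch below condenses the calling pattern used by create_grant_va_mapping()
and do_update_va_mapping() in the diff; the wrapper function name is made up
for illustration, the types and helpers (and the new update_l1e signature)
are taken from this changeset, and real callers also handle refcounting and
adjust_guest_l1e(), which are omitted here.

/* Hypothetical helper illustrating the map/update/unmap pattern introduced
 * by this patch.  Not part of the changeset itself. */
static int example_update_guest_l1e(struct vcpu *v, unsigned long va,
                                    l1_pgentry_t nl1e)
{
    l1_pgentry_t *pl1e, ol1e;
    unsigned long gl1mfn;
    int okay;

    /* Map the guest L1e covering va.  For shadow-translated PV guests this
     * goes through the shadow code instead of the linear pagetable map. */
    pl1e = guest_map_l1e(v, va, &gl1mfn);
    if ( pl1e == NULL )
        return GNTST_general_error;

    ol1e = *pl1e;                                    /* snapshot old entry */
    okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v);  /* validated write    */

    guest_unmap_l1e(v, pl1e);                        /* always unmap       */

    return okay ? GNTST_okay : GNTST_general_error;
}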

diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/domain.c     Thu Sep 28 17:10:54 2006 +0100
@@ -334,8 +334,10 @@ int arch_set_info_guest(
     }
     else
     {
-        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
-                                PGT_base_page_table) )
+        if ( shadow_mode_refcounts(d)
+             ? !get_page(mfn_to_page(cr3_pfn), d)
+             : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+                                  PGT_base_page_table) )
         {
             destroy_gdt(v);
             return -EINVAL;
@@ -952,7 +954,10 @@ void domain_relinquish_resources(struct 
         pfn = pagetable_get_pfn(v->arch.guest_table_user);
         if ( pfn != 0 )
         {
-            put_page_and_type(mfn_to_page(pfn));
+            if ( shadow_mode_refcounts(d) )
+                put_page(mfn_to_page(pfn));
+            else
+                put_page_and_type(mfn_to_page(pfn));
             v->arch.guest_table_user = pagetable_null();
         }
 #endif
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/mm.c Thu Sep 28 17:10:54 2006 +0100
@@ -427,23 +427,11 @@ int map_ldt_shadow_page(unsigned int off
     unsigned long gmfn, mfn;
     l1_pgentry_t l1e, nl1e;
     unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
-    int res;
-
-#if defined(__x86_64__)
-    /* If in user mode, switch to kernel mode just to read LDT mapping. */
-    int user_mode = !(v->arch.flags & TF_kernel_mode);
-#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
-#elif defined(__i386__)
-#define TOGGLE_MODE() ((void)0)
-#endif
+    int okay;
 
     BUG_ON(unlikely(in_irq()));
 
-    TOGGLE_MODE();
-    __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)],
-                     sizeof(l1e));
-    TOGGLE_MODE();
-
+    guest_get_eff_kern_l1e(v, gva, &l1e);
     if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
         return 0;
 
@@ -452,17 +440,17 @@ int map_ldt_shadow_page(unsigned int off
     if ( unlikely(!VALID_MFN(mfn)) )
         return 0;
 
-    res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
-
-    if ( !res && unlikely(shadow_mode_refcounts(d)) )
+    okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
+
+    if ( !okay && unlikely(shadow_mode_refcounts(d)) )
     {
         shadow_lock(d);
         shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
-        res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
+        okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
         shadow_unlock(d);
     }
 
-    if ( unlikely(!res) )
+    if ( unlikely(!okay) )
         return 0;
 
     nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
@@ -1233,7 +1221,7 @@ static inline int update_l1e(l1_pgentry_
         }
     }
 #endif
-    if ( unlikely(shadow_mode_enabled(v->domain)) )
+    if ( unlikely(shadow_mode_enabled(v->domain)) && rv )
     {
         shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
         shadow_unlock(v->domain);    
@@ -1251,6 +1239,9 @@ static int mod_l1_entry(l1_pgentry_t *pl
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
         return 0;
+
+    if ( unlikely(shadow_mode_refcounts(d)) )
+        return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
@@ -1871,6 +1862,14 @@ static int set_foreigndom(domid_t domid)
         }
     }
 
+    if ( unlikely(shadow_mode_translate(d)) )
+    {
+        MEM_LOG("%s: can not mix foreign mappings with translated domains",
+                __func__);
+        info->foreign = NULL;
+        okay = 0; 
+    }
+
  out:
     return okay;
 }
@@ -1902,7 +1901,7 @@ int do_mmuext_op(
 {
     struct mmuext_op op;
     int rc = 0, i = 0, okay;
-    unsigned long mfn, type;
+    unsigned long mfn = 0, gmfn = 0, type;
     unsigned int done = 0;
     struct page_info *page;
     struct vcpu *v = current;
@@ -1947,7 +1946,8 @@ int do_mmuext_op(
         }
 
         okay = 1;
-        mfn  = op.arg1.mfn;
+        gmfn  = op.arg1.mfn;
+        mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
         page = mfn_to_page(mfn);
 
         switch ( op.cmd )
@@ -2022,7 +2022,6 @@ int do_mmuext_op(
             break;
 
         case MMUEXT_NEW_BASEPTR:
-            mfn = gmfn_to_mfn(current->domain, mfn);
             okay = new_guest_cr3(mfn);
             this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
             break;
@@ -2031,8 +2030,13 @@ int do_mmuext_op(
         case MMUEXT_NEW_USER_BASEPTR:
             okay = 1;
             if (likely(mfn != 0))
-                okay = get_page_and_type_from_pagenr(
-                    mfn, PGT_root_page_table, d);
+            {
+                if ( shadow_mode_refcounts(d) )
+                    okay = get_page_from_pagenr(mfn, d);
+                else
+                    okay = get_page_and_type_from_pagenr(
+                        mfn, PGT_root_page_table, d);
+            }
             if ( unlikely(!okay) )
             {
                 MEM_LOG("Error while installing new mfn %lx", mfn);
@@ -2043,7 +2047,12 @@ int do_mmuext_op(
                     pagetable_get_pfn(v->arch.guest_table_user);
                 v->arch.guest_table_user = pagetable_from_pfn(mfn);
                 if ( old_mfn != 0 )
-                    put_page_and_type(mfn_to_page(old_mfn));
+                {
+                    if ( shadow_mode_refcounts(d) )
+                        put_page(mfn_to_page(old_mfn));
+                    else
+                        put_page_and_type(mfn_to_page(old_mfn));
+                }
             }
             break;
 #endif
@@ -2504,17 +2513,26 @@ static int create_grant_va_mapping(
 {
     l1_pgentry_t *pl1e, ol1e;
     struct domain *d = v->domain;
+    unsigned long gl1mfn;
+    int okay;
     
     ASSERT(spin_is_locked(&d->big_lock));
 
     adjust_guest_l1e(nl1e);
 
-    pl1e = &linear_pg_table[l1_linear_offset(va)];
-
-    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
-         !update_l1e(pl1e, ol1e, nl1e, 
-                    l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
+    pl1e = guest_map_l1e(v, va, &gl1mfn);
+    if ( !pl1e )
+    {
+        MEM_LOG("Could not find L1 PTE for address %lx", va);
         return GNTST_general_error;
+    }
+    ol1e = *pl1e;
+    okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v);
+    guest_unmap_l1e(v, pl1e);
+    pl1e = NULL;
+
+    if ( !okay )
+            return GNTST_general_error;
 
     if ( !shadow_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
@@ -2523,17 +2541,19 @@ static int create_grant_va_mapping(
 }
 
 static int destroy_grant_va_mapping(
-    unsigned long addr, unsigned long frame, struct domain *d)
+    unsigned long addr, unsigned long frame, struct vcpu *v)
 {
     l1_pgentry_t *pl1e, ol1e;
+    unsigned long gl1mfn;
+    int rc = 0;
     
-    pl1e = &linear_pg_table[l1_linear_offset(addr)];
-
-    if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
-    {
-        MEM_LOG("Could not find PTE entry for address %lx", addr);
+    pl1e = guest_map_l1e(v, addr, &gl1mfn);
+    if ( !pl1e )
+    {
+        MEM_LOG("Could not find L1 PTE for address %lx", addr);
         return GNTST_general_error;
     }
+    ol1e = *pl1e;
 
     /*
      * Check that the virtual address supplied is actually mapped to
@@ -2543,19 +2563,21 @@ static int destroy_grant_va_mapping(
     {
         MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 l1e_get_pfn(ol1e), addr, frame);
-        return GNTST_general_error;
+        rc = GNTST_general_error;
+        goto out;
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), 
-                      l2e_get_pfn(__linear_l2_table[l2_linear_offset(addr)]),
-                      d->vcpu[0] /* Change for per-vcpu shadows */)) )
+    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
-        return GNTST_general_error;
-    }
-
-    return 0;
+        rc = GNTST_general_error;
+        goto out; // this is redundant & unnecessary, but informative
+    }
+
+ out:
+    guest_unmap_l1e(v, pl1e);
+    return rc;
 }
 
 int create_grant_host_mapping(
@@ -2578,7 +2600,7 @@ int destroy_grant_host_mapping(
 {
     if ( flags & GNTMAP_contains_pte )
         return destroy_grant_pte_mapping(addr, frame, current->domain);
-    return destroy_grant_va_mapping(addr, frame, current->domain);
+    return destroy_grant_va_mapping(addr, frame, current);
 }
 
 int steal_page(
@@ -2634,7 +2656,8 @@ int do_update_va_mapping(unsigned long v
     l1_pgentry_t   val = l1e_from_intpte(val64);
     struct vcpu   *v   = current;
     struct domain *d   = v->domain;
-    unsigned long  vmask, bmap_ptr;
+    l1_pgentry_t  *pl1e;
+    unsigned long  vmask, bmap_ptr, gl1mfn;
     cpumask_t      pmask;
     int            rc  = 0;
 
@@ -2643,35 +2666,17 @@ int do_update_va_mapping(unsigned long v
     if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
         return -EINVAL;
 
-    if ( unlikely(shadow_mode_refcounts(d)) )
-    {
-        DPRINTK("Grant op on a shadow-refcounted domain\n");
-        return -EINVAL; 
-    }
-
     LOCK_BIGLOCK(d);
 
-    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
-    {
-        if ( unlikely(this_cpu(percpu_mm_info).foreign &&
-                      (shadow_mode_translate(d) ||
-                       shadow_mode_translate(
-                           this_cpu(percpu_mm_info).foreign))) )
-        {
-            /*
-             * The foreign domain's pfn's are in a different namespace. There's
-             * not enough information in just a gpte to figure out how to   
-             * (re-)shadow this entry.
-             */
-            domain_crash(d);
-        }
-    }
-
-    if ( unlikely(!mod_l1_entry(
-                      &linear_pg_table[l1_linear_offset(va)], val,
-                      l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]))) )
+    pl1e = guest_map_l1e(v, va, &gl1mfn);
+
+    if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
         rc = -EINVAL;
-    
+
+    if ( pl1e )
+        guest_unmap_l1e(v, pl1e);
+    pl1e = NULL;
+
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
@@ -3033,7 +3038,7 @@ static int ptwr_emulated_update(
     unsigned int bytes,
     unsigned int do_cmpxchg)
 {
-    unsigned long pfn;
+    unsigned long gmfn, mfn;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
     struct vcpu *v = current;
@@ -3073,15 +3078,17 @@ static int ptwr_emulated_update(
     }
 
     /* Read the PTE that maps the page being updated. */
-    if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
-                          sizeof(pte)) )
-    {
-        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
+    guest_get_eff_l1e(v, addr, &pte);
+    if ( unlikely(!(l1e_get_flags(pte) & _PAGE_PRESENT)) )
+    {
+        MEM_LOG("%s: Cannot get L1 PTE for guest address %lx",
+                __func__, addr);
         return X86EMUL_UNHANDLEABLE;
     }
 
-    pfn  = l1e_get_pfn(pte);
-    page = mfn_to_page(pfn);
+    gmfn  = l1e_get_pfn(pte);
+    mfn = gmfn_to_mfn(d, gmfn);
+    page = mfn_to_page(mfn);
 
     /* We are looking only for read-only mappings of p.t. pages. */
     ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
@@ -3091,7 +3098,7 @@ static int ptwr_emulated_update(
 
     /* Check the new PTE. */
     nl1e = l1e_from_intpte(val);
-    if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+    if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
     {
         if ( (CONFIG_PAGING_LEVELS == 3) &&
              (bytes == 4) &&
@@ -3130,13 +3137,13 @@ static int ptwr_emulated_update(
             if ( shadow_mode_enabled(d) )
                 shadow_unlock(d);
             unmap_domain_page(pl1e);
-            put_page_from_l1e(nl1e, d);
+            put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
             return X86EMUL_CMPXCHG_FAILED;
         }
-        if ( unlikely(shadow_mode_enabled(v->domain)) )
+        if ( unlikely(shadow_mode_enabled(d)) )
         {
             shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
-            shadow_unlock(v->domain);    
+            shadow_unlock(d);    
         }
     }
     else
@@ -3149,7 +3156,7 @@ static int ptwr_emulated_update(
     unmap_domain_page(pl1e);
 
     /* Finally, drop the old PTE. */
-    put_page_from_l1e(ol1e, d);
+    put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
 
     return X86EMUL_CONTINUE;
 }
@@ -3198,13 +3205,13 @@ static struct x86_emulate_ops ptwr_emula
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
-int ptwr_do_page_fault(struct domain *d, unsigned long addr, 
+int ptwr_do_page_fault(struct vcpu *v, unsigned long addr, 
                        struct cpu_user_regs *regs)
 {
+    struct domain *d = v->domain;
     unsigned long     pfn;
     struct page_info *page;
     l1_pgentry_t      pte;
-    l2_pgentry_t     *pl2e, l2e;
     struct x86_emulate_ctxt emul_ctxt;
 
     LOCK_BIGLOCK(d);
@@ -3213,13 +3220,9 @@ int ptwr_do_page_fault(struct domain *d,
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
      */
-    pl2e = &__linear_l2_table[l2_linear_offset(addr)];
-    if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
-        !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
-         __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
-                          sizeof(pte)) )
+    guest_get_eff_l1e(v, addr, &pte);
+    if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
         goto bail;
-
     pfn  = l1e_get_pfn(pte);
     page = mfn_to_page(pfn);
 
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Sep 28 17:10:54 2006 +0100
@@ -75,35 +75,27 @@ sh_x86_emulate_read_std(unsigned long ad
                          unsigned int bytes,
                          struct x86_emulate_ctxt *ctxt)
 {
-    struct vcpu *v = current;
-    if ( hvm_guest(v) )
-    {
-        *val = 0;
-        // XXX -- this is WRONG.
-        //        It entirely ignores the permissions in the page tables.
-        //        In this case, that is only a user vs supervisor access check.
-        //
-        if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
-        {
+    *val = 0;
+    // XXX -- this is WRONG.
+    //        It entirely ignores the permissions in the page tables.
+    //        In this case, that is only a user vs supervisor access check.
+    //
+    if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
+    {
 #if 0
-            SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
-                           v->domain->domain_id, v->vcpu_id, 
-                           addr, *val, bytes);
-#endif
-            return X86EMUL_CONTINUE;
-        }
-
-        /* If we got here, there was nothing mapped here, or a bad GFN 
-         * was mapped here.  This should never happen: we're here because
-         * of a write fault at the end of the instruction we're emulating. */ 
-        SHADOW_PRINTK("read failed to va %#lx\n", addr);
-        return X86EMUL_PROPAGATE_FAULT;
-    }
-    else 
-    {
-        SHADOW_PRINTK("this operation is not emulated yet\n");
-        return X86EMUL_UNHANDLEABLE;
-    }
+        struct vcpu *v = current;
+        SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                       v->domain->domain_id, v->vcpu_id, 
+                       addr, *val, bytes);
+#endif
+        return X86EMUL_CONTINUE;
+    }
+
+    /* If we got here, there was nothing mapped here, or a bad GFN 
+     * was mapped here.  This should never happen: we're here because
+     * of a write fault at the end of the instruction we're emulating. */ 
+    SHADOW_PRINTK("read failed to va %#lx\n", addr);
+    return X86EMUL_PROPAGATE_FAULT;
 }
 
 static int
@@ -112,33 +104,26 @@ sh_x86_emulate_write_std(unsigned long a
                           unsigned int bytes,
                           struct x86_emulate_ctxt *ctxt)
 {
+#if 0
     struct vcpu *v = current;
-#if 0
     SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
                   v->domain->domain_id, v->vcpu_id, addr, val, bytes);
 #endif
-    if ( hvm_guest(v) )
-    {
-        // XXX -- this is WRONG.
-        //        It entirely ignores the permissions in the page tables.
-        //        In this case, that includes user vs supervisor, and
-        //        write access.
-        //
-        if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
-            return X86EMUL_CONTINUE;
-
-        /* If we got here, there was nothing mapped here, or a bad GFN 
-         * was mapped here.  This should never happen: we're here because
-         * of a write fault at the end of the instruction we're emulating,
-         * which should be handled by sh_x86_emulate_write_emulated. */ 
-        SHADOW_PRINTK("write failed to va %#lx\n", addr);
-        return X86EMUL_PROPAGATE_FAULT;
-    }
-    else 
-    {
-        SHADOW_PRINTK("this operation is not emulated yet\n");
-        return X86EMUL_UNHANDLEABLE;
-    }
+
+    // XXX -- this is WRONG.
+    //        It entirely ignores the permissions in the page tables.
+    //        In this case, that includes user vs supervisor, and
+    //        write access.
+    //
+    if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
+        return X86EMUL_CONTINUE;
+
+    /* If we got here, there was nothing mapped here, or a bad GFN 
+     * was mapped here.  This should never happen: we're here because
+     * of a write fault at the end of the instruction we're emulating,
+     * which should be handled by sh_x86_emulate_write_emulated. */ 
+    SHADOW_PRINTK("write failed to va %#lx\n", addr);
+    return X86EMUL_PROPAGATE_FAULT;
 }
 
 static int
@@ -152,15 +137,7 @@ sh_x86_emulate_write_emulated(unsigned l
     SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
                   v->domain->domain_id, v->vcpu_id, addr, val, bytes);
 #endif
-    if ( hvm_guest(v) )
-    {
-        return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
-    }
-    else 
-    {
-        SHADOW_PRINTK("this operation is not emulated yet\n");
-        return X86EMUL_UNHANDLEABLE;
-    }
+    return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
 }
 
 static int 
@@ -175,16 +152,8 @@ sh_x86_emulate_cmpxchg_emulated(unsigned
     SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
                    v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
 #endif
-    if ( hvm_guest(v) )
-    {
-        return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new, 
-                                                    bytes, ctxt);
-    }
-    else 
-    {
-        SHADOW_PRINTK("this operation is not emulated yet\n");
-        return X86EMUL_UNHANDLEABLE;
-    }
+    return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new,
+                                                     bytes, ctxt);
 }
 
 static int 
@@ -201,16 +170,8 @@ sh_x86_emulate_cmpxchg8b_emulated(unsign
                    v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
                    new_hi, new_lo, ctxt);
 #endif
-    if ( hvm_guest(v) )
-    {
-        return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
-                                                      new_lo, new_hi, ctxt);
-    }
-    else 
-    {
-        SHADOW_PRINTK("this operation is not emulated yet\n");
-        return X86EMUL_UNHANDLEABLE;
-    }
+    return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
+                                                       new_lo, new_hi, ctxt);
 }
 
 
@@ -267,7 +228,7 @@ void shadow_demote(struct vcpu *v, mfn_t
 /* Validate a pagetable change from the guest and update the shadows.
  * Returns a bitmask of SHADOW_SET_* flags. */
 
-static int
+int
 __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
                                void *entry, u32 size)
 {
@@ -367,7 +328,9 @@ void
 void
 shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
                                 void *entry, u32 size)
-/* This is the entry point for emulated writes to pagetables in HVM guests */
+/* This is the entry point for emulated writes to pagetables in HVM guests and
+ * PV translated guests.
+ */
 {
     struct domain *d = v->domain;
     int rc;
@@ -806,7 +769,7 @@ void shadow_free(struct domain *d, mfn_t
 
 /* Divert some memory from the pool to be used by the p2m mapping.
  * This action is irreversible: the p2m mapping only ever grows.
- * That's OK because the p2m table only exists for external domains,
+ * That's OK because the p2m table only exists for translated domains,
  * and those domains can't ever turn off shadow mode.
  * Also, we only ever allocate a max-order chunk, so as to preserve
  * the invariant that shadow_prealloc() always works.
@@ -830,7 +793,12 @@ shadow_alloc_p2m_pages(struct domain *d)
     d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
     for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
     {
-        /* Unlike shadow pages, mark p2m pages as owned by the domain */
+        /* Unlike shadow pages, mark p2m pages as owned by the domain.
+         * Marking the domain as the owner would normally allow the guest to
+         * create mappings of these pages, but these p2m pages will never be
+         * in the domain's guest-physical address space, and so that is not
+         * believed to be a concern.
+         */
         page_set_owner(&pg[i], d);
         list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist);
     }
@@ -2269,7 +2237,7 @@ void sh_update_paging_modes(struct vcpu 
     //
     if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
     {
-        printk("%s: postponing determination of shadow mode\n", __func__);
+        SHADOW_PRINTK("%s: postponing determination of shadow mode\n", __func__);
         return;
     }
 
@@ -2294,6 +2262,7 @@ void sh_update_paging_modes(struct vcpu 
 #else
 #error unexpected paging mode
 #endif
+        v->arch.shadow.translate_enabled = !!shadow_mode_translate(d);
     }
     else
     {
@@ -2303,8 +2272,8 @@ void sh_update_paging_modes(struct vcpu 
         ASSERT(shadow_mode_translate(d));
         ASSERT(shadow_mode_external(d));
 
-        v->arch.shadow.hvm_paging_enabled = !!hvm_paging_enabled(v);
-        if ( !v->arch.shadow.hvm_paging_enabled )
+        v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v);
+        if ( !v->arch.shadow.translate_enabled )
         {
             
             /* Set v->arch.guest_table to use the p2m map, and choose
@@ -2381,13 +2350,14 @@ void sh_update_paging_modes(struct vcpu 
 
         if ( v->arch.shadow.mode != old_mode )
         {
-            SHADOW_PRINTK("new paging mode: d=%u v=%u g=%u s=%u "
-                           "(was g=%u s=%u)\n",
-                           d->domain_id, v->vcpu_id, 
-                           v->arch.shadow.mode->guest_levels,
-                           v->arch.shadow.mode->shadow_levels,
-                           old_mode ? old_mode->guest_levels : 0,
-                           old_mode ? old_mode->shadow_levels : 0);
+            SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u "
+                          "(was g=%u s=%u)\n",
+                          d->domain_id, v->vcpu_id,
+                          hvm_guest(v) ? !!hvm_paging_enabled(v) : 1,
+                          v->arch.shadow.mode->guest_levels,
+                          v->arch.shadow.mode->shadow_levels,
+                          old_mode ? old_mode->guest_levels : 0,
+                          old_mode ? old_mode->shadow_levels : 0);
             if ( old_mode &&
                  (v->arch.shadow.mode->shadow_levels !=
                   old_mode->shadow_levels) )
@@ -2467,6 +2437,7 @@ static int shadow_enable(struct domain *
     /* Sanity check the arguments */
     if ( (d == current->domain) ||
          shadow_mode_enabled(d) ||
+         ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) ||
          ((mode & SHM2_external) && !(mode & SHM2_translate)) )
     {
         rv = -EINVAL;
@@ -2522,7 +2493,7 @@ static int shadow_enable(struct domain *
  out:
     shadow_unlock(d);
     domain_unpause(d);
-    return 0;
+    return rv;
 }
 
 void shadow_teardown(struct domain *d)
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Sep 28 17:10:54 2006 +0100
@@ -483,8 +483,7 @@ static u32 guest_set_ad_bits(struct vcpu
                              unsigned int level, 
                              fetch_type_t ft)
 {
-    u32 flags, shflags, bit;
-    struct page_info *pg;
+    u32 flags;
     int res = 0;
 
     ASSERT(valid_mfn(gmfn)
@@ -502,11 +501,10 @@ static u32 guest_set_ad_bits(struct vcpu
     if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
         return flags;
 
-    /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */
+    /* Need the D bit as well for writes, in L1es and PSE L2es. */
     if ( ft == ft_demand_write  
-         && (level == 1 || 
-             (level == 2 && GUEST_PAGING_LEVELS < 4 
-              && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
+         && (level == 1 ||
+             (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
     {
         if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 
              == (_PAGE_DIRTY | _PAGE_ACCESSED) )
@@ -524,76 +522,69 @@ static u32 guest_set_ad_bits(struct vcpu
 
     /* Set the bit(s) */
     sh_mark_dirty(v->domain, gmfn);
-    SHADOW_DEBUG(A_AND_D, "gfn = %"SH_PRI_gfn", "
+    SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
                   "old flags = %#x, new flags = %#x\n", 
-                  guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags);
+                  gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), flags);
     *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
     
-    /* May need to propagate this change forward to other kinds of shadow */
-    pg = mfn_to_page(gmfn);
-    if ( !sh_mfn_is_a_page_table(gmfn) ) 
-    {
-        /* This guest pagetable is not yet shadowed at all. */
-        // MAF: I think this assert is busted...  If this gmfn has not yet
-        // been promoted, then it seems perfectly reasonable for there to be
-        // outstanding type refs to it...
-        /* TJD: No. If the gmfn has not been promoted, we must at least 
-         * have recognised that it is a pagetable, and pulled write access.
-         * The type count should only be non-zero if it is actually a page 
-         * table.  The test above was incorrect, though, so I've fixed it. */
-        ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0);
-        return flags;  
-    }
-
-    shflags = pg->shadow_flags & SHF_page_type_mask;
-    while ( shflags )
-    {
-        bit = find_first_set_bit(shflags);
-        ASSERT(shflags & (1u << bit));
-        shflags &= ~(1u << bit);
-        if ( !(pg->shadow_flags & (1u << bit)) )
-            continue;
-        switch ( bit )
-        {
-        case PGC_SH_type_to_index(PGC_SH_l1_shadow):
-            if (level != 1) 
-                res |= sh_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep));
-            break;
-        case PGC_SH_type_to_index(PGC_SH_l2_shadow):
-            if (level != 2) 
-                res |= sh_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep));
-            break;
-#if GUEST_PAGING_LEVELS == 3 /* PAE only */
-        case PGC_SH_type_to_index(PGC_SH_l2h_shadow):
-            if (level != 2) 
-                res |= sh_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep));
-            break;
-#endif
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-        case PGC_SH_type_to_index(PGC_SH_l3_shadow):
-            if (level != 3) 
-                res |= sh_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep));
-            break;
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
-        case PGC_SH_type_to_index(PGC_SH_l4_shadow):
-            if (level != 4) 
-                res |= sh_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep));
-            break;
-#endif 
-#endif
-        default:
-            SHADOW_ERROR("mfn %"SH_PRI_mfn" is shadowed in multiple "
-                          "modes: A&D bits may be out of sync (flags=%#x).\n", 
-                          mfn_x(gmfn), pg->shadow_flags); 
-            /* XXX Shadows in other modes will not be updated, so will
-             * have their A and D bits out of sync. */
-        }
-    }
-    
+    /* Propagate this change to any existing shadows */
+    res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
+
     /* We should never need to flush the TLB or recopy PAE entries */
-    ASSERT( res == 0 || res == SHADOW_SET_CHANGED );
+    ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
+
     return flags;
 }
+
+#if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
+void *
+sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
+                  unsigned long *gl1mfn)
+{
+    void *pl1e = NULL;
+    walk_t gw;
+
+    ASSERT(shadow_mode_translate(v->domain));
+        
+    // XXX -- this is expensive, but it's easy to cobble together...
+    // FIXME!
+
+    shadow_lock(v->domain);
+    guest_walk_tables(v, addr, &gw, 1);
+
+    if ( gw.l2e &&
+         (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
+         !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
+    {
+        if ( gl1mfn )
+            *gl1mfn = mfn_x(gw.l1mfn);
+        pl1e = map_domain_page(mfn_x(gw.l1mfn)) +
+            (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
+    }
+
+    unmap_walk(v, &gw);
+    shadow_unlock(v->domain);
+
+    return pl1e;
+}
+
+void
+sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+{
+    walk_t gw;
+
+    ASSERT(shadow_mode_translate(v->domain));
+        
+    // XXX -- this is expensive, but it's easy to cobble together...
+    // FIXME!
+
+    shadow_lock(v->domain);
+    guest_walk_tables(v, addr, &gw, 1);
+    *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
+    unmap_walk(v, &gw);
+    shadow_unlock(v->domain);
+}
+#endif /* CONFIG==SHADOW==GUEST */
 
 /**************************************************************************/
 /* Functions to compute the correct index into a shadow page, given an
@@ -709,17 +700,6 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i
  * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
  * into the respective demand_fault functions.
  */
-
-#define CHECK(_cond)                                    \
-do {                                                    \
-    if (unlikely(!(_cond)))                             \
-    {                                                   \
-        printk("%s %s %d ASSERTION (%s) FAILED\n",      \
-               __func__, __FILE__, __LINE__, #_cond);   \
-        return -1;                                      \
-    }                                                   \
-} while (0);
-
 // The function below tries to capture all of the flag manipulation for the
 // demand and propagate functions into one place.
 //
@@ -728,6 +708,16 @@ sh_propagate_flags(struct vcpu *v, mfn_t
                     u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn, 
                     int mmio, int level, fetch_type_t ft)
 {
+#define CHECK(_cond)                                    \
+do {                                                    \
+    if (unlikely(!(_cond)))                             \
+    {                                                   \
+        printk("%s %s %d ASSERTION (%s) FAILED\n",      \
+               __func__, __FILE__, __LINE__, #_cond);   \
+        domain_crash(d);                                \
+    }                                                   \
+} while (0);
+
     struct domain *d = v->domain;
     u32 pass_thru_flags;
     u32 sflags;
@@ -763,6 +753,10 @@ sh_propagate_flags(struct vcpu *v, mfn_t
             return 0;
     }
 
+    // Set the A and D bits in the guest entry, if we need to.
+    if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
+        gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
+    
     // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
     //
     if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
@@ -797,17 +791,12 @@ sh_propagate_flags(struct vcpu *v, mfn_t
     // Higher level entries do not, strictly speaking, have dirty bits, but
     // since we use shadow linear tables, each of these entries may, at some
     // point in time, also serve as a shadow L1 entry.
-    // By setting both the  A&D bits in each of these, we eliminate the burden
+    // By setting both the A&D bits in each of these, we eliminate the burden
     // on the hardware to update these bits on initial accesses.
     //
     if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
         sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
 
-
-    // Set the A and D bits in the guest entry, if we need to.
-    if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
-        gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
-    
     // If the A or D bit has not yet been set in the guest, then we must
     // prevent the corresponding kind of access.
     //
@@ -815,12 +804,12 @@ sh_propagate_flags(struct vcpu *v, mfn_t
                   !(gflags & _PAGE_ACCESSED)) )
         sflags &= ~_PAGE_PRESENT;
 
-    /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */
-    if ( unlikely( ((level == 1) 
-                    || ((level == 2) && (GUEST_PAGING_LEVELS < 4) 
-                        && guest_supports_superpages(v) &&
-                        (gflags & _PAGE_PSE)))
-                   && !(gflags & _PAGE_DIRTY)) )
+    /* D bits exist in L1es and PSE L2es */
+    if ( unlikely(((level == 1) ||
+                   ((level == 2) &&
+                    (gflags & _PAGE_PSE) &&
+                    guest_supports_superpages(v)))
+                  && !(gflags & _PAGE_DIRTY)) )
         sflags &= ~_PAGE_RW;
 
     // MMIO caching
@@ -869,10 +858,17 @@ sh_propagate_flags(struct vcpu *v, mfn_t
         }
     }
 
+    // PV guests in 64-bit mode use two different page tables for user vs
+    // supervisor permissions, making the guest's _PAGE_USER bit irrelevant.
+    // It is always shadowed as present...
+    if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) )
+    {
+        sflags |= _PAGE_USER;
+    }
+
     return sflags;
-}
-
 #undef CHECK
+}
 
 #if GUEST_PAGING_LEVELS >= 4
 static void
@@ -1732,10 +1728,20 @@ void sh_install_xen_entries_in_l4(struct
                             __PAGE_HYPERVISOR);
 
     /* Linear mapping */
-    sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
-        shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
     sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
         shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
+    {
+        // linear tables may not be used with translated PV guests
+        sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+            shadow_l4e_empty();
+    }
+    else
+    {
+        sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+            shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+    }
 
     if ( shadow_mode_translate(v->domain) )
     {
@@ -1779,7 +1785,15 @@ void sh_install_xen_entries_in_l2h(struc
     
     /* We don't set up a linear mapping here because we can't until this
      * l2h is installed in an l3e.  sh_update_linear_entries() handles
-     * the linear mappings when the l3 is loaded. */
+     * the linear mappings when the l3 is loaded.  We zero them here, just as
+     * a safety measure.
+     */
+    for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+            shadow_l2e_empty();
+    for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
+            shadow_l2e_empty();
 
     if ( shadow_mode_translate(d) )
     {
@@ -1817,6 +1831,12 @@ void sh_install_xen_entries_in_l3(struct
     l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
     if ( !valid_mfn(l2smfn) )
     {
+        /* must remove write access to this page before shadowing it */
+        // XXX -- should check to see whether this is better with level==0 or
+        // level==2...
+        if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
+            flush_tlb_mask(v->domain->domain_dirty_cpumask);
+ 
         l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
     }
     l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
@@ -1852,10 +1872,20 @@ void sh_install_xen_entries_in_l2(struct
                 __PAGE_HYPERVISOR);
 
     /* Linear mapping */
-    sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
-        shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
     sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
         shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
+    {
+        // linear tables may not be used with translated PV guests
+        sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+            shadow_l2e_empty();
+    }
+    else
+    {
+        sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+            shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
+    }
 
     if ( shadow_mode_translate(d) )
     {
@@ -2527,6 +2557,32 @@ static int validate_gl4e(struct vcpu *v,
     }
     l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
                              sl3mfn, &new_sl4e, ft_prefetch);
+
+    // check for updates to xen reserved slots
+    if ( !shadow_mode_external(v->domain) )
+    {
+        int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
+                            sizeof(shadow_l4e_t));
+        int reserved_xen_slot = !is_guest_l4_slot(shadow_index);
+
+        if ( unlikely(reserved_xen_slot) )
+        {
+            // attempt by the guest to write to a xen reserved slot
+            //
+            SHADOW_PRINTK("%s out-of-range update "
+                           "sl4mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
+                           __func__, mfn_x(sl4mfn), shadow_index, new_sl4e.l4);
+            if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
+            {
+                SHADOW_ERROR("out-of-range l4e update\n");
+                result |= SHADOW_SET_ERROR;
+            }
+
+            // do not call shadow_set_l4e...
+            return result;
+        }
+    }
+
     result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
     return result;
 }
@@ -2616,6 +2672,48 @@ static int validate_gl2e(struct vcpu *v,
     }
     l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
                              sl1mfn, &new_sl2e, ft_prefetch);
+
+    // check for updates to xen reserved slots in PV guests...
+    // XXX -- need to revisit this for PV 3-on-4 guests.
+    //
+#if SHADOW_PAGING_LEVELS < 4
+#if CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS
+    if ( !shadow_mode_external(v->domain) )
+    {
+        int shadow_index = (((unsigned long)sl2p & ~PAGE_MASK) /
+                            sizeof(shadow_l2e_t));
+        int reserved_xen_slot;
+
+#if SHADOW_PAGING_LEVELS == 3
+        reserved_xen_slot = 
+            (((mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask)
+              == PGC_SH_l2h_pae_shadow) &&
+             (shadow_index 
+              >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
+#else /* SHADOW_PAGING_LEVELS == 2 */
+        reserved_xen_slot = (shadow_index >= L2_PAGETABLE_FIRST_XEN_SLOT);
+#endif
+
+        if ( unlikely(reserved_xen_slot) )
+        {
+            // attempt by the guest to write to a xen reserved slot
+            //
+            SHADOW_PRINTK("%s out-of-range update "
+                           "sl2mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
+                           __func__, mfn_x(sl2mfn), shadow_index, new_sl2e.l2);
+            if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
+            {
+                SHADOW_ERROR("out-of-range l2e update\n");
+                result |= SHADOW_SET_ERROR;
+            }
+
+            // do not call shadow_set_l2e...
+            return result;
+        }
+    }
+#endif /* CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS */
+#endif /* SHADOW_PAGING_LEVELS < 4 */
+
     result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
 
     return result;
@@ -2897,7 +2995,7 @@ static int sh_page_fault(struct vcpu *v,
     }
 
     // All levels of the guest page table are now known to be present.
-    accumulated_gflags = accumulate_guest_flags(&gw);
+    accumulated_gflags = accumulate_guest_flags(v, &gw);
 
     // Check for attempts to access supervisor-only pages from user mode,
     // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
@@ -3348,6 +3446,7 @@ sh_update_linear_entries(struct vcpu *v)
         l2_pgentry_t *l2e, new_l2e;
         shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
         int i;
+        int unmap_l2e = 0;
 
 #if GUEST_PAGING_LEVELS == 2
         /* Shadow l3 tables were built by update_cr3 */
@@ -3365,39 +3464,45 @@ sh_update_linear_entries(struct vcpu *v)
 #endif /* GUEST_PAGING_LEVELS */
         
         /* Choose where to write the entries, using linear maps if possible */
-        if ( v == current && shadow_mode_external(d) ) 
-        {
-            /* From the monitor tables, it's safe to use linear maps to update
-             * monitor l2s */
-            l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
-        }
-        else if ( shadow_mode_external(d) ) 
-        {
-            /* Map the monitor table's high l2 */
-            l3_pgentry_t *l3e;
-            l3e = sh_map_domain_page(
-                pagetable_get_mfn(v->arch.monitor_table));
-            ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
-            l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
-            sh_unmap_domain_page(l3e);
-        } 
+        if ( shadow_mode_external(d) )
+        {
+            if ( v == current )
+            {
+                /* From the monitor tables, it's safe to use linear maps
+                 * to update monitor l2s */
+                l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
+            }
+            else
+            {
+                /* Map the monitor table's high l2 */
+                l3_pgentry_t *l3e;
+                l3e = sh_map_domain_page(
+                    pagetable_get_mfn(v->arch.monitor_table));
+                ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+                l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
+                unmap_l2e = 1;
+                sh_unmap_domain_page(l3e);
+            }
+        }
         else 
         {
             /* Map the shadow table's high l2 */
             ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
             l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
+            unmap_l2e = 1;
         }
         
-        
-        if ( !shadow_mode_external(d) )
-        {
-            /* Write linear mapping of guest. */
+        /* Write linear mapping of guest (only in PV, and only when 
+         * not translated). */
+        if ( !shadow_mode_translate(d) )
+        {
             for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
-            { 
-                new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT) 
-                    ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
-                                   __PAGE_HYPERVISOR) 
-                    : l2e_empty();
+            {
+                new_l2e = 
+                    ((shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
+                     ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
+                                    __PAGE_HYPERVISOR) 
+                     : l2e_empty());
                 safe_write_entry(
                     &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
                     &new_l2e);
@@ -3416,9 +3521,8 @@ sh_update_linear_entries(struct vcpu *v)
                 &new_l2e);
         }
         
-        if ( v != current || !shadow_mode_external(d) )
+        if ( unmap_l2e )
             sh_unmap_domain_page(l2e);
-        
     }
 
 #elif CONFIG_PAGING_LEVELS == 2
@@ -3521,16 +3625,24 @@ static void
 static void
 sh_detach_old_tables(struct vcpu *v)
 {
+    struct domain *d = v->domain;
     mfn_t smfn;
 
     ////
     //// vcpu->arch.guest_vtable
     ////
-    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
-         v->arch.guest_vtable )
-    {
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        sh_unmap_domain_page_global(v->arch.guest_vtable);
+    if ( v->arch.guest_vtable )
+    {
+#if GUEST_PAGING_LEVELS == 4
+        if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+            sh_unmap_domain_page_global(v->arch.guest_vtable);
+#elif GUEST_PAGING_LEVELS == 3
+        if ( 1 || shadow_mode_external(d) || shadow_mode_translate(d) )
+            sh_unmap_domain_page_global(v->arch.guest_vtable);
+#elif GUEST_PAGING_LEVELS == 2
+        if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+            sh_unmap_domain_page_global(v->arch.guest_vtable);
+#endif
         v->arch.guest_vtable = NULL;
     }
 
@@ -3645,9 +3757,14 @@ sh_update_cr3(struct vcpu *v)
     ////
     //// vcpu->arch.guest_vtable
     ////
+#if GUEST_PAGING_LEVELS == 4
+    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+    else
+        v->arch.guest_vtable = __linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
     if ( shadow_mode_external(d) )
     {
-#if GUEST_PAGING_LEVELS == 3
         if ( shadow_vcpu_mode_translate(v) ) 
             /* Paging enabled: find where in the page the l3 table is */
             guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
@@ -3658,25 +3775,21 @@ sh_update_cr3(struct vcpu *v)
         // Ignore the low 2 bits of guest_idx -- they are really just
         // cache control.
         guest_idx &= ~3;
+
         // XXX - why does this need a global map?
         v->arch.guest_vtable =
             (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx;
+    }
+    else
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#elif GUEST_PAGING_LEVELS == 2
+    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+    else
+        v->arch.guest_vtable = __linear_l2_table;
 #else
-        // XXX - why does this need a global map?
-        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
-#endif
-    }
-    else
-    {
-#ifdef __x86_64__
-        v->arch.guest_vtable = __linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
-        // XXX - why does this need a global map?
-        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
-#else
-        v->arch.guest_vtable = __linear_l2_table;
-#endif
-    }
+#error this should never happen
+#endif
 
 #if 0
     printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
@@ -3743,6 +3856,17 @@ sh_update_cr3(struct vcpu *v)
         v->arch.shadow_vtable = __sh_linear_l2_table;
 #endif
     }
+
+#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+    // Now that shadow_vtable is in place, check that the sl3e[3] is properly
+    // shadowed and installed in PAE PV guests...
+    if ( !shadow_mode_external(d) &&
+         !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
+           _PAGE_PRESENT) )
+    {
+        sh_install_xen_entries_in_l3(v, gmfn, smfn);
+    }
+#endif
 
     ////
     //// Take a ref to the new shadow table, and pin it.
@@ -4049,7 +4173,7 @@ static inline void * emulate_map_dest(st
     mfn_t mfn;
 
     guest_walk_tables(v, vaddr, &gw, 1);
-    flags = accumulate_guest_flags(&gw);
+    flags = accumulate_guest_flags(v, &gw);
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
     mfn = vcpu_gfn_to_mfn(v, gfn);
     sh_audit_gw(v, &gw);
@@ -4453,6 +4577,8 @@ struct shadow_paging_mode sh_paging_mode
     .x86_emulate_cmpxchg8b  = sh_x86_emulate_cmpxchg8b,
     .make_monitor_table     = sh_make_monitor_table,
     .destroy_monitor_table  = sh_destroy_monitor_table,
+    .guest_map_l1e          = sh_guest_map_l1e,
+    .guest_get_eff_l1e      = sh_guest_get_eff_l1e,
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
     .guess_wrmap            = sh_guess_wrmap,
 #endif
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h    Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h    Thu Sep 28 17:10:54 2006 +0100
@@ -103,6 +103,13 @@ SHADOW_INTERNAL_NAME(sh_audit_l4_table, 
     (struct vcpu *v, mfn_t sl4mfn, mfn_t x);
 #endif
 
+extern void *
+SHADOW_INTERNAL_NAME(sh_guest_map_l1e, CONFIG_PAGING_LEVELS, CONFIG_PAGING_LEVELS)
+    (struct vcpu *v, unsigned long va, unsigned long *gl1mfn);
+extern void
+SHADOW_INTERNAL_NAME(sh_guest_get_eff_l1e, CONFIG_PAGING_LEVELS, CONFIG_PAGING_LEVELS)
+    (struct vcpu *v, unsigned long va, void *eff_l1e);
+
 #if SHADOW_LEVELS == GUEST_LEVELS
 extern mfn_t
 SHADOW_INTERNAL_NAME(sh_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Thu Sep 28 17:10:54 2006 +0100
@@ -532,55 +532,6 @@ static inline void sh_unpin(struct vcpu 
     }
 }
 
-/**************************************************************************/
-/* Guest physmap (p2m) support */
-
-/* Read our own P2M table, checking in the linear pagetables first to be
- * sure that we will succeed.  Call this function if you expect it to
- * fail often, as it avoids page faults.  If you expect to succeed, use
- * vcpu_gfn_to_mfn, which copy_from_user()s the entry */
-static inline mfn_t
-vcpu_gfn_to_mfn_nofault(struct vcpu *v, unsigned long gfn)
-{
-    unsigned long entry_addr = (unsigned long) &phys_to_machine_mapping[gfn];
-#if CONFIG_PAGING_LEVELS >= 4
-    l4_pgentry_t *l4e;
-    l3_pgentry_t *l3e;
-#endif
-    l2_pgentry_t *l2e;
-    l1_pgentry_t *l1e;
-
-    ASSERT(current == v);
-    if ( !shadow_vcpu_mode_translate(v) )
-        return _mfn(gfn);
-
-#if CONFIG_PAGING_LEVELS > 2
-    if ( gfn >= (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) ) 
-        /* This pfn is higher than the p2m map can hold */
-        return _mfn(INVALID_MFN);
-#endif
-    
-    /* Walk the linear pagetables.  Note that this is *not* the same as 
-     * the walk in sh_gfn_to_mfn_foreign, which is walking the p2m map */
-#if CONFIG_PAGING_LEVELS >= 4
-    l4e = __linear_l4_table + l4_linear_offset(entry_addr);
-    if ( !(l4e_get_flags(*l4e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-    l3e = __linear_l3_table + l3_linear_offset(entry_addr);
-    if ( !(l3e_get_flags(*l3e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-#endif
-    l2e = __linear_l2_table + l2_linear_offset(entry_addr);
-    if ( !(l2e_get_flags(*l2e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-    l1e = __linear_l1_table + l1_linear_offset(entry_addr);
-    if ( !(l1e_get_flags(*l1e) & _PAGE_PRESENT) ) return _mfn(INVALID_MFN);
-
-    /* Safe to look at this part of the table */
-    if ( l1e_get_flags(phys_to_machine_mapping[gfn])  & _PAGE_PRESENT )
-        return _mfn(l1e_get_pfn(phys_to_machine_mapping[gfn]));
-    
-    return _mfn(INVALID_MFN);
-}
-
-
 #endif /* _XEN_SHADOW_PRIVATE_H */
 
 /*
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/mm/shadow/types.h    Thu Sep 28 17:10:54 2006 +0100
@@ -205,6 +205,9 @@ static inline shadow_l4e_t shadow_l4e_fr
     __sh_linear_l1_table; \
 })
 
+// XXX -- these should not be conditional on hvm_guest(v), but rather on
+//        shadow_mode_external(d)...
+//
 #define sh_linear_l2_table(v) ({ \
     ASSERT(current == (v)); \
     ((shadow_l2e_t *) \
@@ -507,10 +510,22 @@ struct shadow_walk_t
 #define sh_guess_wrmap             INTERNAL_NAME(sh_guess_wrmap)
 #define sh_clear_shadow_entry      INTERNAL_NAME(sh_clear_shadow_entry)
 
+/* The sh_guest_(map|get)_* functions only depends on the number of config
+ * levels
+ */
+#define sh_guest_map_l1e                                       \
+        SHADOW_INTERNAL_NAME(sh_guest_map_l1e,                \
+                              CONFIG_PAGING_LEVELS,             \
+                              CONFIG_PAGING_LEVELS)
+#define sh_guest_get_eff_l1e                                   \
+        SHADOW_INTERNAL_NAME(sh_guest_get_eff_l1e,            \
+                              CONFIG_PAGING_LEVELS,             \
+                              CONFIG_PAGING_LEVELS)
+
 /* sh_make_monitor_table only depends on the number of shadow levels */
-#define sh_make_monitor_table                          \
-        SHADOW_INTERNAL_NAME(sh_make_monitor_table,   \
-                              SHADOW_PAGING_LEVELS,     \
+#define sh_make_monitor_table                                  \
+        SHADOW_INTERNAL_NAME(sh_make_monitor_table,           \
+                              SHADOW_PAGING_LEVELS,             \
                               SHADOW_PAGING_LEVELS)
 #define sh_destroy_monitor_table                               \
         SHADOW_INTERNAL_NAME(sh_destroy_monitor_table,        \
@@ -652,7 +667,7 @@ static inline void sh_unpin_l3_subshadow
 #endif /* GUEST_PAGING_LEVELS >= 3 */
 
 static inline u32
-accumulate_guest_flags(walk_t *gw)
+accumulate_guest_flags(struct vcpu *v, walk_t *gw)
 {
     u32 accumulated_flags;
 
@@ -674,8 +689,14 @@ accumulate_guest_flags(walk_t *gw)
     accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
 #endif
 
-    // Finally, revert the NX bit back to its original polarity
+    // Revert the NX bit back to its original polarity
     accumulated_flags ^= _PAGE_NX_BIT;
+
+    // In 64-bit PV guests, the _PAGE_USER bit is implied in all guest
+    // entries (since even the guest kernel runs in ring 3).
+    //
+    if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) )
+        accumulated_flags |= _PAGE_USER;
 
     return accumulated_flags;
 }
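
The accumulate_guest_flags() change above is easier to follow with the NX convention in mind: permission flags are ANDed down the levels, but NX has the opposite sense (a set bit removes a right), so it is XOR-flipped before the AND and flipped back afterwards; 64-bit PV guests additionally get _PAGE_USER forced on, since their kernel runs in ring 3. A self-contained sketch with simplified, illustrative flag values (not Xen's real definitions):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative flag values only. */
    #define PAGE_PRESENT  0x01u
    #define PAGE_RW       0x02u
    #define PAGE_USER     0x04u
    #define PAGE_NX       0x80u   /* "no execute": set bit = fewer rights */

    /* AND two levels together, flipping NX so that "more permissive" is
     * consistently a set bit while accumulating. */
    static uint32_t accumulate_flags(uint32_t l2_flags, uint32_t l1_flags,
                                     int is_64bit_pv_guest)
    {
        uint32_t acc = l1_flags ^ PAGE_NX;   /* start with leaf, NX flipped */
        acc &= l2_flags ^ PAGE_NX;           /* AND in the upper level */
        acc ^= PAGE_NX;                      /* restore NX polarity */

        /* 64-bit PV guests run their kernel in ring 3, so _PAGE_USER is
         * effectively always granted, as in the patched code above. */
        if (is_64bit_pv_guest)
            acc |= PAGE_USER;

        return acc;
    }

    int main(void)
    {
        /* l1 allows exec (NX clear), l2 forbids it (NX set): result forbids. */
        uint32_t f = accumulate_flags(PAGE_PRESENT | PAGE_RW | PAGE_NX,
                                      PAGE_PRESENT | PAGE_RW, 0);
        printf("accumulated flags: %#x (NX %s)\n",
               f, (f & PAGE_NX) ? "set" : "clear");
        return 0;
    }
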
diff -r 5f42b4824e45 -r b6ee084892da xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/arch/x86/traps.c      Thu Sep 28 17:10:54 2006 +0100
@@ -886,7 +886,7 @@ static int fixup_page_fault(unsigned lon
          /* Do not check if access-protection fault since the page may 
             legitimately be not present in shadow page tables */
          ((regs->error_code & PFEC_write_access) == PFEC_write_access) &&
-         ptwr_do_page_fault(d, addr, regs) )
+         ptwr_do_page_fault(v, addr, regs) )
         return EXCRET_fault_fixed;
 
     if ( shadow_mode_enabled(d) )
diff -r 5f42b4824e45 -r b6ee084892da xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/include/asm-x86/domain.h      Thu Sep 28 17:10:54 2006 +0100
@@ -139,7 +139,7 @@ struct shadow_vcpu {
     /* Last MFN that we emulated a write to. */
     unsigned long last_emulated_mfn;
     /* HVM guest: paging enabled (CR0.PG)?  */
-    unsigned int hvm_paging_enabled:1;
+    unsigned int translate_enabled:1;
     /* Emulated fault needs to be propagated to guest? */
     unsigned int propagate_fault:1;
 #if CONFIG_PAGING_LEVELS >= 3
diff -r 5f42b4824e45 -r b6ee084892da xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h        Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/include/asm-x86/guest_access.h        Thu Sep 28 17:10:54 2006 +0100
@@ -8,6 +8,7 @@
 #define __ASM_X86_GUEST_ACCESS_H__
 
 #include <asm/uaccess.h>
+#include <asm/shadow.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/guest_access.h>
 
@@ -33,7 +34,7 @@
 #define copy_to_guest_offset(hnd, off, ptr, nr) ({      \
     const typeof(ptr) _x = (hnd).p;                     \
     const typeof(ptr) _y = (ptr);                       \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
     copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));       \
 })
@@ -45,7 +46,7 @@
 #define copy_from_guest_offset(ptr, hnd, off, nr) ({    \
     const typeof(ptr) _x = (hnd).p;                     \
     const typeof(ptr) _y = (ptr);                       \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
     copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));     \
 })
@@ -54,7 +55,7 @@
 #define copy_field_to_guest(hnd, ptr, field) ({         \
     const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
     const typeof(&(ptr)->field) _y = &(ptr)->field;     \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
     copy_to_user(_x, _y, sizeof(*_x));                  \
 })
@@ -63,7 +64,7 @@
 #define copy_field_from_guest(ptr, hnd, field) ({       \
     const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
     const typeof(&(ptr)->field) _y = &(ptr)->field;     \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
     copy_from_user(_y, _x, sizeof(*_x));                \
 })
@@ -73,12 +74,13 @@
  * Allows use of faster __copy_* functions.
  */
 #define guest_handle_okay(hnd, nr)                      \
-    (hvm_guest(current) || array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
+    (shadow_mode_external(current->domain) ||           \
+     array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
 
 #define __copy_to_guest_offset(hnd, off, ptr, nr) ({    \
     const typeof(ptr) _x = (hnd).p;                     \
     const typeof(ptr) _y = (ptr);                       \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
     __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));     \
 })
@@ -86,7 +88,7 @@
 #define __copy_from_guest_offset(ptr, hnd, off, nr) ({  \
     const typeof(ptr) _x = (hnd).p;                     \
     const typeof(ptr) _y = (ptr);                       \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
     __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));   \
 })
@@ -94,7 +96,7 @@
 #define __copy_field_to_guest(hnd, ptr, field) ({       \
     const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
     const typeof(&(ptr)->field) _y = &(ptr)->field;     \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
     __copy_to_user(_x, _y, sizeof(*_x));                \
 })
@@ -102,7 +104,7 @@
 #define __copy_field_from_guest(ptr, hnd, field) ({     \
     const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
     const typeof(&(ptr)->field) _y = &(ptr)->field;     \
-    hvm_guest(current) ?                                \
+    shadow_mode_translate(current->domain) ?            \
     copy_from_user_hvm(_x, _y, sizeof(*_x)) :           \
     __copy_from_user(_y, _x, sizeof(*_x));              \
 })
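
The guest_access.h hunks above all make the same substitution: the copy macros now choose the paging-aware copy path whenever the domain is in translate mode, not only for HVM guests. A small sketch of that select-a-copy-routine-per-domain pattern, with stand-in functions in place of Xen's copy_to_user()/copy_to_user_hvm():

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-ins for the two copy paths. */
    static unsigned long copy_to_guest_via_p2m(void *to, const void *from, size_t n)
    {
        /* would translate the guest address via the shadow/p2m machinery */
        memcpy(to, from, n);
        return 0;                   /* 0 == all bytes copied */
    }

    static unsigned long copy_to_guest_direct(void *to, const void *from, size_t n)
    {
        /* would copy through the directly-usable guest virtual address */
        memcpy(to, from, n);
        return 0;
    }

    /* The macro pattern: pick the copy routine per-domain at the call site. */
    #define copy_to_guest_sketch(translated, to, from, n)         \
        ((translated) ? copy_to_guest_via_p2m((to), (from), (n))  \
                      : copy_to_guest_direct((to), (from), (n)))

    int main(void)
    {
        char dst[8];
        int domain_is_translated = 1;   /* e.g. HVM or shadow-translate PV */
        unsigned long left = copy_to_guest_sketch(domain_is_translated,
                                                  dst, "hello", 6);
        printf("bytes not copied: %lu, dst=%s\n", left, dst);
        return 0;
    }

In the real macros the selector is shadow_mode_translate(current->domain), evaluated at each call site, so a single build serves both direct-paging and translated domains.
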
diff -r 5f42b4824e45 -r b6ee084892da xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/include/asm-x86/mm.h  Thu Sep 28 17:10:54 2006 +0100
@@ -348,7 +348,7 @@ void memguard_unguard_range(void *p, uns
 
 void memguard_guard_stack(void *p);
 
-int  ptwr_do_page_fault(struct domain *, unsigned long,
+int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                         struct cpu_user_regs *);
 
 int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
diff -r 5f42b4824e45 -r b6ee084892da xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Sep 28 17:09:11 2006 +0100
+++ b/xen/include/asm-x86/shadow.h      Thu Sep 28 17:10:54 2006 +0100
@@ -26,6 +26,7 @@
 #include <public/domctl.h> 
 #include <xen/sched.h>
 #include <xen/perfc.h>
+#include <xen/domain_page.h>
 #include <asm/flushtlb.h>
 
 /* How to make sure a page is not referred to in a shadow PT */
@@ -245,7 +246,9 @@ shadow_vcpu_mode_translate(struct vcpu *
     // enabled.  (HVM vcpu's with paging disabled are using the p2m table as
     // its paging table, so no translation occurs in this case.)
     //
-    return v->arch.shadow.hvm_paging_enabled;
+    // It is also true for translated PV domains.
+    //
+    return v->arch.shadow.translate_enabled;
 }
 
 
@@ -287,6 +290,10 @@ struct shadow_paging_mode {
                                             struct x86_emulate_ctxt *ctxt);
     mfn_t         (*make_monitor_table    )(struct vcpu *v);
     void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
+    void *        (*guest_map_l1e         )(struct vcpu *v, unsigned long va,
+                                            unsigned long *gl1mfn);
+    void          (*guest_get_eff_l1e     )(struct vcpu *v, unsigned long va,
+                                            void *eff_l1e);
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
     int           (*guess_wrmap           )(struct vcpu *v, 
                                             unsigned long vaddr, mfn_t gmfn);
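
The guest_map_l1e/guest_get_eff_l1e members added above slot into the existing per-paging-mode ops table, so callers reach the right walker through a function pointer. A simplified sketch of that dispatch pattern (types and names here are invented for illustration, not Xen's):

    #include <stdio.h>

    typedef struct vcpu vcpu_t;

    struct paging_ops {
        void *(*guest_map_l1e)(vcpu_t *v, unsigned long va, unsigned long *gl1mfn);
        void  (*guest_get_eff_l1e)(vcpu_t *v, unsigned long va, void *eff_l1e);
    };
    struct vcpu { const struct paging_ops *ops; };

    /* One mode's implementation (placeholder behaviour only). */
    static unsigned long dummy_l1e;
    static void *mode3_map_l1e(vcpu_t *v, unsigned long va, unsigned long *gl1mfn)
    {
        (void)v;
        *gl1mfn = va >> 12;          /* illustrative only */
        return &dummy_l1e;           /* pointer the caller later unmaps */
    }
    static void mode3_get_eff_l1e(vcpu_t *v, unsigned long va, void *eff_l1e)
    {
        (void)v; (void)va;
        *(unsigned long *)eff_l1e = 0;
    }

    static const struct paging_ops mode3_ops = {
        .guest_map_l1e     = mode3_map_l1e,
        .guest_get_eff_l1e = mode3_get_eff_l1e,
    };

    int main(void)
    {
        struct vcpu v = { .ops = &mode3_ops };
        unsigned long gl1mfn = 0, eff = ~0ul;
        void *l1e_p = v.ops->guest_map_l1e(&v, 0x12345000ul, &gl1mfn);
        v.ops->guest_get_eff_l1e(&v, 0x12345000ul, &eff);
        printf("mapped=%p gl1mfn=%#lx eff=%#lx\n", l1e_p, gl1mfn, eff);
        return 0;
    }
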
@@ -452,9 +459,73 @@ shadow_destroy_monitor_table(struct vcpu
     v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
 }
 
+static inline void *
+guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
+{
+    if ( likely(!shadow_mode_translate(v->domain)) )
+    {
+        l2_pgentry_t l2e;
+        ASSERT(!shadow_mode_external(v->domain));
+        /* Find this l1e and its enclosing l1mfn in the linear map */
+        if ( __copy_from_user(&l2e, 
+                              &__linear_l2_table[l2_linear_offset(addr)],
+                              sizeof(l2_pgentry_t)) != 0 )
+            return NULL;
+        /* Check flags that it will be safe to read the l1e */
+        if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 
+             != _PAGE_PRESENT )
+            return NULL;
+        *gl1mfn = l2e_get_pfn(l2e);
+        return &__linear_l1_table[l1_linear_offset(addr)];
+    }
+
+    return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
+}
+
+static inline void
+guest_unmap_l1e(struct vcpu *v, void *p)
+{
+    if ( unlikely(shadow_mode_translate(v->domain)) )
+        unmap_domain_page(p);
+}
+
+static inline void
+guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+{
+    if ( likely(!shadow_mode_translate(v->domain)) )
+    {
+        ASSERT(!shadow_mode_external(v->domain));
+        if ( __copy_from_user(eff_l1e, 
+                              &__linear_l1_table[l1_linear_offset(addr)],
+                              sizeof(l1_pgentry_t)) != 0 )
+            *(l1_pgentry_t *)eff_l1e = l1e_empty();
+        return;
+    }
+        
+    v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e);
+}
+
+static inline void
+guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
+{
+#if defined(__x86_64__)
+    int user_mode = !(v->arch.flags & TF_kernel_mode);
+#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
+#else
+#define TOGGLE_MODE() ((void)0)
+#endif
+
+    TOGGLE_MODE();
+    guest_get_eff_l1e(v, addr, eff_l1e);
+    TOGGLE_MODE();
+}
+
+
 /* Validate a pagetable change from the guest and update the shadows. */
 extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
                                         void *new_guest_entry);
+extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
+                                         void *entry, u32 size);
 
 /* Update the shadows in response to a pagetable write from a HVM guest */
 extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
@@ -629,7 +700,14 @@ sh_mfn_to_gfn(struct domain *d, mfn_t mf
         return mfn_x(mfn);
 }
 
-
+static inline l1_pgentry_t
+gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
+{
+    if ( unlikely(shadow_mode_translate(d)) )
+        l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)),
+                           l1e_get_flags(l1e));
+    return l1e;
+}
 
 #endif /* _XEN_SHADOW_H */
 

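Among the new inlines in shadow.h, guest_get_eff_kern_l1e() is the least obvious: on 64-bit it temporarily switches a vcpu that is currently on its user pagetables back onto the kernel pagetables, performs the lookup, then restores the original mode. A self-contained sketch of that toggle-around-an-operation pattern (toggle_guest_mode() and the lookup here are stand-ins, not Xen's implementations):

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu_sketch {
        bool kernel_mode;            /* which pagetable set is active */
    };

    /* Stand-in for toggle_guest_mode(): swap user/kernel pagetables. */
    static void toggle_guest_mode(struct vcpu_sketch *v)
    {
        v->kernel_mode = !v->kernel_mode;
    }

    /* Pretend lookup whose result depends on the active pagetables. */
    static unsigned long get_eff_l1e(struct vcpu_sketch *v, unsigned long va)
    {
        return v->kernel_mode ? (va | 1ul) : 0ul;   /* illustrative only */
    }

    /* Always perform the lookup against the *kernel* pagetables,
     * restoring whatever mode the vcpu was in afterwards. */
    static unsigned long get_eff_kern_l1e(struct vcpu_sketch *v, unsigned long va)
    {
        bool was_user = !v->kernel_mode;
        unsigned long l1e;

        if (was_user)
            toggle_guest_mode(v);
        l1e = get_eff_l1e(v, va);
        if (was_user)
            toggle_guest_mode(v);
        return l1e;
    }

    int main(void)
    {
        struct vcpu_sketch v = { .kernel_mode = false };
        printf("l1e=%#lx, back in %s mode\n",
               get_eff_kern_l1e(&v, 0xc0000000ul),
               v.kernel_mode ? "kernel" : "user");
        return 0;
    }
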