   
 
 
[Xen-changelog] [xen-unstable] [HVM] Add type information to the p2m map

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [HVM] Add type information to the p2m map.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 11 Sep 2007 15:30:11 -0700
Delivery-date: Tue, 11 Sep 2007 15:31:36 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1189431750 -3600
# Node ID 4633e9604da9c51f077285465d63db1820e6f574
# Parent  1474db8058b20753eb465273f7dbf5e10662bf0f
[HVM] Add type information to the p2m map.
This is a base for memory tricks like page sharing, copy-on-write, lazy
allocation etc.  It should also make pass-through MMIO easier to
implement in the p2m.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c           |   33 ++++---
 xen/arch/x86/hvm/io.c            |    9 -
 xen/arch/x86/hvm/svm/svm.c       |   32 ++++--
 xen/arch/x86/hvm/vmx/vmx.c       |   18 ++-
 xen/arch/x86/mm/hap/guest_walk.c |   10 +-
 xen/arch/x86/mm/hap/hap.c        |   10 +-
 xen/arch/x86/mm/p2m.c            |  122 +++++++++++++++++---------
 xen/arch/x86/mm/shadow/common.c  |   24 +++--
 xen/arch/x86/mm/shadow/multi.c   |  138 +++++++++++++++++++-----------
 xen/arch/x86/mm/shadow/types.h   |    2 
 xen/include/asm-x86/mm.h         |    2 
 xen/include/asm-x86/p2m.h        |  179 ++++++++++++++++++++++++++++-----------
 12 files changed, 379 insertions(+), 200 deletions(-)
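A minimal sketch of the typed lookup convention this changeset introduces, mirroring the pattern used repeatedly in the hunks below (e.g. in hvm_set_ioreq_page); a domain pointer d and a gfn are assumed to be in scope, and error handling is reduced to a bare return:

    p2m_type_t p2mt;
    unsigned long mfn;

    /* The lookup now reports the entry's p2m type as well as the MFN. */
    mfn = mfn_x(gfn_to_mfn(d, gfn, &p2mt));

    /* Callers that want ordinary guest RAM test the type, not the MFN. */
    if ( !p2m_is_ram(p2mt) )
        return -EINVAL;        /* e.g. emulated MMIO (p2m_mmio_dm) */

    /* For RAM types the p2m guarantees a valid MFN. */
    ASSERT(mfn_valid(mfn));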

diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Sep 10 14:42:30 2007 +0100
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
 {
     struct page_info *page;
+    p2m_type_t p2mt;
     unsigned long mfn;
     void *va;
 
-    mfn = gmfn_to_mfn(d, gmfn);
-    if ( !mfn_valid(mfn) )
+    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
+    ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
     if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ int hvm_set_cr0(unsigned long value)
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
-    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+    p2m_type_t p2mt;
+    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
   
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
 
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
         if ( !paging_mode_hap(v->domain) )
         {
             /* The guest CR3 must be pointing to the guest physical. */
-            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || 
+                 !get_page(mfn_to_page(mfn), v->domain))
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr3(unsigned long value)
 int hvm_set_cr3(unsigned long value)
 {
     unsigned long mfn;
+    p2m_type_t p2mt;
     struct vcpu *v = current;
 
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
-        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
-            goto bad_cr3;
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+             !get_page(mfn_to_page(mfn), v->domain) )
+              goto bad_cr3;
 
         put_page(pagetable_get_page(v->arch.guest_table));
         v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ static int __hvm_copy(void *buf, paddr_t
 static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
 {
     unsigned long gfn, mfn;
+    p2m_type_t p2mt;
     char *p;
     int count, todo;
 
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
         else
             gfn = addr >> PAGE_SHIFT;
         
-        mfn = get_mfn_from_gpfn(gfn);
-
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+
+        if ( !p2m_is_ram(p2mt) )
             return todo;
+        ASSERT(mfn_valid(mfn));
 
         p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
 
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/io.c     Mon Sep 10 14:42:30 2007 +0100
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
     ioreq_t *p;
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
-    unsigned long gmfn;
     struct vcpu *v = current;
-    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs   = &io_opp->io_context;
@@ -861,13 +859,6 @@ void hvm_io_assist(void)
     regs->eflags &= ~X86_EFLAGS_RF;
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
-
-    /* Has memory been dirtied? */
-    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
-    {
-        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        paging_mark_dirty(d, gmfn);
-    }
 
  out:
     vcpu_end_shutdown_deferral(v);
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 14:42:30 2007 +0100
@@ -338,6 +338,7 @@ int svm_vmcb_restore(struct vcpu *v, str
 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     {
         if ( c->cr0 & X86_CR0_PG )
         {
-            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                          c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
     return 1;
 }
 
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
-{
-    if (mmio_space(gpa)) {
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+    p2m_type_t p2mt;
+    mfn_t mfn;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+
+    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+    mfn = gfn_to_mfn_current(gfn, &p2mt);
+    if ( p2mt == p2m_mmio_dm )
+    {
         handle_mmio(gpa);
-        return 1;
-    }
-
-    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
-    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+        return;
+    }
+
+    /* Log-dirty: mark the page dirty and let the guest write it again */
+    paging_mark_dirty(current->domain, mfn_x(mfn));
+    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2341,8 +2350,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     case VMEXIT_NPF:
         regs->error_code = vmcb->exitinfo1;
-        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
-            domain_crash(v->domain);
+        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
         break;
 
     default:
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 14:42:30 2007 +0100
@@ -566,6 +566,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->pending_valid &&
          ((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
             return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
          * Note that this leaf lives at <max-hypervisor-leaf> + 1.
          */
         u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
-        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+        p2m_type_t p2mt;
+        unsigned long mfn;
         struct vcpu *v = current;
         char *p;
 
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
         gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
 
         /* 8-byte aligned valid pseudophys address from vmxassist, please. */
-        if ( (value & 7) || (mfn == INVALID_MFN) ||
+        if ( (value & 7) || !p2m_is_ram(p2mt) ||
              !v->arch.hvm_vmx.vmxassist_enabled )
         {
             domain_crash(v->domain);
             return;
         }
+        ASSERT(mfn_valid(mfn));
 
         p = map_domain_page(mfn);
         value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static int vmx_world_restore(struct vcpu
 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
             return -EINVAL;
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 14:42:30 2007 +0100
@@ -28,7 +28,8 @@
 #include <xen/sched.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/hap.h>
 
 #include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     int lev, index;
     paddr_t gpa = 0;
     unsigned long gpfn, mfn;
+    p2m_type_t p2mt;
     int success = 1;
 
     l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     gpfn = (gcr3 >> PAGE_SHIFT);
     for ( lev = mode; lev >= 1; lev-- )
     {
-        mfn = get_mfn_from_gpfn(gpfn);
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+        if ( !p2m_is_ram(p2mt) )
         {
             HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
                        lev);
             success = 0;
             break;
         }
+        ASSERT(mfn_valid(mfn));
+
         index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
 
 #if GUEST_PAGING_LEVELS >= 4
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/hap.c Mon Sep 10 14:42:30 2007 +0100
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
     d->arch.paging.mode |= PG_log_dirty;
     hap_unlock(d);
 
-    /* set l1e entries of P2M table to NOT_WRITABLE. */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
 }
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain 
     hap_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
-    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
     return 0;
 }
 
 void hap_clean_dirty_bitmap(struct domain *d)
 {
-    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
 
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/p2m.c     Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -93,6 +93,31 @@
 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
 
 
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t) 
+{
+    unsigned long flags = (t & 0x7UL) << 9;
+    switch(t)
+    {
+    case p2m_invalid:
+    default:
+        return flags;
+    case p2m_ram_rw:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW;
+    case p2m_ram_logdirty:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_ram_ro:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_mmio_dm:
+        return flags;
+    case p2m_mmio_direct:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+    }
+}
+
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
 }
 
 mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
-    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
 
     ASSERT(paging_mode_translate(d));
+
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+    *t = p2m_mmio_dm;
+
     mfn = pagetable_get_mfn(d->arch.phys_table);
 
-
-    if ( gpfn > d->arch.p2m.max_mapped_pfn )
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
         return _mfn(INVALID_MFN);
     }
     mfn = _mfn(l1e_get_pfn(*l1e));
+    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
     unmap_domain_page(l1e);
 
-    return mfn;
+    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
 #if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
         return;
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
-    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
-    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
     set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
 }
 
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
                        unsigned long mfn)
 {
     unsigned long ogfn;
+    p2m_type_t ot;
     mfn_t omfn;
 
     if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    omfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(omfn) )
-    {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    omfn = gfn_to_mfn(d, gfn, &ot);
+    if ( p2m_is_ram(ot) )
+    {
+        ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
         /* This machine frame is already mapped at another physical address */
         P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                   mfn, ogfn, gfn);
-        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
-        {
+        omfn = gfn_to_mfn(d, ogfn, &ot);
+        if ( p2m_is_ram(ot) )
+        {
+            ASSERT(mfn_valid(omfn));
             P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                       ogfn , mfn_x(omfn));
             if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
-    set_gpfn_from_mfn(mfn, gfn);
+    if ( mfn_valid(_mfn(mfn)) ) 
+    {
+        set_p2m_entry(d, gfn, _mfn(mfn),
+                  p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+        set_gpfn_from_mfn(mfn, gfn);
+    }
+    else
+    {
+        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+                 gfn, mfn);
+        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+    }
 
     audit_p2m(d);
     p2m_unlock(d);
 }
 
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
-{
-    unsigned long mfn, gfn;
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type.  This is used in hardware-assisted paging to 
+ * quickly enable or disable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
+{
+    unsigned long mfn, gfn, flags;
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain 
 
                 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                 {
-                    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                    flags = l1e_get_flags(l1e[i1]);
+                    if ( p2m_flags_to_type(flags) != ot )
                         continue;
                     mfn = l1e_get_pfn(l1e[i1]);
                     gfn = get_gpfn_from_mfn(mfn);
-                    /* create a new 1le entry using l1e_flags */
-                    l1e_content = l1e_from_pfn(mfn, l1e_flags);
+                    /* create a new l1e entry with the new type */
+                    flags = p2m_type_to_flags(nt);
+                    l1e_content = l1e_from_pfn(mfn, flags);
                     paging_write_p2m_entry(d, gfn, &l1e[i1],
                                            l1mfn, l1e_content, 1);
                 }
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain 
     p2m_unlock(d);
 }
 
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
-{
-    unsigned long gfn;
+/* Modify the p2m type of a single gfn from ot to nt, returning the 
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, 
+                           p2m_type_t ot, p2m_type_t nt)
+{
+    p2m_type_t pt;
     mfn_t mfn;
 
     p2m_lock(d);
 
-    gfn = gpa >> PAGE_SHIFT;
-    mfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(mfn) )
-        set_p2m_entry(d, gfn, mfn, l1e_flags);
+    mfn = gfn_to_mfn(d, gfn, &pt);
+    if ( pt == ot )
+        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
 
     p2m_unlock(d);
 
-    return 1;
+    return pt;
 }
 
 /*
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 14:42:30 2007 +0100
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
                        l1_pgentry_t new, unsigned int level)
 {
     struct domain *d = v->domain;
-    mfn_t mfn;
     
     shadow_lock(d);
 
-    /* handle physmap_add and physmap_remove */
-    mfn = gfn_to_mfn(d, gfn);
-    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
-        sh_remove_all_shadows_and_parents(v, mfn);
-        if ( sh_remove_all_mappings(v, mfn) )
-            flush_tlb_mask(d->domain_dirty_cpumask);    
-    }
-    
-    /* update the entry with new content */
+    /* If we're removing an MFN from the p2m, remove it from the shadows too */
+    if ( level == 1 )
+    {
+        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) ) 
+        {
+            sh_remove_all_shadows_and_parents(v, mfn);
+            if ( sh_remove_all_mappings(v, mfn) )
+                flush_tlb_mask(d->domain_dirty_cpumask);    
+        }
+    }
+
+    /* Update the entry with new content */
     safe_write_pte(p, new);
 
     /* install P2M in monitors for PAE Xen */
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 14:42:30 2007 +0100
@@ -209,6 +209,7 @@ guest_walk_tables(struct vcpu *v, unsign
 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
 {
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     ASSERT(!guest_op || shadow_locked_by_me(d));
     
     perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
         + guest_l4_table_offset(va);
     /* Walk down to the l3e */
     if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
-    if ( !mfn_valid(gw->l3mfn) ) return 1;
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
 #endif /* PAE or 64... */
     /* Walk down to the l2e */
     if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
-    if ( !mfn_valid(gw->l2mfn) ) return 1;
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
     else 
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
-        if ( !mfn_valid(gw->l1mfn) ) return 1;
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        if ( !p2m_is_ram(p2mt) ) return 1;
+        ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
         if ( guest_op 
              && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ _sh_propagate(struct vcpu *v,
               void *shadow_entry_ptr,
               int level,
               fetch_type_t ft, 
-              int mmio)
+              p2m_type_t p2mt)
 {
     guest_l1e_t *gp = guest_entry_ptr;
     shadow_l1e_t *sp = shadow_entry_ptr;
@@ -636,6 +640,13 @@ _sh_propagate(struct vcpu *v,
 
     /* We don't shadow PAE l3s */
     ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
+    /* Check there's something for the shadows to map to */
+    if ( !p2m_is_valid(p2mt) )
+    {
+        *sp = shadow_l1e_empty();
+        goto done;
+    }
 
     if ( mfn_valid(guest_table_mfn) )
         /* Handle A and D bit propagation into the guest */
@@ -658,19 +669,22 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( level == 1 && mmio )
-    {
-        /* Guest l1e maps MMIO space */
+    if ( level == 1 && p2mt == p2m_mmio_dm )
+    {
+        /* Guest l1e maps emulated MMIO space */
         *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
         if ( !d->arch.paging.shadow.has_fast_mmio_entries )
             d->arch.paging.shadow.has_fast_mmio_entries = 1;
         goto done;
     }
 
-    // Must have a valid target_mfn, unless this is a prefetch.  In the
+    // Must have a valid target_mfn unless this is a prefetch.  In the
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
+    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( !mfn_valid(target_mfn) )
     {
         ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ _sh_propagate(struct vcpu *v,
     // Only allow the guest write access to a page a) on a demand fault,
     // or b) if the page is already marked as dirty.
     //
+    // (We handle log-dirty entirely inside the shadow code, without using the 
+    // p2m_ram_logdirty p2m type: only HAP uses that.)
     if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
     {
         if ( ft & FETCH_TYPE_WRITE ) 
@@ -725,6 +741,10 @@ _sh_propagate(struct vcpu *v,
         else if ( !sh_mfn_is_dirty(d, target_mfn) )
             sflags &= ~_PAGE_RW;
     }
+
+    /* Read-only memory */
+    if ( p2mt == p2m_ram_ro ) 
+        sflags &= ~_PAGE_RW;
     
     // protect guest page tables
     //
@@ -754,7 +774,12 @@ _sh_propagate(struct vcpu *v,
         sflags |= _PAGE_USER;
     }
 
+    /* MMIO addresses should never be cached */
+    if ( p2m_is_mmio(p2mt) )
+        sflags |= _PAGE_PCD;
+
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
                          mfn_t gmfn, 
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft, 
-                         int mmio)
-{
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+                         p2m_type_t p2mt)
+{
+    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
     if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
     {
         gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
-        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
-        if ( mfn_valid(gl3mfn) )
+        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
     guest_l3e_t *new_gl3e = new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
         gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
-        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
-        if ( mfn_valid(gl2mfn) )
+        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
     guest_l2e_t *new_gl2e = new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
         }
         else
         {
-            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
-            if ( mfn_valid(gl1mfn) )
+            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+            if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
             else
                 result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
-    int result = 0, mmio;
+    p2m_type_t p2mt;
+    int result = 0;
 
     perfc_incr(shadow_validate_gl1e_calls);
 
     gfn = guest_l1e_get_gfn(*new_gl1e);
-    gmfn = gfn_to_mfn(v->domain, gfn);
-
-    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
     l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
-                             ft_prefetch, mmio);
+                             ft_prefetch, p2mt);
     
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2554,12 +2582,13 @@ static void sh_prefetch(struct vcpu *v, 
 static void sh_prefetch(struct vcpu *v, walk_t *gw, 
                         shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
 {
-    int i, dist, mmio;
+    int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
     guest_l1e_t gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
+    p2m_type_t p2mt;
 
     /* Prefetch no further than the end of the _shadow_ l1 MFN */
     dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v, 
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
-        gmfn = gfn_to_mfn(v->domain, gfn);
-        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
         /* Propagate the entry.  Safe to use a pointer to our local 
          * gl1e, since this is not a demand-fetch so there will be no 
          * write-back to the guest. */
         l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, mmio);
+                                 gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
 }
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
     paddr_t gpa;
     struct sh_emulate_ctxt emul_ctxt;
     struct x86_emulate_ops *emul_ops;
-    int r, mmio;
+    int r;
     fetch_type_t ft = 0;
+    p2m_type_t p2mt;
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                    v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
-    gmfn = gfn_to_mfn(d, gfn);
-    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
-    if ( !mmio && !mfn_valid(gmfn) )
+    gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
     {
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
-                             gmfn, &sl1e, ft, mmio);
+                             gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
         }
     }
 
-    if ( mmio ) 
+    /* Need to hand off device-model MMIO and writes to read-only
+     * memory to the device model */
+    if ( p2mt == p2m_mmio_dm 
+         || (p2mt == p2m_ram_ro && ft == ft_demand_write) ) 
     {
         gpa = guest_walk_to_gpa(&gw);
         goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
         int flush = 0;
         gfn_t gl2gfn;
         mfn_t gl2mfn;
+        p2m_type_t p2mt;
         guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
         /* First, make all four entries read-only. */
         for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
             }
         }
         if ( flush ) 
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
-                                       ? SH_type_l2h_shadow 
-                                       : SH_type_l2_shadow);
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
+                                           ? SH_type_l2h_shadow 
+                                           : SH_type_l2_shadow);
+                else
+                    sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
             }
             else
-                /* The guest is not present: clear out the shadow. */
                 sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
         }
     }
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
     u32 flags, errcode;
     gfn_t gfn;
     mfn_t mfn;
+    p2m_type_t p2mt;
 
     /* We don't emulate user-mode writes to page tables */
     if ( ring_3(sh_ctxt->ctxt.regs) ) 
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
         }
     }
 #endif
-    mfn = gfn_to_mfn(v->domain, gfn);
 
     errcode = PFEC_write_access;
     if ( !(flags & _PAGE_PRESENT) ) 
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
     if ( !(flags & _PAGE_RW) ) 
         goto page_fault;
 
-    if ( mfn_valid(mfn) )
-    {
+    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+    if ( p2m_is_ram(p2mt) )
+    {
+        ASSERT(mfn_valid(mfn));
         *mfnp = mfn;
         v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
         return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
 /* Convert this gfn to an mfn in the manner appropriate for the
  * guest pagetable it's used in (gmfn) */ 
 {
+    p2m_type_t p2mt;
     if ( !shadow_mode_translate(v->domain) )
         return _mfn(gfn_x(gfn));
     
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
          != PGT_writable_page ) 
         return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
     else 
-        return gfn_to_mfn(v->domain, gfn);
+        return gfn_to_mfn(v->domain, gfn, &p2mt);
 } 
 
 
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 14:42:30 2007 +0100
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
 
 
 /* Type used for recording a walk through guest pagetables.  It is
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/mm.h  Mon Sep 10 14:42:30 2007 +0100
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
       ? get_gpfn_from_mfn(mfn)                          \
       : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
-
 #define INVALID_MFN             (~0UL)
 
 #ifdef CONFIG_COMPAT
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/p2m.h Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -27,49 +27,141 @@
 #define _XEN_P2M_H
 
 
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization.  It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers 
+ * to machine frame numbers.  It only exists for paging_mode_translate 
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ *     assisted paging and (hopefully) iommu support; and 
+ * (2) lets us map it directly into the guest vcpus' virtual address space 
+ *     as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest.  (For 
+ * paging_mode_external() guests this mapping is in the monitor table.)
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
-
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
-    l1_pgentry_t l1e = l1e_empty();
-    int ret;
-
-    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
-        return _mfn(INVALID_MFN);
-
-    /* Don't read off the end of the p2m table */
-    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
-
-    ret = __copy_from_user(&l1e,
-                           &phys_to_machine_mapping[gfn],
-                           sizeof(l1e));
-
-    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
-        return _mfn(l1e_get_pfn(l1e));
-
-    return _mfn(INVALID_MFN);
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all 
+ * variation in the access control bits is made in the level-1 PTEs.
+ * 
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame.  We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+    p2m_invalid = 0,            /* Nothing mapped here */
+    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
+    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
+                       | p2m_to_mask(p2m_ram_logdirty)  \
+                       | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
+                        | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
+                      | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    return (flags >> 9) & 0x7;
+}
+ 
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+    mfn_t mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt = p2m_mmio_dm;
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+
+    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+    {
+        l1_pgentry_t l1e = l1e_empty();
+        int ret;
+
+        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
+               / sizeof(l1_pgentry_t));
+
+        /* Need to __copy_from_user because the p2m is sparse and this
+         * part might not exist */
+        ret = __copy_from_user(&l1e,
+                               &phys_to_machine_mapping[gfn],
+                               sizeof(l1e));
+
+        if ( ret == 0 ) {
+            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+            if ( p2m_is_valid(p2mt) )
+                mfn = _mfn(l1e_get_pfn(l1e));
+            else 
+                /* XXX see above */
+                p2mt = p2m_mmio_dm;
+        }
+    }
+
+    *t = p2mt;
+    return mfn;
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
 
 /* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+                                unsigned long gfn, p2m_type_t *t)
 {
     if ( !paging_mode_translate(d) )
+    {
+        /* Not necessarily true, but for non-translated guests, we claim
+         * it's the most generic kind of memory */
+        *t = p2m_ram_rw;
         return _mfn(gfn);
+    }
     if ( likely(current->domain == d) )
-        return gfn_to_mfn_current(gfn);
+        return gfn_to_mfn_current(gfn, t);
     else 
-        return gfn_to_mfn_foreign(d, gfn);
+        return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+    mfn_t mfn;
+    p2m_type_t t;
+    mfn = gfn_to_mfn(d, gpfn, &t);
+    if ( p2m_is_valid(t) )
+        return mfn_x(mfn);
+    return INVALID_MFN;
 }
 
 /* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
         return mfn_x(mfn);
 }
 
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
-    return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
-    unsigned long gfn = gpa >> PAGE_SHIFT;
-    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
 /* Translate the frame number held in an l1e from guest to machine */
 static inline l1_pgentry_t
 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
 }
 
 
-
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d);
 
@@ -130,11 +208,12 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                                unsigned long mfn);
 
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
-
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt);
 
 #endif /* _XEN_P2M_H */
 

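A minimal, self-contained model of the type encoding added by this patch: the p2m type is kept in the "available" PTE bits 9, 10 and 11, as in p2m_type_to_flags() and p2m_flags_to_type() above. The permission bits are omitted here, so this only illustrates the round-trip, not the real Xen PTE layout:

    #include <assert.h>

    typedef enum {
        p2m_invalid      = 0,
        p2m_ram_rw       = 1,
        p2m_ram_logdirty = 2,
        p2m_ram_ro       = 3,
        p2m_mmio_dm      = 4,
        p2m_mmio_direct  = 5,
    } p2m_type_t;

    /* Store the type in the "available" PTE bits 9, 10 and 11. */
    static unsigned long type_to_flags(p2m_type_t t)
    {
        return (t & 0x7UL) << 9;   /* permission bits omitted in this model */
    }

    /* Recover the type from the stored flags. */
    static p2m_type_t flags_to_type(unsigned long flags)
    {
        return (p2m_type_t)((flags >> 9) & 0x7);
    }

    int main(void)
    {
        /* Every type survives the encode/decode round-trip. */
        for ( int t = p2m_invalid; t <= p2m_mmio_direct; t++ )
            assert(flags_to_type(type_to_flags((p2m_type_t)t)) == (p2m_type_t)t);
        return 0;
    }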
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
