[Xen-changelog] [xen-unstable] merge with xen-unstable.hg (staging)

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg (staging)
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 11 Sep 2007 15:30:49 -0700
Delivery-date: Tue, 11 Sep 2007 15:34:12 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1189454336 21600
# Node ID 42b925c00d8aff2bf2abdd0d23c11fa75271ea58
# Parent  7d9b20d91102803532be2cc1d1b00733ceab12cc
# Parent  154769114a82c4051b196bdfe7fdea4b159d339f
merge with xen-unstable.hg (staging)
---
 tools/libxen/src/xen_xspolicy.c  |    2 
 xen/Makefile                     |    3 
 xen/arch/ia64/xen/domain.c       |    2 
 xen/arch/powerpc/domain.c        |    2 
 xen/arch/x86/acpi/power.c        |   15 ---
 xen/arch/x86/hvm/hvm.c           |   33 ++++---
 xen/arch/x86/hvm/io.c            |    9 -
 xen/arch/x86/hvm/svm/intr.c      |   35 +++++++
 xen/arch/x86/hvm/svm/svm.c       |   42 ++++++---
 xen/arch/x86/hvm/svm/vmcb.c      |   30 +++---
 xen/arch/x86/hvm/vmx/intr.c      |   29 ++++--
 xen/arch/x86/hvm/vmx/vmx.c       |   18 ++-
 xen/arch/x86/machine_kexec.c     |    3 
 xen/arch/x86/mm/hap/guest_walk.c |   10 +-
 xen/arch/x86/mm/hap/hap.c        |   10 +-
 xen/arch/x86/mm/p2m.c            |  122 +++++++++++++++++---------
 xen/arch/x86/mm/shadow/common.c  |   24 +++--
 xen/arch/x86/mm/shadow/multi.c   |  138 +++++++++++++++++++-----------
 xen/arch/x86/mm/shadow/types.h   |    2 
 xen/arch/x86/shutdown.c          |   10 --
 xen/arch/x86/smp.c               |   47 +++++++---
 xen/common/keyhandler.c          |    2 
 xen/common/shutdown.c            |    4 
 xen/drivers/char/console.c       |    2 
 xen/include/asm-x86/mm.h         |    2 
 xen/include/asm-x86/p2m.h        |  179 ++++++++++++++++++++++++++++-----------
 xen/include/xen/shutdown.h       |    2 
 27 files changed, 509 insertions(+), 268 deletions(-)

diff -r 7d9b20d91102 -r 42b925c00d8a tools/libxen/src/xen_xspolicy.c
--- a/tools/libxen/src/xen_xspolicy.c   Mon Sep 10 13:56:34 2007 -0600
+++ b/tools/libxen/src/xen_xspolicy.c   Mon Sep 10 13:58:56 2007 -0600
@@ -21,8 +21,8 @@
 #include <stddef.h>
 #include <stdlib.h>
 
+#include "xen_internal.h"
 #include "xen/api/xen_common.h"
-#include "xen/api/xen_internal.h"
 #include "xen/api/xen_xspolicy.h"
 
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/Makefile
--- a/xen/Makefile      Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/Makefile      Mon Sep 10 13:58:56 2007 -0600
@@ -35,12 +35,15 @@ _install: $(TARGET).gz build-headers
                $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/hvm
        [ -d $(DESTDIR)/usr/include/xen/io ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/io
+       [ -d $(DESTDIR)/usr/include/xen/xsm ] || \
+               $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/xsm
        [ -d $(DESTDIR)/usr/include/xen/foreign ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/foreign
        $(INSTALL_DATA) include/public/*.h $(DESTDIR)/usr/include/xen
        $(INSTALL_DATA) include/public/arch-x86/*.h $(DESTDIR)/usr/include/xen/arch-x86
        $(INSTALL_DATA) include/public/hvm/*.h $(DESTDIR)/usr/include/xen/hvm
        $(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io
+       $(INSTALL_DATA) include/public/xsm/*.h $(DESTDIR)/usr/include/xen/xsm
        $(INSTALL_DATA) include/public/foreign/*.h $(DESTDIR)/usr/include/xen/foreign
        $(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c        Mon Sep 10 13:58:56 2007 -0600
@@ -1487,7 +1487,7 @@ int __init construct_dom0(struct domain 
        return 0;
 }
 
-void machine_restart(char * __unused)
+void machine_restart(void)
 {
        console_start_sync();
        if (running_on_sim)
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/powerpc/domain.c Mon Sep 10 13:58:56 2007 -0600
@@ -119,7 +119,7 @@ void machine_halt(void)
     machine_fail(__func__);
 }
 
-void machine_restart(char * __unused)
+void machine_restart(void)
 {
     console_start_sync();
     printk("%s called\n", __func__);
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/acpi/power.c Mon Sep 10 13:58:56 2007 -0600
@@ -181,11 +181,6 @@ static long enter_state_helper(void *dat
 /*
  * Dom0 issues this hypercall in place of writing pm1a_cnt. Xen then
  * takes over the control and put the system into sleep state really.
- *
- * Guest may issue a two-phases write to PM1x_CNT, to work
- * around poorly implemented hardware. It's better to keep
- * this logic here. Two writes can be differentiated by 
- * enable bit setting.
  */
 int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep)
 {
@@ -204,16 +199,6 @@ int acpi_enter_sleep(struct xenpf_enter_
     if ( sleep->flags )
         return -EINVAL;
 
-    /* Write #1 */
-    if ( !(sleep->pm1a_cnt_val & ACPI_BITMASK_SLEEP_ENABLE) )
-    {
-        outw((u16)sleep->pm1a_cnt_val, acpi_sinfo.pm1a_cnt);
-        if ( acpi_sinfo.pm1b_cnt )
-            outw((u16)sleep->pm1b_cnt_val, acpi_sinfo.pm1b_cnt);
-        return 0;
-    }
-
-    /* Write #2 */
     acpi_sinfo.pm1a_cnt_val = sleep->pm1a_cnt_val;
     acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
     acpi_sinfo.sleep_state = sleep->sleep_state;
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Mon Sep 10 13:58:56 2007 -0600
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
 {
     struct page_info *page;
+    p2m_type_t p2mt;
     unsigned long mfn;
     void *va;
 
-    mfn = gmfn_to_mfn(d, gmfn);
-    if ( !mfn_valid(mfn) )
+    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
+    ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
     if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ int hvm_set_cr0(unsigned long value)
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
-    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+    p2m_type_t p2mt;
+    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
   
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
 
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
         if ( !paging_mode_hap(v->domain) )
         {
             /* The guest CR3 must be pointing to the guest physical. */
-            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || 
+                 !get_page(mfn_to_page(mfn), v->domain))
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr3(unsigned long value)
 int hvm_set_cr3(unsigned long value)
 {
     unsigned long mfn;
+    p2m_type_t p2mt;
     struct vcpu *v = current;
 
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
-        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
-            goto bad_cr3;
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+             !get_page(mfn_to_page(mfn), v->domain) )
+              goto bad_cr3;
 
         put_page(pagetable_get_page(v->arch.guest_table));
         v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ static int __hvm_copy(void *buf, paddr_t
 static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
 {
     unsigned long gfn, mfn;
+    p2m_type_t p2mt;
     char *p;
     int count, todo;
 
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
         else
             gfn = addr >> PAGE_SHIFT;
         
-        mfn = get_mfn_from_gpfn(gfn);
-
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+
+        if ( !p2m_is_ram(p2mt) )
             return todo;
+        ASSERT(mfn_valid(mfn));
 
         p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/io.c     Mon Sep 10 13:58:56 2007 -0600
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
     ioreq_t *p;
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
-    unsigned long gmfn;
     struct vcpu *v = current;
-    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs   = &io_opp->io_context;
@@ -861,13 +859,6 @@ void hvm_io_assist(void)
     regs->eflags &= ~X86_EFLAGS_RF;
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
-
-    /* Has memory been dirtied? */
-    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
-    {
-        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        paging_mark_dirty(d, gmfn);
-    }
 
  out:
     vcpu_end_shutdown_deferral(v);
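
[A pattern worth noting across this changeset: gfn_to_mfn() and friends now
return the p2m type of the translation through an out-parameter, and callers
branch on p2m_is_ram() or p2m_is_mmio() instead of relying on mfn_valid()
alone. A minimal standalone sketch of that calling convention -- stub types
and a made-up lookup stand in for the real Xen headers:]

#include <stdio.h>

typedef enum {
    p2m_invalid = 0, p2m_ram_rw, p2m_ram_logdirty,
    p2m_ram_ro, p2m_mmio_dm, p2m_mmio_direct,
} p2m_type_t;

#define p2m_to_mask(t) (1UL << (t))
#define P2M_RAM_TYPES  (p2m_to_mask(p2m_ram_rw) |       \
                        p2m_to_mask(p2m_ram_logdirty) | \
                        p2m_to_mask(p2m_ram_ro))
#define p2m_is_ram(t)  (p2m_to_mask(t) & P2M_RAM_TYPES)

/* Made-up lookup: pretend gfn 1 is RAM and everything else is
 * emulated MMIO.  The real gfn_to_mfn() reads the p2m pagetable. */
static unsigned long stub_gfn_to_mfn(unsigned long gfn, p2m_type_t *t)
{
    *t = (gfn == 1) ? p2m_ram_rw : p2m_mmio_dm;
    return (gfn == 1) ? 0x1234UL : ~0UL;   /* ~0UL plays INVALID_MFN */
}

int main(void)
{
    for ( unsigned long gfn = 0; gfn < 3; gfn++ )
    {
        p2m_type_t p2mt;
        unsigned long mfn = stub_gfn_to_mfn(gfn, &p2mt);

        /* Callers now branch on the type, not on mfn_valid() alone. */
        if ( p2m_is_ram(p2mt) )
            printf("gfn %lu -> mfn %#lx (RAM)\n", gfn, mfn);
        else
            printf("gfn %lu: not RAM, hand it to the device model\n", gfn);
    }
    return 0;
}
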
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Mon Sep 10 13:58:56 2007 -0600
@@ -30,6 +30,7 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
 #include <asm/hvm/svm/svm.h>
 #include <asm/hvm/svm/intr.h>
 #include <xen/event.h>
@@ -99,6 +100,33 @@ static void enable_intr_window(struct vc
     svm_inject_dummy_vintr(v);
 }
 
+static void update_cr8_intercept(
+    struct vcpu *v, enum hvm_intack masked_intr_source)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    int max_irr;
+
+    vmcb->cr_intercepts &= ~CR_INTERCEPT_CR8_WRITE;
+
+    /*
+     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
+     * window' has already been enabled in this case.
+     */
+    if ( (masked_intr_source == hvm_intack_lapic) ||
+         (masked_intr_source == hvm_intack_pic) )
+        return;
+
+    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
+    if ( !vlapic_enabled(vlapic) || 
+         ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
+        return;
+
+    /* Highest-priority pending interrupt is masked by the TPR? */
+    if ( (vmcb->vintr.fields.tpr & 0xf) >= (max_irr >> 4) )
+        vmcb->cr_intercepts |= CR_INTERCEPT_CR8_WRITE;
+}
+
 asmlinkage void svm_intr_assist(void) 
 {
     struct vcpu *v = current;
@@ -113,7 +141,7 @@ asmlinkage void svm_intr_assist(void)
     do {
         intr_source = hvm_vcpu_has_pending_irq(v);
         if ( likely(intr_source == hvm_intack_none) )
-            return;
+            goto out;
 
         /*
          * Pending IRQs must be delayed if:
@@ -133,7 +161,7 @@ asmlinkage void svm_intr_assist(void)
              !hvm_interrupts_enabled(v, intr_source) )
         {
             enable_intr_window(v, intr_source);
-            return;
+            goto out;
         }
     } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
 
@@ -152,6 +180,9 @@ asmlinkage void svm_intr_assist(void)
     intr_source = hvm_vcpu_has_pending_irq(v);
     if ( unlikely(intr_source != hvm_intack_none) )
         enable_intr_window(v, intr_source);
+
+ out:
+    update_cr8_intercept(v, intr_source);
 }
 
 /*
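
[The comparison in update_cr8_intercept() relies on the x86 priority rule:
a vector's priority class is its high nibble, and the LAPIC holds back any
vector whose class is less than or equal to the TPR's class; vmx/intr.c
below applies the same comparison when computing TPR_THRESHOLD. A
standalone sketch of the arithmetic -- function name and values are
illustrative only:]

#include <stdio.h>

/* Would the LAPIC hold back the highest pending interrupt?  vtpr is the
 * task-priority class (0-15); max_irr is the highest pending vector, or
 * -1 for none -- mirroring the checks in update_cr8_intercept(). */
static int tpr_masks_pending_irq(unsigned int vtpr, int max_irr)
{
    if ( max_irr == -1 )
        return 0;                      /* nothing pending at the LAPIC */
    return (vtpr & 0xf) >= (unsigned int)(max_irr >> 4);
}

int main(void)
{
    /* TPR class 3 masks vector 0x31 (class 3) but not 0x51 (class 5). */
    printf("0x31 masked: %d\n", tpr_masks_pending_irq(3, 0x31));  /* 1 */
    printf("0x51 masked: %d\n", tpr_masks_pending_irq(3, 0x51));  /* 0 */
    return 0;
}
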
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 13:58:56 2007 -0600
@@ -338,6 +338,7 @@ int svm_vmcb_restore(struct vcpu *v, str
 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     {
         if ( c->cr0 & X86_CR0_PG )
         {
-            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                          c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
     return 1;
 }
 
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
-{
-    if (mmio_space(gpa)) {
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+    p2m_type_t p2mt;
+    mfn_t mfn;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+
+    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+    mfn = gfn_to_mfn_current(gfn, &p2mt);
+    if ( p2mt == p2m_mmio_dm )
+    {
         handle_mmio(gpa);
-        return 1;
-    }
-
-    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
-    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+        return;
+    }
+
+    /* Log-dirty: mark the page dirty and let the guest write it again */
+    paging_mark_dirty(current->domain, mfn_x(mfn));
+    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2144,6 +2153,16 @@ asmlinkage void svm_vmexit_handler(struc
     eventinj_t eventinj;
     int inst_len, rc;
 
+    /*
+     * Before doing anything else, we need to sync up the VLAPIC's TPR with
+     * SVM's vTPR if CR8 writes are currently disabled.  It's OK if the 
+     * guest doesn't touch the CR8 (e.g. 32-bit Windows) because we update
+     * the vTPR on MMIO writes to the TPR.
+     */
+    if ( !(vmcb->cr_intercepts & CR_INTERCEPT_CR8_WRITE) )
+        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+                       (vmcb->vintr.fields.tpr & 0x0F) << 4);
+
     exit_reason = vmcb->exitcode;
 
     HVMTRACE_2D(VMEXIT, v, vmcb->rip, exit_reason);
@@ -2341,8 +2360,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     case VMEXIT_NPF:
         regs->error_code = vmcb->exitinfo1;
-        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
-            domain_crash(v->domain);
+        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
         break;
 
     default:
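
[The vTPR sync at the top of svm_vmexit_handler() bridges two encodings of
the same value: APIC_TASKPRI keeps the priority class in bits 7:4 of the
register, while the VMCB's vintr.fields.tpr keeps it in its low four bits.
A standalone sketch of that nibble round trip -- helper names are made up
for illustration:]

#include <assert.h>
#include <stdio.h>

/* LAPIC view: priority class in bits 7:4 of APIC_TASKPRI.
 * SVM vTPR view: the same class in bits 3:0. */
static unsigned int vtpr_to_taskpri(unsigned int vtpr)
{
    return (vtpr & 0x0F) << 4;         /* as in the VMEXIT sync above */
}

static unsigned int taskpri_to_vtpr(unsigned int taskpri)
{
    return (taskpri >> 4) & 0x0F;
}

int main(void)
{
    for ( unsigned int prio = 0; prio < 16; prio++ )
        assert(taskpri_to_vtpr(vtpr_to_taskpri(prio)) == prio);
    printf("vTPR <-> APIC_TASKPRI round-trips for all 16 classes\n");
    return 0;
}
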
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Mon Sep 10 13:58:56 2007 -0600
@@ -114,23 +114,29 @@ static int construct_vmcb(struct vcpu *v
     svm_asid_init_vcpu(v);
 
     vmcb->general1_intercepts = 
-        GENERAL1_INTERCEPT_INTR         | GENERAL1_INTERCEPT_NMI         |
-        GENERAL1_INTERCEPT_SMI          | GENERAL1_INTERCEPT_INIT        |
-        GENERAL1_INTERCEPT_CPUID        | GENERAL1_INTERCEPT_INVD        |
-        GENERAL1_INTERCEPT_HLT          | GENERAL1_INTERCEPT_INVLPG      | 
-        GENERAL1_INTERCEPT_INVLPGA      | GENERAL1_INTERCEPT_IOIO_PROT   |
-        GENERAL1_INTERCEPT_MSR_PROT     | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
+        GENERAL1_INTERCEPT_INTR        | GENERAL1_INTERCEPT_NMI         |
+        GENERAL1_INTERCEPT_SMI         | GENERAL1_INTERCEPT_INIT        |
+        GENERAL1_INTERCEPT_CPUID       | GENERAL1_INTERCEPT_INVD        |
+        GENERAL1_INTERCEPT_HLT         | GENERAL1_INTERCEPT_INVLPG      | 
+        GENERAL1_INTERCEPT_INVLPGA     | GENERAL1_INTERCEPT_IOIO_PROT   |
+        GENERAL1_INTERCEPT_MSR_PROT    | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
     vmcb->general2_intercepts = 
-        GENERAL2_INTERCEPT_VMRUN  | GENERAL2_INTERCEPT_VMMCALL | 
-        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE  |
-        GENERAL2_INTERCEPT_STGI   | GENERAL2_INTERCEPT_CLGI    |
-        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
+        GENERAL2_INTERCEPT_VMRUN       | GENERAL2_INTERCEPT_VMMCALL     |
+        GENERAL2_INTERCEPT_VMLOAD      | GENERAL2_INTERCEPT_VMSAVE      |
+        GENERAL2_INTERCEPT_STGI        | GENERAL2_INTERCEPT_CLGI        |
+        GENERAL2_INTERCEPT_SKINIT      | GENERAL2_INTERCEPT_RDTSCP;
 
     /* Intercept all debug-register writes. */
     vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
 
-    /* Intercept all control-register accesses, except to CR2. */
-    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);
+    /*
+     * Intercept all control-register accesses except for CR2 reads/writes
+     * and CR8 reads (and actually CR8 writes, but that's a special case
+     * that's handled in svm/intr.c). 
+     */
+    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
+                            CR_INTERCEPT_CR2_WRITE |
+                            CR_INTERCEPT_CR8_READ);
 
     /* I/O and MSR permission bitmaps. */
     arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c       Mon Sep 10 13:58:56 2007 -0600
@@ -107,22 +107,35 @@ static void enable_intr_window(struct vc
     }
 }
 
-static void update_tpr_threshold(struct vlapic *vlapic)
+static void update_tpr_threshold(
+    struct vcpu *v, enum hvm_intack masked_intr_source)
 {
-    int max_irr, tpr;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    int max_irr, tpr, threshold = 0;
 
     if ( !cpu_has_vmx_tpr_shadow )
         return;
 
+    /*
+     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
+     * window' has already been enabled in this case.
+     */
+    if ( (masked_intr_source == hvm_intack_lapic) ||
+         (masked_intr_source == hvm_intack_pic) )
+        goto out;
+
+    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
     if ( !vlapic_enabled(vlapic) || 
          ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
-    {
-        __vmwrite(TPR_THRESHOLD, 0);
-        return;
-    }
+        goto out;
 
+    /* Highest-priority pending interrupt is masked by the TPR? */
     tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
-    __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4));
+    if ( (tpr >> 4) >= (max_irr >> 4) )
+        threshold = max_irr >> 4;
+
+ out:
+    __vmwrite(TPR_THRESHOLD, threshold);
 }
 
 asmlinkage void vmx_intr_assist(void)
@@ -171,7 +184,7 @@ asmlinkage void vmx_intr_assist(void)
         enable_intr_window(v, intr_source);
 
  out:
-    update_tpr_threshold(vcpu_vlapic(v));
+    update_tpr_threshold(v, intr_source);
 }
 
 /*
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 13:58:56 2007 -0600
@@ -566,6 +566,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->pending_valid &&
          ((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
             return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
          * Note that this leaf lives at <max-hypervisor-leaf> + 1.
          */
         u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
-        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+        p2m_type_t p2mt;
+        unsigned long mfn;
         struct vcpu *v = current;
         char *p;
 
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
         gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
 
         /* 8-byte aligned valid pseudophys address from vmxassist, please. */
-        if ( (value & 7) || (mfn == INVALID_MFN) ||
+        if ( (value & 7) || !p2m_is_ram(p2mt) ||
              !v->arch.hvm_vmx.vmxassist_enabled )
         {
             domain_crash(v->domain);
             return;
         }
+        ASSERT(mfn_valid(mfn));
 
         p = map_domain_page(mfn);
         value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static int vmx_world_restore(struct vcpu
 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
             return -EINVAL;
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/machine_kexec.c
--- a/xen/arch/x86/machine_kexec.c      Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/machine_kexec.c      Mon Sep 10 13:58:56 2007 -0600
@@ -82,9 +82,6 @@ static void __machine_reboot_kexec(void 
 
     smp_send_stop();
 
-    disable_IO_APIC();
-    hvm_cpu_down();
-
     machine_kexec(image);
 }
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 13:58:56 2007 -0600
@@ -28,7 +28,8 @@
 #include <xen/sched.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/hap.h>
 
 #include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     int lev, index;
     paddr_t gpa = 0;
     unsigned long gpfn, mfn;
+    p2m_type_t p2mt;
     int success = 1;
 
     l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     gpfn = (gcr3 >> PAGE_SHIFT);
     for ( lev = mode; lev >= 1; lev-- )
     {
-        mfn = get_mfn_from_gpfn(gpfn);
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+        if ( !p2m_is_ram(p2mt) )
         {
             HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
                        lev);
             success = 0;
             break;
         }
+        ASSERT(mfn_valid(mfn));
+
         index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
 
 #if GUEST_PAGING_LEVELS >= 4
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:58:56 2007 -0600
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
     d->arch.paging.mode |= PG_log_dirty;
     hap_unlock(d);
 
-    /* set l1e entries of P2M table to NOT_WRITABLE. */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
 }
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain 
     hap_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
-    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
     return 0;
 }
 
 void hap_clean_dirty_bitmap(struct domain *d)
 {
-    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/p2m.c     Mon Sep 10 13:58:56 2007 -0600
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -93,6 +93,31 @@
 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
 
 
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t) 
+{
+    unsigned long flags = (t & 0x7UL) << 9;
+    switch(t)
+    {
+    case p2m_invalid:
+    default:
+        return flags;
+    case p2m_ram_rw:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW;
+    case p2m_ram_logdirty:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_ram_ro:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_mmio_dm:
+        return flags;
+    case p2m_mmio_direct:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+    }
+}
+
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
 }
 
 mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
-    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
 
     ASSERT(paging_mode_translate(d));
+
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+    *t = p2m_mmio_dm;
+
     mfn = pagetable_get_mfn(d->arch.phys_table);
 
-
-    if ( gpfn > d->arch.p2m.max_mapped_pfn )
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
         return _mfn(INVALID_MFN);
     }
     mfn = _mfn(l1e_get_pfn(*l1e));
+    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
     unmap_domain_page(l1e);
 
-    return mfn;
+    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
 #if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
         return;
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
-    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
-    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
     set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
 }
 
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
                        unsigned long mfn)
 {
     unsigned long ogfn;
+    p2m_type_t ot;
     mfn_t omfn;
 
     if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    omfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(omfn) )
-    {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    omfn = gfn_to_mfn(d, gfn, &ot);
+    if ( p2m_is_ram(ot) )
+    {
+        ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
         /* This machine frame is already mapped at another physical address */
         P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                   mfn, ogfn, gfn);
-        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
-        {
+        omfn = gfn_to_mfn(d, ogfn, &ot);
+        if ( p2m_is_ram(ot) )
+        {
+            ASSERT(mfn_valid(omfn));
             P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                       ogfn , mfn_x(omfn));
             if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
-    set_gpfn_from_mfn(mfn, gfn);
+    if ( mfn_valid(_mfn(mfn)) ) 
+    {
+        set_p2m_entry(d, gfn, _mfn(mfn),
+                  p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+        set_gpfn_from_mfn(mfn, gfn);
+    }
+    else
+    {
+        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+                 gfn, mfn);
+        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+    }
 
     audit_p2m(d);
     p2m_unlock(d);
 }
 
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
-{
-    unsigned long mfn, gfn;
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type.  This is used in hardware-assisted paging to 
+ * quickly enable or disable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
+{
+    unsigned long mfn, gfn, flags;
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain 
 
                 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                 {
-                    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                    flags = l1e_get_flags(l1e[i1]);
+                    if ( p2m_flags_to_type(flags) != ot )
                         continue;
                     mfn = l1e_get_pfn(l1e[i1]);
                     gfn = get_gpfn_from_mfn(mfn);
-                    /* create a new 1le entry using l1e_flags */
-                    l1e_content = l1e_from_pfn(mfn, l1e_flags);
+                    /* create a new l1e entry with the new type */
+                    flags = p2m_flags_to_type(nt);
+                    l1e_content = l1e_from_pfn(mfn, flags);
                     paging_write_p2m_entry(d, gfn, &l1e[i1],
                                            l1mfn, l1e_content, 1);
                 }
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain 
     p2m_unlock(d);
 }
 
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
-{
-    unsigned long gfn;
+/* Modify the p2m type of a single gfn from ot to nt, returning the 
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, 
+                           p2m_type_t ot, p2m_type_t nt)
+{
+    p2m_type_t pt;
     mfn_t mfn;
 
     p2m_lock(d);
 
-    gfn = gpa >> PAGE_SHIFT;
-    mfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(mfn) )
-        set_p2m_entry(d, gfn, mfn, l1e_flags);
+    mfn = gfn_to_mfn(d, gfn, &pt);
+    if ( pt == ot )
+        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
 
     p2m_unlock(d);
 
-    return 1;
+    return pt;
 }
 
 /*
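
[Taken together, p2m_change_type_global() and p2m_change_type() implement
the HAP log-dirty cycle: demote all rw entries to logdirty when a round
starts, then on each nested write fault mark the page dirty and promote
just that entry back to rw (see hap.c and svm_do_nested_pgfault() above).
A hypothetical toy model of those transitions, with a plain array standing
in for the p2m table:]

#include <stdio.h>

enum ptype { RAM_RW, RAM_LOGDIRTY };

#define NPAGES 4
static enum ptype p2m[NPAGES];          /* all RAM_RW initially */
static int dirty[NPAGES];

/* The "enable log dirty" step: demote every matching entry. */
static void change_type_global(enum ptype ot, enum ptype nt)
{
    for ( int gfn = 0; gfn < NPAGES; gfn++ )
        if ( p2m[gfn] == ot )
            p2m[gfn] = nt;
}

/* A guest write to a logdirty page faults: record it, promote it back. */
static void nested_write_fault(int gfn)
{
    if ( p2m[gfn] == RAM_LOGDIRTY )
    {
        dirty[gfn] = 1;
        p2m[gfn] = RAM_RW;
    }
}

int main(void)
{
    change_type_global(RAM_RW, RAM_LOGDIRTY);   /* start a round */
    nested_write_fault(2);                      /* guest writes gfn 2 */
    for ( int gfn = 0; gfn < NPAGES; gfn++ )
        printf("gfn %d: %s%s\n", gfn,
               (p2m[gfn] == RAM_RW) ? "rw" : "logdirty",
               dirty[gfn] ? " (dirty)" : "");
    return 0;
}
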
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 13:58:56 2007 -0600
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
                        l1_pgentry_t new, unsigned int level)
 {
     struct domain *d = v->domain;
-    mfn_t mfn;
     
     shadow_lock(d);
 
-    /* handle physmap_add and physmap_remove */
-    mfn = gfn_to_mfn(d, gfn);
-    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
-        sh_remove_all_shadows_and_parents(v, mfn);
-        if ( sh_remove_all_mappings(v, mfn) )
-            flush_tlb_mask(d->domain_dirty_cpumask);    
-    }
-    
-    /* update the entry with new content */
+    /* If we're removing an MFN from the p2m, remove it from the shadows too */
+    if ( level == 1 )
+    {
+        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) ) 
+        {
+            sh_remove_all_shadows_and_parents(v, mfn);
+            if ( sh_remove_all_mappings(v, mfn) )
+                flush_tlb_mask(d->domain_dirty_cpumask);    
+        }
+    }
+
+    /* Update the entry with new content */
     safe_write_pte(p, new);
 
     /* install P2M in monitors for PAE Xen */
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 13:58:56 2007 -0600
@@ -209,6 +209,7 @@ guest_walk_tables(struct vcpu *v, unsign
 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
 {
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     ASSERT(!guest_op || shadow_locked_by_me(d));
     
     perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
         + guest_l4_table_offset(va);
     /* Walk down to the l3e */
     if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
-    if ( !mfn_valid(gw->l3mfn) ) return 1;
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
 #endif /* PAE or 64... */
     /* Walk down to the l2e */
     if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
-    if ( !mfn_valid(gw->l2mfn) ) return 1;
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
     else 
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
-        if ( !mfn_valid(gw->l1mfn) ) return 1;
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        if ( !p2m_is_ram(p2mt) ) return 1;
+        ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
         if ( guest_op 
              && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ _sh_propagate(struct vcpu *v,
               void *shadow_entry_ptr,
               int level,
               fetch_type_t ft, 
-              int mmio)
+              p2m_type_t p2mt)
 {
     guest_l1e_t *gp = guest_entry_ptr;
     shadow_l1e_t *sp = shadow_entry_ptr;
@@ -636,6 +640,13 @@ _sh_propagate(struct vcpu *v,
 
     /* We don't shadow PAE l3s */
     ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
+    /* Check there's something for the shadows to map to */
+    if ( !p2m_is_valid(p2mt) )
+    {
+        *sp = shadow_l1e_empty();
+        goto done;
+    }
 
     if ( mfn_valid(guest_table_mfn) )
         /* Handle A and D bit propagation into the guest */
@@ -658,19 +669,22 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( level == 1 && mmio )
-    {
-        /* Guest l1e maps MMIO space */
+    if ( level == 1 && p2mt == p2m_mmio_dm )
+    {
+        /* Guest l1e maps emulated MMIO space */
         *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
         if ( !d->arch.paging.shadow.has_fast_mmio_entries )
             d->arch.paging.shadow.has_fast_mmio_entries = 1;
         goto done;
     }
 
-    // Must have a valid target_mfn, unless this is a prefetch.  In the
+    // Must have a valid target_mfn unless this is a prefetch.  In the
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
+    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( !mfn_valid(target_mfn) )
     {
         ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ _sh_propagate(struct vcpu *v,
     // Only allow the guest write access to a page a) on a demand fault,
     // or b) if the page is already marked as dirty.
     //
+    // (We handle log-dirty entirely inside the shadow code, without using the 
+    // p2m_ram_logdirty p2m type: only HAP uses that.)
     if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
     {
         if ( ft & FETCH_TYPE_WRITE ) 
@@ -725,6 +741,10 @@ _sh_propagate(struct vcpu *v,
         else if ( !sh_mfn_is_dirty(d, target_mfn) )
             sflags &= ~_PAGE_RW;
     }
+
+    /* Read-only memory */
+    if ( p2mt == p2m_ram_ro ) 
+        sflags &= ~_PAGE_RW;
     
     // protect guest page tables
     //
@@ -754,7 +774,12 @@ _sh_propagate(struct vcpu *v,
         sflags |= _PAGE_USER;
     }
 
+    /* MMIO addresses should never be cached */
+    if ( p2m_is_mmio(p2mt) )
+        sflags |= _PAGE_PCD;
+
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
                          mfn_t gmfn, 
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft, 
-                         int mmio)
-{
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+                         p2m_type_t p2mt)
+{
+    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
     if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
     {
         gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
-        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
-        if ( mfn_valid(gl3mfn) )
+        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
     guest_l3e_t *new_gl3e = new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
         gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
-        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
-        if ( mfn_valid(gl2mfn) )
+        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
     guest_l2e_t *new_gl2e = new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
         }
         else
         {
-            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
-            if ( mfn_valid(gl1mfn) )
+            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+            if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
             else
                 result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
-    int result = 0, mmio;
+    p2m_type_t p2mt;
+    int result = 0;
 
     perfc_incr(shadow_validate_gl1e_calls);
 
     gfn = guest_l1e_get_gfn(*new_gl1e);
-    gmfn = gfn_to_mfn(v->domain, gfn);
-
-    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
     l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
-                             ft_prefetch, mmio);
+                             ft_prefetch, p2mt);
     
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2554,12 +2582,13 @@ static void sh_prefetch(struct vcpu *v, 
 static void sh_prefetch(struct vcpu *v, walk_t *gw, 
                         shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
 {
-    int i, dist, mmio;
+    int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
     guest_l1e_t gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
+    p2m_type_t p2mt;
 
     /* Prefetch no further than the end of the _shadow_ l1 MFN */
     dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v, 
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
-        gmfn = gfn_to_mfn(v->domain, gfn);
-        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
         /* Propagate the entry.  Safe to use a pointer to our local 
          * gl1e, since this is not a demand-fetch so there will be no 
          * write-back to the guest. */
         l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, mmio);
+                                 gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
 }
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
     paddr_t gpa;
     struct sh_emulate_ctxt emul_ctxt;
     struct x86_emulate_ops *emul_ops;
-    int r, mmio;
+    int r;
     fetch_type_t ft = 0;
+    p2m_type_t p2mt;
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                    v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
-    gmfn = gfn_to_mfn(d, gfn);
-    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
-    if ( !mmio && !mfn_valid(gmfn) )
+    gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
     {
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
-                             gmfn, &sl1e, ft, mmio);
+                             gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
         }
     }
 
-    if ( mmio ) 
+    /* Need to hand off device-model MMIO and writes to read-only
+     * memory to the device model */
+    if ( p2mt == p2m_mmio_dm 
+         || (p2mt == p2m_ram_ro && ft == ft_demand_write) ) 
     {
         gpa = guest_walk_to_gpa(&gw);
         goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
         int flush = 0;
         gfn_t gl2gfn;
         mfn_t gl2mfn;
+        p2m_type_t p2mt;
         guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
         /* First, make all four entries read-only. */
         for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
             }
         }
         if ( flush ) 
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
-                                       ? SH_type_l2h_shadow 
-                                       : SH_type_l2_shadow);
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
+                                           ? SH_type_l2h_shadow 
+                                           : SH_type_l2_shadow);
+                else
+                    sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
             }
             else
-                /* The guest is not present: clear out the shadow. */
                 sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
         }
     }
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
     u32 flags, errcode;
     gfn_t gfn;
     mfn_t mfn;
+    p2m_type_t p2mt;
 
     /* We don't emulate user-mode writes to page tables */
     if ( ring_3(sh_ctxt->ctxt.regs) ) 
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
         }
     }
 #endif
-    mfn = gfn_to_mfn(v->domain, gfn);
 
     errcode = PFEC_write_access;
     if ( !(flags & _PAGE_PRESENT) ) 
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
     if ( !(flags & _PAGE_RW) ) 
         goto page_fault;
 
-    if ( mfn_valid(mfn) )
-    {
+    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+    if ( p2m_is_ram(p2mt) )
+    {
+        ASSERT(mfn_valid(mfn));
         *mfnp = mfn;
         v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
         return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
 /* Convert this gfn to an mfn in the manner appropriate for the
  * guest pagetable it's used in (gmfn) */ 
 {
+    p2m_type_t p2mt;
     if ( !shadow_mode_translate(v->domain) )
         return _mfn(gfn_x(gfn));
     
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
          != PGT_writable_page ) 
         return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
     else 
-        return gfn_to_mfn(v->domain, gfn);
+        return gfn_to_mfn(v->domain, gfn, &p2mt);
 } 
 
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 13:58:56 2007 -0600
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
 
 
 /* Type used for recording a walk through guest pagetables.  It is
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/shutdown.c
--- a/xen/arch/x86/shutdown.c   Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/shutdown.c   Mon Sep 10 13:58:56 2007 -0600
@@ -197,7 +197,7 @@ static void machine_real_restart(const u
 
 #endif
 
-void machine_restart(char *cmd)
+void machine_restart(void)
 {
     int i;
 
@@ -216,18 +216,12 @@ void machine_restart(char *cmd)
             safe_halt();
     }
 
-    /*
-     * Stop all CPUs and turn off local APICs and the IO-APIC, so
-     * other OSs see a clean IRQ state.
-     */
     smp_send_stop();
-    disable_IO_APIC();
-    hvm_cpu_down();
 
     /* Rebooting needs to touch the page at absolute address 0. */
     *((unsigned short *)__va(0x472)) = reboot_mode;
 
-    if (reboot_thru_bios <= 0)
+    if ( reboot_thru_bios <= 0 )
     {
         for ( ; ; )
         {
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/smp.c        Mon Sep 10 13:58:56 2007 -0600
@@ -279,6 +279,19 @@ int on_selected_cpus(
 
     ASSERT(local_irq_is_enabled());
 
+    /* Legacy UP system with no APIC to deliver IPIs? */
+    if ( unlikely(!cpu_has_apic) )
+    {
+        ASSERT(num_online_cpus() == 1);
+        if ( cpu_isset(0, selected) )
+        {
+            local_irq_disable();
+            func(info);
+            local_irq_enable();
+        }
+        return 0;
+    }
+
     if ( nr_cpus == 0 )
         return 0;
 
@@ -306,23 +319,33 @@ int on_selected_cpus(
 
 static void stop_this_cpu (void *dummy)
 {
+    disable_local_APIC();
+    hvm_cpu_down();
+
     cpu_clear(smp_processor_id(), cpu_online_map);
+
+    for ( ; ; )
+        __asm__ __volatile__ ( "hlt" );
+}
+
+/*
+ * Stop all CPUs and turn off local APICs and the IO-APIC, so other OSs see a 
+ * clean IRQ state.
+ */
+void smp_send_stop(void)
+{
+    int timeout = 10;
+
+    smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+    /* Wait 10ms for all other CPUs to go offline. */
+    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
+        mdelay(1);
 
     local_irq_disable();
     disable_local_APIC();
+    disable_IO_APIC();
     hvm_cpu_down();
-
-    for ( ; ; )
-        __asm__ __volatile__ ( "hlt" );
-}
-
-void smp_send_stop(void)
-{
-    /* Stop all other CPUs in the system. */
-    smp_call_function(stop_this_cpu, NULL, 1, 0);
-
-    local_irq_disable();
-    disable_local_APIC();
     local_irq_enable();
 }
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/common/keyhandler.c   Mon Sep 10 13:58:56 2007 -0600
@@ -123,7 +123,7 @@ static void halt_machine(unsigned char k
 static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
 {
     printk("'%c' pressed -> rebooting machine\n", key);
-    machine_restart(NULL);
+    machine_restart();
 }
 
 static void cpuset_print(char *set, int size, cpumask_t mask)
diff -r 7d9b20d91102 -r 42b925c00d8a xen/common/shutdown.c
--- a/xen/common/shutdown.c     Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/common/shutdown.c     Mon Sep 10 13:58:56 2007 -0600
@@ -24,7 +24,7 @@ static void maybe_reboot(void)
         printk("rebooting machine in 5 seconds.\n");
         watchdog_disable();
         mdelay(5000);
-        machine_restart(NULL);
+        machine_restart();
     }
 }
 
@@ -50,7 +50,7 @@ void dom0_shutdown(u8 reason)
     case SHUTDOWN_reboot:
     {
         printk("Domain 0 shutdown: rebooting machine.\n");
-        machine_restart(NULL);
+        machine_restart();
         break; /* not reached */
     }
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/drivers/char/console.c        Mon Sep 10 13:58:56 2007 -0600
@@ -895,7 +895,7 @@ void panic(const char *fmt, ...)
     {
         watchdog_disable();
         mdelay(5000);
-        machine_restart(NULL);
+        machine_restart();
     }
 }
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/asm-x86/mm.h  Mon Sep 10 13:58:56 2007 -0600
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
       ? get_gpfn_from_mfn(mfn)                          \
       : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
-
 #define INVALID_MFN             (~0UL)
 
 #ifdef CONFIG_COMPAT
diff -r 7d9b20d91102 -r 42b925c00d8a xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/asm-x86/p2m.h Mon Sep 10 13:58:56 2007 -0600
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -27,49 +27,141 @@
 #define _XEN_P2M_H
 
 
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization.  It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers 
+ * to machine frame numbers.  It only exists for paging_mode_translate 
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ *     assisted paging and (hopefully) iommu support; and 
+ * (2) lets us map it directly into the guest vcpus' virtual address space 
+ *     as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest.  (For 
+ * paging_mode_external() guests this mapping is in the monitor table.)
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
-
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
-    l1_pgentry_t l1e = l1e_empty();
-    int ret;
-
-    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
-        return _mfn(INVALID_MFN);
-
-    /* Don't read off the end of the p2m table */
-    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
-
-    ret = __copy_from_user(&l1e,
-                           &phys_to_machine_mapping[gfn],
-                           sizeof(l1e));
-
-    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
-        return _mfn(l1e_get_pfn(l1e));
-
-    return _mfn(INVALID_MFN);
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all 
+ * variation in the access control bits is made in the level-1 PTEs.
+ * 
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame.  We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+    p2m_invalid = 0,            /* Nothing mapped here */
+    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
+    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
+                       | p2m_to_mask(p2m_ram_logdirty)  \
+                       | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
+                        | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
+                      | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    return (flags >> 9) & 0x7;
+}
+ 
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+    mfn_t mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt = p2m_mmio_dm;
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+
+    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+    {
+        l1_pgentry_t l1e = l1e_empty();
+        int ret;
+
+        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
+               / sizeof(l1_pgentry_t));
+
+        /* Need to __copy_from_user because the p2m is sparse and this
+         * part might not exist */
+        ret = __copy_from_user(&l1e,
+                               &phys_to_machine_mapping[gfn],
+                               sizeof(l1e));
+
+        if ( ret == 0 ) {
+            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+            if ( p2m_is_valid(p2mt) )
+                mfn = _mfn(l1e_get_pfn(l1e));
+            else 
+                /* XXX see above */
+                p2mt = p2m_mmio_dm;
+        }
+    }
+
+    *t = p2mt;
+    return mfn;
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
 
 /* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+                                unsigned long gfn, p2m_type_t *t)
 {
     if ( !paging_mode_translate(d) )
+    {
+        /* Not necessarily true, but for non-translated guests, we claim
+         * it's the most generic kind of memory */
+        *t = p2m_ram_rw;
         return _mfn(gfn);
+    }
     if ( likely(current->domain == d) )
-        return gfn_to_mfn_current(gfn);
+        return gfn_to_mfn_current(gfn, t);
     else 
-        return gfn_to_mfn_foreign(d, gfn);
+        return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+    mfn_t mfn;
+    p2m_type_t t;
+    mfn = gfn_to_mfn(d, gpfn, &t);
+    if ( p2m_is_valid(t) )
+        return mfn_x(mfn);
+    return INVALID_MFN;
 }
 
 /* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
         return mfn_x(mfn);
 }
 
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
-    return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
-    unsigned long gfn = gpa >> PAGE_SHIFT;
-    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
 /* Translate the frame number held in an l1e from guest to machine */
 static inline l1_pgentry_t
 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
 }
 
 
-
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d);
 
@@ -130,11 +208,12 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                                unsigned long mfn);
 
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
-
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt);
 
 #endif /* _XEN_P2M_H */
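
The p2m.h rework above is the heart of this merge: every translation now returns a p2m_type_t alongside the mfn, which is why the untyped helpers get_mfn_from_gpfn() and mmio_space() are deleted rather than updated. A minimal caller sketch under the new interface (the helper name is_emulated_mmio() is hypothetical, invented here for illustration; gfn_to_mfn(), the p2m_type_t values, and p2m_change_type_global() are used exactly as declared in the hunk above):

    /* Hypothetical stand-in for the removed mmio_space(): classify a
     * guest physical address by its p2m type rather than by whether
     * the looked-up mfn happens to be valid. */
    static int is_emulated_mmio(struct domain *d, paddr_t gpa)
    {
        p2m_type_t t;

        (void)gfn_to_mfn(d, gpa >> PAGE_SHIFT, &t);
        return ( t == p2m_mmio_dm );
    }

One consequence of typed entries is that mode switches become type exchanges instead of raw PTE flag rewrites; log-dirty code, for instance, can make all guest RAM temporarily read-only with a single call such as:

    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);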
 
diff -r 7d9b20d91102 -r 42b925c00d8a xen/include/xen/shutdown.h
--- a/xen/include/xen/shutdown.h        Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/xen/shutdown.h        Mon Sep 10 13:58:56 2007 -0600
@@ -6,7 +6,7 @@ extern int opt_noreboot;
 
 void dom0_shutdown(u8 reason);
 
-void machine_restart(char *cmd);
+void machine_restart(void);
 void machine_halt(void);
 void machine_power_off(void);
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
