WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 13 Feb 2009 04:00:17 -0800
Delivery-date: Fri, 13 Feb 2009 04:00:29 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1234491748 -32400
# Node ID c7cba853583da45ee4478237047fdd5d6bed68cd
# Parent  af992824b5cfa3b81dbe68293216a5df3ec0bdf4
# Parent  32b15413749255e0cd518f25d9202759586dcb27
merge with xen-unstable.hg
---
 .hgignore                                        |   10 
 Config.mk                                        |    4 
 docs/man/xm.pod.1                                |    2 
 extras/mini-os/arch/x86/mm.c                     |    6 
 tools/blktap/drivers/Makefile                    |    2 
 tools/firmware/rombios/rombios.c                 |   15 -
 tools/libxc/xc_domain.c                          |   58 ++++
 tools/libxc/xc_pagetab.c                         |   36 +-
 tools/libxc/xenctrl.h                            |   25 +
 tools/python/xen/xend/XendAPIStore.py            |    3 
 tools/python/xen/xend/image.py                   |   34 +-
 tools/python/xen/xend/server/pciquirk.py         |    3 
 tools/python/xen/xm/create.py                    |    2 
 tools/python/xen/xm/main.py                      |    8 
 tools/xentrace/xenctx.c                          |   58 +++-
 xen/arch/ia64/linux-xen/iosapic.c                |   10 
 xen/arch/ia64/linux-xen/irq_ia64.c               |   26 +-
 xen/arch/ia64/linux-xen/mca.c                    |    9 
 xen/arch/ia64/xen/hypercall.c                    |    2 
 xen/arch/ia64/xen/irq.c                          |   24 -
 xen/arch/x86/Makefile                            |    1 
 xen/arch/x86/acpi/cpufreq/cpufreq.c              |   16 -
 xen/arch/x86/acpi/suspend.c                      |    3 
 xen/arch/x86/boot/Makefile                       |    6 
 xen/arch/x86/boot/build32.mk                     |   24 +
 xen/arch/x86/boot/head.S                         |   10 
 xen/arch/x86/boot/reloc.c                        |   89 ++++++
 xen/arch/x86/cpu/mcheck/amd_k8.c                 |   14 -
 xen/arch/x86/cpu/mcheck/amd_nonfatal.c           |   13 -
 xen/arch/x86/cpu/mcheck/mce.c                    |  130 ++++++++++
 xen/arch/x86/cpu/mcheck/mce.h                    |    3 
 xen/arch/x86/cpu/mcheck/mce_intel.c              |    8 
 xen/arch/x86/domain.c                            |   38 +-
 xen/arch/x86/domain_build.c                      |    2 
 xen/arch/x86/domctl.c                            |   42 ++-
 xen/arch/x86/e820.c                              |    2 
 xen/arch/x86/i8259.c                             |    4 
 xen/arch/x86/io_apic.c                           |   54 ----
 xen/arch/x86/irq.c                               |   77 +++++
 xen/arch/x86/mm.c                                |  158 ++++++------
 xen/arch/x86/mm/hap/hap.c                        |   23 -
 xen/arch/x86/mm/hap/p2m-ept.c                    |   88 ++++--
 xen/arch/x86/mm/p2m.c                            |   95 ++-----
 xen/arch/x86/mm/paging.c                         |    6 
 xen/arch/x86/mm/shadow/common.c                  |  296 +++++++++++------------
 xen/arch/x86/mm/shadow/multi.c                   |  131 ++++------
 xen/arch/x86/mm/shadow/private.h                 |  123 ++-------
 xen/arch/x86/numa.c                              |    2 
 xen/arch/x86/physdev.c                           |    4 
 xen/arch/x86/setup.c                             |   16 -
 xen/arch/x86/smpboot.c                           |    4 
 xen/arch/x86/x86_32/xen.lds.S                    |    1 
 xen/arch/x86/x86_64/entry.S                      |    1 
 xen/arch/x86/x86_64/xen.lds.S                    |    1 
 xen/common/domain.c                              |   18 -
 xen/common/grant_table.c                         |    2 
 xen/common/hvm/save.c                            |   48 +++
 xen/common/memory.c                              |   32 --
 xen/common/page_alloc.c                          |  158 +++++++-----
 xen/drivers/char/serial.c                        |    2 
 xen/drivers/cpufreq/cpufreq.c                    |   73 +++++
 xen/drivers/cpufreq/cpufreq_misc_governors.c     |   25 +
 xen/drivers/cpufreq/cpufreq_ondemand.c           |   95 +++----
 xen/drivers/passthrough/amd/iommu_init.c         |   23 -
 xen/drivers/passthrough/amd/iommu_map.c          |   22 -
 xen/drivers/passthrough/amd/pci_amd_iommu.c      |    1 
 xen/drivers/passthrough/io.c                     |    2 
 xen/drivers/passthrough/iommu.c                  |    9 
 xen/drivers/passthrough/vtd/dmar.c               |   18 -
 xen/drivers/passthrough/vtd/ia64/vtd.c           |   13 -
 xen/drivers/passthrough/vtd/intremap.c           |    2 
 xen/drivers/passthrough/vtd/iommu.c              |   61 +++-
 xen/drivers/passthrough/vtd/iommu.h              |    4 
 xen/drivers/passthrough/vtd/qinval.c             |    4 
 xen/drivers/passthrough/vtd/vtd.h                |    4 
 xen/drivers/passthrough/vtd/x86/vtd.c            |    9 
 xen/include/acpi/cpufreq/cpufreq.h               |    1 
 xen/include/asm-ia64/hardirq.h                   |    1 
 xen/include/asm-ia64/hvm/iommu.h                 |    1 
 xen/include/asm-ia64/hvm/irq.h                   |   14 -
 xen/include/asm-ia64/linux-xen/asm/smp.h         |    1 
 xen/include/asm-ia64/linux-xen/linux/interrupt.h |    4 
 xen/include/asm-ia64/linux/asm/hw_irq.h          |    2 
 xen/include/asm-ia64/mm.h                        |   12 
 xen/include/asm-ia64/tlbflush.h                  |    3 
 xen/include/asm-x86/domain.h                     |   13 -
 xen/include/asm-x86/hvm/vmx/vmx.h                |    6 
 xen/include/asm-x86/iocap.h                      |    5 
 xen/include/asm-x86/irq.h                        |    3 
 xen/include/asm-x86/mm.h                         |   84 +++++-
 xen/include/asm-x86/p2m.h                        |    4 
 xen/include/asm-x86/page.h                       |   38 ++
 xen/include/asm-x86/perfc.h                      |    1 
 xen/include/asm-x86/processor.h                  |    1 
 xen/include/public/arch-ia64/hvm/save.h          |    4 
 xen/include/public/arch-x86/hvm/save.h           |    4 
 xen/include/public/arch-x86/xen-mca.h            |   48 +++
 xen/include/public/domctl.h                      |   12 
 xen/include/public/io/pciif.h                    |    2 
 xen/include/xen/hvm/save.h                       |    2 
 xen/include/xen/iocap.h                          |    3 
 xen/include/xen/irq.h                            |   20 +
 xen/include/xen/mm.h                             |  186 +++++++++++++-
 xen/include/xen/sched.h                          |    5 
 xen/xsm/flask/hooks.c                            |    1 
 105 files changed, 1880 insertions(+), 1048 deletions(-)

diff -r af992824b5cf -r c7cba853583d .hgignore
--- a/.hgignore Fri Feb 13 10:56:01 2009 +0900
+++ b/.hgignore Fri Feb 13 11:22:28 2009 +0900
@@ -256,6 +256,7 @@
 ^xen/arch/x86/asm-offsets\.s$
 ^xen/arch/x86/boot/mkelf32$
 ^xen/arch/x86/xen\.lds$
+^xen/arch/x86/boot/reloc.S$
 ^xen/ddb/.*$
 ^xen/include/asm$
 ^xen/include/asm-.*/asm-offsets\.h$
@@ -279,15 +280,6 @@
 ^xen/arch/ia64/asm-xsi-offsets\.s$
 ^xen/arch/ia64/map\.out$
 ^xen/arch/ia64/xen\.lds\.s$
-^xen/arch/powerpc/dom0\.bin$
-^xen/arch/powerpc/asm-offsets\.s$
-^xen/arch/powerpc/firmware$
-^xen/arch/powerpc/firmware.dbg$
-^xen/arch/powerpc/firmware_image.bin$
-^xen/arch/powerpc/xen\.lds$
-^xen/arch/powerpc/\.xen-syms$
-^xen/arch/powerpc/xen-syms\.S$
-^xen/arch/powerpc/cmdline.dep$
 ^unmodified_drivers/linux-2.6/\.tmp_versions
 ^unmodified_drivers/linux-2.6/.*\.cmd$
 ^unmodified_drivers/linux-2.6/.*\.ko$
diff -r af992824b5cf -r c7cba853583d Config.mk
--- a/Config.mk Fri Feb 13 10:56:01 2009 +0900
+++ b/Config.mk Fri Feb 13 11:22:28 2009 +0900
@@ -1,7 +1,7 @@
 # -*- mode: Makefile; -*-
 
-# A debug build of Xen and tools?
-debug ?= y ## TEMPORARILY ENABLED
+# A debug build of Xen and tools? TEMPORARILY ENABLED
+debug ?= y
 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
                          -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
diff -r af992824b5cf -r c7cba853583d docs/man/xm.pod.1
--- a/docs/man/xm.pod.1 Fri Feb 13 10:56:01 2009 +0900
+++ b/docs/man/xm.pod.1 Fri Feb 13 11:22:28 2009 +0900
@@ -66,6 +66,8 @@ The attached console will perform much l
 The attached console will perform much like a standard serial console,
 so running curses based interfaces over the console B<is not
 advised>.  Vi tends to get very odd when using it over this interface.
+
+Use the key combination Ctrl+] to detach the domain console.
 
 =item B<create> I<configfile> [I<OPTIONS>] [I<vars>]..
 
diff -r af992824b5cf -r c7cba853583d extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/extras/mini-os/arch/x86/mm.c      Fri Feb 13 11:22:28 2009 +0900
@@ -550,9 +550,15 @@ static void clear_bootstrap(void)
 
 void arch_init_p2m(unsigned long max_pfn)
 {
+#ifdef __x86_64__
 #define L1_P2M_SHIFT    9
 #define L2_P2M_SHIFT    18    
 #define L3_P2M_SHIFT    27    
+#else
+#define L1_P2M_SHIFT    10
+#define L2_P2M_SHIFT    20    
+#define L3_P2M_SHIFT    30    
+#endif
 #define L1_P2M_ENTRIES  (1 << L1_P2M_SHIFT)    
 #define L2_P2M_ENTRIES  (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT))    
 #define L3_P2M_ENTRIES  (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))    
diff -r af992824b5cf -r c7cba853583d tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/blktap/drivers/Makefile     Fri Feb 13 11:22:28 2009 +0900
@@ -13,7 +13,7 @@ CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -D_GNU_SOURCE
 
-ifeq ($(shell . ./check_gcrypt),"yes")
+ifeq ($(shell . ./check_gcrypt $(CC)),yes)
 CFLAGS += -DUSE_GCRYPT
 CRYPT_LIB := -lgcrypt
 else
diff -r af992824b5cf -r c7cba853583d tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/firmware/rombios/rombios.c  Fri Feb 13 11:22:28 2009 +0900
@@ -4609,6 +4609,10 @@ int15_function32(regs, ES, DS, FLAGS)
 {
   Bit32u  extended_memory_size=0; // 64bits long
   Bit16u  CX,DX;
+#ifdef HVMASSIST
+  Bit16u off, e820_table_size;
+  Bit32u base, type, size;
+#endif
 
 BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
 
@@ -4625,8 +4629,10 @@ ASM_START
 
       ;; Get the count in eax
       mov  bx, sp
+SEG SS
       mov  ax, _int15_function32.CX [bx]
       shl  eax, #16
+SEG SS
       mov  ax, _int15_function32.DX [bx]
 
       ;; convert to numbers of 15usec ticks
@@ -4660,8 +4666,7 @@ ASM_END
         {
 #ifdef HVMASSIST
        case 0x20: {
-            Bit16u e820_table_size =
-                read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
+            e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
 
             if (regs.u.r32.edx != 0x534D4150) /* SMAP */
                 goto int15_unimplemented;
@@ -4674,8 +4679,6 @@ ASM_END
                 if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
                     regs.u.r32.ebx = 0;
             } else if (regs.u.r16.bx == 1) {
-                Bit32u base, type;
-                Bit16u off;
                 for (off = 0; off < e820_table_size; off += 0x14) {
                     base = read_dword(E820_SEG, E820_OFFSET + off);
                     type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off);
@@ -4699,9 +4702,7 @@ ASM_END
         }
 
         case 0x01: {
-            Bit16u off, e820_table_size =
-                read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
-            Bit32u base, type, size;
+            e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
 
             // do we have any reason to fail here ?
             CLEAR_CF();
diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xc_domain.c   Fri Feb 13 11:22:28 2009 +0900
@@ -269,6 +269,38 @@ int xc_domain_hvm_getcontext(int xc_hand
         unlock_pages(ctxt_buf, size);
 
     return (ret < 0 ? -1 : domctl.u.hvmcontext.size);
+}
+
+/* Get just one element of the HVM guest context.
+ * size must be >= HVM_SAVE_LENGTH(type) */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+                                     uint32_t domid,
+                                     uint16_t typecode,
+                                     uint16_t instance,
+                                     void *ctxt_buf,
+                                     uint32_t size)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    if ( !ctxt_buf ) 
+        return -EINVAL;
+
+    domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
+    domctl.domain = (domid_t) domid;
+    domctl.u.hvmcontext_partial.type = typecode;
+    domctl.u.hvmcontext_partial.instance = instance;
+    set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf);
+
+    if ( (ret = lock_pages(ctxt_buf, size)) != 0 )
+        return ret;
+    
+    ret = do_domctl(xc_handle, &domctl);
+
+    if ( ctxt_buf ) 
+        unlock_pages(ctxt_buf, size);
+
+    return ret ? -1 : 0;
 }
 
 /* set info to hvm guest for restore */
@@ -909,6 +941,32 @@ int xc_domain_update_msi_irq(
     return rc;
 }
 
+int xc_domain_unbind_msi_irq(
+    int xc_handle,
+    uint32_t domid,
+    uint32_t gvec,
+    uint32_t pirq,
+    uint32_t gflags)
+{
+    int rc;
+    xen_domctl_bind_pt_irq_t *bind;
+
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
+    domctl.domain = (domid_t)domid;
+
+    bind = &(domctl.u.bind_pt_irq);
+    bind->hvm_domid = domid;
+    bind->irq_type = PT_IRQ_TYPE_MSI;
+    bind->machine_irq = pirq;
+    bind->u.msi.gvec = gvec;
+    bind->u.msi.gflags = gflags;
+
+    rc = do_domctl(xc_handle, &domctl);
+    return rc;
+}
+
 /* Pass-through: binds machine irq to guests irq */
 int xc_domain_bind_pt_irq(
     int xc_handle,
diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xc_pagetab.c  Fri Feb 13 11:22:28 2009 +0900
@@ -4,50 +4,42 @@
  * Function to translate virtual to physical addresses.
  */
 #include "xc_private.h"
+#include <xen/hvm/save.h>
 
 #define CR0_PG  0x80000000
 #define CR4_PAE 0x20
 #define PTE_PSE 0x80
+#define EFER_LMA 0x400
+
 
 unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
                                            int vcpu, unsigned long long virt)
 {
     xc_dominfo_t dominfo;
-    vcpu_guest_context_any_t ctx;
     uint64_t paddr, mask, pte = 0;
     int size, level, pt_levels = 2;
     void *map;
 
     if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1 
-        || dominfo.domid != dom
-        || xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+        || dominfo.domid != dom)
         return 0;
 
     /* What kind of paging are we dealing with? */
     if (dominfo.hvm) {
-        unsigned long cr0, cr3, cr4;
-        xen_capabilities_info_t xen_caps = "";
-        if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
+        struct hvm_hw_cpu ctx;
+        if (xc_domain_hvm_getcontext_partial(xc_handle, dom,
+                                             HVM_SAVE_CODE(CPU), vcpu,
+                                             &ctx, sizeof ctx) != 0)
             return 0;
-        /* HVM context records are always host-sized */
-        if (strstr(xen_caps, "xen-3.0-x86_64")) {
-            cr0 = ctx.x64.ctrlreg[0];
-            cr3 = ctx.x64.ctrlreg[3];
-            cr4 = ctx.x64.ctrlreg[4];
-        } else {
-            cr0 = ctx.x32.ctrlreg[0];
-            cr3 = ctx.x32.ctrlreg[3];
-            cr4 = ctx.x32.ctrlreg[4];
-        }
-        if (!(cr0 & CR0_PG))
+        if (!(ctx.cr0 & CR0_PG))
             return virt;
-        if (0 /* XXX how to get EFER.LMA? */) 
-            pt_levels = 4;
-        else
-            pt_levels = (cr4 & CR4_PAE) ? 3 : 2;
-        paddr = cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
+        pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2;
+        paddr = ctx.cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
     } else {
         DECLARE_DOMCTL;
+        vcpu_guest_context_any_t ctx;
+        if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+            return 0;
         domctl.domain = dom;
         domctl.cmd = XEN_DOMCTL_get_address_size;
         if ( do_domctl(xc_handle, &domctl) != 0 )
diff -r af992824b5cf -r c7cba853583d tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xenctrl.h     Fri Feb 13 11:22:28 2009 +0900
@@ -375,6 +375,25 @@ int xc_domain_hvm_getcontext(int xc_hand
                              uint8_t *ctxt_buf,
                              uint32_t size);
 
+
+/**
+ * This function returns one element of the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm typecode which type of element is required
+ * @parm instance which instance of the type
+ * @parm ctxt_buf a pointer to a structure to store the execution context of
+ *            the hvm domain
+ * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode))
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+                                     uint32_t domid,
+                                     uint16_t typecode,
+                                     uint16_t instance,
+                                     void *ctxt_buf,
+                                     uint32_t size);
+
 /**
  * This function will set the context for hvm domain
  *
@@ -1074,6 +1093,12 @@ int xc_domain_update_msi_irq(
     uint32_t gvec,
     uint32_t pirq,
     uint32_t gflags);
+
+int xc_domain_unbind_msi_irq(int xc_handle,
+                             uint32_t domid,
+                             uint32_t gvec,
+                             uint32_t pirq,
+                             uint32_t gflags);
 
 int xc_domain_bind_pt_irq(int xc_handle,
                           uint32_t domid,
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/XendAPIStore.py
--- a/tools/python/xen/xend/XendAPIStore.py     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/XendAPIStore.py     Fri Feb 13 11:22:28 2009 +0900
@@ -33,7 +33,8 @@ def register(uuid, type, inst):
 
 def deregister(uuid, type):
     old = get(uuid, type)
-    del __classes[(uuid, type)]
+    if old is not None:
+        del __classes[(uuid, type)]
     return old
 
 def get(uuid, type):
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/image.py    Fri Feb 13 11:22:28 2009 +0900
@@ -372,8 +372,6 @@ class ImageHandler:
             env['DISPLAY'] = self.display
         if self.xauthority:
             env['XAUTHORITY'] = self.xauthority
-        if self.vncconsole:
-            args = args + ([ "-vncviewer" ])
         unique_id = "%i-%i" % (self.vm.getDomid(), time.time())
         sentinel_path = sentinel_path_prefix + unique_id
         sentinel_path_fifo = sentinel_path + '.fifo'
@@ -558,24 +556,30 @@ class ImageHandler:
                     os.kill(self.pid, signal.SIGHUP)
                 except OSError, exn:
                     log.exception(exn)
-                try:
-                    # Try to reap the child every 100ms for 10s. Then SIGKILL it.
-                    for i in xrange(100):
+                # Try to reap the child every 100ms for 10s. Then SIGKILL it.
+                for i in xrange(100):
+                    try:
                         (p, rv) = os.waitpid(self.pid, os.WNOHANG)
                         if p == self.pid:
                             break
-                        time.sleep(0.1)
-                    else:
-                        log.warning("DeviceModel %d took more than 10s "
-                                    "to terminate: sending SIGKILL" % self.pid)
+                    except OSError:
+                        # This is expected if Xend has been restarted within
+                        # the life of this domain.  In this case, we can kill
+                        # the process, but we can't wait for it because it's
+                        # not our child. We continue this loop, and after it is
+                        # terminated make really sure the process is going away
+                        # (SIGKILL).
+                        pass
+                    time.sleep(0.1)
+                else:
+                    log.warning("DeviceModel %d took more than 10s "
+                                "to terminate: sending SIGKILL" % self.pid)
+                    try:
                         os.kill(self.pid, signal.SIGKILL)
                         os.waitpid(self.pid, 0)
-                except OSError, exn:
-                    # This is expected if Xend has been restarted within the
-                    # life of this domain.  In this case, we can kill the process,
-                    # but we can't wait for it because it's not our child.
-                    # We just make really sure it's going away (SIGKILL) first.
-                    os.kill(self.pid, signal.SIGKILL)
+                    except OSError:
+                        # This happens if the process doesn't exist.
+                        pass
                 state = xstransact.Remove("/local/domain/0/device-model/%i"
                                           % self.vm.getDomid())
             finally:
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/server/pciquirk.py
--- a/tools/python/xen/xend/server/pciquirk.py  Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/server/pciquirk.py  Fri Feb 13 11:22:28 2009 +0900
@@ -123,7 +123,8 @@ class PCIQuirk:
             log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE)
             self.pci_perm_dev_config = ['xend-pci-perm-devs']
 
-        devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0)
+        devices = child_at(child(self.pci_perm_dev_config,
+                                 'unconstrained_dev_ids'),0)
         if self.__matchPCIdev( devices ):
             log.debug("Permissive mode enabled for PCI device [%s]" %
                       self.devid)
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xm/create.py     Fri Feb 13 11:22:28 2009 +0900
@@ -1337,7 +1337,7 @@ def main(argv):
     elif not opts.is_xml:
         dom = make_domain(opts, config)
         
-    if opts.vals.vncviewer:
+    if opts.vals.vncconsole:
         domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
         vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
         console.runVncViewer(domid, vncviewer_autopass, True)
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xm/main.py       Fri Feb 13 11:22:28 2009 +0900
@@ -59,7 +59,11 @@ import XenAPI
 import XenAPI
 
 import xen.lowlevel.xc
-xc = xen.lowlevel.xc.xc()
+try:
+    xc = xen.lowlevel.xc.xc()
+except Exception, ex:
+    print >>sys.stderr, ("Is xen kernel running?")
+    sys.exit(1)
 
 import inspect
 from xen.xend import XendOptions
@@ -735,7 +739,7 @@ def xm_save(args):
         (options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint'])
     except getopt.GetoptError, opterr:
         err(opterr)
-        sys.exit(1)
+        usage('save')
 
     checkpoint = False
     for (k, v) in options:
diff -r af992824b5cf -r c7cba853583d tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/xentrace/xenctx.c   Fri Feb 13 11:22:28 2009 +0900
@@ -26,6 +26,7 @@
 #include "xenctrl.h"
 #include <xen/foreign/x86_32.h>
 #include <xen/foreign/x86_64.h>
+#include <xen/hvm/save.h>
 
 int xc_handle = 0;
 int domid = 0;
@@ -287,6 +288,35 @@ static void print_ctx_32(vcpu_guest_cont
     }
 }
 
+static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx)
+{
+    struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
+
+    printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip);
+    print_symbol((uint32_t)regs->eip);
+    print_flags((uint32_t)regs->eflags);
+    printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp);
+
+    printf("eax: %08x\t", (uint32_t)regs->eax);
+    printf("ebx: %08x\t", (uint32_t)regs->ebx);
+    printf("ecx: %08x\t", (uint32_t)regs->ecx);
+    printf("edx: %08x\n", (uint32_t)regs->edx);
+
+    printf("esi: %08x\t", (uint32_t)regs->esi);
+    printf("edi: %08x\t", (uint32_t)regs->edi);
+    printf("ebp: %08x\n", (uint32_t)regs->ebp);
+
+    printf(" ds:     %04x\t", regs->ds);
+    printf(" es:     %04x\t", regs->es);
+    printf(" fs:     %04x\t", regs->fs);
+    printf(" gs:     %04x\n", regs->gs);
+
+    if (disp_all) {
+        print_special(ctx->ctrlreg, "cr", 0x1d, 4);
+        print_special(ctx->debugreg, "dr", 0xcf, 4);
+    }
+}
+
 static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx)
 {
     struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
@@ -335,6 +365,8 @@ static void print_ctx(vcpu_guest_context
 {
     if (ctxt_word_size == 4) 
         print_ctx_32(&ctx->x32);
+    else if (guest_word_size == 4)
+        print_ctx_32on64(&ctx->x64);
     else 
         print_ctx_64(&ctx->x64);
 }
@@ -788,23 +820,29 @@ static void dump_ctx(int vcpu)
 
 #if defined(__i386__) || defined(__x86_64__)
     {
-        struct xen_domctl domctl;
-        memset(&domctl, 0, sizeof domctl);
-        domctl.domain = domid;
-        domctl.cmd = XEN_DOMCTL_get_address_size;
-        if (xc_domctl(xc_handle, &domctl) == 0)
-            ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
         if (dominfo.hvm) {
+            struct hvm_hw_cpu cpuctx;
             xen_capabilities_info_t xen_caps = "";
+            if (xc_domain_hvm_getcontext_partial(
+                    xc_handle, domid, HVM_SAVE_CODE(CPU), 
+                    vcpu, &cpuctx, sizeof cpuctx) != 0) {
+                perror("xc_domain_hvm_getcontext_partial");
+                exit(-1);
+            }
+            guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4;
+            /* HVM guest context records are always host-sized */
             if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) {
                 perror("xc_version");
                 exit(-1);
             }
-            /* HVM guest context records are always host-sized */
             ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4;
-            /* XXX For now we can't tell whether a HVM guest is in long
-             * XXX mode; eventually fix this here and in xc_pagetab.c */
-            guest_word_size = 4;
+        } else {
+            struct xen_domctl domctl;
+            memset(&domctl, 0, sizeof domctl);
+            domctl.domain = domid;
+            domctl.cmd = XEN_DOMCTL_get_address_size;
+            if (xc_domctl(xc_handle, &domctl) == 0)
+                ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
         }
     }
 #endif
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/iosapic.c
--- a/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 11:22:28 2009 +0900
@@ -93,6 +93,16 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
+#ifdef XEN
+static inline int iosapic_irq_to_vector (int irq)
+{
+       return irq;
+}
+
+#undef irq_to_vector
+#define irq_to_vector(irq)      iosapic_irq_to_vector(irq)
+#define AUTO_ASSIGN    AUTO_ASSIGN_IRQ
+#endif
 
 #undef DEBUG_INTERRUPT_ROUTING
 
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/irq_ia64.c
--- a/xen/arch/ia64/linux-xen/irq_ia64.c        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/irq_ia64.c        Fri Feb 13 11:22:28 2009 +0900
@@ -250,6 +250,7 @@ register_percpu_irq (ia64_vector vec, st
 register_percpu_irq (ia64_vector vec, struct irqaction *action)
 {
        irq_desc_t *desc;
+#ifndef XEN
        unsigned int irq;
 
        for (irq = 0; irq < NR_IRQS; ++irq)
@@ -258,16 +259,19 @@ register_percpu_irq (ia64_vector vec, st
                        desc->status |= IRQ_PER_CPU;
                        desc->handler = &irq_type_ia64_lsapic;
                        if (action)
-#ifdef XEN
-                               setup_vector(irq, action);
-#else
                                setup_irq(irq, action);
-#endif
-               }
-}
-
-#ifdef XEN
-int request_irq(unsigned int irq,
+               }
+#else
+       desc = irq_descp(vec);
+       desc->status |= IRQ_PER_CPU;
+       desc->handler = &irq_type_ia64_lsapic;
+       if (action)
+               setup_vector(vec, action);
+#endif
+}
+
+#ifdef XEN
+int request_irq_vector(unsigned int vector,
                void (*handler)(int, void *, struct cpu_user_regs *),
                unsigned long irqflags, const char * devname, void *dev_id)
 {
@@ -279,7 +283,7 @@ int request_irq(unsigned int irq,
         * otherwise we'll have trouble later trying to figure out
         * which interrupt is which (messes up the interrupt freeing logic etc).
         *                          */
-       if (irq >= NR_IRQS)
+       if (vector >= NR_VECTORS)
                return -EINVAL;
        if (!handler)
                return -EINVAL;
@@ -291,7 +295,7 @@ int request_irq(unsigned int irq,
        action->handler = handler;
        action->name = devname;
        action->dev_id = dev_id;
-       setup_vector(irq, action);
+       setup_vector(vector, action);
        if (retval)
                xfree(action);
 
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/mca.c
--- a/xen/arch/ia64/linux-xen/mca.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/mca.c     Fri Feb 13 11:22:28 2009 +0900
@@ -114,7 +114,6 @@ extern void                 ia64_slave_init_handler (v
 extern void                    ia64_slave_init_handler (void);
 #ifdef XEN
 extern void setup_vector (unsigned int vec, struct irqaction *action);
-#define setup_irq(irq, action) setup_vector(irq, action)
 #endif
 
 static ia64_mc_info_t          ia64_mc_info;
@@ -1931,12 +1930,18 @@ ia64_mca_late_init(void)
                if (cpe_vector >= 0) {
                        /* If platform supports CPEI, enable the irq. */
                        cpe_poll_enabled = 0;
+#ifndef XEN
                        for (irq = 0; irq < NR_IRQS; ++irq)
                                if (irq_to_vector(irq) == cpe_vector) {
                                        desc = irq_descp(irq);
                                        desc->status |= IRQ_PER_CPU;
-                                       setup_irq(irq, &mca_cpe_irqaction);
+                                       setup_vector(irq, &mca_cpe_irqaction);
                                }
+#else
+                       desc = irq_descp(cpe_vector);
+                       desc->status |= IRQ_PER_CPU;
+                       setup_vector(cpe_vector, &mca_cpe_irqaction);
+#endif
                        ia64_mca_register_cpev(cpe_vector);
                        IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
                } else {
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/xen/hypercall.c     Fri Feb 13 11:22:28 2009 +0900
@@ -543,7 +543,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
             break;
         irq_status_query.flags = 0;
         /* Edge-triggered interrupts don't need an explicit unmask downcall. */
-        if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+        if ( !strstr(irq_descp(irq)->handler->typename, "edge") )
             irq_status_query.flags |= XENIRQSTAT_needs_eoi;
         ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
         break;
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/xen/irq.c   Fri Feb 13 11:22:28 2009 +0900
@@ -228,11 +228,11 @@ out:
  * disabled.
  */
 
-int setup_vector(unsigned int irq, struct irqaction * new)
+int setup_vector(unsigned int vector, struct irqaction * new)
 {
        unsigned long flags;
        struct irqaction *old, **p;
-       irq_desc_t *desc = irq_descp(irq);
+       irq_desc_t *desc = irq_descp(vector);
 
        /*
         * The following block of code has to be executed atomically
@@ -248,8 +248,8 @@ int setup_vector(unsigned int irq, struc
 
        desc->depth = 0;
        desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST);
-       desc->handler->startup(irq);
-       desc->handler->enable(irq);
+       desc->handler->startup(vector);
+       desc->handler->enable(vector);
        spin_unlock_irqrestore(&desc->lock,flags);
 
        return 0;
@@ -258,13 +258,11 @@ int setup_vector(unsigned int irq, struc
 /* Vectors reserved by xen (and thus not sharable with domains).  */
 unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)];
 
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
-       unsigned int vec;
+int setup_irq_vector(unsigned int vec, struct irqaction * new)
+{
        int res;
 
-       /* Get vector for IRQ.  */
-       if (acpi_gsi_to_irq (irq, &vec) < 0)
+       if ( vec == IA64_INVALID_VECTOR )
                return -ENOSYS;
        /* Reserve the vector (and thus the irq).  */
        if (test_and_set_bit(vec, ia64_xen_vector))
@@ -273,14 +271,12 @@ int setup_irq(unsigned int irq, struct i
        return res;
 }
 
-void free_irq(unsigned int irq)
-{
-       unsigned int vec;
+void release_irq_vector(unsigned int vec)
+{
        unsigned long flags;
        irq_desc_t *desc;
 
-       /* Get vector for IRQ.  */
-       if (acpi_gsi_to_irq(irq, &vec) < 0)
+       if ( vec == IA64_INVALID_VECTOR )
                return;
 
        desc = irq_descp(vec);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/Makefile     Fri Feb 13 11:22:28 2009 +0900
@@ -92,3 +92,4 @@ clean::
 clean::
        rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
        rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
+       rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Feb 13 11:22:28 2009 +0900
@@ -58,6 +58,9 @@ static struct acpi_cpufreq_data *drv_dat
 
 static struct cpufreq_driver acpi_cpufreq_driver;
 
+static unsigned int __read_mostly acpi_pstate_strict;
+integer_param("acpi_pstate_strict", acpi_pstate_strict);
+
 static int check_est_cpu(unsigned int cpuid)
 {
     struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
@@ -180,7 +183,7 @@ static void drv_read(struct drv_cmd *cmd
     ASSERT(cpus_weight(cmd->mask) == 1);
 
     /* to reduce IPI for the sake of performance */
-    if (cpu_isset(smp_processor_id(), cmd->mask))
+    if (likely(cpu_isset(smp_processor_id(), cmd->mask)))
         do_drv_read((void *)cmd);
     else
         on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
@@ -196,15 +199,16 @@ static u32 get_cur_val(cpumask_t mask)
     struct cpufreq_policy *policy;
     struct processor_performance *perf;
     struct drv_cmd cmd;
-    unsigned int cpu;
+    unsigned int cpu = smp_processor_id();
 
     if (unlikely(cpus_empty(mask)))
         return 0;
 
-    cpu = first_cpu(mask);
+    if (!cpu_isset(cpu, mask))
+        cpu = first_cpu(mask);
     policy = cpufreq_cpu_policy[cpu];
 
-    if (!policy)
+    if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu])
         return 0;    
 
     switch (drv_data[policy->cpu]->cpu_feature) {
@@ -214,7 +218,7 @@ static u32 get_cur_val(cpumask_t mask)
         break;
     case SYSTEM_IO_CAPABLE:
         cmd.type = SYSTEM_IO_CAPABLE;
-        perf = drv_data[first_cpu(mask)]->acpi_data;
+        perf = drv_data[policy->cpu]->acpi_data;
         cmd.addr.io.port = perf->control_register.address;
         cmd.addr.io.bit_width = perf->control_register.bit_width;
         break;
@@ -393,7 +397,7 @@ static int acpi_cpufreq_target(struct cp
 
     drv_write(&cmd);
 
-    if (!check_freqs(cmd.mask, freqs.new, data)) {
+    if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
         printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new);
         return -EAGAIN;
     }
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/suspend.c
--- a/xen/arch/x86/acpi/suspend.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/acpi/suspend.c       Fri Feb 13 11:22:28 2009 +0900
@@ -65,6 +65,9 @@ void restore_rest_processor_state(void)
     /* Reload FPU state on next FPU use. */
     stts();
 
+    if (cpu_has_pat)
+        wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
     mtrr_ap_init();
     mcheck_init(&boot_cpu_data);
 }
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/Makefile
--- a/xen/arch/x86/boot/Makefile        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/boot/Makefile        Fri Feb 13 11:22:28 2009 +0900
@@ -1,1 +1,7 @@ obj-y += head.o
 obj-y += head.o
+
+head.o: reloc.S
+
+# NB. BOOT_TRAMPOLINE == 0x8c000
+%.S: %.c
+       RELOC=0x8c000 $(MAKE) -f build32.mk $@
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/build32.mk
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/build32.mk      Fri Feb 13 11:22:28 2009 +0900
@@ -0,0 +1,24 @@
+XEN_ROOT=../../../..
+override XEN_TARGET_ARCH=x86_32
+CFLAGS =
+include $(XEN_ROOT)/Config.mk
+
+# Disable PIE/SSP if GCC supports them. They can break us.
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
+
+CFLAGS += -Werror -fno-builtin -msoft-float
+
+%.S: %.bin
+       (od -v -t x $< | head -n -1 | \
+       sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@
+
+%.bin: %.lnk
+       $(OBJCOPY) -O binary $< $@
+
+%.lnk: %.o
+       $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $<
+
+%.o: %.c
+       $(CC) $(CFLAGS) -c $< -o $@
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/head.S
--- a/xen/arch/x86/boot/head.S  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/boot/head.S  Fri Feb 13 11:22:28 2009 +0900
@@ -79,8 +79,11 @@ __start:
         cmp     $0x2BADB002,%eax
         jne     not_multiboot
 
-        /* Save the Multiboot info structure for later use. */
-        mov     %ebx,sym_phys(multiboot_ptr)
+        /* Save the Multiboot info struct (after relocation) for later use. */
+        mov     $sym_phys(cpu0_stack)+1024,%esp
+        push    %ebx
+        call    reloc
+        mov     %eax,sym_phys(multiboot_ptr)
 
         /* Initialize BSS (no nasty surprises!) */
         mov     $sym_phys(__bss_start),%edi
@@ -192,6 +195,9 @@ 2:      cmp     $L1_PAGETABLE_ENTRIES,%e
 
 #include "cmdline.S"
 
+reloc:
+#include "reloc.S"
+
         .align 16
         .globl trampoline_start, trampoline_end
 trampoline_start:
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/reloc.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/reloc.c Fri Feb 13 11:22:28 2009 +0900
@@ -0,0 +1,89 @@
+/******************************************************************************
+ * reloc.c
+ * 
+ * 32-bit flat memory-map routines for relocating Multiboot structures
+ * and modules. This is most easily done early with paging disabled.
+ * 
+ * Copyright (c) 2009, Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+asm (
+    "    .text                         \n"
+    "    .globl _start                 \n"
+    "_start:                           \n"
+    "    mov  $_start,%edi             \n"
+    "    call 1f                       \n"
+    "1:  pop  %esi                     \n"
+    "    sub  $1b-_start,%esi          \n"
+    "    mov  $__bss_start-_start,%ecx \n"
+    "    rep  movsb                    \n"
+    "    xor  %eax,%eax                \n"
+    "    mov  $_end,%ecx               \n"
+    "    sub  %edi,%ecx                \n"
+    "    rep  stosb                    \n"
+    "    mov  $reloc,%eax              \n"
+    "    jmp  *%eax                    \n"
+    );
+
+typedef unsigned int u32;
+#include "../../../include/xen/multiboot.h"
+
+extern char _start[];
+
+static void *memcpy(void *dest, const void *src, unsigned int n)
+{
+    char *s = (char *)src, *d = dest;
+    while ( n-- )
+        *d++ = *s++;
+    return dest;
+}
+
+static void *reloc_mbi_struct(void *old, unsigned int bytes)
+{
+    static void *alloc = &_start;
+    alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul);
+    return memcpy(alloc, old, bytes);
+}
+
+static char *reloc_mbi_string(char *old)
+{
+    char *p;
+    for ( p = old; *p != '\0'; p++ )
+        continue;
+    return reloc_mbi_struct(old, p - old + 1);
+}
+
+multiboot_info_t *reloc(multiboot_info_t *mbi_old)
+{
+    multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi));
+    int i;
+
+    if ( mbi->flags & MBI_CMDLINE )
+        mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline);
+
+    if ( mbi->flags & MBI_MODULES )
+    {
+        module_t *mods = reloc_mbi_struct(
+            (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
+        mbi->mods_addr = (u32)mods;
+        for ( i = 0; i < mbi->mods_count; i++ )
+            if ( mods[i].string )
+                mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
+    }
+
+    if ( mbi->flags & MBI_MEMMAP )
+        mbi->mmap_addr = (u32)reloc_mbi_struct(
+            (memory_map_t *)mbi->mmap_addr, mbi->mmap_length);
+
+    /* Mask features we don't understand or don't relocate. */
+    mbi->flags &= (MBI_MEMLIMITS |
+                   MBI_DRIVES |
+                   MBI_CMDLINE |
+                   MBI_MODULES |
+                   MBI_MEMMAP);
+
+    return mbi;
+}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c  Fri Feb 13 11:22:28 2009 +0900
@@ -99,6 +99,8 @@ void k8_machine_check(struct cpu_user_re
 
        mc_data = x86_mcinfo_getptr();
        cpu_nr = smp_processor_id();
+       BUG_ON(cpu_nr != vcpu->processor);
+
        curdom = vcpu->domain;
 
        memset(&mc_global, 0, sizeof(mc_global));
@@ -106,14 +108,12 @@ void k8_machine_check(struct cpu_user_re
        mc_global.common.size = sizeof(mc_global);
 
        mc_global.mc_domid = curdom->domain_id; /* impacted domain */
-       mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
-       BUG_ON(cpu_nr != vcpu->processor);
-       mc_global.mc_core_threadid = 0;
+
+       x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
+           &mc_global.mc_coreid, &mc_global.mc_core_threadid,
+           &mc_global.mc_apicid, NULL, NULL, NULL);
+
        mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
-         It's not clear to me how to figure this out. */
-       mc_global.mc_socketid = ???;
-#endif
        mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
        rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_nonfatal.c
--- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c    Fri Feb 13 11:22:28 2009 +0900
@@ -95,6 +95,7 @@ void mce_amd_checkregs(void *info)
        mc_data = NULL;
 
        cpu_nr = smp_processor_id();
+       BUG_ON(cpu_nr != vcpu->processor);
        event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
        error_found = 0;
 
@@ -103,14 +104,12 @@ void mce_amd_checkregs(void *info)
        mc_global.common.size = sizeof(mc_global);
 
        mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
-       mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
-       BUG_ON(cpu_nr != vcpu->processor);
-       mc_global.mc_core_threadid = 0;
        mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
-         It's not clear to me how to figure this out. */
-       mc_global.mc_socketid = ???;
-#endif
+
+       x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
+           &mc_global.mc_coreid, &mc_global.mc_core_threadid,
+           &mc_global.mc_apicid, NULL, NULL, NULL);
+
        mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
        rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Fri Feb 13 11:22:28 2009 +0900
@@ -443,6 +443,96 @@ next:
 
 
 
+static void do_mc_get_cpu_info(void *v)
+{
+       int cpu = smp_processor_id();
+       int cindex, cpn;
+       struct cpuinfo_x86 *c;
+       xen_mc_logical_cpu_t *log_cpus, *xcp;
+       uint32_t junk, ebx;
+
+       log_cpus = v;
+       c = &cpu_data[cpu];
+       cindex = 0;
+       cpn = cpu - 1;
+
+       /*
+        * Deal with sparse masks, condensed into a contig array.
+        */
+       while (cpn >= 0) {
+               if (cpu_isset(cpn, cpu_online_map))
+                       cindex++;
+               cpn--;
+       }
+
+       xcp = &log_cpus[cindex];
+       c = &cpu_data[cpu];
+       xcp->mc_cpunr = cpu;
+       x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
+           &xcp->mc_coreid, &xcp->mc_threadid,
+           &xcp->mc_apicid, &xcp->mc_ncores,
+           &xcp->mc_ncores_active, &xcp->mc_nthreads);
+       xcp->mc_cpuid_level = c->cpuid_level;
+       xcp->mc_family = c->x86;
+       xcp->mc_vendor = c->x86_vendor;
+       xcp->mc_model = c->x86_model;
+       xcp->mc_step = c->x86_mask;
+       xcp->mc_cache_size = c->x86_cache_size;
+       xcp->mc_cache_alignment = c->x86_cache_alignment;
+       memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
+       memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
+       memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
+
+       /*
+        * This part needs to run on the CPU itself.
+        */
+       xcp->mc_nmsrvals = __MC_NMSRS;
+       xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
+       rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
+
+       if (c->cpuid_level >= 1) {
+               cpuid(1, &junk, &ebx, &junk, &junk);
+               xcp->mc_clusterid = (ebx >> 24) & 0xff;
+       } else
+               xcp->mc_clusterid = hard_smp_processor_id();
+}
+
+
+void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
+                        uint16_t *threadid, uint32_t *apicid,
+                        unsigned *ncores, unsigned *ncores_active,
+                        unsigned *nthreads)
+{
+       struct cpuinfo_x86 *c;
+
+       *apicid = cpu_physical_id(cpu);
+       c = &cpu_data[cpu];
+       if (c->apicid == BAD_APICID) {
+               *chipid = cpu;
+               *coreid = 0;
+               *threadid = 0;
+               if (ncores != NULL)
+                       *ncores = 1;
+               if (ncores_active != NULL)
+                       *ncores_active = 1;
+               if (nthreads != NULL)
+                       *nthreads = 1;
+       } else {
+               *chipid = phys_proc_id[cpu];
+               if (c->x86_max_cores > 1)
+                       *coreid = cpu_core_id[cpu];
+               else
+                       *coreid = 0;
+               *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
+               if (ncores != NULL)
+                       *ncores = c->x86_max_cores;
+               if (ncores_active != NULL)
+                       *ncores_active = c->booted_cores;
+               if (nthreads != NULL)
+                       *nthreads = c->x86_num_siblings;
+       }
+}
+
 /* Machine Check Architecture Hypercall */
 long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
 {
@@ -452,6 +542,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
        struct domain *domU;
        struct xen_mc_fetch *mc_fetch;
        struct xen_mc_notifydomain *mc_notifydomain;
+       struct xen_mc_physcpuinfo *mc_physcpuinfo;
        struct mc_info *mi;
        uint32_t flags;
        uint32_t fetch_idx;
@@ -460,6 +551,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
         * a DomU to fetch mc data while Dom0 notifies another DomU. */
        static DEFINE_SPINLOCK(mc_lock);
        static DEFINE_SPINLOCK(mc_notify_lock);
+       int nlcpu;
+       xen_mc_logical_cpu_t *log_cpus = NULL;
 
        if ( copy_from_guest(op, u_xen_mc, 1) )
                return -EFAULT;
@@ -580,6 +673,43 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
 
                spin_unlock(&mc_notify_lock);
                break;
+
+       case XEN_MC_physcpuinfo:
+              if ( !IS_PRIV(v->domain) )
+                      return -EPERM;
+ 
+              mc_physcpuinfo = &op->u.mc_physcpuinfo;
+              nlcpu = num_online_cpus();
+ 
+              if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+                      if (mc_physcpuinfo->ncpus <= 0)
+                              return -EINVAL;
+                      nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus);
+                      log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
+                      if (log_cpus == NULL)
+                              return -ENOMEM;
+ 
+                      if (on_each_cpu(do_mc_get_cpu_info, log_cpus,
+                          1, 1) != 0) {
+                              xfree(log_cpus);
+                              return -EIO;
+                      }
+              }
+ 
+              mc_physcpuinfo->ncpus = nlcpu;
+ 
+              if (copy_to_guest(u_xen_mc, op, 1)) {
+                      if (log_cpus != NULL)
+                              xfree(log_cpus);
+                      return -EFAULT;
+              }
+ 
+              if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+                      if (copy_to_guest(mc_physcpuinfo->info,
+                          log_cpus, nlcpu))
+                              ret = -EFAULT;
+                      xfree(log_cpus);
+              }
        }
 
        return ret;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Fri Feb 13 11:22:28 2009 +0900
@@ -34,4 +34,5 @@ int x86_mcinfo_add(struct mc_info *mi, v
 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
 void x86_mcinfo_dump(struct mc_info *mi);
 void mc_panic(char *s);
-
+void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
+                        uint32_t *, uint32_t *, uint32_t *, uint32_t *);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Fri Feb 13 11:22:28 2009 +0900
@@ -182,11 +182,9 @@ static struct mc_info *machine_check_pol
         mcg.mc_flags = MC_FLAG_POLLED;
     else if (calltype == MC_FLAG_CMCI)
         mcg.mc_flags = MC_FLAG_CMCI;
-    mcg.mc_socketid = phys_proc_id[cpu];
-    mcg.mc_coreid = cpu_core_id[cpu];
-    mcg.mc_apicid = cpu_physical_id(cpu);
-    mcg.mc_core_threadid =
-        mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 
+    x86_mc_get_cpu_info(
+        cpu, &mcg.mc_socketid, &mcg.mc_coreid,
+        &mcg.mc_core_threadid, &mcg.mc_apicid, NULL, NULL, NULL);
     rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
 
     for ( i = 0; i < nr_mce_banks; i++ ) {
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domain.c     Fri Feb 13 11:22:28 2009 +0900
@@ -141,7 +141,7 @@ void dump_pageframe_info(struct domain *
     }
     else
     {
-        list_for_each_entry ( page, &d->page_list, list )
+        page_list_for_each ( page, &d->page_list )
         {
             printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                    _p(page_to_mfn(page)),
@@ -154,7 +154,7 @@ void dump_pageframe_info(struct domain *
         p2m_pod_dump_data(d);
     }
 
-    list_for_each_entry ( page, &d->xenpage_list, list )
+    page_list_for_each ( page, &d->xenpage_list )
     {
         printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                _p(page_to_mfn(page)),
@@ -352,6 +352,8 @@ int vcpu_initialise(struct vcpu *v)
     v->arch.perdomain_ptes =
         d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
 
+    spin_lock_init(&v->arch.shadow_ldt_lock);
+
     return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
 }
 
@@ -380,7 +382,7 @@ int arch_domain_create(struct domain *d,
     INIT_LIST_HEAD(&d->arch.pdev_list);
 
     d->arch.relmem = RELMEM_not_started;
-    INIT_LIST_HEAD(&d->arch.relmem_list);
+    INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
     d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0);
@@ -1655,9 +1657,8 @@ int hypercall_xlat_continuation(unsigned
 #endif
 
 static int relinquish_memory(
-    struct domain *d, struct list_head *list, unsigned long type)
-{
-    struct list_head *ent;
+    struct domain *d, struct page_list_head *list, unsigned long type)
+{
     struct page_info  *page;
     unsigned long     x, y;
     int               ret = 0;
@@ -1665,17 +1666,13 @@ static int relinquish_memory(
     /* Use a recursive lock, as we may enter 'free_domheap_page'. */
     spin_lock_recursive(&d->page_alloc_lock);
 
-    ent = list->next;
-    while ( ent != list )
-    {
-        page = list_entry(ent, struct page_info, list);
-
+    while ( (page = page_list_remove_head(list)) )
+    {
         /* Grab a reference to the page so it won't disappear from under us. */
         if ( unlikely(!get_page(page, d)) )
         {
             /* Couldn't get a reference -- someone is freeing this page. */
-            ent = ent->next;
-            list_move_tail(&page->list, &d->arch.relmem_list);
+            page_list_add_tail(page, &d->arch.relmem_list);
             continue;
         }
 
@@ -1687,6 +1684,7 @@ static int relinquish_memory(
             break;
         case -EAGAIN:
         case -EINTR:
+            page_list_add(page, list);
             set_bit(_PGT_pinned, &page->u.inuse.type_info);
             put_page(page);
             goto out;
@@ -1723,6 +1721,7 @@ static int relinquish_memory(
                 case 0:
                     break;
                 case -EINTR:
+                    page_list_add(page, list);
                     page->u.inuse.type_info |= PGT_validated;
                     if ( x & PGT_partial )
                         put_page(page);
@@ -1730,6 +1729,7 @@ static int relinquish_memory(
                     ret = -EAGAIN;
                     goto out;
                 case -EAGAIN:
+                    page_list_add(page, list);
                     page->u.inuse.type_info |= PGT_partial;
                     if ( x & PGT_partial )
                         put_page(page);
@@ -1746,9 +1746,8 @@ static int relinquish_memory(
             }
         }
 
-        /* Follow the list chain and /then/ potentially free the page. */
-        ent = ent->next;
-        list_move_tail(&page->list, &d->arch.relmem_list);
+        /* Put the page on the list and /then/ potentially free it. */
+        page_list_add_tail(page, &d->arch.relmem_list);
         put_page(page);
 
         if ( hypercall_preempt_check() )
@@ -1758,7 +1757,12 @@ static int relinquish_memory(
         }
     }
 
-    list_splice_init(&d->arch.relmem_list, list);
+    /* list is empty at this point. */
+    if ( !page_list_empty(&d->arch.relmem_list) )
+    {
+        *list = d->arch.relmem_list;
+        INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
+    }
 
  out:
     spin_unlock_recursive(&d->page_alloc_lock);
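
The relinquish_memory() rework above stops following ->next pointers by hand and instead pops each page off the head of the list, re-queuing it on relmem_list (or back onto the source list when the operation is interrupted). Below is a minimal, self-contained sketch of that consume-and-requeue pattern; it uses a toy singly-linked list rather than Xen's page_list API, and every name in it is illustrative only.

    /* Toy singly-linked list; illustrative only, not Xen's page_list API. */
    #include <stdio.h>
    #include <stddef.h>

    struct page  { struct page *next; int id; };
    struct plist { struct page *head, *tail; };

    static struct page *plist_remove_head(struct plist *l)
    {
        struct page *p = l->head;
        if ( p != NULL )
        {
            l->head = p->next;
            if ( l->head == NULL )
                l->tail = NULL;
            p->next = NULL;
        }
        return p;
    }

    static void plist_add_tail(struct plist *l, struct page *p)
    {
        p->next = NULL;
        if ( l->tail != NULL )
            l->tail->next = p;
        else
            l->head = p;
        l->tail = p;
    }

    /* Pop pages one at a time; anything still on 'src' when the loop stops
     * can be picked up again simply by re-entering it (cf. the -EAGAIN and
     * -EINTR cases above putting the page back on the list). */
    static void drain(struct plist *src, struct plist *done)
    {
        struct page *p;
        while ( (p = plist_remove_head(src)) != NULL )
            plist_add_tail(done, p);     /* per-page teardown would go here */
    }

    int main(void)
    {
        struct page pg[3] = { { NULL, 0 }, { NULL, 1 }, { NULL, 2 } };
        struct plist src = { NULL, NULL }, done = { NULL, NULL };

        for ( int i = 0; i < 3; i++ )
            plist_add_tail(&src, &pg[i]);
        drain(&src, &done);
        for ( struct page *p = done.head; p != NULL; p = p->next )
            printf("relinquished page %d\n", p->id);
        return 0;
    }
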
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domain_build.c       Fri Feb 13 11:22:28 2009 +0900
@@ -880,7 +880,7 @@ int __init construct_dom0(
     }
     si->first_p2m_pfn = pfn;
     si->nr_p2m_frames = d->tot_pages - count;
-    list_for_each_entry ( page, &d->page_list, list )
+    page_list_for_each ( page, &d->page_list )
     {
         mfn = page_to_mfn(page);
         if ( get_gpfn_from_mfn(mfn) >= count )
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domctl.c     Fri Feb 13 11:22:28 2009 +0900
@@ -240,7 +240,7 @@ long arch_do_domctl(
         struct domain *d = rcu_lock_domain_by_id(domctl->domain);
         unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
         uint64_t mfn;
-        struct list_head *list_ent;
+        struct page_info *page;
 
         ret = -EINVAL;
         if ( d != NULL )
@@ -259,19 +259,19 @@ long arch_do_domctl(
                 goto getmemlist_out;
             }
 
-            ret = 0;
-            list_ent = d->page_list.next;
-            for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
+            ret = i = 0;
+            page_list_for_each(page, &d->page_list)
             {
-                mfn = page_to_mfn(list_entry(
-                    list_ent, struct page_info, list));
+                if ( i >= max_pfns )
+                    break;
+                mfn = page_to_mfn(page);
                 if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
                                           i, &mfn, 1) )
                 {
                     ret = -EFAULT;
                     break;
                 }
-                list_ent = mfn_to_page(mfn)->list.next;
+                ++i;
             }
             
             spin_unlock(&d->page_alloc_lock);
@@ -416,6 +416,34 @@ long arch_do_domctl(
         rcu_unlock_domain(d);
     }
     break;
+
+    case XEN_DOMCTL_gethvmcontext_partial:
+    { 
+        struct domain *d;
+
+        ret = -ESRCH;
+        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+            break;
+
+        ret = xsm_hvmcontext(d, domctl->cmd);
+        if ( ret )
+            goto gethvmcontext_partial_out;
+
+        ret = -EINVAL;
+        if ( !is_hvm_domain(d) ) 
+            goto gethvmcontext_partial_out;
+
+        domain_pause(d);
+        ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type,
+                           domctl->u.hvmcontext_partial.instance,
+                           domctl->u.hvmcontext_partial.buffer);
+        domain_unpause(d);
+
+    gethvmcontext_partial_out:
+        rcu_unlock_domain(d);
+    }
+    break;
+
 
     case XEN_DOMCTL_set_address_size:
     {
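
The getmemlist change above swaps raw ->next chasing for an iterator plus an explicit counter capped at max_pfns, so the walk no longer depends on mfn_to_page(mfn)->list.next. The following stand-alone sketch shows only the bounded-copy idea, with plain arrays standing in for the domain's page list and the guest buffer; the names are illustrative, not the hypervisor API.

    /* Illustrative only: arrays stand in for the page list and guest buffer. */
    #include <stdint.h>
    #include <stdio.h>

    /* Copy at most max_pfns entries from 'mfns' into 'buf'; returns how many
     * were written.  copy_to_guest_offset() plays this role in the hunk above. */
    static unsigned long copy_memlist(const uint64_t *mfns, unsigned long nr,
                                      uint64_t *buf, unsigned long max_pfns)
    {
        unsigned long i = 0;

        for ( unsigned long n = 0; n < nr; n++ )
        {
            if ( i >= max_pfns )
                break;                /* bound reached: stop, never overrun */
            buf[i++] = mfns[n];
        }
        return i;
    }

    int main(void)
    {
        uint64_t mfns[] = { 0x1000, 0x1001, 0x1002, 0x1003 };
        uint64_t buf[2];
        unsigned long n = copy_memlist(mfns, 4, buf, 2);

        printf("copied %lu entries, first 0x%llx\n",
               n, (unsigned long long)buf[0]);
        return 0;
    }
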
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/e820.c       Fri Feb 13 11:22:28 2009 +0900
@@ -1,10 +1,10 @@
 #include <xen/config.h>
 #include <xen/init.h>
 #include <xen/lib.h>
+#include <xen/mm.h>
 #include <xen/compat.h>
 #include <xen/dmi.h>
 #include <asm/e820.h>
-#include <asm/mm.h>
 #include <asm/page.h>
 
 /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/i8259.c      Fri Feb 13 11:22:28 2009 +0900
@@ -410,8 +410,8 @@ void __init init_IRQ(void)
     }
 
     /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
-    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN;
-    vector_irq[0x80] = NEVER_ASSIGN;
+    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ;
+    vector_irq[0x80] = NEVER_ASSIGN_IRQ;
 
     apic_intr_init();
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/io_apic.c    Fri Feb 13 11:22:28 2009 +0900
@@ -49,7 +49,6 @@ static struct { int pin, apic; } ioapic_
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
 
 int skip_ioapic_setup;
 
@@ -88,9 +87,6 @@ static struct irq_pin_list {
     [0 ... PIN_MAP_SIZE-1].pin = -1
 };
 static int irq_2_pin_free_entry = NR_IRQS;
-
-int vector_irq[NR_VECTORS] __read_mostly = {
-    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN};
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -668,56 +664,6 @@ static inline int IO_APIC_irq_trigger(in
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 u8 irq_vector[NR_IRQS] __read_mostly;
-
-int free_irq_vector(int vector)
-{
-    int irq;
-
-    BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
-
-    spin_lock(&vector_lock);
-    if ((irq = vector_irq[vector]) == AUTO_ASSIGN)
-        vector_irq[vector] = FREE_TO_ASSIGN;
-    spin_unlock(&vector_lock);
-
-    return (irq == AUTO_ASSIGN) ? 0 : -EINVAL;
-}
-
-int assign_irq_vector(int irq)
-{
-    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
-    unsigned vector;
-
-    BUG_ON(irq >= NR_IRQS);
-
-    spin_lock(&vector_lock);
-
-    if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) {
-        spin_unlock(&vector_lock);
-        return IO_APIC_VECTOR(irq);
-    }
-
-    vector = current_vector;
-    while (vector_irq[vector] != FREE_TO_ASSIGN) {
-        vector += 8;
-        if (vector > LAST_DYNAMIC_VECTOR)
-            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
-
-        if (vector == current_vector) {
-            spin_unlock(&vector_lock);
-            return -ENOSPC;
-        }
-    }
-
-    current_vector = vector;
-    vector_irq[vector] = irq;
-    if (irq != AUTO_ASSIGN)
-        IO_APIC_VECTOR(irq) = vector;
-
-    spin_unlock(&vector_lock);
-
-    return vector;
-}
 
 static struct hw_interrupt_type ioapic_level_type;
 static struct hw_interrupt_type ioapic_edge_type;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/irq.c        Fri Feb 13 11:22:28 2009 +0900
@@ -27,6 +27,11 @@ boolean_param("noirqbalance", opt_noirqb
 
 irq_desc_t irq_desc[NR_VECTORS];
 
+static DEFINE_SPINLOCK(vector_lock);
+int vector_irq[NR_VECTORS] __read_mostly = {
+    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
+
 static void __do_IRQ_guest(int vector);
 
 void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
@@ -54,6 +59,56 @@ struct hw_interrupt_type no_irq_type = {
 
 atomic_t irq_err_count;
 
+int free_irq_vector(int vector)
+{
+    int irq;
+
+    BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
+
+    spin_lock(&vector_lock);
+    if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ)
+        vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
+    spin_unlock(&vector_lock);
+
+    return (irq == AUTO_ASSIGN_IRQ) ? 0 : -EINVAL;
+}
+
+int assign_irq_vector(int irq)
+{
+    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
+    unsigned vector;
+
+    BUG_ON(irq >= NR_IRQS);
+
+    spin_lock(&vector_lock);
+
+    if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) {
+        spin_unlock(&vector_lock);
+        return IO_APIC_VECTOR(irq);
+    }
+
+    vector = current_vector;
+    while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) {
+        vector += 8;
+        if (vector > LAST_DYNAMIC_VECTOR)
+            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
+
+        if (vector == current_vector) {
+            spin_unlock(&vector_lock);
+            return -ENOSPC;
+        }
+    }
+
+    current_vector = vector;
+    vector_irq[vector] = irq;
+    if (irq != AUTO_ASSIGN_IRQ)
+        IO_APIC_VECTOR(irq) = vector;
+
+    spin_unlock(&vector_lock);
+
+    return vector;
+}
+
 asmlinkage void do_IRQ(struct cpu_user_regs *regs)
 {
     unsigned int      vector = regs->entry_vector;
@@ -104,7 +159,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
     spin_unlock(&desc->lock);
 }
 
-int request_irq(unsigned int irq,
+int request_irq_vector(unsigned int vector,
         void (*handler)(int, void *, struct cpu_user_regs *),
         unsigned long irqflags, const char * devname, void *dev_id)
 {
@@ -117,7 +172,7 @@ int request_irq(unsigned int irq,
      * which interrupt is which (messes up the interrupt freeing
      * logic etc).
      */
-    if (irq >= NR_IRQS)
+    if (vector >= NR_VECTORS)
         return -EINVAL;
     if (!handler)
         return -EINVAL;
@@ -130,34 +185,32 @@ int request_irq(unsigned int irq,
     action->name = devname;
     action->dev_id = dev_id;
 
-    retval = setup_irq(irq, action);
+    retval = setup_irq_vector(vector, action);
     if (retval)
         xfree(action);
 
     return retval;
 }
 
-void free_irq(unsigned int irq)
-{
-    unsigned int  vector = irq_to_vector(irq);
-    irq_desc_t   *desc = &irq_desc[vector];
+void release_irq_vector(unsigned int vector)
+{
+    irq_desc_t *desc = &irq_desc[vector];
     unsigned long flags;
 
     spin_lock_irqsave(&desc->lock,flags);
     desc->action  = NULL;
     desc->depth   = 1;
     desc->status |= IRQ_DISABLED;
-    desc->handler->shutdown(irq);
+    desc->handler->shutdown(vector);
     spin_unlock_irqrestore(&desc->lock,flags);
 
     /* Wait to make sure it's not being used on another CPU */
     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
 }
 
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
-    unsigned int  vector = irq_to_vector(irq);
-    irq_desc_t   *desc = &irq_desc[vector];
+int setup_irq_vector(unsigned int vector, struct irqaction *new)
+{
+    irq_desc_t *desc = &irq_desc[vector];
     unsigned long flags;
  
     spin_lock_irqsave(&desc->lock,flags);
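
assign_irq_vector(), now living in irq.c, scans vector_irq[] round-robin in steps of 8 and wraps with a +1 offset so every dynamic vector is eventually visited. The stand-alone sketch below reproduces only the shape of that search; the locking, AUTO_ASSIGN_IRQ handling and IO_APIC_VECTOR bookkeeping are omitted, and all constants and names are illustrative.

    /* Illustrative constants and names; locking and IRQ bookkeeping omitted. */
    #include <stdio.h>

    #define FIRST_DYN 0x20
    #define LAST_DYN  0xef
    #define NR_VEC    256
    #define FREE      (-1)

    static int table[NR_VEC];                  /* -1 = free, otherwise in use */

    static int pick_vector(void)
    {
        static unsigned int current = FIRST_DYN;
        unsigned int v = current;

        while ( table[v] != FREE )
        {
            v += 8;                            /* spread across priority classes */
            if ( v > LAST_DYN )
                v = FIRST_DYN + ((v + 1) & 7); /* wrap onto the next residue */
            if ( v == current )
                return -1;                     /* table is full */
        }
        current = v;
        table[v] = 1;
        return v;
    }

    int main(void)
    {
        for ( int i = 0; i < NR_VEC; i++ )
            table[i] = FREE;
        printf("first vector:  %#x\n", (unsigned)pick_vector());
        printf("second vector: %#x\n", (unsigned)pick_vector());
        return 0;
    }
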
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900
@@ -179,12 +179,6 @@ l2_pgentry_t *compat_idle_pg_table_l2 = 
 #define l3_disallow_mask(d) L3_DISALLOW_MASK
 #endif
 
-static void queue_deferred_ops(struct domain *d, unsigned int ops)
-{
-    ASSERT(d == current->domain);
-    this_cpu(percpu_mm_info).deferred_ops |= ops;
-}
-
 void __init init_frametable(void)
 {
     unsigned long nr_pages, page_step, i, mfn;
@@ -333,7 +327,7 @@ void share_xen_page_with_guest(
         page->count_info |= PGC_allocated | 1;
         if ( unlikely(d->xenheap_pages++ == 0) )
             get_knownalive_domain(d);
-        list_add_tail(&page->list, &d->xenpage_list);
+        page_list_add_tail(page, &d->xenpage_list);
     }
 
     spin_unlock(&d->page_alloc_lock);
@@ -464,14 +458,18 @@ void update_cr3(struct vcpu *v)
 }
 
 
-static void invalidate_shadow_ldt(struct vcpu *v)
+static void invalidate_shadow_ldt(struct vcpu *v, int flush)
 {
     int i;
     unsigned long pfn;
     struct page_info *page;
-    
+
+    BUG_ON(unlikely(in_irq()));
+
+    spin_lock(&v->arch.shadow_ldt_lock);
+
     if ( v->arch.shadow_ldt_mapcnt == 0 )
-        return;
+        goto out;
 
     v->arch.shadow_ldt_mapcnt = 0;
 
@@ -486,11 +484,12 @@ static void invalidate_shadow_ldt(struct
         put_page_and_type(page);
     }
 
-    /* Dispose of the (now possibly invalid) mappings from the TLB.  */
-    if ( v == current )
-        queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
-    else
-        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+    /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */
+    if ( flush )
+        flush_tlb_mask(v->vcpu_dirty_cpumask);
+
+ out:
+    spin_unlock(&v->arch.shadow_ldt_lock);
 }
 
 
@@ -541,8 +540,10 @@ int map_ldt_shadow_page(unsigned int off
 
     nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
 
+    spin_lock(&v->arch.shadow_ldt_lock);
     l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
     v->arch.shadow_ldt_mapcnt++;
+    spin_unlock(&v->arch.shadow_ldt_lock);
 
     return 1;
 }
@@ -989,7 +990,7 @@ void put_page_from_l1e(l1_pgentry_t l1e,
              (d == e) )
         {
             for_each_vcpu ( d, v )
-                invalidate_shadow_ldt(v);
+                invalidate_shadow_ldt(v, 1);
         }
         put_page(page);
     }
@@ -2023,30 +2024,17 @@ int free_page_type(struct page_info *pag
     unsigned long gmfn;
     int rc;
 
-    if ( likely(owner != NULL) )
-    {
-        /*
-         * We have to flush before the next use of the linear mapping
-         * (e.g., update_va_mapping()) or we could end up modifying a page
-         * that is no longer a page table (and hence screw up ref counts).
-         */
-        if ( current->domain == owner )
-            queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
-        else
-            flush_tlb_mask(owner->domain_dirty_cpumask);
-
-        if ( unlikely(paging_mode_enabled(owner)) )
-        {
-            /* A page table is dirtied when its type count becomes zero. */
-            paging_mark_dirty(owner, page_to_mfn(page));
-
-            if ( shadow_mode_refcounts(owner) )
-                return 0;
-
-            gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
-            ASSERT(VALID_M2P(gmfn));
-            shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
-        }
+    if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
+    {
+        /* A page table is dirtied when its type count becomes zero. */
+        paging_mark_dirty(owner, page_to_mfn(page));
+
+        if ( shadow_mode_refcounts(owner) )
+            return 0;
+
+        gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
+        ASSERT(VALID_M2P(gmfn));
+        shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
     }
 
     if ( !(type & PGT_partial) )
@@ -2366,8 +2354,8 @@ void cleanup_page_cacheattr(struct page_
 
 int new_guest_cr3(unsigned long mfn)
 {
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
     int okay;
     unsigned long old_base_mfn;
 
@@ -2377,19 +2365,19 @@ int new_guest_cr3(unsigned long mfn)
         okay = paging_mode_refcounts(d)
             ? 0 /* Old code was broken, but what should it be? */
             : mod_l4_entry(
-                    __va(pagetable_get_paddr(v->arch.guest_table)),
+                    __va(pagetable_get_paddr(curr->arch.guest_table)),
                     l4e_from_pfn(
                         mfn,
                         (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
-                    pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
+                    pagetable_get_pfn(curr->arch.guest_table), 0, 0) == 0;
         if ( unlikely(!okay) )
         {
             MEM_LOG("Error while installing new compat baseptr %lx", mfn);
             return 0;
         }
 
-        invalidate_shadow_ldt(v);
-        write_ptbase(v);
+        invalidate_shadow_ldt(curr, 0);
+        write_ptbase(curr);
 
         return 1;
     }
@@ -2403,14 +2391,14 @@ int new_guest_cr3(unsigned long mfn)
         return 0;
     }
 
-    invalidate_shadow_ldt(v);
-
-    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-
-    v->arch.guest_table = pagetable_from_pfn(mfn);
-    update_cr3(v);
-
-    write_ptbase(v);
+    invalidate_shadow_ldt(curr, 0);
+
+    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
+
+    curr->arch.guest_table = pagetable_from_pfn(mfn);
+    update_cr3(curr);
+
+    write_ptbase(curr);
 
     if ( likely(old_base_mfn != 0) )
     {
@@ -2440,6 +2428,10 @@ static void process_deferred_ops(void)
             flush_tlb_local();
     }
 
+    /*
+     * Do this after flushing TLBs, to ensure we see fresh LDT mappings
+     * via the linear pagetable mapping.
+     */
     if ( deferred_ops & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
 
@@ -2565,8 +2557,8 @@ int do_mmuext_op(
     unsigned long mfn = 0, gmfn = 0, type;
     unsigned int done = 0;
     struct page_info *page;
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -2729,8 +2721,8 @@ int do_mmuext_op(
                 }
             }
 
-            old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
-            v->arch.guest_table_user = pagetable_from_pfn(mfn);
+            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+            curr->arch.guest_table_user = pagetable_from_pfn(mfn);
 
             if ( old_mfn != 0 )
             {
@@ -2750,7 +2742,7 @@ int do_mmuext_op(
     
         case MMUEXT_INVLPG_LOCAL:
             if ( !paging_mode_enabled(d) 
-                 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
+                 || paging_invlpg(curr, op.arg1.linear_addr) != 0 )
                 flush_tlb_one_local(op.arg1.linear_addr);
             break;
 
@@ -2773,7 +2765,7 @@ int do_mmuext_op(
         }
 
         case MMUEXT_TLB_FLUSH_ALL:
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
             break;
     
         case MMUEXT_INVLPG_ALL:
@@ -2809,13 +2801,14 @@ int do_mmuext_op(
                 okay = 0;
                 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
             }
-            else if ( (v->arch.guest_context.ldt_ents != ents) || 
-                      (v->arch.guest_context.ldt_base != ptr) )
+            else if ( (curr->arch.guest_context.ldt_ents != ents) || 
+                      (curr->arch.guest_context.ldt_base != ptr) )
             {
-                invalidate_shadow_ldt(v);
-                v->arch.guest_context.ldt_base = ptr;
-                v->arch.guest_context.ldt_ents = ents;
-                load_LDT(v);
+                invalidate_shadow_ldt(curr, 0);
+                this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
+                curr->arch.guest_context.ldt_base = ptr;
+                curr->arch.guest_context.ldt_ents = ents;
+                load_LDT(curr);
                 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
                 if ( ents != 0 )
                     this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
@@ -2931,8 +2924,7 @@ int do_mmu_update(
     struct page_info *page;
     int rc = 0, okay = 1, i = 0;
     unsigned int cmd, done = 0;
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
+    struct domain *d = current->domain;
     struct domain_mmap_cache mapcache;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3042,7 +3034,8 @@ int do_mmu_update(
 #endif
                 case PGT_writable_page:
                     perfc_incr(writable_mmu_updates);
-                    okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+                    okay = paging_write_guest_entry(
+                        current, va, req.val, _mfn(mfn));
                     break;
                 }
                 page_unlock(page);
@@ -3052,7 +3045,8 @@ int do_mmu_update(
             else if ( get_page_type(page, PGT_writable_page) )
             {
                 perfc_incr(writable_mmu_updates);
-                okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+                okay = paging_write_guest_entry(
+                    current, va, req.val, _mfn(mfn));
                 put_page_type(page);
             }
 
@@ -3508,7 +3502,7 @@ int steal_page(
     /* Unlink from original owner. */
     if ( !(memflags & MEMF_no_refcount) )
         d->tot_pages--;
-    list_del(&page->list);
+    page_list_del(page, &d->page_list);
 
     spin_unlock(&d->page_alloc_lock);
     return 0;
@@ -3567,34 +3561,40 @@ int do_update_va_mapping(unsigned long v
     if ( pl1e )
         guest_unmap_l1e(v, pl1e);
 
-    process_deferred_ops();
-
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
-            flush_tlb_local();
+            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
             break;
         case UVMF_ALL:
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
             break;
         default:
+            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+                break;
             if ( unlikely(!is_pv_32on64_domain(d) ?
                           get_user(vmask, (unsigned long *)bmap_ptr) :
                           get_user(vmask, (unsigned int *)bmap_ptr)) )
-                rc = -EFAULT;
+                rc = -EFAULT, vmask = 0;
             pmask = vcpumask_to_pcpumask(d, vmask);
+            if ( cpu_isset(smp_processor_id(), pmask) )
+                this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
             flush_tlb_mask(pmask);
             break;
         }
         break;
 
     case UVMF_INVLPG:
+        if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+            break;
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
+            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+                break;
             if ( !paging_mode_enabled(d) ||
                  (paging_invlpg(v, va) != 0) ) 
                 flush_tlb_one_local(va);
@@ -3606,13 +3606,17 @@ int do_update_va_mapping(unsigned long v
             if ( unlikely(!is_pv_32on64_domain(d) ?
                           get_user(vmask, (unsigned long *)bmap_ptr) :
                           get_user(vmask, (unsigned int *)bmap_ptr)) )
-                rc = -EFAULT;
+                rc = -EFAULT, vmask = 0;
             pmask = vcpumask_to_pcpumask(d, vmask);
+            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+                cpu_clear(smp_processor_id(), pmask);
             flush_tlb_one_mask(pmask, va);
             break;
         }
         break;
     }
+
+    process_deferred_ops();
 
     return rc;
 }
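
The mm.c hunks above route more flush work through the per-CPU deferred_ops word: individual paths only set DOP_FLUSH_TLB / DOP_FLUSH_ALL_TLBS bits (skipping narrower flushes when a full flush is already pending) and process_deferred_ops() does the flushing once at the end. Here is a simplified, self-contained sketch of that accumulate-then-process idea; it is not the exact Xen logic, and the names are illustrative.

    /* Simplified model; the OP_* names mirror the patch but the logic is a toy. */
    #include <stdio.h>

    #define OP_FLUSH_TLB       (1u << 0)   /* flush this CPU's TLB only      */
    #define OP_FLUSH_ALL_TLBS  (1u << 1)   /* flush TLBs on every dirty CPU  */

    static unsigned int deferred_ops;      /* per-CPU in the real code       */

    static void queue_op(unsigned int op)
    {
        /* A pending full flush makes a narrower flush redundant. */
        if ( (op == OP_FLUSH_TLB) && (deferred_ops & OP_FLUSH_ALL_TLBS) )
            return;
        deferred_ops |= op;
    }

    static void process_deferred_ops(void)
    {
        unsigned int ops = deferred_ops;

        deferred_ops = 0;
        if ( ops & OP_FLUSH_ALL_TLBS )
            printf("flush TLBs on all dirty CPUs\n");
        else if ( ops & OP_FLUSH_TLB )
            printf("flush local TLB\n");
    }

    int main(void)
    {
        queue_op(OP_FLUSH_ALL_TLBS);
        queue_op(OP_FLUSH_TLB);            /* absorbed by the pending full flush */
        process_deferred_ops();
        return 0;
    }
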
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Fri Feb 13 11:22:28 2009 +0900
@@ -45,11 +45,11 @@
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
 #undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
@@ -96,11 +96,10 @@ static struct page_info *hap_alloc(struc
 
     ASSERT(hap_locked_by_me(d));
 
-    if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) )
+    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+    if ( unlikely(!pg) )
         return NULL;
 
-    pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list);
-    list_del(&pg->list);
     d->arch.paging.hap.free_pages--;
 
     p = hap_map_domain_page(page_to_mfn(pg));
@@ -118,7 +117,7 @@ static void hap_free(struct domain *d, m
     ASSERT(hap_locked_by_me(d));
 
     d->arch.paging.hap.free_pages++;
-    list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+    page_list_add_tail(pg, &d->arch.paging.hap.freelist);
 }
 
 static struct page_info *hap_alloc_p2m_page(struct domain *d)
@@ -210,15 +209,13 @@ hap_set_allocation(struct domain *d, uns
             }
             d->arch.paging.hap.free_pages++;
             d->arch.paging.hap.total_pages++;
-            list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
         }
         else if ( d->arch.paging.hap.total_pages > pages )
         {
             /* Need to return memory to domheap */
-            ASSERT(!list_empty(&d->arch.paging.hap.freelist));
-            pg = list_entry(d->arch.paging.hap.freelist.next,
-                            struct page_info, list);
-            list_del(&pg->list);
+            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+            ASSERT(pg);
             d->arch.paging.hap.free_pages--;
             d->arch.paging.hap.total_pages--;
             pg->count_info = 0;
@@ -393,7 +390,7 @@ void hap_domain_init(struct domain *d)
 void hap_domain_init(struct domain *d)
 {
     hap_lock_init(d);
-    INIT_LIST_HEAD(&d->arch.paging.hap.freelist);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
 
     /* This domain will use HAP for log-dirty mode */
     paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Fri Feb 13 11:22:28 2009 +0900
@@ -63,7 +63,7 @@ static int ept_set_middle_entry(struct d
 
     pg->count_info = 1;
     pg->u.inuse.type_info = 1 | PGT_validated;
-    list_add_tail(&pg->list, &d->arch.p2m->pages);
+    page_list_add_tail(pg, &d->arch.p2m->pages);
 
     ept_entry->emt = 0;
     ept_entry->igmt = 0;
@@ -116,12 +116,12 @@ static int ept_next_level(struct domain 
 }
 
 /*
- * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
  */
 static int
-_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
-              unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
+              unsigned int order, p2m_type_t p2mt)
 {
     ept_entry_t *table = NULL;
     unsigned long gfn_remainder = gfn, offset = 0;
@@ -131,6 +131,7 @@ _ept_set_entry(struct domain *d, unsigne
     int walk_level = order / EPT_TABLE_ORDER;
     int direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t igmt = 0;
+    int need_modify_vtd_table = 1;
 
     /* we only support 4k and 2m pages now */
 
@@ -171,14 +172,23 @@ _ept_set_entry(struct domain *d, unsigne
 
             if ( ret == GUEST_TABLE_SUPER_PAGE )
             {
-                ept_entry->mfn = mfn_x(mfn) - offset;
+                if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
+                    need_modify_vtd_table = 0;  
+                else                  
+                    ept_entry->mfn = mfn_x(mfn) - offset;
+
                 if ( ept_entry->avail1 == p2m_ram_logdirty &&
                   p2mt == p2m_ram_rw )
                     for ( i = 0; i < 512; i++ )
                         paging_mark_dirty(d, mfn_x(mfn)-offset+i);
             }
             else
-                ept_entry->mfn = mfn_x(mfn);
+            {
+                if ( ept_entry->mfn == mfn_x(mfn) )
+                    need_modify_vtd_table = 0;
+                else
+                    ept_entry->mfn = mfn_x(mfn);
+            }
 
             ept_entry->avail1 = p2mt;
             ept_entry->rsvd = 0;
@@ -239,7 +249,10 @@ _ept_set_entry(struct domain *d, unsigne
                                                 &igmt, direct_mmio);
         split_ept_entry->igmt = igmt;
 
-        split_ept_entry->mfn = mfn_x(mfn);
+        if ( split_ept_entry->mfn == mfn_x(mfn) )
+            need_modify_vtd_table = 0;
+        else
+            split_ept_entry->mfn = mfn_x(mfn);
         split_ept_entry->avail1 = p2mt;
         ept_p2m_type_to_flags(split_ept_entry, p2mt);
 
@@ -287,17 +300,6 @@ out:
     }
 
     return rv;
-}
-
-static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-              unsigned int order, p2m_type_t p2mt)
-{
-    /* ept_set_entry() are called from set_entry(),
-     * We should always create VT-d page table acording 
-     * to the gfn to mfn translations changes.
-     */
-    return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 
 }
 
 /* Read ept p2m entries */
@@ -393,6 +395,21 @@ static mfn_t ept_get_entry_current(unsig
     return ept_get_entry(current->domain, gfn, t, q);
 }
 
+/* Check whether the newly computed emt/igmt for this gfn match the old
+ * values; returns 0 if they match (the EPT entry need not be rewritten),
+ * 1 otherwise. */
+static int need_modify_ept_entry(struct domain *d, unsigned long gfn,
+                                    unsigned long mfn, uint8_t o_igmt,
+                                    uint8_t o_emt, p2m_type_t p2mt)
+{
+    uint8_t igmt, emt;
+    emt = epte_get_entry_emt(d, gfn, mfn, &igmt, 
+                                (p2mt == p2m_mmio_direct));
+    if ( (emt == o_emt) && (igmt == o_igmt) )
+        return 0;
+    return 1; 
+}
+
 void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
                  unsigned long end_gfn)
 {
@@ -401,6 +418,7 @@ void ept_change_entry_emt_with_range(str
     uint64_t epte;
     int order = 0;
     unsigned long mfn;
+    uint8_t o_igmt, o_emt;
 
     for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
     {
@@ -410,7 +428,9 @@ void ept_change_entry_emt_with_range(str
         mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT;
         if ( !mfn_valid(mfn) )
             continue;
-        p2mt = (epte & EPTE_AVAIL1_MASK) >> 8;
+        p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT;
+        o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;
+        o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;
         order = 0;
 
         if ( epte & EPTE_SUPER_PAGE_MASK )
@@ -422,30 +442,26 @@ void ept_change_entry_emt_with_range(str
                  * Set emt for super page.
                  */
                 order = EPT_TABLE_ORDER;
-                /* vmx_set_uc_mode() dont' touch the gfn to mfn
-                 * translations, only modify the emt field of the EPT entries.
-                 * so we need not modify the current VT-d page tables.
-                 */
-                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
+                if ( need_modify_ept_entry(d, gfn, mfn, 
+                                            o_igmt, o_emt, p2mt) )
+                    ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
                 gfn += 0x1FF;
             }
             else
             {
-                /* 1)change emt for partial entries of the 2m area.
-                 * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
-                 * translations, only modify the emt field of the EPT entries.
-                 * so we need not modify the current VT-d page tables.
-                 */
-                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
+                /* change emt for partial entries of the 2m area. */
+                if ( need_modify_ept_entry(d, gfn, mfn, 
+                                            o_igmt, o_emt, p2mt) )
+                    ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
                 gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
             }
         }
-        else /* 1)gfn assigned with 4k
-              * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
-              * translations, only modify the emt field of the EPT entries.
-              * so we need not modify the current VT-d page tables.
-             */
-            _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
+        else /* gfn assigned with 4k */
+        {
+            if ( need_modify_ept_entry(d, gfn, mfn, 
+                                            o_igmt, o_emt, p2mt) )
+                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+        }
     }
 }
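
In the EPT changes above, need_modify_vtd_table is derived by comparing the old and new MFN held in the entry, and ept_change_entry_emt_with_range() only rewrites an entry when its emt/igmt actually change. A compact sketch of that compare-before-write idea follows, using purely illustrative types rather than the real ept_entry_t layout.

    /* Purely illustrative types; not the real ept_entry_t layout. */
    #include <stdint.h>
    #include <stdio.h>

    struct entry {
        uint64_t mfn;
        uint8_t  emt, igmt;
    };

    /* Write the entry only when something would change; report whether a
     * dependent structure (the VT-d tables in the patch) also needs syncing. */
    static int set_entry(struct entry *e, uint64_t mfn, uint8_t emt, uint8_t igmt)
    {
        int mapping_changed = (e->mfn != mfn);

        if ( !mapping_changed && (e->emt == emt) && (e->igmt == igmt) )
            return 0;                  /* nothing to do at all */

        e->mfn  = mfn;
        e->emt  = emt;
        e->igmt = igmt;
        return mapping_changed;        /* only a new mfn needs the IOMMU sync */
    }

    int main(void)
    {
        struct entry e = { 0x1000, 0, 0 };

        printf("sync VT-d: %d\n", set_entry(&e, 0x1000, 1, 0)); /* 0: emt only */
        printf("sync VT-d: %d\n", set_entry(&e, 0x2000, 1, 0)); /* 1: new mfn  */
        return 0;
    }
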
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/p2m.c     Fri Feb 13 11:22:28 2009 +0900
@@ -89,11 +89,11 @@
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
 #undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 
 /* PTE flags for the various types of p2m entry */
@@ -175,7 +175,7 @@ p2m_next_level(struct domain *d, mfn_t *
         struct page_info *pg = d->arch.p2m->alloc_page(d);
         if ( pg == NULL )
             return 0;
-        list_add_tail(&pg->list, &d->arch.p2m->pages);
+        page_list_add_tail(pg, &d->arch.p2m->pages);
         pg->u.inuse.type_info = type | 1 | PGT_validated;
         pg->count_info = 1;
 
@@ -214,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t *
         struct page_info *pg = d->arch.p2m->alloc_page(d);
         if ( pg == NULL )
             return 0;
-        list_add_tail(&pg->list, &d->arch.p2m->pages);
+        page_list_add_tail(pg, &d->arch.p2m->pages);
         pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
         pg->count_info = 1;
         
@@ -300,18 +300,18 @@ p2m_pod_cache_add(struct domain *d,
     for(i=0; i < 1 << order ; i++)
     {
         p = page + i;
-        list_del(&p->list);
+        page_list_del(p, &d->page_list);
     }
 
     /* Then add the first one to the appropriate populate-on-demand list */
     switch(order)
     {
     case 9:
-        list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */
+        page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
         p2md->pod.count += 1 << order;
         break;
     case 0:
-        list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */
+        page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
         p2md->pod.count += 1 ;
         break;
     default:
@@ -334,54 +334,51 @@ static struct page_info * p2m_pod_cache_
     struct page_info *p = NULL;
     int i;
 
-    if ( order == 9 && list_empty(&p2md->pod.super) )
+    if ( order == 9 && page_list_empty(&p2md->pod.super) )
     {
         return NULL;
     }
-    else if ( order == 0 && list_empty(&p2md->pod.single) )
+    else if ( order == 0 && page_list_empty(&p2md->pod.single) )
     {
         unsigned long mfn;
         struct page_info *q;
 
-        BUG_ON( list_empty(&p2md->pod.super) );
+        BUG_ON( page_list_empty(&p2md->pod.super) );
 
         /* Break up a superpage to make single pages. NB count doesn't
          * need to be adjusted. */
         printk("%s: Breaking up superpage.\n", __func__);
-        p = list_entry(p2md->pod.super.next, struct page_info, list);
-        list_del(&p->list);
+        p = page_list_remove_head(&p2md->pod.super);
         mfn = mfn_x(page_to_mfn(p));
 
         for ( i=0; i<(1<<9); i++ )
         {
             q = mfn_to_page(_mfn(mfn+i));
-            list_add_tail(&q->list, &p2md->pod.single);
+            page_list_add_tail(q, &p2md->pod.single);
         }
     }
 
     switch ( order )
     {
     case 9:
-        BUG_ON( list_empty(&p2md->pod.super) );
-        p = list_entry(p2md->pod.super.next, struct page_info, list); 
+        BUG_ON( page_list_empty(&p2md->pod.super) );
+        p = page_list_remove_head(&p2md->pod.super);
         p2md->pod.count -= 1 << order; /* Lock: page_alloc */
         break;
     case 0:
-        BUG_ON( list_empty(&p2md->pod.single) );
-        p = list_entry(p2md->pod.single.next, struct page_info, list);
+        BUG_ON( page_list_empty(&p2md->pod.single) );
+        p = page_list_remove_head(&p2md->pod.single);
         p2md->pod.count -= 1;
         break;
     default:
         BUG();
     }
 
-    list_del(&p->list);
-
     /* Put the pages back on the domain page_list */
     for ( i = 0 ; i < (1 << order) ; i++ )
     {
         BUG_ON(page_get_owner(p + i) != d);
-        list_add_tail(&p[i].list, &d->page_list);
+        page_list_add_tail(p + i, &d->page_list);
     }
 
     return p;
@@ -425,7 +422,7 @@ p2m_pod_set_cache_target(struct domain *
         spin_lock(&d->page_alloc_lock);
 
         if ( (p2md->pod.count - pod_target) > (1>>9)
-             && !list_empty(&p2md->pod.super) )
+             && !page_list_empty(&p2md->pod.super) )
             order = 9;
         else
             order = 0;
@@ -535,38 +532,27 @@ p2m_pod_empty_cache(struct domain *d)
 p2m_pod_empty_cache(struct domain *d)
 {
     struct p2m_domain *p2md = d->arch.p2m;
-    struct list_head *q, *p;
+    struct page_info *page;
 
     spin_lock(&d->page_alloc_lock);
 
-    list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */
+    while ( (page = page_list_remove_head(&p2md->pod.super)) )
     {
         int i;
-        struct page_info *page;
             
-        list_del(p);
-            
-        page = list_entry(p, struct page_info, list);
-
         for ( i = 0 ; i < (1 << 9) ; i++ )
         {
             BUG_ON(page_get_owner(page + i) != d);
-            list_add_tail(&page[i].list, &d->page_list);
+            page_list_add_tail(page + i, &d->page_list);
         }
 
         p2md->pod.count -= 1<<9;
     }
 
-    list_for_each_safe(p, q, &p2md->pod.single)
-    {
-        struct page_info *page;
-            
-        list_del(p);
-            
-        page = list_entry(p, struct page_info, list);
-
+    while ( (page = page_list_remove_head(&p2md->pod.single)) )
+    {
         BUG_ON(page_get_owner(page) != d);
-        list_add_tail(&page->list, &d->page_list);
+        page_list_add_tail(page, &d->page_list);
 
         p2md->pod.count -= 1;
     }
@@ -952,7 +938,7 @@ p2m_pod_emergency_sweep_super(struct dom
          * NB that this is a zero-sum game; we're increasing our cache size
          * by increasing our 'debt'.  Since we hold the p2m lock,
          * (entry_count - count) must remain the same. */
-        if ( !list_empty(&p2md->pod.super) &&  i < limit )
+        if ( !page_list_empty(&p2md->pod.super) &&  i < limit )
             break;
     }
 
@@ -1035,12 +1021,12 @@ p2m_pod_demand_populate(struct domain *d
     }
 
     /* If we're low, start a sweep */
-    if ( order == 9 && list_empty(&p2md->pod.super) )
+    if ( order == 9 && page_list_empty(&p2md->pod.super) )
         p2m_pod_emergency_sweep_super(d);
 
-    if ( list_empty(&p2md->pod.single) &&
+    if ( page_list_empty(&p2md->pod.single) &&
          ( ( order == 0 )
-           || (order == 9 && list_empty(&p2md->pod.super) ) ) )
+           || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
         p2m_pod_emergency_sweep(d);
 
     /* Keep track of the highest gfn demand-populated by a guest fault */
@@ -1477,9 +1463,9 @@ int p2m_init(struct domain *d)
 
     memset(p2m, 0, sizeof(*p2m));
     p2m_lock_init(p2m);
-    INIT_LIST_HEAD(&p2m->pages);
-    INIT_LIST_HEAD(&p2m->pod.super);
-    INIT_LIST_HEAD(&p2m->pod.single);
+    INIT_PAGE_LIST_HEAD(&p2m->pages);
+    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
+    INIT_PAGE_LIST_HEAD(&p2m->pod.single);
 
     p2m->set_entry = p2m_set_entry;
     p2m->get_entry = p2m_gfn_to_mfn;
@@ -1540,7 +1526,6 @@ int p2m_alloc_table(struct domain *d,
 
 {
     mfn_t mfn = _mfn(INVALID_MFN);
-    struct list_head *entry;
     struct page_info *page, *p2m_top;
     unsigned int page_count = 0;
     unsigned long gfn = -1UL;
@@ -1566,7 +1551,7 @@ int p2m_alloc_table(struct domain *d,
         p2m_unlock(p2m);
         return -ENOMEM;
     }
-    list_add_tail(&p2m_top->list, &p2m->pages);
+    page_list_add_tail(p2m_top, &p2m->pages);
 
     p2m_top->count_info = 1;
     p2m_top->u.inuse.type_info =
@@ -1587,11 +1572,8 @@ int p2m_alloc_table(struct domain *d,
         goto error;
 
     /* Copy all existing mappings from the page list and m2p */
-    for ( entry = d->page_list.next;
-          entry != &d->page_list;
-          entry = entry->next )
-    {
-        page = list_entry(entry, struct page_info, list);
+    page_list_for_each(page, &d->page_list)
+    {
         mfn = page_to_mfn(page);
         gfn = get_gpfn_from_mfn(mfn_x(mfn));
         page_count++;
@@ -1621,19 +1603,14 @@ void p2m_teardown(struct domain *d)
 /* Return all the p2m pages to Xen.
  * We know we don't have any extra mappings to these pages */
 {
-    struct list_head *entry, *n;
     struct page_info *pg;
     struct p2m_domain *p2m = d->arch.p2m;
 
     p2m_lock(p2m);
     d->arch.phys_table = pagetable_null();
 
-    list_for_each_safe(entry, n, &p2m->pages)
-    {
-        pg = list_entry(entry, struct page_info, list);
-        list_del(entry);
+    while ( (pg = page_list_remove_head(&p2m->pages)) )
         p2m->free_page(d, pg);
-    }
     p2m_unlock(p2m);
 }
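
The populate-on-demand cache code above now pops entries with page_list_remove_head() and, when the single-page list runs dry, breaks one order-9 block into 512 singles. Below is a toy, counters-only sketch of that split step; the real code moves struct page_info entries between lists, and the names here are illustrative.

    /* Counters-only model; the real code moves struct page_info entries. */
    #include <stdio.h>

    #define SUPERPAGE_ORDER 9

    struct pod_cache {
        unsigned long supers;   /* cached 512-page blocks */
        unsigned long singles;  /* cached single pages    */
    };

    static int pod_get_single(struct pod_cache *c)
    {
        if ( c->singles == 0 )
        {
            if ( c->supers == 0 )
                return -1;                        /* cache is empty */
            /* Break one order-9 block into 512 order-0 entries. */
            c->supers  -= 1;
            c->singles += 1UL << SUPERPAGE_ORDER;
        }
        c->singles -= 1;
        return 0;
    }

    int main(void)
    {
        struct pod_cache c = { .supers = 1, .singles = 0 };

        pod_get_single(&c);
        printf("supers=%lu singles=%lu\n", c.supers, c.singles); /* 0 and 511 */
        return 0;
    }
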
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/paging.c  Fri Feb 13 11:22:28 2009 +0900
@@ -47,11 +47,11 @@
 /************************************************/
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
 #undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /* The log-dirty lock.  This protects the log-dirty bitmap from
  * concurrent accesses (and teardowns, etc).
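
The overridden macros above exist so this file can keep using mfn_to_page()/page_to_mfn() with the typed mfn_t wrapper instead of raw unsigned longs. A stand-alone sketch of that typed-integer idiom, independent of the real Xen definitions:

    /* Stand-alone illustration of the mfn_t typed-integer idiom. */
    #include <stdio.h>

    typedef struct { unsigned long m; } mfn_t;

    static inline mfn_t _mfn(unsigned long m) { mfn_t x = { m }; return x; }
    static inline unsigned long mfn_x(mfn_t x) { return x.m; }

    int main(void)
    {
        mfn_t mfn = _mfn(0x1000);

        printf("raw frame number: %#lx\n", mfn_x(mfn));
        /* printf("%#lx\n", mfn);  -- rejected by the compiler: not an integer */
        return 0;
    }
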
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Feb 13 11:22:28 2009 +0900
@@ -48,9 +48,9 @@ void shadow_domain_init(struct domain *d
     int i;
     shadow_lock_init(d);
     for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
-        INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
-    INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
-    INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
+        INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
 
     /* Use shadow pagetables for log-dirty support */
     paging_log_dirty_init(d, shadow_enable_log_dirty, 
@@ -1291,9 +1291,9 @@ static inline int space_is_available(
     for ( ; order <= shadow_max_order(d); ++order )
     {
         unsigned int n = count;
-        const struct list_head *p;
-
-        list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
+        const struct page_info *sp;
+
+        page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] )
             if ( --n == 0 )
                 return 1;
         count = (count + 1) >> 1;
@@ -1306,8 +1306,8 @@ static inline int space_is_available(
  * non-Xen mappings in this top-level shadow mfn */
 static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
-    switch ( sp->type )
+    struct page_info *sp = mfn_to_page(smfn);
+    switch ( sp->u.sh.type )
     {
     case SH_type_l2_32_shadow:
         SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
@@ -1322,7 +1322,7 @@ static void shadow_unhook_mappings(struc
         break;
 #endif
     default:
-        SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type);
+        SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type);
         BUG();
     }
 }
@@ -1334,7 +1334,7 @@ static inline void trace_shadow_prealloc
         /* Convert smfn to gfn */
         unsigned long gfn;
         ASSERT(mfn_valid(smfn));
-        gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
+        gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back));
         __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
                     sizeof(gfn), (unsigned char*)&gfn);
     }
@@ -1350,8 +1350,7 @@ static void _shadow_prealloc(
     /* Need a vpcu for calling unpins; for now, since we don't have
      * per-vcpu shadows, any will do */
     struct vcpu *v, *v2;
-    struct list_head *l, *t;
-    struct shadow_page_info *sp;
+    struct page_info *sp, *t;
     mfn_t smfn;
     int i;
 
@@ -1365,10 +1364,9 @@ static void _shadow_prealloc(
 
     /* Stage one: walk the list of pinned pages, unpinning them */
     perfc_incr(shadow_prealloc_1);
-    list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
-    {
-        sp = list_entry(l, struct shadow_page_info, list);
-        smfn = shadow_page_to_mfn(sp);
+    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+    {
+        smfn = page_to_mfn(sp);
 
         /* Unpin this top-level shadow */
         trace_shadow_prealloc_unpin(d, smfn);
@@ -1427,8 +1425,7 @@ void shadow_prealloc(struct domain *d, u
  * this domain's shadows */
 static void shadow_blow_tables(struct domain *d) 
 {
-    struct list_head *l, *t;
-    struct shadow_page_info *sp;
+    struct page_info *sp, *t;
     struct vcpu *v = d->vcpu[0];
     mfn_t smfn;
     int i;
@@ -1436,10 +1433,9 @@ static void shadow_blow_tables(struct do
     ASSERT(v != NULL);
 
     /* Pass one: unpin all pinned pages */
-    list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
-    {
-        sp = list_entry(l, struct shadow_page_info, list);
-        smfn = shadow_page_to_mfn(sp);
+    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+    {
+        smfn = page_to_mfn(sp);
         sh_unpin(v, smfn);
     }
         
@@ -1493,6 +1489,18 @@ __initcall(shadow_blow_tables_keyhandler
 __initcall(shadow_blow_tables_keyhandler_init);
 #endif /* !NDEBUG */
 
+static inline struct page_info *
+next_shadow(const struct page_info *sp)
+{
+    return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL;
+}
+
+static inline void
+set_next_shadow(struct page_info *sp, struct page_info *next)
+{
+    sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0;
+}
+
 /* Allocate another shadow's worth of (contiguous, aligned) pages,
  * and fill in the type and backpointer fields of their page_infos. 
  * Never fails to allocate. */
@@ -1500,7 +1508,7 @@ mfn_t shadow_alloc(struct domain *d,
                     u32 shadow_type,
                     unsigned long backpointer)
 {
-    struct shadow_page_info *sp = NULL;
+    struct page_info *sp = NULL;
     unsigned int order = shadow_order(shadow_type);
     cpumask_t mask;
     void *p;
@@ -1515,7 +1523,7 @@ mfn_t shadow_alloc(struct domain *d,
 
     /* Find smallest order which can satisfy the request. */
     for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
-        if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
+        if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) )
             goto found;
     
     /* If we get here, we failed to allocate. This should never happen.
@@ -1526,16 +1534,12 @@ mfn_t shadow_alloc(struct domain *d,
     BUG();
 
  found:
-    sp = list_entry(d->arch.paging.shadow.freelists[i].next, 
-                    struct shadow_page_info, list);
-    list_del(&sp->list);
-            
     /* We may have to halve the chunk a number of times. */
     while ( i != order )
     {
         i--;
-        sp->order = i;
-        list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
+        sp->v.free.order = i;
+        page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]);
         sp += 1 << i;
     }
     d->arch.paging.shadow.free_pages -= 1 << order;
@@ -1553,26 +1557,26 @@ mfn_t shadow_alloc(struct domain *d,
             flush_tlb_mask(mask);
         }
         /* Now safe to clear the page for reuse */
-        p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
+        p = sh_map_domain_page(page_to_mfn(sp+i));
         ASSERT(p != NULL);
         clear_page(p);
         sh_unmap_domain_page(p);
-        INIT_LIST_HEAD(&sp[i].list);
-        sp[i].type = shadow_type;
-        sp[i].pinned = 0;
-        sp[i].count = 0;
-        sp[i].backpointer = backpointer;
-        sp[i].next_shadow = NULL;
+        INIT_PAGE_LIST_ENTRY(&sp[i].list);
+        sp[i].u.sh.type = shadow_type;
+        sp[i].u.sh.pinned = 0;
+        sp[i].u.sh.count = 0;
+        sp[i].v.sh.back = backpointer;
+        set_next_shadow(&sp[i], NULL);
         perfc_incr(shadow_alloc_count);
     }
-    return shadow_page_to_mfn(sp);
+    return page_to_mfn(sp);
 }
 
 
 /* Return some shadow pages to the pool. */
 void shadow_free(struct domain *d, mfn_t smfn)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 
+    struct page_info *sp = mfn_to_page(smfn); 
     u32 shadow_type;
     unsigned long order;
     unsigned long mask;
@@ -1581,7 +1585,7 @@ void shadow_free(struct domain *d, mfn_t
     ASSERT(shadow_locked_by_me(d));
     perfc_incr(shadow_free);
 
-    shadow_type = sp->type;
+    shadow_type = sp->u.sh.type;
     ASSERT(shadow_type != SH_type_none);
     ASSERT(shadow_type != SH_type_p2m_table);
     order = shadow_order(shadow_type);
@@ -1605,7 +1609,7 @@ void shadow_free(struct domain *d, mfn_t
         }
 #endif
         /* Strip out the type: this is now a free shadow page */
-        sp[i].type = 0;
+        sp[i].u.sh.type = 0;
         /* Remember the TLB timestamp so we will know whether to flush 
          * TLBs when we reuse the page.  Because the destructors leave the
          * contents of the pages in place, we can delay TLB flushes until
@@ -1618,22 +1622,24 @@ void shadow_free(struct domain *d, mfn_t
     for ( ; order < shadow_max_order(d); ++order )
     {
         mask = 1 << order;
-        if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
+        if ( (mfn_x(page_to_mfn(sp)) & mask) ) {
             /* Merge with predecessor block? */
-            if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
+            if ( ((sp-mask)->u.sh.type != PGT_none) ||
+                 ((sp-mask)->v.free.order != order) )
                 break;
-            list_del(&(sp-mask)->list);
             sp -= mask;
+            page_list_del(sp, &d->arch.paging.shadow.freelists[order]);
         } else {
             /* Merge with successor block? */
-            if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
+            if ( ((sp+mask)->u.sh.type != PGT_none) ||
+                 ((sp+mask)->v.free.order != order) )
                 break;
-            list_del(&(sp+mask)->list);
-        }
-    }
-
-    sp->order = order;
-    list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+            page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]);
+        }
+    }
+
+    sp->v.free.order = order;
+    page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
 }
 
 /* Divert some memory from the pool to be used by the p2m mapping.
@@ -1672,7 +1678,7 @@ sh_alloc_p2m_pages(struct domain *d)
          */
         page_set_owner(&pg[i], d);
         pg[i].count_info = 1;
-        list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
+        page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist);
     }
     return 1;
 }
@@ -1681,25 +1687,22 @@ static struct page_info *
 static struct page_info *
 shadow_alloc_p2m_page(struct domain *d)
 {
-    struct list_head *entry;
     struct page_info *pg;
     mfn_t mfn;
     void *p;
     
     shadow_lock(d);
 
-    if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
+    if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) &&
          !sh_alloc_p2m_pages(d) )
     {
         shadow_unlock(d);
         return NULL;
     }
-    entry = d->arch.paging.shadow.p2m_freelist.next;
-    list_del(entry);
+    pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist);
 
     shadow_unlock(d);
 
-    pg = list_entry(entry, struct page_info, list);
     mfn = page_to_mfn(pg);
     p = sh_map_domain_page(mfn);
     clear_page(p);
@@ -1780,7 +1783,7 @@ static unsigned int sh_set_allocation(st
                                       unsigned int pages,
                                       int *preempted)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     unsigned int lower_bound;
     unsigned int j, order = shadow_max_order(d);
 
@@ -1802,7 +1805,7 @@ static unsigned int sh_set_allocation(st
         if ( d->arch.paging.shadow.total_pages < pages ) 
         {
             /* Need to allocate more memory from domheap */
-            sp = (struct shadow_page_info *)
+            sp = (struct page_info *)
                 alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
             if ( sp == NULL ) 
             { 
@@ -1813,23 +1816,26 @@ static unsigned int sh_set_allocation(st
             d->arch.paging.shadow.total_pages += 1 << order;
             for ( j = 0; j < 1U << order; j++ )
             {
-                sp[j].type = 0;  
-                sp[j].pinned = 0;
-                sp[j].count = 0;
-                sp[j].mbz = 0;
+                sp[j].u.sh.type = 0;
+                sp[j].u.sh.pinned = 0;
+                sp[j].u.sh.count = 0;
                 sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
             }
-            sp->order = order;
-            list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+            sp->v.free.order = order;
+            page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
         } 
         else if ( d->arch.paging.shadow.total_pages > pages ) 
         {
             /* Need to return memory to domheap */
             _shadow_prealloc(d, order, 1);
-            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
-            sp = list_entry(d->arch.paging.shadow.freelists[order].next,
-                            struct shadow_page_info, list);
-            list_del(&sp->list);
+            sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
+            ASSERT(sp);
+            /*
+             * The pages were allocated anonymously, but the owner field
+             * gets overwritten normally, so need to clear it here.
+             */
+            for ( j = 0; j < 1U << order; j++ )
+                page_set_owner(&((struct page_info *)sp)[j], NULL);
             d->arch.paging.shadow.free_pages -= 1 << order;
             d->arch.paging.shadow.total_pages -= 1 << order;
             free_domheap_pages((struct page_info *)sp, order);
@@ -1880,7 +1886,7 @@ static void sh_hash_audit_bucket(struct 
 static void sh_hash_audit_bucket(struct domain *d, int bucket)
 /* Audit one bucket of the hash table */
 {
-    struct shadow_page_info *sp, *x;
+    struct page_info *sp, *x;
 
     if ( !(SHADOW_AUDIT_ENABLE) )
         return;
@@ -1889,38 +1895,39 @@ static void sh_hash_audit_bucket(struct 
     while ( sp )
     {
         /* Not a shadow? */
-        BUG_ON( sp->mbz != 0 );
+        BUG_ON( sp->count_info != 0 );
         /* Bogus type? */
-        BUG_ON( sp->type == 0 ); 
-        BUG_ON( sp->type > SH_type_max_shadow );
+        BUG_ON( sp->u.sh.type == 0 );
+        BUG_ON( sp->u.sh.type > SH_type_max_shadow );
         /* Wrong bucket? */
-        BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket ); 
+        BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket );
         /* Duplicate entry? */
-        for ( x = sp->next_shadow; x; x = x->next_shadow )
-            BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
+        for ( x = next_shadow(sp); x; x = next_shadow(x) )
+            BUG_ON( x->v.sh.back == sp->v.sh.back &&
+                    x->u.sh.type == sp->u.sh.type );
         /* Follow the backpointer to the guest pagetable */
-        if ( sp->type != SH_type_fl1_32_shadow
-             && sp->type != SH_type_fl1_pae_shadow
-             && sp->type != SH_type_fl1_64_shadow )
-        {
-            struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
+        if ( sp->u.sh.type != SH_type_fl1_32_shadow
+             && sp->u.sh.type != SH_type_fl1_pae_shadow
+             && sp->u.sh.type != SH_type_fl1_64_shadow )
+        {
+            struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back));
             /* Bad shadow flags on guest page? */
-            BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
+            BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) );
             /* Bad type count on guest page? */
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
-            if ( sp->type == SH_type_l1_32_shadow
-                 || sp->type == SH_type_l1_pae_shadow
-                 || sp->type == SH_type_l1_64_shadow )
+            if ( sp->u.sh.type == SH_type_l1_32_shadow
+                 || sp->u.sh.type == SH_type_l1_pae_shadow
+                 || sp->u.sh.type == SH_type_l1_64_shadow )
             {
                 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
                      && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
                 {
                     if ( !page_is_out_of_sync(gpg) )
                     {
-                        SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+                        SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
                                      " and not OOS but has typecount %#lx\n",
-                                     sp->backpointer, 
-                                     mfn_x(shadow_page_to_mfn(sp)), 
+                                     sp->v.sh.back,
+                                     mfn_x(page_to_mfn(sp)), 
                                      gpg->u.inuse.type_info);
                         BUG();
                     }
@@ -1931,15 +1938,15 @@ static void sh_hash_audit_bucket(struct 
             if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 
                  && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
             {
-                SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+                SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
                              " but has typecount %#lx\n",
-                             sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), 
+                             sp->v.sh.back, mfn_x(page_to_mfn(sp)),
                              gpg->u.inuse.type_info);
                 BUG();
             }
         }
         /* That entry was OK; on we go */
-        sp = sp->next_shadow;
+        sp = next_shadow(sp);
     }
 }
 
@@ -1972,15 +1979,15 @@ static void sh_hash_audit(struct domain 
  * Returns 0 for success, 1 for error. */
 static int shadow_hash_alloc(struct domain *d)
 {
-    struct shadow_page_info **table;
+    struct page_info **table;
 
     ASSERT(shadow_locked_by_me(d));
     ASSERT(!d->arch.paging.shadow.hash_table);
 
-    table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
+    table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS);
     if ( !table ) return 1;
     memset(table, 0, 
-           SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
+           SHADOW_HASH_BUCKETS * sizeof (struct page_info *));
     d->arch.paging.shadow.hash_table = table;
     return 0;
 }
@@ -2002,7 +2009,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
  * or INVALID_MFN if it doesn't exist */
 {
     struct domain *d = v->domain;
-    struct shadow_page_info *sp, *prev;
+    struct page_info *sp, *prev;
     key_t key;
 
     ASSERT(shadow_locked_by_me(d));
@@ -2019,21 +2026,21 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
     prev = NULL;
     while(sp)
     {
-        if ( sp->backpointer == n && sp->type == t )
+        if ( sp->v.sh.back == n && sp->u.sh.type == t )
         {
             /* Pull-to-front if 'sp' isn't already the head item */
             if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
             {
                 if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
                     /* Can't reorder: someone is walking the hash chains */
-                    return shadow_page_to_mfn(sp);
+                    return page_to_mfn(sp);
                 else 
                 {
                     ASSERT(prev);
                     /* Delete sp from the list */
                     prev->next_shadow = sp->next_shadow;                    
                     /* Re-insert it at the head of the list */
-                    sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+                    set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
                     d->arch.paging.shadow.hash_table[key] = sp;
                 }
             }
@@ -2041,10 +2048,10 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
             {
                 perfc_incr(shadow_hash_lookup_head);
             }
-            return shadow_page_to_mfn(sp);
+            return page_to_mfn(sp);
         }
         prev = sp;
-        sp = sp->next_shadow;
+        sp = next_shadow(sp);
     }
 
     perfc_incr(shadow_hash_lookup_miss);
@@ -2056,7 +2063,7 @@ void shadow_hash_insert(struct vcpu *v, 
 /* Put a mapping (n,t)->smfn into the hash table */
 {
     struct domain *d = v->domain;
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     key_t key;
     
     ASSERT(shadow_locked_by_me(d));
@@ -2070,8 +2077,8 @@ void shadow_hash_insert(struct vcpu *v, 
     sh_hash_audit_bucket(d, key);
     
     /* Insert this shadow at the top of the bucket */
-    sp = mfn_to_shadow_page(smfn);
-    sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+    sp = mfn_to_page(smfn);
+    set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
     d->arch.paging.shadow.hash_table[key] = sp;
     
     sh_hash_audit_bucket(d, key);
@@ -2082,7 +2089,7 @@ void shadow_hash_delete(struct vcpu *v, 
 /* Excise the mapping (n,t)->smfn from the hash table */
 {
     struct domain *d = v->domain;
-    struct shadow_page_info *sp, *x;
+    struct page_info *sp, *x;
     key_t key;
 
     ASSERT(shadow_locked_by_me(d));
@@ -2095,10 +2102,10 @@ void shadow_hash_delete(struct vcpu *v, 
     key = sh_hash(n, t);
     sh_hash_audit_bucket(d, key);
     
-    sp = mfn_to_shadow_page(smfn);
+    sp = mfn_to_page(smfn);
     if ( d->arch.paging.shadow.hash_table[key] == sp ) 
         /* Easy case: we're deleting the head item. */
-        d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
+        d->arch.paging.shadow.hash_table[key] = next_shadow(sp);
     else 
     {
         /* Need to search for the one we want */
@@ -2107,15 +2114,15 @@ void shadow_hash_delete(struct vcpu *v, 
         {
             ASSERT(x); /* We can't have hit the end, since our target is
                         * still in the chain somewhere... */
-            if ( x->next_shadow == sp ) 
+            if ( next_shadow(x) == sp )
             {
                 x->next_shadow = sp->next_shadow;
                 break;
             }
-            x = x->next_shadow;
-        }
-    }
-    sp->next_shadow = NULL;
+            x = next_shadow(x);
+        }
+    }
+    set_next_shadow(sp, NULL);
 
     sh_hash_audit_bucket(d, key);
 }
@@ -2137,7 +2144,7 @@ static void hash_foreach(struct vcpu *v,
 {
     int i, done = 0;
     struct domain *d = v->domain;
-    struct shadow_page_info *x;
+    struct page_info *x;
 
     /* Say we're here, to stop hash-lookups reordering the chains */
     ASSERT(shadow_locked_by_me(d));
@@ -2149,14 +2156,14 @@ static void hash_foreach(struct vcpu *v,
         /* WARNING: This is not safe against changes to the hash table.
          * The callback *must* return non-zero if it has inserted or
          * deleted anything from the hash (lookups are OK, though). */
-        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
-        {
-            if ( callback_mask & (1 << x->type) ) 
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                ASSERT(x->type <= 15);
-                ASSERT(callbacks[x->type] != NULL);
-                done = callbacks[x->type](v, shadow_page_to_mfn(x), 
-                                          callback_mfn);
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x),
+                                               callback_mfn);
                 if ( done ) break;
             }
         }
@@ -2173,8 +2180,8 @@ static void hash_foreach(struct vcpu *v,
 
 void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
-    unsigned int t = sp->type;
+    struct page_info *sp = mfn_to_page(smfn);
+    unsigned int t = sp->u.sh.type;
 
 
     SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
@@ -2186,7 +2193,7 @@ void sh_destroy_shadow(struct vcpu *v, m
            t == SH_type_fl1_64_shadow  || 
            t == SH_type_monitor_table  || 
            (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
-           (page_get_owner(mfn_to_page(_mfn(sp->backpointer))) 
+           (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back)))
             == v->domain)); 
 
     /* The down-shifts here are so that the switch statement is on nice
@@ -2438,7 +2445,7 @@ int sh_remove_write_access(struct vcpu *
     {
         unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
         mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
-        int shtype = mfn_to_shadow_page(last_smfn)->type;
+        int shtype = mfn_to_page(last_smfn)->u.sh.type;
 
         if ( callbacks[shtype] ) 
             callbacks[shtype](v, last_smfn, gmfn);
@@ -2481,25 +2488,25 @@ int sh_remove_write_access_from_sl1p(str
 int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
                                      mfn_t smfn, unsigned long off)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
     
     ASSERT(mfn_valid(smfn));
     ASSERT(mfn_valid(gmfn));
     
-    if ( sp->type == SH_type_l1_32_shadow
-         || sp->type == SH_type_fl1_32_shadow )
+    if ( sp->u.sh.type == SH_type_l1_32_shadow
+         || sp->u.sh.type == SH_type_fl1_32_shadow )
     {
         return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
             (v, gmfn, smfn, off);
     }
 #if CONFIG_PAGING_LEVELS >= 3
-    else if ( sp->type == SH_type_l1_pae_shadow
-              || sp->type == SH_type_fl1_pae_shadow )
+    else if ( sp->u.sh.type == SH_type_l1_pae_shadow
+              || sp->u.sh.type == SH_type_fl1_pae_shadow )
         return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
             (v, gmfn, smfn, off);
 #if CONFIG_PAGING_LEVELS >= 4
-    else if ( sp->type == SH_type_l1_64_shadow
-              || sp->type == SH_type_fl1_64_shadow )
+    else if ( sp->u.sh.type == SH_type_l1_64_shadow
+              || sp->u.sh.type == SH_type_fl1_64_shadow )
         return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
             (v, gmfn, smfn, off);
 #endif
@@ -2601,17 +2608,17 @@ static int sh_remove_shadow_via_pointer(
 /* Follow this shadow's up-pointer, if it has one, and remove the reference
  * found there.  Returns 1 if that was the only reference to this shadow */
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
     mfn_t pmfn;
     void *vaddr;
     int rc;
 
-    ASSERT(sp->type > 0);
-    ASSERT(sp->type < SH_type_max_shadow);
-    ASSERT(sp->type != SH_type_l2_32_shadow);
-    ASSERT(sp->type != SH_type_l2_pae_shadow);
-    ASSERT(sp->type != SH_type_l2h_pae_shadow);
-    ASSERT(sp->type != SH_type_l4_64_shadow);
+    ASSERT(sp->u.sh.type > 0);
+    ASSERT(sp->u.sh.type < SH_type_max_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l2_32_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l4_64_shadow);
     
     if (sp->up == 0) return 0;
     pmfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2622,10 +2629,10 @@ static int sh_remove_shadow_via_pointer(
     ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
     
     /* Is this the only reference to this shadow? */
-    rc = (sp->count == 1) ? 1 : 0;
+    rc = (sp->u.sh.count == 1) ? 1 : 0;
 
     /* Blank the offending entry */
-    switch (sp->type) 
+    switch (sp->u.sh.type)
     {
     case SH_type_l1_32_shadow:
     case SH_type_l2_32_shadow:
@@ -3156,7 +3163,6 @@ void shadow_teardown(struct domain *d)
 {
     struct vcpu *v;
     mfn_t mfn;
-    struct list_head *entry, *n;
     struct page_info *pg;
 
     ASSERT(d->is_dying);
@@ -3208,12 +3214,8 @@ void shadow_teardown(struct domain *d)
     }
 #endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
 
-    list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
-    {
-        list_del(entry);
-        pg = list_entry(entry, struct page_info, list);
+    while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) )
         shadow_free_p2m_page(d, pg);
-    }
 
     if ( d->arch.paging.shadow.total_pages != 0 )
     {
@@ -3657,7 +3659,6 @@ int shadow_track_dirty_vram(struct domai
         for ( i = 0; i < nr; i++ ) {
             mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
             struct page_info *page;
-            u32 count_info;
             int dirty = 0;
             paddr_t sl1ma = d->dirty_vram->sl1ma[i];
 
@@ -3668,8 +3669,7 @@ int shadow_track_dirty_vram(struct domai
             else
             {
                 page = mfn_to_page(mfn);
-                count_info = page->u.inuse.type_info & PGT_count_mask;
-                switch (count_info)
+                switch (page->u.inuse.type_info & PGT_count_mask)
                 {
                 case 0:
                     /* No guest reference, nothing to track. */
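
The shadow/common.c hunks above convert the shadow allocator and teardown paths from the generic list_head API to the new page_list API introduced by this merge. A minimal before/after fragment of the recurring head-removal pattern, using only helpers that appear in the hunks (an illustration of the pattern, not standalone compilable code):

    /* Old pattern: emptiness check, then list_entry() + list_del(). */
    ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
    sp = list_entry(d->arch.paging.shadow.freelists[order].next,
                    struct shadow_page_info, list);
    list_del(&sp->list);

    /* New pattern: page_list_remove_head() returns NULL on an empty list,
     * so the emptiness check becomes an assertion on the result. */
    sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
    ASSERT(sp);
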
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Feb 13 11:22:28 2009 +0900
@@ -973,13 +973,13 @@ static int shadow_set_l2e(struct vcpu *v
         }
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         {
-            struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
-            mfn_t gl1mfn = _mfn(sp->backpointer);
+            struct page_info *sp = mfn_to_page(sl1mfn);
+            mfn_t gl1mfn = _mfn(sp->v.sh.back);
 
             /* If the shadow is a fl1 then the backpointer contains
                the GFN instead of the GMFN, and it's definitely not
                OOS. */
-            if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
+            if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
                  && mfn_is_out_of_sync(gl1mfn) )
                 sh_resync(v, gl1mfn);
         }
@@ -1036,9 +1036,8 @@ static inline void shadow_vram_get_l1e(s
     if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
         unsigned long i = gfn - d->dirty_vram->begin_pfn;
         struct page_info *page = mfn_to_page(mfn);
-        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
         
-        if ( count_info == 1 )
+        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
             /* Initial guest reference, record it */
             d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
                 | ((unsigned long)sl1e & ~PAGE_MASK);
@@ -1064,12 +1063,11 @@ static inline void shadow_vram_put_l1e(s
     if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
         unsigned long i = gfn - d->dirty_vram->begin_pfn;
         struct page_info *page = mfn_to_page(mfn);
-        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
         int dirty = 0;
         paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
             | ((unsigned long)sl1e & ~PAGE_MASK);
 
-        if ( count_info == 1 ) {
+        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) {
             /* Last reference */
             if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
                 /* We didn't know it was that one, let's say it is dirty */
@@ -1194,8 +1192,8 @@ do {                                    
 do {                                                                    \
     int _i;                                                             \
     shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn));                  \
-    ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow       \
-           || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
+    ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow  \
+           || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\
     for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
     {                                                                   \
         (_sl1e) = _sp + _i;                                             \
@@ -1232,7 +1230,7 @@ do {                                    
 do {                                                                      \
     int _i, _j, __done = 0;                                               \
     int _xen = !shadow_mode_external(_dom);                               \
-    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow);    \
+    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\
     for ( _j = 0; _j < 4 && !__done; _j++ )                               \
     {                                                                     \
         shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn);                  \
@@ -1260,11 +1258,11 @@ do {                                    
     int _i;                                                                \
     int _xen = !shadow_mode_external(_dom);                                \
     shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn));                     \
-    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow      \
-           || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
+    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \
+           || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\
     for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
         if ( (!(_xen))                                                     \
-             || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
+             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\
              || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES))                  \
                  < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
         {                                                                  \
@@ -1285,13 +1283,13 @@ do {                                    
     int _i;                                                                 \
     int _xen = !shadow_mode_external(_dom);                                 \
     shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn));                      \
-    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow ||     \
-           mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow);     \
+    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\
+           mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\
     for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                  \
     {                                                                       \
         if ( (!(_xen))                                                      \
              || !is_pv_32on64_domain(_dom)                                  \
-             || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow  \
+             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\
              || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) )           \
         {                                                                   \
             (_sl2e) = _sp + _i;                                             \
@@ -1313,7 +1311,7 @@ do {                                    
 do {                                                                    \
     int _i;                                                             \
     shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn));                  \
-    ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow);  \
+    ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\
     for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
     {                                                                   \
         (_sl3e) = _sp + _i;                                             \
@@ -1331,7 +1329,7 @@ do {                                    
     shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn));                  \
     int _xen = !shadow_mode_external(_dom);                             \
     int _i;                                                             \
-    ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow);  \
+    ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\
     for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
     {                                                                   \
         if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) )                  \
@@ -1506,7 +1504,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
          && shadow_type != SH_type_l2h_pae_shadow 
          && shadow_type != SH_type_l4_64_shadow )
         /* Lower-level shadow, not yet linked from a higher level */
-        mfn_to_shadow_page(smfn)->up = 0;
+        mfn_to_page(smfn)->up = 0;
 
 #if GUEST_PAGING_LEVELS == 4
 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 
@@ -1519,14 +1517,12 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
          * of them, decide that this isn't an old linux guest, and stop
          * pinning l3es.  This is not very quick but it doesn't happen
          * very often. */
-        struct list_head *l, *t;
-        struct shadow_page_info *sp;
+        struct page_info *sp, *t;
         struct vcpu *v2;
         int l4count = 0, vcpus = 0;
-        list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
-        {
-            sp = list_entry(l, struct shadow_page_info, list);
-            if ( sp->type == SH_type_l4_64_shadow )
+        page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows)
+        {
+            if ( sp->u.sh.type == SH_type_l4_64_shadow )
                 l4count++;
         }
         for_each_vcpu ( v->domain, v2 ) 
@@ -1534,11 +1530,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
         if ( l4count > 2 * vcpus ) 
         {
             /* Unpin all the pinned l3 tables, and don't pin any more. */
-            list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
+            page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows)
             {
-                sp = list_entry(l, struct shadow_page_info, list);
-                if ( sp->type == SH_type_l3_64_shadow )
-                    sh_unpin(v, shadow_page_to_mfn(sp));
+                if ( sp->u.sh.type == SH_type_l3_64_shadow )
+                    sh_unpin(v, page_to_mfn(sp));
             }
             v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
         }
@@ -1921,7 +1916,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
 void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l4e_t *sl4e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
     mfn_t gmfn, sl4mfn;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1929,7 +1924,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
     ASSERT(t == SH_type_l4_shadow);
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
     /* Decrement refcounts of all the old entries */
@@ -1950,7 +1945,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l3e_t *sl3e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
     mfn_t gmfn, sl3mfn;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1958,7 +1953,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
     ASSERT(t == SH_type_l3_shadow);
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
 
@@ -1980,7 +1975,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l2e_t *sl2e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
     mfn_t gmfn, sl2mfn;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1993,7 +1988,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
 #endif
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
 
@@ -2014,7 +2009,7 @@ void sh_destroy_l1_shadow(struct vcpu *v
 {
     struct domain *d = v->domain;
     shadow_l1e_t *sl1e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
                   "%s(%05lx)\n", __func__, mfn_x(smfn));
@@ -2023,12 +2018,12 @@ void sh_destroy_l1_shadow(struct vcpu *v
     /* Record that the guest page isn't shadowed any more (in this type) */
     if ( t == SH_type_fl1_shadow )
     {
-        gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
+        gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back);
         delete_fl1_shadow_status(v, gfn, smfn);
     }
     else 
     {
-        mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+        mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
         delete_shadow_status(v, gmfn, t, smfn);
         shadow_demote(v, gmfn, t);
     }
@@ -2054,7 +2049,7 @@ void sh_destroy_monitor_table(struct vcp
 void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
 {
     struct domain *d = v->domain;
-    ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
+    ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table);
 
 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
     {
@@ -2298,7 +2293,7 @@ static int validate_gl2e(struct vcpu *v,
 
 #if SHADOW_PAGING_LEVELS == 3
         reserved_xen_slot = 
-            ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
+            ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) &&
              (shadow_index 
               >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
 #else /* SHADOW_PAGING_LEVELS == 2 */
@@ -2352,7 +2347,7 @@ static int validate_gl1e(struct vcpu *v,
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
-    gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
     if ( mfn_valid(gl1mfn) 
          && mfn_is_out_of_sync(gl1mfn) )
     {
@@ -2429,30 +2424,30 @@ void sh_resync_l1(struct vcpu *v, mfn_t 
  *      called in the *mode* of the vcpu that unsynced it.  Clear?  Good. */
 int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     mfn_t smfn;
 
     smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
     ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
     
     /* Up to l2 */
-    sp = mfn_to_shadow_page(smfn);
-    if ( sp->count != 1 || !sp->up )
+    sp = mfn_to_page(smfn);
+    if ( sp->u.sh.count != 1 || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
     ASSERT(mfn_valid(smfn));
 
 #if (SHADOW_PAGING_LEVELS == 4) 
     /* up to l3 */
-    sp = mfn_to_shadow_page(smfn);
-    if ( sp->count != 1 || !sp->up )
+    sp = mfn_to_page(smfn);
+    if ( sp->u.sh.count != 1 || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
     ASSERT(mfn_valid(smfn));
 
     /* up to l4 */
-    sp = mfn_to_shadow_page(smfn);
-    if ( sp->count != 1 
+    sp = mfn_to_page(smfn);
+    if ( sp->u.sh.count != 1
          || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2970,8 +2965,8 @@ static int sh_page_fault(struct vcpu *v,
                                         + shadow_l2_linear_offset(va)),
                                        sizeof(sl2e)) != 0)
                      || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
-                     || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page(
-                                      shadow_l2e_get_mfn(sl2e))->backpointer))
+                     || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
+                                      shadow_l2e_get_mfn(sl2e))->v.sh.back))
                      || unlikely(mfn_is_out_of_sync(gl1mfn)) )
                {
                    /* Hit the slow path as if there had been no 
@@ -3523,7 +3518,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
     // easier than invalidating all of the individual 4K pages).
     //
     sl1mfn = shadow_l2e_get_mfn(sl2e);
-    if ( mfn_to_shadow_page(sl1mfn)->type
+    if ( mfn_to_page(sl1mfn)->u.sh.type
          == SH_type_fl1_shadow )
     {
         flush_tlb_local();
@@ -3533,7 +3528,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Check to see if the SL1 is out of sync. */
     {
-        mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+        mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
         struct page_info *pg = mfn_to_page(gl1mfn);
         if ( mfn_valid(gl1mfn) 
              && page_is_out_of_sync(pg) )
@@ -3563,7 +3558,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
             }
 
             sl1mfn = shadow_l2e_get_mfn(sl2e);
-            gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+            gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
             pg = mfn_to_page(gl1mfn);
             
             if ( likely(sh_mfn_is_a_page_table(gl1mfn)
@@ -3968,7 +3963,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
         /* Need to repin the old toplevel shadow if it's been unpinned
          * by shadow_prealloc(): in PV mode we're still running on this
          * shadow and it's not safe to free it yet. */
-        if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) )
+        if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) )
         {
             SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn));
             domain_crash(v->domain);
@@ -4262,16 +4257,16 @@ int sh_rm_write_access_from_sl1p(struct 
 {
     int r;
     shadow_l1e_t *sl1p, sl1e;
-    struct shadow_page_info *sp;
+    struct page_info *sp;
 
     ASSERT(mfn_valid(gmfn));
     ASSERT(mfn_valid(smfn));
 
-    sp = mfn_to_shadow_page(smfn);
-
-    if ( sp->mbz != 0
-         || (sp->type != SH_type_l1_shadow
-             && sp->type != SH_type_fl1_shadow) )
+    sp = mfn_to_page(smfn);
+
+    if ( sp->count_info != 0
+         || (sp->u.sh.type != SH_type_l1_shadow
+             && sp->u.sh.type != SH_type_fl1_shadow) )
         goto fail;
 
     sl1p = sh_map_domain_page(smfn);
@@ -4410,7 +4405,7 @@ void sh_clear_shadow_entry(struct vcpu *
 void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
 /* Blank out a single shadow entry */
 {
-    switch ( mfn_to_shadow_page(smfn)->type )
+    switch ( mfn_to_page(smfn)->u.sh.type )
     {
     case SH_type_l1_shadow:
         (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
@@ -4443,7 +4438,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
              && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
         {
             (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
-            if ( mfn_to_shadow_page(sl1mfn)->type == 0 )
+            if ( mfn_to_page(sl1mfn)->u.sh.type == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
         }
@@ -4466,7 +4461,7 @@ int sh_remove_l2_shadow(struct vcpu *v, 
              && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
         {
             (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
-            if ( mfn_to_shadow_page(sl2mfn)->type == 0 )
+            if ( mfn_to_page(sl2mfn)->u.sh.type == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
         }
@@ -4488,7 +4483,7 @@ int sh_remove_l3_shadow(struct vcpu *v, 
              && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
         {
             (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
-            if ( mfn_to_shadow_page(sl3mfn)->type == 0 )
+            if ( mfn_to_page(sl3mfn)->u.sh.type == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
         }
@@ -4890,7 +4885,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
     int done = 0;
     
     /* Follow the backpointer */
-    gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
@@ -4980,7 +4975,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
+    gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Only L1's may be out of sync. */
@@ -5029,7 +5024,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
+    gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Only L1's may be out of sync. */
@@ -5076,7 +5071,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
+    gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Only L1's may be out of sync. */
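
Throughout multi.c the change is mechanical: shadow metadata is now read through struct page_info rather than the removed struct shadow_page_info. A short fragment mirroring the destroy-shadow preambles above (the function name is illustrative only, not part of the patch):

    static void example_destroy_preamble(struct vcpu *v, mfn_t smfn)
    {
        struct page_info *sp = mfn_to_page(smfn); /* was mfn_to_shadow_page(smfn) */
        unsigned int t = sp->u.sh.type;           /* was sp->type */
        mfn_t gmfn = _mfn(sp->v.sh.back);         /* was sp->backpointer */

        delete_shadow_status(v, gmfn, t, smfn);   /* call sites unchanged */
        shadow_demote(v, gmfn, t);
    }
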
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Feb 13 11:22:28 2009 +0900
@@ -220,60 +220,6 @@ extern void shadow_audit_tables(struct v
 #undef GUEST_LEVELS
 #endif /* CONFIG_PAGING_LEVELS == 4 */
 
-/******************************************************************************
- * Page metadata for shadow pages.
- */
-
-struct shadow_page_info
-{
-    union {
-        /* Ensures that shadow_page_info is same size as page_info. */
-        struct page_info page_info;
-
-        struct {
-            union {
-                /* When in use, guest page we're a shadow of */
-                unsigned long backpointer;
-                /* When free, order of the freelist we're on */
-                unsigned int order;
-            };
-            union {
-                /* When in use, next shadow in this hash chain */
-                struct shadow_page_info *next_shadow;
-                /* When free, TLB flush time when freed */
-                u32 tlbflush_timestamp;
-            };
-            struct {
-                unsigned long mbz;     /* Must be zero: count_info is here. */
-                unsigned long type:5;   /* What kind of shadow is this? */
-                unsigned long pinned:1; /* Is the shadow pinned? */
-                unsigned long count:26; /* Reference count */
-            } __attribute__((packed));
-            union {
-                /* For unused shadow pages, a list of pages of this order; for 
-                 * pinnable shadows, if pinned, a list of other pinned shadows
-                 * (see sh_type_is_pinnable() below for the definition of 
-                 * "pinnable" shadow types). */
-                struct list_head list;
-                /* For non-pinnable shadows, a higher entry that points
-                 * at us. */
-                paddr_t up;
-            };
-        };
-    };
-};
-
-/* The structure above *must* be no larger than a struct page_info
- * from mm.h, since we'll be using the same space in the frametable. 
- * Also, the mbz field must line up with the count_info field of normal 
- * pages, so they cannot be successfully get_page()d. */
-static inline void shadow_check_page_struct_offsets(void) {
-    BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
-                 sizeof (struct page_info));
-    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
-                 offsetof(struct page_info, count_info));
-};
-
 /* Shadow type codes */
 #define SH_type_none           (0U) /* on the shadow free list */
 #define SH_type_min_shadow     (1U)
@@ -528,22 +474,13 @@ mfn_t oos_snapshot_lookup(struct vcpu *v
  * MFN/page-info handling 
  */
 
-// Override mfn_to_page from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+/* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
-#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m))
-
-// Override page_to_mfn from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
-#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg))
-
-// Override mfn_valid from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /* Override pagetable_t <-> struct page_info conversions to work with mfn_t */
 #undef pagetable_get_page
@@ -675,26 +612,26 @@ static inline int sh_get_ref(struct vcpu
 static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
 {
     u32 x, nx;
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
 
     ASSERT(mfn_valid(smfn));
 
-    x = sp->count;
+    x = sp->u.sh.count;
     nx = x + 1;
 
     if ( unlikely(nx >= 1U<<26) )
     {
-        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
-                       sp->backpointer, mfn_x(smfn));
+        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n",
+                       sp->v.sh.back, mfn_x(smfn));
         return 0;
     }
     
     /* Guarded by the shadow lock, so no need for atomic update */
-    sp->count = nx;
+    sp->u.sh.count = nx;
 
     /* We remember the first shadow entry that points to each shadow. */
     if ( entry_pa != 0 
-         && !sh_type_is_pinnable(v, sp->type) 
+         && !sh_type_is_pinnable(v, sp->u.sh.type)
          && sp->up == 0 ) 
         sp->up = entry_pa;
     
@@ -707,29 +644,29 @@ static inline void sh_put_ref(struct vcp
 static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
 {
     u32 x, nx;
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
 
     ASSERT(mfn_valid(smfn));
-    ASSERT(sp->mbz == 0);
+    ASSERT(sp->count_info == 0);
 
     /* If this is the entry in the up-pointer, remove it */
     if ( entry_pa != 0 
-         && !sh_type_is_pinnable(v, sp->type) 
+         && !sh_type_is_pinnable(v, sp->u.sh.type)
          && sp->up == entry_pa ) 
         sp->up = 0;
 
-    x = sp->count;
+    x = sp->u.sh.count;
     nx = x - 1;
 
     if ( unlikely(x == 0) ) 
     {
         SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n",
-                     mfn_x(smfn), sp->count, sp->type);
+                     mfn_x(smfn), sp->u.sh.count, sp->u.sh.type);
         BUG();
     }
 
     /* Guarded by the shadow lock, so no need for atomic update */
-    sp->count = nx;
+    sp->u.sh.count = nx;
 
     if ( unlikely(nx == 0) ) 
         sh_destroy_shadow(v, smfn);
@@ -741,26 +678,26 @@ static inline void sh_put_ref(struct vcp
  * Returns 0 for failure, 1 for success. */
 static inline int sh_pin(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     
     ASSERT(mfn_valid(smfn));
-    sp = mfn_to_shadow_page(smfn);
-    ASSERT(sh_type_is_pinnable(v, sp->type));
-    if ( sp->pinned ) 
+    sp = mfn_to_page(smfn);
+    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+    if ( sp->u.sh.pinned )
     {
         /* Already pinned: take it out of the pinned-list so it can go 
          * at the front */
-        list_del(&sp->list);
+        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
     }
     else
     {
         /* Not pinned: pin it! */
         if ( !sh_get_ref(v, smfn, 0) )
             return 0;
-        sp->pinned = 1;
+        sp->u.sh.pinned = 1;
     }
     /* Put it at the head of the list of pinned shadows */
-    list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows);
+    page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
     return 1;
 }
 
@@ -768,15 +705,15 @@ static inline int sh_pin(struct vcpu *v,
  * of pinned shadows, and release the extra ref. */
 static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     
     ASSERT(mfn_valid(smfn));
-    sp = mfn_to_shadow_page(smfn);
-    ASSERT(sh_type_is_pinnable(v, sp->type));
-    if ( sp->pinned )
+    sp = mfn_to_page(smfn);
+    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+    if ( sp->u.sh.pinned )
     {
-        sp->pinned = 0;
-        list_del(&sp->list);
+        sp->u.sh.pinned = 0;
+        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
         sp->up = 0; /* in case this stops being a pinnable type in future */
         sh_put_ref(v, smfn, 0);
     }
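
With struct shadow_page_info deleted from private.h, its fields map onto existing struct page_info storage. The correspondence, as implied by the hunks in this changeset (an illustrative summary only; the authoritative layout is struct page_info in the mm headers):

    /*
     *  shadow_page_info field   ->  struct page_info equivalent
     *  ----------------------       ---------------------------
     *  backpointer              ->  v.sh.back
     *  order (free pages)       ->  v.free.order
     *  next_shadow              ->  next_shadow()/set_next_shadow() accessors
     *  mbz                      ->  count_info (must remain zero for shadows)
     *  type                     ->  u.sh.type
     *  pinned                   ->  u.sh.pinned
     *  count                    ->  u.sh.count
     *  list                     ->  page_list_* linkage
     *  up / tlbflush_timestamp  ->  up / tlbflush_timestamp (names unchanged)
     */
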
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/numa.c       Fri Feb 13 11:22:28 2009 +0900
@@ -312,7 +312,7 @@ static void dump_numa(unsigned char key)
                for_each_online_node(i)
                        page_num_node[i] = 0;
 
-               list_for_each_entry(page, &d->page_list, list)
+               page_list_for_each(page, &d->page_list)
                {
                        i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
                        page_num_node[i]++;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/physdev.c    Fri Feb 13 11:22:28 2009 +0900
@@ -62,7 +62,7 @@ static int physdev_map_pirq(struct physd
                 ret = -EINVAL;
                 goto free_domain;
             }
-            vector = IO_APIC_VECTOR(map->index);
+            vector = domain_irq_to_vector(current->domain, map->index);
             if ( !vector )
             {
                 dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
@@ -75,7 +75,7 @@ static int physdev_map_pirq(struct physd
         case MAP_PIRQ_TYPE_MSI:
             vector = map->index;
             if ( vector == -1 )
-                vector = assign_irq_vector(AUTO_ASSIGN);
+                vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
 
             if ( vector < 0 || vector >= NR_VECTORS )
             {
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/setup.c      Fri Feb 13 11:22:28 2009 +0900
@@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb
     unsigned int initrdidx = 1;
     multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
-    unsigned long nr_pages, modules_length, modules_headroom = -1;
+    unsigned long nr_pages, modules_length, modules_headroom;
     unsigned long allocator_bitmap_end;
     int i, e820_warn = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
@@ -618,6 +618,12 @@ void __init __start_xen(unsigned long mb
      */
     modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
 
+    /* ensure mod[0] is mapped before parsing */
+    bootstrap_map(mod[0].mod_start, mod[0].mod_end);
+    modules_headroom = bzimage_headroom(
+                      (char *)(unsigned long)mod[0].mod_start,
+                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
@@ -636,7 +642,8 @@ void __init __start_xen(unsigned long mb
             s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
 
 #if defined(CONFIG_X86_64)
-#define reloc_size ((__pa(&_end) + mask) & ~mask)
+/* Relocate Xen image, allocation bitmap, and one page of padding. */
+#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
         /* Is the region suitable for relocating Xen? */
         if ( !xen_phys_start && ((e-s) >= reloc_size) )
         {
@@ -721,11 +728,6 @@ void __init __start_xen(unsigned long mb
         }
 #endif
 
-        if ( modules_headroom == -1 )
-            modules_headroom = bzimage_headroom(
-                      (char *)(unsigned long)mod[0].mod_start,
-                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
-
         /* Is the region suitable for relocating the multiboot modules? */
         if ( !initial_images_start && (s < e) &&
              ((e-s) >= (modules_length+modules_headroom)) )
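
On x86-64 the relocation window now has to cover the allocation bitmap as well as the image itself. A small standalone calculation of that extra headroom under assumed example values (4KiB frames and 4GiB of RAM; the numbers are not taken from the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096;          /* assumed frame size */
        unsigned long max_page  = 1UL << 20;     /* assumed: 4GiB / 4KiB frames */
        unsigned long bitmap    = max_page / 8;  /* one bit per frame */

        /* Headroom added to reloc_size beyond __pa(&_end), before rounding. */
        printf("bitmap = %lu KiB, padding = %lu KiB\n",
               bitmap >> 10, page_size >> 10);
        return 0;
    }
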
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/smpboot.c    Fri Feb 13 11:22:28 2009 +0900
@@ -1256,8 +1256,6 @@ int __cpu_disable(void)
        mdelay(1);
        local_irq_disable();
 
-       cpufreq_del_cpu(cpu);
-
        time_suspend();
 
        cpu_mcheck_disable();
@@ -1320,6 +1318,8 @@ int cpu_down(unsigned int cpu)
        }
 
        printk("Prepare to bring CPU%d down...\n", cpu);
+
+       cpufreq_del_cpu(cpu);
 
        err = stop_machine_run(take_cpu_down, NULL, cpu);
        if (err < 0)
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_32/xen.lds.S
--- a/xen/arch/x86/x86_32/xen.lds.S     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_32/xen.lds.S     Fri Feb 13 11:22:28 2009 +0900
@@ -91,6 +91,7 @@ SECTIONS
        *(.exit.text)
        *(.exit.data)
        *(.exitcall.exit)
+       *(.eh_frame)
        }
 
   /* Stabs debugging sections.  */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_64/entry.S       Fri Feb 13 11:22:28 2009 +0900
@@ -739,7 +739,6 @@ ENTRY(hypercall_args_table)
         .byte 1 /* do_sysctl            */  /* 35 */
         .byte 1 /* do_domctl            */
         .byte 2 /* do_kexec             */
-        .byte 1 /* do_xsm_op            */
         .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/xen.lds.S
--- a/xen/arch/x86/x86_64/xen.lds.S     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_64/xen.lds.S     Fri Feb 13 11:22:28 2009 +0900
@@ -89,6 +89,7 @@ SECTIONS
        *(.exit.text)
        *(.exit.data)
        *(.exitcall.exit)
+       *(.eh_frame)
        }
 
   /* Stabs debugging sections.  */
diff -r af992824b5cf -r c7cba853583d xen/common/domain.c
--- a/xen/common/domain.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/domain.c       Fri Feb 13 11:22:28 2009 +0900
@@ -41,7 +41,6 @@ boolean_param("dom0_vcpus_pin", opt_dom0
 
 /* set xen as default cpufreq */
 enum cpufreq_controller cpufreq_controller = FREQCTL_xen;
-struct cpufreq_governor *cpufreq_opt_governor;
 
 static void __init setup_cpufreq_option(char *str)
 {
@@ -70,19 +69,6 @@ static void __init setup_cpufreq_option(
             cpufreq_cmdline_parse(arg);
 }
 custom_param("cpufreq", setup_cpufreq_option);
-
-static void __init setup_cpufreq_gov_option(char *str)
-{
-    if ( !strcmp(str, "userspace") )
-        cpufreq_opt_governor = &cpufreq_gov_userspace;
-    else if ( !strcmp(str, "performance") )
-        cpufreq_opt_governor = &cpufreq_gov_performance;
-    else if ( !strcmp(str, "powersave") )
-        cpufreq_opt_governor = &cpufreq_gov_powersave;
-    else if ( !strcmp(str, "ondemand") )
-        cpufreq_opt_governor = &cpufreq_gov_dbs;
-}
-custom_param("cpufreq_governor", setup_cpufreq_gov_option);
 
 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
 DEFINE_SPINLOCK(domlist_update_lock);
@@ -233,8 +219,8 @@ struct domain *domain_create(
     spin_lock_init(&d->page_alloc_lock);
     spin_lock_init(&d->shutdown_lock);
     spin_lock_init(&d->hypercall_deadlock_mutex);
-    INIT_LIST_HEAD(&d->page_list);
-    INIT_LIST_HEAD(&d->xenpage_list);
+    INIT_PAGE_LIST_HEAD(&d->page_list);
+    INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
     if ( domcr_flags & DOMCRF_hvm )
         d->is_hvm = 1;
diff -r af992824b5cf -r c7cba853583d xen/common/grant_table.c
--- a/xen/common/grant_table.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/grant_table.c  Fri Feb 13 11:22:28 2009 +0900
@@ -1192,7 +1192,7 @@ gnttab_transfer(
         /* Okay, add the page to 'e'. */
         if ( unlikely(e->tot_pages++ == 0) )
             get_knownalive_domain(e);
-        list_add_tail(&page->list, &e->page_list);
+        page_list_add_tail(page, &e->page_list);
         page_set_owner(page, e);
 
         spin_unlock(&e->page_alloc_lock);
diff -r af992824b5cf -r c7cba853583d xen/common/hvm/save.c
--- a/xen/common/hvm/save.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/hvm/save.c     Fri Feb 13 11:22:28 2009 +0900
@@ -26,6 +26,7 @@
 #include <xen/version.h>
 #include <public/version.h>
 #include <xen/sched.h>
+#include <xen/guest_access.h>
 
 #include <asm/hvm/support.h>
 
@@ -75,6 +76,53 @@ size_t hvm_save_size(struct domain *d)
     return sz;
 }
 
+/* Extract a single instance of a save record, by marshalling all
+ * records of that type and copying out the one we need. */
+int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, 
+                 XEN_GUEST_HANDLE_64(uint8) handle)
+{
+    int rv = 0;
+    size_t sz = 0;
+    struct vcpu *v;
+    hvm_domain_context_t ctxt = { 0, };
+
+    if ( d->is_dying 
+         || typecode > HVM_SAVE_CODE_MAX 
+         || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor)
+         || hvm_sr_handlers[typecode].save == NULL )
+        return -EINVAL;
+
+    if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU )
+        for_each_vcpu(d, v)
+            sz += hvm_sr_handlers[typecode].size;
+    else 
+        sz = hvm_sr_handlers[typecode].size;
+    
+    if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz )
+        return -EINVAL;
+
+    ctxt.size = sz;
+    ctxt.data = xmalloc_bytes(sz);
+    if ( !ctxt.data )
+        return -ENOMEM;
+
+    if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 )
+    {
+        gdprintk(XENLOG_ERR, 
+                 "HVM save: failed to save type %"PRIu16"\n", typecode);
+        rv = -EFAULT;
+    }
+    else if ( copy_to_guest(handle,
+                            ctxt.data 
+                            + (instance * hvm_sr_handlers[typecode].size) 
+                            + sizeof (struct hvm_save_descriptor), 
+                            hvm_sr_handlers[typecode].size
+                            - sizeof (struct hvm_save_descriptor)) )
+        rv = -EFAULT;
+
+    xfree(ctxt.data);
+    return rv;
+}
 
 int hvm_save(struct domain *d, hvm_domain_context_t *h)
 {
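
The new hvm_save_one() marshals every record of the requested type into a temporary buffer and then copies a single record's payload, with its descriptor header stripped, out to the guest. A sketch of the offset arithmetic it relies on (the helper name is illustrative, not part of the patch):

    /* For instance i of a record type whose per-record size is record_size,
     * the payload copied to the guest starts just after that record's
     * descriptor within the marshalled buffer. */
    static size_t record_payload_offset(uint16_t instance, size_t record_size)
    {
        return instance * record_size + sizeof(struct hvm_save_descriptor);
    }

The bound checked before allocating, (instance + 1) * size <= sz, simply ensures the requested instance lies inside the marshalled buffer; per-vcpu record types contribute one record per VCPU to sz.
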
diff -r af992824b5cf -r c7cba853583d xen/common/memory.c
--- a/xen/common/memory.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/memory.c       Fri Feb 13 11:22:28 2009 +0900
@@ -218,8 +218,8 @@ static long memory_exchange(XEN_GUEST_HA
 static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
 {
     struct xen_memory_exchange exch;
-    LIST_HEAD(in_chunk_list);
-    LIST_HEAD(out_chunk_list);
+    PAGE_LIST_HEAD(in_chunk_list);
+    PAGE_LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
     xen_pfn_t     gpfn, gmfn, mfn;
     unsigned long i, j, k;
@@ -325,7 +325,7 @@ static long memory_exchange(XEN_GUEST_HA
                     goto fail;
                 }
 
-                list_add(&page->list, &in_chunk_list);
+                page_list_add(page, &in_chunk_list);
             }
         }
 
@@ -339,7 +339,7 @@ static long memory_exchange(XEN_GUEST_HA
                 goto fail;
             }
 
-            list_add(&page->list, &out_chunk_list);
+            page_list_add(page, &out_chunk_list);
         }
 
         /*
@@ -347,10 +347,8 @@ static long memory_exchange(XEN_GUEST_HA
          */
 
         /* Destroy final reference to each input page. */
-        while ( !list_empty(&in_chunk_list) )
-        {
-            page = list_entry(in_chunk_list.next, struct page_info, list);
-            list_del(&page->list);
+        while ( (page = page_list_remove_head(&in_chunk_list)) )
+        {
             if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                 BUG();
             mfn = page_to_mfn(page);
@@ -360,10 +358,8 @@ static long memory_exchange(XEN_GUEST_HA
 
         /* Assign each output page to the domain. */
         j = 0;
-        while ( !list_empty(&out_chunk_list) )
-        {
-            page = list_entry(out_chunk_list.next, struct page_info, list);
-            list_del(&page->list);
+        while ( (page = page_list_remove_head(&out_chunk_list)) )
+        {
             if ( assign_pages(d, page, exch.out.extent_order,
                               MEMF_no_refcount) )
                 BUG();
@@ -399,21 +395,13 @@ static long memory_exchange(XEN_GUEST_HA
      */
  fail:
     /* Reassign any input pages we managed to steal. */
-    while ( !list_empty(&in_chunk_list) )
-    {
-        page = list_entry(in_chunk_list.next, struct page_info, list);
-        list_del(&page->list);
+    while ( (page = page_list_remove_head(&in_chunk_list)) )
         if ( assign_pages(d, page, 0, MEMF_no_refcount) )
             BUG();
-    }
 
     /* Free any output pages we managed to allocate. */
-    while ( !list_empty(&out_chunk_list) )
-    {
-        page = list_entry(out_chunk_list.next, struct page_info, list);
-        list_del(&page->list);
+    while ( (page = page_list_remove_head(&out_chunk_list)) )
         free_domheap_pages(page, exch.out.extent_order);
-    }
 
     exch.nr_exchanged = i << in_chunk_order;
 
diff -r af992824b5cf -r c7cba853583d xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/page_alloc.c   Fri Feb 13 11:22:28 2009 +0900
@@ -71,7 +71,7 @@ integer_param("dma_bits", dma_bitsize);
 #endif
 
 static DEFINE_SPINLOCK(page_scrub_lock);
-LIST_HEAD(page_scrub_list);
+PAGE_LIST_HEAD(page_scrub_list);
 static unsigned long scrub_pages;
 
 /*********************
@@ -264,7 +264,7 @@ unsigned long __init alloc_boot_pages(
 #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
                           (fls(page_to_mfn(pg)) - 1))
 
-typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
+typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
@@ -272,13 +272,16 @@ static unsigned long *avail[MAX_NUMNODES
 
 static DEFINE_SPINLOCK(heap_lock);
 
-static void init_node_heap(int node)
+static unsigned long init_node_heap(int node, unsigned long mfn,
+                                    unsigned long nr)
 {
     /* First node to be discovered has its heap metadata statically alloced. */
     static heap_by_zone_and_order_t _heap_static;
     static unsigned long avail_static[NR_ZONES];
     static int first_node_initialised;
-
+    unsigned long needed = (sizeof(**_heap) +
+                            sizeof(**avail) * NR_ZONES +
+                            PAGE_SIZE - 1) >> PAGE_SHIFT;
     int i, j;
 
     if ( !first_node_initialised )
@@ -286,19 +289,40 @@ static void init_node_heap(int node)
         _heap[node] = &_heap_static;
         avail[node] = avail_static;
         first_node_initialised = 1;
+        needed = 0;
+    }
+#ifdef DIRECTMAP_VIRT_END
+    else if ( nr >= needed &&
+              mfn + needed <= virt_to_mfn(DIRECTMAP_VIRT_END) )
+    {
+        _heap[node] = mfn_to_virt(mfn);
+        avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES;
+    }
+#endif
+    else if ( get_order_from_bytes(sizeof(**_heap)) ==
+              get_order_from_pages(needed) )
+    {
+        _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0);
+        BUG_ON(!_heap[node]);
+        avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) -
+                      sizeof(**avail) * NR_ZONES;
+        needed = 0;
     }
     else
     {
         _heap[node] = xmalloc(heap_by_zone_and_order_t);
         avail[node] = xmalloc_array(unsigned long, NR_ZONES);
         BUG_ON(!_heap[node] || !avail[node]);
+        needed = 0;
     }
 
     memset(avail[node], 0, NR_ZONES * sizeof(long));
 
     for ( i = 0; i < NR_ZONES; i++ )
         for ( j = 0; j <= MAX_ORDER; j++ )
-            INIT_LIST_HEAD(&(*_heap[node])[i][j]);
+            INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]);
+
+    return needed;
 }
 
 /* Allocate 2^@order contiguous pages. */
@@ -340,7 +364,7 @@ static struct page_info *alloc_heap_page
 
             /* Find smallest order which can satisfy the request. */
             for ( j = order; j <= MAX_ORDER; j++ )
-                if ( !list_empty(&heap(node, zone, j)) )
+                if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                     goto found;
         } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
@@ -354,14 +378,11 @@ static struct page_info *alloc_heap_page
     return NULL;
 
  found: 
-    pg = list_entry(heap(node, zone, j).next, struct page_info, list);
-    list_del(&pg->list);
-
     /* We may have to halve the chunk a number of times. */
     while ( j != order )
     {
         PFN_ORDER(pg) = --j;
-        list_add_tail(&pg->list, &heap(node, zone, j));
+        page_list_add_tail(pg, &heap(node, zone, j));
         pg += 1 << j;
     }
     
@@ -378,10 +399,13 @@ static struct page_info *alloc_heap_page
         /* Reference count must continuously be zero for free pages. */
         BUG_ON(pg[i].count_info != 0);
 
-        /* Add in any extra CPUs that need flushing because of this page. */
-        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
-        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
-        cpus_or(mask, mask, extra_cpus_mask);
+        if ( pg[i].u.free.need_tlbflush )
+        {
+            /* Add in extra CPUs that need flushing because of this page. */
+            cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
+            tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+            cpus_or(mask, mask, extra_cpus_mask);
+        }
 
         /* Initialise fields which have other uses for free pages. */
         pg[i].u.inuse.type_info = 0;
@@ -404,7 +428,6 @@ static void free_heap_pages(
     unsigned long mask;
     unsigned int i, node = phys_to_nid(page_to_maddr(pg));
     unsigned int zone = page_to_zone(pg);
-    struct domain *d;
 
     ASSERT(order <= MAX_ORDER);
     ASSERT(node >= 0);
@@ -425,15 +448,10 @@ static void free_heap_pages(
          */
         pg[i].count_info = 0;
 
-        if ( (d = page_get_owner(&pg[i])) != NULL )
-        {
+        /* If a page has no owner it will need no safety TLB flush. */
+        pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
+        if ( pg[i].u.free.need_tlbflush )
             pg[i].tlbflush_timestamp = tlbflush_current_time();
-            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
-        }
-        else
-        {
-            cpus_clear(pg[i].u.free.cpumask);
-        }
     }
 
     spin_lock(&heap_lock);
@@ -452,8 +470,8 @@ static void free_heap_pages(
             if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                  (PFN_ORDER(pg-mask) != order) )
                 break;
-            list_del(&(pg-mask)->list);
             pg -= mask;
+            page_list_del(pg, &heap(node, zone, order));
         }
         else
         {
@@ -461,7 +479,7 @@ static void free_heap_pages(
             if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                  (PFN_ORDER(pg+mask) != order) )
                 break;
-            list_del(&(pg+mask)->list);
+            page_list_del(pg + mask, &heap(node, zone, order));
         }
         
         order++;
@@ -471,7 +489,7 @@ static void free_heap_pages(
     }
 
     PFN_ORDER(pg) = order;
-    list_add_tail(&pg->list, &heap(node, zone, order));
+    page_list_add_tail(pg, &heap(node, zone, order));
 
     spin_unlock(&heap_lock);
 }
@@ -482,7 +500,6 @@ static void free_heap_pages(
  * latter is not on a MAX_ORDER boundary, then we reserve the page by
  * not freeing it to the buddy allocator.
  */
-#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
 static void init_heap_pages(
     struct page_info *pg, unsigned long nr_pages)
 {
@@ -491,25 +508,33 @@ static void init_heap_pages(
 
     nid_prev = phys_to_nid(page_to_maddr(pg-1));
 
-    for ( i = 0; i < nr_pages; i++ )
+    for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ )
     {
         nid_curr = phys_to_nid(page_to_maddr(pg+i));
 
         if ( unlikely(!avail[nid_curr]) )
-            init_node_heap(nid_curr);
+        {
+            unsigned long n;
+
+            n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i);
+            if ( n )
+            {
+                BUG_ON(i + n > nr_pages);
+                i += n - 1;
+                continue;
+            }
+        }
 
         /*
-         * free pages of the same node, or if they differ, but are on a
-         * MAX_ORDER alignement boundary (which already get reserved)
+         * Free pages of the same node, or if they differ, but are on a
+         * MAX_ORDER alignment boundary (which already get reserved).
          */
-         if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
-                                         MAX_ORDER_ALIGNED) )
-             free_heap_pages(pg+i, 0);
-         else
-             printk("Reserving non-aligned node boundary @ mfn %lu\n",
-                    page_to_mfn(pg+i));
-
-        nid_prev = nid_curr;
+        if ( (nid_curr == nid_prev) ||
+             !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) )
+            free_heap_pages(pg+i, 0);
+        else
+            printk("Reserving non-aligned node boundary @ mfn %#lx\n",
+                   page_to_mfn(pg+i));
     }
 }
 
@@ -537,7 +562,7 @@ static unsigned long avail_heap_pages(
 #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
 void __init end_boot_allocator(void)
 {
-    unsigned long i;
+    unsigned long i, nr = 0;
     int curr_free, next_free;
 
     /* Pages that are free now go to the domain sub-allocator. */
@@ -550,8 +575,15 @@ void __init end_boot_allocator(void)
         if ( next_free )
             map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
         if ( curr_free )
-            init_heap_pages(mfn_to_page(i), 1);
-    }
+            ++nr;
+        else if ( nr )
+        {
+            init_heap_pages(mfn_to_page(i - nr), nr);
+            nr = 0;
+        }
+    }
+    if ( nr )
+        init_heap_pages(mfn_to_page(i - nr), nr);
 
     if ( !dma_bitsize && (num_online_nodes() > 1) )
     {
@@ -786,7 +818,7 @@ int assign_pages(
         page_set_owner(&pg[i], d);
         wmb(); /* Domain pointer must be visible before updating refcnt. */
         pg[i].count_info = PGC_allocated | 1;
-        list_add_tail(&pg[i].list, &d->page_list);
+        page_list_add_tail(&pg[i], &d->page_list);
     }
 
     spin_unlock(&d->page_alloc_lock);
@@ -844,7 +876,7 @@ void free_domheap_pages(struct page_info
         spin_lock_recursive(&d->page_alloc_lock);
 
         for ( i = 0; i < (1 << order); i++ )
-            list_del(&pg[i].list);
+            page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list);
 
         d->xenheap_pages -= 1 << order;
         drop_dom_ref = (d->xenheap_pages == 0);
@@ -859,7 +891,7 @@ void free_domheap_pages(struct page_info
         for ( i = 0; i < (1 << order); i++ )
         {
             BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
-            list_del(&pg[i].list);
+            page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
         }
 
         d->tot_pages -= 1 << order;
@@ -882,7 +914,7 @@ void free_domheap_pages(struct page_info
             {
                 page_set_owner(&pg[i], NULL);
                 spin_lock(&page_scrub_lock);
-                list_add(&pg[i].list, &page_scrub_list);
+                page_list_add(&pg[i], &page_scrub_list);
                 scrub_pages++;
                 spin_unlock(&page_scrub_lock);
             }
@@ -965,7 +997,7 @@ static DEFINE_PER_CPU(struct timer, page
 
 static void page_scrub_softirq(void)
 {
-    struct list_head *ent;
+    PAGE_LIST_HEAD(list);
     struct page_info  *pg;
     void             *p;
     int               i;
@@ -983,32 +1015,26 @@ static void page_scrub_softirq(void)
     do {
         spin_lock(&page_scrub_lock);
 
-        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
+        /* Peel up to 16 pages from the list. */
+        for ( i = 0; i < 16; i++ )
+        {
+            if ( !(pg = page_list_remove_head(&page_scrub_list)) )
+                break;
+            page_list_add_tail(pg, &list);
+        }
+        
+        if ( unlikely(i == 0) )
         {
             spin_unlock(&page_scrub_lock);
             goto out;
         }
-        
-        /* Peel up to 16 pages from the list. */
-        for ( i = 0; i < 16; i++ )
-        {
-            if ( ent->next == &page_scrub_list )
-                break;
-            ent = ent->next;
-        }
-        
-        /* Remove peeled pages from the list. */
-        ent->next->prev = &page_scrub_list;
-        page_scrub_list.next = ent->next;
-        scrub_pages -= (i+1);
+
+        scrub_pages -= i;
 
         spin_unlock(&page_scrub_lock);
 
-        /* Working backwards, scrub each page in turn. */
-        while ( ent != &page_scrub_list )
-        {
-            pg = list_entry(ent, struct page_info, list);
-            ent = ent->prev;
+        /* Scrub each page in turn. */
+        while ( (pg = page_list_remove_head(&list)) ) {
             p = map_domain_page(page_to_mfn(pg));
             scrub_page(p);
             unmap_domain_page(p);
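
For context on the new list idiom used throughout the hunks above: the open-coded list_empty()/list_entry()/list_del() sequences are replaced by page_list_add_tail()/page_list_remove_head() operating on a page_list_head. The stand-alone C sketch below models only that consumer-side pattern; the toy struct page_info, the pointer-based linking and the helper bodies are assumptions for illustration (the real implementation links pages by MFN), not Xen's code.

/* Toy model of the page_list_* consumer pattern (not Xen's implementation). */
#include <stdio.h>
#include <stddef.h>

struct page_info {
    unsigned long mfn;           /* stand-in for the real frame metadata */
    struct page_info *next;      /* the real code links pages by MFN     */
};

struct page_list_head {
    struct page_info *head, *tail;
};

#define PAGE_LIST_HEAD(name) struct page_list_head name = { NULL, NULL }

static void page_list_add_tail(struct page_info *pg, struct page_list_head *l)
{
    pg->next = NULL;
    if ( l->tail )
        l->tail->next = pg;
    else
        l->head = pg;
    l->tail = pg;
}

static struct page_info *page_list_remove_head(struct page_list_head *l)
{
    struct page_info *pg = l->head;

    if ( pg )
    {
        l->head = pg->next;
        if ( !l->head )
            l->tail = NULL;
    }
    return pg;
}

int main(void)
{
    struct page_info pages[4] = { { 10, NULL }, { 11, NULL },
                                  { 12, NULL }, { 13, NULL } };
    PAGE_LIST_HEAD(chunk);
    struct page_info *pg;
    int i;

    for ( i = 0; i < 4; i++ )
        page_list_add_tail(&pages[i], &chunk);

    /* The drain-the-list idiom used by memory_exchange() and the scrubber. */
    while ( (pg = page_list_remove_head(&chunk)) )
        printf("processing mfn %lu\n", pg->mfn);

    return 0;
}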
diff -r af992824b5cf -r c7cba853583d xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/char/serial.c Fri Feb 13 11:22:28 2009 +0900
@@ -471,7 +471,7 @@ void serial_suspend(void)
     int i, irq;
     for ( i = 0; i < ARRAY_SIZE(com); i++ )
         if ( (irq = serial_irq(i)) >= 0 )
-            free_irq(irq);
+            release_irq(irq);
 }
 
 void serial_resume(void)
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq.c     Fri Feb 13 11:22:28 2009 +0900
@@ -46,6 +46,9 @@
 #include <acpi/acpi.h>
 #include <acpi/cpufreq/cpufreq.h>
 
+static unsigned int usr_max_freq, usr_min_freq;
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
+
 struct cpufreq_dom {
     unsigned int       dom;
     cpumask_t          map;
@@ -53,6 +56,7 @@ struct cpufreq_dom {
 };
 static LIST_HEAD(cpufreq_dom_list_head);
 
+struct cpufreq_governor *cpufreq_opt_governor;
 LIST_HEAD(cpufreq_governor_list);
 
 struct cpufreq_governor *__find_governor(const char *governor)
@@ -213,6 +217,9 @@ int cpufreq_add_cpu(unsigned int cpu)
         perf->domain_info.num_processors) {
         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
         policy->governor = NULL;
+
+        cpufreq_cmdline_common_para(&new_policy);
+
         ret = __cpufreq_set_policy(policy, &new_policy);
         if (ret) {
             if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR)
@@ -467,3 +474,69 @@ out:
     return ret;
 }
 
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
+{
+    if (usr_max_freq)
+        new_policy->max = usr_max_freq;
+    if (usr_min_freq)
+        new_policy->min = usr_min_freq;
+}
+
+static int __init cpufreq_handle_common_option(const char *name, const char *val)
+{
+    if (!strcmp(name, "maxfreq") && val) {
+        usr_max_freq = simple_strtoul(val, NULL, 0);
+        return 1;
+    }
+
+    if (!strcmp(name, "minfreq") && val) {
+        usr_min_freq = simple_strtoul(val, NULL, 0);
+        return 1;
+    }
+
+    return 0;
+}
+
+void __init cpufreq_cmdline_parse(char *str)
+{
+    static struct cpufreq_governor *__initdata cpufreq_governors[] =
+    {
+        &cpufreq_gov_userspace,
+        &cpufreq_gov_dbs,
+        &cpufreq_gov_performance,
+        &cpufreq_gov_powersave
+    };
+    unsigned int gov_index = 0;
+
+    do {
+        char *val, *end = strchr(str, ',');
+        unsigned int i;
+
+        if (end)
+            *end++ = '\0';
+        val = strchr(str, '=');
+        if (val)
+            *val++ = '\0';
+
+        if (!cpufreq_opt_governor) {
+            if (!val) {
+                for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) {
+                    if (!strcmp(str, cpufreq_governors[i]->name)) {
+                        cpufreq_opt_governor = cpufreq_governors[i];
+                        gov_index = i;
+                        str = NULL;
+                        break;
+                    }
+                }
+            } else {
+                cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR;
+            }
+        }
+
+        if (str && !cpufreq_handle_common_option(str, val) &&
+            cpufreq_governors[gov_index]->handle_option)
+            cpufreq_governors[gov_index]->handle_option(str, val);
+
+        str = end;
+    } while (str);
+}
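
The new cpufreq_cmdline_parse() above consumes a comma-separated option string of the form "governor,name=value,..." (the governor names and the maxfreq=/minfreq= keys are those appearing in the hunks above). Below is a minimal stand-alone sketch of just the tokenising loop, with a hypothetical input string; simple_strtoul(), the governor table and the handle_option dispatch are deliberately omitted.

/* Stand-alone sketch of the "governor,name=value,..." tokenising loop. */
#include <stdio.h>
#include <string.h>

static void parse(char *str)
{
    do {
        char *val, *end = strchr(str, ',');

        if ( end )
            *end++ = '\0';
        val = strchr(str, '=');
        if ( val )
            *val++ = '\0';

        if ( val )
            printf("option '%s' = '%s'\n", str, val);
        else
            printf("governor '%s'\n", str);

        str = end;
    } while ( str );
}

int main(void)
{
    /* Hypothetical option string, for illustration only. */
    char opts[] = "ondemand,rate=50000,up_threshold=80,maxfreq=2000000";

    parse(opts);
    return 0;
}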
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_misc_governors.c
--- a/xen/drivers/cpufreq/cpufreq_misc_governors.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq_misc_governors.c      Fri Feb 13 11:22:28 2009 +0900
@@ -18,6 +18,7 @@
 #include <xen/sched.h>
 #include <acpi/cpufreq/cpufreq.h>
 
+static unsigned int usr_speed;
 
 /*
  * cpufreq userspace governor
@@ -26,6 +27,7 @@ static int cpufreq_governor_userspace(st
                                       unsigned int event)
 {
     int ret = 0;
+    unsigned int freq;
 
     if (!policy)
         return -EINVAL;
@@ -35,12 +37,17 @@ static int cpufreq_governor_userspace(st
     case CPUFREQ_GOV_STOP:
         break;
     case CPUFREQ_GOV_LIMITS:
-        if (policy->max < policy->cur)
+        freq = usr_speed ? : policy->cur;
+        if (policy->max < freq)
             ret = __cpufreq_driver_target(policy, policy->max,
                         CPUFREQ_RELATION_H);
-        else if (policy->min > policy->cur)
+        else if (policy->min > freq)
             ret = __cpufreq_driver_target(policy, policy->min,
                         CPUFREQ_RELATION_L);
+        else if (usr_speed)
+            ret = __cpufreq_driver_target(policy, freq,
+                        CPUFREQ_RELATION_L);
+
         break;
     default:
         ret = -EINVAL;
@@ -50,9 +57,17 @@ static int cpufreq_governor_userspace(st
     return ret;
 }
 
+static void __init 
+cpufreq_userspace_handle_option(const char *name, const char *val)
+{
+    if (!strcmp(name, "speed") && val)
+        usr_speed = simple_strtoul(val, NULL, 0);
+}
+
 struct cpufreq_governor cpufreq_gov_userspace = {
     .name = "userspace",
     .governor = cpufreq_governor_userspace,
+    .handle_option = cpufreq_userspace_handle_option
 };
 
 static int __init cpufreq_gov_userspace_init(void)
@@ -61,7 +76,7 @@ static int __init cpufreq_gov_userspace_
 }
 __initcall(cpufreq_gov_userspace_init);
 
-static void cpufreq_gov_userspace_exit(void)
+static void __exit cpufreq_gov_userspace_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_userspace);
 }
@@ -106,7 +121,7 @@ static int __init cpufreq_gov_performanc
 }
 __initcall(cpufreq_gov_performance_init);
 
-static void cpufreq_gov_performance_exit(void)
+static void __exit cpufreq_gov_performance_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_performance);
 }
@@ -151,7 +166,7 @@ static int __init cpufreq_gov_powersave_
 }
 __initcall(cpufreq_gov_powersave_init);
 
-static void cpufreq_gov_powersave_exit(void)
+static void __exit cpufreq_gov_powersave_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_powersave);
 }
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_ondemand.c
--- a/xen/drivers/cpufreq/cpufreq_ondemand.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c    Fri Feb 13 11:22:28 2009 +0900
@@ -281,9 +281,50 @@ int cpufreq_governor_dbs(struct cpufreq_
     return 0;
 }
 
+static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
+{
+    if ( !strcmp(name, "rate") && val )
+    {
+        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
+    }
+    else if ( !strcmp(name, "up_threshold") && val )
+    {
+        unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
+        {
+            printk(XENLOG_WARNING "cpufreq/ondemand: "
+                   "specified threshold too low, using %d\n",
+                   MIN_FREQUENCY_UP_THRESHOLD);
+            tmp = MIN_FREQUENCY_UP_THRESHOLD;
+        }
+        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
+        {
+            printk(XENLOG_WARNING "cpufreq/ondemand: "
+                   "specified threshold too high, using %d\n",
+                   MAX_FREQUENCY_UP_THRESHOLD);
+            tmp = MAX_FREQUENCY_UP_THRESHOLD;
+        }
+        dbs_tuners_ins.up_threshold = tmp;
+    }
+    else if ( !strcmp(name, "bias") && val )
+    {
+        unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+        if ( tmp > 1000 )
+        {
+            printk(XENLOG_WARNING "cpufreq/ondemand: "
+                   "specified bias too high, using 1000\n");
+            tmp = 1000;
+        }
+        dbs_tuners_ins.powersave_bias = tmp;
+    }
+}
+
 struct cpufreq_governor cpufreq_gov_dbs = {
     .name = "ondemand",
     .governor = cpufreq_governor_dbs,
+    .handle_option = cpufreq_dbs_handle_option
 };
 
 static int __init cpufreq_gov_dbs_init(void)
@@ -292,60 +333,8 @@ static int __init cpufreq_gov_dbs_init(v
 }
 __initcall(cpufreq_gov_dbs_init);
 
-static void cpufreq_gov_dbs_exit(void)
+static void __exit cpufreq_gov_dbs_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_dbs);
 }
 __exitcall(cpufreq_gov_dbs_exit);
-
-void __init cpufreq_cmdline_parse(char *str)
-{
-    do {
-        char *val, *end = strchr(str, ',');
-
-        if ( end )
-            *end++ = '\0';
-        val = strchr(str, '=');
-        if ( val )
-            *val++ = '\0';
-
-        if ( !strcmp(str, "rate") && val )
-        {
-            usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
-        }
-        else if ( !strcmp(str, "threshold") && val )
-        {
-            unsigned long tmp = simple_strtoul(val, NULL, 0);
-
-            if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
-            {
-                printk(XENLOG_WARNING "cpufreq/ondemand: "
-                       "specified threshold too low, using %d\n",
-                       MIN_FREQUENCY_UP_THRESHOLD);
-                tmp = MIN_FREQUENCY_UP_THRESHOLD;
-            }
-            else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
-            {
-                printk(XENLOG_WARNING "cpufreq/ondemand: "
-                       "specified threshold too high, using %d\n",
-                       MAX_FREQUENCY_UP_THRESHOLD);
-                tmp = MAX_FREQUENCY_UP_THRESHOLD;
-            }
-            dbs_tuners_ins.up_threshold = tmp;
-        }
-        else if ( !strcmp(str, "bias") && val )
-        {
-            unsigned long tmp = simple_strtoul(val, NULL, 0);
-
-            if ( tmp > 1000 )
-            {
-                printk(XENLOG_WARNING "cpufreq/ondemand: "
-                       "specified bias too high, using 1000\n");
-                tmp = 1000;
-            }
-            dbs_tuners_ins.powersave_bias = tmp;
-        }
-
-        str = end;
-    } while ( str );
-}
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Fri Feb 13 11:22:28 2009 +0900
@@ -479,26 +479,27 @@ static int set_iommu_interrupt_handler(s
 {
     int vector, ret;
 
-    vector = assign_irq_vector(AUTO_ASSIGN);
-    vector_to_iommu[vector] = iommu;
-
-    /* make irq == vector */
-    irq_vector[vector] = vector;
-    vector_irq[vector] = vector;
-
-    if ( !vector )
-    {
-        amd_iov_error("no vectors\n");
+    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+    if ( vector <= 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
         return 0;
     }
 
     irq_desc[vector].handler = &iommu_msi_type;
-    ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
+    ret = request_irq_vector(vector, amd_iommu_page_fault, 0,
+                             "amd_iommu", iommu);
     if ( ret )
     {
+        irq_desc[vector].handler = &no_irq_type;
+        free_irq_vector(vector);
         amd_iov_error("can't request irq\n");
         return 0;
     }
+
+    /* Make sure that vector is never re-used. */
+    vector_irq[vector] = NEVER_ASSIGN_IRQ;
+    vector_to_iommu[vector] = iommu;
     iommu->vector = vector;
     return vector;
 }
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/iommu_map.c   Fri Feb 13 11:22:28 2009 +0900
@@ -461,8 +461,8 @@ int amd_iommu_map_page(struct domain *d,
     iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
     if ( iommu_l2e == 0 )
     {
+        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         return -EFAULT;
     }
     set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
@@ -493,8 +493,8 @@ int amd_iommu_unmap_page(struct domain *
 
     if ( iommu_l2e == 0 )
     {
+        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         return -EFAULT;
     }
 
@@ -533,9 +533,9 @@ int amd_iommu_reserve_domain_unity_map(
 
         if ( iommu_l2e == 0 )
         {
-            amd_iov_error(
-            "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
+            amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n",
+                          phys_addr);
             return -EFAULT;
         }
 
@@ -552,7 +552,6 @@ int amd_iommu_sync_p2m(struct domain *d)
 {
     unsigned long mfn, gfn, flags;
     u64 iommu_l2e;
-    struct list_head *entry;
     struct page_info *page;
     struct hvm_iommu *hd;
     int iw = IOMMU_IO_WRITE_ENABLED;
@@ -568,10 +567,10 @@ int amd_iommu_sync_p2m(struct domain *d)
     if ( hd->p2m_synchronized )
         goto out;
 
-    for ( entry = d->page_list.next; entry != &d->page_list;
-            entry = entry->next )
-    {
-        page = list_entry(entry, struct page_info, list);
+    spin_lock(&d->page_alloc_lock);
+
+    page_list_for_each ( page, &d->page_list )
+    {
         mfn = page_to_mfn(page);
         gfn = get_gpfn_from_mfn(mfn);
 
@@ -582,13 +581,16 @@ int amd_iommu_sync_p2m(struct domain *d)
 
         if ( iommu_l2e == 0 )
         {
+            spin_unlock(&d->page_alloc_lock);
+            spin_unlock_irqrestore(&hd->mapping_lock, flags);
             amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
-            spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return -EFAULT;
         }
 
         set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
     }
+
+    spin_unlock(&d->page_alloc_lock);
 
     hd->p2m_synchronized = 1;
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Feb 13 11:22:28 2009 +0900
@@ -23,7 +23,6 @@
 #include <xen/pci_regs.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
-#include <asm/mm.h>
 
 extern unsigned short ivrs_bdf_entries;
 extern struct ivrs_mappings *ivrs_mappings;
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/io.c      Fri Feb 13 11:22:28 2009 +0900
@@ -87,8 +87,8 @@ int pt_irq_create_bind_vtd(
 
         if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
         {
+            spin_unlock(&d->event_lock);
             xfree(hvm_irq_dpci);
-            spin_unlock(&d->event_lock);
             return -EINVAL;
         }
     }
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/iommu.c   Fri Feb 13 11:22:28 2009 +0900
@@ -33,6 +33,8 @@ int amd_iov_detect(void);
  *   no-pv                      Disable IOMMU for PV domains (default)
  *   force|required             Don't boot unless IOMMU is enabled
  *   passthrough                Bypass VT-d translation for Dom0
+ *   snoop                      Utilize the snoop control for IOMMU (default)
+ *   no-snoop                   Don't utilize the snoop control for IOMMU
  */
 custom_param("iommu", parse_iommu_param);
 int iommu_enabled = 0;
@@ -45,6 +47,7 @@ static void __init parse_iommu_param(cha
 {
     char *ss;
     iommu_enabled = 1;
+    iommu_snoop = 1;
 
     do {
         ss = strchr(s, ',');
@@ -62,6 +65,10 @@ static void __init parse_iommu_param(cha
             force_iommu = 1;
         else if ( !strcmp(s, "passthrough") )
             iommu_passthrough = 1;
+        else if ( !strcmp(s, "snoop") )
+            iommu_snoop = 1;
+        else if ( !strcmp(s, "no-snoop") )
+            iommu_snoop = 0;
 
         s = ss + 1;
     } while ( ss );
@@ -141,7 +148,7 @@ static int iommu_populate_page_table(str
 
     spin_lock(&d->page_alloc_lock);
 
-    list_for_each_entry ( page, &d->page_list, list )
+    page_list_for_each ( page, &d->page_list )
     {
         if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
         {
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c        Fri Feb 13 11:22:28 2009 +0900
@@ -21,6 +21,7 @@
 
 #include <xen/init.h>
 #include <xen/bitmap.h>
+#include <xen/errno.h>
 #include <xen/kernel.h>
 #include <xen/acpi.h>
 #include <xen/mm.h>
@@ -518,8 +519,6 @@ int acpi_dmar_init(void)
 int acpi_dmar_init(void)
 {
     int rc;
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
 
     rc = -ENODEV;
     if ( force_iommu )
@@ -536,20 +535,7 @@ int acpi_dmar_init(void)
     if ( list_empty(&acpi_drhd_units) )
         goto fail;
 
-    /* Giving that all devices within guest use same io page table,
-     * enable snoop control only if all VT-d engines support it.
-     */
-    iommu_snoop = 1;
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        if ( !ecap_snp_ctl(iommu->ecap) ) {
-            iommu_snoop = 0;
-            break;
-        }
-    }
-
-    printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);
+    printk("Intel VT-d has been enabled\n");
 
     return 0;
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/ia64/vtd.c
--- a/xen/drivers/passthrough/vtd/ia64/vtd.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/ia64/vtd.c    Fri Feb 13 11:22:28 2009 +0900
@@ -29,7 +29,9 @@
 #include "../vtd.h"
 
 
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = {
+    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 u8 irq_vector[NR_IRQS] __read_mostly;
 
@@ -45,18 +47,19 @@ void unmap_vtd_domain_page(void *va)
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
 {
     struct page_info *pg;
     u64 *vaddr;
 
-    pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
+    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+                             d ? MEMF_node(domain_to_node(d)) : 0);
     vaddr = map_domain_page(page_to_mfn(pg));
     if ( !vaddr )
         return 0;
-    memset(vaddr, 0, PAGE_SIZE);
+    memset(vaddr, 0, PAGE_SIZE * npages);
 
-    iommu_flush_cache_page(vaddr);
+    iommu_flush_cache_page(vaddr, npages);
     unmap_domain_page(vaddr);
 
     return page_to_maddr(pg);
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/intremap.c    Fri Feb 13 11:22:28 2009 +0900
@@ -502,7 +502,7 @@ int intremap_setup(struct iommu *iommu)
     ir_ctrl = iommu_ir_ctrl(iommu);
     if ( ir_ctrl->iremap_maddr == 0 )
     {
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Feb 13 11:22:28 2009 +0900
@@ -129,9 +129,9 @@ void iommu_flush_cache_entry(void *addr)
     __iommu_flush_cache(addr, 8);
 }
 
-void iommu_flush_cache_page(void *addr)
-{
-    __iommu_flush_cache(addr, PAGE_SIZE_4K);
+void iommu_flush_cache_page(void *addr, unsigned long npages)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
 }
 
 int nr_iommus;
@@ -146,7 +146,7 @@ static u64 bus_to_context_maddr(struct i
     root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        maddr = alloc_pgtable_maddr(NULL);
+        maddr = alloc_pgtable_maddr(NULL, 1);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -174,7 +174,7 @@ static u64 addr_to_dma_page_maddr(struct
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->mapping_lock));
     if ( hd->pgd_maddr == 0 )
-        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
+        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) )
             goto out;
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
@@ -187,7 +187,7 @@ static u64 addr_to_dma_page_maddr(struct
         {
             if ( !alloc )
                 break;
-            maddr = alloc_pgtable_maddr(domain);
+            maddr = alloc_pgtable_maddr(domain, 1);
             if ( !maddr )
                 break;
             dma_set_pte_addr(*pte, maddr);
@@ -577,7 +577,7 @@ static int iommu_set_root_entry(struct i
     spin_lock(&iommu->lock);
 
     if ( iommu->root_maddr == 0 )
-        iommu->root_maddr = alloc_pgtable_maddr(NULL);
+        iommu->root_maddr = alloc_pgtable_maddr(NULL, 1);
     if ( iommu->root_maddr == 0 )
     {
         spin_unlock(&iommu->lock);
@@ -874,23 +874,27 @@ int iommu_set_interrupt(struct iommu *io
 {
     int vector, ret;
 
-    vector = assign_irq_vector(AUTO_ASSIGN);
-    vector_to_iommu[vector] = iommu;
-
-    /* VT-d fault is a MSI, make irq == vector */
-    irq_vector[vector] = vector;
-    vector_irq[vector] = vector;
-
-    if ( !vector )
+    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+    if ( vector <= 0 )
     {
         gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
         return -EINVAL;
     }
 
     irq_desc[vector].handler = &dma_msi_type;
-    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
+    ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu);
     if ( ret )
+    {
+        irq_desc[vector].handler = &no_irq_type;
+        free_irq_vector(vector);
         gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
+        return ret;
+    }
+
+    /* Make sure that vector is never re-used. */
+    vector_irq[vector] = NEVER_ASSIGN_IRQ;
+    vector_to_iommu[vector] = iommu;
+
     return vector;
 }
 
@@ -966,7 +970,7 @@ static void iommu_free(struct acpi_drhd_
         iounmap(iommu->reg);
 
     free_intel_iommu(iommu->intel);
-    free_irq(iommu->vector);
+    release_irq_vector(iommu->vector);
     xfree(iommu);
 
     drhd->iommu = NULL;
@@ -1677,6 +1681,11 @@ static int init_vtd_hw(void)
         }
 
         vector = iommu_set_interrupt(iommu);
+        if ( vector < 0 )
+        {
+            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
+            return vector;
+        }
         dma_msi_data_init(iommu, vector);
         dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
         iommu->vector = vector;
@@ -1756,6 +1765,23 @@ int intel_vtd_setup(void)
     if ( init_vtd_hw() )
         goto error;
 
+    /* Given that all devices within a guest use the same IO page table,
+     * enable snoop control only if all VT-d engines support it.
+     */
+
+    if ( iommu_snoop )
+    {
+        for_each_drhd_unit ( drhd )
+        {
+            iommu = drhd->iommu;
+            if ( !ecap_snp_ctl(iommu->ecap) ) {
+                iommu_snoop = 0;
+                break;
+            }
+        }
+    }
+    
+    printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
     register_keyhandler('V', dump_iommu_info, "dump iommu info");
 
     return 0;
@@ -1764,6 +1790,7 @@ int intel_vtd_setup(void)
     for_each_drhd_unit ( drhd )
         iommu_free(drhd);
     vtd_enabled = 0;
+    iommu_snoop = 0;
     return -ENOMEM;
 }
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.h       Fri Feb 13 11:22:28 2009 +0900
@@ -397,7 +397,9 @@ struct poll_info {
     u32 udata;
 };
 
-#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
+#define MAX_QINVAL_PAGES 8
+#define NUM_QINVAL_PAGES 1
+#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry))
 #define qinval_present(v) ((v).lo & 1)
 #define qinval_fault_disable(v) (((v).lo >> 1) & 1)
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/qinval.c      Fri Feb 13 11:22:28 2009 +0900
@@ -427,7 +427,7 @@ int qinval_setup(struct iommu *iommu)
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
@@ -445,6 +445,8 @@ int qinval_setup(struct iommu *iommu)
      * registers are automatically reset to 0 with write
      * to IQA register.
      */
+    if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES )
+        qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1;
     dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
 
     /* enable queued invalidation hardware */
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 11:22:28 2009 +0900
@@ -101,12 +101,12 @@ void cacheline_flush(char *);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 void *map_to_nocache_virt(int nr_iommus, u64 maddr);
-u64 alloc_pgtable_maddr(struct domain *d);
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
 
 void iommu_flush_cache_entry(void *addr);
-void iommu_flush_cache_page(void *addr);
+void iommu_flush_cache_page(void *addr, unsigned long npages);
 
 #endif // _VTD_H_
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Fri Feb 13 11:22:28 2009 +0900
@@ -38,20 +38,21 @@ void unmap_vtd_domain_page(void *va)
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
 {
     struct page_info *pg;
     u64 *vaddr;
     unsigned long mfn;
 
-    pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
+    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+                             d ? MEMF_node(domain_to_node(d)) : 0);
     if ( !pg )
         return 0;
     mfn = page_to_mfn(pg);
     vaddr = map_domain_page(mfn);
-    memset(vaddr, 0, PAGE_SIZE);
+    memset(vaddr, 0, PAGE_SIZE * npages);
 
-    iommu_flush_cache_page(vaddr);
+    iommu_flush_cache_page(vaddr, npages);
     unmap_domain_page(vaddr);
 
     return (u64)mfn << PAGE_SHIFT_4K;
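
alloc_pgtable_maddr() now takes an npages count and allocates through get_order_from_pages(); since the domheap allocator works in power-of-two orders, npages is effectively rounded up to the next power of two. A small sketch of that rounding follows, assuming get_order_from_pages() returns the smallest order with (1UL << order) >= npages (an assumption based on its usage here, not a copy of the Xen helper).

/* Sketch of the order computation assumed by alloc_pgtable_maddr(d, npages). */
#include <stdio.h>

static unsigned int order_from_pages(unsigned long npages)
{
    unsigned int order = 0;

    while ( (1UL << order) < npages )
        order++;
    return order;
}

int main(void)
{
    unsigned long n;

    for ( n = 1; n <= 8; n++ )
        printf("npages=%lu -> order=%u (allocates %lu pages)\n",
               n, order_from_pages(n), 1UL << order_from_pages(n));
    return 0;
}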
diff -r af992824b5cf -r c7cba853583d xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/acpi/cpufreq/cpufreq.h        Fri Feb 13 11:22:28 2009 +0900
@@ -87,6 +87,7 @@ struct cpufreq_governor {
     char    name[CPUFREQ_NAME_LEN];
     int     (*governor)(struct cpufreq_policy *policy,
                         unsigned int event);
+    void    (*handle_option)(const char *name, const char *value);
     struct list_head governor_list;
 };
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hardirq.h
--- a/xen/include/asm-ia64/hardirq.h    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hardirq.h    Fri Feb 13 11:22:28 2009 +0900
@@ -4,6 +4,7 @@
 #define __ARCH_IRQ_STAT        1
 #define HARDIRQ_BITS   14
 #include <linux/hardirq.h>
+#include <xen/sched.h>
 
 #define local_softirq_pending()                (local_cpu_data->softirq_pending)
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/iommu.h
--- a/xen/include/asm-ia64/hvm/iommu.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hvm/iommu.h  Fri Feb 13 11:22:28 2009 +0900
@@ -28,7 +28,6 @@ static inline void pci_cleanup_msi(struc
     /* TODO */
 }
 
-#define AUTO_ASSIGN         -1
 
 extern int assign_irq_vector (int irq);
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/irq.h
--- a/xen/include/asm-ia64/hvm/irq.h    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hvm/irq.h    Fri Feb 13 11:22:28 2009 +0900
@@ -90,13 +90,17 @@ struct hvm_irq {
 #define hvm_pci_intx_link(dev, intx) \
     (((dev) + (intx)) & 3)
 
-/* Extract the IA-64 vector that corresponds to IRQ.  */
-static inline int
-irq_to_vector (int irq)
+#define IA64_INVALID_VECTOR    ((unsigned int)((int)-1))
+static inline unsigned int irq_to_vector(int irq)
 {
-    return irq;
+    int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+    unsigned int vector;
+
+    if ( acpi_gsi_to_irq(irq, &vector) < 0)
+        return 0;
+
+    return vector;
 }
-
 
 extern u8 irq_vector[NR_IRQS];
 extern int vector_irq[NR_VECTORS];
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/asm/smp.h
--- a/xen/include/asm-ia64/linux-xen/asm/smp.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux-xen/asm/smp.h  Fri Feb 13 11:22:28 2009 +0900
@@ -47,7 +47,6 @@ ia64_get_lid (void)
 #define SMP_IPI_REDIRECTION    (1 << 1)
 
 #ifdef XEN
-#include <xen/sched.h>
 #define raw_smp_processor_id() (current->processor)
 #else
 #define raw_smp_processor_id() (current_thread_info()->cpu)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/linux/interrupt.h
--- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h  Fri Feb 13 11:22:28 2009 +0900
@@ -52,10 +52,10 @@ struct irqaction {
 };
 
 extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
-extern int request_irq(unsigned int,
+extern int request_irq_vector(unsigned int,
                       irqreturn_t (*handler)(int, void *, struct pt_regs *),
                       unsigned long, const char *, void *);
-extern void free_irq(unsigned int, void *);
+extern void release_irq_vector(unsigned int, void *);
 #endif
 
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux/asm/hw_irq.h
--- a/xen/include/asm-ia64/linux/asm/hw_irq.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux/asm/hw_irq.h   Fri Feb 13 11:22:28 2009 +0900
@@ -34,7 +34,7 @@ typedef u8 ia64_vector;
 #define IA64_MAX_VECTORED_IRQ          255
 #define IA64_NUM_VECTORS               256
 
-#define AUTO_ASSIGN                    -1
+#define AUTO_ASSIGN_IRQ                        (-1)
 
 #define IA64_SPURIOUS_INT_VECTOR       0x0f
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/mm.h Fri Feb 13 11:22:28 2009 +0900
@@ -13,7 +13,6 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <xen/perfc.h>
-#include <xen/sched.h>
 
 #include <asm/processor.h>
 #include <asm/atomic.h>
@@ -63,21 +62,14 @@ struct page_info
         struct {
             /* Order-size of the free chunk this page is the head of. */
             u32 order;
-            /* Mask of possibly-tainted TLBs. */
-            cpumask_t cpumask;
+            /* Do TLBs need flushing for safety before next page use? */
+            bool_t need_tlbflush;
         } free;
 
     } u;
 
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     u32 tlbflush_timestamp;
-
-#if 0
-// following added for Linux compiling
-    page_flags_t flags;
-    atomic_t _count;
-    struct list_head lru;      // is this the same as above "list"?
-#endif
 };
 
 #define set_page_count(p,v)    atomic_set(&(p)->_count, v - 1)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/tlbflush.h   Fri Feb 13 11:22:28 2009 +0900
@@ -1,7 +1,8 @@
 #ifndef __FLUSHTLB_H__
 #define __FLUSHTLB_H__
 
-#include <xen/sched.h>
+struct vcpu;
+struct domain;
 
 /* TLB flushes can be either local (current vcpu only) or domain wide (on
    all vcpus).
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/domain.h      Fri Feb 13 11:22:28 2009 +0900
@@ -79,11 +79,11 @@ struct shadow_domain {
     int               locker; /* processor which holds the lock */
     const char       *locker_function; /* Func that took it */
     unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
-    struct list_head  pinned_shadows;
+    struct page_list_head pinned_shadows;
 
     /* Memory allocation */
-    struct list_head  freelists[SHADOW_MAX_ORDER + 1];
-    struct list_head  p2m_freelist;
+    struct page_list_head freelists[SHADOW_MAX_ORDER + 1];
+    struct page_list_head p2m_freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
@@ -92,7 +92,7 @@ struct shadow_domain {
     pagetable_t unpaged_pagetable;
 
     /* Shadow hashtable */
-    struct shadow_page_info **hash_table;
+    struct page_info **hash_table;
     int hash_walking;  /* Some function is walking the hash table */
 
     /* Fast MMIO path heuristic */
@@ -143,7 +143,7 @@ struct hap_domain {
     int               locker;
     const char       *locker_function;
 
-    struct list_head  freelist;
+    struct page_list_head freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
@@ -265,7 +265,7 @@ struct arch_domain
         RELMEM_l2,
         RELMEM_done,
     } relmem;
-    struct list_head relmem_list;
+    struct page_list_head relmem_list;
 
     cpuid_input_t cpuids[MAX_CPUID_INPUT];
 } __cacheline_aligned;
@@ -352,6 +352,7 @@ struct arch_vcpu
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
+    spinlock_t shadow_ldt_lock;
 
     struct paging_vcpu paging;
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 11:22:28 2009 +0900
@@ -48,7 +48,11 @@ typedef union {
 #define EPTE_SUPER_PAGE_MASK    0x80
 #define EPTE_MFN_MASK           0x1fffffffffff000
 #define EPTE_AVAIL1_MASK        0xF00
-#define EPTE_EMT_MASK           0x78
+#define EPTE_EMT_MASK           0x38
+#define EPTE_IGMT_MASK          0x40
+#define EPTE_AVAIL1_SHIFT       8
+#define EPTE_EMT_SHIFT          3
+#define EPTE_IGMT_SHIFT         6
 
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/iocap.h
--- a/xen/include/asm-x86/iocap.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/iocap.h       Fri Feb 13 11:22:28 2009 +0900
@@ -14,7 +14,8 @@
 #define ioports_access_permitted(d, s, e)               \
     rangeset_contains_range((d)->arch.ioport_caps, s, e)
 
-#define cache_flush_permitted(d)                       \
-    (!rangeset_is_empty((d)->iomem_caps))
+#define cache_flush_permitted(d)                        \
+    (!rangeset_is_empty((d)->iomem_caps) ||             \
+     !rangeset_is_empty((d)->arch.ioport_caps))
 
 #endif /* __X86_IOCAP_H__ */
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/irq.h Fri Feb 13 11:22:28 2009 +0900
@@ -19,9 +19,6 @@
 
 extern int vector_irq[NR_VECTORS];
 extern u8 irq_vector[NR_IRQS];
-#define AUTO_ASSIGN    -1
-#define NEVER_ASSIGN   -2
-#define FREE_TO_ASSIGN -3
 
 #define platform_legacy_irq(irq)       ((irq) < 16)
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/mm.h  Fri Feb 13 11:22:28 2009 +0900
@@ -12,15 +12,40 @@
  * Per-page-frame information.
  * 
  * Every architecture must ensure the following:
- *  1. 'struct page_info' contains a 'struct list_head list'.
+ *  1. 'struct page_info' contains a 'struct page_list_entry list'.
  *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
  */
-#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
+#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
+
+/*
+ * This definition is solely for use in struct page_info (and
+ * struct page_list_head), intended to allow easy adjustment once x86-64
+ * wants to support more than 16TB.
+ * 'unsigned long' should be used for MFNs everywhere else.
+ */
+#define __mfn_t unsigned int
+#define PRpgmfn "08x"
+
+#undef page_list_entry
+struct page_list_entry
+{
+    __mfn_t next, prev;
+};
 
 struct page_info
 {
-    /* Each frame can be threaded onto a doubly-linked list. */
-    struct list_head list;
+    union {
+        /* Each frame can be threaded onto a doubly-linked list.
+         *
+         * For unused shadow pages, a list of pages of this order; for
+         * pinnable shadows, if pinned, a list of other pinned shadows
+         * (see sh_type_is_pinnable() below for the definition of
+         * "pinnable" shadow types).
+         */
+        struct page_list_entry list;
+        /* For non-pinnable shadows, a higher entry that points at us. */
+        paddr_t up;
+    };
 
     /* Reference count and various PGC_xxx flags and fields. */
     unsigned long count_info;
@@ -30,21 +55,46 @@ struct page_info
 
         /* Page is in use: ((count_info & PGC_count_mask) != 0). */
         struct {
-            /* Owner of this page (NULL if page is anonymous). */
-            u32 _domain; /* pickled format */
             /* Type reference count and various PGT_xxx flags and fields. */
             unsigned long type_info;
         } inuse;
 
+        /* Page is in use as a shadow: count_info == 0. */
+        struct {
+            unsigned long type:5;   /* What kind of shadow is this? */
+            unsigned long pinned:1; /* Is the shadow pinned? */
+            unsigned long count:26; /* Reference count */
+        } sh;
+
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+            /* Do TLBs need flushing for safety before next page use? */
+            bool_t need_tlbflush;
+        } free;
+
+    } u;
+
+    union {
+
+        /* Page is in use, but not as a shadow. */
+        struct {
+            /* Owner of this page (NULL if page is anonymous). */
+            u32 _domain; /* pickled format */
+        } inuse;
+
+        /* Page is in use as a shadow. */
+        struct {
+            /* GMFN of guest page we're a shadow of. */
+            __mfn_t back;
+        } sh;
+
+        /* Page is on a free list (including shadow code free lists). */
+        struct {
             /* Order-size of the free chunk this page is the head of. */
-            u32 order;
-            /* Mask of possibly-tainted TLBs. */
-            cpumask_t cpumask;
+            unsigned int order;
         } free;
 
-    } u;
+    } v;
 
     union {
         /*
@@ -95,8 +145,13 @@ struct page_info
          * tracked for TLB-flush avoidance when a guest runs in shadow mode.
          */
         u32 shadow_flags;
+
+        /* When in use as a shadow, next shadow in this hash chain. */
+        __mfn_t next_shadow;
     };
 };
+
+#undef __mfn_t
 
 #define PG_shift(idx)   (BITS_PER_LONG - (idx))
 #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
@@ -155,7 +210,8 @@ struct page_info
 })
 #else
 #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
-#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn])
+#define is_xen_heap_mfn(mfn) \
+    (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
 #endif
 
 #if defined(__i386__)
@@ -174,10 +230,10 @@ struct page_info
 #define SHADOW_OOS_FIXUPS 2
 
 #define page_get_owner(_p)                                              \
-    ((struct domain *)((_p)->u.inuse._domain ?                          \
-                       mfn_to_virt((_p)->u.inuse._domain) : NULL))
+    ((struct domain *)((_p)->v.inuse._domain ?                          \
+                       mfn_to_virt((_p)->v.inuse._domain) : NULL))
 #define page_set_owner(_p,_d)                                           \
-    ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
+    ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
 
 #define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
 #define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/p2m.h Fri Feb 13 11:22:28 2009 +0900
@@ -110,7 +110,7 @@ struct p2m_domain {
     const char        *locker_function; /* Func that took it */
 
     /* Pages used to construct the p2m */
-    struct list_head   pages;
+    struct page_list_head pages;
 
     /* Functions to call to get or free pages for the p2m */
     struct page_info * (*alloc_page  )(struct domain *d);
@@ -148,7 +148,7 @@ struct p2m_domain {
      *   protect moving stuff from the PoD cache to the domain page list.
      */
     struct {
-        struct list_head super,        /* List of superpages                */
+        struct page_list_head super,   /* List of superpages                */
                          single;       /* Non-super lists                   */
         int              count,        /* # of pages in cache lists         */
                          entry_count;  /* # of pages in p2m marked pod      */
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/page.h        Fri Feb 13 11:22:28 2009 +0900
@@ -220,31 +220,47 @@ void copy_page_sse2(void *, const void *
                              copy_page_sse2(_t, _f) :                   \
                              (void)memcpy(_t, _f, PAGE_SIZE))
 
-#define mfn_valid(mfn)      ((mfn) < max_page)
+#define __mfn_valid(mfn)    ((mfn) < max_page)
 
 /* Convert between Xen-heap virtual addresses and machine addresses. */
 #define __pa(x)             (virt_to_maddr(x))
 #define __va(x)             (maddr_to_virt(x))
 
 /* Convert between Xen-heap virtual addresses and machine frame numbers. */
-#define virt_to_mfn(va)     (virt_to_maddr(va) >> PAGE_SHIFT)
-#define mfn_to_virt(mfn)    (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
+#define __virt_to_mfn(va)   (virt_to_maddr(va) >> PAGE_SHIFT)
+#define __mfn_to_virt(mfn)  (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
 
 /* Convert between machine frame numbers and page-info structures. */
-#define mfn_to_page(mfn)    (frame_table + (mfn))
-#define page_to_mfn(pg)     ((unsigned long)((pg) - frame_table))
+#define __mfn_to_page(mfn)  (frame_table + (mfn))
+#define __page_to_mfn(pg)   ((unsigned long)((pg) - frame_table))
 
 /* Convert between machine addresses and page-info structures. */
-#define maddr_to_page(ma)   (frame_table + ((ma) >> PAGE_SHIFT))
-#define page_to_maddr(pg)   ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
+#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
+#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
 
 /* Convert between Xen-heap virtual addresses and page-info structures. */
-#define virt_to_page(va)    (frame_table + (__pa(va) >> PAGE_SHIFT))
-#define page_to_virt(pg)    (maddr_to_virt(page_to_maddr(pg)))
+#define __virt_to_page(va)  (frame_table + (__pa(va) >> PAGE_SHIFT))
+#define __page_to_virt(pg)  (maddr_to_virt(page_to_maddr(pg)))
 
 /* Convert between frame number and address formats.  */
-#define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
-#define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
+#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
+#define __paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
+
+/*
+ * We define non-underscored wrappers for the above conversion functions. These are
+ * overridden in various source files while underscored versions remain intact.
+ */
+#define mfn_valid(mfn)      __mfn_valid(mfn)
+#define virt_to_mfn(va)     __virt_to_mfn(va)
+#define mfn_to_virt(mfn)    __mfn_to_virt(mfn)
+#define mfn_to_page(mfn)    __mfn_to_page(mfn)
+#define page_to_mfn(pg)     __page_to_mfn(pg)
+#define maddr_to_page(ma)   __maddr_to_page(ma)
+#define page_to_maddr(pg)   __page_to_maddr(pg)
+#define virt_to_page(va)    __virt_to_page(va)
+#define page_to_virt(pg)    __page_to_virt(pg)
+#define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
+#define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
 
 #endif /* !defined(__ASSEMBLY__) */
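
The comment added to page.h above describes an override pattern: double-underscore helpers keep their generic meaning, while the plain-named defaults are thin wrappers that an individual source file may #undef and redefine. A minimal, self-contained illustration of that pattern (names and the offset below are hypothetical, not Xen code):

/* Minimal illustration of the __helper / default-wrapper override pattern. */
#include <stdio.h>

#define __page_to_mfn(pg)  ((unsigned long)(pg))   /* generic stand-in */
#define page_to_mfn(pg)    __page_to_mfn(pg)       /* default wrapper  */

/* A file with its own numbering scheme overrides only the plain wrapper: */
#undef page_to_mfn
#define page_to_mfn(pg)    (__page_to_mfn(pg) + 0x1000)  /* hypothetical offset */

int main(void)
{
    void *pg = (void *)0x42;

    printf("__page_to_mfn = %#lx, page_to_mfn = %#lx\n",
           __page_to_mfn(pg), page_to_mfn(pg));
    return 0;
}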
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/perfc.h
--- a/xen/include/asm-x86/perfc.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/perfc.h       Fri Feb 13 11:22:28 2009 +0900
@@ -1,6 +1,5 @@
 #ifndef __ASM_PERFC_H__
 #define __ASM_PERFC_H__
-#include <asm/mm.h>
 
 static inline void arch_perfc_printall(void)
 {
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/processor.h   Fri Feb 13 11:22:28 2009 +0900
@@ -188,6 +188,7 @@ extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data boot_cpu_data
 #endif
 
+extern u64 host_pat;
 extern int phys_proc_id[NR_CPUS];
 extern int cpu_core_id[NR_CPUS];
 
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-ia64/hvm/save.h
--- a/xen/include/public/arch-ia64/hvm/save.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-ia64/hvm/save.h   Fri Feb 13 11:22:28 2009 +0900
@@ -23,8 +23,8 @@
 #ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__
 #define __XEN_PUBLIC_HVM_SAVE_IA64_H__
 
-#include <public/hvm/save.h>
-#include <public/arch-ia64.h>
+#include "../../hvm/save.h"
+#include "../../arch-ia64.h"
 
 /* 
  * Save/restore header: general info about the save file. 
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-x86/hvm/save.h    Fri Feb 13 11:22:28 2009 +0900
@@ -287,7 +287,7 @@ struct hvm_hw_pci_irqs {
      * Indexed by: device*4 + INTx#.
      */
     union {
-        DECLARE_BITMAP(i, 32*4);
+        unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
         uint64_t pad[2];
     };
 };
@@ -300,7 +300,7 @@ struct hvm_hw_isa_irqs {
      * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
      */
     union {
-        DECLARE_BITMAP(i, 16);
+        unsigned long i[1];  /* DECLARE_BITMAP(i, 16); */
         uint64_t pad[1];
     };
 };
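Both unions replace DECLARE_BITMAP() with an open-coded unsigned long array of the same size (128 bits and 16 bits respectively), so the public header no longer depends on Xen-internal bitmap macros; the uint64_t pad members continue to fix the overall union size. A sketch of the sizing rule, using a hypothetical helper name that the header itself does not define:

    /* Hypothetical helper: round a bit count up to whole unsigned longs,
     * which is the sizing the open-coded arrays above follow. */
    #define HYPOTHETICAL_BITMAP_LONGS(bits) \
        (((bits) + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long)))

    /* HYPOTHETICAL_BITMAP_LONGS(32*4) longs occupy 16 bytes, matching
     * "unsigned long i[16 / sizeof (unsigned long)]";
     * HYPOTHETICAL_BITMAP_LONGS(16) == 1, matching "unsigned long i[1]". */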
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-x86/xen-mca.h     Fri Feb 13 11:22:28 2009 +0900
@@ -56,7 +56,7 @@
 /* Hypercall */
 #define __HYPERVISOR_mca __HYPERVISOR_arch_0
 
-#define XEN_MCA_INTERFACE_VERSION 0x03000001
+#define XEN_MCA_INTERFACE_VERSION 0x03000002
 
 /* IN: Dom0 calls hypercall from MC event handler. */
 #define XEN_MC_CORRECTABLE  0x0
@@ -118,7 +118,7 @@ struct mcinfo_global {
     uint16_t mc_domid;
     uint32_t mc_socketid; /* physical socket of the physical core */
     uint16_t mc_coreid; /* physical impacted core */
-    uint8_t  mc_apicid;
+    uint32_t mc_apicid;
     uint16_t mc_core_threadid; /* core thread of physical core */
     uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
     uint64_t mc_gstatus; /* global status */
@@ -175,6 +175,41 @@ struct mc_info {
 };
 typedef struct mc_info mc_info_t;
 
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS       7       /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX        0       /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX        1       /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM     2       /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX  3       /* Linux-defined */
+#define MC_CAPS_STD_ECX        4       /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA    5       /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX        6       /* cpuid level 0x80000001 (%ecx) */
+
+typedef struct mcinfo_logical_cpu {
+    uint32_t mc_cpunr;          
+    uint32_t mc_chipid; 
+    uint16_t mc_coreid;
+    uint16_t mc_threadid;
+    uint32_t mc_apicid;
+    uint32_t mc_clusterid;
+    uint32_t mc_ncores;
+    uint32_t mc_ncores_active;
+    uint32_t mc_nthreads;
+    int32_t mc_cpuid_level;
+    uint32_t mc_family;
+    uint32_t mc_vendor;
+    uint32_t mc_model;
+    uint32_t mc_step;
+    char mc_vendorid[16];
+    char mc_brandid[64];
+    uint32_t mc_cpu_caps[MC_NCAPS];
+    uint32_t mc_cache_size;
+    uint32_t mc_cache_alignment;
+    int32_t mc_nmsrvals;
+    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+} xen_mc_logical_cpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
 
 
 /* 
@@ -272,6 +307,14 @@ typedef struct xen_mc_notifydomain xen_m
 typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
 DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
 
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+       /* IN/OUT */
+       uint32_t ncpus;
+       uint32_t pad0;
+       /* OUT */
+       XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
+};
 
 struct xen_mc {
     uint32_t cmd;
@@ -279,6 +322,7 @@ struct xen_mc {
     union {
         struct xen_mc_fetch        mc_fetch;
         struct xen_mc_notifydomain mc_notifydomain;
+        struct xen_mc_physcpuinfo  mc_physcpuinfo;
         uint8_t pad[MCINFO_HYPERCALLSIZE];
     } u;
 };
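XEN_MC_physcpuinfo adds a query interface: ncpus is IN/OUT and info is a guest handle that, when non-null, receives one xen_mc_logical_cpu_t record per physical CPU. A dom0-side sketch follows; hypercall_mca() is an assumed wrapper for the __HYPERVISOR_mca hypercall, the "null handle means count only" convention is an assumption about the handler, and struct xen_mc is assumed to keep its usual cmd/interface_version/u layout:

    /* Hypothetical dom0 caller (userspace-style allocation for brevity;
     * calloc/free come from stdlib.h). */
    static xen_mc_logical_cpu_t *fetch_physcpuinfo(uint32_t *ncpus)
    {
        struct xen_mc mc = {
            .cmd = XEN_MC_physcpuinfo,
            .interface_version = XEN_MCA_INTERFACE_VERSION,
        };
        xen_mc_logical_cpu_t *cpus;

        set_xen_guest_handle(mc.u.mc_physcpuinfo.info, NULL);
        if ( hypercall_mca(&mc) )            /* first call: learn the count */
            return NULL;

        cpus = calloc(mc.u.mc_physcpuinfo.ncpus, sizeof(*cpus));
        if ( cpus == NULL )
            return NULL;

        set_xen_guest_handle(mc.u.mc_physcpuinfo.info, cpus);
        if ( hypercall_mca(&mc) )            /* second call: copy the records */
        {
            free(cpus);
            return NULL;
        }

        *ncpus = mc.u.mc_physcpuinfo.ncpus;
        return cpus;
    }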
diff -r af992824b5cf -r c7cba853583d xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/domctl.h       Fri Feb 13 11:22:28 2009 +0900
@@ -630,6 +630,17 @@ typedef struct xen_domctl_debug_op xen_d
 typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
 
+/*
+ * Request a particular record from the HVM context
+ */
+#define XEN_DOMCTL_gethvmcontext_partial   55
+typedef struct xen_domctl_hvmcontext_partial {
+    uint32_t type;                      /* IN: Type of record required */
+    uint32_t instance;                  /* IN: Instance of that type */
+    XEN_GUEST_HANDLE_64(uint8) buffer;  /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
 
 struct xen_domctl {
     uint32_t cmd;
@@ -658,6 +669,7 @@ struct xen_domctl {
         struct xen_domctl_settimeoffset     settimeoffset;
         struct xen_domctl_real_mode_area    real_mode_area;
         struct xen_domctl_hvmcontext        hvmcontext;
+        struct xen_domctl_hvmcontext_partial hvmcontext_partial;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
         struct xen_domctl_get_device_group  get_device_group;
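Where XEN_DOMCTL_gethvmcontext returns the whole HVM save image, the partial variant asks for a single typed record (the hypervisor side is hvm_save_one(), declared in xen/include/xen/hvm/save.h later in this patch). A hedged caller-side sketch; do_domctl_hypercall() is an assumed wrapper, and the choice of HVM_SAVE_CODE(CPU) with instance 0 (VCPU 0's register state) is only an example:

    /* Hypothetical toolstack-side request for one save record; the buffer is
     * assumed to be large enough for the record being asked for. */
    static int hypothetical_get_vcpu0_record(uint32_t domid, uint8_t *buf)
    {
        struct xen_domctl domctl;

        memset(&domctl, 0, sizeof(domctl));
        domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
        domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl.domain = (domid_t)domid;
        domctl.u.hvmcontext_partial.type = HVM_SAVE_CODE(CPU);   /* record type */
        domctl.u.hvmcontext_partial.instance = 0;                 /* VCPU 0 */
        set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, buf);

        return do_domctl_hypercall(&domctl);   /* assumed wrapper, not shown */
    }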
diff -r af992824b5cf -r c7cba853583d xen/include/public/io/pciif.h
--- a/xen/include/public/io/pciif.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/io/pciif.h     Fri Feb 13 11:22:28 2009 +0900
@@ -29,7 +29,7 @@
 
 /* xen_pci_sharedinfo flags */
 #define _XEN_PCIF_active     (0)
-#define XEN_PCIF_active      (1<<_XEN_PCI_active)
+#define XEN_PCIF_active      (1<<_XEN_PCIF_active)
 #define _XEN_PCIB_AERHANDLER (1)
 #define XEN_PCIB_AERHANDLER  (1<<_XEN_PCIB_AERHANDLER)
 #define _XEN_PCIB_active     (2)
diff -r af992824b5cf -r c7cba853583d xen/include/xen/hvm/save.h
--- a/xen/include/xen/hvm/save.h        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/hvm/save.h        Fri Feb 13 11:22:28 2009 +0900
@@ -152,6 +152,8 @@ __initcall(__hvm_register_##_x##_save_an
 /* Entry points for saving and restoring HVM domain state */
 size_t hvm_save_size(struct domain *d);
 int hvm_save(struct domain *d, hvm_domain_context_t *h);
+int hvm_save_one(struct domain *d,  uint16_t typecode, uint16_t instance, 
+                 XEN_GUEST_HANDLE_64(uint8) handle);
 int hvm_load(struct domain *d, hvm_domain_context_t *h);
 
 /* Arch-specific definitions. */
diff -r af992824b5cf -r c7cba853583d xen/include/xen/iocap.h
--- a/xen/include/xen/iocap.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/iocap.h   Fri Feb 13 11:22:28 2009 +0900
@@ -29,6 +29,7 @@
     rangeset_contains_singleton((d)->irq_caps, i)
 
 #define multipage_allocation_permitted(d)               \
-    (!rangeset_is_empty((d)->iomem_caps))
+    (!rangeset_is_empty((d)->iomem_caps) ||             \
+     !rangeset_is_empty((d)->arch.ioport_caps))
 
 #endif /* __XEN_IOCAP_H__ */
diff -r af992824b5cf -r c7cba853583d xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/irq.h     Fri Feb 13 11:22:28 2009 +0900
@@ -24,6 +24,11 @@ struct irqaction
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
 #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
+
+/* Special IRQ numbers. */
+#define AUTO_ASSIGN_IRQ         (-1)
+#define NEVER_ASSIGN_IRQ        (-2)
+#define FREE_TO_ASSIGN_IRQ      (-3)
 
 /*
  * Interrupt controller descriptor. This is all we need
@@ -64,11 +69,20 @@ typedef struct {
 
 extern irq_desc_t irq_desc[NR_VECTORS];
 
-extern int setup_irq(unsigned int, struct irqaction *);
-extern void free_irq(unsigned int);
-extern int request_irq(unsigned int irq,
+extern int setup_irq_vector(unsigned int, struct irqaction *);
+extern void release_irq_vector(unsigned int);
+extern int request_irq_vector(unsigned int vector,
                void (*handler)(int, void *, struct cpu_user_regs *),
                unsigned long irqflags, const char * devname, void *dev_id);
+
+#define setup_irq(irq, action) \
+    setup_irq_vector(irq_to_vector(irq), action)
+
+#define release_irq(irq) \
+    release_irq_vector(irq_to_vector(irq))
+
+#define request_irq(irq, handler, irqflags, devname, devid) \
+    request_irq_vector(irq_to_vector(irq), handler, irqflags, devname, devid)
 
 extern hw_irq_controller no_irq_type;
 extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
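The IRQ-number entry points survive as macros, so existing callers compile unchanged while the underlying functions now operate on vectors via irq_to_vector(). A minimal caller sketch with a hypothetical IRQ number, handler and device name, none of which are part of this patch:

    /* Hypothetical device setup: setup_irq() below expands to
     * setup_irq_vector(irq_to_vector(EXAMPLE_IRQ), &example_action). */
    #define EXAMPLE_IRQ 10                       /* assumed platform IRQ */

    static void example_handler(int irq, void *dev_id,
                                struct cpu_user_regs *regs)
    {
        /* ... acknowledge and service the device ... */
    }

    static struct irqaction example_action = {
        .handler = example_handler,
        .name    = "example-device",
    };

    static void __init example_device_init(void)
    {
        setup_irq(EXAMPLE_IRQ, &example_action);
    }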
diff -r af992824b5cf -r c7cba853583d xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/mm.h      Fri Feb 13 11:22:28 2009 +0900
@@ -85,22 +85,192 @@ int assign_pages(
 #define MAX_ORDER 20 /* 2^20 contiguous pages */
 #endif
 
+#define page_list_entry list_head
+
+#include <asm/mm.h>
+
+#ifndef page_list_entry
+struct page_list_head
+{
+    struct page_info *next, *tail;
+};
+/* These must only have instances in struct page_info. */
+# define page_list_entry
+
+# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
+# define PAGE_LIST_HEAD(name) \
+    struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
+# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL)
+# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0)
+
+static inline int
+page_list_empty(const struct page_list_head *head)
+{
+    return !head->next;
+}
+static inline struct page_info *
+page_list_first(const struct page_list_head *head)
+{
+    return head->next;
+}
+static inline struct page_info *
+page_list_next(const struct page_info *page,
+               const struct page_list_head *head)
+{
+    return page != head->tail ? mfn_to_page(page->list.next) : NULL;
+}
+static inline struct page_info *
+page_list_prev(const struct page_info *page,
+               const struct page_list_head *head)
+{
+    return page != head->next ? mfn_to_page(page->list.prev) : NULL;
+}
+static inline void
+page_list_add(struct page_info *page, struct page_list_head *head)
+{
+    if ( head->next )
+    {
+        page->list.next = page_to_mfn(head->next);
+        head->next->list.prev = page_to_mfn(page);
+    }
+    else
+    {
+        head->tail = page;
+        page->list.next = ~0;
+    }
+    page->list.prev = ~0;
+    head->next = page;
+}
+static inline void
+page_list_add_tail(struct page_info *page, struct page_list_head *head)
+{
+    page->list.next = ~0;
+    if ( head->next )
+    {
+        page->list.prev = page_to_mfn(head->tail);
+        head->tail->list.next = page_to_mfn(page);
+    }
+    else
+    {
+        page->list.prev = ~0;
+        head->next = page;
+    }
+    head->tail = page;
+}
+static inline bool_t
+__page_list_del_head(struct page_info *page, struct page_list_head *head,
+                     struct page_info *next, struct page_info *prev)
+{
+    if ( head->next == page )
+    {
+        if ( head->tail != page )
+        {
+            next->list.prev = ~0;
+            head->next = next;
+        }
+        else
+            head->tail = head->next = NULL;
+        return 1;
+    }
+
+    if ( head->tail == page )
+    {
+        prev->list.next = ~0;
+        head->tail = prev;
+        return 1;
+    }
+
+    return 0;
+}
+static inline void
+page_list_del(struct page_info *page, struct page_list_head *head)
+{
+    struct page_info *next = mfn_to_page(page->list.next);
+    struct page_info *prev = mfn_to_page(page->list.prev);
+
+    if ( !__page_list_del_head(page, head, next, prev) )
+    {
+        next->list.prev = page->list.prev;
+        prev->list.next = page->list.next;
+    }
+}
+static inline void
+page_list_del2(struct page_info *page, struct page_list_head *head1,
+               struct page_list_head *head2)
+{
+    struct page_info *next = mfn_to_page(page->list.next);
+    struct page_info *prev = mfn_to_page(page->list.prev);
+
+    if ( !__page_list_del_head(page, head1, next, prev) &&
+         !__page_list_del_head(page, head2, next, prev) )
+    {
+        next->list.prev = page->list.prev;
+        prev->list.next = page->list.next;
+    }
+}
+static inline struct page_info *
+page_list_remove_head(struct page_list_head *head)
+{
+    struct page_info *page = head->next;
+
+    if ( page )
+        page_list_del(page, head);
+
+    return page;
+}
+
+#define page_list_for_each(pos, head) \
+    for ( pos = (head)->next; pos; pos = page_list_next(pos, head) )
+#define page_list_for_each_safe(pos, tmp, head) \
+    for ( pos = (head)->next; \
+          pos ? (tmp = page_list_next(pos, head), 1) : 0; \
+          pos = tmp )
+#define page_list_for_each_safe_reverse(pos, tmp, head) \
+    for ( pos = (head)->tail; \
+          pos ? (tmp = page_list_prev(pos, head), 1) : 0; \
+          pos = tmp )
+#else
+# define page_list_head                  list_head
+# define PAGE_LIST_HEAD_INIT             LIST_HEAD_INIT
+# define PAGE_LIST_HEAD                  LIST_HEAD
+# define INIT_PAGE_LIST_HEAD             INIT_LIST_HEAD
+# define INIT_PAGE_LIST_ENTRY            INIT_LIST_HEAD
+# define page_list_empty                 list_empty
+# define page_list_first(hd)             list_entry((hd)->next, \
+                                                    struct page_info, list)
+# define page_list_next(pg, hd)          list_entry((pg)->list.next, \
+                                                    struct page_info, list)
+# define page_list_add(pg, hd)           list_add(&(pg)->list, hd)
+# define page_list_add_tail(pg, hd)      list_add_tail(&(pg)->list, hd)
+# define page_list_del(pg, hd)           list_del(&(pg)->list)
+# define page_list_del2(pg, hd1, hd2)    list_del(&(pg)->list)
+# define page_list_remove_head(hd)       (!page_list_empty(hd) ? \
+    ({ \
+        struct page_info *__pg = page_list_first(hd); \
+        list_del(&__pg->list); \
+        __pg; \
+    }) : NULL)
+# define page_list_for_each(pos, head)   list_for_each_entry(pos, head, list)
+# define page_list_for_each_safe(pos, tmp, head) \
+    list_for_each_entry_safe(pos, tmp, head, list)
+# define page_list_for_each_safe_reverse(pos, tmp, head) \
+    list_for_each_entry_safe_reverse(pos, tmp, head, list)
+#endif
+
 /* Automatic page scrubbing for dead domains. */
-extern struct list_head page_scrub_list;
-#define page_scrub_schedule_work()              \
-    do {                                        \
-        if ( !list_empty(&page_scrub_list) )    \
-            raise_softirq(PAGE_SCRUB_SOFTIRQ);  \
+extern struct page_list_head page_scrub_list;
+#define page_scrub_schedule_work()                 \
+    do {                                           \
+        if ( !page_list_empty(&page_scrub_list) )  \
+            raise_softirq(PAGE_SCRUB_SOFTIRQ);     \
     } while ( 0 )
 #define page_scrub_kick()                                               \
     do {                                                                \
-        if ( !list_empty(&page_scrub_list) )                            \
+        if ( !page_list_empty(&page_scrub_list) )                       \
             cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ);  \
     } while ( 0 )
 unsigned long avail_scrub_pages(void);
 
-#include <asm/mm.h>
-
 int guest_remove_page(struct domain *d, unsigned long gmfn);
 
 /* Returns TRUE if the whole page at @mfn is ordinary RAM. */
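Both halves of the #ifndef above provide the same interface (page_list_head, the add/del helpers and the page_list_for_each* iterators), so common code is written once and works whether an architecture stores compact MFN links in struct page_info or keeps plain list_head entries. A hedged usage sketch; the counting helper is hypothetical, while d->page_list and d->page_alloc_lock are the struct domain fields converted to the new type later in this changeset:

    /* Hypothetical helper: count a domain's pages with the new iterator.
     * Behaves identically with the MFN-linked and list_head-based variants. */
    static unsigned long count_domain_pages(struct domain *d)
    {
        struct page_info *pg;
        unsigned long n = 0;

        spin_lock(&d->page_alloc_lock);
        page_list_for_each ( pg, &d->page_list )
            n++;
        spin_unlock(&d->page_alloc_lock);

        return n;
    }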
diff -r af992824b5cf -r c7cba853583d xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/sched.h   Fri Feb 13 11:22:28 2009 +0900
@@ -19,6 +19,7 @@
 #include <xen/xenoprof.h>
 #include <xen/rcupdate.h>
 #include <xen/irq.h>
+#include <xen/mm.h>
 
 #ifdef CONFIG_COMPAT
 #include <compat/vcpu.h>
@@ -171,8 +172,8 @@ struct domain
     spinlock_t       domain_lock;
 
     spinlock_t       page_alloc_lock; /* protects all the following fields  */
-    struct list_head page_list;       /* linked list, of size tot_pages     */
-    struct list_head xenpage_list;    /* linked list, of size xenheap_pages */
+    struct page_list_head page_list;  /* linked list, of size tot_pages     */
+    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
     unsigned int     tot_pages;       /* number of pages currently possesed */
     unsigned int     max_pages;       /* maximum value for tot_pages        */
     unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
diff -r af992824b5cf -r c7cba853583d xen/xsm/flask/hooks.c
--- a/xen/xsm/flask/hooks.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/xsm/flask/hooks.c     Fri Feb 13 11:22:28 2009 +0900
@@ -820,6 +820,7 @@ static int flask_hvmcontext(struct domai
             perm = HVM__SETHVMC;
         break;
         case XEN_DOMCTL_gethvmcontext:
+        case XEN_DOMCTL_gethvmcontext_partial:
             perm = HVM__GETHVMC;
         break;
         default:

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
