WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 13 Feb 2009 04:00:17 -0800
Delivery-date: Fri, 13 Feb 2009 04:00:29 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1234491748 -32400
# Node ID c7cba853583da45ee4478237047fdd5d6bed68cd
# Parent  af992824b5cfa3b81dbe68293216a5df3ec0bdf4
# Parent  32b15413749255e0cd518f25d9202759586dcb27
merge with xen-unstable.hg
---
 .hgignore                                        |   10 
 Config.mk                                        |    4 
 docs/man/xm.pod.1                                |    2 
 extras/mini-os/arch/x86/mm.c                     |    6 
 tools/blktap/drivers/Makefile                    |    2 
 tools/firmware/rombios/rombios.c                 |   15 -
 tools/libxc/xc_domain.c                          |   58 ++++
 tools/libxc/xc_pagetab.c                         |   36 +-
 tools/libxc/xenctrl.h                            |   25 +
 tools/python/xen/xend/XendAPIStore.py            |    3 
 tools/python/xen/xend/image.py                   |   34 +-
 tools/python/xen/xend/server/pciquirk.py         |    3 
 tools/python/xen/xm/create.py                    |    2 
 tools/python/xen/xm/main.py                      |    8 
 tools/xentrace/xenctx.c                          |   58 +++-
 xen/arch/ia64/linux-xen/iosapic.c                |   10 
 xen/arch/ia64/linux-xen/irq_ia64.c               |   26 +-
 xen/arch/ia64/linux-xen/mca.c                    |    9 
 xen/arch/ia64/xen/hypercall.c                    |    2 
 xen/arch/ia64/xen/irq.c                          |   24 -
 xen/arch/x86/Makefile                            |    1 
 xen/arch/x86/acpi/cpufreq/cpufreq.c              |   16 -
 xen/arch/x86/acpi/suspend.c                      |    3 
 xen/arch/x86/boot/Makefile                       |    6 
 xen/arch/x86/boot/build32.mk                     |   24 +
 xen/arch/x86/boot/head.S                         |   10 
 xen/arch/x86/boot/reloc.c                        |   89 ++++++
 xen/arch/x86/cpu/mcheck/amd_k8.c                 |   14 -
 xen/arch/x86/cpu/mcheck/amd_nonfatal.c           |   13 -
 xen/arch/x86/cpu/mcheck/mce.c                    |  130 ++++++++++
 xen/arch/x86/cpu/mcheck/mce.h                    |    3 
 xen/arch/x86/cpu/mcheck/mce_intel.c              |    8 
 xen/arch/x86/domain.c                            |   38 +-
 xen/arch/x86/domain_build.c                      |    2 
 xen/arch/x86/domctl.c                            |   42 ++-
 xen/arch/x86/e820.c                              |    2 
 xen/arch/x86/i8259.c                             |    4 
 xen/arch/x86/io_apic.c                           |   54 ----
 xen/arch/x86/irq.c                               |   77 +++++
 xen/arch/x86/mm.c                                |  158 ++++++------
 xen/arch/x86/mm/hap/hap.c                        |   23 -
 xen/arch/x86/mm/hap/p2m-ept.c                    |   88 ++++--
 xen/arch/x86/mm/p2m.c                            |   95 ++-----
 xen/arch/x86/mm/paging.c                         |    6 
 xen/arch/x86/mm/shadow/common.c                  |  296 +++++++++++------------
 xen/arch/x86/mm/shadow/multi.c                   |  131 ++++------
 xen/arch/x86/mm/shadow/private.h                 |  123 ++-------
 xen/arch/x86/numa.c                              |    2 
 xen/arch/x86/physdev.c                           |    4 
 xen/arch/x86/setup.c                             |   16 -
 xen/arch/x86/smpboot.c                           |    4 
 xen/arch/x86/x86_32/xen.lds.S                    |    1 
 xen/arch/x86/x86_64/entry.S                      |    1 
 xen/arch/x86/x86_64/xen.lds.S                    |    1 
 xen/common/domain.c                              |   18 -
 xen/common/grant_table.c                         |    2 
 xen/common/hvm/save.c                            |   48 +++
 xen/common/memory.c                              |   32 --
 xen/common/page_alloc.c                          |  158 +++++++-----
 xen/drivers/char/serial.c                        |    2 
 xen/drivers/cpufreq/cpufreq.c                    |   73 +++++
 xen/drivers/cpufreq/cpufreq_misc_governors.c     |   25 +
 xen/drivers/cpufreq/cpufreq_ondemand.c           |   95 +++----
 xen/drivers/passthrough/amd/iommu_init.c         |   23 -
 xen/drivers/passthrough/amd/iommu_map.c          |   22 -
 xen/drivers/passthrough/amd/pci_amd_iommu.c      |    1 
 xen/drivers/passthrough/io.c                     |    2 
 xen/drivers/passthrough/iommu.c                  |    9 
 xen/drivers/passthrough/vtd/dmar.c               |   18 -
 xen/drivers/passthrough/vtd/ia64/vtd.c           |   13 -
 xen/drivers/passthrough/vtd/intremap.c           |    2 
 xen/drivers/passthrough/vtd/iommu.c              |   61 +++-
 xen/drivers/passthrough/vtd/iommu.h              |    4 
 xen/drivers/passthrough/vtd/qinval.c             |    4 
 xen/drivers/passthrough/vtd/vtd.h                |    4 
 xen/drivers/passthrough/vtd/x86/vtd.c            |    9 
 xen/include/acpi/cpufreq/cpufreq.h               |    1 
 xen/include/asm-ia64/hardirq.h                   |    1 
 xen/include/asm-ia64/hvm/iommu.h                 |    1 
 xen/include/asm-ia64/hvm/irq.h                   |   14 -
 xen/include/asm-ia64/linux-xen/asm/smp.h         |    1 
 xen/include/asm-ia64/linux-xen/linux/interrupt.h |    4 
 xen/include/asm-ia64/linux/asm/hw_irq.h          |    2 
 xen/include/asm-ia64/mm.h                        |   12 
 xen/include/asm-ia64/tlbflush.h                  |    3 
 xen/include/asm-x86/domain.h                     |   13 -
 xen/include/asm-x86/hvm/vmx/vmx.h                |    6 
 xen/include/asm-x86/iocap.h                      |    5 
 xen/include/asm-x86/irq.h                        |    3 
 xen/include/asm-x86/mm.h                         |   84 +++++-
 xen/include/asm-x86/p2m.h                        |    4 
 xen/include/asm-x86/page.h                       |   38 ++
 xen/include/asm-x86/perfc.h                      |    1 
 xen/include/asm-x86/processor.h                  |    1 
 xen/include/public/arch-ia64/hvm/save.h          |    4 
 xen/include/public/arch-x86/hvm/save.h           |    4 
 xen/include/public/arch-x86/xen-mca.h            |   48 +++
 xen/include/public/domctl.h                      |   12 
 xen/include/public/io/pciif.h                    |    2 
 xen/include/xen/hvm/save.h                       |    2 
 xen/include/xen/iocap.h                          |    3 
 xen/include/xen/irq.h                            |   20 +
 xen/include/xen/mm.h                             |  186 +++++++++++++-
 xen/include/xen/sched.h                          |    5 
 xen/xsm/flask/hooks.c                            |    1 
 105 files changed, 1880 insertions(+), 1048 deletions(-)

diff -r af992824b5cf -r c7cba853583d .hgignore
--- a/.hgignore Fri Feb 13 10:56:01 2009 +0900
+++ b/.hgignore Fri Feb 13 11:22:28 2009 +0900
@@ -256,6 +256,7 @@
 ^xen/arch/x86/asm-offsets\.s$
 ^xen/arch/x86/boot/mkelf32$
 ^xen/arch/x86/xen\.lds$
+^xen/arch/x86/boot/reloc.S$
 ^xen/ddb/.*$
 ^xen/include/asm$
 ^xen/include/asm-.*/asm-offsets\.h$
@@ -279,15 +280,6 @@
 ^xen/arch/ia64/asm-xsi-offsets\.s$
 ^xen/arch/ia64/map\.out$
 ^xen/arch/ia64/xen\.lds\.s$
-^xen/arch/powerpc/dom0\.bin$
-^xen/arch/powerpc/asm-offsets\.s$
-^xen/arch/powerpc/firmware$
-^xen/arch/powerpc/firmware.dbg$
-^xen/arch/powerpc/firmware_image.bin$
-^xen/arch/powerpc/xen\.lds$
-^xen/arch/powerpc/\.xen-syms$
-^xen/arch/powerpc/xen-syms\.S$
-^xen/arch/powerpc/cmdline.dep$
 ^unmodified_drivers/linux-2.6/\.tmp_versions
 ^unmodified_drivers/linux-2.6/.*\.cmd$
 ^unmodified_drivers/linux-2.6/.*\.ko$
diff -r af992824b5cf -r c7cba853583d Config.mk
--- a/Config.mk Fri Feb 13 10:56:01 2009 +0900
+++ b/Config.mk Fri Feb 13 11:22:28 2009 +0900
@@ -1,7 +1,7 @@
 # -*- mode: Makefile; -*-
 
-# A debug build of Xen and tools?
-debug ?= y ## TEMPORARILY ENABLED
+# A debug build of Xen and tools? TEMPORARILY ENABLED
+debug ?= y
 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
                          -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
diff -r af992824b5cf -r c7cba853583d docs/man/xm.pod.1
--- a/docs/man/xm.pod.1 Fri Feb 13 10:56:01 2009 +0900
+++ b/docs/man/xm.pod.1 Fri Feb 13 11:22:28 2009 +0900
@@ -66,6 +66,8 @@ The attached console will perform much l
 The attached console will perform much like a standard serial console,
 so running curses based interfaces over the console B<is not
 advised>.  Vi tends to get very odd when using it over this interface.
+
+Use the key combination Ctrl+] to detach the domain console.
 
 =item B<create> I<configfile> [I<OPTIONS>] [I<vars>]..
 
diff -r af992824b5cf -r c7cba853583d extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/extras/mini-os/arch/x86/mm.c      Fri Feb 13 11:22:28 2009 +0900
@@ -550,9 +550,15 @@ static void clear_bootstrap(void)
 
 void arch_init_p2m(unsigned long max_pfn)
 {
+#ifdef __x86_64__
 #define L1_P2M_SHIFT    9
 #define L2_P2M_SHIFT    18    
 #define L3_P2M_SHIFT    27    
+#else
+#define L1_P2M_SHIFT    10
+#define L2_P2M_SHIFT    20    
+#define L3_P2M_SHIFT    30    
+#endif
 #define L1_P2M_ENTRIES  (1 << L1_P2M_SHIFT)    
 #define L2_P2M_ENTRIES  (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT))    
 #define L3_P2M_ENTRIES  (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))    
diff -r af992824b5cf -r c7cba853583d tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/blktap/drivers/Makefile     Fri Feb 13 11:22:28 2009 +0900
@@ -13,7 +13,7 @@ CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -D_GNU_SOURCE
 
-ifeq ($(shell . ./check_gcrypt),"yes")
+ifeq ($(shell . ./check_gcrypt $(CC)),yes)
 CFLAGS += -DUSE_GCRYPT
 CRYPT_LIB := -lgcrypt
 else
diff -r af992824b5cf -r c7cba853583d tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/firmware/rombios/rombios.c  Fri Feb 13 11:22:28 2009 +0900
@@ -4609,6 +4609,10 @@ int15_function32(regs, ES, DS, FLAGS)
 {
   Bit32u  extended_memory_size=0; // 64bits long
   Bit16u  CX,DX;
+#ifdef HVMASSIST
+  Bit16u off, e820_table_size;
+  Bit32u base, type, size;
+#endif
 
 BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
 
@@ -4625,8 +4629,10 @@ ASM_START
 
       ;; Get the count in eax
       mov  bx, sp
+SEG SS
       mov  ax, _int15_function32.CX [bx]
       shl  eax, #16
+SEG SS
       mov  ax, _int15_function32.DX [bx]
 
       ;; convert to numbers of 15usec ticks
@@ -4660,8 +4666,7 @@ ASM_END
         {
 #ifdef HVMASSIST
        case 0x20: {
-            Bit16u e820_table_size =
-                read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
+            e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
 
             if (regs.u.r32.edx != 0x534D4150) /* SMAP */
                 goto int15_unimplemented;
@@ -4674,8 +4679,6 @@ ASM_END
                 if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
                     regs.u.r32.ebx = 0;
             } else if (regs.u.r16.bx == 1) {
-                Bit32u base, type;
-                Bit16u off;
                 for (off = 0; off < e820_table_size; off += 0x14) {
                     base = read_dword(E820_SEG, E820_OFFSET + off);
                     type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off);
@@ -4699,9 +4702,7 @@ ASM_END
         }
 
         case 0x01: {
-            Bit16u off, e820_table_size =
-                read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
-            Bit32u base, type, size;
+            e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
 
             // do we have any reason to fail here ?
             CLEAR_CF();
diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xc_domain.c   Fri Feb 13 11:22:28 2009 +0900
@@ -269,6 +269,38 @@ int xc_domain_hvm_getcontext(int xc_hand
         unlock_pages(ctxt_buf, size);
 
     return (ret < 0 ? -1 : domctl.u.hvmcontext.size);
+}
+
+/* Get just one element of the HVM guest context.
+ * size must be >= HVM_SAVE_LENGTH(type) */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+                                     uint32_t domid,
+                                     uint16_t typecode,
+                                     uint16_t instance,
+                                     void *ctxt_buf,
+                                     uint32_t size)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    if ( !ctxt_buf ) 
+        return -EINVAL;
+
+    domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
+    domctl.domain = (domid_t) domid;
+    domctl.u.hvmcontext_partial.type = typecode;
+    domctl.u.hvmcontext_partial.instance = instance;
+    set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf);
+
+    if ( (ret = lock_pages(ctxt_buf, size)) != 0 )
+        return ret;
+    
+    ret = do_domctl(xc_handle, &domctl);
+
+    if ( ctxt_buf ) 
+        unlock_pages(ctxt_buf, size);
+
+    return ret ? -1 : 0;
 }
 
 /* set info to hvm guest for restore */
@@ -909,6 +941,32 @@ int xc_domain_update_msi_irq(
     return rc;
 }
 
+int xc_domain_unbind_msi_irq(
+    int xc_handle,
+    uint32_t domid,
+    uint32_t gvec,
+    uint32_t pirq,
+    uint32_t gflags)
+{
+    int rc;
+    xen_domctl_bind_pt_irq_t *bind;
+
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
+    domctl.domain = (domid_t)domid;
+
+    bind = &(domctl.u.bind_pt_irq);
+    bind->hvm_domid = domid;
+    bind->irq_type = PT_IRQ_TYPE_MSI;
+    bind->machine_irq = pirq;
+    bind->u.msi.gvec = gvec;
+    bind->u.msi.gflags = gflags;
+
+    rc = do_domctl(xc_handle, &domctl);
+    return rc;
+}
+
 /* Pass-through: binds machine irq to guests irq */
 int xc_domain_bind_pt_irq(
     int xc_handle,
diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xc_pagetab.c  Fri Feb 13 11:22:28 2009 +0900
@@ -4,50 +4,42 @@
  * Function to translate virtual to physical addresses.
  */
 #include "xc_private.h"
+#include <xen/hvm/save.h>
 
 #define CR0_PG  0x80000000
 #define CR4_PAE 0x20
 #define PTE_PSE 0x80
+#define EFER_LMA 0x400
+
 
 unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
                                            int vcpu, unsigned long long virt)
 {
     xc_dominfo_t dominfo;
-    vcpu_guest_context_any_t ctx;
     uint64_t paddr, mask, pte = 0;
     int size, level, pt_levels = 2;
     void *map;
 
     if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1 
-        || dominfo.domid != dom
-        || xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+        || dominfo.domid != dom)
         return 0;
 
     /* What kind of paging are we dealing with? */
     if (dominfo.hvm) {
-        unsigned long cr0, cr3, cr4;
-        xen_capabilities_info_t xen_caps = "";
-        if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
+        struct hvm_hw_cpu ctx;
+        if (xc_domain_hvm_getcontext_partial(xc_handle, dom,
+                                             HVM_SAVE_CODE(CPU), vcpu,
+                                             &ctx, sizeof ctx) != 0)
             return 0;
-        /* HVM context records are always host-sized */
-        if (strstr(xen_caps, "xen-3.0-x86_64")) {
-            cr0 = ctx.x64.ctrlreg[0];
-            cr3 = ctx.x64.ctrlreg[3];
-            cr4 = ctx.x64.ctrlreg[4];
-        } else {
-            cr0 = ctx.x32.ctrlreg[0];
-            cr3 = ctx.x32.ctrlreg[3];
-            cr4 = ctx.x32.ctrlreg[4];
-        }
-        if (!(cr0 & CR0_PG))
+        if (!(ctx.cr0 & CR0_PG))
             return virt;
-        if (0 /* XXX how to get EFER.LMA? */) 
-            pt_levels = 4;
-        else
-            pt_levels = (cr4 & CR4_PAE) ? 3 : 2;
-        paddr = cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
+        pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2;
+        paddr = ctx.cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
     } else {
         DECLARE_DOMCTL;
+        vcpu_guest_context_any_t ctx;
+        if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+            return 0;
         domctl.domain = dom;
         domctl.cmd = XEN_DOMCTL_get_address_size;
         if ( do_domctl(xc_handle, &domctl) != 0 )
diff -r af992824b5cf -r c7cba853583d tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xenctrl.h     Fri Feb 13 11:22:28 2009 +0900
@@ -375,6 +375,25 @@ int xc_domain_hvm_getcontext(int xc_hand
                              uint8_t *ctxt_buf,
                              uint32_t size);
 
+
+/**
+ * This function returns one element of the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm typecode which type of element is required
+ * @parm instance which instance of the type
+ * @parm ctxt_buf a pointer to a structure to store the execution context of
+ *            the hvm domain
+ * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode))
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+                                     uint32_t domid,
+                                     uint16_t typecode,
+                                     uint16_t instance,
+                                     void *ctxt_buf,
+                                     uint32_t size);
+
 /**
  * This function will set the context for hvm domain
  *
@@ -1074,6 +1093,12 @@ int xc_domain_update_msi_irq(
     uint32_t gvec,
     uint32_t pirq,
     uint32_t gflags);
+
+int xc_domain_unbind_msi_irq(int xc_handle,
+                             uint32_t domid,
+                             uint32_t gvec,
+                             uint32_t pirq,
+                             uint32_t gflags);
 
 int xc_domain_bind_pt_irq(int xc_handle,
                           uint32_t domid,
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/XendAPIStore.py
--- a/tools/python/xen/xend/XendAPIStore.py     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/XendAPIStore.py     Fri Feb 13 11:22:28 2009 +0900
@@ -33,7 +33,8 @@ def register(uuid, type, inst):
 
 def deregister(uuid, type):
     old = get(uuid, type)
-    del __classes[(uuid, type)]
+    if old is not None:
+        del __classes[(uuid, type)]
     return old
 
 def get(uuid, type):
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/image.py    Fri Feb 13 11:22:28 2009 +0900
@@ -372,8 +372,6 @@ class ImageHandler:
             env['DISPLAY'] = self.display
         if self.xauthority:
             env['XAUTHORITY'] = self.xauthority
-        if self.vncconsole:
-            args = args + ([ "-vncviewer" ])
         unique_id = "%i-%i" % (self.vm.getDomid(), time.time())
         sentinel_path = sentinel_path_prefix + unique_id
         sentinel_path_fifo = sentinel_path + '.fifo'
@@ -558,24 +556,30 @@ class ImageHandler:
                     os.kill(self.pid, signal.SIGHUP)
                 except OSError, exn:
                     log.exception(exn)
-                try:
-                    # Try to reap the child every 100ms for 10s. Then SIGKILL it.
-                    for i in xrange(100):
+                # Try to reap the child every 100ms for 10s. Then SIGKILL it.
+                for i in xrange(100):
+                    try:
                         (p, rv) = os.waitpid(self.pid, os.WNOHANG)
                         if p == self.pid:
                             break
-                        time.sleep(0.1)
-                    else:
-                        log.warning("DeviceModel %d took more than 10s "
-                                    "to terminate: sending SIGKILL" % self.pid)
+                    except OSError:
+                        # This is expected if Xend has been restarted within
+                        # the life of this domain.  In this case, we can kill
+                        # the process, but we can't wait for it because it's
+                        # not our child. We continue this loop, and after it is
+                        # terminated make really sure the process is going away
+                        # (SIGKILL).
+                        pass
+                    time.sleep(0.1)
+                else:
+                    log.warning("DeviceModel %d took more than 10s "
+                                "to terminate: sending SIGKILL" % self.pid)
+                    try:
                         os.kill(self.pid, signal.SIGKILL)
                         os.waitpid(self.pid, 0)
-                except OSError, exn:
-                    # This is expected if Xend has been restarted within the
-                    # life of this domain.  In this case, we can kill the process,
-                    # but we can't wait for it because it's not our child.
-                    # We just make really sure it's going away (SIGKILL) first.
-                    os.kill(self.pid, signal.SIGKILL)
+                    except OSError:
+                        # This happens if the process doesn't exist.
+                        pass
                 state = xstransact.Remove("/local/domain/0/device-model/%i"
                                           % self.vm.getDomid())
             finally:
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/server/pciquirk.py
--- a/tools/python/xen/xend/server/pciquirk.py  Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/server/pciquirk.py  Fri Feb 13 11:22:28 2009 +0900
@@ -123,7 +123,8 @@ class PCIQuirk:
             log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE)
             self.pci_perm_dev_config = ['xend-pci-perm-devs']
 
-        devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0)
+        devices = child_at(child(self.pci_perm_dev_config,
+                                 'unconstrained_dev_ids'),0)
         if self.__matchPCIdev( devices ):
             log.debug("Permissive mode enabled for PCI device [%s]" %
                       self.devid)
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xm/create.py     Fri Feb 13 11:22:28 2009 +0900
@@ -1337,7 +1337,7 @@ def main(argv):
     elif not opts.is_xml:
         dom = make_domain(opts, config)
         
-    if opts.vals.vncviewer:
+    if opts.vals.vncconsole:
         domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
         vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
         console.runVncViewer(domid, vncviewer_autopass, True)
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xm/main.py       Fri Feb 13 11:22:28 2009 +0900
@@ -59,7 +59,11 @@ import XenAPI
 import XenAPI
 
 import xen.lowlevel.xc
-xc = xen.lowlevel.xc.xc()
+try:
+    xc = xen.lowlevel.xc.xc()
+except Exception, ex:
+    print >>sys.stderr, ("Is xen kernel running?")
+    sys.exit(1)
 
 import inspect
 from xen.xend import XendOptions
@@ -735,7 +739,7 @@ def xm_save(args):
         (options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint'])
     except getopt.GetoptError, opterr:
         err(opterr)
-        sys.exit(1)
+        usage('save')
 
     checkpoint = False
     for (k, v) in options:
diff -r af992824b5cf -r c7cba853583d tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/xentrace/xenctx.c   Fri Feb 13 11:22:28 2009 +0900
@@ -26,6 +26,7 @@
 #include "xenctrl.h"
 #include <xen/foreign/x86_32.h>
 #include <xen/foreign/x86_64.h>
+#include <xen/hvm/save.h>
 
 int xc_handle = 0;
 int domid = 0;
@@ -287,6 +288,35 @@ static void print_ctx_32(vcpu_guest_cont
     }
 }
 
+static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx)
+{
+    struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
+
+    printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip);
+    print_symbol((uint32_t)regs->eip);
+    print_flags((uint32_t)regs->eflags);
+    printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp);
+
+    printf("eax: %08x\t", (uint32_t)regs->eax);
+    printf("ebx: %08x\t", (uint32_t)regs->ebx);
+    printf("ecx: %08x\t", (uint32_t)regs->ecx);
+    printf("edx: %08x\n", (uint32_t)regs->edx);
+
+    printf("esi: %08x\t", (uint32_t)regs->esi);
+    printf("edi: %08x\t", (uint32_t)regs->edi);
+    printf("ebp: %08x\n", (uint32_t)regs->ebp);
+
+    printf(" ds:     %04x\t", regs->ds);
+    printf(" es:     %04x\t", regs->es);
+    printf(" fs:     %04x\t", regs->fs);
+    printf(" gs:     %04x\n", regs->gs);
+
+    if (disp_all) {
+        print_special(ctx->ctrlreg, "cr", 0x1d, 4);
+        print_special(ctx->debugreg, "dr", 0xcf, 4);
+    }
+}
+
 static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx)
 {
     struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
@@ -335,6 +365,8 @@ static void print_ctx(vcpu_guest_context
 {
     if (ctxt_word_size == 4) 
         print_ctx_32(&ctx->x32);
+    else if (guest_word_size == 4)
+        print_ctx_32on64(&ctx->x64);
     else 
         print_ctx_64(&ctx->x64);
 }
@@ -788,23 +820,29 @@ static void dump_ctx(int vcpu)
 
 #if defined(__i386__) || defined(__x86_64__)
     {
-        struct xen_domctl domctl;
-        memset(&domctl, 0, sizeof domctl);
-        domctl.domain = domid;
-        domctl.cmd = XEN_DOMCTL_get_address_size;
-        if (xc_domctl(xc_handle, &domctl) == 0)
-            ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
         if (dominfo.hvm) {
+            struct hvm_hw_cpu cpuctx;
             xen_capabilities_info_t xen_caps = "";
+            if (xc_domain_hvm_getcontext_partial(
+                    xc_handle, domid, HVM_SAVE_CODE(CPU), 
+                    vcpu, &cpuctx, sizeof cpuctx) != 0) {
+                perror("xc_domain_hvm_getcontext_partial");
+                exit(-1);
+            }
+            guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4;
+            /* HVM guest context records are always host-sized */
             if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) {
                 perror("xc_version");
                 exit(-1);
             }
-            /* HVM guest context records are always host-sized */
             ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4;
-            /* XXX For now we can't tell whether a HVM guest is in long
-             * XXX mode; eventually fix this here and in xc_pagetab.c */
-            guest_word_size = 4;
+        } else {
+            struct xen_domctl domctl;
+            memset(&domctl, 0, sizeof domctl);
+            domctl.domain = domid;
+            domctl.cmd = XEN_DOMCTL_get_address_size;
+            if (xc_domctl(xc_handle, &domctl) == 0)
+                ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
         }
     }
 #endif
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/iosapic.c
--- a/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 11:22:28 2009 +0900
@@ -93,6 +93,16 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
+#ifdef XEN
+static inline int iosapic_irq_to_vector (int irq)
+{
+       return irq;
+}
+
+#undef irq_to_vector
+#define irq_to_vector(irq)      iosapic_irq_to_vector(irq)
+#define AUTO_ASSIGN    AUTO_ASSIGN_IRQ
+#endif
 
 #undef DEBUG_INTERRUPT_ROUTING
 
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/irq_ia64.c
--- a/xen/arch/ia64/linux-xen/irq_ia64.c        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/irq_ia64.c        Fri Feb 13 11:22:28 2009 +0900
@@ -250,6 +250,7 @@ register_percpu_irq (ia64_vector vec, st
 register_percpu_irq (ia64_vector vec, struct irqaction *action)
 {
        irq_desc_t *desc;
+#ifndef XEN
        unsigned int irq;
 
        for (irq = 0; irq < NR_IRQS; ++irq)
@@ -258,16 +259,19 @@ register_percpu_irq (ia64_vector vec, st
                        desc->status |= IRQ_PER_CPU;
                        desc->handler = &irq_type_ia64_lsapic;
                        if (action)
-#ifdef XEN
-                               setup_vector(irq, action);
-#else
                                setup_irq(irq, action);
-#endif
-               }
-}
-
-#ifdef XEN
-int request_irq(unsigned int irq,
+               }
+#else
+       desc = irq_descp(vec);
+       desc->status |= IRQ_PER_CPU;
+       desc->handler = &irq_type_ia64_lsapic;
+       if (action)
+               setup_vector(vec, action);
+#endif
+}
+
+#ifdef XEN
+int request_irq_vector(unsigned int vector,
                void (*handler)(int, void *, struct cpu_user_regs *),
                unsigned long irqflags, const char * devname, void *dev_id)
 {
@@ -279,7 +283,7 @@ int request_irq(unsigned int irq,
         * otherwise we'll have trouble later trying to figure out
         * which interrupt is which (messes up the interrupt freeing logic etc).
         *                          */
-       if (irq >= NR_IRQS)
+       if (vector >= NR_VECTORS)
                return -EINVAL;
        if (!handler)
                return -EINVAL;
@@ -291,7 +295,7 @@ int request_irq(unsigned int irq,
        action->handler = handler;
        action->name = devname;
        action->dev_id = dev_id;
-       setup_vector(irq, action);
+       setup_vector(vector, action);
        if (retval)
                xfree(action);
 
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/mca.c
--- a/xen/arch/ia64/linux-xen/mca.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/mca.c     Fri Feb 13 11:22:28 2009 +0900
@@ -114,7 +114,6 @@ extern void                 ia64_slave_init_handler (v
 extern void                    ia64_slave_init_handler (void);
 #ifdef XEN
 extern void setup_vector (unsigned int vec, struct irqaction *action);
-#define setup_irq(irq, action) setup_vector(irq, action)
 #endif
 
 static ia64_mc_info_t          ia64_mc_info;
@@ -1931,12 +1930,18 @@ ia64_mca_late_init(void)
                if (cpe_vector >= 0) {
                        /* If platform supports CPEI, enable the irq. */
                        cpe_poll_enabled = 0;
+#ifndef XEN
                        for (irq = 0; irq < NR_IRQS; ++irq)
                                if (irq_to_vector(irq) == cpe_vector) {
                                        desc = irq_descp(irq);
                                        desc->status |= IRQ_PER_CPU;
-                                       setup_irq(irq, &mca_cpe_irqaction);
+                                       setup_vector(irq, &mca_cpe_irqaction);
                                }
+#else
+                       desc = irq_descp(cpe_vector);
+                       desc->status |= IRQ_PER_CPU;
+                       setup_vector(cpe_vector, &mca_cpe_irqaction);
+#endif
                        ia64_mca_register_cpev(cpe_vector);
                        IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
                } else {
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/xen/hypercall.c     Fri Feb 13 11:22:28 2009 +0900
@@ -543,7 +543,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
             break;
         irq_status_query.flags = 0;
         /* Edge-triggered interrupts don't need an explicit unmask downcall. */
-        if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+        if ( !strstr(irq_descp(irq)->handler->typename, "edge") )
             irq_status_query.flags |= XENIRQSTAT_needs_eoi;
         ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
         break;
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/xen/irq.c   Fri Feb 13 11:22:28 2009 +0900
@@ -228,11 +228,11 @@ out:
  * disabled.
  */
 
-int setup_vector(unsigned int irq, struct irqaction * new)
+int setup_vector(unsigned int vector, struct irqaction * new)
 {
        unsigned long flags;
        struct irqaction *old, **p;
-       irq_desc_t *desc = irq_descp(irq);
+       irq_desc_t *desc = irq_descp(vector);
 
        /*
         * The following block of code has to be executed atomically
@@ -248,8 +248,8 @@ int setup_vector(unsigned int irq, struc
 
        desc->depth = 0;
        desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST);
-       desc->handler->startup(irq);
-       desc->handler->enable(irq);
+       desc->handler->startup(vector);
+       desc->handler->enable(vector);
        spin_unlock_irqrestore(&desc->lock,flags);
 
        return 0;
@@ -258,13 +258,11 @@ int setup_vector(unsigned int irq, struc
 /* Vectors reserved by xen (and thus not sharable with domains).  */
 unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)];
 
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
-       unsigned int vec;
+int setup_irq_vector(unsigned int vec, struct irqaction * new)
+{
        int res;
 
-       /* Get vector for IRQ.  */
-       if (acpi_gsi_to_irq (irq, &vec) < 0)
+       if ( vec == IA64_INVALID_VECTOR )
                return -ENOSYS;
        /* Reserve the vector (and thus the irq).  */
        if (test_and_set_bit(vec, ia64_xen_vector))
@@ -273,14 +271,12 @@ int setup_irq(unsigned int irq, struct i
        return res;
 }
 
-void free_irq(unsigned int irq)
-{
-       unsigned int vec;
+void release_irq_vector(unsigned int vec)
+{
        unsigned long flags;
        irq_desc_t *desc;
 
-       /* Get vector for IRQ.  */
-       if (acpi_gsi_to_irq(irq, &vec) < 0)
+       if ( vec == IA64_INVALID_VECTOR )
                return;
 
        desc = irq_descp(vec);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/Makefile     Fri Feb 13 11:22:28 2009 +0900
@@ -92,3 +92,4 @@ clean::
 clean::
        rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
        rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
+       rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Feb 13 11:22:28 2009 +0900
@@ -58,6 +58,9 @@ static struct acpi_cpufreq_data *drv_dat
 
 static struct cpufreq_driver acpi_cpufreq_driver;
 
+static unsigned int __read_mostly acpi_pstate_strict;
+integer_param("acpi_pstate_strict", acpi_pstate_strict);
+
 static int check_est_cpu(unsigned int cpuid)
 {
     struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
@@ -180,7 +183,7 @@ static void drv_read(struct drv_cmd *cmd
     ASSERT(cpus_weight(cmd->mask) == 1);
 
     /* to reduce IPI for the sake of performance */
-    if (cpu_isset(smp_processor_id(), cmd->mask))
+    if (likely(cpu_isset(smp_processor_id(), cmd->mask)))
         do_drv_read((void *)cmd);
     else
         on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
@@ -196,15 +199,16 @@ static u32 get_cur_val(cpumask_t mask)
     struct cpufreq_policy *policy;
     struct processor_performance *perf;
     struct drv_cmd cmd;
-    unsigned int cpu;
+    unsigned int cpu = smp_processor_id();
 
     if (unlikely(cpus_empty(mask)))
         return 0;
 
-    cpu = first_cpu(mask);
+    if (!cpu_isset(cpu, mask))
+        cpu = first_cpu(mask);
     policy = cpufreq_cpu_policy[cpu];
 
-    if (!policy)
+    if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu])
         return 0;    
 
     switch (drv_data[policy->cpu]->cpu_feature) {
@@ -214,7 +218,7 @@ static u32 get_cur_val(cpumask_t mask)
         break;
     case SYSTEM_IO_CAPABLE:
         cmd.type = SYSTEM_IO_CAPABLE;
-        perf = drv_data[first_cpu(mask)]->acpi_data;
+        perf = drv_data[policy->cpu]->acpi_data;
         cmd.addr.io.port = perf->control_register.address;
         cmd.addr.io.bit_width = perf->control_register.bit_width;
         break;
@@ -393,7 +397,7 @@ static int acpi_cpufreq_target(struct cp
 
     drv_write(&cmd);
 
-    if (!check_freqs(cmd.mask, freqs.new, data)) {
+    if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
         printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new);
         return -EAGAIN;
     }
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/suspend.c
--- a/xen/arch/x86/acpi/suspend.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/acpi/suspend.c       Fri Feb 13 11:22:28 2009 +0900
@@ -65,6 +65,9 @@ void restore_rest_processor_state(void)
     /* Reload FPU state on next FPU use. */
     stts();
 
+    if (cpu_has_pat)
+        wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
     mtrr_ap_init();
     mcheck_init(&boot_cpu_data);
 }
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/Makefile
--- a/xen/arch/x86/boot/Makefile        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/boot/Makefile        Fri Feb 13 11:22:28 2009 +0900
@@ -1,1 +1,7 @@ obj-y += head.o
 obj-y += head.o
+
+head.o: reloc.S
+
+# NB. BOOT_TRAMPOLINE == 0x8c000
+%.S: %.c
+       RELOC=0x8c000 $(MAKE) -f build32.mk $@
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/build32.mk
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/build32.mk      Fri Feb 13 11:22:28 2009 +0900
@@ -0,0 +1,24 @@
+XEN_ROOT=../../../..
+override XEN_TARGET_ARCH=x86_32
+CFLAGS =
+include $(XEN_ROOT)/Config.mk
+
+# Disable PIE/SSP if GCC supports them. They can break us.
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
+
+CFLAGS += -Werror -fno-builtin -msoft-float
+
+%.S: %.bin
+       (od -v -t x $< | head -n -1 | \
+       sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@
+
+%.bin: %.lnk
+       $(OBJCOPY) -O binary $< $@
+
+%.lnk: %.o
+       $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $<
+
+%.o: %.c
+       $(CC) $(CFLAGS) -c $< -o $@
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/head.S
--- a/xen/arch/x86/boot/head.S  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/boot/head.S  Fri Feb 13 11:22:28 2009 +0900
@@ -79,8 +79,11 @@ __start:
         cmp     $0x2BADB002,%eax
         jne     not_multiboot
 
-        /* Save the Multiboot info structure for later use. */
-        mov     %ebx,sym_phys(multiboot_ptr)
+        /* Save the Multiboot info struct (after relocation) for later use. */
+        mov     $sym_phys(cpu0_stack)+1024,%esp
+        push    %ebx
+        call    reloc
+        mov     %eax,sym_phys(multiboot_ptr)
 
         /* Initialize BSS (no nasty surprises!) */
         mov     $sym_phys(__bss_start),%edi
@@ -192,6 +195,9 @@ 2:      cmp     $L1_PAGETABLE_ENTRIES,%e
 
 #include "cmdline.S"
 
+reloc:
+#include "reloc.S"
+
         .align 16
         .globl trampoline_start, trampoline_end
 trampoline_start:
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/reloc.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/reloc.c Fri Feb 13 11:22:28 2009 +0900
@@ -0,0 +1,89 @@
+/******************************************************************************
+ * reloc.c
+ * 
+ * 32-bit flat memory-map routines for relocating Multiboot structures
+ * and modules. This is most easily done early with paging disabled.
+ * 
+ * Copyright (c) 2009, Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+asm (
+    "    .text                         \n"
+    "    .globl _start                 \n"
+    "_start:                           \n"
+    "    mov  $_start,%edi             \n"
+    "    call 1f                       \n"
+    "1:  pop  %esi                     \n"
+    "    sub  $1b-_start,%esi          \n"
+    "    mov  $__bss_start-_start,%ecx \n"
+    "    rep  movsb                    \n"
+    "    xor  %eax,%eax                \n"
+    "    mov  $_end,%ecx               \n"
+    "    sub  %edi,%ecx                \n"
+    "    rep  stosb                    \n"
+    "    mov  $reloc,%eax              \n"
+    "    jmp  *%eax                    \n"
+    );
+
+typedef unsigned int u32;
+#include "../../../include/xen/multiboot.h"
+
+extern char _start[];
+
+static void *memcpy(void *dest, const void *src, unsigned int n)
+{
+    char *s = (char *)src, *d = dest;
+    while ( n-- )
+        *d++ = *s++;
+    return dest;
+}
+
+static void *reloc_mbi_struct(void *old, unsigned int bytes)
+{
+    static void *alloc = &_start;
+    alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul);
+    return memcpy(alloc, old, bytes);
+}
+
+static char *reloc_mbi_string(char *old)
+{
+    char *p;
+    for ( p = old; *p != '\0'; p++ )
+        continue;
+    return reloc_mbi_struct(old, p - old + 1);
+}
+
+multiboot_info_t *reloc(multiboot_info_t *mbi_old)
+{
+    multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi));
+    int i;
+
+    if ( mbi->flags & MBI_CMDLINE )
+        mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline);
+
+    if ( mbi->flags & MBI_MODULES )
+    {
+        module_t *mods = reloc_mbi_struct(
+            (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
+        mbi->mods_addr = (u32)mods;
+        for ( i = 0; i < mbi->mods_count; i++ )
+            if ( mods[i].string )
+                mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
+    }
+
+    if ( mbi->flags & MBI_MEMMAP )
+        mbi->mmap_addr = (u32)reloc_mbi_struct(
+            (memory_map_t *)mbi->mmap_addr, mbi->mmap_length);
+
+    /* Mask features we don't understand or don't relocate. */
+    mbi->flags &= (MBI_MEMLIMITS |
+                   MBI_DRIVES |
+                   MBI_CMDLINE |
+                   MBI_MODULES |
+                   MBI_MEMMAP);
+
+    return mbi;
+}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c  Fri Feb 13 11:22:28 2009 +0900
@@ -99,6 +99,8 @@ void k8_machine_check(struct cpu_user_re
 
        mc_data = x86_mcinfo_getptr();
        cpu_nr = smp_processor_id();
+       BUG_ON(cpu_nr != vcpu->processor);
+
        curdom = vcpu->domain;
 
        memset(&mc_global, 0, sizeof(mc_global));
@@ -106,14 +108,12 @@ void k8_machine_check(struct cpu_user_re
        mc_global.common.size = sizeof(mc_global);
 
        mc_global.mc_domid = curdom->domain_id; /* impacted domain */
-       mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
-       BUG_ON(cpu_nr != vcpu->processor);
-       mc_global.mc_core_threadid = 0;
+
+       x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
+           &mc_global.mc_coreid, &mc_global.mc_core_threadid,
+           &mc_global.mc_apicid, NULL, NULL, NULL);
+
        mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
-         It's not clear to me how to figure this out. */
-       mc_global.mc_socketid = ???;
-#endif
        mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
        rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_nonfatal.c
--- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c    Fri Feb 13 11:22:28 2009 +0900
@@ -95,6 +95,7 @@ void mce_amd_checkregs(void *info)
        mc_data = NULL;
 
        cpu_nr = smp_processor_id();
+       BUG_ON(cpu_nr != vcpu->processor);
        event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
        error_found = 0;
 
@@ -103,14 +104,12 @@ void mce_amd_checkregs(void *info)
        mc_global.common.size = sizeof(mc_global);
 
        mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
-       mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
-       BUG_ON(cpu_nr != vcpu->processor);
-       mc_global.mc_core_threadid = 0;
        mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
-         It's not clear to me how to figure this out. */
-       mc_global.mc_socketid = ???;
-#endif
+
+       x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
+           &mc_global.mc_coreid, &mc_global.mc_core_threadid,
+           &mc_global.mc_apicid, NULL, NULL, NULL);
+
        mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
        rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Fri Feb 13 11:22:28 2009 +0900
@@ -443,6 +443,96 @@ next:
 
 
 
+static void do_mc_get_cpu_info(void *v)
+{
+       int cpu = smp_processor_id();
+       int cindex, cpn;
+       struct cpuinfo_x86 *c;
+       xen_mc_logical_cpu_t *log_cpus, *xcp;
+       uint32_t junk, ebx;
+
+       log_cpus = v;
+       c = &cpu_data[cpu];
+       cindex = 0;
+       cpn = cpu - 1;
+
+       /*
+        * Deal with sparse masks, condensed into a contig array.
+        */
+       while (cpn >= 0) {
+               if (cpu_isset(cpn, cpu_online_map))
+                       cindex++;
+               cpn--;
+       }
+
+       xcp = &log_cpus[cindex];
+       c = &cpu_data[cpu];
+       xcp->mc_cpunr = cpu;
+       x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
+           &xcp->mc_coreid, &xcp->mc_threadid,
+           &xcp->mc_apicid, &xcp->mc_ncores,
+           &xcp->mc_ncores_active, &xcp->mc_nthreads);
+       xcp->mc_cpuid_level = c->cpuid_level;
+       xcp->mc_family = c->x86;
+       xcp->mc_vendor = c->x86_vendor;
+       xcp->mc_model = c->x86_model;
+       xcp->mc_step = c->x86_mask;
+       xcp->mc_cache_size = c->x86_cache_size;
+       xcp->mc_cache_alignment = c->x86_cache_alignment;
+       memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
+       memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
+       memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
+
+       /*
+        * This part needs to run on the CPU itself.
+        */
+       xcp->mc_nmsrvals = __MC_NMSRS;
+       xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
+       rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
+
+       if (c->cpuid_level >= 1) {
+               cpuid(1, &junk, &ebx, &junk, &junk);
+               xcp->mc_clusterid = (ebx >> 24) & 0xff;
+       } else
+               xcp->mc_clusterid = hard_smp_processor_id();
+}
+
+
+void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
+                        uint16_t *threadid, uint32_t *apicid,
+                        unsigned *ncores, unsigned *ncores_active,
+                        unsigned *nthreads)
+{
+       struct cpuinfo_x86 *c;
+
+       *apicid = cpu_physical_id(cpu);
+       c = &cpu_data[cpu];
+       if (c->apicid == BAD_APICID) {
+               *chipid = cpu;
+               *coreid = 0;
+               *threadid = 0;
+               if (ncores != NULL)
+                       *ncores = 1;
+               if (ncores_active != NULL)
+                       *ncores_active = 1;
+               if (nthreads != NULL)
+                       *nthreads = 1;
+       } else {
+               *chipid = phys_proc_id[cpu];
+               if (c->x86_max_cores > 1)
+                       *coreid = cpu_core_id[cpu];
+               else
+                       *coreid = 0;
+               *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
+               if (ncores != NULL)
+                       *ncores = c->x86_max_cores;
+               if (ncores_active != NULL)
+                       *ncores_active = c->booted_cores;
+               if (nthreads != NULL)
+                       *nthreads = c->x86_num_siblings;
+       }
+}
+
 /* Machine Check Architecture Hypercall */
 long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
 {
@@ -452,6 +542,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
        struct domain *domU;
        struct xen_mc_fetch *mc_fetch;
        struct xen_mc_notifydomain *mc_notifydomain;
+       struct xen_mc_physcpuinfo *mc_physcpuinfo;
        struct mc_info *mi;
        uint32_t flags;
        uint32_t fetch_idx;
@@ -460,6 +551,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
         * a DomU to fetch mc data while Dom0 notifies another DomU. */
        static DEFINE_SPINLOCK(mc_lock);
        static DEFINE_SPINLOCK(mc_notify_lock);
+       int nlcpu;
+       xen_mc_logical_cpu_t *log_cpus = NULL;
 
        if ( copy_from_guest(op, u_xen_mc, 1) )
                return -EFAULT;
@@ -580,6 +673,43 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
 
                spin_unlock(&mc_notify_lock);
                break;
+
+       case XEN_MC_physcpuinfo:
+              if ( !IS_PRIV(v->domain) )
+                      return -EPERM;
+ 
+              mc_physcpuinfo = &op->u.mc_physcpuinfo;
+              nlcpu = num_online_cpus();
+ 
+              if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+                      if (mc_physcpuinfo->ncpus <= 0)
+                              return -EINVAL;
+                      nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus);
+                      log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
+                      if (log_cpus == NULL)
+                              return -ENOMEM;
+ 
+                      if (on_each_cpu(do_mc_get_cpu_info, log_cpus,
+                          1, 1) != 0) {
+                              xfree(log_cpus);
+                              return -EIO;
+                      }
+              }
+ 
+              mc_physcpuinfo->ncpus = nlcpu;
+ 
+              if (copy_to_guest(u_xen_mc, op, 1)) {
+                      if (log_cpus != NULL)
+                              xfree(log_cpus);
+                      return -EFAULT;
+              }
+ 
+              if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+                      if (copy_to_guest(mc_physcpuinfo->info,
+                          log_cpus, nlcpu))
+                              ret = -EFAULT;
+                      xfree(log_cpus);
+              }
        }
 
        return ret;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Fri Feb 13 11:22:28 2009 +0900
@@ -34,4 +34,5 @@ int x86_mcinfo_add(struct mc_info *mi, v
 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
 void x86_mcinfo_dump(struct mc_info *mi);
 void mc_panic(char *s);
-
+void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
+                        uint32_t *, uint32_t *, uint32_t *, uint32_t *);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Fri Feb 13 11:22:28 2009 +0900
@@ -182,11 +182,9 @@ static struct mc_info *machine_check_pol
         mcg.mc_flags = MC_FLAG_POLLED;
     else if (calltype == MC_FLAG_CMCI)
         mcg.mc_flags = MC_FLAG_CMCI;
-    mcg.mc_socketid = phys_proc_id[cpu];
-    mcg.mc_coreid = cpu_core_id[cpu];
-    mcg.mc_apicid = cpu_physical_id(cpu);
-    mcg.mc_core_threadid =
-        mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 
+    x86_mc_get_cpu_info(
+        cpu, &mcg.mc_socketid, &mcg.mc_coreid,
+        &mcg.mc_core_threadid, &mcg.mc_apicid, NULL, NULL, NULL);
     rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
 
     for ( i = 0; i < nr_mce_banks; i++ ) {
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domain.c     Fri Feb 13 11:22:28 2009 +0900
@@ -141,7 +141,7 @@ void dump_pageframe_info(struct domain *
     }
     else
     {
-        list_for_each_entry ( page, &d->page_list, list )
+        page_list_for_each ( page, &d->page_list )
         {
             printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                    _p(page_to_mfn(page)),
@@ -154,7 +154,7 @@ void dump_pageframe_info(struct domain *
         p2m_pod_dump_data(d);
     }
 
-    list_for_each_entry ( page, &d->xenpage_list, list )
+    page_list_for_each ( page, &d->xenpage_list )
     {
         printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                _p(page_to_mfn(page)),
@@ -352,6 +352,8 @@ int vcpu_initialise(struct vcpu *v)
     v->arch.perdomain_ptes =
         d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
 
+    spin_lock_init(&v->arch.shadow_ldt_lock);
+
     return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
 }
 
@@ -380,7 +382,7 @@ int arch_domain_create(struct domain *d,
     INIT_LIST_HEAD(&d->arch.pdev_list);
 
     d->arch.relmem = RELMEM_not_started;
-    INIT_LIST_HEAD(&d->arch.relmem_list);
+    INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
     d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0);
@@ -1655,9 +1657,8 @@ int hypercall_xlat_continuation(unsigned
 #endif
 
 static int relinquish_memory(
-    struct domain *d, struct list_head *list, unsigned long type)
-{
-    struct list_head *ent;
+    struct domain *d, struct page_list_head *list, unsigned long type)
+{
     struct page_info  *page;
     unsigned long     x, y;
     int               ret = 0;
@@ -1665,17 +1666,13 @@ static int relinquish_memory(
     /* Use a recursive lock, as we may enter 'free_domheap_page'. */
     spin_lock_recursive(&d->page_alloc_lock);
 
-    ent = list->next;
-    while ( ent != list )
-    {
-        page = list_entry(ent, struct page_info, list);
-
+    while ( (page = page_list_remove_head(list)) )
+    {
         /* Grab a reference to the page so it won't disappear from under us. */
         if ( unlikely(!get_page(page, d)) )
         {
             /* Couldn't get a reference -- someone is freeing this page. */
-            ent = ent->next;
-            list_move_tail(&page->list, &d->arch.relmem_list);
+            page_list_add_tail(page, &d->arch.relmem_list);
             continue;
         }
 
@@ -1687,6 +1684,7 @@ static int relinquish_memory(
             break;
         case -EAGAIN:
         case -EINTR:
+            page_list_add(page, list);
             set_bit(_PGT_pinned, &page->u.inuse.type_info);
             put_page(page);
             goto out;
@@ -1723,6 +1721,7 @@ static int relinquish_memory(
                 case 0:
                     break;
                 case -EINTR:
+                    page_list_add(page, list);
                     page->u.inuse.type_info |= PGT_validated;
                     if ( x & PGT_partial )
                         put_page(page);
@@ -1730,6 +1729,7 @@ static int relinquish_memory(
                     ret = -EAGAIN;
                     goto out;
                 case -EAGAIN:
+                    page_list_add(page, list);
                     page->u.inuse.type_info |= PGT_partial;
                     if ( x & PGT_partial )
                         put_page(page);
@@ -1746,9 +1746,8 @@ static int relinquish_memory(
             }
         }
 
-        /* Follow the list chain and /then/ potentially free the page. */
-        ent = ent->next;
-        list_move_tail(&page->list, &d->arch.relmem_list);
+        /* Put the page on the list and /then/ potentially free it. */
+        page_list_add_tail(page, &d->arch.relmem_list);
         put_page(page);
 
         if ( hypercall_preempt_check() )
@@ -1758,7 +1757,12 @@ static int relinquish_memory(
         }
     }
 
-    list_splice_init(&d->arch.relmem_list, list);
+    /* list is empty at this point. */
+    if ( !page_list_empty(&d->arch.relmem_list) )
+    {
+        *list = d->arch.relmem_list;
+        INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
+    }
 
  out:
     spin_unlock_recursive(&d->page_alloc_lock);
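
The relinquish_memory() rework above stops following ->next pointers by hand and instead pops each page off the head of the list, re-queuing it on relmem_list (or back onto the source list when the operation is interrupted). Below is a minimal, self-contained sketch of that consume-and-requeue pattern; it uses a toy singly-linked list rather than Xen's page_list API, and every name in it is illustrative only.

    /* Toy singly-linked list; illustrative only, not Xen's page_list API. */
    #include <stdio.h>
    #include <stddef.h>

    struct page  { struct page *next; int id; };
    struct plist { struct page *head, *tail; };

    static struct page *plist_remove_head(struct plist *l)
    {
        struct page *p = l->head;
        if ( p != NULL )
        {
            l->head = p->next;
            if ( l->head == NULL )
                l->tail = NULL;
            p->next = NULL;
        }
        return p;
    }

    static void plist_add_tail(struct plist *l, struct page *p)
    {
        p->next = NULL;
        if ( l->tail != NULL )
            l->tail->next = p;
        else
            l->head = p;
        l->tail = p;
    }

    /* Pop pages one at a time; anything still on 'src' when the loop stops
     * can be picked up again simply by re-entering it (cf. the -EAGAIN and
     * -EINTR cases above putting the page back on the list). */
    static void drain(struct plist *src, struct plist *done)
    {
        struct page *p;
        while ( (p = plist_remove_head(src)) != NULL )
            plist_add_tail(done, p);     /* per-page teardown would go here */
    }

    int main(void)
    {
        struct page pg[3] = { { NULL, 0 }, { NULL, 1 }, { NULL, 2 } };
        struct plist src = { NULL, NULL }, done = { NULL, NULL };

        for ( int i = 0; i < 3; i++ )
            plist_add_tail(&src, &pg[i]);
        drain(&src, &done);
        for ( struct page *p = done.head; p != NULL; p = p->next )
            printf("relinquished page %d\n", p->id);
        return 0;
    }
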
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domain_build.c       Fri Feb 13 11:22:28 2009 +0900
@@ -880,7 +880,7 @@ int __init construct_dom0(
     }
     si->first_p2m_pfn = pfn;
     si->nr_p2m_frames = d->tot_pages - count;
-    list_for_each_entry ( page, &d->page_list, list )
+    page_list_for_each ( page, &d->page_list )
     {
         mfn = page_to_mfn(page);
         if ( get_gpfn_from_mfn(mfn) >= count )
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domctl.c     Fri Feb 13 11:22:28 2009 +0900
@@ -240,7 +240,7 @@ long arch_do_domctl(
         struct domain *d = rcu_lock_domain_by_id(domctl->domain);
         unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
         uint64_t mfn;
-        struct list_head *list_ent;
+        struct page_info *page;
 
         ret = -EINVAL;
         if ( d != NULL )
@@ -259,19 +259,19 @@ long arch_do_domctl(
                 goto getmemlist_out;
             }
 
-            ret = 0;
-            list_ent = d->page_list.next;
-            for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
+            ret = i = 0;
+            page_list_for_each(page, &d->page_list)
             {
-                mfn = page_to_mfn(list_entry(
-                    list_ent, struct page_info, list));
+                if ( i >= max_pfns )
+                    break;
+                mfn = page_to_mfn(page);
                 if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
                                           i, &mfn, 1) )
                 {
                     ret = -EFAULT;
                     break;
                 }
-                list_ent = mfn_to_page(mfn)->list.next;
+                ++i;
             }
             
             spin_unlock(&d->page_alloc_lock);
@@ -416,6 +416,34 @@ long arch_do_domctl(
         rcu_unlock_domain(d);
     }
     break;
+
+    case XEN_DOMCTL_gethvmcontext_partial:
+    { 
+        struct domain *d;
+
+        ret = -ESRCH;
+        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+            break;
+
+        ret = xsm_hvmcontext(d, domctl->cmd);
+        if ( ret )
+            goto gethvmcontext_partial_out;
+
+        ret = -EINVAL;
+        if ( !is_hvm_domain(d) ) 
+            goto gethvmcontext_partial_out;
+
+        domain_pause(d);
+        ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type,
+                           domctl->u.hvmcontext_partial.instance,
+                           domctl->u.hvmcontext_partial.buffer);
+        domain_unpause(d);
+
+    gethvmcontext_partial_out:
+        rcu_unlock_domain(d);
+    }
+    break;
+
 
     case XEN_DOMCTL_set_address_size:
     {
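
The getmemlist change above swaps raw ->next chasing for an iterator plus an explicit counter capped at max_pfns, so the walk no longer depends on mfn_to_page(mfn)->list.next. The following stand-alone sketch shows only the bounded-copy idea, with plain arrays standing in for the domain's page list and the guest buffer; the names are illustrative, not the hypervisor API.

    /* Illustrative only: arrays stand in for the page list and guest buffer. */
    #include <stdint.h>
    #include <stdio.h>

    /* Copy at most max_pfns entries from 'mfns' into 'buf'; returns how many
     * were written.  copy_to_guest_offset() plays this role in the hunk above. */
    static unsigned long copy_memlist(const uint64_t *mfns, unsigned long nr,
                                      uint64_t *buf, unsigned long max_pfns)
    {
        unsigned long i = 0;

        for ( unsigned long n = 0; n < nr; n++ )
        {
            if ( i >= max_pfns )
                break;                /* bound reached: stop, never overrun */
            buf[i++] = mfns[n];
        }
        return i;
    }

    int main(void)
    {
        uint64_t mfns[] = { 0x1000, 0x1001, 0x1002, 0x1003 };
        uint64_t buf[2];
        unsigned long n = copy_memlist(mfns, 4, buf, 2);

        printf("copied %lu entries, first 0x%llx\n",
               n, (unsigned long long)buf[0]);
        return 0;
    }
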
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/e820.c       Fri Feb 13 11:22:28 2009 +0900
@@ -1,10 +1,10 @@
 #include <xen/config.h>
 #include <xen/init.h>
 #include <xen/lib.h>
+#include <xen/mm.h>
 #include <xen/compat.h>
 #include <xen/dmi.h>
 #include <asm/e820.h>
-#include <asm/mm.h>
 #include <asm/page.h>
 
 /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/i8259.c      Fri Feb 13 11:22:28 2009 +0900
@@ -410,8 +410,8 @@ void __init init_IRQ(void)
     }
 
     /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
-    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN;
-    vector_irq[0x80] = NEVER_ASSIGN;
+    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ;
+    vector_irq[0x80] = NEVER_ASSIGN_IRQ;
 
     apic_intr_init();
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/io_apic.c    Fri Feb 13 11:22:28 2009 +0900
@@ -49,7 +49,6 @@ static struct { int pin, apic; } ioapic_
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
 
 int skip_ioapic_setup;
 
@@ -88,9 +87,6 @@ static struct irq_pin_list {
     [0 ... PIN_MAP_SIZE-1].pin = -1
 };
 static int irq_2_pin_free_entry = NR_IRQS;
-
-int vector_irq[NR_VECTORS] __read_mostly = {
-    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN};
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -668,56 +664,6 @@ static inline int IO_APIC_irq_trigger(in
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 u8 irq_vector[NR_IRQS] __read_mostly;
-
-int free_irq_vector(int vector)
-{
-    int irq;
-
-    BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
-
-    spin_lock(&vector_lock);
-    if ((irq = vector_irq[vector]) == AUTO_ASSIGN)
-        vector_irq[vector] = FREE_TO_ASSIGN;
-    spin_unlock(&vector_lock);
-
-    return (irq == AUTO_ASSIGN) ? 0 : -EINVAL;
-}
-
-int assign_irq_vector(int irq)
-{
-    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
-    unsigned vector;
-
-    BUG_ON(irq >= NR_IRQS);
-
-    spin_lock(&vector_lock);
-
-    if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) {
-        spin_unlock(&vector_lock);
-        return IO_APIC_VECTOR(irq);
-    }
-
-    vector = current_vector;
-    while (vector_irq[vector] != FREE_TO_ASSIGN) {
-        vector += 8;
-        if (vector > LAST_DYNAMIC_VECTOR)
-            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
-
-        if (vector == current_vector) {
-            spin_unlock(&vector_lock);
-            return -ENOSPC;
-        }
-    }
-
-    current_vector = vector;
-    vector_irq[vector] = irq;
-    if (irq != AUTO_ASSIGN)
-        IO_APIC_VECTOR(irq) = vector;
-
-    spin_unlock(&vector_lock);
-
-    return vector;
-}
 
 static struct hw_interrupt_type ioapic_level_type;
 static struct hw_interrupt_type ioapic_edge_type;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/irq.c        Fri Feb 13 11:22:28 2009 +0900
@@ -27,6 +27,11 @@ boolean_param("noirqbalance", opt_noirqb
 
 irq_desc_t irq_desc[NR_VECTORS];
 
+static DEFINE_SPINLOCK(vector_lock);
+int vector_irq[NR_VECTORS] __read_mostly = {
+    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
+
 static void __do_IRQ_guest(int vector);
 
 void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
@@ -54,6 +59,56 @@ struct hw_interrupt_type no_irq_type = {
 
 atomic_t irq_err_count;
 
+int free_irq_vector(int vector)
+{
+    int irq;
+
+    BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
+
+    spin_lock(&vector_lock);
+    if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ)
+        vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
+    spin_unlock(&vector_lock);
+
+    return (irq == AUTO_ASSIGN_IRQ) ? 0 : -EINVAL;
+}
+
+int assign_irq_vector(int irq)
+{
+    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
+    unsigned vector;
+
+    BUG_ON(irq >= NR_IRQS);
+
+    spin_lock(&vector_lock);
+
+    if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) {
+        spin_unlock(&vector_lock);
+        return IO_APIC_VECTOR(irq);
+    }
+
+    vector = current_vector;
+    while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) {
+        vector += 8;
+        if (vector > LAST_DYNAMIC_VECTOR)
+            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
+
+        if (vector == current_vector) {
+            spin_unlock(&vector_lock);
+            return -ENOSPC;
+        }
+    }
+
+    current_vector = vector;
+    vector_irq[vector] = irq;
+    if (irq != AUTO_ASSIGN_IRQ)
+        IO_APIC_VECTOR(irq) = vector;
+
+    spin_unlock(&vector_lock);
+
+    return vector;
+}
+
 asmlinkage void do_IRQ(struct cpu_user_regs *regs)
 {
     unsigned int      vector = regs->entry_vector;
@@ -104,7 +159,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
     spin_unlock(&desc->lock);
 }
 
-int request_irq(unsigned int irq,
+int request_irq_vector(unsigned int vector,
         void (*handler)(int, void *, struct cpu_user_regs *),
         unsigned long irqflags, const char * devname, void *dev_id)
 {
@@ -117,7 +172,7 @@ int request_irq(unsigned int irq,
      * which interrupt is which (messes up the interrupt freeing
      * logic etc).
      */
-    if (irq >= NR_IRQS)
+    if (vector >= NR_VECTORS)
         return -EINVAL;
     if (!handler)
         return -EINVAL;
@@ -130,34 +185,32 @@ int request_irq(unsigned int irq,
     action->name = devname;
     action->dev_id = dev_id;
 
-    retval = setup_irq(irq, action);
+    retval = setup_irq_vector(vector, action);
     if (retval)
         xfree(action);
 
     return retval;
 }
 
-void free_irq(unsigned int irq)
-{
-    unsigned int  vector = irq_to_vector(irq);
-    irq_desc_t   *desc = &irq_desc[vector];
+void release_irq_vector(unsigned int vector)
+{
+    irq_desc_t *desc = &irq_desc[vector];
     unsigned long flags;
 
     spin_lock_irqsave(&desc->lock,flags);
     desc->action  = NULL;
     desc->depth   = 1;
     desc->status |= IRQ_DISABLED;
-    desc->handler->shutdown(irq);
+    desc->handler->shutdown(vector);
     spin_unlock_irqrestore(&desc->lock,flags);
 
     /* Wait to make sure it's not being used on another CPU */
     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
 }
 
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
-    unsigned int  vector = irq_to_vector(irq);
-    irq_desc_t   *desc = &irq_desc[vector];
+int setup_irq_vector(unsigned int vector, struct irqaction *new)
+{
+    irq_desc_t *desc = &irq_desc[vector];
     unsigned long flags;
  
     spin_lock_irqsave(&desc->lock,flags);
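
assign_irq_vector(), now living in irq.c, scans vector_irq[] round-robin in steps of 8 and wraps with a +1 offset so every dynamic vector is eventually visited. The stand-alone sketch below reproduces only the shape of that search; the locking, AUTO_ASSIGN_IRQ handling and IO_APIC_VECTOR bookkeeping are omitted, and all constants and names are illustrative.

    /* Illustrative constants and names; locking and IRQ bookkeeping omitted. */
    #include <stdio.h>

    #define FIRST_DYN 0x20
    #define LAST_DYN  0xef
    #define NR_VEC    256
    #define FREE      (-1)

    static int table[NR_VEC];                  /* -1 = free, otherwise in use */

    static int pick_vector(void)
    {
        static unsigned int current = FIRST_DYN;
        unsigned int v = current;

        while ( table[v] != FREE )
        {
            v += 8;                            /* spread across priority classes */
            if ( v > LAST_DYN )
                v = FIRST_DYN + ((v + 1) & 7); /* wrap onto the next residue */
            if ( v == current )
                return -1;                     /* table is full */
        }
        current = v;
        table[v] = 1;
        return v;
    }

    int main(void)
    {
        for ( int i = 0; i < NR_VEC; i++ )
            table[i] = FREE;
        printf("first vector:  %#x\n", (unsigned)pick_vector());
        printf("second vector: %#x\n", (unsigned)pick_vector());
        return 0;
    }
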
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900
@@ -179,12 +179,6 @@ l2_pgentry_t *compat_idle_pg_table_l2 = 
 #define l3_disallow_mask(d) L3_DISALLOW_MASK
 #endif
 
-static void queue_deferred_ops(struct domain *d, unsigned int ops)
-{
-    ASSERT(d == current->domain);
-    this_cpu(percpu_mm_info).deferred_ops |= ops;
-}
-
 void __init init_frametable(void)
 {
     unsigned long nr_pages, page_step, i, mfn;
@@ -333,7 +327,7 @@ void share_xen_page_with_guest(
         page->count_info |= PGC_allocated | 1;
         if ( unlikely(d->xenheap_pages++ == 0) )
             get_knownalive_domain(d);
-        list_add_tail(&page->list, &d->xenpage_list);
+        page_list_add_tail(page, &d->xenpage_list);
     }
 
     spin_unlock(&d->page_alloc_lock);
@@ -464,14 +458,18 @@ void update_cr3(struct vcpu *v)
 }
 
 
-static void invalidate_shadow_ldt(struct vcpu *v)
+static void invalidate_shadow_ldt(struct vcpu *v, int flush)
 {
     int i;
     unsigned long pfn;
     struct page_info *page;
-    
+
+    BUG_ON(unlikely(in_irq()));
+
+    spin_lock(&v->arch.shadow_ldt_lock);
+
     if ( v->arch.shadow_ldt_mapcnt == 0 )
-        return;
+        goto out;
 
     v->arch.shadow_ldt_mapcnt = 0;
 
@@ -486,11 +484,12 @@ static void invalidate_shadow_ldt(struct
         put_page_and_type(page);
     }
 
-    /* Dispose of the (now possibly invalid) mappings from the TLB.  */
-    if ( v == current )
-        queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
-    else
-        flush_tlb_mask(v->domain->domain_dirty_cpumask);
+    /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */
+    if ( flush )
+        flush_tlb_mask(v->vcpu_dirty_cpumask);
+
+ out:
+    spin_unlock(&v->arch.shadow_ldt_lock);
 }
 
 
@@ -541,8 +540,10 @@ int map_ldt_shadow_page(unsigned int off
 
     nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
 
+    spin_lock(&v->arch.shadow_ldt_lock);
     l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
     v->arch.shadow_ldt_mapcnt++;
+    spin_unlock(&v->arch.shadow_ldt_lock);
 
     return 1;
 }
@@ -989,7 +990,7 @@ void put_page_from_l1e(l1_pgentry_t l1e,
              (d == e) )
         {
             for_each_vcpu ( d, v )
-                invalidate_shadow_ldt(v);
+                invalidate_shadow_ldt(v, 1);
         }
         put_page(page);
     }
@@ -2023,30 +2024,17 @@ int free_page_type(struct page_info *pag
     unsigned long gmfn;
     int rc;
 
-    if ( likely(owner != NULL) )
-    {
-        /*
-         * We have to flush before the next use of the linear mapping
-         * (e.g., update_va_mapping()) or we could end up modifying a page
-         * that is no longer a page table (and hence screw up ref counts).
-         */
-        if ( current->domain == owner )
-            queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
-        else
-            flush_tlb_mask(owner->domain_dirty_cpumask);
-
-        if ( unlikely(paging_mode_enabled(owner)) )
-        {
-            /* A page table is dirtied when its type count becomes zero. */
-            paging_mark_dirty(owner, page_to_mfn(page));
-
-            if ( shadow_mode_refcounts(owner) )
-                return 0;
-
-            gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
-            ASSERT(VALID_M2P(gmfn));
-            shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
-        }
+    if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
+    {
+        /* A page table is dirtied when its type count becomes zero. */
+        paging_mark_dirty(owner, page_to_mfn(page));
+
+        if ( shadow_mode_refcounts(owner) )
+            return 0;
+
+        gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
+        ASSERT(VALID_M2P(gmfn));
+        shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
     }
 
     if ( !(type & PGT_partial) )
@@ -2366,8 +2354,8 @@ void cleanup_page_cacheattr(struct page_
 
 int new_guest_cr3(unsigned long mfn)
 {
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
     int okay;
     unsigned long old_base_mfn;
 
@@ -2377,19 +2365,19 @@ int new_guest_cr3(unsigned long mfn)
         okay = paging_mode_refcounts(d)
             ? 0 /* Old code was broken, but what should it be? */
             : mod_l4_entry(
-                    __va(pagetable_get_paddr(v->arch.guest_table)),
+                    __va(pagetable_get_paddr(curr->arch.guest_table)),
                     l4e_from_pfn(
                         mfn,
                         (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
-                    pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
+                    pagetable_get_pfn(curr->arch.guest_table), 0, 0) == 0;
         if ( unlikely(!okay) )
         {
             MEM_LOG("Error while installing new compat baseptr %lx", mfn);
             return 0;
         }
 
-        invalidate_shadow_ldt(v);
-        write_ptbase(v);
+        invalidate_shadow_ldt(curr, 0);
+        write_ptbase(curr);
 
         return 1;
     }
@@ -2403,14 +2391,14 @@ int new_guest_cr3(unsigned long mfn)
         return 0;
     }
 
-    invalidate_shadow_ldt(v);
-
-    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-
-    v->arch.guest_table = pagetable_from_pfn(mfn);
-    update_cr3(v);
-
-    write_ptbase(v);
+    invalidate_shadow_ldt(curr, 0);
+
+    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
+
+    curr->arch.guest_table = pagetable_from_pfn(mfn);
+    update_cr3(curr);
+
+    write_ptbase(curr);
 
     if ( likely(old_base_mfn != 0) )
     {
@@ -2440,6 +2428,10 @@ static void process_deferred_ops(void)
             flush_tlb_local();
     }
 
+    /*
+     * Do this after flushing TLBs, to ensure we see fresh LDT mappings
+     * via the linear pagetable mapping.
+     */
     if ( deferred_ops & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
 
@@ -2565,8 +2557,8 @@ int do_mmuext_op(
     unsigned long mfn = 0, gmfn = 0, type;
     unsigned int done = 0;
     struct page_info *page;
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -2729,8 +2721,8 @@ int do_mmuext_op(
                 }
             }
 
-            old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
-            v->arch.guest_table_user = pagetable_from_pfn(mfn);
+            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+            curr->arch.guest_table_user = pagetable_from_pfn(mfn);
 
             if ( old_mfn != 0 )
             {
@@ -2750,7 +2742,7 @@ int do_mmuext_op(
     
         case MMUEXT_INVLPG_LOCAL:
             if ( !paging_mode_enabled(d) 
-                 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
+                 || paging_invlpg(curr, op.arg1.linear_addr) != 0 )
                 flush_tlb_one_local(op.arg1.linear_addr);
             break;
 
@@ -2773,7 +2765,7 @@ int do_mmuext_op(
         }
 
         case MMUEXT_TLB_FLUSH_ALL:
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
             break;
     
         case MMUEXT_INVLPG_ALL:
@@ -2809,13 +2801,14 @@ int do_mmuext_op(
                 okay = 0;
                 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
             }
-            else if ( (v->arch.guest_context.ldt_ents != ents) || 
-                      (v->arch.guest_context.ldt_base != ptr) )
+            else if ( (curr->arch.guest_context.ldt_ents != ents) || 
+                      (curr->arch.guest_context.ldt_base != ptr) )
             {
-                invalidate_shadow_ldt(v);
-                v->arch.guest_context.ldt_base = ptr;
-                v->arch.guest_context.ldt_ents = ents;
-                load_LDT(v);
+                invalidate_shadow_ldt(curr, 0);
+                this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
+                curr->arch.guest_context.ldt_base = ptr;
+                curr->arch.guest_context.ldt_ents = ents;
+                load_LDT(curr);
                 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
                 if ( ents != 0 )
                     this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
@@ -2931,8 +2924,7 @@ int do_mmu_update(
     struct page_info *page;
     int rc = 0, okay = 1, i = 0;
     unsigned int cmd, done = 0;
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
+    struct domain *d = current->domain;
     struct domain_mmap_cache mapcache;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3042,7 +3034,8 @@ int do_mmu_update(
 #endif
                 case PGT_writable_page:
                     perfc_incr(writable_mmu_updates);
-                    okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+                    okay = paging_write_guest_entry(
+                        current, va, req.val, _mfn(mfn));
                     break;
                 }
                 page_unlock(page);
@@ -3052,7 +3045,8 @@ int do_mmu_update(
             else if ( get_page_type(page, PGT_writable_page) )
             {
                 perfc_incr(writable_mmu_updates);
-                okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+                okay = paging_write_guest_entry(
+                    current, va, req.val, _mfn(mfn));
                 put_page_type(page);
             }
 
@@ -3508,7 +3502,7 @@ int steal_page(
     /* Unlink from original owner. */
     if ( !(memflags & MEMF_no_refcount) )
         d->tot_pages--;
-    list_del(&page->list);
+    page_list_del(page, &d->page_list);
 
     spin_unlock(&d->page_alloc_lock);
     return 0;
@@ -3567,34 +3561,40 @@ int do_update_va_mapping(unsigned long v
     if ( pl1e )
         guest_unmap_l1e(v, pl1e);
 
-    process_deferred_ops();
-
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
-            flush_tlb_local();
+            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
             break;
         case UVMF_ALL:
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
             break;
         default:
+            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+                break;
             if ( unlikely(!is_pv_32on64_domain(d) ?
                           get_user(vmask, (unsigned long *)bmap_ptr) :
                           get_user(vmask, (unsigned int *)bmap_ptr)) )
-                rc = -EFAULT;
+                rc = -EFAULT, vmask = 0;
             pmask = vcpumask_to_pcpumask(d, vmask);
+            if ( cpu_isset(smp_processor_id(), pmask) )
+                this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
             flush_tlb_mask(pmask);
             break;
         }
         break;
 
     case UVMF_INVLPG:
+        if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+            break;
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
+            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+                break;
             if ( !paging_mode_enabled(d) ||
                  (paging_invlpg(v, va) != 0) ) 
                 flush_tlb_one_local(va);
@@ -3606,13 +3606,17 @@ int do_update_va_mapping(unsigned long v
             if ( unlikely(!is_pv_32on64_domain(d) ?
                           get_user(vmask, (unsigned long *)bmap_ptr) :
                           get_user(vmask, (unsigned int *)bmap_ptr)) )
-                rc = -EFAULT;
+                rc = -EFAULT, vmask = 0;
             pmask = vcpumask_to_pcpumask(d, vmask);
+            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+                cpu_clear(smp_processor_id(), pmask);
             flush_tlb_one_mask(pmask, va);
             break;
         }
         break;
     }
+
+    process_deferred_ops();
 
     return rc;
 }
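
The mm.c hunks above route more flush work through the per-CPU deferred_ops word: individual paths only set DOP_FLUSH_TLB / DOP_FLUSH_ALL_TLBS bits (skipping narrower flushes when a full flush is already pending) and process_deferred_ops() does the flushing once at the end. Here is a simplified, self-contained sketch of that accumulate-then-process idea; it is not the exact Xen logic, and the names are illustrative.

    /* Simplified model; the OP_* names mirror the patch but the logic is a toy. */
    #include <stdio.h>

    #define OP_FLUSH_TLB       (1u << 0)   /* flush this CPU's TLB only      */
    #define OP_FLUSH_ALL_TLBS  (1u << 1)   /* flush TLBs on every dirty CPU  */

    static unsigned int deferred_ops;      /* per-CPU in the real code       */

    static void queue_op(unsigned int op)
    {
        /* A pending full flush makes a narrower flush redundant. */
        if ( (op == OP_FLUSH_TLB) && (deferred_ops & OP_FLUSH_ALL_TLBS) )
            return;
        deferred_ops |= op;
    }

    static void process_deferred_ops(void)
    {
        unsigned int ops = deferred_ops;

        deferred_ops = 0;
        if ( ops & OP_FLUSH_ALL_TLBS )
            printf("flush TLBs on all dirty CPUs\n");
        else if ( ops & OP_FLUSH_TLB )
            printf("flush local TLB\n");
    }

    int main(void)
    {
        queue_op(OP_FLUSH_ALL_TLBS);
        queue_op(OP_FLUSH_TLB);            /* absorbed by the pending full flush */
        process_deferred_ops();
        return 0;
    }
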
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Fri Feb 13 11:22:28 2009 +0900
@@ -45,11 +45,11 @@
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
 #undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /************************************************/
 /*            HAP LOG DIRTY SUPPORT             */
@@ -96,11 +96,10 @@ static struct page_info *hap_alloc(struc
 
     ASSERT(hap_locked_by_me(d));
 
-    if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) )
+    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+    if ( unlikely(!pg) )
         return NULL;
 
-    pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list);
-    list_del(&pg->list);
     d->arch.paging.hap.free_pages--;
 
     p = hap_map_domain_page(page_to_mfn(pg));
@@ -118,7 +117,7 @@ static void hap_free(struct domain *d, m
     ASSERT(hap_locked_by_me(d));
 
     d->arch.paging.hap.free_pages++;
-    list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+    page_list_add_tail(pg, &d->arch.paging.hap.freelist);
 }
 
 static struct page_info *hap_alloc_p2m_page(struct domain *d)
@@ -210,15 +209,13 @@ hap_set_allocation(struct domain *d, uns
             }
             d->arch.paging.hap.free_pages++;
             d->arch.paging.hap.total_pages++;
-            list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
         }
         else if ( d->arch.paging.hap.total_pages > pages )
         {
             /* Need to return memory to domheap */
-            ASSERT(!list_empty(&d->arch.paging.hap.freelist));
-            pg = list_entry(d->arch.paging.hap.freelist.next,
-                            struct page_info, list);
-            list_del(&pg->list);
+            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+            ASSERT(pg);
             d->arch.paging.hap.free_pages--;
             d->arch.paging.hap.total_pages--;
             pg->count_info = 0;
@@ -393,7 +390,7 @@ void hap_domain_init(struct domain *d)
 void hap_domain_init(struct domain *d)
 {
     hap_lock_init(d);
-    INIT_LIST_HEAD(&d->arch.paging.hap.freelist);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
 
     /* This domain will use HAP for log-dirty mode */
     paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Fri Feb 13 11:22:28 2009 +0900
@@ -63,7 +63,7 @@ static int ept_set_middle_entry(struct d
 
     pg->count_info = 1;
     pg->u.inuse.type_info = 1 | PGT_validated;
-    list_add_tail(&pg->list, &d->arch.p2m->pages);
+    page_list_add_tail(pg, &d->arch.p2m->pages);
 
     ept_entry->emt = 0;
     ept_entry->igmt = 0;
@@ -116,12 +116,12 @@ static int ept_next_level(struct domain 
 }
 
 /*
- * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
  */
 static int
-_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
-              unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
+              unsigned int order, p2m_type_t p2mt)
 {
     ept_entry_t *table = NULL;
     unsigned long gfn_remainder = gfn, offset = 0;
@@ -131,6 +131,7 @@ _ept_set_entry(struct domain *d, unsigne
     int walk_level = order / EPT_TABLE_ORDER;
     int direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t igmt = 0;
+    int need_modify_vtd_table = 1;
 
     /* we only support 4k and 2m pages now */
 
@@ -171,14 +172,23 @@ _ept_set_entry(struct domain *d, unsigne
 
             if ( ret == GUEST_TABLE_SUPER_PAGE )
             {
-                ept_entry->mfn = mfn_x(mfn) - offset;
+                if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
+                    need_modify_vtd_table = 0;  
+                else                  
+                    ept_entry->mfn = mfn_x(mfn) - offset;
+
                 if ( ept_entry->avail1 == p2m_ram_logdirty &&
                   p2mt == p2m_ram_rw )
                     for ( i = 0; i < 512; i++ )
                         paging_mark_dirty(d, mfn_x(mfn)-offset+i);
             }
             else
-                ept_entry->mfn = mfn_x(mfn);
+            {
+                if ( ept_entry->mfn == mfn_x(mfn) )
+                    need_modify_vtd_table = 0;
+                else
+                    ept_entry->mfn = mfn_x(mfn);
+            }
 
             ept_entry->avail1 = p2mt;
             ept_entry->rsvd = 0;
@@ -239,7 +249,10 @@ _ept_set_entry(struct domain *d, unsigne
                                                 &igmt, direct_mmio);
         split_ept_entry->igmt = igmt;
 
-        split_ept_entry->mfn = mfn_x(mfn);
+        if ( split_ept_entry->mfn == mfn_x(mfn) )
+            need_modify_vtd_table = 0;
+        else
+            split_ept_entry->mfn = mfn_x(mfn);
         split_ept_entry->avail1 = p2mt;
         ept_p2m_type_to_flags(split_ept_entry, p2mt);
 
@@ -287,17 +300,6 @@ out:
     }
 
     return rv;
-}
-
-static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-              unsigned int order, p2m_type_t p2mt)
-{
-    /* ept_set_entry() are called from set_entry(),
-     * We should always create VT-d page table acording 
-     * to the gfn to mfn translations changes.
-     */
-    return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 
 }
 
 /* Read ept p2m entries */
@@ -393,6 +395,21 @@ static mfn_t ept_get_entry_current(unsig
     return ept_get_entry(current->domain, gfn, t, q);
 }
 
+/* Check whether the newly computed emt/igmt for this gfn match the old
+ * values; returns 0 if they match (the EPT entry need not be rewritten),
+ * 1 otherwise. */
+static int need_modify_ept_entry(struct domain *d, unsigned long gfn,
+                                    unsigned long mfn, uint8_t o_igmt,
+                                    uint8_t o_emt, p2m_type_t p2mt)
+{
+    uint8_t igmt, emt;
+    emt = epte_get_entry_emt(d, gfn, mfn, &igmt, 
+                                (p2mt == p2m_mmio_direct));
+    if ( (emt == o_emt) && (igmt == o_igmt) )
+        return 0;
+    return 1; 
+}
+
 void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
                  unsigned long end_gfn)
 {
@@ -401,6 +418,7 @@ void ept_change_entry_emt_with_range(str
     uint64_t epte;
     int order = 0;
     unsigned long mfn;
+    uint8_t o_igmt, o_emt;
 
     for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
     {
@@ -410,7 +428,9 @@ void ept_change_entry_emt_with_range(str
         mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT;
         if ( !mfn_valid(mfn) )
             continue;
-        p2mt = (epte & EPTE_AVAIL1_MASK) >> 8;
+        p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT;
+        o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;
+        o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;
         order = 0;
 
         if ( epte & EPTE_SUPER_PAGE_MASK )
@@ -422,30 +442,26 @@ void ept_change_entry_emt_with_range(str
                  * Set emt for super page.
                  */
                 order = EPT_TABLE_ORDER;
-                /* vmx_set_uc_mode() dont' touch the gfn to mfn
-                 * translations, only modify the emt field of the EPT entries.
-                 * so we need not modify the current VT-d page tables.
-                 */
-                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
+                if ( need_modify_ept_entry(d, gfn, mfn, 
+                                            o_igmt, o_emt, p2mt) )
+                    ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
                 gfn += 0x1FF;
             }
             else
             {
-                /* 1)change emt for partial entries of the 2m area.
-                 * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
-                 * translations, only modify the emt field of the EPT entries.
-                 * so we need not modify the current VT-d page tables.
-                 */
-                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
+                /* change emt for partial entries of the 2m area. */
+                if ( need_modify_ept_entry(d, gfn, mfn, 
+                                            o_igmt, o_emt, p2mt) )
+                    ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
                 gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
             }
         }
-        else /* 1)gfn assigned with 4k
-              * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
-              * translations, only modify the emt field of the EPT entries.
-              * so we need not modify the current VT-d page tables.
-             */
-            _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
+        else /* gfn assigned with 4k */
+        {
+            if ( need_modify_ept_entry(d, gfn, mfn, 
+                                            o_igmt, o_emt, p2mt) )
+                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+        }
     }
 }
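
In the EPT changes above, need_modify_vtd_table is derived by comparing the old and new MFN held in the entry, and ept_change_entry_emt_with_range() only rewrites an entry when its emt/igmt actually change. A compact sketch of that compare-before-write idea follows, using purely illustrative types rather than the real ept_entry_t layout.

    /* Purely illustrative types; not the real ept_entry_t layout. */
    #include <stdint.h>
    #include <stdio.h>

    struct entry {
        uint64_t mfn;
        uint8_t  emt, igmt;
    };

    /* Write the entry only when something would change; report whether a
     * dependent structure (the VT-d tables in the patch) also needs syncing. */
    static int set_entry(struct entry *e, uint64_t mfn, uint8_t emt, uint8_t igmt)
    {
        int mapping_changed = (e->mfn != mfn);

        if ( !mapping_changed && (e->emt == emt) && (e->igmt == igmt) )
            return 0;                  /* nothing to do at all */

        e->mfn  = mfn;
        e->emt  = emt;
        e->igmt = igmt;
        return mapping_changed;        /* only a new mfn needs the IOMMU sync */
    }

    int main(void)
    {
        struct entry e = { 0x1000, 0, 0 };

        printf("sync VT-d: %d\n", set_entry(&e, 0x1000, 1, 0)); /* 0: emt only */
        printf("sync VT-d: %d\n", set_entry(&e, 0x2000, 1, 0)); /* 1: new mfn  */
        return 0;
    }
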
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/p2m.c     Fri Feb 13 11:22:28 2009 +0900
@@ -89,11 +89,11 @@
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
 #undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 
 /* PTE flags for the various types of p2m entry */
@@ -175,7 +175,7 @@ p2m_next_level(struct domain *d, mfn_t *
         struct page_info *pg = d->arch.p2m->alloc_page(d);
         if ( pg == NULL )
             return 0;
-        list_add_tail(&pg->list, &d->arch.p2m->pages);
+        page_list_add_tail(pg, &d->arch.p2m->pages);
         pg->u.inuse.type_info = type | 1 | PGT_validated;
         pg->count_info = 1;
 
@@ -214,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t *
         struct page_info *pg = d->arch.p2m->alloc_page(d);
         if ( pg == NULL )
             return 0;
-        list_add_tail(&pg->list, &d->arch.p2m->pages);
+        page_list_add_tail(pg, &d->arch.p2m->pages);
         pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
         pg->count_info = 1;
         
@@ -300,18 +300,18 @@ p2m_pod_cache_add(struct domain *d,
     for(i=0; i < 1 << order ; i++)
     {
         p = page + i;
-        list_del(&p->list);
+        page_list_del(p, &d->page_list);
     }
 
     /* Then add the first one to the appropriate populate-on-demand list */
     switch(order)
     {
     case 9:
-        list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */
+        page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
         p2md->pod.count += 1 << order;
         break;
     case 0:
-        list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */
+        page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
         p2md->pod.count += 1 ;
         break;
     default:
@@ -334,54 +334,51 @@ static struct page_info * p2m_pod_cache_
     struct page_info *p = NULL;
     int i;
 
-    if ( order == 9 && list_empty(&p2md->pod.super) )
+    if ( order == 9 && page_list_empty(&p2md->pod.super) )
     {
         return NULL;
     }
-    else if ( order == 0 && list_empty(&p2md->pod.single) )
+    else if ( order == 0 && page_list_empty(&p2md->pod.single) )
     {
         unsigned long mfn;
         struct page_info *q;
 
-        BUG_ON( list_empty(&p2md->pod.super) );
+        BUG_ON( page_list_empty(&p2md->pod.super) );
 
         /* Break up a superpage to make single pages. NB count doesn't
          * need to be adjusted. */
         printk("%s: Breaking up superpage.\n", __func__);
-        p = list_entry(p2md->pod.super.next, struct page_info, list);
-        list_del(&p->list);
+        p = page_list_remove_head(&p2md->pod.super);
         mfn = mfn_x(page_to_mfn(p));
 
         for ( i=0; i<(1<<9); i++ )
         {
             q = mfn_to_page(_mfn(mfn+i));
-            list_add_tail(&q->list, &p2md->pod.single);
+            page_list_add_tail(q, &p2md->pod.single);
         }
     }
 
     switch ( order )
     {
     case 9:
-        BUG_ON( list_empty(&p2md->pod.super) );
-        p = list_entry(p2md->pod.super.next, struct page_info, list); 
+        BUG_ON( page_list_empty(&p2md->pod.super) );
+        p = page_list_remove_head(&p2md->pod.super);
         p2md->pod.count -= 1 << order; /* Lock: page_alloc */
         break;
     case 0:
-        BUG_ON( list_empty(&p2md->pod.single) );
-        p = list_entry(p2md->pod.single.next, struct page_info, list);
+        BUG_ON( page_list_empty(&p2md->pod.single) );
+        p = page_list_remove_head(&p2md->pod.single);
         p2md->pod.count -= 1;
         break;
     default:
         BUG();
     }
 
-    list_del(&p->list);
-
     /* Put the pages back on the domain page_list */
     for ( i = 0 ; i < (1 << order) ; i++ )
     {
         BUG_ON(page_get_owner(p + i) != d);
-        list_add_tail(&p[i].list, &d->page_list);
+        page_list_add_tail(p + i, &d->page_list);
     }
 
     return p;
@@ -425,7 +422,7 @@ p2m_pod_set_cache_target(struct domain *
         spin_lock(&d->page_alloc_lock);
 
         if ( (p2md->pod.count - pod_target) > (1>>9)
-             && !list_empty(&p2md->pod.super) )
+             && !page_list_empty(&p2md->pod.super) )
             order = 9;
         else
             order = 0;
@@ -535,38 +532,27 @@ p2m_pod_empty_cache(struct domain *d)
 p2m_pod_empty_cache(struct domain *d)
 {
     struct p2m_domain *p2md = d->arch.p2m;
-    struct list_head *q, *p;
+    struct page_info *page;
 
     spin_lock(&d->page_alloc_lock);
 
-    list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */
+    while ( (page = page_list_remove_head(&p2md->pod.super)) )
     {
         int i;
-        struct page_info *page;
             
-        list_del(p);
-            
-        page = list_entry(p, struct page_info, list);
-
         for ( i = 0 ; i < (1 << 9) ; i++ )
         {
             BUG_ON(page_get_owner(page + i) != d);
-            list_add_tail(&page[i].list, &d->page_list);
+            page_list_add_tail(page + i, &d->page_list);
         }
 
         p2md->pod.count -= 1<<9;
     }
 
-    list_for_each_safe(p, q, &p2md->pod.single)
-    {
-        struct page_info *page;
-            
-        list_del(p);
-            
-        page = list_entry(p, struct page_info, list);
-
+    while ( (page = page_list_remove_head(&p2md->pod.single)) )
+    {
         BUG_ON(page_get_owner(page) != d);
-        list_add_tail(&page->list, &d->page_list);
+        page_list_add_tail(page, &d->page_list);
 
         p2md->pod.count -= 1;
     }
@@ -952,7 +938,7 @@ p2m_pod_emergency_sweep_super(struct dom
          * NB that this is a zero-sum game; we're increasing our cache size
          * by increasing our 'debt'.  Since we hold the p2m lock,
          * (entry_count - count) must remain the same. */
-        if ( !list_empty(&p2md->pod.super) &&  i < limit )
+        if ( !page_list_empty(&p2md->pod.super) &&  i < limit )
             break;
     }
 
@@ -1035,12 +1021,12 @@ p2m_pod_demand_populate(struct domain *d
     }
 
     /* If we're low, start a sweep */
-    if ( order == 9 && list_empty(&p2md->pod.super) )
+    if ( order == 9 && page_list_empty(&p2md->pod.super) )
         p2m_pod_emergency_sweep_super(d);
 
-    if ( list_empty(&p2md->pod.single) &&
+    if ( page_list_empty(&p2md->pod.single) &&
          ( ( order == 0 )
-           || (order == 9 && list_empty(&p2md->pod.super) ) ) )
+           || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
         p2m_pod_emergency_sweep(d);
 
     /* Keep track of the highest gfn demand-populated by a guest fault */
@@ -1477,9 +1463,9 @@ int p2m_init(struct domain *d)
 
     memset(p2m, 0, sizeof(*p2m));
     p2m_lock_init(p2m);
-    INIT_LIST_HEAD(&p2m->pages);
-    INIT_LIST_HEAD(&p2m->pod.super);
-    INIT_LIST_HEAD(&p2m->pod.single);
+    INIT_PAGE_LIST_HEAD(&p2m->pages);
+    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
+    INIT_PAGE_LIST_HEAD(&p2m->pod.single);
 
     p2m->set_entry = p2m_set_entry;
     p2m->get_entry = p2m_gfn_to_mfn;
@@ -1540,7 +1526,6 @@ int p2m_alloc_table(struct domain *d,
 
 {
     mfn_t mfn = _mfn(INVALID_MFN);
-    struct list_head *entry;
     struct page_info *page, *p2m_top;
     unsigned int page_count = 0;
     unsigned long gfn = -1UL;
@@ -1566,7 +1551,7 @@ int p2m_alloc_table(struct domain *d,
         p2m_unlock(p2m);
         return -ENOMEM;
     }
-    list_add_tail(&p2m_top->list, &p2m->pages);
+    page_list_add_tail(p2m_top, &p2m->pages);
 
     p2m_top->count_info = 1;
     p2m_top->u.inuse.type_info =
@@ -1587,11 +1572,8 @@ int p2m_alloc_table(struct domain *d,
         goto error;
 
     /* Copy all existing mappings from the page list and m2p */
-    for ( entry = d->page_list.next;
-          entry != &d->page_list;
-          entry = entry->next )
-    {
-        page = list_entry(entry, struct page_info, list);
+    page_list_for_each(page, &d->page_list)
+    {
         mfn = page_to_mfn(page);
         gfn = get_gpfn_from_mfn(mfn_x(mfn));
         page_count++;
@@ -1621,19 +1603,14 @@ void p2m_teardown(struct domain *d)
 /* Return all the p2m pages to Xen.
  * We know we don't have any extra mappings to these pages */
 {
-    struct list_head *entry, *n;
     struct page_info *pg;
     struct p2m_domain *p2m = d->arch.p2m;
 
     p2m_lock(p2m);
     d->arch.phys_table = pagetable_null();
 
-    list_for_each_safe(entry, n, &p2m->pages)
-    {
-        pg = list_entry(entry, struct page_info, list);
-        list_del(entry);
+    while ( (pg = page_list_remove_head(&p2m->pages)) )
         p2m->free_page(d, pg);
-    }
     p2m_unlock(p2m);
 }
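
The populate-on-demand cache code above now pops entries with page_list_remove_head() and, when the single-page list runs dry, breaks one order-9 block into 512 singles. Below is a toy, counters-only sketch of that split step; the real code moves struct page_info entries between lists, and the names here are illustrative.

    /* Counters-only model; the real code moves struct page_info entries. */
    #include <stdio.h>

    #define SUPERPAGE_ORDER 9

    struct pod_cache {
        unsigned long supers;   /* cached 512-page blocks */
        unsigned long singles;  /* cached single pages    */
    };

    static int pod_get_single(struct pod_cache *c)
    {
        if ( c->singles == 0 )
        {
            if ( c->supers == 0 )
                return -1;                        /* cache is empty */
            /* Break one order-9 block into 512 order-0 entries. */
            c->supers  -= 1;
            c->singles += 1UL << SUPERPAGE_ORDER;
        }
        c->singles -= 1;
        return 0;
    }

    int main(void)
    {
        struct pod_cache c = { .supers = 1, .singles = 0 };

        pod_get_single(&c);
        printf("supers=%lu singles=%lu\n", c.supers, c.singles); /* 0 and 511 */
        return 0;
    }
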
 
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/paging.c  Fri Feb 13 11:22:28 2009 +0900
@@ -47,11 +47,11 @@
 /************************************************/
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
 #undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /* The log-dirty lock.  This protects the log-dirty bitmap from
  * concurrent accesses (and teardowns, etc).
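
The overridden macros above exist so this file can keep using mfn_to_page()/page_to_mfn() with the typed mfn_t wrapper instead of raw unsigned longs. A stand-alone sketch of that typed-integer idiom, independent of the real Xen definitions:

    /* Stand-alone illustration of the mfn_t typed-integer idiom. */
    #include <stdio.h>

    typedef struct { unsigned long m; } mfn_t;

    static inline mfn_t _mfn(unsigned long m) { mfn_t x = { m }; return x; }
    static inline unsigned long mfn_x(mfn_t x) { return x.m; }

    int main(void)
    {
        mfn_t mfn = _mfn(0x1000);

        printf("raw frame number: %#lx\n", mfn_x(mfn));
        /* printf("%#lx\n", mfn);  -- rejected by the compiler: not an integer */
        return 0;
    }
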
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Feb 13 11:22:28 2009 +0900
@@ -48,9 +48,9 @@ void shadow_domain_init(struct domain *d
     int i;
     shadow_lock_init(d);
     for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
-        INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
-    INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
-    INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
+        INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
 
     /* Use shadow pagetables for log-dirty support */
     paging_log_dirty_init(d, shadow_enable_log_dirty, 
@@ -1291,9 +1291,9 @@ static inline int space_is_available(
     for ( ; order <= shadow_max_order(d); ++order )
     {
         unsigned int n = count;
-        const struct list_head *p;
-
-        list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
+        const struct page_info *sp;
+
+        page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] )
             if ( --n == 0 )
                 return 1;
         count = (count + 1) >> 1;
@@ -1306,8 +1306,8 @@ static inline int space_is_available(
  * non-Xen mappings in this top-level shadow mfn */
 static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
-    switch ( sp->type )
+    struct page_info *sp = mfn_to_page(smfn);
+    switch ( sp->u.sh.type )
     {
     case SH_type_l2_32_shadow:
         SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
@@ -1322,7 +1322,7 @@ static void shadow_unhook_mappings(struc
         break;
 #endif
     default:
-        SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type);
+        SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type);
         BUG();
     }
 }
@@ -1334,7 +1334,7 @@ static inline void trace_shadow_prealloc
         /* Convert smfn to gfn */
         unsigned long gfn;
         ASSERT(mfn_valid(smfn));
-        gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
+        gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back));
         __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
                     sizeof(gfn), (unsigned char*)&gfn);
     }
@@ -1350,8 +1350,7 @@ static void _shadow_prealloc(
     /* Need a vpcu for calling unpins; for now, since we don't have
      * per-vcpu shadows, any will do */
     struct vcpu *v, *v2;
-    struct list_head *l, *t;
-    struct shadow_page_info *sp;
+    struct page_info *sp, *t;
     mfn_t smfn;
     int i;
 
@@ -1365,10 +1364,9 @@ static void _shadow_prealloc(
 
     /* Stage one: walk the list of pinned pages, unpinning them */
     perfc_incr(shadow_prealloc_1);
-    list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
-    {
-        sp = list_entry(l, struct shadow_page_info, list);
-        smfn = shadow_page_to_mfn(sp);
+    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+    {
+        smfn = page_to_mfn(sp);
 
         /* Unpin this top-level shadow */
         trace_shadow_prealloc_unpin(d, smfn);
@@ -1427,8 +1425,7 @@ void shadow_prealloc(struct domain *d, u
  * this domain's shadows */
 static void shadow_blow_tables(struct domain *d) 
 {
-    struct list_head *l, *t;
-    struct shadow_page_info *sp;
+    struct page_info *sp, *t;
     struct vcpu *v = d->vcpu[0];
     mfn_t smfn;
     int i;
@@ -1436,10 +1433,9 @@ static void shadow_blow_tables(struct do
     ASSERT(v != NULL);
 
     /* Pass one: unpin all pinned pages */
-    list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
-    {
-        sp = list_entry(l, struct shadow_page_info, list);
-        smfn = shadow_page_to_mfn(sp);
+    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+    {
+        smfn = page_to_mfn(sp);
         sh_unpin(v, smfn);
     }
         
@@ -1493,6 +1489,18 @@ __initcall(shadow_blow_tables_keyhandler
 __initcall(shadow_blow_tables_keyhandler_init);
 #endif /* !NDEBUG */
 
+static inline struct page_info *
+next_shadow(const struct page_info *sp)
+{
+    return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL;
+}
+
+static inline void
+set_next_shadow(struct page_info *sp, struct page_info *next)
+{
+    sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0;
+}
+
 /* Allocate another shadow's worth of (contiguous, aligned) pages,
  * and fill in the type and backpointer fields of their page_infos. 
  * Never fails to allocate. */
@@ -1500,7 +1508,7 @@ mfn_t shadow_alloc(struct domain *d,
                     u32 shadow_type,
                     unsigned long backpointer)
 {
-    struct shadow_page_info *sp = NULL;
+    struct page_info *sp = NULL;
     unsigned int order = shadow_order(shadow_type);
     cpumask_t mask;
     void *p;
@@ -1515,7 +1523,7 @@ mfn_t shadow_alloc(struct domain *d,
 
     /* Find smallest order which can satisfy the request. */
     for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
-        if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
+        if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) )
             goto found;
     
     /* If we get here, we failed to allocate. This should never happen.
@@ -1526,16 +1534,12 @@ mfn_t shadow_alloc(struct domain *d,
     BUG();
 
  found:
-    sp = list_entry(d->arch.paging.shadow.freelists[i].next, 
-                    struct shadow_page_info, list);
-    list_del(&sp->list);
-            
     /* We may have to halve the chunk a number of times. */
     while ( i != order )
     {
         i--;
-        sp->order = i;
-        list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
+        sp->v.free.order = i;
+        page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]);
         sp += 1 << i;
     }
     d->arch.paging.shadow.free_pages -= 1 << order;
@@ -1553,26 +1557,26 @@ mfn_t shadow_alloc(struct domain *d,
             flush_tlb_mask(mask);
         }
         /* Now safe to clear the page for reuse */
-        p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
+        p = sh_map_domain_page(page_to_mfn(sp+i));
         ASSERT(p != NULL);
         clear_page(p);
         sh_unmap_domain_page(p);
-        INIT_LIST_HEAD(&sp[i].list);
-        sp[i].type = shadow_type;
-        sp[i].pinned = 0;
-        sp[i].count = 0;
-        sp[i].backpointer = backpointer;
-        sp[i].next_shadow = NULL;
+        INIT_PAGE_LIST_ENTRY(&sp[i].list);
+        sp[i].u.sh.type = shadow_type;
+        sp[i].u.sh.pinned = 0;
+        sp[i].u.sh.count = 0;
+        sp[i].v.sh.back = backpointer;
+        set_next_shadow(&sp[i], NULL);
         perfc_incr(shadow_alloc_count);
     }
-    return shadow_page_to_mfn(sp);
+    return page_to_mfn(sp);
 }
 
 
 /* Return some shadow pages to the pool. */
 void shadow_free(struct domain *d, mfn_t smfn)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 
+    struct page_info *sp = mfn_to_page(smfn); 
     u32 shadow_type;
     unsigned long order;
     unsigned long mask;
@@ -1581,7 +1585,7 @@ void shadow_free(struct domain *d, mfn_t
     ASSERT(shadow_locked_by_me(d));
     perfc_incr(shadow_free);
 
-    shadow_type = sp->type;
+    shadow_type = sp->u.sh.type;
     ASSERT(shadow_type != SH_type_none);
     ASSERT(shadow_type != SH_type_p2m_table);
     order = shadow_order(shadow_type);
@@ -1605,7 +1609,7 @@ void shadow_free(struct domain *d, mfn_t
         }
 #endif
         /* Strip out the type: this is now a free shadow page */
-        sp[i].type = 0;
+        sp[i].u.sh.type = 0;
         /* Remember the TLB timestamp so we will know whether to flush 
          * TLBs when we reuse the page.  Because the destructors leave the
          * contents of the pages in place, we can delay TLB flushes until
@@ -1618,22 +1622,24 @@ void shadow_free(struct domain *d, mfn_t
     for ( ; order < shadow_max_order(d); ++order )
     {
         mask = 1 << order;
-        if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
+        if ( (mfn_x(page_to_mfn(sp)) & mask) ) {
             /* Merge with predecessor block? */
-            if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
+            if ( ((sp-mask)->u.sh.type != PGT_none) ||
+                 ((sp-mask)->v.free.order != order) )
                 break;
-            list_del(&(sp-mask)->list);
             sp -= mask;
+            page_list_del(sp, &d->arch.paging.shadow.freelists[order]);
         } else {
             /* Merge with successor block? */
-            if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
+            if ( ((sp+mask)->u.sh.type != PGT_none) ||
+                 ((sp+mask)->v.free.order != order) )
                 break;
-            list_del(&(sp+mask)->list);
-        }
-    }
-
-    sp->order = order;
-    list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+            page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]);
+        }
+    }
+
+    sp->v.free.order = order;
+    page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
 }
 
 /* Divert some memory from the pool to be used by the p2m mapping.
@@ -1672,7 +1678,7 @@ sh_alloc_p2m_pages(struct domain *d)
          */
         page_set_owner(&pg[i], d);
         pg[i].count_info = 1;
-        list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
+        page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist);
     }
     return 1;
 }
@@ -1681,25 +1687,22 @@ static struct page_info *
 static struct page_info *
 shadow_alloc_p2m_page(struct domain *d)
 {
-    struct list_head *entry;
     struct page_info *pg;
     mfn_t mfn;
     void *p;
     
     shadow_lock(d);
 
-    if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
+    if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) &&
          !sh_alloc_p2m_pages(d) )
     {
         shadow_unlock(d);
         return NULL;
     }
-    entry = d->arch.paging.shadow.p2m_freelist.next;
-    list_del(entry);
+    pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist);
 
     shadow_unlock(d);
 
-    pg = list_entry(entry, struct page_info, list);
     mfn = page_to_mfn(pg);
     p = sh_map_domain_page(mfn);
     clear_page(p);
@@ -1780,7 +1783,7 @@ static unsigned int sh_set_allocation(st
                                       unsigned int pages,
                                       int *preempted)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     unsigned int lower_bound;
     unsigned int j, order = shadow_max_order(d);
 
@@ -1802,7 +1805,7 @@ static unsigned int sh_set_allocation(st
         if ( d->arch.paging.shadow.total_pages < pages ) 
         {
             /* Need to allocate more memory from domheap */
-            sp = (struct shadow_page_info *)
+            sp = (struct page_info *)
                 alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
             if ( sp == NULL ) 
             { 
@@ -1813,23 +1816,26 @@ static unsigned int sh_set_allocation(st
             d->arch.paging.shadow.total_pages += 1 << order;
             for ( j = 0; j < 1U << order; j++ )
             {
-                sp[j].type = 0;  
-                sp[j].pinned = 0;
-                sp[j].count = 0;
-                sp[j].mbz = 0;
+                sp[j].u.sh.type = 0;
+                sp[j].u.sh.pinned = 0;
+                sp[j].u.sh.count = 0;
                 sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
             }
-            sp->order = order;
-            list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+            sp->v.free.order = order;
+            page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
         } 
         else if ( d->arch.paging.shadow.total_pages > pages ) 
         {
             /* Need to return memory to domheap */
             _shadow_prealloc(d, order, 1);
-            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
-            sp = list_entry(d->arch.paging.shadow.freelists[order].next,
-                            struct shadow_page_info, list);
-            list_del(&sp->list);
+            sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
+            ASSERT(sp);
+            /*
+             * The pages were allocated anonymously, but the owner field
+             * gets overwritten normally, so need to clear it here.
+             */
+            for ( j = 0; j < 1U << order; j++ )
+                page_set_owner(&((struct page_info *)sp)[j], NULL);
             d->arch.paging.shadow.free_pages -= 1 << order;
             d->arch.paging.shadow.total_pages -= 1 << order;
             free_domheap_pages((struct page_info *)sp, order);
@@ -1880,7 +1886,7 @@ static void sh_hash_audit_bucket(struct 
 static void sh_hash_audit_bucket(struct domain *d, int bucket)
 /* Audit one bucket of the hash table */
 {
-    struct shadow_page_info *sp, *x;
+    struct page_info *sp, *x;
 
     if ( !(SHADOW_AUDIT_ENABLE) )
         return;
@@ -1889,38 +1895,39 @@ static void sh_hash_audit_bucket(struct 
     while ( sp )
     {
         /* Not a shadow? */
-        BUG_ON( sp->mbz != 0 );
+        BUG_ON( sp->count_info != 0 );
         /* Bogus type? */
-        BUG_ON( sp->type == 0 ); 
-        BUG_ON( sp->type > SH_type_max_shadow );
+        BUG_ON( sp->u.sh.type == 0 );
+        BUG_ON( sp->u.sh.type > SH_type_max_shadow );
         /* Wrong bucket? */
-        BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket ); 
+        BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket );
         /* Duplicate entry? */
-        for ( x = sp->next_shadow; x; x = x->next_shadow )
-            BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
+        for ( x = next_shadow(sp); x; x = next_shadow(x) )
+            BUG_ON( x->v.sh.back == sp->v.sh.back &&
+                    x->u.sh.type == sp->u.sh.type );
         /* Follow the backpointer to the guest pagetable */
-        if ( sp->type != SH_type_fl1_32_shadow
-             && sp->type != SH_type_fl1_pae_shadow
-             && sp->type != SH_type_fl1_64_shadow )
-        {
-            struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
+        if ( sp->u.sh.type != SH_type_fl1_32_shadow
+             && sp->u.sh.type != SH_type_fl1_pae_shadow
+             && sp->u.sh.type != SH_type_fl1_64_shadow )
+        {
+            struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back));
             /* Bad shadow flags on guest page? */
-            BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
+            BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) );
             /* Bad type count on guest page? */
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
-            if ( sp->type == SH_type_l1_32_shadow
-                 || sp->type == SH_type_l1_pae_shadow
-                 || sp->type == SH_type_l1_64_shadow )
+            if ( sp->u.sh.type == SH_type_l1_32_shadow
+                 || sp->u.sh.type == SH_type_l1_pae_shadow
+                 || sp->u.sh.type == SH_type_l1_64_shadow )
             {
                 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
                      && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
                 {
                     if ( !page_is_out_of_sync(gpg) )
                     {
-                        SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+                        SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
                                      " and not OOS but has typecount %#lx\n",
-                                     sp->backpointer, 
-                                     mfn_x(shadow_page_to_mfn(sp)), 
+                                     sp->v.sh.back,
+                                     mfn_x(page_to_mfn(sp)), 
                                      gpg->u.inuse.type_info);
                         BUG();
                     }
@@ -1931,15 +1938,15 @@ static void sh_hash_audit_bucket(struct 
             if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 
                  && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
             {
-                SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+                SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
                              " but has typecount %#lx\n",
-                             sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), 
+                             sp->v.sh.back, mfn_x(page_to_mfn(sp)),
                              gpg->u.inuse.type_info);
                 BUG();
             }
         }
         /* That entry was OK; on we go */
-        sp = sp->next_shadow;
+        sp = next_shadow(sp);
     }
 }
 
@@ -1972,15 +1979,15 @@ static void sh_hash_audit(struct domain 
  * Returns 0 for success, 1 for error. */
 static int shadow_hash_alloc(struct domain *d)
 {
-    struct shadow_page_info **table;
+    struct page_info **table;
 
     ASSERT(shadow_locked_by_me(d));
     ASSERT(!d->arch.paging.shadow.hash_table);
 
-    table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
+    table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS);
     if ( !table ) return 1;
     memset(table, 0, 
-           SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
+           SHADOW_HASH_BUCKETS * sizeof (struct page_info *));
     d->arch.paging.shadow.hash_table = table;
     return 0;
 }
@@ -2002,7 +2009,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
  * or INVALID_MFN if it doesn't exist */
 {
     struct domain *d = v->domain;
-    struct shadow_page_info *sp, *prev;
+    struct page_info *sp, *prev;
     key_t key;
 
     ASSERT(shadow_locked_by_me(d));
@@ -2019,21 +2026,21 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
     prev = NULL;
     while(sp)
     {
-        if ( sp->backpointer == n && sp->type == t )
+        if ( sp->v.sh.back == n && sp->u.sh.type == t )
         {
             /* Pull-to-front if 'sp' isn't already the head item */
             if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
             {
                 if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
                     /* Can't reorder: someone is walking the hash chains */
-                    return shadow_page_to_mfn(sp);
+                    return page_to_mfn(sp);
                 else 
                 {
                     ASSERT(prev);
                     /* Delete sp from the list */
                     prev->next_shadow = sp->next_shadow;                    
                     /* Re-insert it at the head of the list */
-                    sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+                    set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
                     d->arch.paging.shadow.hash_table[key] = sp;
                 }
             }
@@ -2041,10 +2048,10 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
             {
                 perfc_incr(shadow_hash_lookup_head);
             }
-            return shadow_page_to_mfn(sp);
+            return page_to_mfn(sp);
         }
         prev = sp;
-        sp = sp->next_shadow;
+        sp = next_shadow(sp);
     }
 
     perfc_incr(shadow_hash_lookup_miss);
@@ -2056,7 +2063,7 @@ void shadow_hash_insert(struct vcpu *v, 
 /* Put a mapping (n,t)->smfn into the hash table */
 {
     struct domain *d = v->domain;
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     key_t key;
     
     ASSERT(shadow_locked_by_me(d));
@@ -2070,8 +2077,8 @@ void shadow_hash_insert(struct vcpu *v, 
     sh_hash_audit_bucket(d, key);
     
     /* Insert this shadow at the top of the bucket */
-    sp = mfn_to_shadow_page(smfn);
-    sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+    sp = mfn_to_page(smfn);
+    set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
     d->arch.paging.shadow.hash_table[key] = sp;
     
     sh_hash_audit_bucket(d, key);
@@ -2082,7 +2089,7 @@ void shadow_hash_delete(struct vcpu *v, 
 /* Excise the mapping (n,t)->smfn from the hash table */
 {
     struct domain *d = v->domain;
-    struct shadow_page_info *sp, *x;
+    struct page_info *sp, *x;
     key_t key;
 
     ASSERT(shadow_locked_by_me(d));
@@ -2095,10 +2102,10 @@ void shadow_hash_delete(struct vcpu *v, 
     key = sh_hash(n, t);
     sh_hash_audit_bucket(d, key);
     
-    sp = mfn_to_shadow_page(smfn);
+    sp = mfn_to_page(smfn);
     if ( d->arch.paging.shadow.hash_table[key] == sp ) 
         /* Easy case: we're deleting the head item. */
-        d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
+        d->arch.paging.shadow.hash_table[key] = next_shadow(sp);
     else 
     {
         /* Need to search for the one we want */
@@ -2107,15 +2114,15 @@ void shadow_hash_delete(struct vcpu *v, 
         {
             ASSERT(x); /* We can't have hit the end, since our target is
                         * still in the chain somewhere... */
-            if ( x->next_shadow == sp ) 
+            if ( next_shadow(x) == sp )
             {
                 x->next_shadow = sp->next_shadow;
                 break;
             }
-            x = x->next_shadow;
-        }
-    }
-    sp->next_shadow = NULL;
+            x = next_shadow(x);
+        }
+    }
+    set_next_shadow(sp, NULL);
 
     sh_hash_audit_bucket(d, key);
 }
@@ -2137,7 +2144,7 @@ static void hash_foreach(struct vcpu *v,
 {
     int i, done = 0;
     struct domain *d = v->domain;
-    struct shadow_page_info *x;
+    struct page_info *x;
 
     /* Say we're here, to stop hash-lookups reordering the chains */
     ASSERT(shadow_locked_by_me(d));
@@ -2149,14 +2156,14 @@ static void hash_foreach(struct vcpu *v,
         /* WARNING: This is not safe against changes to the hash table.
          * The callback *must* return non-zero if it has inserted or
          * deleted anything from the hash (lookups are OK, though). */
-        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
-        {
-            if ( callback_mask & (1 << x->type) ) 
+        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+        {
+            if ( callback_mask & (1 << x->u.sh.type) )
             {
-                ASSERT(x->type <= 15);
-                ASSERT(callbacks[x->type] != NULL);
-                done = callbacks[x->type](v, shadow_page_to_mfn(x), 
-                                          callback_mfn);
+                ASSERT(x->u.sh.type <= 15);
+                ASSERT(callbacks[x->u.sh.type] != NULL);
+                done = callbacks[x->u.sh.type](v, page_to_mfn(x),
+                                               callback_mfn);
                 if ( done ) break;
             }
         }
@@ -2173,8 +2180,8 @@ static void hash_foreach(struct vcpu *v,
 
 void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
-    unsigned int t = sp->type;
+    struct page_info *sp = mfn_to_page(smfn);
+    unsigned int t = sp->u.sh.type;
 
 
     SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
@@ -2186,7 +2193,7 @@ void sh_destroy_shadow(struct vcpu *v, m
            t == SH_type_fl1_64_shadow  || 
            t == SH_type_monitor_table  || 
            (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
-           (page_get_owner(mfn_to_page(_mfn(sp->backpointer))) 
+           (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back)))
             == v->domain)); 
 
     /* The down-shifts here are so that the switch statement is on nice
@@ -2438,7 +2445,7 @@ int sh_remove_write_access(struct vcpu *
     {
         unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
         mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
-        int shtype = mfn_to_shadow_page(last_smfn)->type;
+        int shtype = mfn_to_page(last_smfn)->u.sh.type;
 
         if ( callbacks[shtype] ) 
             callbacks[shtype](v, last_smfn, gmfn);
@@ -2481,25 +2488,25 @@ int sh_remove_write_access_from_sl1p(str
 int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
                                      mfn_t smfn, unsigned long off)
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
     
     ASSERT(mfn_valid(smfn));
     ASSERT(mfn_valid(gmfn));
     
-    if ( sp->type == SH_type_l1_32_shadow
-         || sp->type == SH_type_fl1_32_shadow )
+    if ( sp->u.sh.type == SH_type_l1_32_shadow
+         || sp->u.sh.type == SH_type_fl1_32_shadow )
     {
         return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
             (v, gmfn, smfn, off);
     }
 #if CONFIG_PAGING_LEVELS >= 3
-    else if ( sp->type == SH_type_l1_pae_shadow
-              || sp->type == SH_type_fl1_pae_shadow )
+    else if ( sp->u.sh.type == SH_type_l1_pae_shadow
+              || sp->u.sh.type == SH_type_fl1_pae_shadow )
         return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
             (v, gmfn, smfn, off);
 #if CONFIG_PAGING_LEVELS >= 4
-    else if ( sp->type == SH_type_l1_64_shadow
-              || sp->type == SH_type_fl1_64_shadow )
+    else if ( sp->u.sh.type == SH_type_l1_64_shadow
+              || sp->u.sh.type == SH_type_fl1_64_shadow )
         return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
             (v, gmfn, smfn, off);
 #endif
@@ -2601,17 +2608,17 @@ static int sh_remove_shadow_via_pointer(
 /* Follow this shadow's up-pointer, if it has one, and remove the reference
  * found there.  Returns 1 if that was the only reference to this shadow */
 {
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
     mfn_t pmfn;
     void *vaddr;
     int rc;
 
-    ASSERT(sp->type > 0);
-    ASSERT(sp->type < SH_type_max_shadow);
-    ASSERT(sp->type != SH_type_l2_32_shadow);
-    ASSERT(sp->type != SH_type_l2_pae_shadow);
-    ASSERT(sp->type != SH_type_l2h_pae_shadow);
-    ASSERT(sp->type != SH_type_l4_64_shadow);
+    ASSERT(sp->u.sh.type > 0);
+    ASSERT(sp->u.sh.type < SH_type_max_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l2_32_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow);
+    ASSERT(sp->u.sh.type != SH_type_l4_64_shadow);
     
     if (sp->up == 0) return 0;
     pmfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2622,10 +2629,10 @@ static int sh_remove_shadow_via_pointer(
     ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
     
     /* Is this the only reference to this shadow? */
-    rc = (sp->count == 1) ? 1 : 0;
+    rc = (sp->u.sh.count == 1) ? 1 : 0;
 
     /* Blank the offending entry */
-    switch (sp->type) 
+    switch (sp->u.sh.type)
     {
     case SH_type_l1_32_shadow:
     case SH_type_l2_32_shadow:
@@ -3156,7 +3163,6 @@ void shadow_teardown(struct domain *d)
 {
     struct vcpu *v;
     mfn_t mfn;
-    struct list_head *entry, *n;
     struct page_info *pg;
 
     ASSERT(d->is_dying);
@@ -3208,12 +3214,8 @@ void shadow_teardown(struct domain *d)
     }
 #endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
 
-    list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
-    {
-        list_del(entry);
-        pg = list_entry(entry, struct page_info, list);
+    while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) )
         shadow_free_p2m_page(d, pg);
-    }
 
     if ( d->arch.paging.shadow.total_pages != 0 )
     {
@@ -3657,7 +3659,6 @@ int shadow_track_dirty_vram(struct domai
         for ( i = 0; i < nr; i++ ) {
             mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
             struct page_info *page;
-            u32 count_info;
             int dirty = 0;
             paddr_t sl1ma = d->dirty_vram->sl1ma[i];
 
@@ -3668,8 +3669,7 @@ int shadow_track_dirty_vram(struct domai
             else
             {
                 page = mfn_to_page(mfn);
-                count_info = page->u.inuse.type_info & PGT_count_mask;
-                switch (count_info)
+                switch (page->u.inuse.type_info & PGT_count_mask)
                 {
                 case 0:
                     /* No guest reference, nothing to track. */
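
The shadow/common.c hunks above convert the shadow allocator and teardown paths from the generic list_head API to the new page_list API introduced by this merge. A minimal before/after fragment of the recurring head-removal pattern, using only helpers that appear in the hunks (an illustration of the pattern, not standalone compilable code):

    /* Old pattern: emptiness check, then list_entry() + list_del(). */
    ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
    sp = list_entry(d->arch.paging.shadow.freelists[order].next,
                    struct shadow_page_info, list);
    list_del(&sp->list);

    /* New pattern: page_list_remove_head() returns NULL on an empty list,
     * so the emptiness check becomes an assertion on the result. */
    sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
    ASSERT(sp);
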
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Feb 13 11:22:28 2009 +0900
@@ -973,13 +973,13 @@ static int shadow_set_l2e(struct vcpu *v
         }
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         {
-            struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
-            mfn_t gl1mfn = _mfn(sp->backpointer);
+            struct page_info *sp = mfn_to_page(sl1mfn);
+            mfn_t gl1mfn = _mfn(sp->v.sh.back);
 
             /* If the shadow is a fl1 then the backpointer contains
                the GFN instead of the GMFN, and it's definitely not
                OOS. */
-            if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
+            if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
                  && mfn_is_out_of_sync(gl1mfn) )
                 sh_resync(v, gl1mfn);
         }
@@ -1036,9 +1036,8 @@ static inline void shadow_vram_get_l1e(s
     if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
         unsigned long i = gfn - d->dirty_vram->begin_pfn;
         struct page_info *page = mfn_to_page(mfn);
-        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
         
-        if ( count_info == 1 )
+        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
             /* Initial guest reference, record it */
             d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
                 | ((unsigned long)sl1e & ~PAGE_MASK);
@@ -1064,12 +1063,11 @@ static inline void shadow_vram_put_l1e(s
     if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
         unsigned long i = gfn - d->dirty_vram->begin_pfn;
         struct page_info *page = mfn_to_page(mfn);
-        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
         int dirty = 0;
         paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
             | ((unsigned long)sl1e & ~PAGE_MASK);
 
-        if ( count_info == 1 ) {
+        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) {
             /* Last reference */
             if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
                 /* We didn't know it was that one, let's say it is dirty */
@@ -1194,8 +1192,8 @@ do {                                    
 do {                                                                    \
     int _i;                                                             \
     shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn));                  \
-    ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow       \
-           || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
+    ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow  \
+           || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\
     for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
     {                                                                   \
         (_sl1e) = _sp + _i;                                             \
@@ -1232,7 +1230,7 @@ do {                                    
 do {                                                                      \
     int _i, _j, __done = 0;                                               \
     int _xen = !shadow_mode_external(_dom);                               \
-    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow);    \
+    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\
     for ( _j = 0; _j < 4 && !__done; _j++ )                               \
     {                                                                     \
         shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn);                  \
@@ -1260,11 +1258,11 @@ do {                                    
     int _i;                                                                \
     int _xen = !shadow_mode_external(_dom);                                \
     shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn));                     \
-    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow      \
-           || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
+    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \
+           || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\
     for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
         if ( (!(_xen))                                                     \
-             || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
+             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\
              || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES))                  \
                  < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
         {                                                                  \
@@ -1285,13 +1283,13 @@ do {                                    
     int _i;                                                                 \
     int _xen = !shadow_mode_external(_dom);                                 \
     shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn));                      \
-    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow ||     \
-           mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow);     \
+    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\
+           mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\
     for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                  \
     {                                                                       \
         if ( (!(_xen))                                                      \
              || !is_pv_32on64_domain(_dom)                                  \
-             || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow  \
+             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\
              || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) )           \
         {                                                                   \
             (_sl2e) = _sp + _i;                                             \
@@ -1313,7 +1311,7 @@ do {                                    
 do {                                                                    \
     int _i;                                                             \
     shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn));                  \
-    ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow);  \
+    ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\
     for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
     {                                                                   \
         (_sl3e) = _sp + _i;                                             \
@@ -1331,7 +1329,7 @@ do {                                    
     shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn));                  \
     int _xen = !shadow_mode_external(_dom);                             \
     int _i;                                                             \
-    ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow);  \
+    ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\
     for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
     {                                                                   \
         if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) )                  \
@@ -1506,7 +1504,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
          && shadow_type != SH_type_l2h_pae_shadow 
          && shadow_type != SH_type_l4_64_shadow )
         /* Lower-level shadow, not yet linked from a higher level */
-        mfn_to_shadow_page(smfn)->up = 0;
+        mfn_to_page(smfn)->up = 0;
 
 #if GUEST_PAGING_LEVELS == 4
 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 
@@ -1519,14 +1517,12 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
          * of them, decide that this isn't an old linux guest, and stop
          * pinning l3es.  This is not very quick but it doesn't happen
          * very often. */
-        struct list_head *l, *t;
-        struct shadow_page_info *sp;
+        struct page_info *sp, *t;
         struct vcpu *v2;
         int l4count = 0, vcpus = 0;
-        list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
-        {
-            sp = list_entry(l, struct shadow_page_info, list);
-            if ( sp->type == SH_type_l4_64_shadow )
+        page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows)
+        {
+            if ( sp->u.sh.type == SH_type_l4_64_shadow )
                 l4count++;
         }
         for_each_vcpu ( v->domain, v2 ) 
@@ -1534,11 +1530,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
         if ( l4count > 2 * vcpus ) 
         {
             /* Unpin all the pinned l3 tables, and don't pin any more. */
-            list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
+            page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows)
             {
-                sp = list_entry(l, struct shadow_page_info, list);
-                if ( sp->type == SH_type_l3_64_shadow )
-                    sh_unpin(v, shadow_page_to_mfn(sp));
+                if ( sp->u.sh.type == SH_type_l3_64_shadow )
+                    sh_unpin(v, page_to_mfn(sp));
             }
             v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
         }
@@ -1921,7 +1916,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
 void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l4e_t *sl4e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
     mfn_t gmfn, sl4mfn;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1929,7 +1924,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
     ASSERT(t == SH_type_l4_shadow);
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
     /* Decrement refcounts of all the old entries */
@@ -1950,7 +1945,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l3e_t *sl3e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
     mfn_t gmfn, sl3mfn;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1958,7 +1953,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
     ASSERT(t == SH_type_l3_shadow);
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
 
@@ -1980,7 +1975,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l2e_t *sl2e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
     mfn_t gmfn, sl2mfn;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1993,7 +1988,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
 #endif
 
     /* Record that the guest page isn't shadowed any more (in this type) */
-    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
 
@@ -2014,7 +2009,7 @@ void sh_destroy_l1_shadow(struct vcpu *v
 {
     struct domain *d = v->domain;
     shadow_l1e_t *sl1e;
-    u32 t = mfn_to_shadow_page(smfn)->type;
+    u32 t = mfn_to_page(smfn)->u.sh.type;
 
     SHADOW_DEBUG(DESTROY_SHADOW,
                   "%s(%05lx)\n", __func__, mfn_x(smfn));
@@ -2023,12 +2018,12 @@ void sh_destroy_l1_shadow(struct vcpu *v
     /* Record that the guest page isn't shadowed any more (in this type) */
     if ( t == SH_type_fl1_shadow )
     {
-        gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
+        gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back);
         delete_fl1_shadow_status(v, gfn, smfn);
     }
     else 
     {
-        mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+        mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
         delete_shadow_status(v, gmfn, t, smfn);
         shadow_demote(v, gmfn, t);
     }
@@ -2054,7 +2049,7 @@ void sh_destroy_monitor_table(struct vcp
 void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
 {
     struct domain *d = v->domain;
-    ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
+    ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table);
 
 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
     {
@@ -2298,7 +2293,7 @@ static int validate_gl2e(struct vcpu *v,
 
 #if SHADOW_PAGING_LEVELS == 3
         reserved_xen_slot = 
-            ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
+            ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) &&
              (shadow_index 
               >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
 #else /* SHADOW_PAGING_LEVELS == 2 */
@@ -2352,7 +2347,7 @@ static int validate_gl1e(struct vcpu *v,
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
-    gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
     if ( mfn_valid(gl1mfn) 
          && mfn_is_out_of_sync(gl1mfn) )
     {
@@ -2429,30 +2424,30 @@ void sh_resync_l1(struct vcpu *v, mfn_t 
  *      called in the *mode* of the vcpu that unsynced it.  Clear?  Good. */
 int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     mfn_t smfn;
 
     smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
     ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
     
     /* Up to l2 */
-    sp = mfn_to_shadow_page(smfn);
-    if ( sp->count != 1 || !sp->up )
+    sp = mfn_to_page(smfn);
+    if ( sp->u.sh.count != 1 || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
     ASSERT(mfn_valid(smfn));
 
 #if (SHADOW_PAGING_LEVELS == 4) 
     /* up to l3 */
-    sp = mfn_to_shadow_page(smfn);
-    if ( sp->count != 1 || !sp->up )
+    sp = mfn_to_page(smfn);
+    if ( sp->u.sh.count != 1 || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
     ASSERT(mfn_valid(smfn));
 
     /* up to l4 */
-    sp = mfn_to_shadow_page(smfn);
-    if ( sp->count != 1 
+    sp = mfn_to_page(smfn);
+    if ( sp->u.sh.count != 1
          || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2970,8 +2965,8 @@ static int sh_page_fault(struct vcpu *v,
                                         + shadow_l2_linear_offset(va)),
                                        sizeof(sl2e)) != 0)
                      || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
-                     || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page(
-                                      shadow_l2e_get_mfn(sl2e))->backpointer))
+                     || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
+                                      shadow_l2e_get_mfn(sl2e))->v.sh.back))
                      || unlikely(mfn_is_out_of_sync(gl1mfn)) )
                {
                    /* Hit the slow path as if there had been no 
@@ -3523,7 +3518,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
     // easier than invalidating all of the individual 4K pages).
     //
     sl1mfn = shadow_l2e_get_mfn(sl2e);
-    if ( mfn_to_shadow_page(sl1mfn)->type
+    if ( mfn_to_page(sl1mfn)->u.sh.type
          == SH_type_fl1_shadow )
     {
         flush_tlb_local();
@@ -3533,7 +3528,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Check to see if the SL1 is out of sync. */
     {
-        mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+        mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
         struct page_info *pg = mfn_to_page(gl1mfn);
         if ( mfn_valid(gl1mfn) 
              && page_is_out_of_sync(pg) )
@@ -3563,7 +3558,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
             }
 
             sl1mfn = shadow_l2e_get_mfn(sl2e);
-            gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+            gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
             pg = mfn_to_page(gl1mfn);
             
             if ( likely(sh_mfn_is_a_page_table(gl1mfn)
@@ -3968,7 +3963,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
         /* Need to repin the old toplevel shadow if it's been unpinned
          * by shadow_prealloc(): in PV mode we're still running on this
          * shadow and it's not safe to free it yet. */
-        if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) )
+        if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) )
         {
             SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn));
             domain_crash(v->domain);
@@ -4262,16 +4257,16 @@ int sh_rm_write_access_from_sl1p(struct 
 {
     int r;
     shadow_l1e_t *sl1p, sl1e;
-    struct shadow_page_info *sp;
+    struct page_info *sp;
 
     ASSERT(mfn_valid(gmfn));
     ASSERT(mfn_valid(smfn));
 
-    sp = mfn_to_shadow_page(smfn);
-
-    if ( sp->mbz != 0
-         || (sp->type != SH_type_l1_shadow
-             && sp->type != SH_type_fl1_shadow) )
+    sp = mfn_to_page(smfn);
+
+    if ( sp->count_info != 0
+         || (sp->u.sh.type != SH_type_l1_shadow
+             && sp->u.sh.type != SH_type_fl1_shadow) )
         goto fail;
 
     sl1p = sh_map_domain_page(smfn);
@@ -4410,7 +4405,7 @@ void sh_clear_shadow_entry(struct vcpu *
 void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
 /* Blank out a single shadow entry */
 {
-    switch ( mfn_to_shadow_page(smfn)->type )
+    switch ( mfn_to_page(smfn)->u.sh.type )
     {
     case SH_type_l1_shadow:
         (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
@@ -4443,7 +4438,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
              && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
         {
             (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
-            if ( mfn_to_shadow_page(sl1mfn)->type == 0 )
+            if ( mfn_to_page(sl1mfn)->u.sh.type == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
         }
@@ -4466,7 +4461,7 @@ int sh_remove_l2_shadow(struct vcpu *v, 
              && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
         {
             (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
-            if ( mfn_to_shadow_page(sl2mfn)->type == 0 )
+            if ( mfn_to_page(sl2mfn)->u.sh.type == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
         }
@@ -4488,7 +4483,7 @@ int sh_remove_l3_shadow(struct vcpu *v, 
              && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
         {
             (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
-            if ( mfn_to_shadow_page(sl3mfn)->type == 0 )
+            if ( mfn_to_page(sl3mfn)->u.sh.type == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
         }
@@ -4890,7 +4885,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
     int done = 0;
     
     /* Follow the backpointer */
-    gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
@@ -4980,7 +4975,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
+    gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Only L1's may be out of sync. */
@@ -5029,7 +5024,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
+    gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Only L1's may be out of sync. */
@@ -5076,7 +5071,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
     int done = 0;
 
     /* Follow the backpointer */
-    gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
+    gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
     /* Only L1's may be out of sync. */
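
Throughout multi.c the change is mechanical: shadow metadata is now read through struct page_info rather than the removed struct shadow_page_info. A short fragment mirroring the destroy-shadow preambles above (the function name is illustrative only, not part of the patch):

    static void example_destroy_preamble(struct vcpu *v, mfn_t smfn)
    {
        struct page_info *sp = mfn_to_page(smfn); /* was mfn_to_shadow_page(smfn) */
        unsigned int t = sp->u.sh.type;           /* was sp->type */
        mfn_t gmfn = _mfn(sp->v.sh.back);         /* was sp->backpointer */

        delete_shadow_status(v, gmfn, t, smfn);   /* call sites unchanged */
        shadow_demote(v, gmfn, t);
    }
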
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Feb 13 11:22:28 2009 +0900
@@ -220,60 +220,6 @@ extern void shadow_audit_tables(struct v
 #undef GUEST_LEVELS
 #endif /* CONFIG_PAGING_LEVELS == 4 */
 
-/******************************************************************************
- * Page metadata for shadow pages.
- */
-
-struct shadow_page_info
-{
-    union {
-        /* Ensures that shadow_page_info is same size as page_info. */
-        struct page_info page_info;
-
-        struct {
-            union {
-                /* When in use, guest page we're a shadow of */
-                unsigned long backpointer;
-                /* When free, order of the freelist we're on */
-                unsigned int order;
-            };
-            union {
-                /* When in use, next shadow in this hash chain */
-                struct shadow_page_info *next_shadow;
-                /* When free, TLB flush time when freed */
-                u32 tlbflush_timestamp;
-            };
-            struct {
-                unsigned long mbz;     /* Must be zero: count_info is here. */
-                unsigned long type:5;   /* What kind of shadow is this? */
-                unsigned long pinned:1; /* Is the shadow pinned? */
-                unsigned long count:26; /* Reference count */
-            } __attribute__((packed));
-            union {
-                /* For unused shadow pages, a list of pages of this order; for 
-                 * pinnable shadows, if pinned, a list of other pinned shadows
-                 * (see sh_type_is_pinnable() below for the definition of 
-                 * "pinnable" shadow types). */
-                struct list_head list;
-                /* For non-pinnable shadows, a higher entry that points
-                 * at us. */
-                paddr_t up;
-            };
-        };
-    };
-};
-
-/* The structure above *must* be no larger than a struct page_info
- * from mm.h, since we'll be using the same space in the frametable. 
- * Also, the mbz field must line up with the count_info field of normal 
- * pages, so they cannot be successfully get_page()d. */
-static inline void shadow_check_page_struct_offsets(void) {
-    BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
-                 sizeof (struct page_info));
-    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
-                 offsetof(struct page_info, count_info));
-};
-
 /* Shadow type codes */
 #define SH_type_none           (0U) /* on the shadow free list */
 #define SH_type_min_shadow     (1U)
@@ -528,22 +474,13 @@ mfn_t oos_snapshot_lookup(struct vcpu *v
  * MFN/page-info handling 
  */
 
-// Override mfn_to_page from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+/* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
-#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m))
-
-// Override page_to_mfn from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
 #undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
-#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg))
-
-// Override mfn_valid from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
 /* Override pagetable_t <-> struct page_info conversions to work with mfn_t */
 #undef pagetable_get_page
@@ -675,26 +612,26 @@ static inline int sh_get_ref(struct vcpu
 static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
 {
     u32 x, nx;
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
 
     ASSERT(mfn_valid(smfn));
 
-    x = sp->count;
+    x = sp->u.sh.count;
     nx = x + 1;
 
     if ( unlikely(nx >= 1U<<26) )
     {
-        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
-                       sp->backpointer, mfn_x(smfn));
+        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n",
+                       sp->v.sh.back, mfn_x(smfn));
         return 0;
     }
     
     /* Guarded by the shadow lock, so no need for atomic update */
-    sp->count = nx;
+    sp->u.sh.count = nx;
 
     /* We remember the first shadow entry that points to each shadow. */
     if ( entry_pa != 0 
-         && !sh_type_is_pinnable(v, sp->type) 
+         && !sh_type_is_pinnable(v, sp->u.sh.type)
          && sp->up == 0 ) 
         sp->up = entry_pa;
     
@@ -707,29 +644,29 @@ static inline void sh_put_ref(struct vcp
 static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
 {
     u32 x, nx;
-    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+    struct page_info *sp = mfn_to_page(smfn);
 
     ASSERT(mfn_valid(smfn));
-    ASSERT(sp->mbz == 0);
+    ASSERT(sp->count_info == 0);
 
     /* If this is the entry in the up-pointer, remove it */
     if ( entry_pa != 0 
-         && !sh_type_is_pinnable(v, sp->type) 
+         && !sh_type_is_pinnable(v, sp->u.sh.type)
          && sp->up == entry_pa ) 
         sp->up = 0;
 
-    x = sp->count;
+    x = sp->u.sh.count;
     nx = x - 1;
 
     if ( unlikely(x == 0) ) 
     {
         SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n",
-                     mfn_x(smfn), sp->count, sp->type);
+                     mfn_x(smfn), sp->u.sh.count, sp->u.sh.type);
         BUG();
     }
 
     /* Guarded by the shadow lock, so no need for atomic update */
-    sp->count = nx;
+    sp->u.sh.count = nx;
 
     if ( unlikely(nx == 0) ) 
         sh_destroy_shadow(v, smfn);
@@ -741,26 +678,26 @@ static inline void sh_put_ref(struct vcp
  * Returns 0 for failure, 1 for success. */
 static inline int sh_pin(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     
     ASSERT(mfn_valid(smfn));
-    sp = mfn_to_shadow_page(smfn);
-    ASSERT(sh_type_is_pinnable(v, sp->type));
-    if ( sp->pinned ) 
+    sp = mfn_to_page(smfn);
+    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+    if ( sp->u.sh.pinned )
     {
         /* Already pinned: take it out of the pinned-list so it can go 
          * at the front */
-        list_del(&sp->list);
+        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
     }
     else
     {
         /* Not pinned: pin it! */
         if ( !sh_get_ref(v, smfn, 0) )
             return 0;
-        sp->pinned = 1;
+        sp->u.sh.pinned = 1;
     }
     /* Put it at the head of the list of pinned shadows */
-    list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows);
+    page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
     return 1;
 }
 
@@ -768,15 +705,15 @@ static inline int sh_pin(struct vcpu *v,
  * of pinned shadows, and release the extra ref. */
 static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 {
-    struct shadow_page_info *sp;
+    struct page_info *sp;
     
     ASSERT(mfn_valid(smfn));
-    sp = mfn_to_shadow_page(smfn);
-    ASSERT(sh_type_is_pinnable(v, sp->type));
-    if ( sp->pinned )
+    sp = mfn_to_page(smfn);
+    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+    if ( sp->u.sh.pinned )
     {
-        sp->pinned = 0;
-        list_del(&sp->list);
+        sp->u.sh.pinned = 0;
+        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
         sp->up = 0; /* in case this stops being a pinnable type in future */
         sh_put_ref(v, smfn, 0);
     }
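
With struct shadow_page_info deleted from private.h, its fields map onto existing struct page_info storage. The correspondence, as implied by the hunks in this changeset (an illustrative summary only; the authoritative layout is struct page_info in the mm headers):

    /*
     *  shadow_page_info field   ->  struct page_info equivalent
     *  ----------------------       ---------------------------
     *  backpointer              ->  v.sh.back
     *  order (free pages)       ->  v.free.order
     *  next_shadow              ->  next_shadow()/set_next_shadow() accessors
     *  mbz                      ->  count_info (must remain zero for shadows)
     *  type                     ->  u.sh.type
     *  pinned                   ->  u.sh.pinned
     *  count                    ->  u.sh.count
     *  list                     ->  page_list_* linkage
     *  up / tlbflush_timestamp  ->  up / tlbflush_timestamp (names unchanged)
     */
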
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/numa.c       Fri Feb 13 11:22:28 2009 +0900
@@ -312,7 +312,7 @@ static void dump_numa(unsigned char key)
                for_each_online_node(i)
                        page_num_node[i] = 0;
 
-               list_for_each_entry(page, &d->page_list, list)
+               page_list_for_each(page, &d->page_list)
                {
                        i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
                        page_num_node[i]++;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/physdev.c    Fri Feb 13 11:22:28 2009 +0900
@@ -62,7 +62,7 @@ static int physdev_map_pirq(struct physd
                 ret = -EINVAL;
                 goto free_domain;
             }
-            vector = IO_APIC_VECTOR(map->index);
+            vector = domain_irq_to_vector(current->domain, map->index);
             if ( !vector )
             {
                 dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
@@ -75,7 +75,7 @@ static int physdev_map_pirq(struct physd
         case MAP_PIRQ_TYPE_MSI:
             vector = map->index;
             if ( vector == -1 )
-                vector = assign_irq_vector(AUTO_ASSIGN);
+                vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
 
             if ( vector < 0 || vector >= NR_VECTORS )
             {
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/setup.c      Fri Feb 13 11:22:28 2009 +0900
@@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb
     unsigned int initrdidx = 1;
     multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
-    unsigned long nr_pages, modules_length, modules_headroom = -1;
+    unsigned long nr_pages, modules_length, modules_headroom;
     unsigned long allocator_bitmap_end;
     int i, e820_warn = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
@@ -618,6 +618,12 @@ void __init __start_xen(unsigned long mb
      */
     modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
 
+    /* ensure mod[0] is mapped before parsing */
+    bootstrap_map(mod[0].mod_start, mod[0].mod_end);
+    modules_headroom = bzimage_headroom(
+                      (char *)(unsigned long)mod[0].mod_start,
+                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
@@ -636,7 +642,8 @@ void __init __start_xen(unsigned long mb
             s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
 
 #if defined(CONFIG_X86_64)
-#define reloc_size ((__pa(&_end) + mask) & ~mask)
+/* Relocate Xen image, allocation bitmap, and one page of padding. */
+#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
         /* Is the region suitable for relocating Xen? */
         if ( !xen_phys_start && ((e-s) >= reloc_size) )
         {
@@ -721,11 +728,6 @@ void __init __start_xen(unsigned long mb
         }
 #endif
 
-        if ( modules_headroom == -1 )
-            modules_headroom = bzimage_headroom(
-                      (char *)(unsigned long)mod[0].mod_start,
-                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
-
         /* Is the region suitable for relocating the multiboot modules? */
         if ( !initial_images_start && (s < e) &&
              ((e-s) >= (modules_length+modules_headroom)) )
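
On x86-64 the relocation window now has to cover the allocation bitmap as well as the image itself. A small standalone calculation of that extra headroom under assumed example values (4KiB frames and 4GiB of RAM; the numbers are not taken from the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096;          /* assumed frame size */
        unsigned long max_page  = 1UL << 20;     /* assumed: 4GiB / 4KiB frames */
        unsigned long bitmap    = max_page / 8;  /* one bit per frame */

        /* Headroom added to reloc_size beyond __pa(&_end), before rounding. */
        printf("bitmap = %lu KiB, padding = %lu KiB\n",
               bitmap >> 10, page_size >> 10);
        return 0;
    }
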
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/smpboot.c    Fri Feb 13 11:22:28 2009 +0900
@@ -1256,8 +1256,6 @@ int __cpu_disable(void)
        mdelay(1);
        local_irq_disable();
 
-       cpufreq_del_cpu(cpu);
-
        time_suspend();
 
        cpu_mcheck_disable();
@@ -1320,6 +1318,8 @@ int cpu_down(unsigned int cpu)
        }
 
        printk("Prepare to bring CPU%d down...\n", cpu);
+
+       cpufreq_del_cpu(cpu);
 
        err = stop_machine_run(take_cpu_down, NULL, cpu);
        if (err < 0)
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_32/xen.lds.S
--- a/xen/arch/x86/x86_32/xen.lds.S     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_32/xen.lds.S     Fri Feb 13 11:22:28 2009 +0900
@@ -91,6 +91,7 @@ SECTIONS
        *(.exit.text)
        *(.exit.data)
        *(.exitcall.exit)
+       *(.eh_frame)
        }
 
   /* Stabs debugging sections.  */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_64/entry.S       Fri Feb 13 11:22:28 2009 +0900
@@ -739,7 +739,6 @@ ENTRY(hypercall_args_table)
         .byte 1 /* do_sysctl            */  /* 35 */
         .byte 1 /* do_domctl            */
         .byte 2 /* do_kexec             */
-        .byte 1 /* do_xsm_op            */
         .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/xen.lds.S
--- a/xen/arch/x86/x86_64/xen.lds.S     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_64/xen.lds.S     Fri Feb 13 11:22:28 2009 +0900
@@ -89,6 +89,7 @@ SECTIONS
        *(.exit.text)
        *(.exit.data)
        *(.exitcall.exit)
+       *(.eh_frame)
        }
 
   /* Stabs debugging sections.  */
diff -r af992824b5cf -r c7cba853583d xen/common/domain.c
--- a/xen/common/domain.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/domain.c       Fri Feb 13 11:22:28 2009 +0900
@@ -41,7 +41,6 @@ boolean_param("dom0_vcpus_pin", opt_dom0
 
 /* set xen as default cpufreq */
 enum cpufreq_controller cpufreq_controller = FREQCTL_xen;
-struct cpufreq_governor *cpufreq_opt_governor;
 
 static void __init setup_cpufreq_option(char *str)
 {
@@ -70,19 +69,6 @@ static void __init setup_cpufreq_option(
             cpufreq_cmdline_parse(arg);
 }
 custom_param("cpufreq", setup_cpufreq_option);
-
-static void __init setup_cpufreq_gov_option(char *str)
-{
-    if ( !strcmp(str, "userspace") )
-        cpufreq_opt_governor = &cpufreq_gov_userspace;
-    else if ( !strcmp(str, "performance") )
-        cpufreq_opt_governor = &cpufreq_gov_performance;
-    else if ( !strcmp(str, "powersave") )
-        cpufreq_opt_governor = &cpufreq_gov_powersave;
-    else if ( !strcmp(str, "ondemand") )
-        cpufreq_opt_governor = &cpufreq_gov_dbs;
-}
-custom_param("cpufreq_governor", setup_cpufreq_gov_option);
 
 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
 DEFINE_SPINLOCK(domlist_update_lock);
@@ -233,8 +219,8 @@ struct domain *domain_create(
     spin_lock_init(&d->page_alloc_lock);
     spin_lock_init(&d->shutdown_lock);
     spin_lock_init(&d->hypercall_deadlock_mutex);
-    INIT_LIST_HEAD(&d->page_list);
-    INIT_LIST_HEAD(&d->xenpage_list);
+    INIT_PAGE_LIST_HEAD(&d->page_list);
+    INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
     if ( domcr_flags & DOMCRF_hvm )
         d->is_hvm = 1;
diff -r af992824b5cf -r c7cba853583d xen/common/grant_table.c
--- a/xen/common/grant_table.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/grant_table.c  Fri Feb 13 11:22:28 2009 +0900
@@ -1192,7 +1192,7 @@ gnttab_transfer(
         /* Okay, add the page to 'e'. */
         if ( unlikely(e->tot_pages++ == 0) )
             get_knownalive_domain(e);
-        list_add_tail(&page->list, &e->page_list);
+        page_list_add_tail(page, &e->page_list);
         page_set_owner(page, e);
 
         spin_unlock(&e->page_alloc_lock);
diff -r af992824b5cf -r c7cba853583d xen/common/hvm/save.c
--- a/xen/common/hvm/save.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/hvm/save.c     Fri Feb 13 11:22:28 2009 +0900
@@ -26,6 +26,7 @@
 #include <xen/version.h>
 #include <public/version.h>
 #include <xen/sched.h>
+#include <xen/guest_access.h>
 
 #include <asm/hvm/support.h>
 
@@ -75,6 +76,53 @@ size_t hvm_save_size(struct domain *d)
     return sz;
 }
 
+/* Extract a single instance of a save record, by marshalling all
+ * records of that type and copying out the one we need. */
+int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, 
+                 XEN_GUEST_HANDLE_64(uint8) handle)
+{
+    int rv = 0;
+    size_t sz = 0;
+    struct vcpu *v;
+    hvm_domain_context_t ctxt = { 0, };
+
+    if ( d->is_dying 
+         || typecode > HVM_SAVE_CODE_MAX 
+         || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor)
+         || hvm_sr_handlers[typecode].save == NULL )
+        return -EINVAL;
+
+    if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU )
+        for_each_vcpu(d, v)
+            sz += hvm_sr_handlers[typecode].size;
+    else 
+        sz = hvm_sr_handlers[typecode].size;
+    
+    if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz )
+        return -EINVAL;
+
+    ctxt.size = sz;
+    ctxt.data = xmalloc_bytes(sz);
+    if ( !ctxt.data )
+        return -ENOMEM;
+
+    if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 )
+    {
+        gdprintk(XENLOG_ERR, 
+                 "HVM save: failed to save type %"PRIu16"\n", typecode);
+        rv = -EFAULT;
+    }
+    else if ( copy_to_guest(handle,
+                            ctxt.data 
+                            + (instance * hvm_sr_handlers[typecode].size) 
+                            + sizeof (struct hvm_save_descriptor), 
+                            hvm_sr_handlers[typecode].size
+                            - sizeof (struct hvm_save_descriptor)) )
+        rv = -EFAULT;
+
+    xfree(ctxt.data);
+    return rv;
+}
 
 int hvm_save(struct domain *d, hvm_domain_context_t *h)
 {
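
The new hvm_save_one() marshals every record of the requested type into a temporary buffer and then copies a single record's payload, with its descriptor header stripped, out to the guest. A sketch of the offset arithmetic it relies on (the helper name is illustrative, not part of the patch):

    /* For instance i of a record type whose per-record size is record_size,
     * the payload copied to the guest starts just after that record's
     * descriptor within the marshalled buffer. */
    static size_t record_payload_offset(uint16_t instance, size_t record_size)
    {
        return instance * record_size + sizeof(struct hvm_save_descriptor);
    }

The bound checked before allocating, (instance + 1) * size <= sz, simply ensures the requested instance lies inside the marshalled buffer; per-vcpu record types contribute one record per VCPU to sz.
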
diff -r af992824b5cf -r c7cba853583d xen/common/memory.c
--- a/xen/common/memory.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/memory.c       Fri Feb 13 11:22:28 2009 +0900
@@ -218,8 +218,8 @@ static long memory_exchange(XEN_GUEST_HA
 static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
 {
     struct xen_memory_exchange exch;
-    LIST_HEAD(in_chunk_list);
-    LIST_HEAD(out_chunk_list);
+    PAGE_LIST_HEAD(in_chunk_list);
+    PAGE_LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
     xen_pfn_t     gpfn, gmfn, mfn;
     unsigned long i, j, k;
@@ -325,7 +325,7 @@ static long memory_exchange(XEN_GUEST_HA
                     goto fail;
                 }
 
-                list_add(&page->list, &in_chunk_list);
+                page_list_add(page, &in_chunk_list);
             }
         }
 
@@ -339,7 +339,7 @@ static long memory_exchange(XEN_GUEST_HA
                 goto fail;
             }
 
-            list_add(&page->list, &out_chunk_list);
+            page_list_add(page, &out_chunk_list);
         }
 
         /*
@@ -347,10 +347,8 @@ static long memory_exchange(XEN_GUEST_HA
          */
 
         /* Destroy final reference to each input page. */
-        while ( !list_empty(&in_chunk_list) )
-        {
-            page = list_entry(in_chunk_list.next, struct page_info, list);
-            list_del(&page->list);
+        while ( (page = page_list_remove_head(&in_chunk_list)) )
+        {
             if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                 BUG();
             mfn = page_to_mfn(page);
@@ -360,10 +358,8 @@ static long memory_exchange(XEN_GUEST_HA
 
         /* Assign each output page to the domain. */
         j = 0;
-        while ( !list_empty(&out_chunk_list) )
-        {
-            page = list_entry(out_chunk_list.next, struct page_info, list);
-            list_del(&page->list);
+        while ( (page = page_list_remove_head(&out_chunk_list)) )
+        {
             if ( assign_pages(d, page, exch.out.extent_order,
                               MEMF_no_refcount) )
                 BUG();
@@ -399,21 +395,13 @@ static long memory_exchange(XEN_GUEST_HA
      */
  fail:
     /* Reassign any input pages we managed to steal. */
-    while ( !list_empty(&in_chunk_list) )
-    {
-        page = list_entry(in_chunk_list.next, struct page_info, list);
-        list_del(&page->list);
+    while ( (page = page_list_remove_head(&in_chunk_list)) )
         if ( assign_pages(d, page, 0, MEMF_no_refcount) )
             BUG();
-    }
 
     /* Free any output pages we managed to allocate. */
-    while ( !list_empty(&out_chunk_list) )
-    {
-        page = list_entry(out_chunk_list.next, struct page_info, list);
-        list_del(&page->list);
+    while ( (page = page_list_remove_head(&out_chunk_list)) )
         free_domheap_pages(page, exch.out.extent_order);
-    }
 
     exch.nr_exchanged = i << in_chunk_order;
 
diff -r af992824b5cf -r c7cba853583d xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/page_alloc.c   Fri Feb 13 11:22:28 2009 +0900
@@ -71,7 +71,7 @@ integer_param("dma_bits", dma_bitsize);
 #endif
 
 static DEFINE_SPINLOCK(page_scrub_lock);
-LIST_HEAD(page_scrub_list);
+PAGE_LIST_HEAD(page_scrub_list);
 static unsigned long scrub_pages;
 
 /*********************
@@ -264,7 +264,7 @@ unsigned long __init alloc_boot_pages(
 #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
                           (fls(page_to_mfn(pg)) - 1))
 
-typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
+typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
@@ -272,13 +272,16 @@ static unsigned long *avail[MAX_NUMNODES
 
 static DEFINE_SPINLOCK(heap_lock);
 
-static void init_node_heap(int node)
+static unsigned long init_node_heap(int node, unsigned long mfn,
+                                    unsigned long nr)
 {
     /* First node to be discovered has its heap metadata statically alloced. */
     static heap_by_zone_and_order_t _heap_static;
     static unsigned long avail_static[NR_ZONES];
     static int first_node_initialised;
-
+    unsigned long needed = (sizeof(**_heap) +
+                            sizeof(**avail) * NR_ZONES +
+                            PAGE_SIZE - 1) >> PAGE_SHIFT;
     int i, j;
 
     if ( !first_node_initialised )
@@ -286,19 +289,40 @@ static void init_node_heap(int node)
         _heap[node] = &_heap_static;
         avail[node] = avail_static;
         first_node_initialised = 1;
+        needed = 0;
+    }
+#ifdef DIRECTMAP_VIRT_END
+    else if ( nr >= needed &&
+              mfn + needed <= virt_to_mfn(DIRECTMAP_VIRT_END) )
+    {
+        _heap[node] = mfn_to_virt(mfn);
+        avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES;
+    }
+#endif
+    else if ( get_order_from_bytes(sizeof(**_heap)) ==
+              get_order_from_pages(needed) )
+    {
+        _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0);
+        BUG_ON(!_heap[node]);
+        avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) -
+                      sizeof(**avail) * NR_ZONES;
+        needed = 0;
     }
     else
     {
         _heap[node] = xmalloc(heap_by_zone_and_order_t);
         avail[node] = xmalloc_array(unsigned long, NR_ZONES);
         BUG_ON(!_heap[node] || !avail[node]);
+        needed = 0;
     }
 
     memset(avail[node], 0, NR_ZONES * sizeof(long));
 
     for ( i = 0; i < NR_ZONES; i++ )
         for ( j = 0; j <= MAX_ORDER; j++ )
-            INIT_LIST_HEAD(&(*_heap[node])[i][j]);
+            INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]);
+
+    return needed;
 }
 
 /* Allocate 2^@order contiguous pages. */
@@ -340,7 +364,7 @@ static struct page_info *alloc_heap_page
 
             /* Find smallest order which can satisfy the request. */
             for ( j = order; j <= MAX_ORDER; j++ )
-                if ( !list_empty(&heap(node, zone, j)) )
+                if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                     goto found;
         } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
@@ -354,14 +378,11 @@ static struct page_info *alloc_heap_page
     return NULL;
 
  found: 
-    pg = list_entry(heap(node, zone, j).next, struct page_info, list);
-    list_del(&pg->list);
-
     /* We may have to halve the chunk a number of times. */
     while ( j != order )
     {
         PFN_ORDER(pg) = --j;
-        list_add_tail(&pg->list, &heap(node, zone, j));
+        page_list_add_tail(pg, &heap(node, zone, j));
         pg += 1 << j;
     }
     
@@ -378,10 +399,13 @@ static struct page_info *alloc_heap_page
         /* Reference count must continuously be zero for free pages. */
         BUG_ON(pg[i].count_info != 0);
 
-        /* Add in any extra CPUs that need flushing because of this page. */
-        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
-        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
-        cpus_or(mask, mask, extra_cpus_mask);
+        if ( pg[i].u.free.need_tlbflush )
+        {
+            /* Add in extra CPUs that need flushing because of this page. */
+            cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
+            tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+            cpus_or(mask, mask, extra_cpus_mask);
+        }
 
         /* Initialise fields which have other uses for free pages. */
         pg[i].u.inuse.type_info = 0;
@@ -404,7 +428,6 @@ static void free_heap_pages(
     unsigned long mask;
     unsigned int i, node = phys_to_nid(page_to_maddr(pg));
     unsigned int zone = page_to_zone(pg);
-    struct domain *d;
 
     ASSERT(order <= MAX_ORDER);
     ASSERT(node >= 0);
@@ -425,15 +448,10 @@ static void free_heap_pages(
          */
         pg[i].count_info = 0;
 
-        if ( (d = page_get_owner(&pg[i])) != NULL )
-        {
+        /* If a page has no owner it will need no safety TLB flush. */
+        pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
+        if ( pg[i].u.free.need_tlbflush )
             pg[i].tlbflush_timestamp = tlbflush_current_time();
-            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
-        }
-        else
-        {
-            cpus_clear(pg[i].u.free.cpumask);
-        }
     }
 
     spin_lock(&heap_lock);
@@ -452,8 +470,8 @@ static void free_heap_pages(
             if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                  (PFN_ORDER(pg-mask) != order) )
                 break;
-            list_del(&(pg-mask)->list);
             pg -= mask;
+            page_list_del(pg, &heap(node, zone, order));
         }
         else
         {
@@ -461,7 +479,7 @@ static void free_heap_pages(
             if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                  (PFN_ORDER(pg+mask) != order) )
                 break;
-            list_del(&(pg+mask)->list);
+            page_list_del(pg + mask, &heap(node, zone, order));
         }
         
         order++;
@@ -471,7 +489,7 @@ static void free_heap_pages(
     }
 
     PFN_ORDER(pg) = order;
-    list_add_tail(&pg->list, &heap(node, zone, order));
+    page_list_add_tail(pg, &heap(node, zone, order));
 
     spin_unlock(&heap_lock);
 }
@@ -482,7 +500,6 @@ static void free_heap_pages(
  * latter is not on a MAX_ORDER boundary, then we reserve the page by
  * not freeing it to the buddy allocator.
  */
-#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
 static void init_heap_pages(
     struct page_info *pg, unsigned long nr_pages)
 {
@@ -491,25 +508,33 @@ static void init_heap_pages(
 
     nid_prev = phys_to_nid(page_to_maddr(pg-1));
 
-    for ( i = 0; i < nr_pages; i++ )
+    for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ )
     {
         nid_curr = phys_to_nid(page_to_maddr(pg+i));
 
         if ( unlikely(!avail[nid_curr]) )
-            init_node_heap(nid_curr);
+        {
+            unsigned long n;
+
+            n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i);
+            if ( n )
+            {
+                BUG_ON(i + n > nr_pages);
+                i += n - 1;
+                continue;
+            }
+        }
 
         /*
-         * free pages of the same node, or if they differ, but are on a
-         * MAX_ORDER alignement boundary (which already get reserved)
+         * Free pages of the same node, or if they differ, but are on a
+         * MAX_ORDER alignment boundary (which already get reserved).
          */
-         if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
-                                         MAX_ORDER_ALIGNED) )
-             free_heap_pages(pg+i, 0);
-         else
-             printk("Reserving non-aligned node boundary @ mfn %lu\n",
-                    page_to_mfn(pg+i));
-
-        nid_prev = nid_curr;
+        if ( (nid_curr == nid_prev) ||
+             !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) )
+            free_heap_pages(pg+i, 0);
+        else
+            printk("Reserving non-aligned node boundary @ mfn %#lx\n",
+                   page_to_mfn(pg+i));
     }
 }
 
@@ -537,7 +562,7 @@ static unsigned long avail_heap_pages(
 #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
 void __init end_boot_allocator(void)
 {
-    unsigned long i;
+    unsigned long i, nr = 0;
     int curr_free, next_free;
 
     /* Pages that are free now go to the domain sub-allocator. */
@@ -550,8 +575,15 @@ void __init end_boot_allocator(void)
         if ( next_free )
             map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
         if ( curr_free )
-            init_heap_pages(mfn_to_page(i), 1);
-    }
+            ++nr;
+        else if ( nr )
+        {
+            init_heap_pages(mfn_to_page(i - nr), nr);
+            nr = 0;
+        }
+    }
+    if ( nr )
+        init_heap_pages(mfn_to_page(i - nr), nr);
 
     if ( !dma_bitsize && (num_online_nodes() > 1) )
     {
@@ -786,7 +818,7 @@ int assign_pages(
         page_set_owner(&pg[i], d);
         wmb(); /* Domain pointer must be visible before updating refcnt. */
         pg[i].count_info = PGC_allocated | 1;
-        list_add_tail(&pg[i].list, &d->page_list);
+        page_list_add_tail(&pg[i], &d->page_list);
     }
 
     spin_unlock(&d->page_alloc_lock);
@@ -844,7 +876,7 @@ void free_domheap_pages(struct page_info
         spin_lock_recursive(&d->page_alloc_lock);
 
         for ( i = 0; i < (1 << order); i++ )
-            list_del(&pg[i].list);
+            page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list);
 
         d->xenheap_pages -= 1 << order;
         drop_dom_ref = (d->xenheap_pages == 0);
@@ -859,7 +891,7 @@ void free_domheap_pages(struct page_info
         for ( i = 0; i < (1 << order); i++ )
         {
             BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
-            list_del(&pg[i].list);
+            page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
         }
 
         d->tot_pages -= 1 << order;
@@ -882,7 +914,7 @@ void free_domheap_pages(struct page_info
             {
                 page_set_owner(&pg[i], NULL);
                 spin_lock(&page_scrub_lock);
-                list_add(&pg[i].list, &page_scrub_list);
+                page_list_add(&pg[i], &page_scrub_list);
                 scrub_pages++;
                 spin_unlock(&page_scrub_lock);
             }
@@ -965,7 +997,7 @@ static DEFINE_PER_CPU(struct timer, page
 
 static void page_scrub_softirq(void)
 {
-    struct list_head *ent;
+    PAGE_LIST_HEAD(list);
     struct page_info  *pg;
     void             *p;
     int               i;
@@ -983,32 +1015,26 @@ static void page_scrub_softirq(void)
     do {
         spin_lock(&page_scrub_lock);
 
-        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
+        /* Peel up to 16 pages from the list. */
+        for ( i = 0; i < 16; i++ )
+        {
+            if ( !(pg = page_list_remove_head(&page_scrub_list)) )
+                break;
+            page_list_add_tail(pg, &list);
+        }
+        
+        if ( unlikely(i == 0) )
         {
             spin_unlock(&page_scrub_lock);
             goto out;
         }
-        
-        /* Peel up to 16 pages from the list. */
-        for ( i = 0; i < 16; i++ )
-        {
-            if ( ent->next == &page_scrub_list )
-                break;
-            ent = ent->next;
-        }
-        
-        /* Remove peeled pages from the list. */
-        ent->next->prev = &page_scrub_list;
-        page_scrub_list.next = ent->next;
-        scrub_pages -= (i+1);
+
+        scrub_pages -= i;
 
         spin_unlock(&page_scrub_lock);
 
-        /* Working backwards, scrub each page in turn. */
-        while ( ent != &page_scrub_list )
-        {
-            pg = list_entry(ent, struct page_info, list);
-            ent = ent->prev;
+        /* Scrub each page in turn. */
+        while ( (pg = page_list_remove_head(&list)) ) {
             p = map_domain_page(page_to_mfn(pg));
             scrub_page(p);
             unmap_domain_page(p);
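
For context on the new list idiom used throughout the hunks above: the open-coded list_empty()/list_entry()/list_del() sequences are replaced by page_list_add_tail()/page_list_remove_head() operating on a page_list_head. The stand-alone C sketch below models only that consumer-side pattern; the toy struct page_info, the pointer-based linking and the helper bodies are assumptions for illustration (the real implementation links pages by MFN), not Xen's code.

/* Toy model of the page_list_* consumer pattern (not Xen's implementation). */
#include <stdio.h>
#include <stddef.h>

struct page_info {
    unsigned long mfn;           /* stand-in for the real frame metadata */
    struct page_info *next;      /* the real code links pages by MFN     */
};

struct page_list_head {
    struct page_info *head, *tail;
};

#define PAGE_LIST_HEAD(name) struct page_list_head name = { NULL, NULL }

static void page_list_add_tail(struct page_info *pg, struct page_list_head *l)
{
    pg->next = NULL;
    if ( l->tail )
        l->tail->next = pg;
    else
        l->head = pg;
    l->tail = pg;
}

static struct page_info *page_list_remove_head(struct page_list_head *l)
{
    struct page_info *pg = l->head;

    if ( pg )
    {
        l->head = pg->next;
        if ( !l->head )
            l->tail = NULL;
    }
    return pg;
}

int main(void)
{
    struct page_info pages[4] = { { 10, NULL }, { 11, NULL },
                                  { 12, NULL }, { 13, NULL } };
    PAGE_LIST_HEAD(chunk);
    struct page_info *pg;
    int i;

    for ( i = 0; i < 4; i++ )
        page_list_add_tail(&pages[i], &chunk);

    /* The drain-the-list idiom used by memory_exchange() and the scrubber. */
    while ( (pg = page_list_remove_head(&chunk)) )
        printf("processing mfn %lu\n", pg->mfn);

    return 0;
}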
diff -r af992824b5cf -r c7cba853583d xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/char/serial.c Fri Feb 13 11:22:28 2009 +0900
@@ -471,7 +471,7 @@ void serial_suspend(void)
     int i, irq;
     for ( i = 0; i < ARRAY_SIZE(com); i++ )
         if ( (irq = serial_irq(i)) >= 0 )
-            free_irq(irq);
+            release_irq(irq);
 }
 
 void serial_resume(void)
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq.c     Fri Feb 13 11:22:28 2009 +0900
@@ -46,6 +46,9 @@
 #include <acpi/acpi.h>
 #include <acpi/cpufreq/cpufreq.h>
 
+static unsigned int usr_max_freq, usr_min_freq;
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
+
 struct cpufreq_dom {
     unsigned int       dom;
     cpumask_t          map;
@@ -53,6 +56,7 @@ struct cpufreq_dom {
 };
 static LIST_HEAD(cpufreq_dom_list_head);
 
+struct cpufreq_governor *cpufreq_opt_governor;
 LIST_HEAD(cpufreq_governor_list);
 
 struct cpufreq_governor *__find_governor(const char *governor)
@@ -213,6 +217,9 @@ int cpufreq_add_cpu(unsigned int cpu)
         perf->domain_info.num_processors) {
         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
         policy->governor = NULL;
+
+        cpufreq_cmdline_common_para(&new_policy);
+
         ret = __cpufreq_set_policy(policy, &new_policy);
         if (ret) {
             if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR)
@@ -467,3 +474,69 @@ out:
     return ret;
 }
 
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
+{
+    if (usr_max_freq)
+        new_policy->max = usr_max_freq;
+    if (usr_min_freq)
+        new_policy->min = usr_min_freq;
+}
+
+static int __init cpufreq_handle_common_option(const char *name, const char *val)
+{
+    if (!strcmp(name, "maxfreq") && val) {
+        usr_max_freq = simple_strtoul(val, NULL, 0);
+        return 1;
+    }
+
+    if (!strcmp(name, "minfreq") && val) {
+        usr_min_freq = simple_strtoul(val, NULL, 0);
+        return 1;
+    }
+
+    return 0;
+}
+
+void __init cpufreq_cmdline_parse(char *str)
+{
+    static struct cpufreq_governor *__initdata cpufreq_governors[] =
+    {
+        &cpufreq_gov_userspace,
+        &cpufreq_gov_dbs,
+        &cpufreq_gov_performance,
+        &cpufreq_gov_powersave
+    };
+    unsigned int gov_index = 0;
+
+    do {
+        char *val, *end = strchr(str, ',');
+        unsigned int i;
+
+        if (end)
+            *end++ = '\0';
+        val = strchr(str, '=');
+        if (val)
+            *val++ = '\0';
+
+        if (!cpufreq_opt_governor) {
+            if (!val) {
+                for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) {
+                    if (!strcmp(str, cpufreq_governors[i]->name)) {
+                        cpufreq_opt_governor = cpufreq_governors[i];
+                        gov_index = i;
+                        str = NULL;
+                        break;
+                    }
+                }
+            } else {
+                cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR;
+            }
+        }
+
+        if (str && !cpufreq_handle_common_option(str, val) &&
+            cpufreq_governors[gov_index]->handle_option)
+            cpufreq_governors[gov_index]->handle_option(str, val);
+
+        str = end;
+    } while (str);
+}
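
The new cpufreq_cmdline_parse() above consumes a comma-separated option string of the form "governor,name=value,..." (the governor names and the maxfreq=/minfreq= keys are those appearing in the hunks above). Below is a minimal stand-alone sketch of just the tokenising loop, with a hypothetical input string; simple_strtoul(), the governor table and the handle_option dispatch are deliberately omitted.

/* Stand-alone sketch of the "governor,name=value,..." tokenising loop. */
#include <stdio.h>
#include <string.h>

static void parse(char *str)
{
    do {
        char *val, *end = strchr(str, ',');

        if ( end )
            *end++ = '\0';
        val = strchr(str, '=');
        if ( val )
            *val++ = '\0';

        if ( val )
            printf("option '%s' = '%s'\n", str, val);
        else
            printf("governor '%s'\n", str);

        str = end;
    } while ( str );
}

int main(void)
{
    /* Hypothetical option string, for illustration only. */
    char opts[] = "ondemand,rate=50000,up_threshold=80,maxfreq=2000000";

    parse(opts);
    return 0;
}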
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_misc_governors.c
--- a/xen/drivers/cpufreq/cpufreq_misc_governors.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq_misc_governors.c      Fri Feb 13 11:22:28 2009 +0900
@@ -18,6 +18,7 @@
 #include <xen/sched.h>
 #include <acpi/cpufreq/cpufreq.h>
 
+static unsigned int usr_speed;
 
 /*
  * cpufreq userspace governor
@@ -26,6 +27,7 @@ static int cpufreq_governor_userspace(st
                                       unsigned int event)
 {
     int ret = 0;
+    unsigned int freq;
 
     if (!policy)
         return -EINVAL;
@@ -35,12 +37,17 @@ static int cpufreq_governor_userspace(st
     case CPUFREQ_GOV_STOP:
         break;
     case CPUFREQ_GOV_LIMITS:
-        if (policy->max < policy->cur)
+        freq = usr_speed ? : policy->cur;
+        if (policy->max < freq)
             ret = __cpufreq_driver_target(policy, policy->max,
                         CPUFREQ_RELATION_H);
-        else if (policy->min > policy->cur)
+        else if (policy->min > freq)
             ret = __cpufreq_driver_target(policy, policy->min,
                         CPUFREQ_RELATION_L);
+        else if (usr_speed)
+            ret = __cpufreq_driver_target(policy, freq,
+                        CPUFREQ_RELATION_L);
+
         break;
     default:
         ret = -EINVAL;
@@ -50,9 +57,17 @@ static int cpufreq_governor_userspace(st
     return ret;
 }
 
+static void __init 
+cpufreq_userspace_handle_option(const char *name, const char *val)
+{
+    if (!strcmp(name, "speed") && val)
+        usr_speed = simple_strtoul(val, NULL, 0);
+}
+
 struct cpufreq_governor cpufreq_gov_userspace = {
     .name = "userspace",
     .governor = cpufreq_governor_userspace,
+    .handle_option = cpufreq_userspace_handle_option
 };
 
 static int __init cpufreq_gov_userspace_init(void)
@@ -61,7 +76,7 @@ static int __init cpufreq_gov_userspace_
 }
 __initcall(cpufreq_gov_userspace_init);
 
-static void cpufreq_gov_userspace_exit(void)
+static void __exit cpufreq_gov_userspace_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_userspace);
 }
@@ -106,7 +121,7 @@ static int __init cpufreq_gov_performanc
 }
 __initcall(cpufreq_gov_performance_init);
 
-static void cpufreq_gov_performance_exit(void)
+static void __exit cpufreq_gov_performance_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_performance);
 }
@@ -151,7 +166,7 @@ static int __init cpufreq_gov_powersave_
 }
 __initcall(cpufreq_gov_powersave_init);
 
-static void cpufreq_gov_powersave_exit(void)
+static void __exit cpufreq_gov_powersave_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_powersave);
 }
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_ondemand.c
--- a/xen/drivers/cpufreq/cpufreq_ondemand.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c    Fri Feb 13 11:22:28 2009 +0900
@@ -281,9 +281,50 @@ int cpufreq_governor_dbs(struct cpufreq_
     return 0;
 }
 
+static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
+{
+    if ( !strcmp(name, "rate") && val )
+    {
+        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
+    }
+    else if ( !strcmp(name, "up_threshold") && val )
+    {
+        unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
+        {
+            printk(XENLOG_WARNING "cpufreq/ondemand: "
+                   "specified threshold too low, using %d\n",
+                   MIN_FREQUENCY_UP_THRESHOLD);
+            tmp = MIN_FREQUENCY_UP_THRESHOLD;
+        }
+        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
+        {
+            printk(XENLOG_WARNING "cpufreq/ondemand: "
+                   "specified threshold too high, using %d\n",
+                   MAX_FREQUENCY_UP_THRESHOLD);
+            tmp = MAX_FREQUENCY_UP_THRESHOLD;
+        }
+        dbs_tuners_ins.up_threshold = tmp;
+    }
+    else if ( !strcmp(name, "bias") && val )
+    {
+        unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+        if ( tmp > 1000 )
+        {
+            printk(XENLOG_WARNING "cpufreq/ondemand: "
+                   "specified bias too high, using 1000\n");
+            tmp = 1000;
+        }
+        dbs_tuners_ins.powersave_bias = tmp;
+    }
+}
+
 struct cpufreq_governor cpufreq_gov_dbs = {
     .name = "ondemand",
     .governor = cpufreq_governor_dbs,
+    .handle_option = cpufreq_dbs_handle_option
 };
 
 static int __init cpufreq_gov_dbs_init(void)
@@ -292,60 +333,8 @@ static int __init cpufreq_gov_dbs_init(v
 }
 __initcall(cpufreq_gov_dbs_init);
 
-static void cpufreq_gov_dbs_exit(void)
+static void __exit cpufreq_gov_dbs_exit(void)
 {
     cpufreq_unregister_governor(&cpufreq_gov_dbs);
 }
 __exitcall(cpufreq_gov_dbs_exit);
-
-void __init cpufreq_cmdline_parse(char *str)
-{
-    do {
-        char *val, *end = strchr(str, ',');
-
-        if ( end )
-            *end++ = '\0';
-        val = strchr(str, '=');
-        if ( val )
-            *val++ = '\0';
-
-        if ( !strcmp(str, "rate") && val )
-        {
-            usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
-        }
-        else if ( !strcmp(str, "threshold") && val )
-        {
-            unsigned long tmp = simple_strtoul(val, NULL, 0);
-
-            if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
-            {
-                printk(XENLOG_WARNING "cpufreq/ondemand: "
-                       "specified threshold too low, using %d\n",
-                       MIN_FREQUENCY_UP_THRESHOLD);
-                tmp = MIN_FREQUENCY_UP_THRESHOLD;
-            }
-            else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
-            {
-                printk(XENLOG_WARNING "cpufreq/ondemand: "
-                       "specified threshold too high, using %d\n",
-                       MAX_FREQUENCY_UP_THRESHOLD);
-                tmp = MAX_FREQUENCY_UP_THRESHOLD;
-            }
-            dbs_tuners_ins.up_threshold = tmp;
-        }
-        else if ( !strcmp(str, "bias") && val )
-        {
-            unsigned long tmp = simple_strtoul(val, NULL, 0);
-
-            if ( tmp > 1000 )
-            {
-                printk(XENLOG_WARNING "cpufreq/ondemand: "
-                       "specified bias too high, using 1000\n");
-                tmp = 1000;
-            }
-            dbs_tuners_ins.powersave_bias = tmp;
-        }
-
-        str = end;
-    } while ( str );
-}
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Fri Feb 13 11:22:28 2009 +0900
@@ -479,26 +479,27 @@ static int set_iommu_interrupt_handler(s
 {
     int vector, ret;
 
-    vector = assign_irq_vector(AUTO_ASSIGN);
-    vector_to_iommu[vector] = iommu;
-
-    /* make irq == vector */
-    irq_vector[vector] = vector;
-    vector_irq[vector] = vector;
-
-    if ( !vector )
-    {
-        amd_iov_error("no vectors\n");
+    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+    if ( vector <= 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
         return 0;
     }
 
     irq_desc[vector].handler = &iommu_msi_type;
-    ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
+    ret = request_irq_vector(vector, amd_iommu_page_fault, 0,
+                             "amd_iommu", iommu);
     if ( ret )
     {
+        irq_desc[vector].handler = &no_irq_type;
+        free_irq_vector(vector);
         amd_iov_error("can't request irq\n");
         return 0;
     }
+
+    /* Make sure that vector is never re-used. */
+    vector_irq[vector] = NEVER_ASSIGN_IRQ;
+    vector_to_iommu[vector] = iommu;
     iommu->vector = vector;
     return vector;
 }
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/iommu_map.c   Fri Feb 13 11:22:28 2009 +0900
@@ -461,8 +461,8 @@ int amd_iommu_map_page(struct domain *d,
     iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
     if ( iommu_l2e == 0 )
     {
+        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         return -EFAULT;
     }
     set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
@@ -493,8 +493,8 @@ int amd_iommu_unmap_page(struct domain *
 
     if ( iommu_l2e == 0 )
     {
+        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
         return -EFAULT;
     }
 
@@ -533,9 +533,9 @@ int amd_iommu_reserve_domain_unity_map(
 
         if ( iommu_l2e == 0 )
         {
-            amd_iov_error(
-            "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
+            amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n",
+                          phys_addr);
             return -EFAULT;
         }
 
@@ -552,7 +552,6 @@ int amd_iommu_sync_p2m(struct domain *d)
 {
     unsigned long mfn, gfn, flags;
     u64 iommu_l2e;
-    struct list_head *entry;
     struct page_info *page;
     struct hvm_iommu *hd;
     int iw = IOMMU_IO_WRITE_ENABLED;
@@ -568,10 +567,10 @@ int amd_iommu_sync_p2m(struct domain *d)
     if ( hd->p2m_synchronized )
         goto out;
 
-    for ( entry = d->page_list.next; entry != &d->page_list;
-            entry = entry->next )
-    {
-        page = list_entry(entry, struct page_info, list);
+    spin_lock(&d->page_alloc_lock);
+
+    page_list_for_each ( page, &d->page_list )
+    {
         mfn = page_to_mfn(page);
         gfn = get_gpfn_from_mfn(mfn);
 
@@ -582,13 +581,16 @@ int amd_iommu_sync_p2m(struct domain *d)
 
         if ( iommu_l2e == 0 )
         {
+            spin_unlock(&d->page_alloc_lock);
+            spin_unlock_irqrestore(&hd->mapping_lock, flags);
             amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
-            spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return -EFAULT;
         }
 
         set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
     }
+
+    spin_unlock(&d->page_alloc_lock);
 
     hd->p2m_synchronized = 1;
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Fri Feb 13 11:22:28 2009 +0900
@@ -23,7 +23,6 @@
 #include <xen/pci_regs.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
-#include <asm/mm.h>
 
 extern unsigned short ivrs_bdf_entries;
 extern struct ivrs_mappings *ivrs_mappings;
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/io.c      Fri Feb 13 11:22:28 2009 +0900
@@ -87,8 +87,8 @@ int pt_irq_create_bind_vtd(
 
         if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
         {
+            spin_unlock(&d->event_lock);
             xfree(hvm_irq_dpci);
-            spin_unlock(&d->event_lock);
             return -EINVAL;
         }
     }
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/iommu.c   Fri Feb 13 11:22:28 2009 +0900
@@ -33,6 +33,8 @@ int amd_iov_detect(void);
  *   no-pv                      Disable IOMMU for PV domains (default)
  *   force|required             Don't boot unless IOMMU is enabled
  *   passthrough                Bypass VT-d translation for Dom0
+ *   snoop                      Utilize the snoop control for IOMMU (default)
+ *   no-snoop                   Don't utilize the snoop control for IOMMU
  */
 custom_param("iommu", parse_iommu_param);
 int iommu_enabled = 0;
@@ -45,6 +47,7 @@ static void __init parse_iommu_param(cha
 {
     char *ss;
     iommu_enabled = 1;
+    iommu_snoop = 1;
 
     do {
         ss = strchr(s, ',');
@@ -62,6 +65,10 @@ static void __init parse_iommu_param(cha
             force_iommu = 1;
         else if ( !strcmp(s, "passthrough") )
             iommu_passthrough = 1;
+        else if ( !strcmp(s, "snoop") )
+            iommu_snoop = 1;
+        else if ( !strcmp(s, "no-snoop") )
+            iommu_snoop = 0;
 
         s = ss + 1;
     } while ( ss );
@@ -141,7 +148,7 @@ static int iommu_populate_page_table(str
 
     spin_lock(&d->page_alloc_lock);
 
-    list_for_each_entry ( page, &d->page_list, list )
+    page_list_for_each ( page, &d->page_list )
     {
         if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
         {
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c        Fri Feb 13 11:22:28 2009 +0900
@@ -21,6 +21,7 @@
 
 #include <xen/init.h>
 #include <xen/bitmap.h>
+#include <xen/errno.h>
 #include <xen/kernel.h>
 #include <xen/acpi.h>
 #include <xen/mm.h>
@@ -518,8 +519,6 @@ int acpi_dmar_init(void)
 int acpi_dmar_init(void)
 {
     int rc;
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
 
     rc = -ENODEV;
     if ( force_iommu )
@@ -536,20 +535,7 @@ int acpi_dmar_init(void)
     if ( list_empty(&acpi_drhd_units) )
         goto fail;
 
-    /* Giving that all devices within guest use same io page table,
-     * enable snoop control only if all VT-d engines support it.
-     */
-    iommu_snoop = 1;
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        if ( !ecap_snp_ctl(iommu->ecap) ) {
-            iommu_snoop = 0;
-            break;
-        }
-    }
-
-    printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);
+    printk("Intel VT-d has been enabled\n");
 
     return 0;
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/ia64/vtd.c
--- a/xen/drivers/passthrough/vtd/ia64/vtd.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/ia64/vtd.c    Fri Feb 13 11:22:28 2009 +0900
@@ -29,7 +29,9 @@
 #include "../vtd.h"
 
 
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = {
+    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 u8 irq_vector[NR_IRQS] __read_mostly;
 
@@ -45,18 +47,19 @@ void unmap_vtd_domain_page(void *va)
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
 {
     struct page_info *pg;
     u64 *vaddr;
 
-    pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
+    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+                             d ? MEMF_node(domain_to_node(d)) : 0);
     vaddr = map_domain_page(page_to_mfn(pg));
     if ( !vaddr )
         return 0;
-    memset(vaddr, 0, PAGE_SIZE);
+    memset(vaddr, 0, PAGE_SIZE * npages);
 
-    iommu_flush_cache_page(vaddr);
+    iommu_flush_cache_page(vaddr, npages);
     unmap_domain_page(vaddr);
 
     return page_to_maddr(pg);
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/intremap.c    Fri Feb 13 11:22:28 2009 +0900
@@ -502,7 +502,7 @@ int intremap_setup(struct iommu *iommu)
     ir_ctrl = iommu_ir_ctrl(iommu);
     if ( ir_ctrl->iremap_maddr == 0 )
     {
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Feb 13 11:22:28 2009 +0900
@@ -129,9 +129,9 @@ void iommu_flush_cache_entry(void *addr)
     __iommu_flush_cache(addr, 8);
 }
 
-void iommu_flush_cache_page(void *addr)
-{
-    __iommu_flush_cache(addr, PAGE_SIZE_4K);
+void iommu_flush_cache_page(void *addr, unsigned long npages)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
 }
 
 int nr_iommus;
@@ -146,7 +146,7 @@ static u64 bus_to_context_maddr(struct i
     root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        maddr = alloc_pgtable_maddr(NULL);
+        maddr = alloc_pgtable_maddr(NULL, 1);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -174,7 +174,7 @@ static u64 addr_to_dma_page_maddr(struct
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->mapping_lock));
     if ( hd->pgd_maddr == 0 )
-        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
+        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) )
             goto out;
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
@@ -187,7 +187,7 @@ static u64 addr_to_dma_page_maddr(struct
         {
             if ( !alloc )
                 break;
-            maddr = alloc_pgtable_maddr(domain);
+            maddr = alloc_pgtable_maddr(domain, 1);
             if ( !maddr )
                 break;
             dma_set_pte_addr(*pte, maddr);
@@ -577,7 +577,7 @@ static int iommu_set_root_entry(struct i
     spin_lock(&iommu->lock);
 
     if ( iommu->root_maddr == 0 )
-        iommu->root_maddr = alloc_pgtable_maddr(NULL);
+        iommu->root_maddr = alloc_pgtable_maddr(NULL, 1);
     if ( iommu->root_maddr == 0 )
     {
         spin_unlock(&iommu->lock);
@@ -874,23 +874,27 @@ int iommu_set_interrupt(struct iommu *io
 {
     int vector, ret;
 
-    vector = assign_irq_vector(AUTO_ASSIGN);
-    vector_to_iommu[vector] = iommu;
-
-    /* VT-d fault is a MSI, make irq == vector */
-    irq_vector[vector] = vector;
-    vector_irq[vector] = vector;
-
-    if ( !vector )
+    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+    if ( vector <= 0 )
     {
         gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
         return -EINVAL;
     }
 
     irq_desc[vector].handler = &dma_msi_type;
-    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
+    ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu);
     if ( ret )
+    {
+        irq_desc[vector].handler = &no_irq_type;
+        free_irq_vector(vector);
         gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
+        return ret;
+    }
+
+    /* Make sure that vector is never re-used. */
+    vector_irq[vector] = NEVER_ASSIGN_IRQ;
+    vector_to_iommu[vector] = iommu;
+
     return vector;
 }
 
@@ -966,7 +970,7 @@ static void iommu_free(struct acpi_drhd_
         iounmap(iommu->reg);
 
     free_intel_iommu(iommu->intel);
-    free_irq(iommu->vector);
+    release_irq_vector(iommu->vector);
     xfree(iommu);
 
     drhd->iommu = NULL;
@@ -1677,6 +1681,11 @@ static int init_vtd_hw(void)
         }
 
         vector = iommu_set_interrupt(iommu);
+        if ( vector < 0 )
+        {
+            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
+            return vector;
+        }
         dma_msi_data_init(iommu, vector);
         dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
         iommu->vector = vector;
@@ -1756,6 +1765,23 @@ int intel_vtd_setup(void)
     if ( init_vtd_hw() )
         goto error;
 
+    /* Given that all devices within a guest use the same IO page table,
+     * enable snoop control only if all VT-d engines support it.
+     */
+
+    if ( iommu_snoop )
+    {
+        for_each_drhd_unit ( drhd )
+        {
+            iommu = drhd->iommu;
+            if ( !ecap_snp_ctl(iommu->ecap) ) {
+                iommu_snoop = 0;
+                break;
+            }
+        }
+    }
+    
+    printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
     register_keyhandler('V', dump_iommu_info, "dump iommu info");
 
     return 0;
@@ -1764,6 +1790,7 @@ int intel_vtd_setup(void)
     for_each_drhd_unit ( drhd )
         iommu_free(drhd);
     vtd_enabled = 0;
+    iommu_snoop = 0;
     return -ENOMEM;
 }
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.h       Fri Feb 13 11:22:28 2009 +0900
@@ -397,7 +397,9 @@ struct poll_info {
     u32 udata;
 };
 
-#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
+#define MAX_QINVAL_PAGES 8
+#define NUM_QINVAL_PAGES 1
+#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry))
 #define qinval_present(v) ((v).lo & 1)
 #define qinval_fault_disable(v) (((v).lo >> 1) & 1)
 
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/qinval.c      Fri Feb 13 11:22:28 2009 +0900
@@ -427,7 +427,7 @@ int qinval_setup(struct iommu *iommu)
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
@@ -445,6 +445,8 @@ int qinval_setup(struct iommu *iommu)
      * registers are automatically reset to 0 with write
      * to IQA register.
      */
+    if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES )
+        qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1;
     dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
 
     /* enable queued invalidation hardware */
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 11:22:28 2009 +0900
@@ -101,12 +101,12 @@ void cacheline_flush(char *);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 void *map_to_nocache_virt(int nr_iommus, u64 maddr);
-u64 alloc_pgtable_maddr(struct domain *d);
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
 
 void iommu_flush_cache_entry(void *addr);
-void iommu_flush_cache_page(void *addr);
+void iommu_flush_cache_page(void *addr, unsigned long npages);
 
 #endif // _VTD_H_
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Fri Feb 13 11:22:28 2009 +0900
@@ -38,20 +38,21 @@ void unmap_vtd_domain_page(void *va)
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
 {
     struct page_info *pg;
     u64 *vaddr;
     unsigned long mfn;
 
-    pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
+    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+                             d ? MEMF_node(domain_to_node(d)) : 0);
     if ( !pg )
         return 0;
     mfn = page_to_mfn(pg);
     vaddr = map_domain_page(mfn);
-    memset(vaddr, 0, PAGE_SIZE);
+    memset(vaddr, 0, PAGE_SIZE * npages);
 
-    iommu_flush_cache_page(vaddr);
+    iommu_flush_cache_page(vaddr, npages);
     unmap_domain_page(vaddr);
 
     return (u64)mfn << PAGE_SHIFT_4K;
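
alloc_pgtable_maddr() now takes an npages count and allocates through get_order_from_pages(); since the domheap allocator works in power-of-two orders, npages is effectively rounded up to the next power of two. A small sketch of that rounding follows, assuming get_order_from_pages() returns the smallest order with (1UL << order) >= npages (an assumption based on its usage here, not a copy of the Xen helper).

/* Sketch of the order computation assumed by alloc_pgtable_maddr(d, npages). */
#include <stdio.h>

static unsigned int order_from_pages(unsigned long npages)
{
    unsigned int order = 0;

    while ( (1UL << order) < npages )
        order++;
    return order;
}

int main(void)
{
    unsigned long n;

    for ( n = 1; n <= 8; n++ )
        printf("npages=%lu -> order=%u (allocates %lu pages)\n",
               n, order_from_pages(n), 1UL << order_from_pages(n));
    return 0;
}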
diff -r af992824b5cf -r c7cba853583d xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/acpi/cpufreq/cpufreq.h        Fri Feb 13 11:22:28 2009 +0900
@@ -87,6 +87,7 @@ struct cpufreq_governor {
     char    name[CPUFREQ_NAME_LEN];
     int     (*governor)(struct cpufreq_policy *policy,
                         unsigned int event);
+    void    (*handle_option)(const char *name, const char *value);
     struct list_head governor_list;
 };
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hardirq.h
--- a/xen/include/asm-ia64/hardirq.h    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hardirq.h    Fri Feb 13 11:22:28 2009 +0900
@@ -4,6 +4,7 @@
 #define __ARCH_IRQ_STAT        1
 #define HARDIRQ_BITS   14
 #include <linux/hardirq.h>
+#include <xen/sched.h>
 
 #define local_softirq_pending()                (local_cpu_data->softirq_pending)
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/iommu.h
--- a/xen/include/asm-ia64/hvm/iommu.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hvm/iommu.h  Fri Feb 13 11:22:28 2009 +0900
@@ -28,7 +28,6 @@ static inline void pci_cleanup_msi(struc
     /* TODO */
 }
 
-#define AUTO_ASSIGN         -1
 
 extern int assign_irq_vector (int irq);
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/irq.h
--- a/xen/include/asm-ia64/hvm/irq.h    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hvm/irq.h    Fri Feb 13 11:22:28 2009 +0900
@@ -90,13 +90,17 @@ struct hvm_irq {
 #define hvm_pci_intx_link(dev, intx) \
     (((dev) + (intx)) & 3)
 
-/* Extract the IA-64 vector that corresponds to IRQ.  */
-static inline int
-irq_to_vector (int irq)
+#define IA64_INVALID_VECTOR    ((unsigned int)((int)-1))
+static inline unsigned int irq_to_vector(int irq)
 {
-    return irq;
+    int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+    unsigned int vector;
+
+    if ( acpi_gsi_to_irq(irq, &vector) < 0)
+        return 0;
+
+    return vector;
 }
-
 
 extern u8 irq_vector[NR_IRQS];
 extern int vector_irq[NR_VECTORS];
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/asm/smp.h
--- a/xen/include/asm-ia64/linux-xen/asm/smp.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux-xen/asm/smp.h  Fri Feb 13 11:22:28 2009 +0900
@@ -47,7 +47,6 @@ ia64_get_lid (void)
 #define SMP_IPI_REDIRECTION    (1 << 1)
 
 #ifdef XEN
-#include <xen/sched.h>
 #define raw_smp_processor_id() (current->processor)
 #else
 #define raw_smp_processor_id() (current_thread_info()->cpu)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/linux/interrupt.h
--- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h  Fri Feb 13 11:22:28 2009 +0900
@@ -52,10 +52,10 @@ struct irqaction {
 };
 
 extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
-extern int request_irq(unsigned int,
+extern int request_irq_vector(unsigned int,
                       irqreturn_t (*handler)(int, void *, struct pt_regs *),
                       unsigned long, const char *, void *);
-extern void free_irq(unsigned int, void *);
+extern void release_irq_vector(unsigned int, void *);
 #endif
 
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux/asm/hw_irq.h
--- a/xen/include/asm-ia64/linux/asm/hw_irq.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux/asm/hw_irq.h   Fri Feb 13 11:22:28 2009 +0900
@@ -34,7 +34,7 @@ typedef u8 ia64_vector;
 #define IA64_MAX_VECTORED_IRQ          255
 #define IA64_NUM_VECTORS               256
 
-#define AUTO_ASSIGN                    -1
+#define AUTO_ASSIGN_IRQ                        (-1)
 
 #define IA64_SPURIOUS_INT_VECTOR       0x0f
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/mm.h Fri Feb 13 11:22:28 2009 +0900
@@ -13,7 +13,6 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <xen/perfc.h>
-#include <xen/sched.h>
 
 #include <asm/processor.h>
 #include <asm/atomic.h>
@@ -63,21 +62,14 @@ struct page_info
         struct {
             /* Order-size of the free chunk this page is the head of. */
             u32 order;
-            /* Mask of possibly-tainted TLBs. */
-            cpumask_t cpumask;
+            /* Do TLBs need flushing for safety before next page use? */
+            bool_t need_tlbflush;
         } free;
 
     } u;
 
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     u32 tlbflush_timestamp;
-
-#if 0
-// following added for Linux compiling
-    page_flags_t flags;
-    atomic_t _count;
-    struct list_head lru;      // is this the same as above "list"?
-#endif
 };
 
 #define set_page_count(p,v)    atomic_set(&(p)->_count, v - 1)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/tlbflush.h   Fri Feb 13 11:22:28 2009 +0900
@@ -1,7 +1,8 @@
 #ifndef __FLUSHTLB_H__
 #define __FLUSHTLB_H__
 
-#include <xen/sched.h>
+struct vcpu;
+struct domain;
 
 /* TLB flushes can be either local (current vcpu only) or domain wide (on
    all vcpus).
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/domain.h      Fri Feb 13 11:22:28 2009 +0900
@@ -79,11 +79,11 @@ struct shadow_domain {
     int               locker; /* processor which holds the lock */
     const char       *locker_function; /* Func that took it */
     unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
-    struct list_head  pinned_shadows;
+    struct page_list_head pinned_shadows;
 
     /* Memory allocation */
-    struct list_head  freelists[SHADOW_MAX_ORDER + 1];
-    struct list_head  p2m_freelist;
+    struct page_list_head freelists[SHADOW_MAX_ORDER + 1];
+    struct page_list_head p2m_freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
@@ -92,7 +92,7 @@ struct shadow_domain {
     pagetable_t unpaged_pagetable;
 
     /* Shadow hashtable */
-    struct shadow_page_info **hash_table;
+    struct page_info **hash_table;
     int hash_walking;  /* Some function is walking the hash table */
 
     /* Fast MMIO path heuristic */
@@ -143,7 +143,7 @@ struct hap_domain {
     int               locker;
     const char       *locker_function;
 
-    struct list_head  freelist;
+    struct page_list_head freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
@@ -265,7 +265,7 @@ struct arch_domain
         RELMEM_l2,
         RELMEM_done,
     } relmem;
-    struct list_head relmem_list;
+    struct page_list_head relmem_list;
 
     cpuid_input_t cpuids[MAX_CPUID_INPUT];
 } __cacheline_aligned;
@@ -352,6 +352,7 @@ struct arch_vcpu
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
+    spinlock_t shadow_ldt_lock;
 
     struct paging_vcpu paging;
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 11:22:28 2009 +0900
@@ -48,7 +48,11 @@ typedef union {
 #define EPTE_SUPER_PAGE_MASK    0x80
 #define EPTE_MFN_MASK           0x1fffffffffff000
 #define EPTE_AVAIL1_MASK        0xF00
-#define EPTE_EMT_MASK           0x78
+#define EPTE_EMT_MASK           0x38
+#define EPTE_IGMT_MASK          0x40
+#define EPTE_AVAIL1_SHIFT       8
+#define EPTE_EMT_SHIFT          3
+#define EPTE_IGMT_SHIFT         6
 
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/iocap.h
--- a/xen/include/asm-x86/iocap.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/iocap.h       Fri Feb 13 11:22:28 2009 +0900
@@ -14,7 +14,8 @@
 #define ioports_access_permitted(d, s, e)               \
     rangeset_contains_range((d)->arch.ioport_caps, s, e)
 
-#define cache_flush_permitted(d)                       \
-    (!rangeset_is_empty((d)->iomem_caps))
+#define cache_flush_permitted(d)                        \
+    (!rangeset_is_empty((d)->iomem_caps) ||             \
+     !rangeset_is_empty((d)->arch.ioport_caps))
 
 #endif /* __X86_IOCAP_H__ */
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/irq.h Fri Feb 13 11:22:28 2009 +0900
@@ -19,9 +19,6 @@
 
 extern int vector_irq[NR_VECTORS];
 extern u8 irq_vector[NR_IRQS];
-#define AUTO_ASSIGN    -1
-#define NEVER_ASSIGN   -2
-#define FREE_TO_ASSIGN -3
 
 #define platform_legacy_irq(irq)       ((irq) < 16)
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/mm.h  Fri Feb 13 11:22:28 2009 +0900
@@ -12,15 +12,40 @@
  * Per-page-frame information.
  * 
  * Every architecture must ensure the following:
- *  1. 'struct page_info' contains a 'struct list_head list'.
+ *  1. 'struct page_info' contains a 'struct page_list_entry list'.
  *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
  */
-#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
+#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
+
+/*
+ * This definition is solely for use in struct page_info (and
+ * struct page_list_head), intended to allow easy adjustment once x86-64
+ * wants to support more than 16TB.
+ * 'unsigned long' should be used for MFNs everywhere else.
+ */
+#define __mfn_t unsigned int
+#define PRpgmfn "08x"
+
+#undef page_list_entry
+struct page_list_entry
+{
+    __mfn_t next, prev;
+};
 
 struct page_info
 {
-    /* Each frame can be threaded onto a doubly-linked list. */
-    struct list_head list;
+    union {
+        /* Each frame can be threaded onto a doubly-linked list.
+         *
+         * For unused shadow pages, a list of pages of this order; for
+         * pinnable shadows, if pinned, a list of other pinned shadows
+         * (see sh_type_is_pinnable() below for the definition of
+         * "pinnable" shadow types).
+         */
+        struct page_list_entry list;
+        /* For non-pinnable shadows, a higher entry that points at us. */
+        paddr_t up;
+    };
 
     /* Reference count and various PGC_xxx flags and fields. */
     unsigned long count_info;
@@ -30,21 +55,46 @@ struct page_info
 
         /* Page is in use: ((count_info & PGC_count_mask) != 0). */
         struct {
-            /* Owner of this page (NULL if page is anonymous). */
-            u32 _domain; /* pickled format */
             /* Type reference count and various PGT_xxx flags and fields. */
             unsigned long type_info;
         } inuse;
 
+        /* Page is in use as a shadow: count_info == 0. */
+        struct {
+            unsigned long type:5;   /* What kind of shadow is this? */
+            unsigned long pinned:1; /* Is the shadow pinned? */
+            unsigned long count:26; /* Reference count */
+        } sh;
+
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+            /* Do TLBs need flushing for safety before next page use? */
+            bool_t need_tlbflush;
+        } free;
+
+    } u;
+
+    union {
+
+        /* Page is in use, but not as a shadow. */
+        struct {
+            /* Owner of this page (NULL if page is anonymous). */
+            u32 _domain; /* pickled format */
+        } inuse;
+
+        /* Page is in use as a shadow. */
+        struct {
+            /* GMFN of guest page we're a shadow of. */
+            __mfn_t back;
+        } sh;
+
+        /* Page is on a free list (including shadow code free lists). */
+        struct {
             /* Order-size of the free chunk this page is the head of. */
-            u32 order;
-            /* Mask of possibly-tainted TLBs. */
-            cpumask_t cpumask;
+            unsigned int order;
         } free;
 
-    } u;
+    } v;
 
     union {
         /*
@@ -95,8 +145,13 @@ struct page_info
          * tracked for TLB-flush avoidance when a guest runs in shadow mode.
          */
         u32 shadow_flags;
+
+        /* When in use as a shadow, next shadow in this hash chain. */
+        __mfn_t next_shadow;
     };
 };
+
+#undef __mfn_t
 
 #define PG_shift(idx)   (BITS_PER_LONG - (idx))
 #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
@@ -155,7 +210,8 @@ struct page_info
 })
 #else
 #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
-#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn])
+#define is_xen_heap_mfn(mfn) \
+    (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
 #endif
 
 #if defined(__i386__)
@@ -174,10 +230,10 @@ struct page_info
 #define SHADOW_OOS_FIXUPS 2
 
 #define page_get_owner(_p)                                              \
-    ((struct domain *)((_p)->u.inuse._domain ?                          \
-                       mfn_to_virt((_p)->u.inuse._domain) : NULL))
+    ((struct domain *)((_p)->v.inuse._domain ?                          \
+                       mfn_to_virt((_p)->v.inuse._domain) : NULL))
 #define page_set_owner(_p,_d)                                           \
-    ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
+    ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
 
 #define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
 #define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/p2m.h Fri Feb 13 11:22:28 2009 +0900
@@ -110,7 +110,7 @@ struct p2m_domain {
     const char        *locker_function; /* Func that took it */
 
     /* Pages used to construct the p2m */
-    struct list_head   pages;
+    struct page_list_head pages;
 
     /* Functions to call to get or free pages for the p2m */
     struct page_info * (*alloc_page  )(struct domain *d);
@@ -148,7 +148,7 @@ struct p2m_domain {
      *   protect moving stuff from the PoD cache to the domain page list.
      */
     struct {
-        struct list_head super,        /* List of superpages                */
+        struct page_list_head super,   /* List of superpages                */
                          single;       /* Non-super lists                   */
         int              count,        /* # of pages in cache lists         */
                          entry_count;  /* # of pages in p2m marked pod      */
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/page.h        Fri Feb 13 11:22:28 2009 +0900
@@ -220,31 +220,47 @@ void copy_page_sse2(void *, const void *
                              copy_page_sse2(_t, _f) :                   \
                              (void)memcpy(_t, _f, PAGE_SIZE))
 
-#define mfn_valid(mfn)      ((mfn) < max_page)
+#define __mfn_valid(mfn)    ((mfn) < max_page)
 
 /* Convert between Xen-heap virtual addresses and machine addresses. */
 #define __pa(x)             (virt_to_maddr(x))
 #define __va(x)             (maddr_to_virt(x))
 
 /* Convert between Xen-heap virtual addresses and machine frame numbers. */
-#define virt_to_mfn(va)     (virt_to_maddr(va) >> PAGE_SHIFT)
-#define mfn_to_virt(mfn)    (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
+#define __virt_to_mfn(va)   (virt_to_maddr(va) >> PAGE_SHIFT)
+#define __mfn_to_virt(mfn)  (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
 
 /* Convert between machine frame numbers and page-info structures. */
-#define mfn_to_page(mfn)    (frame_table + (mfn))
-#define page_to_mfn(pg)     ((unsigned long)((pg) - frame_table))
+#define __mfn_to_page(mfn)  (frame_table + (mfn))
+#define __page_to_mfn(pg)   ((unsigned long)((pg) - frame_table))
 
 /* Convert between machine addresses and page-info structures. */
-#define maddr_to_page(ma)   (frame_table + ((ma) >> PAGE_SHIFT))
-#define page_to_maddr(pg)   ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
+#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
+#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
 
 /* Convert between Xen-heap virtual addresses and page-info structures. */
-#define virt_to_page(va)    (frame_table + (__pa(va) >> PAGE_SHIFT))
-#define page_to_virt(pg)    (maddr_to_virt(page_to_maddr(pg)))
+#define __virt_to_page(va)  (frame_table + (__pa(va) >> PAGE_SHIFT))
+#define __page_to_virt(pg)  (maddr_to_virt(page_to_maddr(pg)))
 
 /* Convert between frame number and address formats.  */
-#define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
-#define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
+#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
+#define __paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
+
+/*
+ * We define non-underscored wrappers for the above conversion functions. These are
+ * overridden in various source files while underscored versions remain intact.
+ */
+#define mfn_valid(mfn)      __mfn_valid(mfn)
+#define virt_to_mfn(va)     __virt_to_mfn(va)
+#define mfn_to_virt(mfn)    __mfn_to_virt(mfn)
+#define mfn_to_page(mfn)    __mfn_to_page(mfn)
+#define page_to_mfn(pg)     __page_to_mfn(pg)
+#define maddr_to_page(ma)   __maddr_to_page(ma)
+#define page_to_maddr(pg)   __page_to_maddr(pg)
+#define virt_to_page(va)    __virt_to_page(va)
+#define page_to_virt(pg)    __page_to_virt(pg)
+#define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
+#define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
 
 #endif /* !defined(__ASSEMBLY__) */
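
The comment added to page.h above describes an override pattern: double-underscore helpers keep their generic meaning, while the plain-named defaults are thin wrappers that an individual source file may #undef and redefine. A minimal, self-contained illustration of that pattern (names and the offset below are hypothetical, not Xen code):

/* Minimal illustration of the __helper / default-wrapper override pattern. */
#include <stdio.h>

#define __page_to_mfn(pg)  ((unsigned long)(pg))   /* generic stand-in */
#define page_to_mfn(pg)    __page_to_mfn(pg)       /* default wrapper  */

/* A file with its own numbering scheme overrides only the plain wrapper: */
#undef page_to_mfn
#define page_to_mfn(pg)    (__page_to_mfn(pg) + 0x1000)  /* hypothetical offset */

int main(void)
{
    void *pg = (void *)0x42;

    printf("__page_to_mfn = %#lx, page_to_mfn = %#lx\n",
           __page_to_mfn(pg), page_to_mfn(pg));
    return 0;
}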
 
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/perfc.h
--- a/xen/include/asm-x86/perfc.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/perfc.h       Fri Feb 13 11:22:28 2009 +0900
@@ -1,6 +1,5 @@
 #ifndef __ASM_PERFC_H__
 #define __ASM_PERFC_H__
-#include <asm/mm.h>
 
 static inline void arch_perfc_printall(void)
 {
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/processor.h   Fri Feb 13 11:22:28 2009 +0900
@@ -188,6 +188,7 @@ extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data boot_cpu_data
 #endif
 
+extern u64 host_pat;
 extern int phys_proc_id[NR_CPUS];
 extern int cpu_core_id[NR_CPUS];
 
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-ia64/hvm/save.h
--- a/xen/include/public/arch-ia64/hvm/save.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-ia64/hvm/save.h   Fri Feb 13 11:22:28 2009 +0900
@@ -23,8 +23,8 @@
 #ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__
 #define __XEN_PUBLIC_HVM_SAVE_IA64_H__
 
-#include <public/hvm/save.h>
-#include <public/arch-ia64.h>
+#include "../../hvm/save.h"
+#include "../../arch-ia64.h"
 
 /* 
  * Save/restore header: general info about the save file. 
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h    Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-x86/hvm/save.h    Fri Feb 13 11:22:28 2009 +0900
@@ -287,7 +287,7 @@ struct hvm_hw_pci_irqs {
      * Indexed by: device*4 + INTx#.
      */
     union {
-        DECLARE_BITMAP(i, 32*4);
+        unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
         uint64_t pad[2];
     };
 };
@@ -300,7 +300,7 @@ struct hvm_hw_isa_irqs {
      * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
      */
     union {
-        DECLARE_BITMAP(i, 16);
+        unsigned long i[1];  /* DECLARE_BITMAP(i, 16); */
         uint64_t pad[1];
     };
 };
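Both unions replace DECLARE_BITMAP() with an open-coded unsigned long array of the same size (128 bits and 16 bits respectively), so the public header no longer depends on Xen-internal bitmap macros; the uint64_t pad members continue to fix the overall union size. A sketch of the sizing rule, using a hypothetical helper name that the header itself does not define:

    /* Hypothetical helper: round a bit count up to whole unsigned longs,
     * which is the sizing the open-coded arrays above follow. */
    #define HYPOTHETICAL_BITMAP_LONGS(bits) \
        (((bits) + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long)))

    /* HYPOTHETICAL_BITMAP_LONGS(32*4) longs occupy 16 bytes, matching
     * "unsigned long i[16 / sizeof (unsigned long)]";
     * HYPOTHETICAL_BITMAP_LONGS(16) == 1, matching "unsigned long i[1]". */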
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-x86/xen-mca.h     Fri Feb 13 11:22:28 2009 +0900
@@ -56,7 +56,7 @@
 /* Hypercall */
 #define __HYPERVISOR_mca __HYPERVISOR_arch_0
 
-#define XEN_MCA_INTERFACE_VERSION 0x03000001
+#define XEN_MCA_INTERFACE_VERSION 0x03000002
 
 /* IN: Dom0 calls hypercall from MC event handler. */
 #define XEN_MC_CORRECTABLE  0x0
@@ -118,7 +118,7 @@ struct mcinfo_global {
     uint16_t mc_domid;
     uint32_t mc_socketid; /* physical socket of the physical core */
     uint16_t mc_coreid; /* physical impacted core */
-    uint8_t  mc_apicid;
+    uint32_t mc_apicid;
     uint16_t mc_core_threadid; /* core thread of physical core */
     uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
     uint64_t mc_gstatus; /* global status */
@@ -175,6 +175,41 @@ struct mc_info {
 };
 typedef struct mc_info mc_info_t;
 
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS       7       /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX        0       /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX        1       /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM     2       /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX  3       /* Linux-defined */
+#define MC_CAPS_STD_ECX        4       /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA    5       /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX        6       /* cpuid level 0x80000001 (%ecx) */
+
+typedef struct mcinfo_logical_cpu {
+    uint32_t mc_cpunr;          
+    uint32_t mc_chipid; 
+    uint16_t mc_coreid;
+    uint16_t mc_threadid;
+    uint32_t mc_apicid;
+    uint32_t mc_clusterid;
+    uint32_t mc_ncores;
+    uint32_t mc_ncores_active;
+    uint32_t mc_nthreads;
+    int32_t mc_cpuid_level;
+    uint32_t mc_family;
+    uint32_t mc_vendor;
+    uint32_t mc_model;
+    uint32_t mc_step;
+    char mc_vendorid[16];
+    char mc_brandid[64];
+    uint32_t mc_cpu_caps[MC_NCAPS];
+    uint32_t mc_cache_size;
+    uint32_t mc_cache_alignment;
+    int32_t mc_nmsrvals;
+    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+} xen_mc_logical_cpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
 
 
 /* 
@@ -272,6 +307,14 @@ typedef struct xen_mc_notifydomain xen_m
 typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
 DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
 
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+       /* IN/OUT */
+       uint32_t ncpus;
+       uint32_t pad0;
+       /* OUT */
+       XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
+};
 
 struct xen_mc {
     uint32_t cmd;
@@ -279,6 +322,7 @@ struct xen_mc {
     union {
         struct xen_mc_fetch        mc_fetch;
         struct xen_mc_notifydomain mc_notifydomain;
+        struct xen_mc_physcpuinfo  mc_physcpuinfo;
         uint8_t pad[MCINFO_HYPERCALLSIZE];
     } u;
 };
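XEN_MC_physcpuinfo adds a query interface: ncpus is IN/OUT and info is a guest handle that, when non-null, receives one xen_mc_logical_cpu_t record per physical CPU. A dom0-side sketch follows; hypercall_mca() is an assumed wrapper for the __HYPERVISOR_mca hypercall, the "null handle means count only" convention is an assumption about the handler, and struct xen_mc is assumed to keep its usual cmd/interface_version/u layout:

    /* Hypothetical dom0 caller (userspace-style allocation for brevity;
     * calloc/free come from stdlib.h). */
    static xen_mc_logical_cpu_t *fetch_physcpuinfo(uint32_t *ncpus)
    {
        struct xen_mc mc = {
            .cmd = XEN_MC_physcpuinfo,
            .interface_version = XEN_MCA_INTERFACE_VERSION,
        };
        xen_mc_logical_cpu_t *cpus;

        set_xen_guest_handle(mc.u.mc_physcpuinfo.info, NULL);
        if ( hypercall_mca(&mc) )            /* first call: learn the count */
            return NULL;

        cpus = calloc(mc.u.mc_physcpuinfo.ncpus, sizeof(*cpus));
        if ( cpus == NULL )
            return NULL;

        set_xen_guest_handle(mc.u.mc_physcpuinfo.info, cpus);
        if ( hypercall_mca(&mc) )            /* second call: copy the records */
        {
            free(cpus);
            return NULL;
        }

        *ncpus = mc.u.mc_physcpuinfo.ncpus;
        return cpus;
    }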
diff -r af992824b5cf -r c7cba853583d xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/domctl.h       Fri Feb 13 11:22:28 2009 +0900
@@ -630,6 +630,17 @@ typedef struct xen_domctl_debug_op xen_d
 typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
 
+/*
+ * Request a particular record from the HVM context
+ */
+#define XEN_DOMCTL_gethvmcontext_partial   55
+typedef struct xen_domctl_hvmcontext_partial {
+    uint32_t type;                      /* IN: Type of record required */
+    uint32_t instance;                  /* IN: Instance of that type */
+    XEN_GUEST_HANDLE_64(uint8) buffer;  /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
 
 struct xen_domctl {
     uint32_t cmd;
@@ -658,6 +669,7 @@ struct xen_domctl {
         struct xen_domctl_settimeoffset     settimeoffset;
         struct xen_domctl_real_mode_area    real_mode_area;
         struct xen_domctl_hvmcontext        hvmcontext;
+        struct xen_domctl_hvmcontext_partial hvmcontext_partial;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
         struct xen_domctl_get_device_group  get_device_group;
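Where XEN_DOMCTL_gethvmcontext returns the whole HVM save image, the partial variant asks for a single typed record (the hypervisor side is hvm_save_one(), declared in xen/include/xen/hvm/save.h later in this patch). A hedged caller-side sketch; do_domctl_hypercall() is an assumed wrapper, and the choice of HVM_SAVE_CODE(CPU) with instance 0 (VCPU 0's register state) is only an example:

    /* Hypothetical toolstack-side request for one save record; the buffer is
     * assumed to be large enough for the record being asked for. */
    static int hypothetical_get_vcpu0_record(uint32_t domid, uint8_t *buf)
    {
        struct xen_domctl domctl;

        memset(&domctl, 0, sizeof(domctl));
        domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
        domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl.domain = (domid_t)domid;
        domctl.u.hvmcontext_partial.type = HVM_SAVE_CODE(CPU);   /* record type */
        domctl.u.hvmcontext_partial.instance = 0;                 /* VCPU 0 */
        set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, buf);

        return do_domctl_hypercall(&domctl);   /* assumed wrapper, not shown */
    }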
diff -r af992824b5cf -r c7cba853583d xen/include/public/io/pciif.h
--- a/xen/include/public/io/pciif.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/io/pciif.h     Fri Feb 13 11:22:28 2009 +0900
@@ -29,7 +29,7 @@
 
 /* xen_pci_sharedinfo flags */
 #define _XEN_PCIF_active     (0)
-#define XEN_PCIF_active      (1<<_XEN_PCI_active)
+#define XEN_PCIF_active      (1<<_XEN_PCIF_active)
 #define _XEN_PCIB_AERHANDLER (1)
 #define XEN_PCIB_AERHANDLER  (1<<_XEN_PCIB_AERHANDLER)
 #define _XEN_PCIB_active     (2)
diff -r af992824b5cf -r c7cba853583d xen/include/xen/hvm/save.h
--- a/xen/include/xen/hvm/save.h        Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/hvm/save.h        Fri Feb 13 11:22:28 2009 +0900
@@ -152,6 +152,8 @@ __initcall(__hvm_register_##_x##_save_an
 /* Entry points for saving and restoring HVM domain state */
 size_t hvm_save_size(struct domain *d);
 int hvm_save(struct domain *d, hvm_domain_context_t *h);
+int hvm_save_one(struct domain *d,  uint16_t typecode, uint16_t instance, 
+                 XEN_GUEST_HANDLE_64(uint8) handle);
 int hvm_load(struct domain *d, hvm_domain_context_t *h);
 
 /* Arch-specific definitions. */
diff -r af992824b5cf -r c7cba853583d xen/include/xen/iocap.h
--- a/xen/include/xen/iocap.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/iocap.h   Fri Feb 13 11:22:28 2009 +0900
@@ -29,6 +29,7 @@
     rangeset_contains_singleton((d)->irq_caps, i)
 
 #define multipage_allocation_permitted(d)               \
-    (!rangeset_is_empty((d)->iomem_caps))
+    (!rangeset_is_empty((d)->iomem_caps) ||             \
+     !rangeset_is_empty((d)->arch.ioport_caps))
 
 #endif /* __XEN_IOCAP_H__ */
diff -r af992824b5cf -r c7cba853583d xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/irq.h     Fri Feb 13 11:22:28 2009 +0900
@@ -24,6 +24,11 @@ struct irqaction
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
 #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
+
+/* Special IRQ numbers. */
+#define AUTO_ASSIGN_IRQ         (-1)
+#define NEVER_ASSIGN_IRQ        (-2)
+#define FREE_TO_ASSIGN_IRQ      (-3)
 
 /*
  * Interrupt controller descriptor. This is all we need
@@ -64,11 +69,20 @@ typedef struct {
 
 extern irq_desc_t irq_desc[NR_VECTORS];
 
-extern int setup_irq(unsigned int, struct irqaction *);
-extern void free_irq(unsigned int);
-extern int request_irq(unsigned int irq,
+extern int setup_irq_vector(unsigned int, struct irqaction *);
+extern void release_irq_vector(unsigned int);
+extern int request_irq_vector(unsigned int vector,
                void (*handler)(int, void *, struct cpu_user_regs *),
                unsigned long irqflags, const char * devname, void *dev_id);
+
+#define setup_irq(irq, action) \
+    setup_irq_vector(irq_to_vector(irq), action)
+
+#define release_irq(irq) \
+    release_irq_vector(irq_to_vector(irq))
+
+#define request_irq(irq, handler, irqflags, devname, devid) \
+    request_irq_vector(irq_to_vector(irq), handler, irqflags, devname, devid)
 
 extern hw_irq_controller no_irq_type;
 extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
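The IRQ-number entry points survive as macros, so existing callers compile unchanged while the underlying functions now operate on vectors via irq_to_vector(). A minimal caller sketch with a hypothetical IRQ number, handler and device name, none of which are part of this patch:

    /* Hypothetical device setup: setup_irq() below expands to
     * setup_irq_vector(irq_to_vector(EXAMPLE_IRQ), &example_action). */
    #define EXAMPLE_IRQ 10                       /* assumed platform IRQ */

    static void example_handler(int irq, void *dev_id,
                                struct cpu_user_regs *regs)
    {
        /* ... acknowledge and service the device ... */
    }

    static struct irqaction example_action = {
        .handler = example_handler,
        .name    = "example-device",
    };

    static void __init example_device_init(void)
    {
        setup_irq(EXAMPLE_IRQ, &example_action);
    }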
diff -r af992824b5cf -r c7cba853583d xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/mm.h      Fri Feb 13 11:22:28 2009 +0900
@@ -85,22 +85,192 @@ int assign_pages(
 #define MAX_ORDER 20 /* 2^20 contiguous pages */
 #endif
 
+#define page_list_entry list_head
+
+#include <asm/mm.h>
+
+#ifndef page_list_entry
+struct page_list_head
+{
+    struct page_info *next, *tail;
+};
+/* These must only have instances in struct page_info. */
+# define page_list_entry
+
+# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
+# define PAGE_LIST_HEAD(name) \
+    struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
+# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL)
+# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0)
+
+static inline int
+page_list_empty(const struct page_list_head *head)
+{
+    return !head->next;
+}
+static inline struct page_info *
+page_list_first(const struct page_list_head *head)
+{
+    return head->next;
+}
+static inline struct page_info *
+page_list_next(const struct page_info *page,
+               const struct page_list_head *head)
+{
+    return page != head->tail ? mfn_to_page(page->list.next) : NULL;
+}
+static inline struct page_info *
+page_list_prev(const struct page_info *page,
+               const struct page_list_head *head)
+{
+    return page != head->next ? mfn_to_page(page->list.prev) : NULL;
+}
+static inline void
+page_list_add(struct page_info *page, struct page_list_head *head)
+{
+    if ( head->next )
+    {
+        page->list.next = page_to_mfn(head->next);
+        head->next->list.prev = page_to_mfn(page);
+    }
+    else
+    {
+        head->tail = page;
+        page->list.next = ~0;
+    }
+    page->list.prev = ~0;
+    head->next = page;
+}
+static inline void
+page_list_add_tail(struct page_info *page, struct page_list_head *head)
+{
+    page->list.next = ~0;
+    if ( head->next )
+    {
+        page->list.prev = page_to_mfn(head->tail);
+        head->tail->list.next = page_to_mfn(page);
+    }
+    else
+    {
+        page->list.prev = ~0;
+        head->next = page;
+    }
+    head->tail = page;
+}
+static inline bool_t
+__page_list_del_head(struct page_info *page, struct page_list_head *head,
+                     struct page_info *next, struct page_info *prev)
+{
+    if ( head->next == page )
+    {
+        if ( head->tail != page )
+        {
+            next->list.prev = ~0;
+            head->next = next;
+        }
+        else
+            head->tail = head->next = NULL;
+        return 1;
+    }
+
+    if ( head->tail == page )
+    {
+        prev->list.next = ~0;
+        head->tail = prev;
+        return 1;
+    }
+
+    return 0;
+}
+static inline void
+page_list_del(struct page_info *page, struct page_list_head *head)
+{
+    struct page_info *next = mfn_to_page(page->list.next);
+    struct page_info *prev = mfn_to_page(page->list.prev);
+
+    if ( !__page_list_del_head(page, head, next, prev) )
+    {
+        next->list.prev = page->list.prev;
+        prev->list.next = page->list.next;
+    }
+}
+static inline void
+page_list_del2(struct page_info *page, struct page_list_head *head1,
+               struct page_list_head *head2)
+{
+    struct page_info *next = mfn_to_page(page->list.next);
+    struct page_info *prev = mfn_to_page(page->list.prev);
+
+    if ( !__page_list_del_head(page, head1, next, prev) &&
+         !__page_list_del_head(page, head2, next, prev) )
+    {
+        next->list.prev = page->list.prev;
+        prev->list.next = page->list.next;
+    }
+}
+static inline struct page_info *
+page_list_remove_head(struct page_list_head *head)
+{
+    struct page_info *page = head->next;
+
+    if ( page )
+        page_list_del(page, head);
+
+    return page;
+}
+
+#define page_list_for_each(pos, head) \
+    for ( pos = (head)->next; pos; pos = page_list_next(pos, head) )
+#define page_list_for_each_safe(pos, tmp, head) \
+    for ( pos = (head)->next; \
+          pos ? (tmp = page_list_next(pos, head), 1) : 0; \
+          pos = tmp )
+#define page_list_for_each_safe_reverse(pos, tmp, head) \
+    for ( pos = (head)->tail; \
+          pos ? (tmp = page_list_prev(pos, head), 1) : 0; \
+          pos = tmp )
+#else
+# define page_list_head                  list_head
+# define PAGE_LIST_HEAD_INIT             LIST_HEAD_INIT
+# define PAGE_LIST_HEAD                  LIST_HEAD
+# define INIT_PAGE_LIST_HEAD             INIT_LIST_HEAD
+# define INIT_PAGE_LIST_ENTRY            INIT_LIST_HEAD
+# define page_list_empty                 list_empty
+# define page_list_first(hd)             list_entry((hd)->next, \
+                                                    struct page_info, list)
+# define page_list_next(pg, hd)          list_entry((pg)->list.next, \
+                                                    struct page_info, list)
+# define page_list_add(pg, hd)           list_add(&(pg)->list, hd)
+# define page_list_add_tail(pg, hd)      list_add_tail(&(pg)->list, hd)
+# define page_list_del(pg, hd)           list_del(&(pg)->list)
+# define page_list_del2(pg, hd1, hd2)    list_del(&(pg)->list)
+# define page_list_remove_head(hd)       (!page_list_empty(hd) ? \
+    ({ \
+        struct page_info *__pg = page_list_first(hd); \
+        list_del(&__pg->list); \
+        __pg; \
+    }) : NULL)
+# define page_list_for_each(pos, head)   list_for_each_entry(pos, head, list)
+# define page_list_for_each_safe(pos, tmp, head) \
+    list_for_each_entry_safe(pos, tmp, head, list)
+# define page_list_for_each_safe_reverse(pos, tmp, head) \
+    list_for_each_entry_safe_reverse(pos, tmp, head, list)
+#endif
+
 /* Automatic page scrubbing for dead domains. */
-extern struct list_head page_scrub_list;
-#define page_scrub_schedule_work()              \
-    do {                                        \
-        if ( !list_empty(&page_scrub_list) )    \
-            raise_softirq(PAGE_SCRUB_SOFTIRQ);  \
+extern struct page_list_head page_scrub_list;
+#define page_scrub_schedule_work()                 \
+    do {                                           \
+        if ( !page_list_empty(&page_scrub_list) )  \
+            raise_softirq(PAGE_SCRUB_SOFTIRQ);     \
     } while ( 0 )
 #define page_scrub_kick()                                               \
     do {                                                                \
-        if ( !list_empty(&page_scrub_list) )                            \
+        if ( !page_list_empty(&page_scrub_list) )                       \
             cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ);  \
     } while ( 0 )
 unsigned long avail_scrub_pages(void);
 
-#include <asm/mm.h>
-
 int guest_remove_page(struct domain *d, unsigned long gmfn);
 
 /* Returns TRUE if the whole page at @mfn is ordinary RAM. */
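Both halves of the #ifndef above provide the same interface (page_list_head, the add/del helpers and the page_list_for_each* iterators), so common code is written once and works whether an architecture stores compact MFN links in struct page_info or keeps plain list_head entries. A hedged usage sketch; the counting helper is hypothetical, while d->page_list and d->page_alloc_lock are the struct domain fields converted to the new type later in this changeset:

    /* Hypothetical helper: count a domain's pages with the new iterator.
     * Behaves identically with the MFN-linked and list_head-based variants. */
    static unsigned long count_domain_pages(struct domain *d)
    {
        struct page_info *pg;
        unsigned long n = 0;

        spin_lock(&d->page_alloc_lock);
        page_list_for_each ( pg, &d->page_list )
            n++;
        spin_unlock(&d->page_alloc_lock);

        return n;
    }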
diff -r af992824b5cf -r c7cba853583d xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/sched.h   Fri Feb 13 11:22:28 2009 +0900
@@ -19,6 +19,7 @@
 #include <xen/xenoprof.h>
 #include <xen/rcupdate.h>
 #include <xen/irq.h>
+#include <xen/mm.h>
 
 #ifdef CONFIG_COMPAT
 #include <compat/vcpu.h>
@@ -171,8 +172,8 @@ struct domain
     spinlock_t       domain_lock;
 
     spinlock_t       page_alloc_lock; /* protects all the following fields  */
-    struct list_head page_list;       /* linked list, of size tot_pages     */
-    struct list_head xenpage_list;    /* linked list, of size xenheap_pages */
+    struct page_list_head page_list;  /* linked list, of size tot_pages     */
+    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
     unsigned int     tot_pages;       /* number of pages currently possesed */
     unsigned int     max_pages;       /* maximum value for tot_pages        */
     unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
diff -r af992824b5cf -r c7cba853583d xen/xsm/flask/hooks.c
--- a/xen/xsm/flask/hooks.c     Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/xsm/flask/hooks.c     Fri Feb 13 11:22:28 2009 +0900
@@ -820,6 +820,7 @@ static int flask_hvmcontext(struct domai
             perm = HVM__SETHVMC;
         break;
         case XEN_DOMCTL_gethvmcontext:
+        case XEN_DOMCTL_gethvmcontext_partial:
             perm = HVM__GETHVMC;
         break;
         default:

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
