WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] Merged.

# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 9d86c1a70f347b49393fa26796df4512bb114ebb
# Parent  b09dbe439169a2348c59b30fbdefe3f19e30c766
# Parent  e5c17d2d85a4dc189b98a0ed5a5921d2cda309c3
Merged.
---
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile              |   22 
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile          |   20 
 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S                 |    2 
 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c                     |    4 
 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c                     |   14 
 linux-2.6-xen-sparse/arch/ia64/Kconfig                           |   36 +
 linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre               |    6 
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c              |    7 
 linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c                |    2 
 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                   |   27 
 linux-2.6-xen-sparse/drivers/xen/Kconfig                         |   26 
 linux-2.6-xen-sparse/drivers/xen/Makefile                        |    9 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c             |   21 
 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h                |    1 
 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c                  |    1 
 linux-2.6-xen-sparse/drivers/xen/core/Makefile                   |   16 
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c                  |    9 
 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c              |    2 
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c               |  288 
++++++++--
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c                |   26 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c             |  173 ++++--
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c               |   14 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h      |    6 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h |    7 
 linux-2.6-xen-sparse/include/xen/public/privcmd.h                |   16 
 tools/debugger/libxendebug/xendebug.c                            |    7 
 tools/firmware/hvmloader/Makefile                                |    4 
 tools/firmware/rombios/Makefile                                  |    4 
 tools/firmware/vmxassist/vm86.c                                  |   65 ++
 tools/ioemu/hw/cirrus_vga.c                                      |   17 
 tools/ioemu/hw/pc.c                                              |    5 
 tools/ioemu/hw/vga.c                                             |    2 
 tools/ioemu/vl.c                                                 |   36 -
 tools/ioemu/vl.h                                                 |    2 
 tools/libxc/xc_core.c                                            |    8 
 tools/libxc/xc_domain.c                                          |   10 
 tools/libxc/xc_hvm_build.c                                       |    6 
 tools/libxc/xc_ia64_stubs.c                                      |   12 
 tools/libxc/xc_linux.c                                           |    2 
 tools/libxc/xc_linux_build.c                                     |   76 +-
 tools/libxc/xc_linux_restore.c                                   |  214 ++++++-
 tools/libxc/xc_linux_save.c                                      |   57 +
 tools/libxc/xc_load_aout9.c                                      |    4 
 tools/libxc/xc_load_bin.c                                        |    4 
 tools/libxc/xc_load_elf.c                                        |   19 
 tools/libxc/xc_pagetab.c                                         |    2 
 tools/libxc/xc_private.c                                         |   62 +-
 tools/libxc/xc_ptrace.c                                          |   11 
 tools/libxc/xc_ptrace_core.c                                     |   15 
 tools/libxc/xenctrl.h                                            |   19 
 tools/libxc/xg_private.h                                         |    7 
 tools/libxc/xg_save_restore.h                                    |   12 
 tools/python/xen/util/security.py                                |    9 
 tools/tests/test_x86_emulator.c                                  |  131 ++--
 xen/arch/ia64/linux-xen/smpboot.c                                |    3 
 xen/arch/ia64/xen/domain.c                                       |   15 
 xen/arch/ia64/xen/xensetup.c                                     |    2 
 xen/arch/x86/audit.c                                             |    4 
 xen/arch/x86/cpu/mtrr/main.c                                     |    2 
 xen/arch/x86/dom0_ops.c                                          |    2 
 xen/arch/x86/domain.c                                            |   39 -
 xen/arch/x86/domain_build.c                                      |    9 
 xen/arch/x86/hvm/svm/svm.c                                       |   80 +-
 xen/arch/x86/hvm/vmx/vmx.c                                       |   34 -
 xen/arch/x86/hvm/vmx/x86_32/exits.S                              |   35 -
 xen/arch/x86/hvm/vmx/x86_64/exits.S                              |   71 +-
 xen/arch/x86/i8259.c                                             |    2 
 xen/arch/x86/microcode.c                                         |    2 
 xen/arch/x86/mm.c                                                |   34 -
 xen/arch/x86/setup.c                                             |    2 
 xen/arch/x86/shadow.c                                            |    9 
 xen/arch/x86/shadow32.c                                          |   14 
 xen/arch/x86/shadow_public.c                                     |   14 
 xen/arch/x86/smp.c                                               |    2 
 xen/arch/x86/smpboot.c                                           |   17 
 xen/arch/x86/time.c                                              |    6 
 xen/arch/x86/traps.c                                             |   10 
 xen/arch/x86/x86_32/asm-offsets.c                                |    2 
 xen/arch/x86/x86_32/domain_page.c                                |    2 
 xen/arch/x86/x86_32/entry.S                                      |    5 
 xen/arch/x86/x86_32/mm.c                                         |    3 
 xen/arch/x86/x86_32/traps.c                                      |    6 
 xen/arch/x86/x86_64/asm-offsets.c                                |    3 
 xen/arch/x86/x86_64/entry.S                                      |   10 
 xen/arch/x86/x86_64/mm.c                                         |    3 
 xen/arch/x86/x86_64/traps.c                                      |   14 
 xen/arch/x86/x86_emulate.c                                       |   19 
 xen/common/dom0_ops.c                                            |    2 
 xen/common/domain.c                                              |  134 +++-
 xen/common/kernel.c                                              |    5 
 xen/common/keyhandler.c                                          |    5 
 xen/common/memory.c                                              |   20 
 xen/common/page_alloc.c                                          |    4 
 xen/common/perfc.c                                               |    2 
 xen/common/sched_bvt.c                                           |   36 -
 xen/common/sched_credit.c                                        |   30 -
 xen/common/sched_sedf.c                                          |   39 -
 xen/common/schedule.c                                            |  108 ---
 xen/common/trace.c                                               |   12 
 xen/common/xmalloc.c                                             |    2 
 xen/drivers/char/console.c                                       |    6 
 xen/include/asm-x86/page.h                                       |   11 
 xen/include/public/arch-ia64.h                                   |    3 
 xen/include/public/arch-x86_32.h                                 |   27 
 xen/include/public/arch-x86_64.h                                 |   24 
 xen/include/public/callback.h                                    |   15 
 xen/include/public/dom0_ops.h                                    |   56 -
 xen/include/public/grant_table.h                                 |    2 
 xen/include/public/io/netif.h                                    |    4 
 xen/include/public/io/ring.h                                     |   16 
 xen/include/public/memory.h                                      |   10 
 xen/include/public/xen.h                                         |   22 
 xen/include/xen/console.h                                        |    2 
 xen/include/xen/domain.h                                         |   23 
 xen/include/xen/sched-if.h                                       |   11 
 xen/include/xen/sched.h                                          |   12 
 116 files changed, 1737 insertions(+), 958 deletions(-)

diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S  Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S  Wed Jun 07 11:03:51 
2006 +0100
@@ -173,7 +173,7 @@ ENTRY(cpu_gdt_table)
        .ascii           "|pae_pgdir_above_4gb"
        .ascii           "|supervisor_mode_kernel"
 #ifdef CONFIG_X86_PAE
-       .ascii  ",PAE=yes"
+       .ascii  ",PAE=yes[extended-cr3]"
 #else
        .ascii  ",PAE=no"
 #endif
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c      Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c      Wed Jun 07 11:03:51 
2006 +0100
@@ -132,7 +132,9 @@ struct pt_regs * fastcall save_v86_state
        current->thread.sysenter_cs = __KERNEL_CS;
        load_esp0(tss, &current->thread);
        current->thread.saved_esp0 = 0;
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        loadsegment(fs, current->thread.saved_fs);
        loadsegment(gs, current->thread.saved_gs);
@@ -310,7 +312,9 @@ static void do_sys_vm86(struct kernel_vm
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
        load_esp0(tss, &tsk->thread);
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Wed Jun 07 11:03:51 
2006 +0100
@@ -558,15 +558,11 @@ void __init paging_init(void)
 
        kmap_init();
 
-       if (!xen_feature(XENFEAT_auto_translated_physmap) ||
-           xen_start_info->shared_info >= xen_start_info->nr_pages) {
-               /* Switch to the real shared_info page, and clear the
-                * dummy page. */
-               set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
+       /* Switch to the real shared_info page, and clear the
+        * dummy page. */
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+       memset(empty_zero_page, 0, sizeof(empty_zero_page));
 
        /* Setup mapping of lower 1st MB */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Wed Jun 07 11:03:51 2006 +0100
@@ -73,7 +73,7 @@ config XEN_IA64_DOM0_VP
 
 config XEN_IA64_DOM0_NON_VP
        bool
-       depends on !(XEN && XEN_IA64_DOM0_VP)
+       depends on XEN && !XEN_IA64_DOM0_VP
        default y
        help
          dom0 P=M model
@@ -496,15 +496,39 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+#
 # override default values of drivers/xen/Kconfig
-if !XEN_IA64_DOM0_VP
+#
+if XEN
+config XEN_UTIL
+       default n if XEN_IA64_DOM0_VP
+
 config HAVE_ARCH_ALLOC_SKB
-        bool
-        default n
+       default n if !XEN_IA64_DOM0_VP
 
 config HAVE_ARCH_DEV_ALLOC_SKB
-        bool
-        default n
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_BALLOON
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_SKBUFF
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_NETDEV_BACKEND
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_NETDEV_FRONTEND
+       default n if !XEN_IA64_DOM0_VP
+
+config XEN_DEVMEM
+       default n
+
+config XEN_REBOOT
+       default n
+
+config XEN_SMPBOOT
+       default n
 endif
 
 source "drivers/xen/Kconfig"
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre
--- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Wed Jun 07 
11:03:51 2006 +0100
@@ -10,12 +10,6 @@
 #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h
 ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/
 
-#ia64 drivers/xen isn't fully functional yet, workaround...
-#also ignore core/evtchn.c which uses a different irq mechanism than ia64
-#(warning: there be dragons here if these files diverge)
-ln -sf ../../arch/ia64/xen/drivers/Makefile drivers/xen/Makefile
-ln -sf ../../../arch/ia64/xen/drivers/coreMakefile drivers/xen/core/Makefile
-
 #not sure where these ia64-specific files will end up in the future
 ln -sf ../../../arch/ia64/xen/drivers/xenia64_init.c drivers/xen/core
 
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Wed Jun 07 
11:03:51 2006 +0100
@@ -665,13 +665,6 @@ void __init setup_arch(char **cmdline_p)
 
        setup_xen_features();
 
-       if (xen_feature(XENFEAT_auto_translated_physmap) &&
-           xen_start_info->shared_info < xen_start_info->nr_pages) {
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)__va(xen_start_info->shared_info);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
-
        HYPERVISOR_vm_assist(VMASST_CMD_enable,
                             VMASST_TYPE_writable_pagetables);
 
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Jun 07 11:03:51 
2006 +0100
@@ -488,7 +488,7 @@ static void smp_really_stop_cpu(void *du
 {
        smp_stop_cpu(); 
        for (;;) 
-               asm("hlt"); 
+               halt();
 } 
 
 void smp_send_stop(void)
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Wed Jun 07 11:03:51 
2006 +0100
@@ -666,7 +666,18 @@ void __meminit init_memory_mapping(unsig
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
        }
 
-       BUG_ON(!after_bootmem && start_pfn != table_end);
+       if (!after_bootmem) {
+               BUG_ON(start_pfn != table_end);
+               /*
+                * Destroy the temporary mappings created above. Prevents
+                * overlap with modules area (if init mapping is very big).
+                */
+               start = __START_KERNEL_map + (table_start << PAGE_SHIFT);
+               end   = __START_KERNEL_map + (table_end   << PAGE_SHIFT);
+               for (; start < end; start += PAGE_SIZE)
+                       WARN_ON(HYPERVISOR_update_va_mapping(
+                               start, __pte_ma(0), 0));
+       }
 
        __flush_tlb_all();
 }
@@ -752,15 +763,11 @@ void __init paging_init(void)
        free_area_init_node(0, NODE_DATA(0), zones,
                            __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 
-       if (!xen_feature(XENFEAT_auto_translated_physmap) ||
-           xen_start_info->shared_info >= xen_start_info->nr_pages) {
-               /* Switch to the real shared_info page, and clear the
-                * dummy page. */
-               set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
+       /* Switch to the real shared_info page, and clear the
+        * dummy page. */
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+       memset(empty_zero_page, 0, sizeof(empty_zero_page));
 
        init_mm.context.pinned = 1;
 
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Wed Jun 07 11:03:51 2006 +0100
@@ -228,4 +228,30 @@ config NO_IDLE_HZ
        bool
        default y
 
+config XEN_UTIL
+       bool
+       default y
+
+config XEN_BALLOON
+       bool
+       default y
+
+config XEN_DEVMEM
+       bool
+       default y
+
+config XEN_SKBUFF
+       bool
+       default y
+       depends on NET
+
+config XEN_REBOOT
+       bool
+       default y
+
+config XEN_SMPBOOT
+       bool
+       default y
+       depends on SMP
+
 endif
diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Wed Jun 07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Wed Jun 07 11:03:51 2006 +0100
@@ -1,14 +1,12 @@
-
-obj-y  += util.o
-
 obj-y  += core/
-obj-y  += char/
 obj-y  += console/
 obj-y  += evtchn/
-obj-y  += balloon/
 obj-y  += privcmd/
 obj-y  += xenbus/
 
+obj-$(CONFIG_XEN_UTIL)                 += util.o
+obj-$(CONFIG_XEN_BALLOON)              += balloon/
+obj-$(CONFIG_XEN_DEVMEM)               += char/
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
@@ -17,4 +15,3 @@ obj-$(CONFIG_XEN_BLKDEV_TAP)          += blkt
 obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
 obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
-
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Wed Jun 07 
11:03:51 2006 +0100
@@ -452,10 +452,6 @@ int blkif_ioctl(struct inode *inode, str
                      command, (long)argument, inode->i_rdev);
 
        switch (command) {
-       case HDIO_GETGEO:
-               /* return ENOSYS to use defaults */
-               return -ENOSYS;
-
        case CDROMMULTISESSION:
                DPRINTK("FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
@@ -469,6 +465,23 @@ int blkif_ioctl(struct inode *inode, str
                return -EINVAL; /* same return as native Linux */
        }
 
+       return 0;
+}
+
+
+int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+{
+       /* We don't have real geometry info, but let's at least return
+          values consistent with the size of the device */
+       sector_t nsect = get_capacity(bd->bd_disk);
+       sector_t cylinders = nsect;
+
+       hg->heads = 0xff;
+       hg->sectors = 0x3f;
+       sector_div(cylinders, hg->heads * hg->sectors);
+       hg->cylinders = cylinders;
+       if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
+               hg->cylinders = 0xffff;
        return 0;
 }
 
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Jun 07 11:03:51 
2006 +0100
@@ -140,6 +140,7 @@ extern int blkif_release(struct inode *i
 extern int blkif_release(struct inode *inode, struct file *filep);
 extern int blkif_ioctl(struct inode *inode, struct file *filep,
                        unsigned command, unsigned long argument);
+extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
 extern int blkif_check(dev_t dev);
 extern int blkif_revalidate(dev_t dev);
 extern void do_blkif_request (request_queue_t *rq);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Wed Jun 07 11:03:51 
2006 +0100
@@ -91,6 +91,7 @@ static struct block_device_operations xl
        .open = blkif_open,
        .release = blkif_release,
        .ioctl  = blkif_ioctl,
+       .getgeo = blkif_getgeo
 };
 
 DEFINE_SPINLOCK(blkif_io_lock);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Wed Jun 07 11:03:51 
2006 +0100
@@ -2,11 +2,13 @@
 # Makefile for the linux kernel.
 #
 
-obj-y   := evtchn.o reboot.o gnttab.o features.o
+obj-y := evtchn.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS)     += xen_proc.o
-obj-$(CONFIG_NET)         += skbuff.o
-obj-$(CONFIG_SMP)         += smpboot.o
-obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
-obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)          += xen_proc.o
+obj-$(CONFIG_SYSFS)            += hypervisor_sysfs.o
+obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
+obj-$(CONFIG_XEN_SYSFS)                += xen_sysfs.o
+obj-$(CONFIG_IA64)             += xenia64_init.o
+obj-$(CONFIG_XEN_SKBUFF)       += skbuff.o
+obj-$(CONFIG_XEN_REBOOT)       += reboot.o
+obj-$(CONFIG_XEN_SMPBOOT)      += smpboot.o
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Wed Jun 07 11:03:51 
2006 +0100
@@ -89,9 +89,8 @@ void __init prefill_possible_map(void)
 
        for (i = 0; i < NR_CPUS; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
-               if (rc == -ENOENT)
-                       break;
-               cpu_set(i, cpu_possible_map);
+               if (rc >= 0)
+                       cpu_set(i, cpu_possible_map);
        }
 }
 
@@ -209,7 +208,7 @@ void cpu_initialize_context(unsigned int
        ctxt.failsafe_callback_cs  = __KERNEL_CS;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
 
-       ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
+       ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
 #else /* __x86_64__ */
        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
@@ -221,7 +220,7 @@ void cpu_initialize_context(unsigned int
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
        ctxt.syscall_callback_eip  = (unsigned long)system_call;
 
-       ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
+       ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
 
        ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
 #endif
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c       Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c       Wed Jun 07 
11:03:51 2006 +0100
@@ -146,11 +146,13 @@ static void loopback_construct(struct ne
        dev->hard_start_xmit = loopback_start_xmit;
        dev->get_stats       = loopback_get_stats;
        dev->set_multicast_list = loopback_set_multicast_list;
+       dev->change_mtu      = NULL; /* allow arbitrary mtu */
 
        dev->tx_queue_len    = 0;
 
        dev->features        = (NETIF_F_HIGHDMA |
                                NETIF_F_LLTX |
+                               NETIF_F_SG |
                                NETIF_F_IP_CSUM);
 
        SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Wed Jun 07 
11:03:51 2006 +0100
@@ -458,6 +458,9 @@ inline static void net_tx_action_dealloc
        dc = dealloc_cons;
        dp = dealloc_prod;
 
+       /* Ensure we see all indexes enqueued by netif_idx_release(). */
+       smp_rmb();
+
        /*
         * Free up any grants we have finished using
         */
@@ -487,6 +490,177 @@ inline static void net_tx_action_dealloc
        }
 }
 
+static void netbk_tx_err(netif_t *netif, RING_IDX end)
+{
+       RING_IDX cons = netif->tx.req_cons;
+
+       do {
+               netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+       } while (++cons < end);
+       netif->tx.req_cons = cons;
+       netif_schedule_work(netif);
+       netif_put(netif);
+}
+
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
+                               int work_to_do)
+{
+       netif_tx_request_t *first = txp;
+       RING_IDX cons = netif->tx.req_cons;
+       int frags = 1;
+
+       while (txp->flags & NETTXF_more_data) {
+               if (frags >= work_to_do) {
+                       DPRINTK("Need more frags\n");
+                       return -frags;
+               }
+
+               txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+               if (txp->size > first->size) {
+                       DPRINTK("Frags galore\n");
+                       return -frags;
+               }
+
+               first->size -= txp->size;
+               frags++;
+
+               if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+                       DPRINTK("txp->offset: %x, size: %u\n",
+                               txp->offset, txp->size);
+                       return -frags;
+               }
+       }
+
+       return frags;
+}
+
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+                                                 struct sk_buff *skb,
+                                                 gnttab_map_grant_ref_t *mop)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frags = shinfo->frags;
+       netif_tx_request_t *txp;
+       unsigned long pending_idx = *((u16 *)skb->data);
+       RING_IDX cons = netif->tx.req_cons + 1;
+       int i, start;
+
+       /* Skip first skb fragment if it is on same page as header fragment. */
+       start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+       for (i = start; i < shinfo->nr_frags; i++) {
+               txp = RING_GET_REQUEST(&netif->tx, cons++);
+               pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+               gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+                                 GNTMAP_host_map | GNTMAP_readonly,
+                                 txp->gref, netif->domid);
+
+               memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+               netif_get(netif);
+               pending_tx_info[pending_idx].netif = netif;
+               frags[i].page = (void *)pending_idx;
+       }
+
+       return mop;
+}
+
+static int netbk_tx_check_mop(struct sk_buff *skb,
+                              gnttab_map_grant_ref_t **mopp)
+{
+       gnttab_map_grant_ref_t *mop = *mopp;
+       int pending_idx = *((u16 *)skb->data);
+       netif_t *netif = pending_tx_info[pending_idx].netif;
+       netif_tx_request_t *txp;
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i, err, start;
+
+       /* Check status of header. */
+       err = mop->status;
+       if (unlikely(err)) {
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+       } else {
+               set_phys_to_machine(
+                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+               grant_tx_handle[pending_idx] = mop->handle;
+       }
+
+       /* Skip first skb fragment if it is on same page as header fragment. */
+       start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+       for (i = start; i < nr_frags; i++) {
+               int j, newerr;
+
+               pending_idx = (unsigned long)shinfo->frags[i].page;
+
+               /* Check error status: if okay then remember grant handle. */
+               newerr = (++mop)->status;
+               if (likely(!newerr)) {
+                       set_phys_to_machine(
+                               __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+                               FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+                       grant_tx_handle[pending_idx] = mop->handle;
+                       /* Had a previous error? Invalidate this fragment. */
+                       if (unlikely(err))
+                               netif_idx_release(pending_idx);
+                       continue;
+               }
+
+               /* Error on this fragment: respond to client with an error. */
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+
+               /* Not the first error? Preceding frags already invalidated. */
+               if (err)
+                       continue;
+
+               /* First error: invalidate header and preceding fragments. */
+               pending_idx = *((u16 *)skb->data);
+               netif_idx_release(pending_idx);
+               for (j = start; j < i; j++) {
+                       pending_idx = (unsigned long)shinfo->frags[i].page;
+                       netif_idx_release(pending_idx);
+               }
+
+               /* Remember the error: invalidate all subsequent fragments. */
+               err = newerr;
+       }
+
+       *mopp = mop + 1;
+       return err;
+}
+
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i;
+
+       for (i = 0; i < nr_frags; i++) {
+               skb_frag_t *frag = shinfo->frags + i;
+               netif_tx_request_t *txp;
+               unsigned long pending_idx;
+
+               pending_idx = (unsigned long)frag->page;
+               txp = &pending_tx_info[pending_idx].req;
+               frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+               frag->size = txp->size;
+               frag->page_offset = txp->offset;
+
+               skb->len += txp->size;
+               skb->data_len += txp->size;
+               skb->truesize += txp->size;
+       }
+}
+
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
@@ -504,7 +678,7 @@ static void net_tx_action(unsigned long 
                net_tx_action_dealloc();
 
        mop = tx_map_ops;
-       while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+       while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
                !list_empty(&net_schedule_list)) {
                /* Get a netif from the list with work to do. */
                ent = net_schedule_list.next;
@@ -552,38 +726,44 @@ static void net_tx_action(unsigned long 
                }
                netif->remaining_credit -= txreq.size;
 
-               netif->tx.req_cons++;
-
-               netif_schedule_work(netif);
-
-               if (unlikely(txreq.size < ETH_HLEN) || 
-                   unlikely(txreq.size > ETH_FRAME_LEN)) {
+               ret = netbk_count_requests(netif, &txreq, work_to_do);
+               if (unlikely(ret < 0)) {
+                       netbk_tx_err(netif, i - ret);
+                       continue;
+               }
+               i += ret;
+
+               if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
+                       DPRINTK("Too many frags\n");
+                       netbk_tx_err(netif, i);
+                       continue;
+               }
+
+               if (unlikely(txreq.size < ETH_HLEN)) {
                        DPRINTK("Bad packet size: %d\n", txreq.size);
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        continue; 
                }
 
                /* No crossing a page as the payload mustn't fragment. */
-               if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+               if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                        DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
                                txreq.offset, txreq.size, 
                                (txreq.offset &~PAGE_MASK) + txreq.size);
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        continue;
                }
 
                pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
 
-               data_len = (txreq.size > PKT_PROT_LEN) ?
+               data_len = (txreq.size > PKT_PROT_LEN &&
+                           ret < MAX_SKB_FRAGS + 1) ?
                        PKT_PROT_LEN : txreq.size;
 
                skb = alloc_skb(data_len+16, GFP_ATOMIC);
                if (unlikely(skb == NULL)) {
                        DPRINTK("Can't allocate a skb in start_xmit.\n");
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        break;
                }
 
@@ -600,9 +780,23 @@ static void net_tx_action(unsigned long 
                pending_tx_info[pending_idx].netif = netif;
                *((u16 *)skb->data) = pending_idx;
 
+               __skb_put(skb, data_len);
+
+               skb_shinfo(skb)->nr_frags = ret - 1;
+               if (data_len < txreq.size) {
+                       skb_shinfo(skb)->nr_frags++;
+                       skb_shinfo(skb)->frags[0].page =
+                               (void *)(unsigned long)pending_idx;
+               }
+
                __skb_queue_tail(&tx_queue, skb);
 
                pending_cons++;
+
+               mop = netbk_get_requests(netif, skb, mop);
+
+               netif->tx.req_cons = i;
+               netif_schedule_work(netif);
 
                if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
                        break;
@@ -617,75 +811,56 @@ static void net_tx_action(unsigned long 
 
        mop = tx_map_ops;
        while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+               netif_tx_request_t *txp;
+
                pending_idx = *((u16 *)skb->data);
                netif       = pending_tx_info[pending_idx].netif;
-               memcpy(&txreq, &pending_tx_info[pending_idx].req,
-                      sizeof(txreq));
+               txp         = &pending_tx_info[pending_idx].req;
 
                /* Check the remap error code. */
-               if (unlikely(mop->status)) {
+               if (unlikely(netbk_tx_check_mop(skb, &mop))) {
                        printk(KERN_ALERT "#### netback grant fails\n");
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       skb_shinfo(skb)->nr_frags = 0;
                        kfree_skb(skb);
-                       mop++;
-                       pending_ring[MASK_PEND_IDX(pending_prod++)] =
-                               pending_idx;
                        continue;
                }
-               set_phys_to_machine(
-                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
-                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-               grant_tx_handle[pending_idx] = mop->handle;
-
-               data_len = (txreq.size > PKT_PROT_LEN) ?
-                       PKT_PROT_LEN : txreq.size;
-
-               __skb_put(skb, data_len);
+
+               data_len = skb->len;
                memcpy(skb->data, 
-                      (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+                      (void *)(MMAP_VADDR(pending_idx)|txp->offset),
                       data_len);
-               if (data_len < txreq.size) {
+               if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
-                       skb_shinfo(skb)->frags[0].page        = 
-                               virt_to_page(MMAP_VADDR(pending_idx));
-                       skb_shinfo(skb)->frags[0].size        =
-                               txreq.size - data_len;
-                       skb_shinfo(skb)->frags[0].page_offset = 
-                               txreq.offset + data_len;
-                       skb_shinfo(skb)->nr_frags = 1;
+                       txp->offset += data_len;
+                       txp->size -= data_len;
                } else {
                        /* Schedule a response immediately. */
                        netif_idx_release(pending_idx);
                }
-
-               skb->data_len  = txreq.size - data_len;
-               skb->len      += skb->data_len;
-               skb->truesize += skb->data_len;
-
-               skb->dev      = netif->dev;
-               skb->protocol = eth_type_trans(skb, skb->dev);
 
                /*
                 * Old frontends do not assert data_validated but we
                 * can infer it from csum_blank so test both flags.
                 */
-               if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+               if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        skb->proto_data_valid = 1;
                } else {
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->proto_data_valid = 0;
                }
-               skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
-
-               netif->stats.rx_bytes += txreq.size;
+               skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+               netbk_fill_frags(skb);
+
+               skb->dev      = netif->dev;
+               skb->protocol = eth_type_trans(skb, skb->dev);
+
+               netif->stats.rx_bytes += skb->len;
                netif->stats.rx_packets++;
 
                netif_rx(skb);
                netif->dev->last_rx = jiffies;
-
-               mop++;
        }
 }
 
@@ -695,7 +870,10 @@ static void netif_idx_release(u16 pendin
        unsigned long flags;
 
        spin_lock_irqsave(&_lock, flags);
-       dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+       dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+       /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+       smp_wmb();
+       dealloc_prod++;
        spin_unlock_irqrestore(&_lock, flags);
 
        tasklet_schedule(&net_tx_tasklet);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Jun 07 11:03:51 
2006 +0100
@@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d
 static int netback_probe(struct xenbus_device *dev,
                         const struct xenbus_device_id *id)
 {
+       const char *message;
+       xenbus_transaction_t xbt;
        int err;
        struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                          GFP_KERNEL);
@@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d
        if (err)
                goto fail;
 
+       do {
+               err = xenbus_transaction_start(&xbt);
+               if (err) {
+                       xenbus_dev_fatal(dev, err, "starting transaction");
+                       goto fail;
+               }
+
+               err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+               if (err) {
+                       message = "writing feature-sg";
+                       goto abort_transaction;
+               }
+
+               err = xenbus_transaction_end(xbt, 0);
+       } while (err == -EAGAIN);
+
+       if (err) {
+               xenbus_dev_fatal(dev, err, "completing transaction");
+               goto fail;
+       }
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err) {
                goto fail;
@@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d
 
        return 0;
 
+abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       xenbus_dev_fatal(dev, err, "%s", message);
 fail:
        DPRINTK("failed");
        netback_remove(dev);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Wed Jun 07 
11:03:51 2006 +0100
@@ -45,6 +45,7 @@
 #include <linux/bitops.h>
 #include <linux/ethtool.h>
 #include <linux/in.h>
+#include <linux/if_ether.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/arp.h>
@@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne
 #define xennet_sysfs_delif(dev) do { } while(0)
 #endif
 
+static inline int xennet_can_sg(struct net_device *dev)
+{
+       return dev->features & NETIF_F_SG;
+}
+
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffers for communication with the backend, and
@@ -307,8 +313,6 @@ again:
                goto destroy_ring;
        }
 
-       xenbus_switch_state(dev, XenbusStateConnected);
-
        return 0;
 
  abort_transaction:
@@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de
                goto fail;
 
        memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
-       network_connect(netdev);
        info->irq = bind_evtchn_to_irqhandler(
                info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name,
                netdev);
-       (void)send_fake_arp(netdev);
-       show_device(info);
 
        return 0;
 
@@ -391,15 +392,24 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
 {
+       struct netfront_info *np = dev->data;
+       struct net_device *netdev = np->netdev;
+
        DPRINTK("\n");
 
        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateConnected:
        case XenbusStateUnknown:
        case XenbusStateClosed:
+               break;
+
+       case XenbusStateInitWait:
+               network_connect(netdev);
+               xenbus_switch_state(dev, XenbusStateConnected);
+               (void)send_fake_arp(netdev);
+               show_device(np);
                break;
 
        case XenbusStateClosing:
@@ -452,13 +462,17 @@ static int network_open(struct net_devic
        return 0;
 }
 
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+       return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
+}
+
 static inline void network_maybe_wake_tx(struct net_device *dev)
 {
        struct netfront_info *np = netdev_priv(dev);
 
        if (unlikely(netif_queue_stopped(dev)) &&
-           !RING_FULL(&np->tx) &&
-           !gnttab_empty_grant_references(&np->gref_tx_head) &&
+           netfront_tx_slot_available(np) &&
            likely(netif_running(dev)))
                netif_wake_queue(dev);
 }
@@ -485,7 +499,7 @@ static void network_tx_buf_gc(struct net
                                printk(KERN_ALERT "network_tx_buf_gc: warning "
                                       "-- grant still in use by backend "
                                       "domain.\n");
-                               break; /* bail immediately */
+                               BUG();
                        }
                        gnttab_end_foreign_access_ref(
                                np->grant_tx_ref[id], GNTMAP_readonly);
@@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str
        RING_PUSH_REQUESTS(&np->rx);
 }
 
+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+                             struct netif_tx_request *tx)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       char *data = skb->data;
+       unsigned long mfn;
+       RING_IDX prod = np->tx.req_prod_pvt;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+       unsigned int id;
+       grant_ref_t ref;
+       int i;
+
+       while (len > PAGE_SIZE - offset) {
+               tx->size = PAGE_SIZE - offset;
+               tx->flags |= NETTXF_more_data;
+               len -= tx->size;
+               data += tx->size;
+               offset = 0;
+
+               id = get_id_from_freelist(np->tx_skbs);
+               np->tx_skbs[id] = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref < 0);
+
+               mfn = virt_to_mfn(data);
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = np->grant_tx_ref[id] = ref;
+               tx->offset = offset;
+               tx->size = len;
+               tx->flags = 0;
+       }
+
+       for (i = 0; i < frags; i++) {
+               skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+               tx->flags |= NETTXF_more_data;
+
+               id = get_id_from_freelist(np->tx_skbs);
+               np->tx_skbs[id] = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref < 0);
+
+               mfn = pfn_to_mfn(page_to_pfn(frag->page));
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = np->grant_tx_ref[id] = ref;
+               tx->offset = frag->page_offset;
+               tx->size = frag->size;
+               tx->flags = 0;
+       }
+
+       np->tx.req_prod_pvt = prod;
+}
 
 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct netif_tx_request *tx;
+       char *data = skb->data;
        RING_IDX i;
        grant_ref_t ref;
        unsigned long mfn;
        int notify;
-
-       if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
-                    PAGE_SIZE)) {
-               struct sk_buff *nskb;
-               nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN);
-               if (unlikely(nskb == NULL))
-                       goto drop;
-               skb_put(nskb, skb->len);
-               memcpy(nskb->data, skb->data, skb->len);
-               /* Copy only the header fields we use in this driver. */
-               nskb->dev = skb->dev;
-               nskb->ip_summed = skb->ip_summed;
-               nskb->proto_data_valid = skb->proto_data_valid;
-               dev_kfree_skb(skb);
-               skb = nskb;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+
+       frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
+       if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
+               printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
+                      frags);
+               dump_stack();
+               goto drop;
        }
 
        spin_lock_irq(&np->tx_lock);
 
-       if (unlikely(!netif_carrier_ok(dev))) {
+       if (unlikely(!netif_carrier_ok(dev) ||
+                    (frags > 1 && !xennet_can_sg(dev)))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }
@@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_
        tx->id   = id;
        ref = gnttab_claim_grant_reference(&np->gref_tx_head);
        BUG_ON((signed short)ref < 0);
-       mfn = virt_to_mfn(skb->data);
+       mfn = virt_to_mfn(data);
        gnttab_grant_foreign_access_ref(
                ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
        tx->gref = np->grant_tx_ref[id] = ref;
-       tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
-       tx->size = skb->len;
+       tx->offset = offset;
+       tx->size = len;
 
        tx->flags = 0;
        if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
@@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_
                tx->flags |= NETTXF_data_validated;
 
        np->tx.req_prod_pvt = i + 1;
+
+       xennet_make_frags(skb, dev, tx);
+       tx->size = skb->len;
+
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
        if (notify)
                notify_remote_via_irq(np->irq);
 
        network_tx_buf_gc(dev);
 
-       if (RING_FULL(&np->tx) ||
-           gnttab_empty_grant_references(&np->gref_tx_head))
+       if (!netfront_tx_slot_available(np))
                netif_stop_queue(dev);
 
        spin_unlock_irq(&np->tx_lock);
@@ -963,12 +1039,46 @@ static struct net_device_stats *network_
        return &np->stats;
 }
 
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+       int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+       if (mtu > max)
+               return -EINVAL;
+       dev->mtu = mtu;
+       return 0;
+}
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+       if (data) {
+               struct netfront_info *np = netdev_priv(dev);
+               int val;
+
+               if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg",
+                                "%d", &val) < 0)
+                       val = 0;
+               if (!val)
+                       return -ENOSYS;
+       } else if (dev->mtu > ETH_DATA_LEN)
+               dev->mtu = ETH_DATA_LEN;
+
+       return ethtool_op_set_sg(dev, data);
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+       xennet_set_sg(dev, 1);
+}
+
 static void network_connect(struct net_device *dev)
 {
        struct netfront_info *np;
        int i, requeue_idx;
        struct netif_tx_request *tx;
        struct sk_buff *skb;
+
+       xennet_set_features(dev);
 
        np = netdev_priv(dev);
        spin_lock_irq(&np->tx_lock);
@@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo
 {
        .get_tx_csum = ethtool_op_get_tx_csum,
        .set_tx_csum = ethtool_op_set_tx_csum,
+       .get_sg = ethtool_op_get_sg,
+       .set_sg = xennet_set_sg,
 };
 
 #ifdef CONFIG_SYSFS
@@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre
        netdev->poll            = netif_poll;
        netdev->set_multicast_list = network_set_multicast_list;
        netdev->uninit          = netif_uninit;
+       netdev->change_mtu      = xennet_change_mtu;
        netdev->weight          = 64;
        netdev->features        = NETIF_F_IP_CSUM;
 
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Jun 07 
11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Jun 07 
11:03:51 2006 +0100
@@ -61,11 +61,11 @@ static int privcmd_ioctl(struct inode *i
                __asm__ __volatile__ (
                        "pushl %%ebx; pushl %%ecx; pushl %%edx; "
                        "pushl %%esi; pushl %%edi; "
-                       "movl  4(%%eax),%%ebx ;"
-                       "movl  8(%%eax),%%ecx ;"
-                       "movl 12(%%eax),%%edx ;"
-                       "movl 16(%%eax),%%esi ;"
-                       "movl 20(%%eax),%%edi ;"
+                       "movl  8(%%eax),%%ebx ;"
+                       "movl 16(%%eax),%%ecx ;"
+                       "movl 24(%%eax),%%edx ;"
+                       "movl 32(%%eax),%%esi ;"
+                       "movl 40(%%eax),%%edi ;"
                        "movl   (%%eax),%%eax ;"
                        "shll $5,%%eax ;"
                        "addl $hypercall_page,%%eax ;"
@@ -161,7 +161,7 @@ static int privcmd_ioctl(struct inode *i
        case IOCTL_PRIVCMD_MMAPBATCH: {
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
-               unsigned long __user *p;
+               xen_pfn_t __user *p;
                unsigned long addr, mfn; 
                int i;
 
@@ -210,7 +210,7 @@ static int privcmd_ioctl(struct inode *i
        batch_err:
                printk("batch_err ret=%d vma=%p addr=%lx "
                       "num=%d arr=%p %lx-%lx\n", 
-                      ret, vma, m.addr, m.num, m.arr,
+                      ret, vma, (unsigned long)m.addr, m.num, m.arr,
                       vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
                break;
        }
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Wed Jun 
07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Wed Jun 
07 11:03:51 2006 +0100
@@ -116,10 +116,12 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
        __asm__ ( \
                "movl %%cr3,%0\n\t" \
                :"=r" (__dummy)); \
-       machine_to_phys(__dummy); \
+       __dummy = xen_cr3_to_pfn(__dummy); \
+       mfn_to_pfn(__dummy) << PAGE_SHIFT; \
 })
 #define write_cr3(x) ({                                                \
-       maddr_t __dummy = phys_to_machine(x);                   \
+       unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT);   \
+       __dummy = xen_pfn_to_cr3(__dummy);                      \
        __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy));  \
 })
 
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Wed Jun 
07 11:03:15 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Wed Jun 
07 11:03:51 2006 +0100
@@ -61,13 +61,6 @@ static void __init machine_specific_arch
                .address = { __KERNEL_CS, (unsigned long)nmi },
        };
 
-       if (xen_feature(XENFEAT_auto_translated_physmap) &&
-           xen_start_info->shared_info < xen_start_info->nr_pages) {
-               HYPERVISOR_shared_info =
-                       (shared_info_t *)__va(xen_start_info->shared_info);
-               memset(empty_zero_page, 0, sizeof(empty_zero_page));
-       }
-
        ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
        if (ret == 0)
                ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/include/xen/public/privcmd.h
--- a/linux-2.6-xen-sparse/include/xen/public/privcmd.h Wed Jun 07 11:03:15 
2006 +0100
+++ b/linux-2.6-xen-sparse/include/xen/public/privcmd.h Wed Jun 07 11:03:51 
2006 +0100
@@ -33,20 +33,22 @@
 #ifndef __LINUX_PUBLIC_PRIVCMD_H__
 #define __LINUX_PUBLIC_PRIVCMD_H__
 
+#include <linux/types.h>
+
 #ifndef __user
 #define __user
 #endif
 
 typedef struct privcmd_hypercall
 {
-       unsigned long op;
-       unsigned long arg[5];
+       __u64 op;
+       __u64 arg[5];
 } privcmd_hypercall_t;
 
 typedef struct privcmd_mmap_entry {
-       unsigned long va;
-       unsigned long mfn;
-       unsigned long npages;
+       __u64 va;
+       __u64 mfn;
+       __u64 npages;
 } privcmd_mmap_entry_t; 
 
 typedef struct privcmd_mmap {
@@ -58,8 +60,8 @@ typedef struct privcmd_mmapbatch {
 typedef struct privcmd_mmapbatch {
        int num;     /* number of pages to populate */
        domid_t dom; /* target domain */
-       unsigned long addr;  /* virtual address */
-       unsigned long __user *arr; /* array of mfns - top nibble set on err */
+       __u64 addr;  /* virtual address */
+       xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
 } privcmd_mmapbatch_t; 
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/debugger/libxendebug/xendebug.c     Wed Jun 07 11:03:51 2006 +0100
@@ -57,7 +57,7 @@ typedef struct domain_context           
     vcpu_guest_context_t context[MAX_VIRT_CPUS];
 
     long            total_pages;
-    unsigned long  *page_array;
+    xen_pfn_t      *page_array;
 
     unsigned long   cr3_phys[MAX_VIRT_CPUS];
     unsigned long  *cr3_virt[MAX_VIRT_CPUS];
@@ -346,8 +346,9 @@ xendebug_memory_page (domain_context_p c
         ctxt->cr3_phys[vcpu] = vcpu_ctxt->ctrlreg[3];
         if ( ctxt->cr3_virt[vcpu] )
             munmap(ctxt->cr3_virt[vcpu], PAGE_SIZE);
-        ctxt->cr3_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid,
-                    PAGE_SIZE, PROT_READ, ctxt->cr3_phys[vcpu] >> PAGE_SHIFT);
+        ctxt->cr3_virt[vcpu] = xc_map_foreign_range(
+            xc_handle, ctxt->domid, PAGE_SIZE, PROT_READ,
+            xen_cr3_to_pfn(ctxt->cr3_phys[vcpu]));
         if ( ctxt->cr3_virt[vcpu] == NULL )
             return 0;
     } 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/firmware/hvmloader/Makefile Wed Jun 07 11:03:51 2006 +0100
@@ -51,8 +51,8 @@ hvmloader: roms.h hvmloader.c acpi_madt.
        $(OBJCOPY) hvmloader.tmp hvmloader
        rm -f hvmloader.tmp
 
-roms.h:        ../rombios/BIOS-bochs-8-processors 
../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin 
../vmxassist/vmxassist.bin
-       sh ./mkhex rombios ../rombios/BIOS-bochs-8-processors > roms.h
+roms.h:        ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin 
../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin
+       sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
        sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
        sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin 
>> roms.h
        sh ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h
diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/rombios/Makefile
--- a/tools/firmware/rombios/Makefile   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/firmware/rombios/Makefile   Wed Jun 07 11:03:51 2006 +0100
@@ -1,7 +1,7 @@
-#BIOS_BUILDS = BIOS-bochs-latest
+BIOS_BUILDS = BIOS-bochs-latest
 #BIOS_BUILDS += BIOS-bochs-2-processors
 #BIOS_BUILDS += BIOS-bochs-4-processors
-BIOS_BUILDS += BIOS-bochs-8-processors
+#BIOS_BUILDS += BIOS-bochs-8-processors
 
 .PHONY: all
 all: bios
diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/firmware/vmxassist/vm86.c   Wed Jun 07 11:03:51 2006 +0100
@@ -36,6 +36,8 @@
 
 static unsigned prev_eip = 0;
 enum vm86_mode mode = 0;
+
+static struct regs saved_rm_regs;
 
 #ifdef DEBUG
 int traceset = 0;
@@ -795,6 +797,8 @@ protected_mode(struct regs *regs)
        oldctx.esp = regs->uesp;
        oldctx.eflags = regs->eflags;
 
+       memset(&saved_rm_regs, 0, sizeof(struct regs));
+
        /* reload all segment registers */
        if (!load_seg(regs->cs, &oldctx.cs_base,
                                &oldctx.cs_limit, &oldctx.cs_arbytes))
@@ -808,6 +812,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.es_base,
                            &oldctx.es_limit, &oldctx.es_arbytes);
                oldctx.es_sel = 0;
+               saved_rm_regs.ves = regs->ves;
        }
 
        if (load_seg(regs->uss, &oldctx.ss_base,
@@ -817,6 +822,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.ss_base,
                            &oldctx.ss_limit, &oldctx.ss_arbytes);
                oldctx.ss_sel = 0;
+               saved_rm_regs.uss = regs->uss;
        }
 
        if (load_seg(regs->vds, &oldctx.ds_base,
@@ -826,6 +832,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.ds_base,
                            &oldctx.ds_limit, &oldctx.ds_arbytes);
                oldctx.ds_sel = 0;
+               saved_rm_regs.vds = regs->vds;
        }
 
        if (load_seg(regs->vfs, &oldctx.fs_base,
@@ -835,6 +842,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.fs_base,
                            &oldctx.fs_limit, &oldctx.fs_arbytes);
                oldctx.fs_sel = 0;
+               saved_rm_regs.vfs = regs->vfs;
        }
 
        if (load_seg(regs->vgs, &oldctx.gs_base,
@@ -844,6 +852,7 @@ protected_mode(struct regs *regs)
                load_seg(0, &oldctx.gs_base,
                            &oldctx.gs_limit, &oldctx.gs_arbytes);
                oldctx.gs_sel = 0;
+               saved_rm_regs.vgs = regs->vgs;
        }
 
        /* initialize jump environment to warp back to protected mode */
@@ -880,16 +889,22 @@ real_mode(struct regs *regs)
                if (regs->uss >= HIGHMEM)
                        panic("%%ss 0x%lx higher than 1MB", regs->uss);
                regs->uss = address(regs, regs->uss, 0) >> 4;
+       } else {
+         regs->uss = saved_rm_regs.uss;
        }
        if (regs->vds != 0) {
                if (regs->vds >= HIGHMEM)
                        panic("%%ds 0x%lx higher than 1MB", regs->vds);
                regs->vds = address(regs, regs->vds, 0) >> 4;
+       } else {
+         regs->vds = saved_rm_regs.vds;
        }
        if (regs->ves != 0) {
                if (regs->ves >= HIGHMEM)
                        panic("%%es 0x%lx higher than 1MB", regs->ves);
                regs->ves = address(regs, regs->ves, 0) >> 4;
+       } else {
+         regs->ves = saved_rm_regs.ves;
        }
 
        /* this should get us into 16-bit mode */
@@ -971,6 +986,39 @@ jmpl(struct regs *regs, int prefix)
        } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
                eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
                cs = fetch16(regs);
+
+               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+                regs->cs = cs;
+                regs->eip = eip;
+               set_mode(regs, VM86_REAL);
+       } else
+               panic("jmpl");
+}
+
+static void
+jmpl_indirect(struct regs *regs, int prefix, unsigned modrm)
+{
+       unsigned n = regs->eip;
+       unsigned cs, eip;
+       unsigned addr;
+
+       addr  = operand(prefix, regs, modrm);
+
+       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
+               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+               addr += (prefix & DATA32) ? 4 : 2;
+               cs = read16(addr);
+
+               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+                regs->cs = cs;
+                regs->eip = eip;
+               set_mode(regs, VM86_PROTECTED);
+       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
+               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+               addr += (prefix & DATA32) ? 4 : 2;
+               cs = read16(addr);
 
                TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
 
@@ -1306,6 +1354,23 @@ opcode(struct regs *regs)
                        }
                        goto invalid;
 
+               case 0xFF: /* jmpl (indirect) */
+                       if ((mode == VM86_REAL_TO_PROTECTED) ||
+                           (mode == VM86_PROTECTED_TO_REAL)) {
+                               unsigned modrm = fetch8(regs);
+                               
+                               switch((modrm >> 3) & 7) {
+                               case 5:
+                                 jmpl_indirect(regs, prefix, modrm);
+                                 return OPC_INVALID;
+
+                               default:
+                                 break;
+                               }
+
+                       }
+                       goto invalid;
+
                case 0xEB: /* short jump */
                        if ((mode == VM86_REAL_TO_PROTECTED) ||
                            (mode == VM86_PROTECTED_TO_REAL)) {
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/hw/cirrus_vga.c       Wed Jun 07 11:03:51 2006 +0100
@@ -2460,10 +2460,9 @@ static CPUWriteMemoryFunc *cirrus_linear
 };
 
 extern FILE *logfile;
-#if defined(__i386__) || defined (__x86_64__)
 static void * set_vram_mapping(unsigned long begin, unsigned long end)
 {
-    unsigned long * extent_start = NULL;
+    xen_pfn_t *extent_start = NULL;
     unsigned long nr_extents;
     void *vram_pointer = NULL;
     int i;
@@ -2474,14 +2473,14 @@ static void * set_vram_mapping(unsigned 
     end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
     nr_extents = (end - begin) >> TARGET_PAGE_BITS;
 
-    extent_start = malloc(sizeof(unsigned long) * nr_extents );
+    extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
     if (extent_start == NULL)
     {
         fprintf(stderr, "Failed malloc on set_vram_mapping\n");
         return NULL;
     }
 
-    memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
+    memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
 
     for (i = 0; i < nr_extents; i++)
     {
@@ -2509,7 +2508,7 @@ static void * set_vram_mapping(unsigned 
 
 static int unset_vram_mapping(unsigned long begin, unsigned long end)
 {
-    unsigned long * extent_start = NULL;
+    xen_pfn_t *extent_start = NULL;
     unsigned long nr_extents;
     int i;
 
@@ -2520,7 +2519,7 @@ static int unset_vram_mapping(unsigned l
     end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
     nr_extents = (end - begin) >> TARGET_PAGE_BITS;
 
-    extent_start = malloc(sizeof(unsigned long) * nr_extents );
+    extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
 
     if (extent_start == NULL)
     {
@@ -2528,7 +2527,7 @@ static int unset_vram_mapping(unsigned l
         return -1;
     }
 
-    memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
+    memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
 
     for (i = 0; i < nr_extents; i++)
         extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS;
@@ -2540,10 +2539,6 @@ static int unset_vram_mapping(unsigned l
     return 0;
 }
 
-#elif defined(__ia64__)
-static void * set_vram_mapping(unsigned long addr, unsigned long end) {}
-static int unset_vram_mapping(unsigned long addr, unsigned long end) {}
-#endif
 extern int vga_accelerate;
 
 /* Compute the memory access functions */
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/hw/pc.c       Wed Jun 07 11:03:51 2006 +0100
@@ -537,8 +537,11 @@ void pc_init(uint64_t ram_size, int vga_
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_hds[i]) {
             sp = serial_init(serial_io[i], serial_irq[i], serial_hds[i]);
-            if (i == SUMMA_PORT)
+            if (i == serial_summa_port) {
                summa_init(sp, serial_hds[i]);
+               fprintf(stderr, "Serial port %d (COM%d) initialized for 
Summagraphics\n",
+                       i, i+1);
+           }
         }
     }
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/hw/vga.c      Wed Jun 07 11:03:51 2006 +0100
@@ -1995,6 +1995,7 @@ void vga_common_init(VGAState *s, Displa
     s->get_resolution = vga_get_resolution;
     /* XXX: currently needed for display */
     vga_state = s;
+    vga_bios_init(s);
 }
 
 
@@ -2082,7 +2083,6 @@ int vga_initialize(PCIBus *bus, DisplayS
 #endif
     }
 
-    vga_bios_init(s);
     return 0;
 }
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/vl.c  Wed Jun 07 11:03:51 2006 +0100
@@ -146,6 +146,7 @@ int repeat_key = 1;
 int repeat_key = 1;
 TextConsole *vga_console;
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+int serial_summa_port = -1;
 int xc_handle;
 time_t timeoffset = 0;
 
@@ -2457,7 +2458,7 @@ int unset_mm_mapping(int xc_handle,
                      uint32_t domid,
                      unsigned long nr_pages,
                      unsigned int address_bits,
-                     unsigned long *extent_start)
+                     xen_pfn_t *extent_start)
 {
     int err = 0;
     xc_dominfo_t info;
@@ -2490,7 +2491,7 @@ int set_mm_mapping(int xc_handle,
                     uint32_t domid,
                     unsigned long nr_pages,
                     unsigned int address_bits,
-                    unsigned long *extent_start)
+                    xen_pfn_t *extent_start)
 {
     xc_dominfo_t info;
     int err = 0;
@@ -2498,7 +2499,7 @@ int set_mm_mapping(int xc_handle,
     xc_domain_getinfo(xc_handle, domid, 1, &info);
 
     if ( xc_domain_setmaxmem(xc_handle, domid,
-                             (info.nr_pages + nr_pages) * PAGE_SIZE/1024) != 0)
+                             info.max_memkb + nr_pages * PAGE_SIZE/1024) !=0)
     {
         fprintf(logfile, "set maxmem returned error %d\n", errno);
         return -1;
@@ -2556,7 +2557,8 @@ int main(int argc, char **argv)
     int serial_device_index;
     char qemu_dm_logfilename[64];
     const char *loadvm = NULL;
-    unsigned long nr_pages, *page_array;
+    unsigned long nr_pages;
+    xen_pfn_t *page_array;
     extern void *shared_page;
 
 #if !defined(CONFIG_SOFTMMU)
@@ -2588,8 +2590,8 @@ int main(int argc, char **argv)
     pstrcpy(monitor_device, sizeof(monitor_device), "vc");
 
     pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc");
-    pstrcpy(serial_devices[1], sizeof(serial_devices[1]), "null");
-    for(i = 2; i < MAX_SERIAL_PORTS; i++)
+    serial_summa_port = -1;
+    for(i = 1; i < MAX_SERIAL_PORTS; i++)
         serial_devices[i][0] = '\0';
     serial_device_index = 0;
 
@@ -3022,8 +3024,8 @@ int main(int argc, char **argv)
 
     xc_handle = xc_interface_open();
 
-    if ( (page_array = (unsigned long *)
-                        malloc(nr_pages * sizeof(unsigned long))) == NULL)
+    if ( (page_array = (xen_pfn_t *)
+                        malloc(nr_pages * sizeof(xen_pfn_t))) == NULL)
     {
         fprintf(logfile, "malloc returned error %d\n", errno);
         exit(-1);
@@ -3078,8 +3080,8 @@ int main(int argc, char **argv)
                                        page_array[0]);
 #endif
 
-    fprintf(logfile, "shared page at pfn:%lx, mfn: %lx\n", (nr_pages-1),
-           (page_array[nr_pages - 1]));
+    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", (nr_pages-1),
+           (uint64_t)(page_array[nr_pages - 1]));
 
     /* we always create the cdrom drive, even if no disk is there */
     bdrv_init();
@@ -3173,6 +3175,20 @@ int main(int argc, char **argv)
     }
     monitor_init(monitor_hd, !nographic);
 
+    /* Find which port should be the Summagraphics port */
+    /* It's the first unspecified serial line. Note that COM1 is set */
+    /* by default, so the Summagraphics port would be COM2 or higher */
+
+    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+      if (serial_devices[i][0] != '\0')
+       continue;
+      serial_summa_port = i;
+      pstrcpy(serial_devices[serial_summa_port], sizeof(serial_devices[0]), 
"null");
+      break;
+    }
+
+    /* Now, open the ports */
+
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_devices[i][0] != '\0') {
             serial_hds[i] = qemu_chr_open(serial_devices[i]);
diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/ioemu/vl.h  Wed Jun 07 11:03:51 2006 +0100
@@ -238,9 +238,9 @@ void console_select(unsigned int index);
 /* serial ports */
 
 #define MAX_SERIAL_PORTS 4
-#define SUMMA_PORT     1
 
 extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+extern int serial_summa_port;
 
 /* network redirectors support */
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_core.c     Wed Jun 07 11:03:51 2006 +0100
@@ -28,7 +28,7 @@ xc_domain_dumpcore_via_callback(int xc_h
                                 dumpcore_rtn_t dump_rtn)
 {
     unsigned long nr_pages;
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     xc_dominfo_t info;
     int i, nr_vcpus = 0;
     char *dump_mem, *dump_mem_start = NULL;
@@ -70,7 +70,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         sizeof(vcpu_guest_context_t)*nr_vcpus;
     dummy_len = (sizeof(struct xc_core_header) +
                  (sizeof(vcpu_guest_context_t) * nr_vcpus) +
-                 (nr_pages * sizeof(unsigned long)));
+                 (nr_pages * sizeof(xen_pfn_t)));
     header.xch_pages_offset = round_pgup(dummy_len);
 
     sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header));
@@ -81,7 +81,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     if ( sts != 0 )
         goto error_out;
 
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
     {
         printf("Could not allocate memory\n");
         goto error_out;
@@ -91,7 +91,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         printf("Could not get the page frame list\n");
         goto error_out;
     }
-    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long));
+    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t));
     if ( sts != 0 )
         goto error_out;
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_domain.c   Wed Jun 07 11:03:51 2006 +0100
@@ -291,7 +291,7 @@ int xc_domain_memory_increase_reservatio
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                          unsigned long *extent_start)
+                                          xen_pfn_t *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -324,7 +324,7 @@ int xc_domain_memory_decrease_reservatio
                                           uint32_t domid,
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
-                                          unsigned long *extent_start)
+                                          xen_pfn_t *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -363,7 +363,7 @@ int xc_domain_memory_populate_physmap(in
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                          unsigned long *extent_start)
+                                          xen_pfn_t *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -392,8 +392,8 @@ int xc_domain_translate_gpfn_list(int xc
 int xc_domain_translate_gpfn_list(int xc_handle,
                                   uint32_t domid,
                                   unsigned long nr_gpfns,
-                                  unsigned long *gpfn_list,
-                                  unsigned long *mfn_list)
+                                  xen_pfn_t *gpfn_list,
+                                  xen_pfn_t *mfn_list)
 {
     struct xen_translate_gpfn_list op = {
         .domid        = domid,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_hvm_build.c        Wed Jun 07 11:03:51 2006 +0100
@@ -135,7 +135,7 @@ static void set_hvm_info_checksum(struct
  * hvmloader will use this info to set BIOS accordingly
  */
 static int set_hvm_info(int xc_handle, uint32_t dom,
-                        unsigned long *pfn_list, unsigned int vcpus,
+                        xen_pfn_t *pfn_list, unsigned int vcpus,
                         unsigned int pae, unsigned int acpi, unsigned int apic)
 {
     char *va_map;
@@ -178,7 +178,7 @@ static int setup_guest(int xc_handle,
                        unsigned int store_evtchn,
                        unsigned long *store_mfn)
 {
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     unsigned long count, i;
     unsigned long long ptr;
     xc_mmu_t *mmu = NULL;
@@ -223,7 +223,7 @@ static int setup_guest(int xc_handle,
         goto error_out;
     }
 
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
     {
         PERROR("Could not allocate memory.\n");
         goto error_out;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_ia64_stubs.c       Wed Jun 07 11:03:51 2006 +0100
@@ -57,7 +57,7 @@ xc_plan9_build(int xc_handle,
 
 int xc_ia64_get_pfn_list(int xc_handle,
                          uint32_t domid,
-                         unsigned long *pfn_buf,
+                         xen_pfn_t *pfn_buf,
                          unsigned int start_page,
                          unsigned int nr_pages)
 {
@@ -65,7 +65,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
     int num_pfns,ret;
     unsigned int __start_page, __nr_pages;
     unsigned long max_pfns;
-    unsigned long *__pfn_buf;
+    xen_pfn_t *__pfn_buf;
 
     __start_page = start_page;
     __nr_pages = nr_pages;
@@ -80,7 +80,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
         set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
 
         if ( (max_pfns != -1UL)
-            && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 )
+            && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0 )
         {
             PERROR("Could not lock pfn list buffer");
             return -1;
@@ -89,7 +89,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
         ret = do_dom0_op(xc_handle, &op);
 
         if (max_pfns != -1UL)
-            (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long));
+            (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
 
         if (max_pfns == -1UL)
             return 0;
@@ -122,10 +122,10 @@ int xc_ia64_copy_to_domain_pages(int xc_
 {
     // N.B. gva should be page aligned
 
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     int i;
 
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ){
         PERROR("Could not allocate memory");
         goto error_out;
     }
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux.c    Wed Jun 07 11:03:51 2006 +0100
@@ -28,7 +28,7 @@ int xc_interface_close(int xc_handle)
 }
 
 void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
-                           unsigned long *arr, int num)
+                           xen_pfn_t *arr, int num)
 {
     privcmd_mmapbatch_t ioctlx;
     void *addr;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux_build.c      Wed Jun 07 11:03:51 2006 +0100
@@ -10,6 +10,7 @@
 #include "xc_aout9.h"
 #include <stdlib.h>
 #include <unistd.h>
+#include <inttypes.h>
 #include <zlib.h>
 
 #if defined(__i386__)
@@ -136,7 +137,7 @@ int load_initrd(int xc_handle, domid_t d
 int load_initrd(int xc_handle, domid_t dom,
                 struct initrd_info *initrd,
                 unsigned long physbase,
-                unsigned long *phys_to_mach)
+                xen_pfn_t *phys_to_mach)
 {
     char page[PAGE_SIZE];
     unsigned long pfn_start, pfn, nr_pages;
@@ -189,7 +190,7 @@ static int setup_pg_tables(int xc_handle
                            vcpu_guest_context_t *ctxt,
                            unsigned long dsi_v_start,
                            unsigned long v_end,
-                           unsigned long *page_array,
+                           xen_pfn_t *page_array,
                            unsigned long vpt_start,
                            unsigned long vpt_end,
                            unsigned shadow_mode_enabled)
@@ -205,9 +206,9 @@ static int setup_pg_tables(int xc_handle
     alloc_pt(l2tab, vl2tab, pl2tab);
     vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
     if (shadow_mode_enabled)
-        ctxt->ctrlreg[3] = pl2tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl2tab >> PAGE_SHIFT);
     else
-        ctxt->ctrlreg[3] = l2tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l2tab >> PAGE_SHIFT);
 
     for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
     {
@@ -251,26 +252,42 @@ static int setup_pg_tables_pae(int xc_ha
                                vcpu_guest_context_t *ctxt,
                                unsigned long dsi_v_start,
                                unsigned long v_end,
-                               unsigned long *page_array,
+                               xen_pfn_t *page_array,
                                unsigned long vpt_start,
                                unsigned long vpt_end,
-                               unsigned shadow_mode_enabled)
+                               unsigned shadow_mode_enabled,
+                               unsigned pae_mode)
 {
     l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count;
+    unsigned long ppt_alloc, count, nmfn;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
+
+    if ( pae_mode == PAEKERN_extended_cr3 )
+    {
+        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
+    }
+    else if ( page_array[ppt_alloc] > 0xfffff )
+    {
+        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
+        if ( nmfn == 0 )
+        {
+            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+            goto error_out;
+        }
+        page_array[ppt_alloc] = nmfn;
+    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
     if (shadow_mode_enabled)
-        ctxt->ctrlreg[3] = pl3tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl3tab >> PAGE_SHIFT);
     else
-        ctxt->ctrlreg[3] = l3tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l3tab >> PAGE_SHIFT);
 
     for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
     {
@@ -340,7 +357,7 @@ static int setup_pg_tables_64(int xc_han
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
-                              unsigned long *page_array,
+                              xen_pfn_t *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end,
                               int shadow_mode_enabled)
@@ -361,9 +378,9 @@ static int setup_pg_tables_64(int xc_han
     alloc_pt(l4tab, vl4tab, pl4tab);
     vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
     if (shadow_mode_enabled)
-        ctxt->ctrlreg[3] = pl4tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl4tab >> PAGE_SHIFT);
     else
-        ctxt->ctrlreg[3] = l4tab;
+        ctxt->ctrlreg[3] = xen_pfn_to_cr3(l4tab >> PAGE_SHIFT);
 
     for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
     {
@@ -451,7 +468,7 @@ static int setup_guest(int xc_handle,
                        unsigned int console_evtchn, unsigned long *console_mfn,
                        uint32_t required_features[XENFEAT_NR_SUBMAPS])
 {
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     struct load_funcs load_funcs;
     struct domain_setup_info dsi;
     unsigned long vinitrd_start;
@@ -478,7 +495,7 @@ static int setup_guest(int xc_handle,
 
     start_page = dsi.v_start >> PAGE_SHIFT;
     pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT;
-    if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
+    if ( (page_array = malloc(pgnr * sizeof(xen_pfn_t))) == NULL )
     {
         PERROR("Could not allocate memory");
         goto error_out;
@@ -579,11 +596,11 @@ static int compat_check(int xc_handle, s
     }
 
     if (strstr(xen_caps, "xen-3.0-x86_32p")) {
-        if (!dsi->pae_kernel) {
+        if (dsi->pae_kernel == PAEKERN_no) {
             ERROR("Non PAE-kernel on PAE host.");
             return 0;
         }
-    } else if (dsi->pae_kernel) {
+    } else if (dsi->pae_kernel != PAEKERN_no) {
         ERROR("PAE-kernel on non-PAE host.");
         return 0;
     }
@@ -606,7 +623,7 @@ static int setup_guest(int xc_handle,
                        unsigned int console_evtchn, unsigned long *console_mfn,
                        uint32_t required_features[XENFEAT_NR_SUBMAPS])
 {
-    unsigned long *page_array = NULL;
+    xen_pfn_t *page_array = NULL;
     unsigned long count, i, hypercall_pfn;
     start_info_t *start_info;
     shared_info_t *shared_info;
@@ -617,7 +634,7 @@ static int setup_guest(int xc_handle,
 
     unsigned long nr_pt_pages;
     unsigned long physmap_pfn;
-    unsigned long *physmap, *physmap_e;
+    xen_pfn_t *physmap, *physmap_e;
 
     struct load_funcs load_funcs;
     struct domain_setup_info dsi;
@@ -673,7 +690,8 @@ static int setup_guest(int xc_handle,
 
     for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ )
     {
-        if ( (supported_features[i]&required_features[i]) != 
required_features[i] )
+        if ( (supported_features[i] & required_features[i]) !=
+             required_features[i] )
         {
             ERROR("Guest kernel does not support a required feature.");
             goto error_out;
@@ -719,7 +737,7 @@ static int setup_guest(int xc_handle,
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
     ((_l) & ~((1UL<<(_s))-1))) >> (_s))
 #if defined(__i386__)
-        if ( dsi.pae_kernel )
+        if ( dsi.pae_kernel != PAEKERN_no )
         {
             if ( (1 + /* # L3 */
                   NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
@@ -797,11 +815,11 @@ static int setup_guest(int xc_handle,
 
     /* setup page tables */
 #if defined(__i386__)
-    if (dsi.pae_kernel)
+    if (dsi.pae_kernel != PAEKERN_no)
         rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                  dsi.v_start, v_end,
                                  page_array, vpt_start, vpt_end,
-                                 shadow_mode_enabled);
+                                 shadow_mode_enabled, dsi.pae_kernel);
     else
         rc = setup_pg_tables(xc_handle, dom, ctxt,
                              dsi.v_start, v_end,
@@ -824,16 +842,16 @@ static int setup_guest(int xc_handle,
      */
     if ( !shadow_mode_enabled )
     {
-        if ( dsi.pae_kernel )
+        if ( dsi.pae_kernel != PAEKERN_no )
         {
             if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
-                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+                           xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
                 goto error_out;
         }
         else
         {
             if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
-                           ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+                           xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
                 goto error_out;
         }
     }
@@ -845,7 +863,7 @@ static int setup_guest(int xc_handle,
      * correct protection for the page
      */
     if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
-                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+                   xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
         goto error_out;
 #endif
 
@@ -865,8 +883,8 @@ static int setup_guest(int xc_handle,
             ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
             count) )
         {
-            fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
-                    count, page_array[count]);
+            fprintf(stderr,"m2p update failure p=%lx m=%"PRIx64"\n",
+                    count, (uint64_t)page_array[count]);
             munmap(physmap, PAGE_SIZE);
             goto error_out;
         }
@@ -958,7 +976,7 @@ static int setup_guest(int xc_handle,
     rc = xc_version(xc_handle, XENVER_version, NULL);
     sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
             rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8,
-            dsi.pae_kernel ? "p" : "");
+            (dsi.pae_kernel != PAEKERN_no) ? "p" : "");
     start_info->nr_pages     = nr_pages;
     start_info->shared_info  = guest_shared_info_mfn << PAGE_SHIFT;
     start_info->flags        = flags;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux_restore.c    Wed Jun 07 11:03:51 2006 +0100
@@ -25,10 +25,10 @@ static unsigned long max_pfn;
 static unsigned long max_pfn;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
-static unsigned long *live_p2m = NULL;
+static xen_pfn_t *live_p2m = NULL;
 
 /* A table mapping each PFN to its new MFN. */
-static unsigned long *p2m = NULL;
+static xen_pfn_t *p2m = NULL;
 
 
 static ssize_t
@@ -108,7 +108,7 @@ int xc_linux_restore(int xc_handle, int 
                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
     DECLARE_DOM0_OP;
-    int rc = 1, i, n;
+    int rc = 1, i, n, pae_extended_cr3 = 0;
     unsigned long mfn, pfn;
     unsigned int prev_pc, this_pc;
     int verify = 0;
@@ -126,7 +126,7 @@ int xc_linux_restore(int xc_handle, int 
     unsigned long *pfn_type = NULL;
 
     /* A table of MFNs to map in the current region */
-    unsigned long *region_mfn = NULL;
+    xen_pfn_t *region_mfn = NULL;
 
     /* Types of the pfns in the current region */
     unsigned long region_pfn_type[MAX_BATCH_SIZE];
@@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int 
     unsigned long *page = NULL;
 
     /* A copy of the pfn-to-mfn table frame list. */
-    unsigned long *p2m_frame_list = NULL;
+    xen_pfn_t *p2m_frame_list = NULL;
 
     /* A temporary mapping of the guest's start_info page. */
     start_info_t *start_info;
@@ -162,30 +162,88 @@ int xc_linux_restore(int xc_handle, int 
         return 1;
     }
 
-
     if (mlock(&ctxt, sizeof(ctxt))) {
         /* needed for build dom0 op, but might as well do early */
         ERR("Unable to mlock ctxt");
         return 1;
     }
 
-
-    /* Read the saved P2M frame list */
-    if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
+    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
         ERR("Couldn't allocate p2m_frame_list array");
         goto out;
     }
 
-    if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
+    /* Read first entry of P2M list, or extended-info signature (~0UL). */
+    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+        ERR("read extended-info signature failed");
+        goto out;
+    }
+
+    if (p2m_frame_list[0] == ~0UL) {
+        uint32_t tot_bytes;
+
+        /* Next 4 bytes: total size of following extended info. */
+        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
+            ERR("read extended-info size failed");
+            goto out;
+        }
+
+        while (tot_bytes) {
+            uint32_t chunk_bytes;
+            char     chunk_sig[4];
+
+            /* 4-character chunk signature + 4-byte remaining chunk size. */
+            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
+                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
+                ERR("read extended-info chunk signature failed");
+                goto out;
+            }
+            tot_bytes -= 8;
+
+            /* VCPU context structure? */
+            if (!strncmp(chunk_sig, "vcpu", 4)) {
+                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+                    ERR("read extended-info vcpu context failed");
+                    goto out;
+                }
+                tot_bytes   -= sizeof(struct vcpu_guest_context);
+                chunk_bytes -= sizeof(struct vcpu_guest_context);
+
+                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
+                    pae_extended_cr3 = 1;
+            }
+
+            /* Any remaining bytes of this chunk: read and discard. */
+            while (chunk_bytes) {
+                unsigned long sz = chunk_bytes;
+                if ( sz > P2M_FL_SIZE )
+                    sz = P2M_FL_SIZE;
+                if (!read_exact(io_fd, p2m_frame_list, sz)) {
+                    ERR("read-and-discard extended-info chunk bytes failed");
+                    goto out;
+                }
+                chunk_bytes -= sz;
+                tot_bytes   -= sz;
+            }
+        }
+
+        /* Now read the real first entry of P2M list. */
+        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+            ERR("read first entry of p2m_frame_list failed");
+            goto out;
+        }
+    }
+
+    /* First entry is already read into the p2m array. */
+    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
         ERR("read p2m_frame_list failed");
         goto out;
     }
 
-
     /* We want zeroed memory so use calloc rather than malloc. */
-    p2m        = calloc(max_pfn, sizeof(unsigned long));
+    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
     pfn_type   = calloc(max_pfn, sizeof(unsigned long));
-    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(unsigned long));
+    region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
 
     if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
         ERR("memory alloc failed");
@@ -193,7 +251,7 @@ int xc_linux_restore(int xc_handle, int 
         goto out;
     }
 
-    if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) {
+    if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
         ERR("Could not mlock region_mfn");
         goto out;
     }
@@ -331,17 +389,27 @@ int xc_linux_restore(int xc_handle, int 
                 ** A page table page - need to 'uncanonicalize' it, i.e.
                 ** replace all the references to pfns with the corresponding
                 ** mfns for the new domain.
+                **
+                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
+                ** so we may need to update the p2m after the main loop.
+                ** Hence we defer canonicalization of L1s until then.
                 */
-                if(!uncanonicalize_pagetable(pagetype, page)) {
-                    /*
-                    ** Failing to uncanonicalize a page table can be ok
-                    ** under live migration since the pages type may have
-                    ** changed by now (and we'll get an update later).
-                    */
-                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                            pagetype >> 28, pfn, mfn);
-                    nraces++;
-                    continue;
+                if ((pt_levels != 3) ||
+                    pae_extended_cr3 ||
+                    (pagetype != L1TAB)) {
+
+                    if (!uncanonicalize_pagetable(pagetype, page)) {
+                        /*
+                        ** Failing to uncanonicalize a page table can be ok
+                        ** under live migration since the pages type may have
+                        ** changed by now (and we'll get an update later).
+                        */
+                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                                pagetype >> 28, pfn, mfn);
+                        nraces++;
+                        continue;
+                    }
+
                 }
 
             } else if(pagetype != NOTAB) {
@@ -389,6 +457,100 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
+
+    if ((pt_levels == 3) && !pae_extended_cr3) {
+
+        /*
+        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
+        ** is a little awkward and involves (a) finding all such PGDs and
+        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
+        ** with the new info; and (c) canonicalizing all the L1s using the
+        ** (potentially updated) p2m[].
+        **
+        ** This is relatively slow (and currently involves two passes through
+        ** the pfn_type[] array), but at least seems to be correct. May wish
+        ** to consider more complex approaches to optimize this later.
+        */
+
+        int j, k;
+
+        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
+        for (i = 0; i < max_pfn; i++) {
+
+            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
+
+                unsigned long new_mfn;
+                uint64_t l3ptes[4];
+                uint64_t *l3tab;
+
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                         PROT_READ, p2m[i]);
+
+                for(j = 0; j < 4; j++)
+                    l3ptes[j] = l3tab[j];
+
+                munmap(l3tab, PAGE_SIZE);
+
+                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
+                    ERR("Couldn't get a page below 4GB :-(");
+                    goto out;
+                }
+
+                p2m[i] = new_mfn;
+                if (xc_add_mmu_update(xc_handle, mmu,
+                                      (((unsigned long long)new_mfn)
+                                       << PAGE_SHIFT) |
+                                      MMU_MACHPHYS_UPDATE, i)) {
+                    ERR("Couldn't m2p on PAE root pgdir");
+                    goto out;
+                }
+
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                         PROT_READ | PROT_WRITE, p2m[i]);
+
+                for(j = 0; j < 4; j++)
+                    l3tab[j] = l3ptes[j];
+
+                munmap(l3tab, PAGE_SIZE);
+
+            }
+        }
+
+        /* Second pass: find all L1TABs and uncanonicalize them */
+        j = 0;
+
+        for(i = 0; i < max_pfn; i++) {
+
+            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
+                region_mfn[j] = p2m[i];
+                j++;
+            }
+
+            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+
+                if (!(region_base = xc_map_foreign_batch(
+                          xc_handle, dom, PROT_READ | PROT_WRITE,
+                          region_mfn, j))) {
+                    ERR("map batch failed");
+                    goto out;
+                }
+
+                for(k = 0; k < j; k++) {
+                    if(!uncanonicalize_pagetable(L1TAB,
+                                                 region_base + k*PAGE_SIZE)) {
+                        ERR("failed uncanonicalize pt!");
+                        goto out;
+                    }
+                }
+
+                munmap(region_base, j*PAGE_SIZE);
+                j = 0;
+            }
+        }
+
+    }
 
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
@@ -536,7 +698,7 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     /* Uncanonicalise the page table base pointer. */
-    pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT;
+    pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
 
     if (pfn >= max_pfn) {
         ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
@@ -552,7 +714,7 @@ int xc_linux_restore(int xc_handle, int 
         goto out;
     }
 
-    ctxt.ctrlreg[3] = p2m[pfn] << PAGE_SHIFT;
+    ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
 
     /* clear any pending events and the selector */
     memset(&(shared_info->evtchn_pending[0]), 0,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_linux_save.c       Wed Jun 07 11:03:51 2006 +0100
@@ -40,10 +40,10 @@ static unsigned long max_pfn;
 static unsigned long max_pfn;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
-static unsigned long *live_p2m = NULL;
+static xen_pfn_t *live_p2m = NULL;
 
 /* Live mapping of system MFN to PFN table. */
-static unsigned long *live_m2p = NULL;
+static xen_pfn_t *live_m2p = NULL;
 
 /* grep fodder: machine_to_phys */
 
@@ -501,22 +501,22 @@ void canonicalize_pagetable(unsigned lon
 
 
 
-static unsigned long *xc_map_m2p(int xc_handle,
+static xen_pfn_t *xc_map_m2p(int xc_handle,
                                  unsigned long max_mfn,
                                  int prot)
 {
     struct xen_machphys_mfn_list xmml;
     privcmd_mmap_entry_t *entries;
     unsigned long m2p_chunks, m2p_size;
-    unsigned long *m2p;
-    unsigned long *extent_start;
+    xen_pfn_t *m2p;
+    xen_pfn_t *extent_start;
     int i, rc;
 
     m2p_size   = M2P_SIZE(max_mfn);
     m2p_chunks = M2P_CHUNKS(max_mfn);
 
     xmml.max_extents = m2p_chunks;
-    if (!(extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) {
+    if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) {
         ERR("failed to allocate space for m2p mfns");
         return NULL;
     }
@@ -583,11 +583,11 @@ int xc_linux_save(int xc_handle, int io_
     char page[PAGE_SIZE];
 
     /* Double and single indirect references to the live P2M table */
-    unsigned long *live_p2m_frame_list_list = NULL;
-    unsigned long *live_p2m_frame_list = NULL;
+    xen_pfn_t *live_p2m_frame_list_list = NULL;
+    xen_pfn_t *live_p2m_frame_list = NULL;
 
     /* A copy of the pfn-to-mfn table frame list. */
-    unsigned long *p2m_frame_list = NULL;
+    xen_pfn_t *p2m_frame_list = NULL;
 
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
@@ -712,11 +712,11 @@ int xc_linux_save(int xc_handle, int io_
     memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
 
     /* Canonicalise the pfn-to-mfn table frame-number list. */
-    for (i = 0; i < max_pfn; i += ulpp) {
-        if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) {
+    for (i = 0; i < max_pfn; i += fpp) {
+        if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
             ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
-            ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp,
-                p2m_frame_list[i/ulpp]);
+            ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
+                (uint64_t)p2m_frame_list[i/fpp]);
             goto out;
         }
     }
@@ -818,12 +818,33 @@ int xc_linux_save(int xc_handle, int io_
 
     /* Start writing out the saved-domain record. */
 
-    if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
         ERR("write: max_pfn");
         goto out;
     }
 
-    if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
+    /*
+     * Write an extended-info structure to inform the restore code that
+     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
+     * slow paths in the restore code.
+     */
+    if ((pt_levels == 3) &&
+        (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))) {
+        unsigned long signature = ~0UL;
+        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
+        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
+        char chunk_sig[]  = "vcpu";
+        if (!write_exact(io_fd, &signature, sizeof(signature)) ||
+            !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
+            !write_exact(io_fd, &chunk_sig, 4) ||
+            !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
+            !write_exact(io_fd, &ctxt,      sizeof(ctxt))) {
+            ERR("write: extended info");
+            goto out;
+        }
+    }
+
+    if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
         ERR("write: p2m_frame_list");
         goto out;
     }
@@ -1129,12 +1150,12 @@ int xc_linux_save(int xc_handle, int io_
     }
 
     /* Canonicalise the page table base pointer. */
-    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) ) {
+    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) {
         ERR("PT base is not in range of pseudophys map");
         goto out;
     }
-    ctxt.ctrlreg[3] = mfn_to_pfn(ctxt.ctrlreg[3] >> PAGE_SHIFT) <<
-        PAGE_SHIFT;
+    ctxt.ctrlreg[3] = 
+        xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 
     if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) ||
         !write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_load_aout9.c       Wed Jun 07 11:03:51 2006 +0100
@@ -17,7 +17,7 @@
 #define KOFFSET(_p)       ((_p)&~KZERO)
 
 static int parseaout9image(const char *, unsigned long, struct 
domain_setup_info *);
-static int loadaout9image(const char *, unsigned long, int, uint32_t, unsigned 
long *, struct domain_setup_info *);
+static int loadaout9image(const char *, unsigned long, int, uint32_t, 
xen_pfn_t *, struct domain_setup_info *);
 static void copyout(int, uint32_t, unsigned long *, unsigned long, const char 
*, int);
 struct Exec *get_header(const char *, unsigned long, struct Exec *);
 
@@ -79,7 +79,7 @@ loadaout9image(
     const char *image,
     unsigned long image_size,
     int xch, uint32_t dom,
-    unsigned long *parray,
+    xen_pfn_t *parray,
     struct domain_setup_info *dsi)
 {
     struct Exec ehdr;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_load_bin.c Wed Jun 07 11:03:51 2006 +0100
@@ -107,7 +107,7 @@ static int
 static int
 loadbinimage(
     const char *image, unsigned long image_size, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi);
+    xen_pfn_t *parray, struct domain_setup_info *dsi);
 
 int probe_bin(const char *image,
               unsigned long image_size,
@@ -235,7 +235,7 @@ static int
 static int
 loadbinimage(
     const char *image, unsigned long image_size, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi)
+    xen_pfn_t *parray, struct domain_setup_info *dsi)
 {
     unsigned long size;
     char         *va;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_load_elf.c Wed Jun 07 11:03:51 2006 +0100
@@ -16,10 +16,10 @@ static int
 static int
 loadelfimage(
     const char *image, unsigned long image_size, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi);
+    xen_pfn_t *parray, struct domain_setup_info *dsi);
 static int
 loadelfsymtab(
-    const char *image, int xch, uint32_t dom, unsigned long *parray,
+    const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
     struct domain_setup_info *dsi);
 
 int probe_elf(const char *image,
@@ -122,8 +122,15 @@ static int parseelfimage(const char *ima
             ERROR("Actually saw: '%s'", guestinfo);
             return -EINVAL;
         }
-        if ( (strstr(guestinfo, "PAE=yes") != NULL) )
-            dsi->pae_kernel = 1;
+
+        dsi->pae_kernel = PAEKERN_no;
+        p = strstr(guestinfo, "PAE=yes");
+        if ( p != NULL )
+        {
+            dsi->pae_kernel = PAEKERN_yes;
+            if ( !strncmp(p+7, "[extended-cr3]", 14) )
+                dsi->pae_kernel = PAEKERN_extended_cr3;
+        }
 
         break;
     }
@@ -204,7 +211,7 @@ static int
 static int
 loadelfimage(
     const char *image, unsigned long elfsize, int xch, uint32_t dom,
-    unsigned long *parray, struct domain_setup_info *dsi)
+    xen_pfn_t *parray, struct domain_setup_info *dsi)
 {
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
     Elf_Phdr *phdr;
@@ -258,7 +265,7 @@ loadelfimage(
 
 static int
 loadelfsymtab(
-    const char *image, int xch, uint32_t dom, unsigned long *parray,
+    const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
     struct domain_setup_info *dsi)
 {
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr;
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_pagetab.c  Wed Jun 07 11:03:51 2006 +0100
@@ -78,7 +78,7 @@ unsigned long xc_translate_foreign_addre
         fprintf(stderr, "failed to retreive vcpu context\n");
         goto out;
     }
-    cr3 = ctx.ctrlreg[3];
+    cr3 = ((unsigned long long)xen_cr3_to_pfn(ctx.ctrlreg[3])) << PAGE_SHIFT;
 
     /* Page Map Level 4 */
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_private.c  Wed Jun 07 11:03:51 2006 +0100
@@ -4,6 +4,7 @@
  * Helper functions for the rest of the library.
  */
 
+#include <inttypes.h>
 #include "xc_private.h"
 
 /* NB: arr must be mlock'ed */
@@ -134,9 +135,9 @@ int xc_memory_op(int xc_handle,
     struct xen_memory_reservation *reservation = arg;
     struct xen_machphys_mfn_list *xmml = arg;
     struct xen_translate_gpfn_list *trans = arg;
-    unsigned long *extent_start;
-    unsigned long *gpfn_list;
-    unsigned long *mfn_list;
+    xen_pfn_t *extent_start;
+    xen_pfn_t *gpfn_list;
+    xen_pfn_t *mfn_list;
     long ret = -EINVAL;
 
     hypercall.op     = __HYPERVISOR_memory_op;
@@ -156,7 +157,7 @@ int xc_memory_op(int xc_handle,
         get_xen_guest_handle(extent_start, reservation->extent_start);
         if ( (extent_start != NULL) &&
              (mlock(extent_start,
-                    reservation->nr_extents * sizeof(unsigned long)) != 0) )
+                    reservation->nr_extents * sizeof(xen_pfn_t)) != 0) )
         {
             PERROR("Could not mlock");
             safe_munlock(reservation, sizeof(*reservation));
@@ -171,7 +172,7 @@ int xc_memory_op(int xc_handle,
         }
         get_xen_guest_handle(extent_start, xmml->extent_start);
         if ( mlock(extent_start,
-                   xmml->max_extents * sizeof(unsigned long)) != 0 )
+                   xmml->max_extents * sizeof(xen_pfn_t)) != 0 )
         {
             PERROR("Could not mlock");
             safe_munlock(xmml, sizeof(*xmml));
@@ -192,17 +193,17 @@ int xc_memory_op(int xc_handle,
             goto out1;
         }
         get_xen_guest_handle(gpfn_list, trans->gpfn_list);
-        if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
+        if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
         {
             PERROR("Could not mlock");
             safe_munlock(trans, sizeof(*trans));
             goto out1;
         }
         get_xen_guest_handle(mfn_list, trans->mfn_list);
-        if ( mlock(mfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
-        {
-            PERROR("Could not mlock");
-            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
+        if ( mlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
+        {
+            PERROR("Could not mlock");
+            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
             safe_munlock(trans, sizeof(*trans));
             goto out1;
         }
@@ -220,22 +221,22 @@ int xc_memory_op(int xc_handle,
         get_xen_guest_handle(extent_start, reservation->extent_start);
         if ( extent_start != NULL )
             safe_munlock(extent_start,
-                         reservation->nr_extents * sizeof(unsigned long));
+                         reservation->nr_extents * sizeof(xen_pfn_t));
         break;
     case XENMEM_machphys_mfn_list:
         safe_munlock(xmml, sizeof(*xmml));
         get_xen_guest_handle(extent_start, xmml->extent_start);
         safe_munlock(extent_start,
-                     xmml->max_extents * sizeof(unsigned long));
+                     xmml->max_extents * sizeof(xen_pfn_t));
         break;
     case XENMEM_add_to_physmap:
         safe_munlock(arg, sizeof(struct xen_add_to_physmap));
         break;
     case XENMEM_translate_gpfn_list:
             get_xen_guest_handle(mfn_list, trans->mfn_list);
-            safe_munlock(mfn_list, trans->nr_gpfns * sizeof(long));
+            safe_munlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
             get_xen_guest_handle(gpfn_list, trans->gpfn_list);
-            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
+            safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
             safe_munlock(trans, sizeof(*trans));
         break;
     }
@@ -263,7 +264,7 @@ long long xc_domain_get_cpu_usage( int x
 
 int xc_get_pfn_list(int xc_handle,
                     uint32_t domid,
-                    unsigned long *pfn_buf,
+                    xen_pfn_t *pfn_buf,
                     unsigned long max_pfns)
 {
     DECLARE_DOM0_OP;
@@ -274,10 +275,10 @@ int xc_get_pfn_list(int xc_handle,
     set_xen_guest_handle(op.u.getmemlist.buffer, pfn_buf);
 
 #ifdef VALGRIND
-    memset(pfn_buf, 0, max_pfns * sizeof(unsigned long));
+    memset(pfn_buf, 0, max_pfns * sizeof(xen_pfn_t));
 #endif
 
-    if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
+    if ( mlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)) != 0 )
     {
         PERROR("xc_get_pfn_list: pfn_buf mlock failed");
         return -1;
@@ -285,7 +286,7 @@ int xc_get_pfn_list(int xc_handle,
 
     ret = do_dom0_op(xc_handle, &op);
 
-    safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long));
+    safe_munlock(pfn_buf, max_pfns * sizeof(xen_pfn_t));
 
 #if 0
 #ifdef DEBUG
@@ -364,7 +365,7 @@ unsigned long xc_get_filesz(int fd)
 }
 
 void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
-                   int xch, uint32_t dom, unsigned long *parray,
+                   int xch, uint32_t dom, xen_pfn_t *parray,
                    unsigned long vstart)
 {
     char *va;
@@ -428,6 +429,29 @@ int xc_version(int xc_handle, int cmd, v
         safe_munlock(arg, argsize);
 
     return rc;
+}
+
+unsigned long xc_make_page_below_4G(
+    int xc_handle, uint32_t domid, unsigned long mfn)
+{
+    xen_pfn_t old_mfn = mfn;
+    xen_pfn_t new_mfn;
+
+    if ( xc_domain_memory_decrease_reservation(
+        xc_handle, domid, 1, 0, &old_mfn) != 0 )
+    {
+        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
+        return 0;
+    }
+
+    if ( xc_domain_memory_increase_reservation(
+        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
+    {
+        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
+        return 0;
+    }
+
+    return new_mfn;
 }
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_ptrace.c   Wed Jun 07 11:03:51 2006 +0100
@@ -190,7 +190,8 @@ map_domain_va_32(
     static void *v[MAX_VIRT_CPUS];
 
     l2 = xc_map_foreign_range(
-         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] 
>> PAGE_SHIFT);
+         xc_handle, current_domid, PAGE_SIZE, PROT_READ,
+         xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
     if ( l2 == NULL )
         return NULL;
 
@@ -230,7 +231,8 @@ map_domain_va_pae(
     static void *v[MAX_VIRT_CPUS];
 
     l3 = xc_map_foreign_range(
-        xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] 
>> PAGE_SHIFT);
+        xc_handle, current_domid, PAGE_SIZE, PROT_READ,
+        xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
     if ( l3 == NULL )
         return NULL;
 
@@ -282,8 +284,9 @@ map_domain_va_64(
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
 
-    l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE,
-            PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
+    l4 = xc_map_foreign_range(
+        xc_handle, current_domid, PAGE_SIZE, PROT_READ,
+        xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3]));
     if ( l4 == NULL )
         return NULL;
 
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xc_ptrace_core.c      Wed Jun 07 11:03:51 2006 +0100
@@ -12,8 +12,8 @@ static long   nr_pages = 0;
 static long   nr_pages = 0;
 static unsigned long  *p2m_array = NULL;
 static unsigned long  *m2p_array = NULL;
-static unsigned long            pages_offset;
-static unsigned long            cr3[MAX_VIRT_CPUS];
+static unsigned long   pages_offset;
+static unsigned long   cr3[MAX_VIRT_CPUS];
 
 /* --------------------- */
 
@@ -47,7 +47,7 @@ map_domain_va_core(unsigned long domfd, 
             munmap(cr3_virt[cpu], PAGE_SIZE);
         v = mmap(
             NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
-            map_mtop_offset(cr3_phys[cpu]));
+            map_mtop_offset(xen_cr3_to_pfn(cr3_phys[cpu])));
         if (v == MAP_FAILED)
         {
             perror("mmap failed");
@@ -127,14 +127,15 @@ xc_waitdomain_core(
             sizeof(vcpu_guest_context_t)*nr_vcpus)
             return -1;
 
-        for (i = 0; i < nr_vcpus; i++) {
+        for (i = 0; i < nr_vcpus; i++)
             cr3[i] = ctxt[i].ctrlreg[3];
-        }
+
         if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL)
         {
             printf("Could not allocate p2m_array\n");
             return -1;
         }
+
         if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) !=
             sizeof(unsigned long)*nr_pages)
             return -1;
@@ -146,10 +147,8 @@ xc_waitdomain_core(
         }
         bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
 
-        for (i = 0; i < nr_pages; i++) {
+        for (i = 0; i < nr_pages; i++)
             m2p_array[p2m_array[i]] = i;
-        }
-
     }
     return 0;
 }
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xenctrl.h     Wed Jun 07 11:03:51 2006 +0100
@@ -415,26 +415,26 @@ int xc_domain_memory_increase_reservatio
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                          unsigned long *extent_start);
+                                          xen_pfn_t *extent_start);
 
 int xc_domain_memory_decrease_reservation(int xc_handle,
                                           uint32_t domid,
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
-                                          unsigned long *extent_start);
+                                          xen_pfn_t *extent_start);
 
 int xc_domain_memory_populate_physmap(int xc_handle,
                                       uint32_t domid,
                                       unsigned long nr_extents,
                                       unsigned int extent_order,
                                       unsigned int address_bits,
-                                      unsigned long *extent_start);
+                                      xen_pfn_t *extent_start);
 
 int xc_domain_translate_gpfn_list(int xc_handle,
                                   uint32_t domid,
                                   unsigned long nr_gpfns,
-                                  unsigned long *gpfn_list,
-                                  unsigned long *mfn_list);
+                                  xen_pfn_t *gpfn_list,
+                                  xen_pfn_t *mfn_list);
 
 int xc_domain_ioport_permission(int xc_handle,
                                 uint32_t domid,
@@ -453,6 +453,9 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long nr_mfns,
                                uint8_t allow_access);
 
+unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
+                                    unsigned long mfn);
+
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
 int xc_perfc_control(int xc_handle,
@@ -484,7 +487,7 @@ void *xc_map_foreign_range(int xc_handle
                             unsigned long mfn );
 
 void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
-                           unsigned long *arr, int num );
+                           xen_pfn_t *arr, int num );
 
 /**
  * Translates a virtual address in the context of a given domain and
@@ -499,11 +502,11 @@ unsigned long xc_translate_foreign_addre
 unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
                                            int vcpu, unsigned long long virt);
 
-int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf,
+int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
                     unsigned long max_pfns);
 
 int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
-                         unsigned long *pfn_buf,
+                         xen_pfn_t *pfn_buf,
                          unsigned int start_page, unsigned int nr_pages);
 
 int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xg_private.h  Wed Jun 07 11:03:51 2006 +0100
@@ -156,6 +156,9 @@ struct domain_setup_info
 
     unsigned long elf_paddr_offset;
 
+#define PAEKERN_no           0
+#define PAEKERN_yes          1
+#define PAEKERN_extended_cr3 2
     unsigned int  pae_kernel;
 
     unsigned int  load_symtab;
@@ -170,7 +173,7 @@ typedef int (*parseimagefunc)(const char
                               struct domain_setup_info *dsi);
 typedef int (*loadimagefunc)(const char *image, unsigned long image_size,
                              int xch,
-                             uint32_t dom, unsigned long *parray,
+                             uint32_t dom, xen_pfn_t *parray,
                              struct domain_setup_info *dsi);
 
 struct load_funcs
@@ -198,7 +201,7 @@ unsigned long xc_get_filesz(int fd);
 unsigned long xc_get_filesz(int fd);
 
 void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
-                   int xch, uint32_t dom, unsigned long *parray,
+                   int xch, uint32_t dom, xen_pfn_t *parray,
                    unsigned long vstart);
 
 int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/libxc/xg_save_restore.h     Wed Jun 07 11:03:51 2006 +0100
@@ -105,23 +105,23 @@ static int get_platform_info(int xc_hand
 */
 #define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE
 #define M2P_CHUNK_SIZE  (1 << M2P_SHIFT)
-#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT)
+#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT)
 #define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
 
 /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
-#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT)
+#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
 
-/* Number of unsigned longs in a page */
-#define ulpp            (PAGE_SIZE/sizeof(unsigned long))
+/* Number of xen_pfn_t in a page */
+#define fpp             (PAGE_SIZE/sizeof(xen_pfn_t))
 
 /* Number of entries in the pfn_to_mfn_frame_list */
-#define P2M_FL_ENTRIES  (((max_pfn)+ulpp-1)/ulpp)
+#define P2M_FL_ENTRIES  (((max_pfn)+fpp-1)/fpp)
 
 /* Size in bytes of the pfn_to_mfn_frame_list     */
 #define P2M_FL_SIZE     ((P2M_FL_ENTRIES)*sizeof(unsigned long))
 
 /* Number of entries in the pfn_to_mfn_frame_list_list */
-#define P2M_FLL_ENTRIES (((max_pfn)+(ulpp*ulpp)-1)/(ulpp*ulpp))
+#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
 
 /* Current guests allow 8MB 'slack' in their P2M */
 #define NR_SLACK_ENTRIES   ((8 * 1024 * 1024) / PAGE_SIZE)
diff -r b09dbe439169 -r 9d86c1a70f34 tools/python/xen/util/security.py
--- a/tools/python/xen/util/security.py Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/python/xen/util/security.py Wed Jun 07 11:03:51 2006 +0100
@@ -426,6 +426,15 @@ def get_decision(arg1, arg2):
             err("Argument type not supported.")
         ssidref = label2ssidref(arg2[2][1], arg2[1][1])
         arg2 = ['ssidref', str(ssidref)]
+
+    # accept only int or string types for domid and ssidref
+    if isinstance(arg1[1], int):
+        arg1[1] = str(arg1[1])
+    if isinstance(arg2[1], int):
+        arg2[1] = str(arg2[1])
+    if not isinstance(arg1[1], str) or not isinstance(arg2[1], str):
+        err("Invalid id or ssidref type, string or int required")
+
     try:
         decision = acm.getdecision(arg1[0], arg1[1], arg2[0], arg2[1])
     except:
diff -r b09dbe439169 -r 9d86c1a70f34 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/tools/tests/test_x86_emulator.c   Wed Jun 07 11:03:51 2006 +0100
@@ -13,6 +13,7 @@ typedef int64_t            s64;
 typedef int64_t            s64;
 #include <public/xen.h>
 #include <asm-x86/x86_emulate.h>
+#include <sys/mman.h>
 
 static int read_any(
     unsigned long addr,
@@ -85,23 +86,30 @@ int main(int argc, char **argv)
     struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
-    unsigned int res = 0x7FFFFFFF;
-    u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
+    unsigned int *res;
     int rc;
 
     ctxt.regs = &regs;
     ctxt.mode = X86EMUL_MODE_PROT32;
 
+    res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE,
+               MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+    if ( res == MAP_FAILED )
+    {
+        fprintf(stderr, "mmap to low address failed\n");
+        exit(1);
+    }
+
     printf("%-40s", "Testing addl %%ecx,(%%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x92345677) || 
+    ctxt.cr2    = (unsigned long)res;
+    *res        = 0x7FFFFFFF;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
          (regs.eip != (unsigned long)&instr[2]) )
         goto fail;
@@ -116,11 +124,25 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x92345677) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
+         (regs.eip != (unsigned long)&instr[2]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movl (%%eax),%%ecx...");
+    instr[0] = 0x8b; instr[1] = 0x08;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.ecx    = ~0UL;
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x92345677) || 
+         (regs.ecx != 0x92345677UL) ||
          (regs.eip != (unsigned long)&instr[2]) )
         goto fail;
     printf("okay\n");
@@ -131,10 +153,10 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x923456AA) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
          (regs.eax != 0x92345677UL) ||
          (regs.eip != (unsigned long)&instr[4]) )
@@ -147,10 +169,10 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x923456AA) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
          (regs.eax != 0xAABBCCAA) ||
          (regs.ecx != 0xFF) ||
@@ -163,10 +185,10 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0x12345678) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x12345678) || 
          (regs.eflags != 0x200) ||
          (regs.ecx != 0x923456AA) ||
          (regs.eip != (unsigned long)&instr[2]) )
@@ -176,14 +198,14 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
     instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
     regs.eflags = 0x200;
-    res         = 0x923456AA;
+    *res        = 0x923456AA;
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    ctxt.cr2    = (unsigned long)&res;
-    rc = x86_emulate_memop(&ctxt, &emulops);
-    if ( (rc != 0) || 
-         (res != 0xDDEEFF00) || 
+    ctxt.cr2    = (unsigned long)res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
          (regs.eax != 0x923456AAUL) ||
          (regs.eip != (unsigned long)&instr[4]) )
@@ -192,54 +214,57 @@ int main(int argc, char **argv)
 
     printf("%-40s", "Testing rep movsw...");
     instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
-    res         = 0x22334455;
+    *res        = 0x22334455;
     regs.eflags = 0x200;
     regs.ecx    = 23;
     regs.eip    = (unsigned long)&instr[0];
-    regs.esi    = (unsigned long)&res + 0;
-    regs.edi    = (unsigned long)&res + 2;
+    regs.esi    = (unsigned long)res + 0;
+    regs.edi    = (unsigned long)res + 2;
     regs.error_code = 0; /* read fault */
     ctxt.cr2    = regs.esi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (res != 0x44554455) ||
+         (*res != 0x44554455) ||
          (regs.eflags != 0x200) ||
          (regs.ecx != 22) || 
-         (regs.esi != ((unsigned long)&res + 2)) ||
-         (regs.edi != ((unsigned long)&res + 4)) ||
+         (regs.esi != ((unsigned long)res + 2)) ||
+         (regs.edi != ((unsigned long)res + 4)) ||
          (regs.eip != (unsigned long)&instr[0]) )
         goto fail;
     printf("okay\n");
 
     printf("%-40s", "Testing btrl $0x1,(%edi)...");
     instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
-    res         = 0x2233445F;
-    regs.eflags = 0x200;
-    regs.eip    = (unsigned long)&instr[0];
-    regs.edi    = (unsigned long)&res;
+    *res        = 0x2233445F;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.edi    = (unsigned long)res;
     ctxt.cr2    = regs.edi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (res != 0x2233445D) ||
+         (*res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
          (regs.eip != (unsigned long)&instr[4]) )
         goto fail;
     printf("okay\n");
+
+    res[0] = 0x12345678;
+    res[1] = 0x87654321;
 
     printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]...");
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eflags = 0x200;
-    regs.eax    = cmpxchg8b_res[0];
-    regs.edx    = cmpxchg8b_res[1];
+    regs.eax    = res[0];
+    regs.edx    = res[1];
     regs.ebx    = 0x9999AAAA;
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
-    regs.edi    = (unsigned long)cmpxchg8b_res;
+    regs.edi    = (unsigned long)res;
     ctxt.cr2    = regs.edi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (cmpxchg8b_res[0] != 0x9999AAAA) ||
-         (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+         (res[0] != 0x9999AAAA) ||
+         (res[1] != 0xCCCCFFFF) ||
          ((regs.eflags&0x240) != 0x240) ||
          (regs.eip != (unsigned long)&instr[3]) )
         goto fail;
@@ -248,12 +273,12 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing cmpxchg8b (%edi) [failing]...");
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
-    regs.edi    = (unsigned long)cmpxchg8b_res;
+    regs.edi    = (unsigned long)res;
     ctxt.cr2    = regs.edi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
-         (cmpxchg8b_res[0] != 0x9999AAAA) ||
-         (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+         (res[0] != 0x9999AAAA) ||
+         (res[1] != 0xCCCCFFFF) ||
          (regs.eax != 0x9999AAAA) ||
          (regs.edx != 0xCCCCFFFF) ||
          ((regs.eflags&0x240) != 0x200) ||
@@ -265,11 +290,11 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    res         = 0x82;
+    ctxt.cr2    = (unsigned long)res;
+    *res        = 0x82;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
-         (res != 0x82) ||
+         (*res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
          ((regs.eflags&0x240) != 0x200) ||
          (regs.eip != (unsigned long)&instr[3]) )
@@ -280,11 +305,11 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)&res;
-    res         = 0x1234aa82;
+    ctxt.cr2    = (unsigned long)res;
+    *res        = 0x1234aa82;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
-         (res != 0x1234aa82) ||
+         (*res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
          ((regs.eflags&0x240) != 0x200) ||
          (regs.eip != (unsigned long)&instr[3]) )
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/linux-xen/smpboot.c
--- a/xen/arch/ia64/linux-xen/smpboot.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/ia64/linux-xen/smpboot.c Wed Jun 07 11:03:51 2006 +0100
@@ -62,6 +62,7 @@
 #include <asm/unistd.h>
 
 #ifdef XEN
+#include <xen/domain.h>
 #include <asm/hw_irq.h>
 int ht_per_core = 1;
 #ifndef CONFIG_SMP
@@ -487,7 +488,7 @@ do_rest:
 #else
        struct vcpu *v;
 
-       v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu);
+       v = alloc_idle_vcpu(cpu);
        BUG_ON(v == NULL);
 
        //printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/ia64/xen/domain.c        Wed Jun 07 11:03:51 2006 +0100
@@ -42,6 +42,7 @@
 
 #include <asm/vcpu.h>   /* for function declarations */
 #include <public/arch-ia64.h>
+#include <xen/domain.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
 #include <asm/vmx_vpd.h>
@@ -92,26 +93,16 @@ alloc_dom_xen_and_dom_io(void)
      * Any Xen-heap pages that we will allow to be mapped will have
      * their domain field set to dom_xen.
      */
-    dom_xen = alloc_domain();
+    dom_xen = alloc_domain(DOMID_XEN);
     BUG_ON(dom_xen == NULL);
-    spin_lock_init(&dom_xen->page_alloc_lock);
-    INIT_LIST_HEAD(&dom_xen->page_list);
-    INIT_LIST_HEAD(&dom_xen->xenpage_list);
-    atomic_set(&dom_xen->refcnt, 1);
-    dom_xen->domain_id = DOMID_XEN;
 
     /*
      * Initialise our DOMID_IO domain.
      * This domain owns I/O pages that are within the range of the page_info
      * array. Mappings occur at the priv of the caller.
      */
-    dom_io = alloc_domain();
+    dom_io = alloc_domain(DOMID_IO);
     BUG_ON(dom_io == NULL);
-    spin_lock_init(&dom_io->page_alloc_lock);
-    INIT_LIST_HEAD(&dom_io->page_list);
-    INIT_LIST_HEAD(&dom_io->xenpage_list);
-    atomic_set(&dom_io->refcnt, 1);
-    dom_io->domain_id = DOMID_IO;
 }
 #endif
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/ia64/xen/xensetup.c      Wed Jun 07 11:03:51 2006 +0100
@@ -35,8 +35,6 @@ char saved_command_line[COMMAND_LINE_SIZ
 char saved_command_line[COMMAND_LINE_SIZE];
 char dom0_command_line[COMMAND_LINE_SIZE];
 
-struct vcpu *idle_vcpu[NR_CPUS];
-
 cpumask_t cpu_present_map;
 
 extern unsigned long domain0_ready;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/audit.c      Wed Jun 07 11:03:51 2006 +0100
@@ -432,10 +432,10 @@ int audit_adjust_pgtables(struct domain 
 
         for_each_vcpu(d, v)
         {
-            if ( pagetable_get_paddr(v->arch.guest_table) )
+            if ( !pagetable_is_null(v->arch.guest_table) )
                 adjust(mfn_to_page(pagetable_get_pfn(v->arch.guest_table)),
                        !shadow_mode_refcounts(d));
-            if ( pagetable_get_paddr(v->arch.shadow_table) )
+            if ( !pagetable_is_null(v->arch.shadow_table) )
                 adjust(mfn_to_page(pagetable_get_pfn(v->arch.shadow_table)),
                        0);
             if ( v->arch.monitor_shadow_ref )
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/cpu/mtrr/main.c      Wed Jun 07 11:03:51 2006 +0100
@@ -43,7 +43,7 @@
 #include "mtrr.h"
 
 /* No blocking mutexes in Xen. Spin instead. */
-#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
+#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m)
 #define down(_m) spin_lock(_m)
 #define up(_m) spin_unlock(_m)
 #define lock_cpu_hotplug() ((void)0)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/dom0_ops.c   Wed Jun 07 11:03:51 2006 +0100
@@ -467,7 +467,7 @@ void arch_getdomaininfo_ctxt(
     if ( hvm_guest(v) )
         c->flags |= VGCF_HVM_GUEST;
 
-    c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table);
+    c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
 
     c->vm_assist = v->domain->vm_assist;
 }
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/domain.c     Wed Jun 07 11:03:51 2006 +0100
@@ -259,7 +259,7 @@ int arch_set_info_guest(
     struct vcpu *v, struct vcpu_guest_context *c)
 {
     struct domain *d = v->domain;
-    unsigned long phys_basetab = INVALID_MFN;
+    unsigned long cr3_pfn = INVALID_MFN;
     int i, rc;
 
     if ( !(c->flags & VGCF_HVM_GUEST) )
@@ -322,12 +322,8 @@ int arch_set_info_guest(
 
     if ( !(c->flags & VGCF_HVM_GUEST) )
     {
-        phys_basetab = c->ctrlreg[3];
-        phys_basetab =
-            (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) |
-            (phys_basetab & ~PAGE_MASK);
-
-        v->arch.guest_table = mk_pagetable(phys_basetab);
+        cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
+        v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
     }
 
     if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
@@ -335,14 +331,14 @@ int arch_set_info_guest(
 
     if ( c->flags & VGCF_HVM_GUEST )
     {
-        v->arch.guest_table = mk_pagetable(0);
+        v->arch.guest_table = pagetable_null();
 
         if ( !hvm_initialize_guest_resources(v) )
             return -EINVAL;
     }
     else if ( shadow_mode_refcounts(d) )
     {
-        if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) )
+        if ( !get_page(mfn_to_page(cr3_pfn), d) )
         {
             destroy_gdt(v);
             return -EINVAL;
@@ -350,7 +346,7 @@ int arch_set_info_guest(
     }
     else
     {
-        if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d,
+        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
                                 PGT_base_page_table) )
         {
             destroy_gdt(v);
@@ -528,20 +524,29 @@ static void load_segments(struct vcpu *n
     if ( unlikely(!all_segs_okay) )
     {
         struct cpu_user_regs *regs = guest_cpu_user_regs();
-        unsigned long   *rsp =
+        unsigned long *rsp =
             (n->arch.flags & TF_kernel_mode) ?
             (unsigned long *)regs->rsp :
             (unsigned long *)nctxt->kernel_sp;
+        unsigned long cs_and_mask, rflags;
 
         if ( !(n->arch.flags & TF_kernel_mode) )
             toggle_guest_mode(n);
         else
             regs->cs &= ~3;
 
+        /* CS longword also contains full evtchn_upcall_mask. */
+        cs_and_mask = (unsigned long)regs->cs |
+            ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32);
+
+        /* Fold upcall mask into RFLAGS.IF. */
+        rflags  = regs->rflags & ~X86_EFLAGS_IF;
+        rflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
+
         if ( put_user(regs->ss,            rsp- 1) |
              put_user(regs->rsp,           rsp- 2) |
-             put_user(regs->rflags,        rsp- 3) |
-             put_user(regs->cs,            rsp- 4) |
+             put_user(rflags,              rsp- 3) |
+             put_user(cs_and_mask,         rsp- 4) |
              put_user(regs->rip,           rsp- 5) |
              put_user(nctxt->user_regs.gs, rsp- 6) |
              put_user(nctxt->user_regs.fs, rsp- 7) |
@@ -553,6 +558,10 @@ static void load_segments(struct vcpu *n
             DPRINTK("Error while creating failsafe callback frame.\n");
             domain_crash(n->domain);
         }
+
+        if ( test_bit(_VGCF_failsafe_disables_events,
+                      &n->arch.guest_context.flags) )
+            n->vcpu_info->evtchn_upcall_mask = 1;
 
         regs->entry_vector  = TRAP_syscall;
         regs->rflags       &= 0xFFFCBEFFUL;
@@ -935,7 +944,7 @@ void domain_relinquish_resources(struct 
                 put_page_type(mfn_to_page(pfn));
             put_page(mfn_to_page(pfn));
 
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
         }
 
         if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 )
@@ -944,7 +953,7 @@ void domain_relinquish_resources(struct 
                 put_page_type(mfn_to_page(pfn));
             put_page(mfn_to_page(pfn));
 
-            v->arch.guest_table_user = mk_pagetable(0);
+            v->arch.guest_table_user = pagetable_null();
         }
     }
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/domain_build.c       Wed Jun 07 11:03:51 2006 +0100
@@ -301,6 +301,9 @@ int construct_dom0(struct domain *d,
                xen_pae ? "yes" : "no", dom0_pae ? "yes" : "no");
         return -EINVAL;
     }
+
+    if ( xen_pae && !!strstr(dsi.xen_section_string, "PAE=yes[extended-cr3]") )
+        set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
 
     if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL )
     {
@@ -443,13 +446,13 @@ int construct_dom0(struct domain *d,
         l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
             l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
     }
-    v->arch.guest_table = mk_pagetable((unsigned long)l3start);
+    v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
 #else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
     memcpy(l2tab, idle_pg_table, PAGE_SIZE);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
-    v->arch.guest_table = mk_pagetable((unsigned long)l2start);
+    v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
 #endif
 
     for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
@@ -577,7 +580,7 @@ int construct_dom0(struct domain *d,
         l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
-    v->arch.guest_table = mk_pagetable(__pa(l4start));
+    v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
 
     l4tab += l4_table_offset(dsi.v_start);
     mfn = alloc_spfn;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Jun 07 11:03:51 2006 +0100
@@ -84,28 +84,26 @@ struct svm_percore_globals svm_globals[N
 /*
  * Initializes the POOL of ASID used by the guests per core.
  */
-void asidpool_init( int core )
+void asidpool_init(int core)
 {
     int i;
-    svm_globals[core].ASIDpool.asid_lock = SPIN_LOCK_UNLOCKED;
-    spin_lock(&svm_globals[core].ASIDpool.asid_lock);
+
+    spin_lock_init(&svm_globals[core].ASIDpool.asid_lock);
+
     /* Host ASID is always in use */
     svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE;
-    for( i=1; i<ASID_MAX; i++ )
-    {
+    for ( i = 1; i < ASID_MAX; i++ )
        svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE;
-    }
-    spin_unlock(&svm_globals[core].ASIDpool.asid_lock);
 }
 
 
 /* internal function to get the next available ASID */
-static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
+static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
 {
     int i;   
-    for( i = 1; i < ASID_MAX; i++ )
-    {
-        if( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
+    for ( i = 1; i < ASID_MAX; i++ )
+    {
+        if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE )
         {
             vmcb->guest_asid = i;
             svm_globals[core].ASIDpool.asid[i] = ASID_INUSE;
@@ -746,34 +744,34 @@ static void svm_ctxt_switch_to(struct vc
 
 void svm_final_setup_guest(struct vcpu *v)
 {
+    struct domain *d = v->domain;
+    struct vcpu *vc;
+
     v->arch.schedule_tail    = arch_svm_do_launch;
     v->arch.ctxt_switch_from = svm_ctxt_switch_from;
     v->arch.ctxt_switch_to   = svm_ctxt_switch_to;
 
-    if (v == v->domain->vcpu[0]) 
-    {
-       struct domain *d = v->domain;
-       struct vcpu *vc;
-
-       /* Initialize monitor page table */
-       for_each_vcpu(d, vc)
-           vc->arch.monitor_table = mk_pagetable(0);
-
-        /* 
-         * Required to do this once per domain
-         * TODO: add a seperate function to do these.
-         */
-        memset(&d->shared_info->evtchn_mask[0], 0xff, 
-               sizeof(d->shared_info->evtchn_mask));       
-
-        /* 
-         * Put the domain in shadow mode even though we're going to be using
-         * the shared 1:1 page table initially. It shouldn't hurt 
-         */
-        shadow_mode_enable(d, 
-                SHM_enable|SHM_refcounts|
-               SHM_translate|SHM_external|SHM_wr_pt_pte);
-    }
+    if ( v != d->vcpu[0] )
+        return;
+
+    /* Initialize monitor page table */
+    for_each_vcpu( d, vc )
+        vc->arch.monitor_table = pagetable_null();
+
+    /* 
+     * Required to do this once per domain
+     * TODO: add a seperate function to do these.
+     */
+    memset(&d->shared_info->evtchn_mask[0], 0xff, 
+           sizeof(d->shared_info->evtchn_mask));       
+
+    /* 
+     * Put the domain in shadow mode even though we're going to be using
+     * the shared 1:1 page table initially. It shouldn't hurt 
+     */
+    shadow_mode_enable(d,
+                       SHM_enable|SHM_refcounts|
+                       SHM_translate|SHM_external|SHM_wr_pt_pte);
 }
 
 
@@ -870,7 +868,7 @@ static int svm_do_page_fault(unsigned lo
     /* Use 1:1 page table to identify MMIO address space */
     if (mmio_space(gpa))
     {
-       /* No support for APIC */
+        /* No support for APIC */
         if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
         { 
             int inst_len;
@@ -1570,7 +1568,7 @@ static int svm_set_cr0(unsigned long val
         }
 
         /* Now arch.guest_table points to machine physical. */
-        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
         update_pagetables(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
@@ -1590,7 +1588,7 @@ static int svm_set_cr0(unsigned long val
         if ( v->arch.hvm_svm.cpu_cr3 ) {
             put_page(mfn_to_page(get_mfn_from_gpfn(
                       v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
         }
 
     /*
@@ -1599,7 +1597,7 @@ static int svm_set_cr0(unsigned long val
      * created.
      */
     if ((value & X86_CR0_PE) == 0) {
-       if (value & X86_CR0_PG) {
+        if (value & X86_CR0_PG) {
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             return 0;
         }
@@ -1740,7 +1738,7 @@ static int mov_to_cr(int gpreg, int cr, 
             }
 
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+            v->arch.guest_table = pagetable_from_pfn(mfn);
 
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
@@ -1797,7 +1795,7 @@ static int mov_to_cr(int gpreg, int cr, 
                  * Now arch.guest_table points to machine physical.
                  */
 
-                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+                v->arch.guest_table = pagetable_from_pfn(mfn);
                 update_pagetables(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 07 11:03:51 2006 +0100
@@ -66,7 +66,7 @@ void vmx_final_setup_guest(struct vcpu *
 
         /* Initialize monitor page table */
         for_each_vcpu(d, vc)
-            vc->arch.monitor_table = mk_pagetable(0);
+            vc->arch.monitor_table = pagetable_null();
 
         /*
          * Required to do this once per domain
@@ -1223,7 +1223,7 @@ vmx_world_restore(struct vcpu *v, struct
         if(!get_page(mfn_to_page(mfn), v->domain))
                 return 0;
         old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
         if (old_base_mfn)
              put_page(mfn_to_page(old_base_mfn));
         /*
@@ -1459,7 +1459,7 @@ static int vmx_set_cr0(unsigned long val
         /*
          * Now arch.guest_table points to machine physical.
          */
-        v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
         update_pagetables(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
@@ -1477,7 +1477,7 @@ static int vmx_set_cr0(unsigned long val
         if ( v->arch.hvm_vmx.cpu_cr3 ) {
             put_page(mfn_to_page(get_mfn_from_gpfn(
                       v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
         }
 
     /*
@@ -1635,7 +1635,7 @@ static int mov_to_cr(int gp, int cr, str
                 domain_crash_synchronous(); /* need to take a clean path */
             }
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+            v->arch.guest_table = pagetable_from_pfn(mfn);
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
             /*
@@ -1690,7 +1690,7 @@ static int mov_to_cr(int gp, int cr, str
                  * Now arch.guest_table points to machine physical.
                  */
 
-                v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT);
+                v->arch.guest_table = pagetable_from_pfn(mfn);
                 update_pagetables(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
@@ -1970,7 +1970,6 @@ static inline void vmx_vmexit_do_extint(
         __hvm_bug(regs);
 
     vector &= INTR_INFO_VECTOR_MASK;
-    local_irq_disable();
     TRACE_VMEXIT(1,vector);
 
     switch(vector) {
@@ -2065,30 +2064,33 @@ asmlinkage void vmx_vmexit_handler(struc
     struct vcpu *v = current;
     int error;
 
-    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
-        __hvm_bug(&regs);
+    error = __vmread(VM_EXIT_REASON, &exit_reason);
+    BUG_ON(error);
 
     perfc_incra(vmexits, exit_reason);
 
-    /* don't bother H/W interrutps */
-    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
-        exit_reason != EXIT_REASON_VMCALL &&
-        exit_reason != EXIT_REASON_IO_INSTRUCTION) 
+    if ( (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) &&
+         (exit_reason != EXIT_REASON_VMCALL) &&
+         (exit_reason != EXIT_REASON_IO_INSTRUCTION) )
         HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
 
-    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+    if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
+        local_irq_enable();
+
+    if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
+    {
         printk("Failed vm entry (reason 0x%x)\n", exit_reason);
         printk("*********** VMCS Area **************\n");
         vmcs_dump_vcpu();
         printk("**************************************\n");
         domain_crash_synchronous();
-        return;
     }
 
     __vmread(GUEST_RIP, &eip);
     TRACE_VMEXIT(0,exit_reason);
 
-    switch (exit_reason) {
+    switch ( exit_reason )
+    {
     case EXIT_REASON_EXCEPTION_NMI:
     {
         /*
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S       Wed Jun 07 11:03:51 2006 +0100
@@ -55,29 +55,26 @@
  * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
  */
 
-#define HVM_MONITOR_EFLAGS     0x202 /* IF on */
 #define NR_SKIPPED_REGS        6       /* See the above explanation */
-#define HVM_SAVE_ALL_NOSEGREGS \
-        pushl $HVM_MONITOR_EFLAGS; \
-        popf; \
-        subl $(NR_SKIPPED_REGS*4), %esp; \
+#define HVM_SAVE_ALL_NOSEGREGS                                              \
+        subl $(NR_SKIPPED_REGS*4), %esp;                                    \
         movl $0, 0xc(%esp);  /* XXX why do we need to force eflags==0 ?? */ \
-        pushl %eax; \
-        pushl %ebp; \
-        pushl %edi; \
-        pushl %esi; \
-        pushl %edx; \
-        pushl %ecx; \
+        pushl %eax;                                                         \
+        pushl %ebp;                                                         \
+        pushl %edi;                                                         \
+        pushl %esi;                                                         \
+        pushl %edx;                                                         \
+        pushl %ecx;                                                         \
         pushl %ebx;
 
-#define HVM_RESTORE_ALL_NOSEGREGS   \
-        popl %ebx;  \
-        popl %ecx;  \
-        popl %edx;  \
-        popl %esi;  \
-        popl %edi;  \
-        popl %ebp;  \
-        popl %eax;  \
+#define HVM_RESTORE_ALL_NOSEGREGS               \
+        popl %ebx;                              \
+        popl %ecx;                              \
+        popl %edx;                              \
+        popl %esi;                              \
+        popl %edi;                              \
+        popl %ebp;                              \
+        popl %eax;                              \
         addl $(NR_SKIPPED_REGS*4), %esp
 
         ALIGN
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S       Wed Jun 07 11:03:51 2006 +0100
@@ -51,45 +51,42 @@
  * (2/1)  u32 entry_vector;
  * (1/1)  u32 error_code;
  */
-#define HVM_MONITOR_RFLAGS     0x202 /* IF on */
 #define NR_SKIPPED_REGS        6       /* See the above explanation */
-#define HVM_SAVE_ALL_NOSEGREGS \
-        pushq $HVM_MONITOR_RFLAGS; \
-        popfq; \
-        subq $(NR_SKIPPED_REGS*8), %rsp; \
-        pushq %rdi; \
-        pushq %rsi; \
-        pushq %rdx; \
-        pushq %rcx; \
-        pushq %rax; \
-        pushq %r8;  \
-        pushq %r9;  \
-        pushq %r10; \
-        pushq %r11; \
-        pushq %rbx; \
-        pushq %rbp; \
-        pushq %r12; \
-        pushq %r13; \
-        pushq %r14; \
-        pushq %r15; \
+#define HVM_SAVE_ALL_NOSEGREGS                  \
+        subq $(NR_SKIPPED_REGS*8), %rsp;        \
+        pushq %rdi;                             \
+        pushq %rsi;                             \
+        pushq %rdx;                             \
+        pushq %rcx;                             \
+        pushq %rax;                             \
+        pushq %r8;                              \
+        pushq %r9;                              \
+        pushq %r10;                             \
+        pushq %r11;                             \
+        pushq %rbx;                             \
+        pushq %rbp;                             \
+        pushq %r12;                             \
+        pushq %r13;                             \
+        pushq %r14;                             \
+        pushq %r15;
 
-#define HVM_RESTORE_ALL_NOSEGREGS \
-        popq %r15; \
-        popq %r14; \
-        popq %r13; \
-        popq %r12; \
-        popq %rbp; \
-        popq %rbx; \
-        popq %r11; \
-        popq %r10; \
-        popq %r9;  \
-        popq %r8;  \
-        popq %rax; \
-        popq %rcx; \
-        popq %rdx; \
-        popq %rsi; \
-        popq %rdi; \
-        addq $(NR_SKIPPED_REGS*8), %rsp; \
+#define HVM_RESTORE_ALL_NOSEGREGS               \
+        popq %r15;                              \
+        popq %r14;                              \
+        popq %r13;                              \
+        popq %r12;                              \
+        popq %rbp;                              \
+        popq %rbx;                              \
+        popq %r11;                              \
+        popq %r10;                              \
+        popq %r9;                               \
+        popq %r8;                               \
+        popq %rax;                              \
+        popq %rcx;                              \
+        popq %rdx;                              \
+        popq %rsi;                              \
+        popq %rdi;                              \
+        addq $(NR_SKIPPED_REGS*8), %rsp;
 
 ENTRY(vmx_asm_vmexit_handler)
         /* selectors are restored/saved by VMX */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/i8259.c      Wed Jun 07 11:03:51 2006 +0100
@@ -102,7 +102,7 @@ BUILD_SMP_INTERRUPT(thermal_interrupt,TH
  * moves to arch independent land
  */
 
-spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(i8259A_lock);
 
 static void disable_8259A_vector(unsigned int vector)
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/microcode.c  Wed Jun 07 11:03:51 2006 +0100
@@ -83,7 +83,7 @@
 #include <asm/processor.h>
 
 #define pr_debug(x...) ((void)0)
-#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
+#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m)
 #define down(_m) spin_lock(_m)
 #define up(_m) spin_unlock(_m)
 #define vmalloc(_s) xmalloc_bytes(_s)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/mm.c Wed Jun 07 11:03:51 2006 +0100
@@ -89,6 +89,7 @@
 #include <xen/kernel.h>
 #include <xen/lib.h>
 #include <xen/mm.h>
+#include <xen/domain.h>
 #include <xen/sched.h>
 #include <xen/errno.h>
 #include <xen/perfc.h>
@@ -187,20 +188,16 @@ void arch_init_memory(void)
      * Any Xen-heap pages that we will allow to be mapped will have
      * their domain field set to dom_xen.
      */
-    dom_xen = alloc_domain();
-    spin_lock_init(&dom_xen->page_alloc_lock);
-    atomic_set(&dom_xen->refcnt, 1);
-    dom_xen->domain_id = DOMID_XEN;
+    dom_xen = alloc_domain(DOMID_XEN);
+    BUG_ON(dom_xen == NULL);
 
     /*
      * Initialise our DOMID_IO domain.
      * This domain owns I/O pages that are within the range of the page_info
      * array. Mappings occur at the priv of the caller.
      */
-    dom_io = alloc_domain();
-    spin_lock_init(&dom_io->page_alloc_lock);
-    atomic_set(&dom_io->refcnt, 1);
-    dom_io->domain_id = DOMID_IO;
+    dom_io = alloc_domain(DOMID_IO);
+    BUG_ON(dom_io == NULL);
 
     /* First 1MB of RAM is historically marked as I/O. */
     for ( i = 0; i < 0x100; i++ )
@@ -999,6 +996,21 @@ static int alloc_l3_table(struct page_in
     int            i;
 
     ASSERT(!shadow_mode_refcounts(d));
+
+#ifdef CONFIG_X86_PAE
+    /*
+     * PAE pgdirs above 4GB are unacceptable if the guest does not understand
+     * the weird 'extended cr3' format for dealing with high-order address
+     * bits. We cut some slack for control tools (before vcpu0 is initialised).
+     */
+    if ( (pfn >= 0x100000) &&
+         unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
+         d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
+    {
+        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
+        return 0;
+    }
+#endif
 
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
@@ -1717,7 +1729,7 @@ int new_guest_cr3(unsigned long mfn)
         {
             /* Switch to idle pagetable: this VCPU has no active p.t. now. */
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable(0);
+            v->arch.guest_table = pagetable_null();
             update_pagetables(v);
             write_cr3(__pa(idle_pg_table));
             if ( old_base_mfn != 0 )
@@ -1739,7 +1751,7 @@ int new_guest_cr3(unsigned long mfn)
     invalidate_shadow_ldt(v);
 
     old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-    v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+    v->arch.guest_table = pagetable_from_pfn(mfn);
     update_pagetables(v); /* update shadow_table and monitor_table */
 
     write_ptbase(v);
@@ -2006,7 +2018,7 @@ int do_mmuext_op(
             {
                 unsigned long old_mfn =
                     pagetable_get_pfn(v->arch.guest_table_user);
-                v->arch.guest_table_user = mk_pagetable(mfn << PAGE_SHIFT);
+                v->arch.guest_table_user = pagetable_from_pfn(mfn);
                 if ( old_mfn != 0 )
                     put_page_and_type(mfn_to_page(old_mfn));
             }
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/setup.c      Wed Jun 07 11:03:51 2006 +0100
@@ -85,8 +85,6 @@ extern void early_cpu_init(void);
 
 struct tss_struct init_tss[NR_CPUS];
 
-struct vcpu *idle_vcpu[NR_CPUS];
-
 extern unsigned long cpu0_stack[];
 
 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/shadow.c     Wed Jun 07 11:03:51 2006 +0100
@@ -2472,7 +2472,7 @@ static void shadow_update_pagetables(str
     if ( !get_shadow_ref(smfn) )
         BUG();
     old_smfn = pagetable_get_pfn(v->arch.shadow_table);
-    v->arch.shadow_table = mk_pagetable((u64)smfn << PAGE_SHIFT);
+    v->arch.shadow_table = pagetable_from_pfn(smfn);
     if ( old_smfn )
         put_shadow_ref(old_smfn);
 
@@ -3481,15 +3481,16 @@ static void shadow_set_l2e_64(unsigned l
 
     __shadow_get_l3e(v, va, &sl3e);
     if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
-         if (create_l2_shadow) {
+        if (create_l2_shadow) {
             perfc_incrc(shadow_set_l2e_force_map);
             shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
             __shadow_get_l3e(v, va, &sl3e);
         } else {
             printk("For non HVM shadow, create_l1_shadow:%d\n", 
create_l2_shadow);
         }
-         shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
-
+
+        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L4 )
+            shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
     }
 
     if ( put_ref_check ) {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/shadow32.c   Wed Jun 07 11:03:51 2006 +0100
@@ -583,7 +583,7 @@ static void free_shadow_pages(struct dom
         if ( pagetable_get_paddr(v->arch.shadow_table) )
         {
             put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
-            v->arch.shadow_table = mk_pagetable(0);
+            v->arch.shadow_table = pagetable_null();
 
             if ( shadow_mode_external(d) )
             {
@@ -765,7 +765,7 @@ static void alloc_monitor_pagetable(stru
     mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
     mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty();
 
-    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
+    v->arch.monitor_table = pagetable_from_pfn(mmfn);
     v->arch.monitor_vtable = mpl2e;
 
     if ( v->vcpu_id == 0 )
@@ -830,7 +830,7 @@ void free_monitor_pagetable(struct vcpu 
     unmap_domain_page_global(v->arch.monitor_vtable);
     free_domheap_page(mfn_to_page(mfn));
 
-    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_table = pagetable_null();
     v->arch.monitor_vtable = 0;
 }
 
@@ -992,7 +992,7 @@ alloc_p2m_table(struct domain *d)
 
         l1tab = map_domain_page(page_to_mfn(page));
         memset(l1tab, 0, PAGE_SIZE);
-        d->arch.phys_table = mk_pagetable(page_to_maddr(page));
+        d->arch.phys_table = pagetable_from_page(page);
     }
 
     list_ent = d->page_list.next;
@@ -1126,7 +1126,7 @@ int shadow_direct_map_init(struct domain
     memset(root, 0, PAGE_SIZE);
     unmap_domain_page(root);
 
-    d->arch.phys_table = mk_pagetable(page_to_maddr(page));
+    d->arch.phys_table = pagetable_from_page(page);
 
     return 1;
 }
@@ -1156,7 +1156,7 @@ void shadow_direct_map_clean(struct doma
 
     unmap_domain_page(l2e);
 
-    d->arch.phys_table = mk_pagetable(0);
+    d->arch.phys_table = pagetable_null();
 }
 
 int __shadow_mode_enable(struct domain *d, unsigned int mode)
@@ -3231,7 +3231,7 @@ void __update_pagetables(struct vcpu *v)
     if ( !get_shadow_ref(smfn) )
         BUG();
     old_smfn = pagetable_get_pfn(v->arch.shadow_table);
-    v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
+    v->arch.shadow_table = pagetable_from_pfn(smfn);
     if ( old_smfn )
         put_shadow_ref(old_smfn);
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/shadow_public.c      Wed Jun 07 11:03:51 2006 +0100
@@ -50,7 +50,7 @@ int shadow_direct_map_init(struct domain
     memset(root, 0, PAGE_SIZE);
     root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR);
 
-    d->arch.phys_table = mk_pagetable(page_to_maddr(page));
+    d->arch.phys_table = pagetable_from_page(page);
 
     unmap_domain_page(root);
     return 1;
@@ -92,7 +92,7 @@ void shadow_direct_map_clean(struct doma
 
     unmap_domain_page(l3e);
 
-    d->arch.phys_table = mk_pagetable(0);
+    d->arch.phys_table = pagetable_null();
 }
 
 /****************************************************************************/
@@ -338,7 +338,7 @@ static void alloc_monitor_pagetable(stru
 
     /* map the phys_to_machine map into the per domain Read-Only MPT space */
 
-    v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
+    v->arch.monitor_table = pagetable_from_pfn(mmfn);
     v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
     mpl4e[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
 
@@ -380,7 +380,7 @@ void free_monitor_pagetable(struct vcpu 
     unmap_domain_page_global(v->arch.monitor_vtable);
     free_domheap_page(mfn_to_page(mfn));
 
-    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_table = pagetable_null();
     v->arch.monitor_vtable = 0;
 }
 #elif CONFIG_PAGING_LEVELS == 3
@@ -431,7 +431,7 @@ static void alloc_monitor_pagetable(stru
     for ( i = 0; i < (MACHPHYS_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
         mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = l2e_empty();
 
-    v->arch.monitor_table = mk_pagetable(m3mfn << PAGE_SHIFT); /* < 4GB */
+    v->arch.monitor_table = pagetable_from_pfn(m3mfn);
     v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e;
 
     if ( v->vcpu_id == 0 )
@@ -492,7 +492,7 @@ void free_monitor_pagetable(struct vcpu 
     unmap_domain_page_global(v->arch.monitor_vtable);
     free_domheap_page(mfn_to_page(m3mfn));
 
-    v->arch.monitor_table = mk_pagetable(0);
+    v->arch.monitor_table = pagetable_null();
     v->arch.monitor_vtable = 0;
 }
 #endif
@@ -924,7 +924,7 @@ void free_shadow_pages(struct domain *d)
         if ( pagetable_get_paddr(v->arch.shadow_table) )
         {
             put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
-            v->arch.shadow_table = mk_pagetable(0);
+            v->arch.shadow_table = pagetable_null();
 
             if ( shadow_mode_external(d) )
             {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/smp.c        Wed Jun 07 11:03:51 2006 +0100
@@ -161,7 +161,7 @@ void send_IPI_mask_phys(cpumask_t mask, 
     local_irq_restore(flags);
 }
 
-static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(flush_lock);
 static cpumask_t flush_cpumask;
 static unsigned long flush_va;
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/smpboot.c    Wed Jun 07 11:03:51 2006 +0100
@@ -37,6 +37,7 @@
 #include <xen/init.h>
 #include <xen/kernel.h>
 #include <xen/mm.h>
+#include <xen/domain.h>
 #include <xen/sched.h>
 #include <xen/irq.h>
 #include <xen/delay.h>
@@ -886,28 +887,16 @@ static int __devinit do_boot_cpu(int api
        int timeout;
        unsigned long start_eip;
        unsigned short nmi_high = 0, nmi_low = 0;
-       struct domain *d;
        struct vcpu *v;
-       int vcpu_id;
 
        ++cpucount;
 
        booting_cpu = cpu;
 
-       if ((vcpu_id = cpu % MAX_VIRT_CPUS) == 0) {
-               d = domain_create(IDLE_DOMAIN_ID, cpu);
-               BUG_ON(d == NULL);
-               v = d->vcpu[0];
-       } else {
-               d = idle_vcpu[cpu - vcpu_id]->domain;
-               BUG_ON(d == NULL);
-               v = alloc_vcpu(d, vcpu_id, cpu);
-       }
-
-       idle_vcpu[cpu] = v;
+       v = alloc_idle_vcpu(cpu);
        BUG_ON(v == NULL);
 
-       v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+       v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table));
 
        /* start_eip had better be page-aligned! */
        start_eip = setup_trampoline();
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/time.c       Wed Jun 07 11:03:51 2006 +0100
@@ -40,10 +40,10 @@ boolean_param("hpet_force", opt_hpet_for
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 unsigned long hpet_address;
-spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(rtc_lock);
 unsigned long volatile jiffies;
 static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
-static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(wc_lock);
 
 struct time_scale {
     int shift;
@@ -67,7 +67,7 @@ static s_time_t stime_platform_stamp;
 static s_time_t stime_platform_stamp;
 static u64 platform_timer_stamp;
 static struct time_scale platform_timer_scale;
-static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(platform_timer_lock);
 static u64 (*read_platform_count)(void);
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/traps.c      Wed Jun 07 11:03:51 2006 +0100
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
                     PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                 break;
             }
-            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
+            regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
             break;
 
         case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
-            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
+            regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
             break;
         }
 
@@ -1034,8 +1034,8 @@ static int emulate_privileged_op(struct 
             break;
             
         case 3: /* Read CR3 */
-            *reg = pfn_to_paddr(mfn_to_gmfn(v->domain,
-                                    pagetable_get_pfn(v->arch.guest_table)));
+            *reg = xen_pfn_to_cr3(mfn_to_gmfn(
+                v->domain, pagetable_get_pfn(v->arch.guest_table)));
             break;
 
         case 4: /* Read CR4 */
@@ -1085,7 +1085,7 @@ static int emulate_privileged_op(struct 
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
             cleanup_writable_pagetable(v->domain);
-            (void)new_guest_cr3(gmfn_to_mfn(v->domain, paddr_to_pfn(*reg)));
+            (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
             UNLOCK_BIGLOCK(v->domain);
             break;
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/asm-offsets.c Wed Jun 07 11:03:51 2006 +0100
@@ -64,11 +64,13 @@ void __dummy__(void)
            arch.guest_context.kernel_ss);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
     OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
+    DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
     BLANK();
 
     OFFSET(TSS_ss0, struct tss_struct, ss0);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/domain_page.c Wed Jun 07 11:03:51 2006 +0100
@@ -183,7 +183,7 @@ static unsigned long inuse[BITS_TO_LONGS
 static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
 static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
 static unsigned int inuse_cursor;
-static spinlock_t globalmap_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(globalmap_lock);
 
 void *map_domain_page_global(unsigned long pfn)
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/entry.S       Wed Jun 07 11:03:51 2006 +0100
@@ -130,7 +130,10 @@ failsafe_callback:
         movl  VCPU_failsafe_sel(%ebx),%eax
         movw  %ax,TRAPBOUNCE_cs(%edx)
         movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
-        call  create_bounce_frame
+        bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%ebx)
+        jnc   1f
+        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+1:      call  create_bounce_frame
         xorl  %eax,%eax
         movl  %eax,UREGS_ds(%esp)
         movl  %eax,UREGS_es(%esp)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/mm.c  Wed Jun 07 11:03:51 2006 +0100
@@ -75,7 +75,8 @@ void __init paging_init(void)
     printk("PAE disabled.\n");
 #endif
 
-    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+    idle_vcpu[0]->arch.monitor_table =
+        pagetable_from_paddr(__pa(idle_pg_table));
 
     if ( cpu_has_pge )
     {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_32/traps.c       Wed Jun 07 11:03:51 2006 +0100
@@ -346,6 +346,12 @@ static long register_guest_callback(stru
     case CALLBACKTYPE_failsafe:
         v->arch.guest_context.failsafe_callback_cs  = reg->address.cs;
         v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
+        if ( reg->flags & CALLBACKF_mask_events )
+            set_bit(_VGCF_failsafe_disables_events,
+                    &v->arch.guest_context.flags);
+        else
+            clear_bit(_VGCF_failsafe_disables_events,
+                      &v->arch.guest_context.flags);
         break;
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/asm-offsets.c Wed Jun 07 11:03:51 2006 +0100
@@ -64,11 +64,14 @@ void __dummy__(void)
            arch.guest_context.syscall_callback_eip);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
     OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
+    DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
+    DEFINE(_VGCF_syscall_disables_events,  _VGCF_syscall_disables_events);
     BLANK();
 
     OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/entry.S       Wed Jun 07 11:03:51 2006 +0100
@@ -30,7 +30,10 @@ switch_to_kernel:
         movq  VCPU_syscall_addr(%rbx),%rax
         movq  %rax,TRAPBOUNCE_eip(%rdx)
         movw  $0,TRAPBOUNCE_flags(%rdx)
-        call  create_bounce_frame
+        bt    $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
+        jnc   1f
+        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+1:      call  create_bounce_frame
         jmp   test_all_events
 
 /* %rbx: struct vcpu, interrupts disabled */
@@ -77,7 +80,10 @@ failsafe_callback:
         movq  VCPU_failsafe_addr(%rbx),%rax
         movq  %rax,TRAPBOUNCE_eip(%rdx)
         movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
-        call  create_bounce_frame
+        bt    $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
+        jnc   1f
+        orw   $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+1:      call  create_bounce_frame
         jmp   test_all_events
 .previous
 .section __pre_ex_table,"a"
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Wed Jun 07 11:03:51 2006 +0100
@@ -81,7 +81,8 @@ void __init paging_init(void)
     l2_pgentry_t *l2_ro_mpt;
     struct page_info *pg;
 
-    idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+    idle_vcpu[0]->arch.monitor_table =
+        pagetable_from_paddr(__pa(idle_pg_table));
 
     /* Create user-accessible L2 directory to map the MPT for guests. */
     l3_ro_mpt = alloc_xenheap_page();
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_64/traps.c       Wed Jun 07 11:03:51 2006 +0100
@@ -195,7 +195,7 @@ unsigned long do_iret(void)
     /* Returning to user mode? */
     if ( (iret_saved.cs & 3) == 3 )
     {
-        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+        if ( unlikely(pagetable_is_null(v->arch.guest_table_user)) )
         {
             DPRINTK("Guest switching to user mode with no user page tables\n");
             domain_crash_synchronous();
@@ -334,10 +334,22 @@ static long register_guest_callback(stru
 
     case CALLBACKTYPE_failsafe:
         v->arch.guest_context.failsafe_callback_eip = reg->address;
+        if ( reg->flags & CALLBACKF_mask_events )
+            set_bit(_VGCF_failsafe_disables_events,
+                    &v->arch.guest_context.flags);
+        else
+            clear_bit(_VGCF_failsafe_disables_events,
+                      &v->arch.guest_context.flags);
         break;
 
     case CALLBACKTYPE_syscall:
         v->arch.guest_context.syscall_callback_eip  = reg->address;
+        if ( reg->flags & CALLBACKF_mask_events )
+            set_bit(_VGCF_syscall_disables_events,
+                    &v->arch.guest_context.flags);
+        else
+            clear_bit(_VGCF_syscall_disables_events,
+                      &v->arch.guest_context.flags);
         break;
 
     case CALLBACKTYPE_nmi:
diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/arch/x86/x86_emulate.c        Wed Jun 07 11:03:51 2006 +0100
@@ -100,8 +100,8 @@ static uint8_t opcode_table[256] = {
     ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
     ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
     /* 0x88 - 0x8F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
+    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
     0, 0, 0, DstMem|SrcNone|ModRM|Mov,
     /* 0x90 - 0x9F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -380,11 +380,12 @@ do{ __asm__ __volatile__ (              
       ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
 #define register_address_increment(reg, inc)                            \
 do {                                                                    \
+    int _inc = (inc); /* signed type ensures sign extension to long */  \
     if ( ad_bytes == sizeof(unsigned long) )                            \
-        (reg) += (inc);                                                 \
+        (reg) += _inc;                                                  \
     else                                                                \
         (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) |             \
-                (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1));     \
+                (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1));      \
 } while (0)
 
 void *
@@ -858,7 +859,7 @@ x86_emulate_memop(
                                          &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
-            register_address_increment(_regs.esp, -(int)dst.bytes);
+            register_address_increment(_regs.esp, -dst.bytes);
             if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
                                       dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
@@ -942,9 +943,9 @@ x86_emulate_memop(
                 goto done;
         }
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -955,7 +956,7 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
@@ -964,7 +965,7 @@ x86_emulate_memop(
         if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/dom0_ops.c     Wed Jun 07 11:03:51 2006 +0100
@@ -95,7 +95,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     long ret = 0;
     struct dom0_op curop, *op = &curop;
     void *ssid = NULL; /* save security ptr between pre and post/fail hooks */
-    static spinlock_t dom0_lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(dom0_lock);
 
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/domain.c
--- a/xen/common/domain.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/domain.c       Wed Jun 07 11:03:51 2006 +0100
@@ -32,22 +32,111 @@ struct domain *domain_list;
 
 struct domain *dom0;
 
-struct domain *domain_create(domid_t dom_id, unsigned int cpu)
-{
-    struct domain *d, **pd;
-    struct vcpu *v;
-
-    if ( (d = alloc_domain()) == NULL )
+struct vcpu *idle_vcpu[NR_CPUS];
+
+struct domain *alloc_domain(domid_t domid)
+{
+    struct domain *d;
+
+    if ( (d = xmalloc(struct domain)) == NULL )
         return NULL;
 
-    d->domain_id = dom_id;
-
+    memset(d, 0, sizeof(*d));
+    d->domain_id = domid;
     atomic_set(&d->refcnt, 1);
-
     spin_lock_init(&d->big_lock);
     spin_lock_init(&d->page_alloc_lock);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
+
+    return d;
+}
+
+
+void free_domain(struct domain *d)
+{
+    struct vcpu *v;
+    int i;
+
+    sched_destroy_domain(d);
+
+    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
+        if ( (v = d->vcpu[i]) != NULL )
+            free_vcpu_struct(v);
+
+    xfree(d);
+}
+
+
+struct vcpu *alloc_vcpu(
+    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
+{
+    struct vcpu *v;
+
+    BUG_ON(d->vcpu[vcpu_id] != NULL);
+
+    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
+        return NULL;
+
+    v->domain = d;
+    v->vcpu_id = vcpu_id;
+    v->processor = cpu_id;
+    atomic_set(&v->pausecnt, 0);
+    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
+
+    v->cpu_affinity = is_idle_domain(d) ?
+        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
+
+    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
+    v->runstate.state_entry_time = NOW();
+
+    if ( (vcpu_id != 0) && !is_idle_domain(d) )
+        set_bit(_VCPUF_down, &v->vcpu_flags);
+
+    if ( sched_init_vcpu(v) < 0 )
+    {
+        free_vcpu_struct(v);
+        return NULL;
+    }
+
+    d->vcpu[vcpu_id] = v;
+    if ( vcpu_id != 0 )
+        d->vcpu[v->vcpu_id-1]->next_in_list = v;
+
+    return v;
+}
+
+struct vcpu *alloc_idle_vcpu(unsigned int cpu_id)
+{
+    struct domain *d;
+    struct vcpu *v;
+    unsigned int vcpu_id;
+
+    if ((vcpu_id = cpu_id % MAX_VIRT_CPUS) == 0)
+    {
+        d = domain_create(IDLE_DOMAIN_ID, cpu_id);
+        BUG_ON(d == NULL);
+        v = d->vcpu[0];
+    }
+    else
+    {
+        d = idle_vcpu[cpu_id - vcpu_id]->domain;
+        BUG_ON(d == NULL);
+        v = alloc_vcpu(d, vcpu_id, cpu_id);
+    }
+
+    idle_vcpu[cpu_id] = v;
+
+    return v;
+}
+
+struct domain *domain_create(domid_t domid, unsigned int cpu)
+{
+    struct domain *d, **pd;
+    struct vcpu *v;
+
+    if ( (d = alloc_domain(domid)) == NULL )
+        return NULL;
 
     rangeset_domain_initialise(d);
 
@@ -74,14 +163,14 @@ struct domain *domain_create(domid_t dom
     if ( !is_idle_domain(d) )
     {
         write_lock(&domlist_lock);
-        pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
+        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
         for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
             if ( (*pd)->domain_id > d->domain_id )
                 break;
         d->next_in_list = *pd;
         *pd = d;
-        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(dom_id)];
-        domain_hash[DOMAIN_HASH(dom_id)] = d;
+        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
+        domain_hash[DOMAIN_HASH(domid)] = d;
         write_unlock(&domlist_lock);
     }
 
@@ -126,19 +215,16 @@ struct domain *find_domain_by_id(domid_t
 
 void domain_kill(struct domain *d)
 {
-    struct vcpu *v;
-
     domain_pause(d);
-    if ( !test_and_set_bit(_DOMF_dying, &d->domain_flags) )
-    {
-        for_each_vcpu(d, v)
-            sched_rem_domain(v);
-        gnttab_release_mappings(d);
-        domain_relinquish_resources(d);
-        put_domain(d);
-
-        send_guest_global_virq(dom0, VIRQ_DOM_EXC);
-    }
+
+    if ( test_and_set_bit(_DOMF_dying, &d->domain_flags) )
+        return;
+
+    gnttab_release_mappings(d);
+    domain_relinquish_resources(d);
+    put_domain(d);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
 }
 
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/kernel.c
--- a/xen/common/kernel.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/kernel.c       Wed Jun 07 11:03:51 2006 +0100
@@ -184,6 +184,7 @@ long do_xen_version(int cmd, XEN_GUEST_H
     case XENVER_get_features:
     {
         xen_feature_info_t fi;
+        struct domain *d = current->domain;
 
         if ( copy_from_guest(&fi, arg, 1) )
             return -EFAULT;
@@ -191,7 +192,9 @@ long do_xen_version(int cmd, XEN_GUEST_H
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
+            fi.submap = 0;
+            if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
+                fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( shadow_mode_translate(current->domain) )
                 fi.submap |= 
                     (1U << XENFEAT_writable_page_tables) |
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/keyhandler.c   Wed Jun 07 11:03:51 2006 +0100
@@ -128,11 +128,12 @@ static void dump_domains(unsigned char k
                d->domain_flags, atomic_read(&d->refcnt),
                d->tot_pages, d->xenheap_pages, cpuset);
         printk("    handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
-               "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
+               "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
                d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
                d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
                d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
-               d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
+               d->handle[12], d->handle[13], d->handle[14], d->handle[15],
+               d->vm_assist);
 
         arch_dump_domain_info(d);
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/memory.c
--- a/xen/common/memory.c       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/memory.c       Wed Jun 07 11:03:51 2006 +0100
@@ -31,14 +31,15 @@ static long
 static long
 increase_reservation(
     struct domain *d, 
-    XEN_GUEST_HANDLE(ulong) extent_list,
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     unsigned int   nr_extents,
     unsigned int   extent_order,
     unsigned int   flags,
     int           *preempted)
 {
     struct page_info *page;
-    unsigned long     i, mfn;
+    unsigned long i;
+    xen_pfn_t mfn;
 
     if ( !guest_handle_is_null(extent_list) &&
          !guest_handle_okay(extent_list, nr_extents) )
@@ -80,14 +81,16 @@ static long
 static long
 populate_physmap(
     struct domain *d, 
-    XEN_GUEST_HANDLE(ulong) extent_list,
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     unsigned int  nr_extents,
     unsigned int  extent_order,
     unsigned int  flags,
     int          *preempted)
 {
     struct page_info *page;
-    unsigned long    i, j, gpfn, mfn;
+    unsigned long i, j;
+    xen_pfn_t gpfn;
+    xen_pfn_t mfn;
 
     if ( !guest_handle_okay(extent_list, nr_extents) )
         return 0;
@@ -177,13 +180,14 @@ static long
 static long
 decrease_reservation(
     struct domain *d,
-    XEN_GUEST_HANDLE(ulong) extent_list,
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     unsigned int   nr_extents,
     unsigned int   extent_order,
     unsigned int   flags,
     int           *preempted)
 {
-    unsigned long    i, j, gmfn;
+    unsigned long i, j;
+    xen_pfn_t gmfn;
 
     if ( !guest_handle_okay(extent_list, nr_extents) )
         return 0;
@@ -214,7 +218,9 @@ translate_gpfn_list(
     XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
 {
     struct xen_translate_gpfn_list op;
-    unsigned long i, gpfn, mfn;
+    unsigned long i;
+    xen_pfn_t gpfn;
+    xen_pfn_t mfn;
     struct domain *d;
 
     if ( copy_from_guest(&op, uop, 1) )
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/page_alloc.c   Wed Jun 07 11:03:51 2006 +0100
@@ -59,7 +59,7 @@ custom_param("lowmem_emergency_pool", pa
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 
-static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(page_scrub_lock);
 LIST_HEAD(page_scrub_list);
 
 /*********************
@@ -250,7 +250,7 @@ static struct list_head heap[NR_ZONES][M
 
 static unsigned long avail[NR_ZONES];
 
-static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(heap_lock);
 
 void end_boot_allocator(void)
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/perfc.c
--- a/xen/common/perfc.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/perfc.c        Wed Jun 07 11:03:51 2006 +0100
@@ -209,7 +209,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
 /* Dom0 control of perf counters */
 int perfc_control(dom0_perfccontrol_t *pc)
 {
-    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(lock);
     u32 op = pc->op;
     int rc;
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c    Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/sched_bvt.c    Wed Jun 07 11:03:51 2006 +0100
@@ -160,15 +160,14 @@ static inline u32 calc_evt(struct vcpu *
 }
 
 /**
- * bvt_alloc_task - allocate BVT private structures for a task
- * @p:              task to allocate private structures for
- *
+ * bvt_init_vcpu - allocate BVT private structures for a VCPU.
  * Returns non-zero on failure.
  */
-static int bvt_alloc_task(struct vcpu *v)
+static int bvt_init_vcpu(struct vcpu *v)
 {
     struct domain *d = v->domain;
     struct bvt_dom_info *inf;
+    struct bvt_vcpu_info *einf;
 
     if ( (d->sched_priv == NULL) )
     {
@@ -199,15 +198,7 @@ static int bvt_alloc_task(struct vcpu *v
         init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
     }
 
-    return 0;
-}
-
-/*
- * Add and remove a domain
- */
-static void bvt_add_task(struct vcpu *v) 
-{
-    struct bvt_vcpu_info *einf = EBVT_INFO(v);
+    einf = EBVT_INFO(v);
 
     /* Allocate per-CPU context if this is the first domain to be added. */
     if ( CPU_INFO(v->processor) == NULL )
@@ -223,13 +214,15 @@ static void bvt_add_task(struct vcpu *v)
         einf->avt = einf->evt = ~0U;
         BUG_ON(__task_on_runqueue(v));
         __add_to_runqueue_head(v);
-    } 
+    }
     else 
     {
         /* Set avt and evt to system virtual time. */
         einf->avt = CPU_SVT(v->processor);
         einf->evt = CPU_SVT(v->processor);
     }
+
+    return 0;
 }
 
 static void bvt_wake(struct vcpu *v)
@@ -298,10 +291,9 @@ static int bvt_set_affinity(struct vcpu 
 
 
 /**
- * bvt_free_task - free BVT private structures for a task
- * @d:             task
- */
-static void bvt_free_task(struct domain *d)
+ * bvt_destroy_domain - free BVT private structures for a domain.
+ */
+static void bvt_destroy_domain(struct domain *d)
 {
     struct bvt_dom_info *inf = BVT_INFO(d);
 
@@ -568,10 +560,10 @@ struct scheduler sched_bvt_def = {
     .name     = "Borrowed Virtual Time",
     .opt_name = "bvt",
     .sched_id = SCHED_BVT,
-    
-    .alloc_task     = bvt_alloc_task,
-    .add_task       = bvt_add_task,
-    .free_task      = bvt_free_task,
+
+    .init_vcpu      = bvt_init_vcpu,
+    .destroy_domain = bvt_destroy_domain,
+
     .do_schedule    = bvt_do_schedule,
     .control        = bvt_ctl,
     .adjdom         = bvt_adjdom,
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/sched_credit.c Wed Jun 07 11:03:51 2006 +0100
@@ -75,14 +75,13 @@
     } while ( 0 );
 
 #define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
-    _MACRO(vcpu_alloc)                      \
-    _MACRO(vcpu_add)                        \
+    _MACRO(vcpu_init)                       \
     _MACRO(vcpu_sleep)                      \
     _MACRO(vcpu_wake_running)               \
     _MACRO(vcpu_wake_onrunq)                \
     _MACRO(vcpu_wake_runnable)              \
     _MACRO(vcpu_wake_not_runnable)          \
-    _MACRO(dom_free)                        \
+    _MACRO(dom_destroy)                     \
     _MACRO(schedule)                        \
     _MACRO(tickle_local_idler)              \
     _MACRO(tickle_local_over)               \
@@ -429,14 +428,14 @@ __csched_vcpu_acct_idle_locked(struct cs
 }
 
 static int
-csched_vcpu_alloc(struct vcpu *vc)
+csched_vcpu_init(struct vcpu *vc)
 {
     struct domain * const dom = vc->domain;
     struct csched_dom *sdom;
     struct csched_vcpu *svc;
     int16_t pri;
 
-    CSCHED_STAT_CRANK(vcpu_alloc);
+    CSCHED_STAT_CRANK(vcpu_init);
 
     /* Allocate, if appropriate, per-domain info */
     if ( is_idle_vcpu(vc) )
@@ -489,19 +488,13 @@ csched_vcpu_alloc(struct vcpu *vc)
     if ( likely(sdom != NULL) )
         csched_vcpu_acct(svc, 0);
 
-    return 0;
-}
-
-static void
-csched_vcpu_add(struct vcpu *vc) 
-{
-    CSCHED_STAT_CRANK(vcpu_add);
-
     /* Allocate per-PCPU info */
     if ( unlikely(!CSCHED_PCPU(vc->processor)) )
         csched_pcpu_init(vc->processor);
 
     CSCHED_VCPU_CHECK(vc);
+
+    return 0;
 }
 
 static void
@@ -644,12 +637,12 @@ csched_dom_cntl(
 }
 
 static void
-csched_dom_free(struct domain *dom)
+csched_dom_destroy(struct domain *dom)
 {
     struct csched_dom * const sdom = CSCHED_DOM(dom);
     int i;
 
-    CSCHED_STAT_CRANK(dom_free);
+    CSCHED_STAT_CRANK(dom_destroy);
 
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
     {
@@ -1215,14 +1208,15 @@ struct scheduler sched_credit_def = {
     .opt_name       = "credit",
     .sched_id       = SCHED_CREDIT,
 
-    .alloc_task     = csched_vcpu_alloc,
-    .add_task       = csched_vcpu_add,
+    .init_vcpu      = csched_vcpu_init,
+    .destroy_domain = csched_dom_destroy,
+
     .sleep          = csched_vcpu_sleep,
     .wake           = csched_vcpu_wake,
+
     .set_affinity   = csched_vcpu_set_affinity,
 
     .adjdom         = csched_dom_cntl,
-    .free_task      = csched_dom_free,
 
     .tick           = csched_tick,
     .do_schedule    = csched_schedule,
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/sched_sedf.c   Wed Jun 07 11:03:51 2006 +0100
@@ -328,11 +328,9 @@ static inline void __add_to_runqueue_sor
 }
 
 
-/* Allocates memory for per domain private scheduling data*/
-static int sedf_alloc_task(struct vcpu *v)
-{
-    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
-          v->domain->domain_id, v->vcpu_id);
+static int sedf_init_vcpu(struct vcpu *v)
+{
+    struct sedf_vcpu_info *inf;
 
     if ( v->domain->sched_priv == NULL )
     {
@@ -344,23 +342,11 @@ static int sedf_alloc_task(struct vcpu *
 
     if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
         return -1;
-
     memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
 
-    return 0;
-}
-
-
-/* Setup the sedf_dom_info */
-static void sedf_add_task(struct vcpu *v)
-{
-    struct sedf_vcpu_info *inf = EDOM_INFO(v);
-
+    inf = EDOM_INFO(v);
     inf->vcpu = v;
  
-    PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",
-          v->domain->domain_id, v->vcpu_id);
-
     /* Allocate per-CPU context if this is the first domain to be added. */
     if ( unlikely(schedule_data[v->processor].sched_priv == NULL) )
     {
@@ -408,14 +394,13 @@ static void sedf_add_task(struct vcpu *v
         EDOM_INFO(v)->deadl_abs = 0;
         EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
     }
-}
-
-/* Frees memory used by domain info */
-static void sedf_free_task(struct domain *d)
+
+    return 0;
+}
+
+static void sedf_destroy_domain(struct domain *d)
 {
     int i;
-
-    PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
 
     xfree(d->sched_priv);
  
@@ -1452,9 +1437,9 @@ struct scheduler sched_sedf_def = {
     .opt_name = "sedf",
     .sched_id = SCHED_SEDF,
     
-    .alloc_task     = sedf_alloc_task,
-    .add_task       = sedf_add_task,
-    .free_task      = sedf_free_task,
+    .init_vcpu      = sedf_init_vcpu,
+    .destroy_domain = sedf_destroy_domain,
+
     .do_schedule    = sedf_do_schedule,
     .dump_cpu_state = sedf_dump_cpu_state,
     .sleep          = sedf_sleep,
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/schedule.c
--- a/xen/common/schedule.c     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/schedule.c     Wed Jun 07 11:03:51 2006 +0100
@@ -99,74 +99,7 @@ void vcpu_runstate_get(struct vcpu *v, s
     }
 }
 
-struct domain *alloc_domain(void)
-{
-    struct domain *d;
-
-    if ( (d = xmalloc(struct domain)) != NULL )
-        memset(d, 0, sizeof(*d));
-
-    return d;
-}
-
-void free_domain(struct domain *d)
-{
-    struct vcpu *v;
-    int i;
-
-    for_each_vcpu ( d, v )
-        sched_rem_domain(v);
-
-    SCHED_OP(free_task, d);
-
-    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
-        if ( (v = d->vcpu[i]) != NULL )
-            free_vcpu_struct(v);
-
-    xfree(d);
-}
-
-struct vcpu *alloc_vcpu(
-    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
-{
-    struct vcpu *v;
-
-    BUG_ON(d->vcpu[vcpu_id] != NULL);
-
-    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
-        return NULL;
-
-    v->domain = d;
-    v->vcpu_id = vcpu_id;
-    v->processor = cpu_id;
-    atomic_set(&v->pausecnt, 0);
-    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];
-
-    v->cpu_affinity = is_idle_domain(d) ?
-        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
-
-    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
-    v->runstate.state_entry_time = NOW();
-
-    if ( (vcpu_id != 0) && !is_idle_domain(d) )
-        set_bit(_VCPUF_down, &v->vcpu_flags);
-
-    if ( SCHED_OP(alloc_task, v) < 0 )
-    {
-        free_vcpu_struct(v);
-        return NULL;
-    }
-
-    d->vcpu[vcpu_id] = v;
-    if ( vcpu_id != 0 )
-        d->vcpu[v->vcpu_id-1]->next_in_list = v;
-
-    sched_add_domain(v);
-
-    return v;
-}
-
-void sched_add_domain(struct vcpu *v) 
+int sched_init_vcpu(struct vcpu *v) 
 {
     /* Initialise the per-domain timers. */
     init_timer(&v->timer, vcpu_timer_fn, v, v->processor);
@@ -179,17 +112,23 @@ void sched_add_domain(struct vcpu *v)
         set_bit(_VCPUF_running, &v->vcpu_flags);
     }
 
-    SCHED_OP(add_task, v);
     TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
-}
-
-void sched_rem_domain(struct vcpu *v) 
-{
-    kill_timer(&v->timer);
-    kill_timer(&v->poll_timer);
-
-    SCHED_OP(rem_task, v);
-    TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
+
+    return SCHED_OP(init_vcpu, v);
+}
+
+void sched_destroy_domain(struct domain *d)
+{
+    struct vcpu *v;
+
+    for_each_vcpu ( d, v )
+    {
+        kill_timer(&v->timer);
+        kill_timer(&v->poll_timer);
+        TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
+    }
+
+    SCHED_OP(destroy_domain, d);
 }
 
 void vcpu_sleep_nosync(struct vcpu *v)
@@ -663,7 +602,7 @@ static void poll_timer_fn(void *data)
 /* Initialise the data structures. */
 void __init scheduler_init(void)
 {
-    int i, rc;
+    int i;
 
     open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);
 
@@ -686,17 +625,6 @@ void __init scheduler_init(void)
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
     SCHED_OP(init);
-
-    if ( idle_vcpu[0] != NULL )
-    {
-        schedule_data[0].curr = idle_vcpu[0];
-        schedule_data[0].idle = idle_vcpu[0];
-
-        rc = SCHED_OP(alloc_task, idle_vcpu[0]);
-        BUG_ON(rc < 0);
-
-        sched_add_domain(idle_vcpu[0]);
-    }
 }
 
 /*
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/trace.c
--- a/xen/common/trace.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/trace.c        Wed Jun 07 11:03:51 2006 +0100
@@ -173,25 +173,17 @@ void init_trace_bufs(void)
  */
 int tb_control(dom0_tbufcontrol_t *tbc)
 {
-    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(lock);
     int rc = 0;
 
     spin_lock(&lock);
-
-    if ( !tb_init_done &&
-         (tbc->op != DOM0_TBUF_SET_SIZE) &&
-         (tbc->op != DOM0_TBUF_ENABLE) )
-    {
-        spin_unlock(&lock);
-        return -EINVAL;
-    }
 
     switch ( tbc->op )
     {
     case DOM0_TBUF_GET_INFO:
         tbc->cpu_mask   = tb_cpu_mask;
         tbc->evt_mask   = tb_event_mask;
-        tbc->buffer_mfn = __pa(t_bufs[0]) >> PAGE_SHIFT;
+        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(t_bufs[0]) : 0UL;
         tbc->size       = opt_tbuf_size * PAGE_SIZE;
         break;
     case DOM0_TBUF_SET_CPU_MASK:
diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/xmalloc.c
--- a/xen/common/xmalloc.c      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/common/xmalloc.c      Wed Jun 07 11:03:51 2006 +0100
@@ -35,7 +35,7 @@
 #include <xen/prefetch.h>
 
 static LIST_HEAD(freelist);
-static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(freelist_lock);
 
 struct xmalloc_hdr
 {
diff -r b09dbe439169 -r 9d86c1a70f34 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/drivers/char/console.c        Wed Jun 07 11:03:51 2006 +0100
@@ -53,7 +53,7 @@ static int sercon_handle = -1;
 static int sercon_handle = -1;
 static int vgacon_enabled = 0;
 
-spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(console_lock);
 
 /*
  * *******************************************************
@@ -563,7 +563,7 @@ static unsigned int debugtrace_prd; /* P
 static unsigned int debugtrace_prd; /* Producer index     */
 static unsigned int debugtrace_kilobytes = 128, debugtrace_bytes;
 static unsigned int debugtrace_used;
-static spinlock_t   debugtrace_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(debugtrace_lock);
 integer_param("debugtrace", debugtrace_kilobytes);
 
 void debugtrace_dump(void)
@@ -675,7 +675,7 @@ void panic(const char *fmt, ...)
     va_list args;
     char buf[128];
     unsigned long flags;
-    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+    static DEFINE_SPINLOCK(lock);
     extern void machine_restart(char *);
     
     debugtrace_dump();
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/asm-x86/page.h        Wed Jun 07 11:03:51 2006 +0100
@@ -172,10 +172,13 @@ typedef struct { u32 pfn; } pagetable_t;
 /* x86_64 */
 typedef struct { u64 pfn; } pagetable_t;
 #endif
-#define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT)
-#define pagetable_get_pfn(x)   ((x).pfn)
-#define mk_pagetable(pa)       \
-    ({ pagetable_t __p; __p.pfn = (pa) >> PAGE_SHIFT; __p; })
+#define pagetable_get_paddr(x)  ((paddr_t)(x).pfn << PAGE_SHIFT)
+#define pagetable_get_pfn(x)    ((x).pfn)
+#define pagetable_is_null(x)    ((x).pfn == 0)
+#define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) })
+#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
+#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
+#define pagetable_null()        pagetable_from_pfn(0)
 #endif
 
 #define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/arch-ia64.h    Wed Jun 07 11:03:51 2006 +0100
@@ -26,6 +26,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
 DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 /* Arch specific VIRQs definition */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/arch-x86_32.h  Wed Jun 07 11:03:51 2006 +0100
@@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
 DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 /*
@@ -138,9 +141,17 @@ struct vcpu_guest_context {
 struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST  (1<<1)
-#define VGCF_IN_KERNEL  (1<<2)
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_HVM_GUEST                 (1<<1)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_hvm_guest                1
+#define VGCF_hvm_guest                 (1<<_VGCF_hvm_guest)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
     unsigned long flags;                    /* VGCF_* flags                 */
     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
@@ -158,10 +169,18 @@ typedef struct vcpu_guest_context vcpu_g
 typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
 struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
-    unsigned long pfn_to_mfn_frame_list_list;
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
 };
 typedef struct arch_shared_info arch_shared_info_t;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/arch-x86_64.h  Wed Jun 07 11:03:51 2006 +0100
@@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
 DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 /*
@@ -211,9 +214,19 @@ struct vcpu_guest_context {
 struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST  (1<<1)
-#define VGCF_IN_KERNEL  (1<<2)
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_HVM_GUEST                 (1<<1)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_hvm_guest                1
+#define VGCF_hvm_guest                 (1<<_VGCF_hvm_guest)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
     unsigned long flags;                    /* VGCF_* flags                 */
     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
@@ -234,10 +247,13 @@ typedef struct vcpu_guest_context vcpu_g
 typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
 struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
-    unsigned long pfn_to_mfn_frame_list_list;
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
 };
 typedef struct arch_shared_info arch_shared_info_t;
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/callback.h
--- a/xen/include/public/callback.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/callback.h     Wed Jun 07 11:03:51 2006 +0100
@@ -29,12 +29,20 @@
 #define CALLBACKTYPE_nmi                   4
 
 /*
+ * Disable event deliver during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events             0
+#define CALLBACKF_mask_events              (1U << _CALLBACKF_mask_events)
+
+/*
  * Register a callback.
  */
 #define CALLBACKOP_register                0
 struct callback_register {
-     int type;
-     xen_callback_t address;
+    uint16_t type;
+    uint16_t flags;
+    xen_callback_t address;
 };
 typedef struct callback_register callback_register_t;
 DEFINE_XEN_GUEST_HANDLE(callback_register_t);
@@ -47,7 +55,8 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
  */
 #define CALLBACKOP_unregister              1
 struct callback_unregister {
-     int type;
+    uint16_t type;
+    uint16_t _unused;
 };
 typedef struct callback_unregister callback_unregister_t;
 DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/dom0_ops.h     Wed Jun 07 11:03:51 2006 +0100
@@ -19,7 +19,7 @@
  * This makes sure that old versions of dom0 tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define DOM0_INTERFACE_VERSION   0x03000000
+#define DOM0_INTERFACE_VERSION   0x03000001
 
 /************************************************************************/
 
@@ -27,10 +27,10 @@ struct dom0_getmemlist {
 struct dom0_getmemlist {
     /* IN variables. */
     domid_t       domain;
-    unsigned long max_pfns;
-    XEN_GUEST_HANDLE(ulong) buffer;
-    /* OUT variables. */
-    unsigned long num_pfns;
+    uint64_t max_pfns;
+    XEN_GUEST_HANDLE(xen_pfn_t) buffer;
+    /* OUT variables. */
+    uint64_t num_pfns;
 };
 typedef struct dom0_getmemlist dom0_getmemlist_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
@@ -96,9 +96,9 @@ struct dom0_getdomaininfo {
 #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code.  */
 #define DOMFLAGS_SHUTDOWNSHIFT 16
     uint32_t flags;
-    unsigned long tot_pages;
-    unsigned long max_pages;
-    unsigned long shared_info_frame;       /* MFN of shared_info struct */
+    uint64_t tot_pages;
+    uint64_t max_pages;
+    xen_pfn_t shared_info_frame;  /* MFN of shared_info struct */
     uint64_t cpu_time;
     uint32_t nr_online_vcpus;     /* Number of VCPUs currently online. */
     uint32_t max_vcpu_id;         /* Maximum VCPUID in use by this domain. */
@@ -162,7 +162,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
 
 struct dom0_getpageframeinfo {
     /* IN variables. */
-    unsigned long mfn;     /* Machine page frame number to query.       */
+    xen_pfn_t mfn;         /* Machine page frame number to query.       */
     domid_t domain;        /* To which domain does the frame belong?    */
     /* OUT variables. */
     /* Is the page PINNED to a type? */
@@ -213,7 +213,7 @@ struct dom0_tbufcontrol {
     cpumap_t      cpu_mask;
     uint32_t      evt_mask;
     /* OUT variables */
-    unsigned long buffer_mfn;
+    xen_pfn_t buffer_mfn;
     uint32_t size;
 };
 typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
@@ -229,8 +229,8 @@ struct dom0_physinfo {
     uint32_t sockets_per_node;
     uint32_t nr_nodes;
     uint32_t cpu_khz;
-    unsigned long total_pages;
-    unsigned long free_pages;
+    uint64_t total_pages;
+    uint64_t free_pages;
     uint32_t hw_cap[8];
 };
 typedef struct dom0_physinfo dom0_physinfo_t;
@@ -276,7 +276,7 @@ struct dom0_shadow_control {
     uint32_t       op;
     XEN_GUEST_HANDLE(ulong) dirty_bitmap;
     /* IN/OUT variables. */
-    unsigned long  pages;        /* size of buffer, updated with actual size */
+    uint64_t       pages;        /* size of buffer, updated with actual size */
     /* OUT variables. */
     struct dom0_shadow_control_stats stats;
 };
@@ -286,8 +286,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_shadow_cont
 #define DOM0_SETDOMAINMAXMEM   28
 struct dom0_setdomainmaxmem {
     /* IN variables. */
-    domid_t       domain;
-    unsigned long max_memkb;
+    domid_t  domain;
+    uint64_t max_memkb;
 };
 typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
@@ -295,8 +295,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_setdomainma
 #define DOM0_GETPAGEFRAMEINFO2 29   /* batched interface */
 struct dom0_getpageframeinfo2 {
     /* IN variables. */
-    domid_t        domain;
-    unsigned long  num;
+    domid_t  domain;
+    uint64_t num;
     /* IN/OUT variables. */
     XEN_GUEST_HANDLE(ulong) array;
 };
@@ -313,12 +313,12 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
 #define DOM0_ADD_MEMTYPE         31
 struct dom0_add_memtype {
     /* IN variables. */
-    unsigned long mfn;
-    unsigned long nr_mfns;
-    uint32_t      type;
-    /* OUT variables. */
-    uint32_t      handle;
-    uint32_t      reg;
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+    /* OUT variables. */
+    uint32_t handle;
+    uint32_t reg;
 };
 typedef struct dom0_add_memtype dom0_add_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
@@ -345,8 +345,8 @@ struct dom0_read_memtype {
     /* IN variables. */
     uint32_t reg;
     /* OUT variables. */
-    unsigned long mfn;
-    unsigned long nr_mfns;
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
     uint32_t type;
 };
 typedef struct dom0_read_memtype dom0_read_memtype_t;
@@ -499,8 +499,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_irq_permiss
 #define DOM0_IOMEM_PERMISSION 47
 struct dom0_iomem_permission {
     domid_t  domain;          /* domain to be affected */
-    unsigned long first_mfn;  /* first page (physical page number) in range */
-    unsigned long nr_mfns;    /* number of pages in range (>0) */
+    xen_pfn_t first_mfn;      /* first page (physical page number) in range */
+    uint64_t nr_mfns;         /* number of pages in range (>0) */
     uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
 };
 typedef struct dom0_iomem_permission dom0_iomem_permission_t;
@@ -509,7 +509,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permi
 #define DOM0_HYPERCALL_INIT   48
 struct dom0_hypercall_init {
     domid_t  domain;          /* domain to be affected */
-    unsigned long mfn;        /* machine frame to be initialised */
+    xen_pfn_t mfn;            /* machine frame to be initialised */
 };
 typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/grant_table.h  Wed Jun 07 11:03:51 2006 +0100
@@ -240,7 +240,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
 #define GNTTABOP_transfer                4
 struct gnttab_transfer {
     /* IN parameters. */
-    unsigned long mfn;
+    xen_pfn_t     mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/io/netif.h     Wed Jun 07 11:03:51 2006 +0100
@@ -26,6 +26,10 @@
 /* Packet data has been validated against protocol checksum. */
 #define _NETTXF_data_validated (1)
 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the request. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
 
 struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h      Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/io/ring.h      Wed Jun 07 11:03:51 2006 +0100
@@ -151,19 +151,27 @@ typedef struct __name##_back_ring __name
 #define RING_SIZE(_r)                                                   \
     ((_r)->nr_ents)
 
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r)                                         \
+    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
 /* Test if there is an empty slot available on the front ring.
  * (This is only meaningful from the front. )
  */
 #define RING_FULL(_r)                                                   \
-    (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
+    (RING_FREE_REQUESTS(_r) == 0)
 
 /* Test if there are outstanding messages to be processed on a ring. */
 #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
-    ((_r)->rsp_cons != (_r)->sring->rsp_prod)
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
 
 #define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
-    (((_r)->req_cons != (_r)->sring->req_prod) &&                       \
-     (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r)))
+    ({                                                                 \
+       unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;      \
+       unsigned int rsp = RING_SIZE(_r) -                              \
+                          ((_r)->req_cons - (_r)->rsp_prod_pvt);       \
+       req < rsp ? req : rsp;                                          \
+    })
 
 /* Direct access to individual ring elements, by index. */
 #define RING_GET_REQUEST(_r, _idx)                                      \
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/memory.h       Wed Jun 07 11:03:51 2006 +0100
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    XEN_GUEST_HANDLE(ulong) extent_start;
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
     unsigned long  nr_extents;
@@ -87,7 +87,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    XEN_GUEST_HANDLE(ulong) extent_start;
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /*
      * Number of extents written to the above array. This will be smaller
@@ -117,7 +117,7 @@ struct xen_add_to_physmap {
     unsigned long idx;
 
     /* GPFN where the source mapping page should appear. */
-    unsigned long gpfn;
+    xen_pfn_t     gpfn;
 };
 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
@@ -135,13 +135,13 @@ struct xen_translate_gpfn_list {
     unsigned long nr_gpfns;
 
     /* List of GPFNs to translate. */
-    XEN_GUEST_HANDLE(ulong) gpfn_list;
+    XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
 
     /*
      * Output list to contain MFN translations. May be the same as the input
      * list (in which case each input GPFN is overwritten with the output MFN).
      */
-    XEN_GUEST_HANDLE(ulong) mfn_list;
+    XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
 };
 typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/public/xen.h  Wed Jun 07 11:03:51 2006 +0100
@@ -197,7 +197,7 @@ struct mmuext_op {
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
-        unsigned long mfn;
+        xen_pfn_t     mfn;
         /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
         unsigned long linear_addr;
     } arg1;
@@ -234,10 +234,24 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
  */
 #define VMASST_CMD_enable                0
 #define VMASST_CMD_disable               1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
 #define VMASST_TYPE_4gb_segments         0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
 #define VMASST_TYPE_4gb_segments_notify  1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
 #define VMASST_TYPE_writable_pagetables  2
-#define MAX_VMASST_TYPE 2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3     3
+
+#define MAX_VMASST_TYPE                  3
 
 #ifndef __ASSEMBLY__
 
@@ -443,9 +457,9 @@ struct start_info {
     unsigned long nr_pages;     /* Total pages allocated to this domain.  */
     unsigned long shared_info;  /* MACHINE address of shared info struct. */
     uint32_t flags;             /* SIF_xxx flags.                         */
-    unsigned long store_mfn;    /* MACHINE page number of shared page.    */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
     uint32_t store_evtchn;      /* Event channel for store communication. */
-    unsigned long console_mfn;  /* MACHINE address of console page.       */
+    xen_pfn_t console_mfn;      /* MACHINE page number of console page.   */
     uint32_t console_evtchn;    /* Event channel for console messages.    */
     /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
     unsigned long pt_base;      /* VIRTUAL address of page directory.     */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/console.h
--- a/xen/include/xen/console.h Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/console.h Wed Jun 07 11:03:51 2006 +0100
@@ -9,8 +9,6 @@
 
 #include <xen/spinlock.h>
 #include <xen/guest_access.h>
-
-extern spinlock_t console_lock;
 
 void set_printk_prefix(const char *prefix);
 
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/domain.h  Wed Jun 07 11:03:51 2006 +0100
@@ -2,8 +2,14 @@
 #ifndef __XEN_DOMAIN_H__
 #define __XEN_DOMAIN_H__
 
-extern int boot_vcpu(
+struct vcpu *alloc_vcpu(
+    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
+int boot_vcpu(
     struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt);
+struct vcpu *alloc_idle_vcpu(unsigned int cpu_id);
+
+struct domain *alloc_domain(domid_t domid);
+void free_domain(struct domain *d);
 
 /*
  * Arch-specifics.
@@ -11,19 +17,18 @@ extern int boot_vcpu(
 
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id);
 
-extern void free_vcpu_struct(struct vcpu *v);
+void free_vcpu_struct(struct vcpu *v);
 
-extern int arch_domain_create(struct domain *d);
+int arch_domain_create(struct domain *d);
 
-extern void arch_domain_destroy(struct domain *d);
+void arch_domain_destroy(struct domain *d);
 
-extern int arch_set_info_guest(
-    struct vcpu *v, struct vcpu_guest_context *c);
+int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c);
 
-extern void domain_relinquish_resources(struct domain *d);
+void domain_relinquish_resources(struct domain *d);
 
-extern void dump_pageframe_info(struct domain *d);
+void dump_pageframe_info(struct domain *d);
 
-extern void arch_dump_domain_info(struct domain *d);
+void arch_dump_domain_info(struct domain *d);
 
 #endif /* __XEN_DOMAIN_H__ */
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/sched-if.h        Wed Jun 07 11:03:51 2006 +0100
@@ -60,14 +60,17 @@ struct scheduler {
 
     void         (*init)           (void);
     void         (*tick)           (unsigned int cpu);
-    int          (*alloc_task)     (struct vcpu *);
-    void         (*add_task)       (struct vcpu *);
-    void         (*free_task)      (struct domain *);
-    void         (*rem_task)       (struct vcpu *);
+
+    int          (*init_vcpu)      (struct vcpu *);
+    void         (*destroy_domain) (struct domain *);
+
     void         (*sleep)          (struct vcpu *);
     void         (*wake)           (struct vcpu *);
+
     int          (*set_affinity)   (struct vcpu *, cpumask_t *);
+
     struct task_slice (*do_schedule) (s_time_t);
+
     int          (*control)        (struct sched_ctl_cmd *);
     int          (*adjdom)         (struct domain *,
                                     struct sched_adjdom_cmd *);
diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Wed Jun 07 11:03:15 2006 +0100
+++ b/xen/include/xen/sched.h   Wed Jun 07 11:03:51 2006 +0100
@@ -186,12 +186,6 @@ extern struct vcpu *idle_vcpu[NR_CPUS];
 #define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
 #define is_idle_vcpu(v)   (is_idle_domain((v)->domain))
 
-struct vcpu *alloc_vcpu(
-    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
-
-struct domain *alloc_domain(void);
-void free_domain(struct domain *d);
-
 #define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
 #define put_domain(_d) \
   if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)
@@ -226,7 +220,7 @@ static inline void get_knownalive_domain
 }
 
 extern struct domain *domain_create(
-    domid_t dom_id, unsigned int cpu);
+    domid_t domid, unsigned int cpu);
 extern int construct_dom0(
     struct domain *d,
     unsigned long image_start, unsigned long image_len, 
@@ -269,8 +263,8 @@ void new_thread(struct vcpu *d,
 #define set_current_state(_s) do { current->state = (_s); } while (0)
 void scheduler_init(void);
 void schedulers_start(void);
-void sched_add_domain(struct vcpu *);
-void sched_rem_domain(struct vcpu *);
+int  sched_init_vcpu(struct vcpu *);
+void sched_destroy_domain(struct domain *);
 long sched_ctl(struct sched_ctl_cmd *);
 long sched_adjdom(struct sched_adjdom_cmd *);
 int  sched_id(void);
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Wed Jun 07 
11:03:15 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-
-ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
-obj-y   += util.o
-endif
-
-obj-y  += core/
-#obj-y += char/
-obj-y  += console/
-obj-y  += evtchn/
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/
-obj-y  += privcmd/
-obj-y  += xenbus/
-
-obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
-obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
-obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
-obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
-obj-$(CONFIG_XEN_NETDEV_FRONTEND)      += netfront/
-obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
-obj-$(CONFIG_XEN_TPMDEV_FRONTEND)      += tpmfront/
-obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
-obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile   Wed Jun 07 
11:03:15 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-obj-y   := gnttab.o features.o
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-
-ifeq ($(ARCH),ia64)
-obj-y   += evtchn.o
-obj-y   += xenia64_init.o
-ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
-obj-$(CONFIG_NET)     += skbuff.o
-endif
-else
-obj-y   += reboot.o evtchn.o fixup.o 
-obj-$(CONFIG_SMP)     += smp.o         # setup_profiling_timer def'd in ia64
-obj-$(CONFIG_NET)     += skbuff.o      # until networking is up on ia64
-endif
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] Merged., Xen patchbot-unstable <=