WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-ppc-devel

[XenPPC] [PATCH] [ppc] merge with upstream

To: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Subject: [XenPPC] [PATCH] [ppc] merge with upstream
From: Hollis Blanchard <hollisb@xxxxxxxxxx>
Date: Tue, 30 May 2006 15:06:08 -0500
Delivery-date: Tue, 30 May 2006 13:06:20 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-ppc-devel-request@lists.xensource.com?subject=help>
List-id: Xen PPC development <xen-ppc-devel.lists.xensource.com>
List-post: <mailto:xen-ppc-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-ppc-devel-bounces@xxxxxxxxxxxxxxxxxxx
235 files changed, 7718 insertions(+), 4608 deletions(-)
.hgignore                                                               |    2 
buildconfigs/linux-defconfig_xen0_ia64                                  |   38 
buildconfigs/linux-defconfig_xenU_ia64                                  |   30 
buildconfigs/linux-defconfig_xen_ia64                                   |   38 
docs/src/interface.tex                                                  |   29 
docs/src/user.tex                                                       |   32 
extras/mini-os/Makefile                                                 |   15 
extras/mini-os/include/mm.h                                             |   79 
extras/mini-os/include/types.h                                          |    7 
extras/mini-os/kernel.c                                                 |   11 
extras/mini-os/lib/printf.c                                             |    4 
extras/mini-os/lib/string.c                                             |    4 
extras/mini-os/mm.c                                                     |   71 
extras/mini-os/traps.c                                                  |   21 
extras/mini-os/x86_32.S                                                 |    8 
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c                       |  266 +-
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c                         |    1 
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c                            |   13 
linux-2.6-xen-sparse/arch/ia64/Kconfig                                  |   54 
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c                           |    3 
linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre                      |   14 
linux-2.6-xen-sparse/arch/ia64/xen/Makefile                             |    6 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile                     |   24 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile                 |   12 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c                |   17 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c               |    9 
linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S                          |    4 
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c                         |  550 +++-
linux-2.6-xen-sparse/arch/ia64/xen/util.c                               |  115 
linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c                          |   12 
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S                           |  198 -
linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S                             |  586 ++--
linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h                        |    2 
linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S                           |   21 
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c                      |  257 --
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c                     |   73 
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                          |   26 
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c                         |    2 
linux-2.6-xen-sparse/drivers/xen/Makefile                               |    1 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c                      |   12 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c                      |    4 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c                       |    2 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c                    |    6 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c                         |    2 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c                        |    2 
linux-2.6-xen-sparse/drivers/xen/console/console.c                      |   32 
linux-2.6-xen-sparse/drivers/xen/core/Makefile                          |   11 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c                     |  185 +
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c                          |   31 
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c                          |    5 
linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c                |    3 
linux-2.6-xen-sparse/drivers/xen/core/reboot.c                          |    9 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c                         |  215 -
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c                        |    3 
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c                      |   58 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c                      |    7 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c                       |   31 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c                    |   91 
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c                       |    2 
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c                      |    4 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c                      |   26 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c                       |    6 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c                 |    8 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c                  |   12 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h         |    2 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h        |   28 
linux-2.6-xen-sparse/include/asm-ia64/hypercall.h                       |   84 
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h                      |   34 
linux-2.6-xen-sparse/include/asm-ia64/page.h                            |   50 
linux-2.6-xen-sparse/include/asm-ia64/privop.h                          |   11 
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h                      |   58 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h             |   63 
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h                          |   42 
linux-2.6-xen-sparse/include/xen/net_driver_util.h                      |   48 
linux-2.6-xen-sparse/include/xen/xenbus.h                               |    8 
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch                       |   18 
patches/linux-2.6.16.13/xen-hotplug.patch                               |   11 
tools/examples/network-bridge                                           |    2 
tools/examples/xmexample.vti                                            |    2 
tools/libxc/Makefile                                                    |    1 
tools/libxc/xc_acm.c                                                    |   33 
tools/libxc/xc_csched.c                                                 |   50 
tools/libxc/xc_ia64_stubs.c                                             |    6 
tools/libxc/xc_linux_build.c                                            |   57 
tools/libxc/xc_linux_restore.c                                          |  122 
tools/libxc/xc_load_elf.c                                               |   54 
tools/libxc/xc_private.c                                                |   22 
tools/libxc/xc_ptrace.c                                                 |  173 -
tools/libxc/xc_ptrace.h                                                 |    3 
tools/libxc/xc_ptrace_core.c                                            |    7 
tools/libxc/xc_tbuf.c                                                   |   56 
tools/libxc/xenctrl.h                                                   |   13 
tools/libxc/xg_private.h                                                |   10 
tools/python/xen/lowlevel/acm/acm.c                                     |   54 
tools/python/xen/lowlevel/xc/xc.c                                       |   68 
tools/python/xen/lowlevel/xs/xs.c                                       |   11 
tools/python/xen/xend/XendDomain.py                                     |   22 
tools/python/xen/xend/XendDomainInfo.py                                 |   24 
tools/python/xen/xend/balloon.py                                        |   11 
tools/python/xen/xend/image.py                                          |   27 
tools/python/xen/xend/server/SrvDomain.py                               |   14 
tools/python/xen/xend/xenstore/xstransact.py                            |   28 
tools/python/xen/xm/main.py                                             |   45 
tools/security/secpol_tool.c                                            |   32 
tools/tests/test_x86_emulator.c                                         |   67 
tools/xenstore/Makefile                                                 |    8 
tools/xenstore/xenstored_core.c                                         |    7 
tools/xenstore/xenstored_core.h                                         |    8 
tools/xenstore/xenstored_domain.c                                       |   37 
tools/xenstore/xenstored_linux.c                                        |   69 
tools/xenstore/xenstored_proc.h                                         |   27 
tools/xentrace/xentrace_format                                          |    6 
tools/xm-test/configure.ac                                              |    1 
tools/xm-test/ramdisk/bin/create_disk_image                             |    7 
tools/xm-test/tests/Makefile.am                                         |    7 
tools/xm-test/tests/block-integrity/01_block_device_read_verify.py      |   62 
tools/xm-test/tests/block-integrity/Makefile.am                         |   21 
tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py |   32 
tools/xm-test/tests/network/03_network_local_tcp_pos.py                 |    4 
tools/xm-test/tests/network/04_network_local_udp_pos.py                 |    4 
tools/xm-test/tests/network/06_network_dom0_tcp_pos.py                  |    2 
tools/xm-test/tests/network/07_network_dom0_udp_pos.py                  |    2 
tools/xm-test/tests/network/12_network_domU_tcp_pos.py                  |    2 
tools/xm-test/tests/network/13_network_domU_udp_pos.py                  |    2 
xen/arch/ia64/Rules.mk                                                  |   28 
xen/arch/ia64/asm-offsets.c                                             |    2 
xen/arch/ia64/linux-xen/setup.c                                         |   10 
xen/arch/ia64/linux-xen/smp.c                                           |   32 
xen/arch/ia64/linux-xen/unaligned.c                                     |    2 
xen/arch/ia64/tools/sparse-merge                                        |  144 +
xen/arch/ia64/vmx/pal_emul.c                                            |    6 
xen/arch/ia64/vmx/vlsapic.c                                             |    7 
xen/arch/ia64/vmx/vmmu.c                                                |   22 
xen/arch/ia64/vmx/vmx_entry.S                                           |   69 
xen/arch/ia64/vmx/vmx_init.c                                            |   24 
xen/arch/ia64/vmx/vmx_interrupt.c                                       |    6 
xen/arch/ia64/vmx/vmx_ivt.S                                             |  127 -
xen/arch/ia64/vmx/vmx_phy_mode.c                                        |    9 
xen/arch/ia64/vmx/vmx_process.c                                         |    2 
xen/arch/ia64/vmx/vmx_support.c                                         |    8 
xen/arch/ia64/vmx/vmx_vcpu.c                                            |    4 
xen/arch/ia64/vmx/vmx_virt.c                                            |   23 
xen/arch/ia64/vmx/vtlb.c                                                |   86 
xen/arch/ia64/xen/Makefile                                              |    1 
xen/arch/ia64/xen/dom0_ops.c                                            |   12 
xen/arch/ia64/xen/dom_fw.c                                              |   51 
xen/arch/ia64/xen/domain.c                                              |  687 ++++-
xen/arch/ia64/xen/efi_emul.c                                            |  180 +
xen/arch/ia64/xen/hypercall.c                                           |   98 
xen/arch/ia64/xen/hyperprivop.S                                         |  138 -
xen/arch/ia64/xen/ivt.S                                                 |   49 
xen/arch/ia64/xen/privop.c                                              |   27 
xen/arch/ia64/xen/process.c                                             |  301 --
xen/arch/ia64/xen/regionreg.c                                           |   10 
xen/arch/ia64/xen/vcpu.c                                                |   68 
xen/arch/ia64/xen/vhpt.c                                                |  214 +
xen/arch/ia64/xen/xenasm.S                                              |  349 --
xen/arch/ia64/xen/xenmisc.c                                             |    3 
xen/arch/ia64/xen/xensetup.c                                            |    3 
xen/arch/x86/dom0_ops.c                                                 |   21 
xen/arch/x86/domain.c                                                   |    2 
xen/arch/x86/domain_build.c                                             |    5 
xen/arch/x86/hvm/hvm.c                                                  |   22 
xen/arch/x86/hvm/i8254.c                                                |  405 +--
xen/arch/x86/hvm/intercept.c                                            |   82 
xen/arch/x86/hvm/svm/intr.c                                             |   47 
xen/arch/x86/hvm/svm/svm.c                                              |   48 
xen/arch/x86/hvm/svm/vmcb.c                                             |   31 
xen/arch/x86/hvm/vmx/io.c                                               |   62 
xen/arch/x86/hvm/vmx/vmx.c                                              |   37 
xen/arch/x86/mm.c                                                       |  204 +
xen/arch/x86/shadow.c                                                   |    6 
xen/arch/x86/shadow32.c                                                 |    8 
xen/arch/x86/shadow_public.c                                            |    5 
xen/arch/x86/traps.c                                                    |    4 
xen/arch/x86/x86_emulate.c                                              |   81 
xen/common/Makefile                                                     |    1 
xen/common/acm_ops.c                                                    |  282 +-
xen/common/elf.c                                                        |   49 
xen/common/grant_table.c                                                |   15 
xen/common/kernel.c                                                     |    5 
xen/common/sched_credit.c                                               | 1233 ++++++++++
xen/common/schedule.c                                                   |    5 
xen/common/trace.c                                                      |    6 
xen/include/asm-ia64/config.h                                           |   11 
xen/include/asm-ia64/dom_fw.h                                           |   14 
xen/include/asm-ia64/domain.h                                           |   26 
xen/include/asm-ia64/event.h                                            |    2 
xen/include/asm-ia64/flushtlb.h                                         |    9 
xen/include/asm-ia64/grant_table.h                                      |   33 
xen/include/asm-ia64/linux-xen/asm/pgalloc.h                            |    2 
xen/include/asm-ia64/linux-xen/asm/pgtable.h                            |   14 
xen/include/asm-ia64/linux-xen/asm/tlbflush.h                           |  119 
xen/include/asm-ia64/mm.h                                               |   10 
xen/include/asm-ia64/shadow.h                                           |   57 
xen/include/asm-ia64/tlbflush.h                                         |   37 
xen/include/asm-ia64/vcpu.h                                             |    8 
xen/include/asm-ia64/vhpt.h                                             |   18 
xen/include/asm-ia64/vmx_vcpu.h                                         |    2 
xen/include/asm-x86/domain.h                                            |   21 
xen/include/asm-x86/fixmap.h                                            |   10 
xen/include/asm-x86/hvm/domain.h                                        |    6 
xen/include/asm-x86/hvm/svm/intr.h                                      |    1 
xen/include/asm-x86/hvm/svm/svm.h                                       |    1 
xen/include/asm-x86/hvm/vcpu.h                                          |    3 
xen/include/asm-x86/hvm/vmx/vmx.h                                       |    1 
xen/include/asm-x86/hvm/vpit.h                                          |   67 
xen/include/asm-x86/string.h                                            |  162 -
xen/include/asm-x86/x86_emulate.h                                       |   66 
xen/include/public/acm_ops.h                                            |   54 
xen/include/public/arch-ia64.h                                          |  119 
xen/include/public/arch-x86_32.h                                        |   36 
xen/include/public/arch-x86_64.h                                        |   29 
xen/include/public/callback.h                                           |   10 
xen/include/public/dom0_ops.h                                           |  205 -
xen/include/public/event_channel.h                                      |   99 
xen/include/public/grant_table.h                                        |   30 
xen/include/public/hvm/ioreq.h                                          |   26 
xen/include/public/hvm/vmx_assist.h                                     |    5 
xen/include/public/io/blkif.h                                           |   12 
xen/include/public/io/netif.h                                           |   32 
xen/include/public/io/tpmif.h                                           |   19 
xen/include/public/io/xenbus.h                                          |   59 
xen/include/public/memory.h                                             |   48 
xen/include/public/nmi.h                                                |    5 
xen/include/public/physdev.h                                            |   45 
xen/include/public/sched.h                                              |   15 
xen/include/public/sched_ctl.h                                          |    5 
xen/include/public/vcpu.h                                               |   10 
xen/include/public/version.h                                            |   15 
xen/include/public/xen.h                                                |   47 
xen/include/public/xenoprof.h                                           |   15 
xen/include/xen/hypercall.h                                             |    2 
xen/include/xen/sched-if.h                                              |    2 
xen/include/xen/softirq.h                                               |   13 


# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Node ID f54d38cea8acaa870e6b73990fbff61fe4c3e2ac
# Parent  e7424645152709dfbacd30df4b996db736403408
# Parent  d5f98d23427a0d256b896fc63ccfd2c1f79e55ba
[ppc] merge with upstream
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>

diff -r e74246451527 -r f54d38cea8ac .hgignore
--- a/.hgignore Tue May 30 12:52:02 2006 -0500
+++ b/.hgignore Tue May 30 14:30:34 2006 -0500
@@ -14,7 +14,7 @@
 .*\.orig$
 .*\.rej$
 .*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
 ^[^/]*\.bz2$
 ^TAGS$
 ^dist/.*$
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64    Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xen0_ia64    Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xen0
-# Mon Mar 27 14:46:03 2006
+# Linux kernel version: 2.6.16.13-xen0
+# Mon May 22 14:46:31 2006
 #
 
 #
@@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
 # CONFIG_IA64_GENERIC is not set
@@ -1522,3 +1517,30 @@ CONFIG_CRYPTO_DES=y
 #
 # Hardware crypto devices
 #
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_NETDEV_LOOPBACK=y
+# CONFIG_XEN_TPMDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64    Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xenU_ia64    Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xenU
-# Mon Mar 27 14:01:13 2006
+# Linux kernel version: 2.6.16.13-xenU
+# Mon May 22 15:05:32 2006
 #
 
 #
@@ -89,12 +89,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
 # CONFIG_IA64_GENERIC is not set
@@ -1386,3 +1381,22 @@ CONFIG_CRYPTO_DES=y
 #
 # Hardware crypto devices
 #
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+CONFIG_XEN_UNPRIVILEGED_GUEST=y
+# CONFIG_XEN_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen_ia64
--- a/buildconfigs/linux-defconfig_xen_ia64     Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xen_ia64     Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xen
-# Mon Mar 27 14:36:21 2006
+# Linux kernel version: 2.6.16.13-xen
+# Mon May 22 14:15:20 2006
 #
 
 #
@@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
 # CONFIG_IA64_GENERIC is not set
@@ -1528,3 +1523,30 @@ CONFIG_CRYPTO_DES=y
 #
 # Hardware crypto devices
 #
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_NETDEV_LOOPBACK=y
+# CONFIG_XEN_TPMDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac docs/src/interface.tex
--- a/docs/src/interface.tex    Tue May 30 12:52:02 2006 -0500
+++ b/docs/src/interface.tex    Tue May 30 14:30:34 2006 -0500
@@ -205,30 +205,23 @@ implement timeout values when they block
 implement timeout values when they block.
 
 
-
-%% % akw: demoting this to a section -- not sure if there is any point
-%% % though, maybe just remove it.
-
-% KAF: Remove these random sections!
-\begin{comment}
 \section{Xen CPU Scheduling}
 
 Xen offers a uniform API for CPU schedulers.  It is possible to choose
 from a number of schedulers at boot and it should be easy to add more.
-The BVT, Atropos and Round Robin schedulers are part of the normal Xen
-distribution.  BVT provides proportional fair shares of the CPU to the
-running domains.  Atropos can be used to reserve absolute shares of
-the CPU for each domain.  Round-robin is provided as an example of
-Xen's internal scheduler API.
+The SEDF, BVT, and Credit schedulers are part of the normal Xen
+distribution.  BVT and SEDF will be going away and their use should be
+avoided once the credit scheduler has stabilized and become the default.
+The Credit scheduler provides proportional fair shares of the
+host's CPUs to the running domains. It does this while transparently
+load balancing runnable VCPUs across the whole system.
 
 \paragraph*{Note: SMP host support}
-Xen has always supported SMP host systems.  Domains are statically
-assigned to CPUs, either at creation time or when manually pinning to
-a particular CPU.  The current schedulers then run locally on each CPU
-to decide which of the assigned domains should be run there. The
-user-level control software can be used to perform coarse-grain
-load-balancing between CPUs.
-\end{comment}
+Xen has always supported SMP host systems. When using the credit scheduler,
+a domain's VCPUs will be dynamically moved across physical CPUs to maximise
+domain and system throughput. VCPUs can also be manually restricted to be
+mapped only on a subset of the host's physical CPUs, using the pinning
+mechanism.
 
 
 %% More information on the characteristics and use of these schedulers
diff -r e74246451527 -r f54d38cea8ac docs/src/user.tex
--- a/docs/src/user.tex Tue May 30 12:52:02 2006 -0500
+++ b/docs/src/user.tex Tue May 30 14:30:34 2006 -0500
@@ -1093,6 +1093,36 @@ running domains in \xend's SXP configura
 
 You can get access to the console of a particular domain using 
 the \verb_# xm console_ command  (e.g.\ \verb_# xm console myVM_). 
+
+\subsection{Domain Scheduling Management Commands}
+
+The credit CPU scheduler automatically load balances guest VCPUs
+across all available physical CPUs on an SMP host. The user need
+not manually pin VCPUs to load balance the system. However, she
+can restrict which CPUs a particular VCPU may run on using
+the \path{xm vcpu-pin} command.
+
+Each guest domain is assigned a \path{weight} and a \path{cap}.
+
+A domain with a weight of 512 will get twice as much CPU as a
+domain with a weight of 256 on a contended host. Legal weights
+range from 1 to 65535 and the default is 256.
+
+The cap optionally fixes the maximum amount of CPU a guest will
+be able to consume, even if the host system has idle CPU cycles.
+The cap is expressed in percentage of one physical CPU: 100 is
+1 physical CPU, 50 is half a CPU, 400 is 4 CPUs, etc... The
+default, 0, means there is no upper cap.
+
+When you are running with the credit scheduler, you can check and
+modify your domains' weights and caps using the \path{xm sched-credit}
+command:
+
+\begin{tabular}{ll}
+\verb!xm sched-credit -d <domain>! & lists weight and cap \\
+\verb!xm sched-credit -d <domain> -w <weight>! & sets the weight \\
+\verb!xm sched-credit -d <domain> -c <cap>! & sets the cap
+\end{tabular}
 
 
 
@@ -1985,7 +2015,7 @@ editing \path{grub.conf}.
 \item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
   pages (default 0).  
 \item [ sched=xxx ] Select the CPU scheduler Xen should use.  The
-  current possibilities are `sedf' (default) and `bvt'.
+  current possibilities are `sedf' (default), `credit', and `bvt'.
 \item [ apic\_verbosity=debug,verbose ] Print more detailed
   information about local APIC and IOAPIC configuration.
 \item [ lapic ] Force use of local APIC even when left disabled by
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/Makefile   Tue May 30 14:30:34 2006 -0500
@@ -1,4 +1,5 @@ debug ?= y
 debug ?= y
+pae ?= n
 
 include $(CURDIR)/../../Config.mk
 
@@ -12,11 +13,17 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
 override CPPFLAGS := -Iinclude $(CPPFLAGS)
 ASFLAGS = -D__ASSEMBLY__
 
+LDLIBS =  -L. -lminios
 LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
 
 ifeq ($(TARGET_ARCH),x86_32)
 CFLAGS += -m32 -march=i686
 LDFLAGS += -m elf_i386
+endif
+
+ifeq ($(TARGET_ARCH)$(pae),x86_32y)
+CFLAGS  += -DCONFIG_X86_PAE=1
+ASFLAGS += -DCONFIG_X86_PAE=1
 endif
 
 ifeq ($(TARGET_ARCH),x86_64)
@@ -49,11 +56,11 @@ links:
 links:
        [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
 
-libminios.a: $(OBJS) $(HEAD)
-       ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+       $(AR) r libminios.a $(HEAD) $(OBJS)
 
-$(TARGET): links libminios.a $(HEAD)
-       $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+       $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
        gzip -f -9 -c $@.elf >$@.gz
 
 .PHONY: clean
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/include/mm.h       Tue May 30 14:30:34 2006 -0500
@@ -43,6 +43,8 @@
 
 #if defined(__i386__)
 
+#if !defined(CONFIG_X86_PAE)
+
 #define L2_PAGETABLE_SHIFT      22
 
 #define L1_PAGETABLE_ENTRIES    1024
@@ -50,6 +52,30 @@
 
 #define PADDR_BITS              32
 #define PADDR_MASK              (~0UL)
+
+#define UNMAPPED_PT_FRAMES        1
+#define PRIpte "08lx"
+typedef unsigned long pgentry_t;
+
+#else /* defined(CONFIG_X86_PAE) */
+
+#define L2_PAGETABLE_SHIFT      21
+#define L3_PAGETABLE_SHIFT      30
+
+#define L1_PAGETABLE_ENTRIES    512
+#define L2_PAGETABLE_ENTRIES    512
+#define L3_PAGETABLE_ENTRIES    4
+
+#define PADDR_BITS              44
+#define PADDR_MASK              ((1ULL << PADDR_BITS)-1)
+
+#define L2_MASK  ((1UL << L3_PAGETABLE_SHIFT) - 1)
+
+#define UNMAPPED_PT_FRAMES        2
+#define PRIpte "016llx"
+typedef uint64_t pgentry_t;
+
+#endif /* !defined(CONFIG_X86_PAE) */
 
 #elif defined(__x86_64__)
 
@@ -81,6 +107,10 @@
 #define L2_MASK  ((1UL << L3_PAGETABLE_SHIFT) - 1)
 #define L3_MASK  ((1UL << L4_PAGETABLE_SHIFT) - 1)
 
+#define UNMAPPED_PT_FRAMES        3
+#define PRIpte "016lx"
+typedef unsigned long pgentry_t;
+
 #endif
 
 #define L1_MASK  ((1UL << L2_PAGETABLE_SHIFT) - 1)
@@ -90,9 +120,11 @@
   (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
 #define l2_table_offset(_a) \
   (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
 #define l3_table_offset(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#endif
+#if defined(__x86_64__)
 #define l4_table_offset(_a) \
   (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
 #endif
@@ -111,14 +143,21 @@
 #if defined(__i386__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
+#if defined(CONFIG_X86_PAE)
+#define L3_PROT (_PAGE_PRESENT)
+#endif /* CONFIG_X86_PAE */
 #elif defined(__x86_64__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#endif
-
+#endif /* __i386__ || __x86_64__ */
+
+#ifndef CONFIG_X86_PAE
 #define PAGE_SIZE       (1UL << L1_PAGETABLE_SHIFT)
+#else
+#define PAGE_SIZE       (1ULL << L1_PAGETABLE_SHIFT)
+#endif
 #define PAGE_SHIFT      L1_PAGETABLE_SHIFT
 #define PAGE_MASK       (~(PAGE_SIZE-1))
 
@@ -129,23 +168,31 @@
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
 extern unsigned long *phys_to_machine_mapping;
 extern char _text, _etext, _edata, _end;
 #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
-static __inline__ unsigned long phys_to_machine(unsigned long phys)
-{
-    unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
-    machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
-    return machine;
-}
-
+static __inline__ maddr_t phys_to_machine(paddr_t phys)
+{
+       maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+       machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+       return machine;
+}
 
 #define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
-static __inline__ unsigned long machine_to_phys(unsigned long machine)
-{
-    unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
-    phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK);
-    return phys;
+static __inline__ paddr_t machine_to_phys(maddr_t machine)
+{
+       paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+       phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+       return phys;
 }
 
 #define VIRT_START                 ((unsigned long)&_text)
@@ -155,7 +202,7 @@ static __inline__ unsigned long machine_
 
 #define virt_to_pfn(_virt)         (PFN_DOWN(to_phys(_virt)))
 #define mach_to_virt(_mach)        (to_virt(machine_to_phys(_mach)))
-#define mfn_to_virt(_mfn)          (mach_to_virt(_mfn << PAGE_SHIFT))
+#define mfn_to_virt(_mfn)          (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT))
 #define pfn_to_virt(_pfn)          (to_virt(_pfn << PAGE_SHIFT))
 
 /* Pagetable walking. */
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h    Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/include/types.h    Tue May 30 14:30:34 2006 -0500
@@ -43,14 +43,19 @@ typedef unsigned long long  u_quad_t;
 typedef unsigned long long  u_quad_t;
 typedef unsigned int        uintptr_t;
 
+#if !defined(CONFIG_X86_PAE)
 typedef struct { unsigned long pte_low; } pte_t;
+#else
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+#endif /* CONFIG_X86_PAE */
+
 #elif defined(__x86_64__)
 typedef long                quad_t;
 typedef unsigned long       u_quad_t;
 typedef unsigned long       uintptr_t;
 
 typedef struct { unsigned long pte; } pte_t;
-#endif
+#endif /* __i386__ || __x86_64__ */
 
 typedef  u8 uint8_t;
 typedef  s8 int8_t;
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/kernel.c   Tue May 30 14:30:34 2006 -0500
@@ -63,7 +63,12 @@ void failsafe_callback(void);
 
 extern char shared_info[PAGE_SIZE];
 
+#if !defined(CONFIG_X86_PAE)
 #define __pte(x) ((pte_t) { (x) } )
+#else
+#define __pte(x) ({ unsigned long long _x = (x);        \
+    ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+#endif
 
 static shared_info_t *map_shared_info(unsigned long pa)
 {
@@ -71,7 +76,7 @@ static shared_info_t *map_shared_info(un
         (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) )
     {
         printk("Failed to map shared_info!!\n");
-        *(int*)0=0;
+        do_exit();
     }
     return (shared_info_t *)shared_info;
 }
@@ -126,6 +131,10 @@ void start_kernel(start_info_t *si)
     /* WARN: don't do printk before here, it uses information from
        shared_info. Use xprintk instead. */
     memcpy(&start_info, si, sizeof(*si));
+    
+    /* set up minimal memory infos */
+    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
+
     /* Grab the shared_info pointer and put it in a safe place. */
     HYPERVISOR_shared_info = map_shared_info(start_info.shared_info);
 
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/lib/printf.c       Tue May 30 14:30:34 2006 -0500
@@ -53,6 +53,8 @@
  *
  * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char 
        return i;
 }
 
-
+#endif
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/lib/string.c       Tue May 30 14:30:34 2006 -0500
@@ -17,6 +17,8 @@
  * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
  ****************************************************************************
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
         }
         return NULL;
 }
+
+#endif
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/mm.c       Tue May 30 14:30:34 2006 -0500
@@ -368,7 +368,7 @@ void new_pt_frame(unsigned long *pt_pfn,
 void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, 
                                 unsigned long offset, unsigned long level)
 {   
-    unsigned long *tab = (unsigned long *)start_info.pt_base;
+    pgentry_t *tab = (pgentry_t *)start_info.pt_base;
     unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); 
     unsigned long prot_e, prot_t, pincmd;
     mmu_update_t mmu_updates[1];
@@ -382,40 +382,45 @@ void new_pt_frame(unsigned long *pt_pfn,
        as a page table page */
     memset((unsigned long*)pfn_to_virt(*pt_pfn), 0, PAGE_SIZE);  
  
-    if (level == L1_FRAME)
-    {
+    switch ( level )
+    {
+    case L1_FRAME:
          prot_e = L1_PROT;
          prot_t = L2_PROT;
          pincmd = MMUEXT_PIN_L1_TABLE;
-    }
-#if (defined __x86_64__)
-    else if (level == L2_FRAME)
-    {
+         break;
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
+    case L2_FRAME:
          prot_e = L2_PROT;
          prot_t = L3_PROT;
          pincmd = MMUEXT_PIN_L2_TABLE;
-    }
-    else if (level == L3_FRAME)
-    {
+         break;
+#endif
+#if defined(__x86_64__)
+    case L3_FRAME:
          prot_e = L3_PROT;
          prot_t = L4_PROT;
          pincmd = MMUEXT_PIN_L3_TABLE;
-    }
-#endif
-    else
-    {
+         break;
+#endif
+    default:
          printk("new_pt_frame() called with invalid level number %d\n", level);
          do_exit();
-    }    
+         break;
+    }
 
     /* Update the entry */
-#if (defined __x86_64__)
+#if defined(__x86_64__)
     tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
     tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
 #endif
-    mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + 
-                         sizeof(void *)* l1_table_offset(pt_page);
-    mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | 
+#if defined(CONFIG_X86_PAE)
+    tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
+#endif
+
+    mmu_updates[0].ptr = ((pgentry_t)tab[l2_table_offset(pt_page)] & 
PAGE_MASK) + 
+                         sizeof(pgentry_t) * l1_table_offset(pt_page);
+    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | 
                          (prot_e & ~_PAGE_RW);
     if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
     {
@@ -434,8 +439,8 @@ void new_pt_frame(unsigned long *pt_pfn,
 
     /* Now fill the new page table page with entries.
        Update the page directory as well. */
-    mmu_updates[0].ptr = (prev_l_mfn << PAGE_SHIFT) + sizeof(void *) * offset;
-    mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
+    mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + 
sizeof(pgentry_t) * offset;
+    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
     if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) 
     {            
        printk("ERROR: mmu_update failed\n");
@@ -450,16 +455,13 @@ void build_pagetable(unsigned long *star
     unsigned long start_address, end_address;
     unsigned long pfn_to_map, pt_pfn = *start_pfn;
     static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
-    unsigned long *tab = (unsigned long *)start_info.pt_base;
+    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
     unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
-    unsigned long page, offset;
+    unsigned long offset;
     int count = 0;
 
-#if defined(__x86_64__)
-    pfn_to_map = (start_info.nr_pt_frames - 3) * L1_PAGETABLE_ENTRIES;
-#else
-    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
-#endif
+    pfn_to_map = (start_info.nr_pt_frames - UNMAPPED_PT_FRAMES) * 
L1_PAGETABLE_ENTRIES;
+
     start_address = (unsigned long)pfn_to_virt(pfn_to_map);
     end_address = (unsigned long)pfn_to_virt(*max_pfn);
     
@@ -468,7 +470,7 @@ void build_pagetable(unsigned long *star
 
     while(start_address < end_address)
     {
-        tab = (unsigned long *)start_info.pt_base;
+        tab = (pgentry_t *)start_info.pt_base;
         mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
 
 #if defined(__x86_64__)
@@ -480,6 +482,8 @@ void build_pagetable(unsigned long *star
         page = tab[offset];
         mfn = pte_to_mfn(page);
         tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
         offset = l3_table_offset(start_address);
         /* Need new L2 pt frame */
         if(!(start_address & L2_MASK)) 
@@ -498,9 +502,9 @@ void build_pagetable(unsigned long *star
         mfn = pte_to_mfn(page);
         offset = l1_table_offset(start_address);
 
-        mmu_updates[count].ptr = (mfn << PAGE_SHIFT) + sizeof(void *) * offset;
+        mmu_updates[count].ptr = ((pgentry_t)mfn << PAGE_SHIFT) + 
sizeof(pgentry_t) * offset;
         mmu_updates[count].val = 
-            pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+            (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
         count++;
         if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn)
         {
@@ -557,9 +561,6 @@ void init_mm(void)
     printk("  stack start:  %p\n", &stack);
     printk("  _end:         %p\n", &_end);
 
-    /* set up minimal memory infos */
-    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
-   
     /* First page follows page table pages and 3 more pages (store page etc) */
     start_pfn = PFN_UP(to_phys(start_info.pt_base)) + 
                 start_info.nr_pt_frames + 3;
@@ -569,7 +570,7 @@ void init_mm(void)
     printk("  max_pfn:      %lx\n", max_pfn);
 
     build_pagetable(&start_pfn, &max_pfn);
-    
+
     /*
      * now we can initialise the page allocator
      */
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/traps.c
--- a/extras/mini-os/traps.c    Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/traps.c    Tue May 30 14:30:34 2006 -0500
@@ -95,25 +95,26 @@ DO_ERROR(18, "machine check", machine_ch
 
 void page_walk(unsigned long virt_address)
 {
-        unsigned long *tab = (unsigned long *)start_info.pt_base;
-        unsigned long addr = virt_address, page;
+        pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
+        unsigned long addr = virt_address;
         printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, 
start_info.pt_base);
     
 #if defined(__x86_64__)
         page = tab[l4_table_offset(addr)];
-        tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk(" L4 = %p (%p)  [offset = %lx]\n", page, tab, 
l4_table_offset(addr));
-
+        tab = pte_to_virt(page);
+        printk(" L4 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l4_table_offset(addr));
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
         page = tab[l3_table_offset(addr)];
-        tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk("  L3 = %p (%p)  [offset = %lx]\n", page, tab, 
l3_table_offset(addr));
+        tab = pte_to_virt(page);
+        printk("  L3 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l3_table_offset(addr));
 #endif
         page = tab[l2_table_offset(addr)];
-        tab =  to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk("   L2 = %p (%p)  [offset = %lx]\n", page, tab, 
l2_table_offset(addr));
+        tab = pte_to_virt(page);
+        printk("   L2 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l2_table_offset(addr));
         
         page = tab[l1_table_offset(addr)];
-        printk("    L1 = %p (%p)  [offset = %lx]\n", page, tab, 
l1_table_offset(addr));
+        printk("    L1 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, 
l1_table_offset(addr));
 
 }
 
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/x86_32.S
--- a/extras/mini-os/x86_32.S   Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/x86_32.S   Tue May 30 14:30:34 2006 -0500
@@ -4,9 +4,15 @@
 .section __xen_guest
        .ascii  "GUEST_OS=Mini-OS"
        .ascii  ",XEN_VER=xen-3.0"
+       .ascii  ",VIRT_BASE=0xc0000000" /* &_text from minios_x86_32.lds */
+       .ascii  ",ELF_PADDR_OFFSET=0xc0000000"
        .ascii  ",HYPERCALL_PAGE=0x2"
+#ifdef CONFIG_X86_PAE
+       .ascii  ",PAE=yes"
+#else
+       .ascii  ",PAE=no"
+#endif
        .ascii  ",LOADER=generic"
-       .ascii  ",PT_MODE_WRITABLE"
        .byte   0
 .text
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 14:30:34 
2006 -0500
@@ -70,9 +70,6 @@
 
 /* Forward Declaration. */
 void __init find_max_pfn(void);
-
-/* Allows setting of maximum possible memory size  */
-static unsigned long xen_override_max_pfn;
 
 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
 static struct notifier_block xen_panic_block = {
@@ -399,6 +396,26 @@ start_info_t *xen_start_info;
 start_info_t *xen_start_info;
 EXPORT_SYMBOL(xen_start_info);
 
+static void __init add_memory_region(unsigned long long start,
+                                  unsigned long long size, int type)
+{
+       int x;
+
+       if (!efi_enabled) {
+                       x = e820.nr_map;
+
+               if (x == E820MAX) {
+                   printk(KERN_ERR "Ooops! Too many entries in the memory 
map!\n");
+                   return;
+               }
+
+               e820.map[x].addr = start;
+               e820.map[x].size = size;
+               e820.map[x].type = type;
+               e820.nr_map++;
+       }
+} /* add_memory_region */
+
 static void __init limit_regions(unsigned long long size)
 {
        unsigned long long current_addr = 0;
@@ -442,27 +459,20 @@ static void __init limit_regions(unsigne
                }
                return;
        }
-}
-
-static void __init add_memory_region(unsigned long long start,
-                                  unsigned long long size, int type)
-{
-       int x;
-
-       if (!efi_enabled) {
-                       x = e820.nr_map;
-
-               if (x == E820MAX) {
-                   printk(KERN_ERR "Ooops! Too many entries in the memory 
map!\n");
-                   return;
-               }
-
-               e820.map[x].addr = start;
-               e820.map[x].size = size;
-               e820.map[x].type = type;
-               e820.nr_map++;
-       }
-} /* add_memory_region */
+#ifdef CONFIG_XEN
+       if (i==e820.nr_map && current_addr < size) {
+               /*
+                 * The e820 map finished before our requested size so
+                 * extend the final entry to the requested address.
+                 */
+               --i;
+               if (e820.map[i].type == E820_RAM)
+                       e820.map[i].size -= current_addr - size;
+               else
+                       add_memory_region(current_addr, size - current_addr, 
E820_RAM);
+       }
+#endif
+}
 
 #define E820_DEBUG     1
 
@@ -492,7 +502,6 @@ static void __init print_memory_map(char
        }
 }
 
-#if 0
 /*
  * Sanitize the BIOS e820 map.
  *
@@ -680,9 +689,13 @@ static int __init sanitize_e820_map(stru
  */
 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 {
+#ifndef CONFIG_XEN
        /* Only one memory region (or negative)? Ignore it */
        if (nr_map < 2)
                return -1;
+#else
+       BUG_ON(nr_map < 1);
+#endif
 
        do {
                unsigned long long start = biosmap->addr;
@@ -694,6 +707,7 @@ static int __init copy_e820_map(struct e
                if (start > end)
                        return -1;
 
+#ifndef CONFIG_XEN
                /*
                 * Some BIOSes claim RAM in the 640k - 1M region.
                 * Not right. Fix it up.
@@ -708,11 +722,11 @@ static int __init copy_e820_map(struct e
                                size = end - start;
                        }
                }
+#endif
                add_memory_region(start, size, type);
        } while (biosmap++,--nr_map);
        return 0;
 }
-#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -785,13 +799,8 @@ static void __init parse_cmdline_early (
                                unsigned long long mem_size;
  
                                mem_size = memparse(from+4, &from);
-#if 0
                                limit_regions(mem_size);
                                userdef=1;
-#else
-                               xen_override_max_pfn =
-                                       (unsigned long)(mem_size>>PAGE_SHIFT);
-#endif
                        }
                }
 
@@ -984,7 +993,6 @@ static void __init parse_cmdline_early (
        }
 }
 
-#if 0 /* !XEN */
 /*
  * Callback for efi_memory_walk.
  */
@@ -1036,21 +1044,6 @@ void __init find_max_pfn(void)
                memory_present(0, start, end);
        }
 }
-#else
-/* We don't use the fake e820 because we need to respond to user override. */
-void __init find_max_pfn(void)
-{
-       if (xen_override_max_pfn == 0) {
-               max_pfn = xen_start_info->nr_pages;
-               /* Default 8MB slack (to balance backend allocations). */
-               max_pfn += 8 << (20 - PAGE_SHIFT);
-       } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
-               max_pfn = xen_override_max_pfn;
-       } else {
-               max_pfn = xen_start_info->nr_pages;
-       }
-}
-#endif /* XEN */
 
 /*
  * Determine low and high memory ranges:
@@ -1158,6 +1151,15 @@ static void __init register_bootmem_low_
                 */
                last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
 
+#ifdef CONFIG_XEN
+               /*
+                 * Truncate to the number of actual pages currently
+                 * present.
+                 */
+               if (last_pfn > xen_start_info->nr_pages)
+                       last_pfn = xen_start_info->nr_pages;
+#endif
+
                if (last_pfn > max_low_pfn)
                        last_pfn = max_low_pfn;
 
@@ -1351,83 +1353,33 @@ void __init remapped_pgdat_init(void)
  * and also for regions reported as reserved by the e820.
  */
 static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource 
*data_resource)
+legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
+                           struct resource *code_resource,
+                           struct resource *data_resource)
 {
        int i;
-#ifdef CONFIG_XEN
-       dom0_op_t op;
-       struct dom0_memory_map_entry *map;
-       unsigned long gapstart, gapsize;
-       unsigned long long last;
-#endif
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
        probe_roms();
 #endif
 
-#ifdef CONFIG_XEN
-       map = alloc_bootmem_low_pages(PAGE_SIZE);
-       op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
-       set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
-       op.u.physical_memory_map.max_map_entries =
-               PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
-       BUG_ON(HYPERVISOR_dom0_op(&op));
-
-       last = 0x100000000ULL;
-       gapstart = 0x10000000;
-       gapsize = 0x400000;
-
-       for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
+       for (i = 0; i < nr_map; i++) {
                struct resource *res;
-
-               if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
-                       gapsize = last - map[i].end;
-                       gapstart = map[i].end;
-               }
-               if (map[i].start < last)
-                       last = map[i].start;
-
-               if (map[i].end > 0x100000000ULL)
+               if (e820[i].addr + e820[i].size > 0x100000000ULL)
                        continue;
                res = alloc_bootmem_low(sizeof(struct resource));
-               res->name = map[i].is_ram ? "System RAM" : "reserved";
-               res->start = map[i].start;
-               res->end = map[i].end - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-               request_resource(&iomem_resource, res);
-       }
-
-       free_bootmem(__pa(map), PAGE_SIZE);
-
-       /*
-        * Start allocating dynamic PCI memory a bit into the gap,
-        * aligned up to the nearest megabyte.
-        *
-        * Question: should we try to pad it up a bit (do something
-        * like " + (gapsize >> 3)" in there too?). We now have the
-        * technology.
-        */
-       pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
-
-       printk("Allocating PCI resources starting at %08lx (gap: 
%08lx:%08lx)\n",
-               pci_mem_start, gapstart, gapsize);
-#else
-       for (i = 0; i < e820.nr_map; i++) {
-               struct resource *res;
-               if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
-                       continue;
-               res = alloc_bootmem_low(sizeof(struct resource));
-               switch (e820.map[i].type) {
+               switch (e820[i].type) {
                case E820_RAM:  res->name = "System RAM"; break;
                case E820_ACPI: res->name = "ACPI Tables"; break;
                case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
                default:        res->name = "reserved";
                }
-               res->start = e820.map[i].addr;
-               res->end = res->start + e820.map[i].size - 1;
+               res->start = e820[i].addr;
+               res->end = res->start + e820[i].size - 1;
                res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
-               if (e820.map[i].type == E820_RAM) {
+#ifndef CONFIG_XEN
+               if (e820[i].type == E820_RAM) {
                        /*
                         *  We don't know which RAM region contains kernel data,
                         *  so we try it repeatedly and let the resource manager
@@ -1439,38 +1391,21 @@ legacy_init_iomem_resources(struct resou
                        request_resource(res, &crashk_res);
 #endif
                }
-       }
-#endif
-}
-
-/*
- * Request address space for all standard resources
- */
-static void __init register_memory(void)
-{
-#ifndef CONFIG_XEN
+#endif
+       }
+}
+
+/*
+ * Locate a unused range of the physical address space below 4G which
+ * can be used for PCI mappings.
+ */
+static void __init
+e820_setup_gap(struct e820entry *e820, int nr_map)
+{
        unsigned long gapstart, gapsize, round;
        unsigned long long last;
-#endif
-       int           i;
-
-       /* Nothing to do if not running in dom0. */
-       if (!(xen_start_info->flags & SIF_INITDOMAIN))
-               return;
-
-       if (efi_enabled)
-               efi_initialize_iomem_resources(&code_resource, &data_resource);
-       else
-               legacy_init_iomem_resources(&code_resource, &data_resource);
-
-       /* EFI systems may still have VGA */
-       request_resource(&iomem_resource, &video_ram_resource);
-
-       /* request I/O space for devices used on all i[345]86 PCs */
-       for (i = 0; i < STANDARD_IO_RESOURCES; i++)
-               request_resource(&ioport_resource, &standard_io_resources[i]);
-
-#ifndef CONFIG_XEN
+       int i;
+
        /*
         * Search for the bigest gap in the low 32 bits of the e820
         * memory space.
@@ -1478,10 +1413,10 @@ static void __init register_memory(void)
        last = 0x100000000ull;
        gapstart = 0x10000000;
        gapsize = 0x400000;
-       i = e820.nr_map;
+       i = nr_map;
        while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
+               unsigned long long start = e820[i].addr;
+               unsigned long long end = start + e820[i].size;
 
                /*
                 * Since "last" is at most 4GB, we know we'll
@@ -1511,6 +1446,53 @@ static void __init register_memory(void)
 
        printk("Allocating PCI resources starting at %08lx (gap: 
%08lx:%08lx)\n",
                pci_mem_start, gapstart, gapsize);
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static void __init register_memory(void)
+{
+#ifdef CONFIG_XEN
+       struct e820entry *machine_e820;
+       struct xen_memory_map memmap;
+#endif
+       int           i;
+
+       /* Nothing to do if not running in dom0. */
+       if (!(xen_start_info->flags & SIF_INITDOMAIN))
+               return;
+
+#ifdef CONFIG_XEN
+       machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+       memmap.nr_entries = E820MAX;
+       set_xen_guest_handle(memmap.buffer, machine_e820);
+
+       BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+
+       legacy_init_iomem_resources(machine_e820, memmap.nr_entries,
+                                   &code_resource, &data_resource);
+#else
+       if (efi_enabled)
+               efi_initialize_iomem_resources(&code_resource, &data_resource);
+       else
+               legacy_init_iomem_resources(e820.map, e820.nr_map,
+                                           &code_resource, &data_resource);
+#endif
+
+       /* EFI systems may still have VGA */
+       request_resource(&iomem_resource, &video_ram_resource);
+
+       /* request I/O space for devices used on all i[345]86 PCs */
+       for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+               request_resource(&ioport_resource, &standard_io_resources[i]);
+
+#ifdef CONFIG_XEN
+       e820_setup_gap(machine_e820, memmap.nr_entries);
+       free_bootmem(__pa(machine_e820), PAGE_SIZE);
+#else
+       e820_setup_gap(e820.map, e820.nr_map);
 #endif
 }
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c   Tue May 30 14:30:34 
2006 -0500
@@ -191,6 +191,7 @@ swiotlb_init(void)
        if (swiotlb_force == 1) {
                swiotlb = 1;
        } else if ((swiotlb_force != -1) &&
+                  is_running_on_xen() &&
                   (xen_start_info->flags & SIF_INITDOMAIN)) {
                /* Domain 0 always has a swiotlb. */
                ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Tue May 30 14:30:34 
2006 -0500
@@ -228,6 +228,12 @@ static inline int page_kills_ppro(unsign
        return 0;
 }
 
+#else
+
+#define page_kills_ppro(p)     0
+
+#endif
+
 extern int is_available_memory(efi_memory_desc_t *);
 
 int page_is_ram(unsigned long pagenr)
@@ -268,13 +274,6 @@ int page_is_ram(unsigned long pagenr)
        }
        return 0;
 }
-
-#else /* CONFIG_XEN */
-
-#define page_kills_ppro(p)     0
-#define page_is_ram(p)         1
-
-#endif
 
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Tue May 30 14:30:34 2006 -0500
@@ -51,7 +51,7 @@ config GENERIC_IOMAP
        default y
 
 config XEN
-       bool
+       bool "Xen hypervisor support"
        default y
        help
          Enable Xen hypervisor support.  Resulting kernel runs
@@ -60,34 +60,9 @@ config ARCH_XEN
 config ARCH_XEN
        bool
        default y
+       depends on XEN
        help
          TEMP ONLY. Needs to be on for drivers/xen to build.
-
-config XEN_PRIVILEGED_GUEST
-       bool "Privileged Guest"
-       default n
-       help
-         Used in drivers/xen/privcmd.c.  Should go away?
-
-config XEN_BLKDEV_GRANT
-       depends on XEN
-       bool
-       default y
-
-config XEN_BLKDEV_FRONTEND
-       depends on XEN
-       bool
-       default y
-
-config XEN_BACKEND
-       depends on XEN
-       bool
-       default y
-
-config XEN_BLKDEV_BACKEND
-       depends on XEN && XEN_BACKEND
-       bool
-       default y
 
 config XEN_IA64_DOM0_VP
        bool "dom0 vp model"
@@ -102,18 +77,6 @@ config XEN_IA64_DOM0_NON_VP
        default y
        help
          dom0 P=M model
-
-config XEN_SYSFS
-       bool "Export Xen attributes in sysfs"
-       depends on XEN && SYSFS
-       default y
-       help
-               Xen hypervisor attributes will show up under /sys/hypervisor/.
-
-config XEN_INTERFACE_VERSION
-       hex
-       depends on XEN
-       default 0x00030202
 
 config SCHED_NO_NO_OMIT_FRAME_POINTER
        bool
@@ -532,3 +495,16 @@ source "security/Kconfig"
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+
+# override default values of drivers/xen/Kconfig
+if !XEN_IA64_DOM0_VP
+config HAVE_ARCH_ALLOC_SKB
+        bool
+        default n
+
+config HAVE_ARCH_DEV_ALLOC_SKB
+        bool
+        default n
+endif
+
+source "drivers/xen/Kconfig"
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Tue May 30 14:30:34 
2006 -0500
@@ -514,6 +514,9 @@ setup_arch (char **cmdline_p)
 #ifdef CONFIG_XEN
        if (running_on_xen) {
                extern shared_info_t *HYPERVISOR_shared_info;
+               extern int xen_init (void);
+
+               xen_init ();
 
                /* xen_start_info isn't setup yet, get the flags manually */
                if (HYPERVISOR_shared_info->arch.flags & SIF_INITDOMAIN) {
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre
--- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre        Tue May 30 
14:30:34 2006 -0500
@@ -6,20 +6,6 @@
 # for building (as all files in mv'd directories are thought by hg
 # to have been deleted).  I don't know how to avoid this right now,
 # but if someone has a better way, I'm all ears
-
-function try_to_mv() {
-       if [ ! -e $2 ]
-       then
-               mv $1 $2
-       fi
-}
-
-try_to_mv mm/Kconfig mm/Kconfig.xen-x86
-
-# need to grab a couple of xen-modified files for generic_page_range and
-# typedef pte_fn_t which are used by driver/xen blkif
-#ln -sf ../mm.xen-x86/memory.c mm/
-#ln -sf ../linux.xen-x86/mm.h include/linux/
 
 #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h
 ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile       Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile       Tue May 30 14:30:34 
2006 -0500
@@ -2,7 +2,7 @@
 # Makefile for Xen components
 #
 
-obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o 
xenconsole.o xen_ksyms.o
+obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o 
xenconsole.o
 
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o
-pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o
\ No newline at end of file
+obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o util.o
+pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Tue May 30 
14:30:34 2006 -0500
@@ -1,20 +1,22 @@
 
+ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 obj-y   += util.o
+endif
 
 obj-y  += core/
+#obj-y += char/
 obj-y  += console/
 obj-y  += evtchn/
-#obj-y += balloon/
+obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/
 obj-y  += privcmd/
-obj-y  += blkback/
-#obj-y += netback/
-obj-y  += blkfront/
 obj-y  += xenbus/
-#obj-y += netfront/
-#obj-$(CONFIG_XEN_PRIVILEGED_GUEST)    += privcmd/
-#obj-$(CONFIG_XEN_BLKDEV_BACKEND)      += blkback/
-#obj-$(CONFIG_XEN_NETDEV_BACKEND)      += netback/
-#obj-$(CONFIG_XEN_BLKDEV_FRONTEND)     += blkfront/
-#obj-$(CONFIG_XEN_NETDEV_FRONTEND)     += netfront/
-#obj-$(CONFIG_XEN_BLKDEV_TAP)          += blktap/
 
+obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
+obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
+obj-$(CONFIG_XEN_NETDEV_FRONTEND)      += netfront/
+obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
+obj-$(CONFIG_XEN_TPMDEV_FRONTEND)      += tpmfront/
+obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile   Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile   Tue May 30 
14:30:34 2006 -0500
@@ -1,14 +1,6 @@
 #
 # Makefile for the linux kernel.
 #
-
-XENARCH        := $(subst ",,$(CONFIG_XENARCH))
-
-CPPFLAGS_vmlinux.lds += -U$(XENARCH)
-
-$(obj)/vmlinux.lds.S:
-       @ln -fsn $(srctree)/arch/$(XENARCH)/kernel/vmlinux.lds.S $@
-
 
 obj-y   := gnttab.o features.o
 obj-$(CONFIG_PROC_FS) += xen_proc.o
@@ -16,8 +8,10 @@ ifeq ($(ARCH),ia64)
 ifeq ($(ARCH),ia64)
 obj-y   += evtchn_ia64.o
 obj-y   += xenia64_init.o
+ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
+obj-$(CONFIG_NET)     += skbuff.o
+endif
 else
-extra-y += vmlinux.lds
 obj-y   += reboot.o evtchn.o fixup.o 
 obj-$(CONFIG_SMP)     += smp.o         # setup_profiling_timer def'd in ia64
 obj-$(CONFIG_NET)     += skbuff.o      # until networking is up on ia64
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c  Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c  Tue May 30 
14:30:34 2006 -0500
@@ -246,25 +246,14 @@ static struct irqaction evtchn_irqaction
        .name =         "xen-event-channel"
 };
 
-int evtchn_irq = 0xe9;
+static int evtchn_irq = 0xe9;
 void __init evtchn_init(void)
 {
     shared_info_t *s = HYPERVISOR_shared_info;
-    vcpu_info_t   *vcpu_info = &s->vcpu_info[smp_processor_id()];
-
-#if 0
-    int ret;
-    irq = assign_irq_vector(AUTO_ASSIGN);
-    ret = request_irq(irq, evtchn_interrupt, 0, "xen-event-channel", NULL);
-    if (ret < 0)
-    {
-       printk("xen-event-channel unable to get irq %d (%d)\n", irq, ret);
-       return;
-    }
-#endif
+
     register_percpu_irq(evtchn_irq, &evtchn_irqaction);
 
-    vcpu_info->arch.evtchn_vector = evtchn_irq;
+    s->arch.evtchn_vector = evtchn_irq;
     printk("xen-event-channel using irq %d\n", evtchn_irq);
 
     spin_lock_init(&irq_mapping_update_lock);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 
14:30:34 2006 -0500
@@ -11,17 +11,20 @@ shared_info_t *HYPERVISOR_shared_info = 
 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
 EXPORT_SYMBOL(HYPERVISOR_shared_info);
 
-static int initialized;
 start_info_t *xen_start_info;
+
+int running_on_xen;
+EXPORT_SYMBOL(running_on_xen);
 
 int xen_init(void)
 {
+       static int initialized;
        shared_info_t *s = HYPERVISOR_shared_info;
 
        if (initialized)
                return running_on_xen ? 0 : -1;
 
-       if (!running_on_xen)
+       if (!is_running_on_xen())
                return -1;
 
        xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
@@ -35,6 +38,7 @@ int xen_init(void)
        return 0;
 }
 
+#ifndef CONFIG_XEN_IA64_DOM0_VP
 /* We just need a range of legal va here, though finally identity
  * mapped one is instead used for gnttab mapping.
  */
@@ -47,6 +51,7 @@ unsigned long alloc_empty_foreign_map_pa
 
        return (unsigned long)vma->addr;
 }
+#endif
 
 #if 0
 /* These should be define'd but some drivers use them without
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Tue May 30 14:30:34 
2006 -0500
@@ -247,7 +247,7 @@ 1:  mov r8=r32
        XEN_RESTORE_PSR_IC
        ;;
        br.ret.sptk.many rp
-END(xen_set_rr)
+END(xen_set_kr)
 
 GLOBAL_ENTRY(xen_fc)
        movl r8=running_on_xen;;
@@ -345,7 +345,7 @@ GLOBAL_ENTRY(xen_send_ipi)
 GLOBAL_ENTRY(xen_send_ipi)
         mov r14=r32
         mov r15=r33
-        mov r2=0x380
+        mov r2=0x400
         break 0x1000
         ;;
         br.ret.sptk.many rp
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Tue May 30 14:30:34 
2006 -0500
@@ -23,18 +23,56 @@
 //#include <linux/kernel.h>
 #include <linux/spinlock.h>
 #include <linux/bootmem.h>
+#include <linux/vmalloc.h>
 #include <asm/page.h>
 #include <asm/hypervisor.h>
 #include <asm/hypercall.h>
-
-#define XEN_IA64_BALLOON_IS_NOT_YET
-#ifndef XEN_IA64_BALLOON_IS_NOT_YET
+#include <xen/interface/memory.h>
 #include <xen/balloon.h>
-#else
-#define balloon_lock(flags)    ((void)flags)
-#define balloon_unlock(flags)  ((void)flags)
-#endif
-
+
+//XXX xen/ia64 copy_from_guest() is broken.
+//    This is a temporal work around until it is fixed.
+//    used by balloon.c netfront.c
+
+// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined
+// if the definition in arch-ia64.h is changed, this must be updated.
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+
+int
+ia64_xenmem_reservation_op(unsigned long op,
+                          struct xen_memory_reservation* reservation__)
+{
+       struct xen_memory_reservation reservation = *reservation__;
+       unsigned long* frame_list;
+       unsigned long nr_extents = reservation__->nr_extents;
+       int ret = 0;
+       get_xen_guest_handle(frame_list, reservation__->extent_start);
+
+       BUG_ON(op != XENMEM_increase_reservation &&
+              op != XENMEM_decrease_reservation &&
+              op != XENMEM_populate_physmap);
+
+       while (nr_extents > 0) {
+               int tmp_ret;
+               volatile unsigned long dummy;
+
+               set_xen_guest_handle(reservation.extent_start, frame_list);
+               reservation.nr_extents = nr_extents;
+
+               dummy = frame_list[0];// re-install tlb entry before hypercall
+               tmp_ret = ____HYPERVISOR_memory_op(op, &reservation);
+               if (tmp_ret < 0) {
+                       if (ret == 0) {
+                               ret = tmp_ret;
+                       }
+                       break;
+               }
+               frame_list += tmp_ret;
+               nr_extents -= tmp_ret;
+               ret += tmp_ret;
+       }
+       return ret;
+}
 
 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
 // move those to lib/contiguous_bitmap?
@@ -105,6 +143,39 @@ static void contiguous_bitmap_clear(
        }
 }
 
+static unsigned long
+HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
+                           unsigned int address_bits)
+{
+       unsigned long ret;
+        struct xen_memory_reservation reservation = {
+               .nr_extents   = 1,
+                .address_bits = address_bits,
+                .extent_order = extent_order,
+                .domid        = DOMID_SELF
+        };
+       set_xen_guest_handle(reservation.extent_start, &gpfn);
+       ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+       BUG_ON(ret != 1);
+       return 0;
+}
+
+static unsigned long
+HYPERVISOR_remove_physmap(unsigned long gpfn, unsigned int extent_order)
+{
+       unsigned long ret;
+       struct xen_memory_reservation reservation = {
+               .nr_extents   = 1,
+               .address_bits = 0,
+               .extent_order = extent_order,
+               .domid        = DOMID_SELF
+       };
+       set_xen_guest_handle(reservation.extent_start, &gpfn);
+       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(ret != 1);
+       return 0;
+}
+
 /* Ensure multi-page extents are contiguous in machine memory. */
 int
 __xen_create_contiguous_region(unsigned long vstart,
@@ -113,29 +184,29 @@ __xen_create_contiguous_region(unsigned 
        unsigned long error = 0;
        unsigned long gphys = __pa(vstart);
        unsigned long start_gpfn = gphys >> PAGE_SHIFT;
-       unsigned long num_pfn = 1 << order;
+       unsigned long num_gpfn = 1 << order;
        unsigned long i;
        unsigned long flags;
 
-       scrub_pages(vstart, 1 << order);
+       scrub_pages(vstart, num_gpfn);
 
        balloon_lock(flags);
 
-       //XXX order
-       for (i = 0; i < num_pfn; i++) {
-               error = HYPERVISOR_zap_physmap(start_gpfn + i, 0);
-               if (error) {
-                       goto out;
-               }
+       error = HYPERVISOR_remove_physmap(start_gpfn, order);
+       if (error) {
+               goto fail;
        }
 
        error = HYPERVISOR_populate_physmap(start_gpfn, order, address_bits);
-       contiguous_bitmap_set(start_gpfn, 1UL << order);
+       if (error) {
+               goto fail;
+       }
+       contiguous_bitmap_set(start_gpfn, num_gpfn);
 #if 0
        {
        unsigned long mfn;
        unsigned long mfn_prev = ~0UL;
-       for (i = 0; i < 1 << order; i++) {
+       for (i = 0; i < num_gpfn; i++) {
                mfn = pfn_to_mfn_for_dma(start_gpfn + i);
                if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
                        xprintk("\n");
@@ -145,7 +216,7 @@ __xen_create_contiguous_region(unsigned 
                                vstart, virt_to_bus((void*)vstart),
                                phys_to_machine_for_dma(gphys));
                        xprintk("mfn: ");
-                       for (i = 0; i < 1 << order; i++) {
+                       for (i = 0; i < num_gpfn; i++) {
                                mfn = pfn_to_mfn_for_dma(start_gpfn + i);
                                xprintk("0x%lx ", mfn);
                        }
@@ -159,76 +230,405 @@ out:
 out:
        balloon_unlock(flags);
        return error;
+
+fail:
+       for (i = 0; i < num_gpfn; i++) {
+               error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
+               if (error) {
+                       BUG();//XXX
+               }
+       }
+       goto out;
 }
 
 void
 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 {
+       unsigned long flags;
        unsigned long error = 0;
-       unsigned long gphys = __pa(vstart);
-       unsigned long start_gpfn = gphys >> PAGE_SHIFT;
-       unsigned long num_pfn = 1 << order;
+       unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
+       unsigned long num_gpfn = 1UL << order;
+       unsigned long* gpfns;
+       struct xen_memory_reservation reservation;
        unsigned long i;
-       unsigned long flags;
-
-       scrub_pages(vstart, 1 << order);
+
+       gpfns = kmalloc(sizeof(gpfns[0]) * num_gpfn,
+                       GFP_KERNEL | __GFP_NOFAIL);
+       for (i = 0; i < num_gpfn; i++) {
+               gpfns[i] = start_gpfn + i;
+       }
+
+       scrub_pages(vstart, num_gpfn);
 
        balloon_lock(flags);
 
-       contiguous_bitmap_clear(start_gpfn, 1UL << order);
-
-       //XXX order
-       for (i = 0; i < num_pfn; i++) {
-               error = HYPERVISOR_zap_physmap(start_gpfn + i, 0);
-               if (error) {
-                       goto out;
-               }
-       }
-
-       for (i = 0; i < num_pfn; i++) {
-               error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
-               if (error) {
-                       goto out;
-               }
-       }
-
+       contiguous_bitmap_clear(start_gpfn, num_gpfn);
+       error = HYPERVISOR_remove_physmap(start_gpfn, order);
+       if (error) {
+               goto fail;
+       }
+
+       set_xen_guest_handle(reservation.extent_start, gpfns);
+       reservation.nr_extents   = num_gpfn;
+       reservation.address_bits = 0;
+       reservation.extent_order = 0;
+       reservation.domid        = DOMID_SELF;
+       error = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+       if (error != num_gpfn) {
+               error = -EFAULT;//XXX
+               goto fail;
+       }
+       error = 0;
 out:
        balloon_unlock(flags);
+       kfree(gpfns);
        if (error) {
-               //XXX
-       }
+               // error can't be returned.
+               BUG();//XXX
+       }
+       return;
+
+fail:
+       for (i = 0; i < num_gpfn; i++) {
+               int tmp_error;// don't overwrite error.
+               tmp_error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
+               if (tmp_error) {
+                       BUG();//XXX
+               }
+       }
+       goto out;
 }
 
 
 ///////////////////////////////////////////////////////////////////////////
-//XXX taken from balloon.c
-//    temporal hack until balloon driver support.
-#include <linux/module.h>
-
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
-       unsigned long vstart;
-       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
-
-       vstart = __get_free_pages(GFP_KERNEL, order);
-       if (vstart == 0)
-               return NULL;
-
-       return virt_to_page(vstart);
-}
-
-void balloon_dealloc_empty_page_range(
-       struct page *page, unsigned long nr_pages)
-{
-       __free_pages(page, get_order(nr_pages * PAGE_SIZE));
-}
-
-void balloon_update_driver_allowance(long delta)
-{
-}
-
-EXPORT_SYMBOL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL(balloon_dealloc_empty_page_range);
-EXPORT_SYMBOL(balloon_update_driver_allowance);
-
-
+// grant table hack
+// cmd: GNTTABOP_xxx
+
+#include <linux/mm.h>
+#include <xen/interface/xen.h>
+#include <xen/gnttab.h>
+
+static void
+gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
+{
+       uint32_t flags;
+
+       flags = uop->flags;
+       if (flags & GNTMAP_readonly) {
+#if 0
+               xprintd("GNTMAP_readonly is not supported yet\n");
+#endif
+               flags &= ~GNTMAP_readonly;
+       }
+
+       if (flags & GNTMAP_host_map) {
+               if (flags & GNTMAP_application_map) {
+                       xprintd("GNTMAP_application_map is not supported yet: 
flags 0x%x\n", flags);
+                       BUG();
+               }
+               if (flags & GNTMAP_contains_pte) {
+                       xprintd("GNTMAP_contains_pte is not supported yet flags 
0x%x\n", flags);
+                       BUG();
+               }
+       } else if (flags & GNTMAP_device_map) {
+               xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
+               BUG();//XXX not yet. actually this flag is not used.
+       } else {
+               BUG();
+       }
+}
+
+int
+HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
+{
+       if (cmd == GNTTABOP_map_grant_ref) {
+               unsigned int i;
+               for (i = 0; i < count; i++) {
+                       gnttab_map_grant_ref_pre(
+                               (struct gnttab_map_grant_ref*)uop + i);
+               }
+       }
+
+       return ____HYPERVISOR_grant_table_op(cmd, uop, count);
+}
+
+
+///////////////////////////////////////////////////////////////////////////
+// PageForeign(), SetPageForeign(), ClearPageForeign()
+
+struct address_space xen_ia64_foreign_dummy_mapping;
+
+///////////////////////////////////////////////////////////////////////////
+// foreign mapping
+
+struct xen_ia64_privcmd_entry {
+       atomic_t        map_count;
+       struct page*    page;
+};
+
+static void
+xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
+{
+       atomic_set(&entry->map_count, 0);
+       entry->page = NULL;
+}
+
+//TODO alloc_page() to allocate pseudo physical address space is 
+//     waste of memory.
+//     When vti domain is created, qemu maps all of vti domain pages which 
+//     reaches to several hundred megabytes at least.
+//     remove alloc_page().
+static int
+xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
+                           unsigned long addr,
+                           struct xen_ia64_privcmd_entry* entry,
+                           unsigned long mfn,
+                           pgprot_t prot,
+                           domid_t domid)
+{
+       int error = 0;
+       struct page* page;
+       unsigned long gpfn;
+
+       BUG_ON((addr & ~PAGE_MASK) != 0);
+       BUG_ON(mfn == INVALID_MFN);
+
+       if (entry->page != NULL) {
+               error = -EBUSY;
+               goto out;
+       }
+       page = alloc_page(GFP_KERNEL);
+       if (page == NULL) {
+               error = -ENOMEM;
+               goto out;
+       }
+       gpfn = page_to_pfn(page);
+
+       error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */,
+                                      domid);
+       if (error != 0) {
+               goto out;
+       }
+
+       prot = vma->vm_page_prot;
+       error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
+       if (error != 0) {
+               (void)HYPERVISOR_zap_physmap(gpfn, 0);
+               error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+               if (error) {
+                       BUG();//XXX
+               }
+               __free_page(page);
+       } else {
+               atomic_inc(&entry->map_count);
+               entry->page = page;
+       }
+
+out:
+       return error;
+}
+
+static void
+xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_entry* entry)
+{
+       struct page* page = entry->page;
+       unsigned long gpfn = page_to_pfn(page);
+       int error;
+
+       error = HYPERVISOR_zap_physmap(gpfn, 0);
+       if (error) {
+               BUG();//XXX
+       }
+
+       error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+       if (error) {
+               BUG();//XXX
+       }
+
+       entry->page = NULL;
+       __free_page(page);
+}
+
+static int
+xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_entry* entry)
+{
+       if (entry->page != NULL) {
+               atomic_inc(&entry->map_count);
+       } else {
+               BUG_ON(atomic_read(&entry->map_count) != 0);
+       }
+}
+
+static int
+xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_entry* entry)
+{
+       if (entry->page != NULL && atomic_dec_and_test(&entry->map_count)) {
+               xen_ia64_privcmd_entry_munmap(entry);
+       }
+}
+
+struct xen_ia64_privcmd_range {
+       atomic_t                        ref_count;
+       unsigned long                   pgoff; // in PAGE_SIZE
+
+       unsigned long                   num_entries;
+       struct xen_ia64_privcmd_entry   entries[0];
+};
+
+struct xen_ia64_privcmd_vma {
+       struct xen_ia64_privcmd_range*  range;
+
+       unsigned long                   num_entries;
+       struct xen_ia64_privcmd_entry*  entries;
+};
+
+static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
+static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
+
+struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
+       .open = &xen_ia64_privcmd_vma_open,
+       .close = &xen_ia64_privcmd_vma_close,
+};
+
+static void
+__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
+                           struct xen_ia64_privcmd_vma* privcmd_vma,
+                           struct xen_ia64_privcmd_range* privcmd_range)
+{
+       unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
+       unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       unsigned long i;
+
+       BUG_ON(entry_offset < 0);
+       BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
+
+       privcmd_vma->range = privcmd_range;
+       privcmd_vma->num_entries = num_entries;
+       privcmd_vma->entries = &privcmd_range->entries[entry_offset];
+       vma->vm_private_data = privcmd_vma;
+       for (i = 0; i < privcmd_vma->num_entries; i++) {
+               xen_ia64_privcmd_entry_open(&privcmd_vma->entries[i]);
+       }
+
+       vma->vm_private_data = privcmd_vma;
+       vma->vm_ops = &xen_ia64_privcmd_vm_ops;
+}
+
+static void
+xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
+{
+       struct xen_ia64_privcmd_vma* privcmd_vma = (struct 
xen_ia64_privcmd_vma*)vma->vm_private_data;
+       struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+
+       atomic_inc(&privcmd_range->ref_count);
+       // vm_op->open() can't fail.
+       privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
+
+       __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
+}
+
+static void
+xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
+{
+       struct xen_ia64_privcmd_vma* privcmd_vma =
+               (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+       struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+       unsigned long i;
+
+       for (i = 0; i < privcmd_vma->num_entries; i++) {
+               xen_ia64_privcmd_entry_close(&privcmd_vma->entries[i]);
+       }
+       vma->vm_private_data = NULL;
+       kfree(privcmd_vma);
+
+       if (atomic_dec_and_test(&privcmd_range->ref_count)) {
+#if 1
+               for (i = 0; i < privcmd_range->num_entries; i++) {
+                       struct xen_ia64_privcmd_entry* entry =
+                               &privcmd_range->entries[i];
+                       BUG_ON(atomic_read(&entry->map_count) != 0);
+                       BUG_ON(entry->page != NULL);
+               }
+#endif
+               vfree(privcmd_range);
+       }
+}
+
+int
+privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+{
+       unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       struct xen_ia64_privcmd_range* privcmd_range;
+       struct xen_ia64_privcmd_vma* privcmd_vma;
+       unsigned long i;
+       BUG_ON(!running_on_xen);
+
+       BUG_ON(file->private_data != NULL);
+       privcmd_range =
+               vmalloc(sizeof(*privcmd_range) +
+                       sizeof(privcmd_range->entries[0]) * num_entries);
+       if (privcmd_range == NULL) {
+               goto out_enomem0;
+       }
+       privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
+       if (privcmd_vma == NULL) {
+               goto out_enomem1;
+       }
+
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+
+       atomic_set(&privcmd_range->ref_count, 1);
+       privcmd_range->pgoff = vma->vm_pgoff;
+       privcmd_range->num_entries = num_entries;
+       for (i = 0; i < privcmd_range->num_entries; i++) {
+               xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
+       }
+
+       __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
+       return 0;
+
+out_enomem1:
+       kfree(privcmd_vma);
+out_enomem0:
+       vfree(privcmd_range);
+       return -ENOMEM;
+}
+
+int
+direct_remap_pfn_range(struct vm_area_struct *vma,
+                      unsigned long address,   // process virtual address
+                      unsigned long mfn,       // mfn, mfn + 1, ... mfn + 
size/PAGE_SIZE
+                      unsigned long size,
+                      pgprot_t prot,
+                      domid_t  domid)          // target domain
+{
+       struct xen_ia64_privcmd_vma* privcmd_vma =
+               (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+       unsigned long i;
+       unsigned long offset;
+       int error = 0;
+       BUG_ON(!running_on_xen);
+
+#if 0
+       if (prot != vm->vm_page_prot) {
+               return -EINVAL;
+       }
+#endif
+
+       i = (address - vma->vm_start) >> PAGE_SHIFT;
+       for (offset = 0; offset < size; offset += PAGE_SIZE) {
+               struct xen_ia64_privcmd_entry* entry =
+                       &privcmd_vma->entries[i];
+               error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & 
PAGE_MASK, entry, mfn, prot, domid);
+               if (error != 0) {
+                       break;
+               }
+
+               i++;
+               mfn++;
+        }
+
+       return error;
+}
+
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Tue May 30 14:30:34 
2006 -0500
@@ -83,11 +83,7 @@ GLOBAL_ENTRY(ia64_switch_to)
        mov r8=1
        ;;
        st4 [r27]=r8                    // psr.ic back on
-       ;;
-#else
-(p6)   ssm psr.ic                      // if we had to map, reenable the 
psr.ic bit FIRST!!!
-       ;;
-(p6)   srlz.d
+#else
        ld8 sp=[r21]                    // load kernel stack pointer of new task
        mov IA64_KR(CURRENT)=in0        // update "current" application register
 #endif
@@ -136,6 +132,11 @@ GLOBAL_ENTRY(ia64_switch_to)
 #endif
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
+#ifndef CONFIG_XEN
+       ssm psr.ic                      // reenable the psr.ic bit
+       ;;
+       srlz.d
+#endif
        br.cond.sptk .done
 #ifdef CONFIG_XEN
 END(xen_switch_to)
@@ -216,7 +217,9 @@ GLOBAL_ENTRY(ia64_trace_syscall)
 .mem.offset 0,0; st8.spill [r2]=r8             // store return value in slot 
for r8
 .mem.offset 8,0; st8.spill [r3]=r10            // clear error indication in 
slot for r10
        br.call.sptk.many rp=syscall_trace_leave // give parent a chance to 
catch return value
-.ret3: br.cond.sptk .work_pending_syscall_end
+.ret3:
+(pUStk)        cmp.eq.unc p6,p0=r0,r0                  // p6 <- pUStk
+       br.cond.sptk .work_pending_syscall_end
 
 strace_error:
        ld8 r3=[r2]                             // load pt_regs.r8
@@ -246,7 +249,7 @@ END(ia64_trace_syscall)
  *           r8-r11: restored (syscall return value(s))
  *              r12: restored (user-level stack pointer)
  *              r13: restored (user-level thread pointer)
- *              r14: cleared
+ *              r14: set to __kernel_syscall_via_epc
  *              r15: restored (syscall #)
  *          r16-r17: cleared
  *              r18: user-level b6
@@ -267,7 +270,7 @@ END(ia64_trace_syscall)
  *               pr: restored (user-level pr)
  *               b0: restored (user-level rp)
  *               b6: restored
- *               b7: cleared
+ *               b7: set to __kernel_syscall_via_epc
  *          ar.unat: restored (user-level ar.unat)
  *           ar.pfs: restored (user-level ar.pfs)
  *           ar.rsc: restored (user-level ar.rsc)
@@ -331,20 +334,20 @@ ENTRY(ia64_leave_syscall)
        ;;
 (p6)   ld4 r31=[r18]                           // load 
current_thread_info()->flags
        ld8 r19=[r2],PT(B6)-PT(LOADRS)          // load ar.rsc value for 
"loadrs"
-       mov b7=r0               // clear b7
-       ;;
-       ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)    // load ar.bspstore (may be 
garbage)
+       nop.i 0
+       ;;
+       mov r16=ar.bsp                          // M2  get existing backing 
store pointer
        ld8 r18=[r2],PT(R9)-PT(B6)              // load b6
 (p6)   and r15=TIF_WORK_MASK,r31               // any work other than 
TIF_SYSCALL_TRACE?
        ;;
-       mov r16=ar.bsp                          // M2  get existing backing 
store pointer
+       ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)    // load ar.bspstore (may be 
garbage)
 (p6)   cmp4.ne.unc p6,p0=r15, r0               // any special work pending?
 (p6)   br.cond.spnt .work_pending_syscall
        ;;
        // start restoring the state saved on the kernel stack (struct pt_regs):
        ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
        ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-       mov f6=f0               // clear f6
+(pNonSys) break 0              //      bug check: we shouldn't be here if 
pNonSys is TRUE!
        ;;
        invala                  // M0|1 invalidate ALAT
 #ifdef CONFIG_XEN
@@ -358,57 +361,68 @@ ENTRY(ia64_leave_syscall)
        st4     [r29]=r0        // note: clears both vpsr.i and vpsr.ic!
        ;;
 #else
-       rsm psr.i | psr.ic      // M2 initiate turning off of interrupt and 
interruption collection
-#endif
-       mov f9=f0               // clear f9
-
-       ld8 r29=[r2],16         // load cr.ipsr
-       ld8 r28=[r3],16                 // load cr.iip
-       mov f8=f0               // clear f8
+       rsm psr.i | psr.ic      // M2   turn off interrupts and interruption 
collection
+#endif
+       cmp.eq p9,p0=r0,r0      // A    set p9 to indicate that we should 
restore cr.ifs
+
+       ld8 r29=[r2],16         // M0|1 load cr.ipsr
+       ld8 r28=[r3],16         // M0|1 load cr.iip
+       mov r22=r0              // A    clear r22
        ;;
        ld8 r30=[r2],16         // M0|1 load cr.ifs
-       mov.m ar.ssd=r0         // M2 clear ar.ssd
-       cmp.eq p9,p0=r0,r0      // set p9 to indicate that we should restore 
cr.ifs
-       ;;
        ld8 r25=[r3],16         // M0|1 load ar.unat
-       mov.m ar.csd=r0         // M2 clear ar.csd
-       mov r22=r0              // clear r22
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
        ;;
        ld8 r26=[r2],PT(B0)-PT(AR_PFS)  // M0|1 load ar.pfs
-(pKStk)        mov r22=psr             // M2 read PSR now that interrupts are 
disabled
-       mov f10=f0              // clear f10
-       ;;
-       ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
-       ld8 r27=[r3],PT(PR)-PT(AR_RSC)  // load ar.rsc
-       mov f11=f0              // clear f11
-       ;;
-       ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)    // load ar.rnat (may be garbage)
-       ld8 r31=[r3],PT(R1)-PT(PR)              // load predicates
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
-       ;;
-       ld8 r20=[r2],PT(R12)-PT(AR_FPSR)        // load ar.fpsr
-       ld8.fill r1=[r3],16     // load r1
-(pUStk) mov r17=1
-       ;;
-       srlz.d                  // M0  ensure interruption collection is off
-       ld8.fill r13=[r3],16
-       mov f7=f0               // clear f7
-       ;;
-       ld8.fill r12=[r2]       // restore r12 (sp)
-       ld8.fill r15=[r3]       // restore r15
-       addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
-       ;;
-(pUStk)        ld4 r3=[r3]             // r3 = cpu_data->phys_stacked_size_p8
-(pUStk) st1 [r14]=r17
-       mov b6=r18              // I0  restore b6
-       ;;
-       mov r14=r0              // clear r14
-       shr.u r18=r19,16        // I0|1 get byte size of existing "dirty" 
partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
-
-       mov.m ar.ccv=r0         // clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
-       br.cond.sptk.many rbs_switch
+(pKStk)        mov r22=psr                     // M2   read PSR now that 
interrupts are disabled
+       nop 0
+       ;;
+       ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+       ld8 r27=[r3],PT(PR)-PT(AR_RSC)  // M0|1 load ar.rsc
+       mov f6=f0                       // F    clear f6
+       ;;
+       ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)    // M0|1 load ar.rnat (may be 
garbage)
+       ld8 r31=[r3],PT(R1)-PT(PR)              // M0|1 load predicates
+       mov f7=f0                               // F    clear f7
+       ;;
+       ld8 r20=[r2],PT(R12)-PT(AR_FPSR)        // M0|1 load ar.fpsr
+       ld8.fill r1=[r3],16                     // M0|1 load r1
+(pUStk) mov r17=1                              // A
+       ;;
+(pUStk) st1 [r14]=r17                          // M2|3
+       ld8.fill r13=[r3],16                    // M0|1
+       mov f8=f0                               // F    clear f8
+       ;;
+       ld8.fill r12=[r2]                       // M0|1 restore r12 (sp)
+       ld8.fill r15=[r3]                       // M0|1 restore r15
+       mov b6=r18                              // I0   restore b6
+
+       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+       mov f9=f0                                       // F    clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch              // B
+
+       srlz.d                          // M0   ensure interruption collection 
is off (for cover)
+       shr.u r18=r19,16                // I0|1 get byte size of existing 
"dirty" partition
+#ifdef CONFIG_XEN
+       XEN_HYPER_COVER;
+#else
+       cover                           // B    add current frame into dirty 
partition & set cr.ifs
+#endif
+       ;;
+(pUStk) ld4 r17=[r17]                  // M0|1 r17 = 
cpu_data->phys_stacked_size_p8
+       mov r19=ar.bsp                  // M2   get new backing store pointer
+       mov f10=f0                      // F    clear f10
+
+       nop.m 0
+       movl r14=__kernel_syscall_via_epc // X
+       ;;
+       mov.m ar.csd=r0                 // M2   clear ar.csd
+       mov.m ar.ccv=r0                 // M2   clear ar.ccv
+       mov b7=r14                      // I0   clear b7 (hint with 
__kernel_syscall_via_epc)
+
+       mov.m ar.ssd=r0                 // M2   clear ar.ssd
+       mov f11=f0                      // F    clear f11
+       br.cond.sptk.many rbs_switch    // B
 #ifdef CONFIG_XEN
 END(xen_leave_syscall)
 #else
@@ -546,7 +560,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        ldf.fill f7=[r2],PT(F11)-PT(F7)
        ldf.fill f8=[r3],32
        ;;
-       srlz.i                  // ensure interruption collection is off
+       srlz.d  // ensure that inter. collection is off (VHPT is don't care, 
since text is pinned)
        mov ar.ccv=r15
        ;;
        ldf.fill f11=[r2]
@@ -556,29 +570,29 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        movl r2=XSI_BANK1_R16
        movl r3=XSI_BANK1_R16+8
        ;;
-       st8.spill [r2]=r16,16
-       st8.spill [r3]=r17,16
-       ;;
-       st8.spill [r2]=r18,16
-       st8.spill [r3]=r19,16
-       ;;
-       st8.spill [r2]=r20,16
-       st8.spill [r3]=r21,16
-       ;;
-       st8.spill [r2]=r22,16
-       st8.spill [r3]=r23,16
-       ;;
-       st8.spill [r2]=r24,16
-       st8.spill [r3]=r25,16
-       ;;
-       st8.spill [r2]=r26,16
-       st8.spill [r3]=r27,16
-       ;;
-       st8.spill [r2]=r28,16
-       st8.spill [r3]=r29,16
-       ;;
-       st8.spill [r2]=r30,16
-       st8.spill [r3]=r31,16
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,16
        ;;
        movl r2=XSI_BANKNUM;;
        st4 [r2]=r0;
@@ -641,14 +655,14 @@ GLOBAL_ENTRY(ia64_leave_kernel)
         */
 (pNonSys) br.cond.dpnt dont_preserve_current_frame
 
+#ifdef CONFIG_XEN
+       XEN_HYPER_COVER;
+#else
+       cover                           // add current frame into dirty 
partition and set cr.ifs
+#endif
+       ;;
+       mov r19=ar.bsp                  // get new backing store pointer
 rbs_switch:
-#ifdef CONFIG_XEN
-       XEN_HYPER_COVER;
-#else
-       cover                           // add current frame into dirty 
partition and set cr.ifs
-#endif
-       ;;
-       mov r19=ar.bsp                  // get new backing store pointer
        sub r16=r16,r18                 // krbs = old bsp - size of dirty 
partition
        cmp.ne p9,p0=r0,r0              // clear p9 to skip restore of cr.ifs
        ;;
@@ -723,14 +737,14 @@ rse_clear_invalid:
        mov loc5=0
        mov loc6=0
        mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
        ;;
        mov loc8=0
        mov loc9=0
        cmp.ne pReturn,p0=r0,in1        // if recursion count != 0, we need to 
do a br.ret
        mov loc10=0
        mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
 #endif /* !CONFIG_ITANIUM */
 #      undef pRecurse
 #      undef pReturn
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Tue May 30 14:30:34 
2006 -0500
@@ -87,16 +87,17 @@ ENTRY(vhpt_miss)
         * (the "original") TLB miss, which may either be caused by an 
instruction
         * fetch or a data access (or non-access).
         *
-        * What we do here is normal TLB miss handing for the _original_ miss, 
followed
-        * by inserting the TLB entry for the virtual page table page that the 
VHPT
-        * walker was attempting to access.  The latter gets inserted as long
-        * as both L1 and L2 have valid mappings for the faulting address.
-        * The TLB entry for the original miss gets inserted only if
-        * the L3 entry indicates that the page is present.
+        * What we do here is normal TLB miss handling for the _original_ miss,
+        * followed by inserting the TLB entry for the virtual page table page
+        * that the VHPT walker was attempting to access.  The latter gets
+        * inserted as long as the page table entries above the pte level have
+        * valid mappings for the faulting address.  The TLB entry for the original
+        * miss gets inserted only if the pte entry indicates that the page is
+        * present.
         *
         * do_page_fault gets invoked in the following cases:
         *      - the faulting virtual address uses unimplemented address bits
-        *      - the faulting virtual address has no L1, L2, or L3 mapping
+        *      - the faulting virtual address has no valid page table mapping
         */
 #ifdef CONFIG_XEN
        movl r16=XSI_IFA
@@ -127,7 +128,7 @@ ENTRY(vhpt_miss)
        shl r21=r16,3                           // shift bit 60 into sign bit
        shr.u r17=r16,61                        // get the region number into 
r17
        ;;
-       shr r22=r21,3
+       shr.u r22=r21,3
 #ifdef CONFIG_HUGETLB_PAGE
        extr.u r26=r25,2,6
        ;;
@@ -139,7 +140,7 @@ ENTRY(vhpt_miss)
 #endif
        ;;
        cmp.eq p6,p7=5,r17                      // is IFA pointing into to 
region 5?
-       shr.u r18=r22,PGDIR_SHIFT               // get bits 33-63 of the 
faulting address
+       shr.u r18=r22,PGDIR_SHIFT               // get bottom portion of pgd 
index bit
        ;;
 (p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in 
place
 
@@ -150,41 +151,54 @@ ENTRY(vhpt_miss)
 (p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
 (p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
        ;;
-(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=PTA + IFA(33,42)*8
-(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=PTA + (((IFA(61,63) << 
7) | IFA(33,39))*8)
+(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=pgd_offset for region 5
+(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=pgd_offset for 
region[0-4]
        cmp.eq p7,p6=0,r21                      // unused address bits all 
zeroes?
-       shr.u r18=r22,PMD_SHIFT                 // shift L2 index into position
-       ;;
-       ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
-       ;;
-(p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
-       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // compute address of L2 page 
table entry
-       ;;
-(p7)   ld8 r20=[r17]                           // fetch the L2 entry (may be 0)
-       shr.u r19=r22,PAGE_SHIFT                // shift L3 index into position
-       ;;
-(p7)   cmp.eq.or.andcm p6,p7=r20,r0            // was L2 entry NULL?
-       dep r21=r19,r20,3,(PAGE_SHIFT-3)        // compute address of L3 page 
table entry
-       ;;
-#ifdef CONFIG_XEN
-(p7)   ld8 r18=[r21]                           // read the L3 PTE
+#ifdef CONFIG_PGTABLE_4
+       shr.u r28=r22,PUD_SHIFT                 // shift pud index into position
+#else
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+#endif
+       ;;
+       ld8 r17=[r17]                           // get *pgd (may be 0)
+       ;;
+(p7)   cmp.eq p6,p7=r17,r0                     // was pgd_present(*pgd) == 
NULL?
+#ifdef CONFIG_PGTABLE_4
+       dep r28=r28,r17,3,(PAGE_SHIFT-3)        // r28=pud_offset(pgd,addr)
+       ;;
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+(p7)   ld8 r29=[r28]                           // get *pud (may be 0)
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r29,r0            // was pud_present(*pud) == 
NULL?
+       dep r17=r18,r29,3,(PAGE_SHIFT-3)        // r17=pmd_offset(pud,addr)
+#else
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // r17=pmd_offset(pgd,addr)
+#endif
+       ;;
+(p7)   ld8 r20=[r17]                           // get *pmd (may be 0)
+       shr.u r19=r22,PAGE_SHIFT                // shift pte index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r20,r0            // was pmd_present(*pmd) == 
NULL?
+       dep r21=r19,r20,3,(PAGE_SHIFT-3)        // r21=pte_offset(pmd,addr)
+       ;;
+(p7)   ld8 r18=[r21]                           // read *pte
+#ifdef CONFIG_XEN
        movl r19=XSI_ISR
        ;;
        ld8 r19=[r19]
+#else
+       mov r19=cr.isr                          // cr.isr bit 32 tells us if 
this is an insn miss
+#endif
        ;;
 (p7)   tbit.z p6,p7=r18,_PAGE_P_BIT            // page present bit cleared?
+#ifdef CONFIG_XEN
        movl r22=XSI_IHA
        ;;
        ld8 r22=[r22]
-       ;;
-#else
-(p7)   ld8 r18=[r21]                           // read the L3 PTE
-       mov r19=cr.isr                          // cr.isr bit 0 tells us if 
this is an insn miss
-       ;;
-(p7)   tbit.z p6,p7=r18,_PAGE_P_BIT            // page present bit cleared?
+#else
        mov r22=cr.iha                          // get the VHPT address that 
caused the TLB miss
+#endif
        ;;                                      // avoid RAW on p7
-#endif
 (p7)   tbit.nz.unc p10,p11=r19,32              // is it an instruction TLB 
miss?
        dep r23=0,r20,0,PAGE_SHIFT              // clear low bits to get page 
address
        ;;
@@ -198,16 +212,17 @@ ENTRY(vhpt_miss)
        ;;
        mov r8=r24
        ;;
-(p6)   br.cond.spnt.many page_fault            // handle bad address/page not 
present (page fault)
-       ;;
-       movl r24=XSI_IFA
-       ;;
-       st8 [r24]=r22
-       ;;
 #else
 (p10)  itc.i r18                               // insert the instruction TLB 
entry
 (p11)  itc.d r18                               // insert the data TLB entry
+#endif
 (p6)   br.cond.spnt.many page_fault            // handle bad address/page not 
present (page fault)
+#ifdef CONFIG_XEN
+       movl r24=XSI_IFA
+       ;;
+       st8 [r24]=r22
+       ;;
+#else
        mov cr.ifa=r22
 #endif
 
@@ -242,25 +257,41 @@ ENTRY(vhpt_miss)
        dv_serialize_data
 
        /*
-        * Re-check L2 and L3 pagetable.  If they changed, we may have received 
a ptc.g
+        * Re-check pagetable entry.  If they changed, we may have received a 
ptc.g
         * between reading the pagetable and the "itc".  If so, flush the entry 
we
-        * inserted and retry.
-        */
-       ld8 r25=[r21]                           // read L3 PTE again
-       ld8 r26=[r17]                           // read L2 entry again
-       ;;
-       cmp.ne p6,p7=r26,r20                    // did L2 entry change
+        * inserted and retry.  At this point, we have:
+        *
+        * r28 = equivalent of pud_offset(pgd, ifa)
+        * r17 = equivalent of pmd_offset(pud, ifa)
+        * r21 = equivalent of pte_offset(pmd, ifa)
+        *
+        * r29 = *pud
+        * r20 = *pmd
+        * r18 = *pte
+        */
+       ld8 r25=[r21]                           // read *pte again
+       ld8 r26=[r17]                           // read *pmd again
+#ifdef CONFIG_PGTABLE_4
+       ld8 r19=[r28]                           // read *pud again
+#endif
+       cmp.ne p6,p7=r0,r0
+       ;;
+       cmp.ne.or.andcm p6,p7=r26,r20           // did *pmd change
+#ifdef CONFIG_PGTABLE_4
+       cmp.ne.or.andcm p6,p7=r19,r29           // did *pud change
+#endif
        mov r27=PAGE_SHIFT<<2
        ;;
 (p6)   ptc.l r22,r27                           // purge PTE page translation
-(p7)   cmp.ne.or.andcm p6,p7=r25,r18           // did L3 PTE change
+(p7)   cmp.ne.or.andcm p6,p7=r25,r18           // did *pte change
        ;;
 (p6)   ptc.l r16,r27                           // purge translation
 #endif
 
        mov pr=r31,-1                           // restore predicate registers
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -272,10 +303,10 @@ ENTRY(itlb_miss)
 ENTRY(itlb_miss)
        DBG_FAULT(1)
        /*
-        * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+        * The ITLB handler accesses the PTE via the virtually mapped linear
         * page table.  If a nested TLB miss occurs, we switch into physical
-        * mode, walk the page table, and then re-execute the L3 PTE read
-        * and go on normally after that.
+        * mode, walk the page table, and then re-execute the PTE read and
+        * go on normally after that.
         */
 #ifdef CONFIG_XEN
        movl r16=XSI_IFA
@@ -292,11 +323,11 @@ ENTRY(itlb_miss)
        ;;
        ld8 r17=[r17]                           // get virtual address of L3 PTE
 #else
-       mov r17=cr.iha                          // get virtual address of L3 PTE
+       mov r17=cr.iha                          // get virtual address of PTE
 #endif
        movl r30=1f                             // load nested fault 
continuation point
        ;;
-1:     ld8 r18=[r17]                           // read L3 PTE
+1:     ld8 r18=[r17]                           // read *pte
        ;;
        mov b0=r29
        tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
@@ -320,7 +351,7 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
         */
        dv_serialize_data
 
-       ld8 r19=[r17]                           // read L3 PTE again and see if 
same
+       ld8 r19=[r17]                           // read *pte again and see if 
same
        mov r20=PAGE_SHIFT<<2                   // setup page size for purge
        ;;
        cmp.ne p7,p0=r18,r19
@@ -329,7 +360,8 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
 #endif
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -341,10 +373,10 @@ ENTRY(dtlb_miss)
 ENTRY(dtlb_miss)
        DBG_FAULT(2)
        /*
-        * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+        * The DTLB handler accesses the PTE via the virtually mapped linear
         * page table.  If a nested TLB miss occurs, we switch into physical
-        * mode, walk the page table, and then re-execute the L3 PTE read
-        * and go on normally after that.
+        * mode, walk the page table, and then re-execute the PTE read and
+        * go on normally after that.
         */
 #ifdef CONFIG_XEN
        movl r16=XSI_IFA
@@ -361,11 +393,11 @@ dtlb_fault:
        ;;
        ld8 r17=[r17]                           // get virtual address of L3 PTE
 #else
-       mov r17=cr.iha                          // get virtual address of L3 PTE
+       mov r17=cr.iha                          // get virtual address of PTE
 #endif
        movl r30=1f                             // load nested fault 
continuation point
        ;;
-1:     ld8 r18=[r17]                           // read L3 PTE
+1:     ld8 r18=[r17]                           // read *pte
        ;;
        mov b0=r29
        tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
@@ -390,7 +422,7 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
         */
        dv_serialize_data
 
-       ld8 r19=[r17]                           // read L3 PTE again and see if 
same
+       ld8 r19=[r17]                           // read *pte again and see if 
same
        mov r20=PAGE_SHIFT<<2                   // setup page size for purge
        ;;
        cmp.ne p7,p0=r18,r19
@@ -399,7 +431,8 @@ 1:  ld8 r18=[r17]                           // read L3 PTE
 #endif
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -416,19 +449,15 @@ ENTRY(alt_itlb_miss)
        ld8 r21=[r31],XSI_IFA-XSI_IPSR  // get ipsr, point to ifa
        movl r17=PAGE_KERNEL
        ;;
-       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-       ;;
        ld8 r16=[r31]           // get ifa
-       mov r31=pr
-       ;;
 #else
        mov r16=cr.ifa          // get address that caused the TLB miss
        movl r17=PAGE_KERNEL
        mov r21=cr.ipsr
+#endif
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        mov r31=pr
        ;;
-#endif
 #ifdef CONFIG_DISABLE_VHPT
        shr.u r22=r16,61                        // get the region number into 
r21
        ;;
@@ -486,17 +515,15 @@ ENTRY(alt_dtlb_miss)
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        ;;
        ld8 r16=[r31]           // get ifa
-       mov r31=pr
-       ;;
 #else
        mov r16=cr.ifa          // get address that caused the TLB miss
        movl r17=PAGE_KERNEL
        mov r20=cr.isr
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        mov r21=cr.ipsr
+#endif
        mov r31=pr
        ;;
-#endif
 #ifdef CONFIG_DISABLE_VHPT
        shr.u r22=r16,61                        // get the region number into 
r21
        ;;
@@ -565,12 +592,12 @@ ENTRY(nested_dtlb_miss)
         *              r30:    continuation address
         *              r31:    saved pr
         *
-        * Output:      r17:    physical address of L3 PTE of faulting address
+        * Output:      r17:    physical address of PTE of faulting address
         *              r29:    saved b0
         *              r30:    continuation address
         *              r31:    saved pr
         *
-        * Clobbered:   b0, r18, r19, r21, psr.dt (cleared)
+        * Clobbered:   b0, r18, r19, r21, r22, psr.dt (cleared)
         */
 #ifdef CONFIG_XEN
        XEN_HYPER_RSM_PSR_DT;
@@ -579,12 +606,23 @@ ENTRY(nested_dtlb_miss)
 #endif
        mov r19=IA64_KR(PT_BASE)                // get the page table base 
address
        shl r21=r16,3                           // shift bit 60 into sign bit
+#ifdef CONFIG_XEN
+       movl r18=XSI_ITIR
+       ;;
+       ld8 r18=[r18]
+#else
+       mov r18=cr.itir
+#endif
        ;;
        shr.u r17=r16,61                        // get the region number into 
r17
+       extr.u r18=r18,2,6                      // get the faulting page size
        ;;
        cmp.eq p6,p7=5,r17                      // is faulting address in 
region 5?
-       shr.u r18=r16,PGDIR_SHIFT               // get bits 33-63 of faulting 
address
-       ;;
+       add r22=-PAGE_SHIFT,r18                 // adjustment for hugetlb 
address
+       add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
+       ;;
+       shr.u r22=r16,r22
+       shr.u r18=r16,r18
 (p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in 
place
 
        srlz.d
@@ -594,21 +632,33 @@ ENTRY(nested_dtlb_miss)
 (p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
 (p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
        ;;
-(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=PTA + IFA(33,42)*8
-(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=PTA + (((IFA(61,63) << 
7) | IFA(33,39))*8)
+(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=pgd_offset for region 5
+(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=pgd_offset for 
region[0-4]
        cmp.eq p7,p6=0,r21                      // unused address bits all 
zeroes?
-       shr.u r18=r16,PMD_SHIFT                 // shift L2 index into position
-       ;;
-       ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
-       ;;
-(p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
-       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // compute address of L2 page 
table entry
-       ;;
-(p7)   ld8 r17=[r17]                           // fetch the L2 entry (may be 0)
-       shr.u r19=r16,PAGE_SHIFT                // shift L3 index into position
-       ;;
-(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was L2 entry NULL?
-       dep r17=r19,r17,3,(PAGE_SHIFT-3)        // compute address of L3 page 
table entry
+#ifdef CONFIG_PGTABLE_4
+       shr.u r18=r22,PUD_SHIFT                 // shift pud index into position
+#else
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+#endif
+       ;;
+       ld8 r17=[r17]                           // get *pgd (may be 0)
+       ;;
+(p7)   cmp.eq p6,p7=r17,r0                     // was pgd_present(*pgd) == 
NULL?
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // r17=p[u|m]d_offset(pgd,addr)
+       ;;
+#ifdef CONFIG_PGTABLE_4
+(p7)   ld8 r17=[r17]                           // get *pud (may be 0)
+       shr.u r18=r22,PMD_SHIFT                 // shift pmd index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was pud_present(*pud) == 
NULL?
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // r17=pmd_offset(pud,addr)
+       ;;
+#endif
+(p7)   ld8 r17=[r17]                           // get *pmd (may be 0)
+       shr.u r19=r22,PAGE_SHIFT                // shift pte index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was pmd_present(*pmd) == 
NULL?
+       dep r17=r19,r17,3,(PAGE_SHIFT-3)        // r17=pte_offset(pmd,addr);
 (p6)   br.cond.spnt page_fault
        mov b0=r30
        br.sptk.many b0                         // return to continuation point
@@ -626,7 +676,7 @@ END(ikey_miss)
        // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is 
faulting address)
 ENTRY(page_fault)
 #ifdef CONFIG_XEN
-       XEN_HYPER_SSM_PSR_DT;
+       XEN_HYPER_SSM_PSR_DT
 #else
        ssm psr.dt
        ;;
@@ -742,11 +792,12 @@ 1:        ld8 r18=[r17]
        ;;                                      // avoid RAW on r18
        mov ar.ccv=r18                          // set compare value for cmpxchg
        or r25=_PAGE_D|_PAGE_A,r18              // set the dirty and accessed 
bits
-       ;;
-       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       tbit.z p7,p6 = r18,_PAGE_P_BIT          // Check present bit
+       ;;
+(p6)   cmpxchg8.acq r26=[r17],r25,ar.ccv       // Only update if page is 
present
        mov r24=PAGE_SHIFT<<2
        ;;
-       cmp.eq p6,p7=r26,r18
+(p6)   cmp.eq p6,p7=r26,r18                    // Only compare if page is 
present
        ;;
 (p6)   itc.d r25                               // install updated PTE
        ;;
@@ -775,7 +826,8 @@ 1:  ld8 r18=[r17]
 #endif
        mov pr=r31,-1                           // restore pr
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -826,11 +878,12 @@ 1:        ld8 r18=[r17]
        ;;
        mov ar.ccv=r18                          // set compare value for cmpxchg
        or r25=_PAGE_A,r18                      // set the accessed bit
-       ;;
-       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       tbit.z p7,p6 = r18,_PAGE_P_BIT          // Check present bit
+       ;;
+(p6)   cmpxchg8.acq r26=[r17],r25,ar.ccv       // Only if page present
        mov r24=PAGE_SHIFT<<2
        ;;
-       cmp.eq p6,p7=r26,r18
+(p6)   cmp.eq p6,p7=r26,r18                    // Only if page present
        ;;
 #ifdef CONFIG_XEN
        mov r26=r8
@@ -869,7 +922,8 @@ 1:  ld8 r18=[r17]
 #endif /* !CONFIG_SMP */
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -892,11 +946,13 @@ ENTRY(daccess_bit)
        movl r30=1f                             // load continuation point in 
case of nested fault
        ;;
 #ifdef CONFIG_XEN
-       mov r18=r8;
-       mov r8=r16;
-       XEN_HYPER_THASH;;
-       mov r17=r8;
-       mov r8=r18;;
+       mov r18=r8
+       mov r8=r16
+       XEN_HYPER_THASH
+       ;;
+       mov r17=r8
+       mov r8=r18
+       ;;
 #else
        thash r17=r16                           // compute virtual address of 
L3 PTE
 #endif
@@ -909,11 +965,12 @@ 1:        ld8 r18=[r17]
        ;;                                      // avoid RAW on r18
        mov ar.ccv=r18                          // set compare value for cmpxchg
        or r25=_PAGE_A,r18                      // set the dirty bit
-       ;;
-       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       tbit.z p7,p6 = r18,_PAGE_P_BIT          // Check present bit
+       ;;
+(p6)   cmpxchg8.acq r26=[r17],r25,ar.ccv       // Only if page is present
        mov r24=PAGE_SHIFT<<2
        ;;
-       cmp.eq p6,p7=r26,r18
+(p6)   cmp.eq p6,p7=r26,r18                    // Only if page is present
        ;;
 #ifdef CONFIG_XEN
        mov r26=r8
@@ -950,7 +1007,8 @@ 1: ld8 r18=[r17]
        mov b0=r29                              // restore b0
        mov pr=r31,-1
 #ifdef CONFIG_XEN
-       XEN_HYPER_RFI;
+       XEN_HYPER_RFI
+       dv_serialize_data
 #else
        rfi
 #endif
@@ -976,143 +1034,157 @@ ENTRY(break_fault)
         * to prevent leaking bits from kernel to user level.
         */
        DBG_FAULT(11)
-       mov r16=IA64_KR(CURRENT)                // r16 = current task; 12 cycle 
read lat.
-#ifdef CONFIG_XEN
-       movl r31=XSI_IPSR
-       ;;
-       ld8 r29=[r31],XSI_IIP-XSI_IPSR          // get ipsr, point to iip
-       mov r18=__IA64_BREAK_SYSCALL
-       mov r21=ar.fpsr
-       ;;
-       ld8 r28=[r31],XSI_IIM-XSI_IIP           // get iip, point to iim
-       mov r19=b6
-       mov r25=ar.unat
-       ;;
-       ld8 r17=[r31]                           // get iim
-       mov r27=ar.rsc
-       mov r26=ar.pfs
-       ;;
-#else
-       mov r17=cr.iim
-       mov r18=__IA64_BREAK_SYSCALL
-       mov r21=ar.fpsr
-       mov r29=cr.ipsr
-       mov r19=b6
-       mov r25=ar.unat
-       mov r27=ar.rsc
-       mov r26=ar.pfs
-       mov r28=cr.iip
-#endif
-       mov r31=pr                              // prepare to save predicates
-       mov r20=r1
-       ;;
+       mov.m r16=IA64_KR(CURRENT)              // M2 r16 <- current task (12 
cyc)
+#ifdef CONFIG_XEN
+       movl r22=XSI_IPSR
+       ;;
+       ld8 r29=[r22],XSI_IIM-XSI_IPSR          // get ipsr, point to iip
+#else
+       mov r29=cr.ipsr                         // M2 (12 cyc)
+#endif
+       mov r31=pr                              // I0 (2 cyc)
+
+#ifdef CONFIG_XEN
+       ;;
+       ld8 r17=[r22],XSI_IIP-XSI_IIM
+#else
+       mov r17=cr.iim                          // M2 (2 cyc)
+#endif
+       mov.m r27=ar.rsc                        // M2 (12 cyc)
+       mov r18=__IA64_BREAK_SYSCALL            // A
+
+       mov.m ar.rsc=0                          // M2
+       mov.m r21=ar.fpsr                       // M2 (12 cyc)
+       mov r19=b6                              // I0 (2 cyc)
+       ;;
+       mov.m r23=ar.bspstore                   // M2 (12 cyc)
+       mov.m r24=ar.rnat                       // M2 (5 cyc)
+       mov.i r26=ar.pfs                        // I0 (2 cyc)
+
+       invala                                  // M0|1
+       nop.m 0                                 // M
+       mov r20=r1                              // A                    save r1
+
+       nop.m 0
+       movl r30=sys_call_table                 // X
+
+#ifdef CONFIG_XEN
+       ld8 r28=[r22]
+#else
+       mov r28=cr.iip                          // M2 (2 cyc)
+#endif
+       cmp.eq p0,p7=r18,r17                    // I0 is this a system call?
+(p7)   br.cond.spnt non_syscall                // B  no ->
+       //
+       // From this point on, we are definitely on the syscall-path
+       // and we can use (non-banked) scratch registers.
+       //
+///////////////////////////////////////////////////////////////////////
+       mov r1=r16                              // A    move task-pointer to 
"addl"-addressable reg
+       mov r2=r16                              // A    setup r2 for 
ia64_syscall_setup
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // A    r9 = 
&current_thread_info()->flags
+
        adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-       cmp.eq p0,p7=r18,r17                    // is this a system call? (p7 
<- false, if so)
-(p7)   br.cond.spnt non_syscall
-       ;;
-       ld1 r17=[r16]                           // load 
current->thread.on_ustack flag
-       st1 [r16]=r0                            // clear 
current->thread.on_ustack flag
-       add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16   // set r1 for 
MINSTATE_START_SAVE_MIN_VIRT
-       ;;
-       invala
-
-       /* adjust return address so we skip over the break instruction: */
-
-       extr.u r8=r29,41,2                      // extract ei field from cr.ipsr
-       ;;
-       cmp.eq p6,p7=2,r8                       // isr.ei==2?
-       mov r2=r1                               // setup r2 for 
ia64_syscall_setup
-       ;;
-(p6)   mov r8=0                                // clear ei to 0
-(p6)   adds r28=16,r28                         // switch cr.iip to next bundle 
cr.ipsr.ei wrapped
-(p7)   adds r8=1,r8                            // increment ei to next slot
-       ;;
-       cmp.eq pKStk,pUStk=r0,r17               // are we in kernel mode 
already?
-       dep r29=r8,r29,41,2                     // insert new ei into cr.ipsr
-       ;;
-
-       // switch from user to kernel RBS:
-       MINSTATE_START_SAVE_MIN_VIRT
-       br.call.sptk.many b7=ia64_syscall_setup
-       ;;
+       adds r15=-1024,r15                      // A    subtract 1024 from 
syscall number
+       mov r3=NR_syscalls - 1
+       ;;
+       ld1.bias r17=[r16]                      // M0|1 r17 = 
current->thread.on_ustack flag
+       ld4 r9=[r9]                             // M0|1 r9 = 
current_thread_info()->flags
+       extr.u r8=r29,41,2                      // I0   extract ei field from 
cr.ipsr
+
+       shladd r30=r15,3,r30                    // A    r30 = sys_call_table + 
8*(syscall-1024)
+       addl r22=IA64_RBS_OFFSET,r1             // A    compute base of RBS
+       cmp.leu p6,p7=r15,r3                    // A    syscall number in range?
+       ;;
+
+       lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch RBS
+(p6)   ld8 r30=[r30]                           // M0|1 load address of syscall 
entry point
+       tnat.nz.or p7,p0=r15                    // I0   is syscall nr a NaT?
+
+       mov.m ar.bspstore=r22                   // M2   switch to kernel RBS
+       cmp.eq p8,p9=2,r8                       // A    isr.ei==2?
+       ;;
+
+(p8)   mov r8=0                                // A    clear ei to 0
+(p7)   movl r30=sys_ni_syscall                 // X
+
+(p8)   adds r28=16,r28                         // A    switch cr.iip to next 
bundle
+(p9)   adds r8=1,r8                            // A    increment ei to next 
slot
+       nop.i 0
+       ;;
+
+       mov.m r25=ar.unat                       // M2 (5 cyc)
+       dep r29=r8,r29,41,2                     // I0   insert new ei into 
cr.ipsr
+       adds r15=1024,r15                       // A    restore original 
syscall number
+       //
+       // If any of the above loads miss in L1D, we'll stall here until
+       // the data arrives.
+       //
+///////////////////////////////////////////////////////////////////////
+       st1 [r16]=r0                            // M2|3 clear 
current->thread.on_ustack flag
+       mov b6=r30                              // I0   setup syscall handler 
branch reg early
+       cmp.eq pKStk,pUStk=r0,r17               // A    were we on kernel 
stacks already?
+
+       and r9=_TIF_SYSCALL_TRACEAUDIT,r9       // A    mask trace or audit
+       mov r18=ar.bsp                          // M2 (12 cyc)
+(pKStk)        br.cond.spnt .break_fixup               // B    we're already 
in kernel-mode -- fix up RBS
+       ;;
+.back_from_break_fixup:
+(pUStk)        addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute 
base of memory stack
+       cmp.eq p14,p0=r9,r0                     // A    are syscalls being 
traced/audited?
+       br.call.sptk.many b7=ia64_syscall_setup // B
+1:
+       mov ar.rsc=0x3                          // M2   set eager mode, pl 0, 
LE, loadrs=0
+       nop 0
 #ifdef CONFIG_XEN
        mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;
 #else
-       MINSTATE_END_SAVE_MIN_VIRT              // switch to bank 1
-#endif
-#ifdef CONFIG_XEN
-       movl r3=XSI_PSR_IC
-       mov r16=1
-       ;;
-#if 1
-       st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC  // vpsr.ic = 1
-       ;;
-(p15)  ld8 r3=[r3]
-       ;;
-(p15)  st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR     // if (p15) vpsr.i = 1
-       mov r16=r0
-       ;;
-(p15)  ld4 r16=[r3]                            // if (pending_interrupts)
-       ;;
-       cmp.ne  p6,p0=r16,r0
+       bsw.1                                   // B (6 cyc) regs are saved, 
switch to bank 1
+#endif
+       ;;
+
+#ifdef CONFIG_XEN
+       movl r16=XSI_PSR_IC
+       mov r3=1
+       ;;
+       st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC  // vpsr.ic = 1
+#else
+       ssm psr.ic | PSR_DEFAULT_BITS           // M2   now it's safe to 
re-enable intr.-collection
+#endif
+       movl r3=ia64_ret_from_syscall           // X
+       ;;
+
+       srlz.i                                  // M0   ensure interruption 
collection is on
+       mov rp=r3                               // I0   set the real return addr
+(p10)  br.cond.spnt.many ia64_ret_from_syscall // B    return if bad 
call-frame or r15 is a NaT
+
+#ifdef CONFIG_XEN
+(p15)  ld8 r16=[r16]                           // vpsr.i
+       ;;
+(p15)  st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR    // if (p15) vpsr.i = 1
+       mov r2=r0
+       ;;
+(p15)  ld4 r2=[r16]                            // if (pending_interrupts)
+       ;;
+       cmp.ne  p6,p0=r2,r0
        ;;
 (p6)   ssm     psr.i                           //   do a real ssm psr.i
-       ;;
-#else
-//     st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC  // vpsr.ic = 1
-       adds r3=XSI_PSR_I_ADDR-XSI_PSR_IC,r3    // SKIP vpsr.ic = 1
-       ;;
-(p15)  ld8 r3=[r3]
-       ;;
-(p15)  st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR     // if (p15) vpsr.i = 1
-       mov r16=r0
-       ;;
-(p15)  ld4 r16=[r3]                            // if (pending_interrupts)
-       ;;
-       cmp.ne  p6,p0=r16,r0
-       ;;
-//(p6) ssm     psr.i                           //   do a real ssm psr.i
-//(p6) XEN_HYPER_SSM_I;
-(p6)   break 0x7;
-       ;;
-#endif
-       mov r3=NR_syscalls - 1
-       ;;
-#else
-       ssm psr.ic | PSR_DEFAULT_BITS
-       ;;
-       srlz.i                                  // guarantee that interruption 
collection is on
-       mov r3=NR_syscalls - 1
-       ;;
-(p15)  ssm psr.i                               // restore psr.i
-#endif
-       // p10==true means out registers are more than 8 or r15's Nat is true
-(p10)  br.cond.spnt.many ia64_ret_from_syscall
-       ;;
-       movl r16=sys_call_table
-
-       adds r15=-1024,r15                      // r15 contains the syscall 
number---subtract 1024
-       movl r2=ia64_ret_from_syscall
-       ;;
-       shladd r20=r15,3,r16                    // r20 = sys_call_table + 
8*(syscall-1024)
-       cmp.leu p6,p7=r15,r3                    // (syscall > 0 && syscall < 
1024 + NR_syscalls) ?
-       mov rp=r2                               // set the real return addr
-       ;;
-(p6)   ld8 r20=[r20]                           // load address of syscall 
entry point
-(p7)   movl r20=sys_ni_syscall
-
-       add r2=TI_FLAGS+IA64_TASK_SIZE,r13
-       ;;
-       ld4 r2=[r2]                             // r2 = 
current_thread_info()->flags
-       ;;
-       and r2=_TIF_SYSCALL_TRACEAUDIT,r2       // mask trace or audit
-       ;;
-       cmp.eq p8,p0=r2,r0
-       mov b6=r20
-       ;;
-(p8)   br.call.sptk.many b6=b6                 // ignore this return addr
-       br.cond.sptk ia64_trace_syscall
+#else
+(p15)  ssm psr.i                               // M2   restore psr.i
+#endif
(p14)  br.call.sptk.many b6=b6                 // B    invoke syscall-handler 
(ignore return addr)
+       br.cond.spnt.many ia64_trace_syscall    // B    do syscall-tracing 
thingamagic
        // NOT REACHED
+///////////////////////////////////////////////////////////////////////
+       // On entry, we optimistically assumed that we're coming from 
user-space.
+       // For the rare cases where a system-call is done from within the 
kernel,
+       // we fix things up at this point:
+.break_fixup:
+       add r1=-IA64_PT_REGS_SIZE,sp            // A    allocate space for 
pt_regs structure
+       mov ar.rnat=r24                         // M2   restore kernel's AR.RNAT
+       ;;
+       mov ar.bspstore=r23                     // M2   restore kernel's 
AR.BSPSTORE
+       br.cond.sptk .back_from_break_fixup
 END(break_fault)
 
        .org ia64_ivt+0x3000
@@ -1201,8 +1273,6 @@ END(interrupt)
         *      - r31: saved pr
         *      -  b0: original contents (to be saved)
         * On exit:
-        *      - executing on bank 1 registers
-        *      - psr.ic enabled, interrupts restored
         *      -  p10: TRUE if syscall is invoked with more than 8 out
         *              registers or r15's Nat is true
         *      -  r1: kernel's gp
@@ -1210,8 +1280,11 @@ END(interrupt)
         *      -  r8: -EINVAL if p10 is true
         *      - r12: points to kernel stack
         *      - r13: points to current task
+        *      - r14: preserved (same as on entry)
+        *      - p13: preserved
         *      - p15: TRUE if interrupts need to be re-enabled
         *      - ar.fpsr: set to kernel settings
+        *      -  b6: preserved (same as on entry)
         */
 #ifndef CONFIG_XEN
 GLOBAL_ENTRY(ia64_syscall_setup)
@@ -1280,10 +1353,10 @@ GLOBAL_ENTRY(ia64_syscall_setup)
 (p13)  mov in5=-1
        ;;
        st8 [r16]=r21,PT(R8)-PT(AR_FPSR)        // save ar.fpsr
-       tnat.nz p14,p0=in6
+       tnat.nz p13,p0=in6
        cmp.lt p10,p9=r11,r8    // frame size can't be more than local+8
        ;;
-       stf8 [r16]=f1           // ensure pt_regs.r8 != 0 (see 
handle_syscall_error)
+       mov r8=1
 (p9)   tnat.nz p10,p0=r15
        adds r12=-16,r1         // switch to kernel memory stack (with 16 bytes 
of scratch)
 
@@ -1294,9 +1367,9 @@ GLOBAL_ENTRY(ia64_syscall_setup)
        mov r13=r2                              // establish `current'
        movl r1=__gp                            // establish kernel global 
pointer
        ;;
-(p14)  mov in6=-1
+       st8 [r16]=r8            // ensure pt_regs.r8 != 0 (see 
handle_syscall_error)
+(p13)  mov in6=-1
 (p8)   mov in7=-1
-       nop.i 0
 
        cmp.eq pSys,pNonSys=r0,r0               // set pSys=1, pNonSys=0
        movl r17=FPSR_DEFAULT
@@ -1323,6 +1396,8 @@ END(ia64_syscall_setup)
         * element, followed by the arguments.
         */
 ENTRY(dispatch_illegal_op_fault)
+       .prologue
+       .body
        SAVE_MIN_WITH_COVER
        ssm psr.ic | PSR_DEFAULT_BITS
        ;;
@@ -1335,6 +1410,7 @@ ENTRY(dispatch_illegal_op_fault)
        mov out0=ar.ec
        ;;
        SAVE_REST
+       PT_REGS_UNWIND_INFO(0)
        ;;
        br.call.sptk.many rp=ia64_illegal_op_fault
 .ret0: ;;
@@ -1365,6 +1441,8 @@ END(dispatch_illegal_op_fault)
        FAULT(17)
 
 ENTRY(non_syscall)
+       mov ar.rsc=r27                  // restore ar.rsc before 
SAVE_MIN_WITH_COVER
+       ;;
        SAVE_MIN_WITH_COVER
 
        // There is no particular reason for this code to be here, other than 
that
@@ -1540,7 +1618,7 @@ ENTRY(daccess_rights)
        ;;
        ld8 r16=[r16]
        ;;
-       XEN_HYPER_RSM_PSR_DT;
+       XEN_HYPER_RSM_PSR_DT
 #else
        mov r16=cr.ifa
        rsm psr.dt
@@ -1584,6 +1662,25 @@ END(disabled_fp_reg)
 // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
 ENTRY(nat_consumption)
        DBG_FAULT(26)
+
+       mov r16=cr.ipsr
+       mov r17=cr.isr
+       mov r31=pr                              // save PR
+       ;;
+       and r18=0xf,r17                         // r18 = cr.isr.code{3:0}
+       tbit.z p6,p0=r17,IA64_ISR_NA_BIT
+       ;;
+       cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
+       dep r16=-1,r16,IA64_PSR_ED_BIT,1
+(p6)   br.cond.spnt 1f         // branch if (cr.isr.na == 0 || 
cr.isr.code{3:0} != LFETCH)
+       ;;
+       mov cr.ipsr=r16         // set cr.ipsr.na
+       mov pr=r31,-1
+       ;;
+       rfi
+
+1:     mov pr=r31,-1
+       ;;
        FAULT(26)
 END(nat_consumption)
 
@@ -1624,7 +1721,7 @@ ENTRY(speculation_vector)
 #ifdef CONFIG_XEN
        XEN_HYPER_RFI;
 #else
-       rfi
+       rfi                             // and go back
 #endif
 END(speculation_vector)
 
@@ -1647,7 +1744,6 @@ END(debug_vector)
 // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
 ENTRY(unaligned_access)
        DBG_FAULT(30)
-       mov r16=cr.ipsr
        mov r31=pr              // prepare to save predicates
        ;;
        br.sptk.many dispatch_unaligned_handler
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Tue May 30 14:30:34 
2006 -0500
@@ -155,6 +155,8 @@
        ;;                                                                      
                \
        ld4 r30=[r8];                                                           
                \
        ;;                                                                      
                \
+       /* set XSI_INCOMPL_REGFR 0 */                                           
                \
+       st4 [r8]=r0;                                                            
                \
        cmp.eq  p6,p7=r30,r0;                                                   
                \
        ;; /* not sure if this stop bit is necessary */                         
                \
 (p6)   adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8;                          
                \
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Tue May 30 14:30:34 
2006 -0500
@@ -8,28 +8,17 @@
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
 
-       .data
-       .align 8
-       .globl running_on_xen
-running_on_xen:
-       data4 0
-
 #define isBP   p3      // are we the Bootstrap Processor?
 
        .text
 GLOBAL_ENTRY(early_xen_setup)
-       mov r8=cr.dcr
+       mov r8=ar.rsc           // Initialized in head.S
 (isBP) movl r9=running_on_xen;;
-       extr.u r8=r8,63,1;;
-       cmp.ne p7,p0=r8,r0;;
+       extr.u r8=r8,2,2;;      // Extract pl fields
+       cmp.ne p7,p0=r8,r0;;    // p7: running on xen 
+(p7)   mov r8=1                // booleanize.
+(p7)   movl r10=xen_ivt;;
 (isBP) st4 [r9]=r8
-(p7)   movl r10=xen_ivt;;
 (p7)   mov cr.iva=r10
        br.ret.sptk.many rp;;
 END(early_xen_setup)
-
-GLOBAL_ENTRY(is_running_on_xen)
-       movl r9=running_on_xen;;
-       ld4 r8=[r9]
-       br.ret.sptk.many rp;;
-END(is_running_on_xen)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Tue May 30 
14:30:34 2006 -0500
@@ -26,53 +26,34 @@
 #include <asm/sections.h>
 #include <xen/interface/memory.h>
 
-unsigned long pci_mem_start = 0xaeedbabe;
-
 /* 
  * PFN of last memory page.
  */
 unsigned long end_pfn; 
 EXPORT_SYMBOL(end_pfn);
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;  
-unsigned long end_pfn_map; 
-
-/* 
- * Add a memory region to the kernel e820 map.
- */ 
-void __init add_memory_region(unsigned long start, unsigned long size, int 
type)
-{
-       int x = e820.nr_map;
-
-       if (x == E820MAX) {
-               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-               return;
-       }
-
-       e820.map[x].addr = start;
-       e820.map[x].size = size;
-       e820.map[x].type = type;
-       e820.nr_map++;
-}
-
-#ifndef CONFIG_XEN
 
 /* 
  * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
  * The direct mapping extends to end_pfn_map, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */ 
+unsigned long end_pfn_map; 
 
 /* 
  * Last pfn which the user wants to use.
  */
-
+unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;  
+
+#ifndef CONFIG_XEN
 extern struct resource code_resource, data_resource;
+#endif
 
 /* Check for some hardcoded bad areas that early boot is not allowed to touch 
*/ 
 static inline int bad_addr(unsigned long *addrp, unsigned long size)
 { 
        unsigned long addr = *addrp, last = addr + size; 
 
+#ifndef CONFIG_XEN
        /* various gunk below that needed for SMP startup */
        if (addr < 0x8000) { 
                *addrp = 0x8000;
@@ -100,9 +81,16 @@ static inline int bad_addr(unsigned long
                return 1;
        }
        /* XXX ramdisk image here? */ 
+#else
+       if (last < (table_end<<PAGE_SHIFT)) {
+               *addrp = table_end << PAGE_SHIFT;
+               return 1;
+       }
+#endif
        return 0;
 } 
 
+#ifndef CONFIG_XEN
 int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) 
 { 
        int i;
@@ -116,6 +104,7 @@ int __init e820_mapped(unsigned long sta
        } 
        return 0;
 }
+#endif
 
 /* 
  * Find a free area in a specific range. 
@@ -246,22 +235,23 @@ e820_hole_size(unsigned long start_pfn, 
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
-void __init e820_reserve_resources(void)
-{
-       int i;
-       for (i = 0; i < e820.nr_map; i++) {
+void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
+{
+       int i;
+       for (i = 0; i < nr_map; i++) {
                struct resource *res;
                res = alloc_bootmem_low(sizeof(struct resource));
-               switch (e820.map[i].type) {
+               switch (e820[i].type) {
                case E820_RAM:  res->name = "System RAM"; break;
                case E820_ACPI: res->name = "ACPI Tables"; break;
                case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
                default:        res->name = "reserved";
                }
-               res->start = e820.map[i].addr;
-               res->end = res->start + e820.map[i].size - 1;
+               res->start = e820[i].addr;
+               res->end = res->start + e820[i].size - 1;
                res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
+#ifndef CONFIG_XEN
                if (e820.map[i].type == E820_RAM) {
                        /*
                         *  We don't know which RAM region contains kernel data,
@@ -274,9 +264,27 @@ void __init e820_reserve_resources(void)
                        request_resource(res, &crashk_res);
 #endif
                }
-       }
-}
-#endif /* CONFIG_XEN */
+#endif
+       }
+}
+
+/* 
+ * Add a memory region to the kernel e820 map.
+ */ 
+void __init add_memory_region(unsigned long start, unsigned long size, int 
type)
+{
+       int x = e820.nr_map;
+
+       if (x == E820MAX) {
+               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+               return;
+       }
+
+       e820.map[x].addr = start;
+       e820.map[x].size = size;
+       e820.map[x].type = type;
+       e820.nr_map++;
+}
 
 void __init e820_print_map(char *who)
 {
@@ -304,7 +312,6 @@ void __init e820_print_map(char *who)
        }
 }
 
-#ifndef CONFIG_XEN
 /*
  * Sanitize the BIOS e820 map.
  *
@@ -491,9 +498,13 @@ static int __init sanitize_e820_map(stru
  */
 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
 {
+#ifndef CONFIG_XEN
        /* Only one memory region (or negative)? Ignore it */
        if (nr_map < 2)
                return -1;
+#else
+       BUG_ON(nr_map < 1);
+#endif
 
        do {
                unsigned long start = biosmap->addr;
@@ -505,6 +516,7 @@ static int __init copy_e820_map(struct e
                if (start > end)
                        return -1;
 
+#ifndef CONFIG_XEN
                /*
                 * Some BIOSes claim RAM in the 640k - 1M region.
                 * Not right. Fix it up.
@@ -523,12 +535,14 @@ static int __init copy_e820_map(struct e
                                size = end - start;
                        }
                }
+#endif
 
                add_memory_region(start, size, type);
        } while (biosmap++,--nr_map);
        return 0;
 }
 
+#ifndef CONFIG_XEN
 void __init setup_memory_region(void)
 {
        char *who = "BIOS-e820";
@@ -562,104 +576,63 @@ void __init setup_memory_region(void)
 
 #else  /* CONFIG_XEN */
 
-extern unsigned long xen_override_max_pfn;
-extern union xen_start_info_union xen_start_info_union;
-
-unsigned long __init e820_end_of_ram(void)
-{
-       unsigned long max_end_pfn;
-
-       if (xen_override_max_pfn == 0) {
-               max_end_pfn = xen_start_info->nr_pages;
-               /* Default 8MB slack (to balance backend allocations). */
-               max_end_pfn += 8 << (20 - PAGE_SHIFT);
-       } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
-               max_end_pfn = xen_override_max_pfn;
-       } else {
-               max_end_pfn = xen_start_info->nr_pages;
-       }
-
-       return max_end_pfn;
-}
-
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
-       return 0;
-}
-
-void __init e820_reserve_resources(void) 
-{
-       dom0_op_t op;
-       struct dom0_memory_map_entry *map;
-       unsigned long gapstart, gapsize, round, last;
-       int i, found = 0;
-
-       if (!(xen_start_info->flags & SIF_INITDOMAIN))
-               return;
-
-       map = alloc_bootmem_low_pages(PAGE_SIZE);
-       op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
-       set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
-       op.u.physical_memory_map.max_map_entries =
-               PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
-       BUG_ON(HYPERVISOR_dom0_op(&op));
-
-       last = 0x100000000ULL;
-       gapstart = 0x10000000;
-       gapsize = 0x400000;
-
-       for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
-               struct resource *res;
-
-               if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
-                       gapsize = last - map[i].end;
-                       gapstart = map[i].end;
-                       found = 1;
-               }
-               if (map[i].start < last)
-                       last = map[i].start;
-
-               if (map[i].end > 0x100000000ULL)
-                       continue;
-               res = alloc_bootmem_low(sizeof(struct resource));
-               res->name = map[i].is_ram ? "System RAM" : "reserved";
-               res->start = map[i].start;
-               res->end = map[i].end - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-               request_resource(&iomem_resource, res);
-       }
-
-       free_bootmem(__pa(map), PAGE_SIZE);
-
-       if (!found) {
-               gapstart = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
-               gapstart = (gapstart << PAGE_SHIFT) + 1024*1024;
-               printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit 
address range\n"
-                      KERN_ERR "PCI: Unassigned devices with 32bit resource 
registers may break!\n");
-       }
-
+void __init setup_memory_region(void)
+{
+       int rc;
+       struct xen_memory_map memmap;
        /*
-        * See how much we want to round up: start off with
-        * rounding to the next 1MB area.
+        * This is rather large for a stack variable but this early in
+        * the boot process we know we have plenty of slack space.
         */
-       round = 0x100000;
-       while ((gapsize >> 4) > round)
-               round += round;
-       /* Fun with two's complement */
-       pci_mem_start = (gapstart + round) & -round;
-
-       printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: 
%lx:%lx)\n",
-               pci_mem_start, gapstart, gapsize);
-}
-
+       struct e820entry map[E820MAX];
+
+       memmap.nr_entries = E820MAX;
+       set_xen_guest_handle(memmap.buffer, map);
+
+       rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+       if ( rc == -ENOSYS ) {
+               memmap.nr_entries = 1;
+               map[0].addr = 0ULL;
+               map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+               /* 8MB slack (to balance backend allocations). */
+               map[0].size += 8 << 20;
+               map[0].type = E820_RAM;
+               rc = 0;
+       }
+       BUG_ON(rc);
+
+       sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+       BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
+
+       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+       e820_print_map("Xen");
+}
 #endif
 
 void __init parse_memopt(char *p, char **from) 
 { 
+       int i;
+       unsigned long current_end;
+       unsigned long end;
+
        end_user_pfn = memparse(p, from);
        end_user_pfn >>= PAGE_SHIFT;    
-       xen_override_max_pfn = (unsigned long) end_user_pfn;
+
+       end = end_user_pfn<<PAGE_SHIFT;
+       i = e820.nr_map-1;
+       current_end = e820.map[i].addr + e820.map[i].size;
+
+       if (current_end < end) {
+               /*
+                 * The e820 map ends before our requested size so
+                 * extend the final entry to the requested address.
+                 */
+               if (e820.map[i].type == E820_RAM)
+                       e820.map[i].size = end - e820.map[i].addr;
+               else
+                       add_memory_region(current_end, end - current_end, 
E820_RAM);
+       }
 } 
 
 void __init parse_memmapopt(char *p, char **from)
@@ -683,16 +656,17 @@ void __init parse_memmapopt(char *p, cha
        p = *from;
 }
 
+unsigned long pci_mem_start = 0xaeedbabe;
+
 /*
  * Search for the biggest gap in the low 32 bits of the e820
  * memory space.  We pass this space to PCI to assign MMIO resources
  * for hotplug or unconfigured devices in.
  * Hopefully the BIOS let enough space left.
  */
-__init void e820_setup_gap(void)
-{
-#ifndef CONFIG_XEN
-       unsigned long gapstart, gapsize;
+__init void e820_setup_gap(struct e820entry *e820, int nr_map)
+{
+       unsigned long gapstart, gapsize, round;
        unsigned long last;
        int i;
        int found = 0;
@@ -700,10 +674,10 @@ __init void e820_setup_gap(void)
        last = 0x100000000ull;
        gapstart = 0x10000000;
        gapsize = 0x400000;
-       i = e820.nr_map;
+       i = nr_map;
        while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
+               unsigned long long start = e820[i].addr;
+               unsigned long long end = start + e820[i].size;
 
                /*
                 * Since "last" is at most 4GB, we know we'll
@@ -729,16 +703,15 @@ __init void e820_setup_gap(void)
        }
 
        /*
-        * Start allocating dynamic PCI memory a bit into the gap,
-        * aligned up to the nearest megabyte.
-        *
-        * Question: should we try to pad it up a bit (do something
-        * like " + (gapsize >> 3)" in there too?). We now have the
-        * technology.
+        * See how much we want to round up: start off with
+        * rounding to the next 1MB area.
         */
-       pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+       round = 0x100000;
+       while ((gapsize >> 4) > round)
+               round += round;
+       /* Fun with two's complement */
+       pci_mem_start = (gapstart + round) & -round;
 
        printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: 
%lx:%lx)\n",
                pci_mem_start, gapstart, gapsize);
-#endif
-}
+}
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Tue May 30 
14:30:34 2006 -0500
@@ -76,8 +76,8 @@
 #include <xen/features.h>
 #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
-#define end_pfn_map end_pfn
 #include <asm/mach-xen/setup_arch_post.h>
+#include <xen/interface/memory.h>
 
 extern unsigned long start_pfn;
 extern struct edid_info edid_info;
@@ -490,19 +490,6 @@ static __init void parse_cmdline_early (
 }
 
 #ifndef CONFIG_NUMA
-#ifdef CONFIG_XEN
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
-        unsigned long bootmap_size;
-
-        bootmap_size = init_bootmem(start_pfn, end_pfn);
-        free_bootmem(0, xen_start_info->nr_pages << PAGE_SHIFT);   
-        reserve_bootmem(HIGH_MEMORY,
-                        (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1)
-                        - HIGH_MEMORY);
-}
-#else
 static void __init
 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 {
@@ -513,10 +500,13 @@ contig_initmem_init(unsigned long start_
        if (bootmap == -1L)
                panic("Cannot find bootmem map of size %ld\n",bootmap_size);
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+#ifdef CONFIG_XEN
+       e820_bootmem_free(NODE_DATA(0), 0, 
xen_start_info->nr_pages<<PAGE_SHIFT);
+#else
        e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+#endif
        reserve_bootmem(bootmap, bootmap_size);
 } 
-#endif /* !CONFIG_XEN */
 #endif
 
 /* Use inline assembly to define this because the nops are defined 
@@ -636,6 +626,11 @@ void __init setup_arch(char **cmdline_p)
 {
        unsigned long kernel_end;
 
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       struct e820entry *machine_e820;
+       struct xen_memory_map memmap;
+#endif
+
 #ifdef CONFIG_XEN
        /* Register a call for panic conditions. */
        notifier_chain_register(&panic_notifier_list, &xen_panic_block);
@@ -693,20 +688,18 @@ void __init setup_arch(char **cmdline_p)
        rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
        rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
 #endif
+#endif /* !CONFIG_XEN */
        setup_memory_region();
        copy_edd();
-#endif /* !CONFIG_XEN */
 
        if (!MOUNT_ROOT_RDONLY)
                root_mountflags &= ~MS_RDONLY;
        init_mm.start_code = (unsigned long) &_text;
        init_mm.end_code = (unsigned long) &_etext;
        init_mm.end_data = (unsigned long) &_edata;
-#ifdef CONFIG_XEN
-       init_mm.brk = start_pfn << PAGE_SHIFT;
-#else
-       init_mm.brk = (unsigned long) &_end;    
-
+       init_mm.brk = (unsigned long) &_end;
+
+#ifndef CONFIG_XEN
        code_resource.start = virt_to_phys(&_text);
        code_resource.end = virt_to_phys(&_etext)-1;
        data_resource.start = virt_to_phys(&_etext);
@@ -735,12 +728,11 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_NUMA
-       numa_initmem_init(start_pfn, end_pfn); 
+       numa_initmem_init(0, end_pfn); 
 #else
-       contig_initmem_init(start_pfn, end_pfn);
-#endif
-
-#ifndef CONFIG_XEN
+       contig_initmem_init(0, end_pfn);
+#endif
+
        /* Reserve direct mapping */
        reserve_bootmem_generic(table_start << PAGE_SHIFT, 
                                (table_end - table_start) << PAGE_SHIFT);
@@ -749,6 +741,10 @@ void __init setup_arch(char **cmdline_p)
        kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
        reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
 
+#ifdef CONFIG_XEN
+       /* reserve physmap, start info and initial page tables */
+       reserve_bootmem(kernel_end, (table_start<<PAGE_SHIFT)-kernel_end);
+#else
        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
@@ -933,13 +929,25 @@ void __init setup_arch(char **cmdline_p)
        prefill_possible_map();
 #endif
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
        /*
         * Request address space for all standard RAM and ROM resources
         * and also for regions reported as reserved by the e820.
         */
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
        probe_roms();
-       e820_reserve_resources(); 
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+               memmap.nr_entries = E820MAX;
+               set_xen_guest_handle(memmap.buffer, machine_e820);
+
+               BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, 
&memmap));
+
+               e820_reserve_resources(machine_e820, memmap.nr_entries);
+       }
+#elif !defined(CONFIG_XEN)
+       probe_roms();
+       e820_reserve_resources(e820.map, e820.nr_map);
 #endif
 
        request_resource(&iomem_resource, &video_ram_resource);
@@ -951,7 +959,14 @@ void __init setup_arch(char **cmdline_p)
                request_resource(&ioport_resource, &standard_io_resources[i]);
        }
 
-       e820_setup_gap();
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               e820_setup_gap(machine_e820, memmap.nr_entries);
+               free_bootmem(__pa(machine_e820), PAGE_SIZE);
+       }
+#elif !defined(CONFIG_XEN)
+       e820_setup_gap(e820.map, e820.nr_map);
+#endif
 
 #ifdef CONFIG_GART_IOMMU
        iommu_hole_init();
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Tue May 30 14:30:34 
2006 -0500
@@ -370,7 +370,7 @@ void __set_fixmap_user (enum fixed_addre
        set_pte_phys(address, phys, prot, SET_FIXMAP_USER); 
 }
 
-unsigned long __initdata table_start, tables_space; 
+unsigned long __initdata table_start, table_end; 
 
 unsigned long get_machine_pfn(unsigned long addr)
 {
@@ -409,11 +409,17 @@ static inline int make_readonly(unsigned
 {
        int readonly = 0;
 
-       /* Make old and new page tables read-only. */
+       /* Make new page tables read-only. */
+       if (!xen_feature(XENFEAT_writable_page_tables)
+           && (paddr >= (table_start << PAGE_SHIFT))
+           && (paddr < (table_end << PAGE_SHIFT)))
+               readonly = 1;
+       /* Make old page tables read-only. */
        if (!xen_feature(XENFEAT_writable_page_tables)
            && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
-           && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+           && (paddr < (start_pfn << PAGE_SHIFT)))
                readonly = 1;
+
        /*
         * No need for writable mapping of kernel image. This also ensures that
         * page and descriptor tables embedded inside don't have writable
@@ -544,7 +550,7 @@ void __init xen_init_pt(void)
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
 }
 
-void __init extend_init_mapping(void) 
+void __init extend_init_mapping(unsigned long tables_space)
 {
        unsigned long va = __START_KERNEL_map;
        unsigned long phys, addr, *pte_page;
@@ -599,23 +605,23 @@ void __init extend_init_mapping(void)
 
 static void __init find_early_table_space(unsigned long end)
 {
-       unsigned long puds, pmds, ptes; 
+       unsigned long puds, pmds, ptes, tables; 
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
 
-       tables_space =
-               round_up(puds * 8, PAGE_SIZE) + 
+       tables = round_up(puds * 8, PAGE_SIZE) + 
                round_up(pmds * 8, PAGE_SIZE) + 
                round_up(ptes * 8, PAGE_SIZE); 
 
-       extend_init_mapping();
+       extend_init_mapping(tables);
 
        table_start = start_pfn;
+       table_end = table_start + (tables>>PAGE_SHIFT);
 
        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
-               end, table_start << PAGE_SHIFT, start_pfn << PAGE_SHIFT);
+               end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
 }
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -660,7 +666,7 @@ void __meminit init_memory_mapping(unsig
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
        }
 
-       BUG_ON(!after_bootmem && start_pfn != table_start + (tables_space >> 
PAGE_SHIFT));
+       BUG_ON(!after_bootmem && start_pfn != table_end);
 
        __flush_tlb_all();
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Tue May 30 14:30:34 
2006 -0500
@@ -329,7 +329,7 @@ out:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct tpm_private *tp = dev->data;
        DPRINTK("\n");
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 14:30:34 2006 -0500
@@ -1,5 +1,4 @@
 
-obj-y  += net_driver_util.o
 obj-y  += util.o
 
 obj-y  += core/
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Tue May 30 
14:30:34 2006 -0500
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
  * Also protects non-atomic updates of current_pages and driver_pages, and
  * balloon lists.
  */
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
 
 /* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
+       unsigned long min_target;
+
+       /* Do not allow target to reduce below 2% of maximum memory size. */
+       min_target = max_pfn / 50;
+       target = max(target, min_target);
+
        /* No need for lock. Not read-modify-write updates. */
        hard_limit   = ~0UL;
        target_pages = target;
@@ -468,8 +474,8 @@ static int __init balloon_init(void)
 
        IPRINTK("Initialising balloon driver.\n");
 
-       if (xen_init() < 0)
-               return -1;
+       if (!is_running_on_xen())
+               return -ENODEV;
 
        current_pages = min(xen_start_info->nr_pages, max_pfn);
        totalram_pages = current_pages;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Tue May 30 
14:30:34 2006 -0500
@@ -82,7 +82,7 @@ typedef struct {
 
 static pending_req_t *pending_reqs;
 static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
 static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -526,7 +526,7 @@ static int __init blkif_init(void)
        struct page *page;
        int i;
 
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Tue May 30 
14:30:34 2006 -0500
@@ -247,7 +247,7 @@ fail:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct blkfront_info *info = dev->data;
        struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
                   have ignored this request initially, as the device was
                   still mounted. */
                struct xenbus_device * dev = info->xbdev;
-               XenbusState state = xenbus_read_driver_state(dev->otherend);
+               enum xenbus_state state = 
xenbus_read_driver_state(dev->otherend);
 
                if (state == XenbusStateClosing)
                        blkfront_closing(dev);
@@ -792,7 +792,7 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        return xenbus_register_frontend(&blkfront);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Tue May 30 14:30:34 
2006 -0500
@@ -93,7 +93,7 @@ static struct block_device_operations xl
        .ioctl  = blkif_ioctl,
 };
 
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
 
 static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Tue May 30 14:30:34 
2006 -0500
@@ -138,7 +138,7 @@ typedef struct {
  */
 static pending_req_t pending_reqs[MAX_PENDING_REQS];
 static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
 /* NB. We use a different index type to differentiate from shared blk rings. */
 typedef unsigned int PEND_RING_IDX;
 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Tue May 30 
14:30:34 2006 -0500
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
 {
        unsigned int goal;
        goal = simple_strtoul(str, NULL, 0);
-       while (wbuf_size < goal)
-               wbuf_size <<= 1;
+       if (goal) {
+               goal = roundup_pow_of_two(goal);
+               if (wbuf_size < goal)
+                       wbuf_size = goal;
+       }
        return 1;
 }
 __setup("xencons_bufsz=", xencons_bufsz_setup);
 
 /* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
 
 /* Common transmit-kick routine. */
 static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
 
 /******************** Kernel console driver ********************************/
 
-static void kcons_write(
-       struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
 {
        int           i = 0;
        unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
        spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
-static void kcons_write_dom0(
-       struct console *c, const char *s, unsigned int count)
-{
-       int rc;
-
-       while ((count > 0) &&
-              ((rc = HYPERVISOR_console_io(
-                       CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int 
count)
+{
+
+       while (count > 0) {
+               int rc;
+               rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+               if (rc <= 0)
+                       break;
                count -= rc;
                s += rc;
        }
@@ -183,7 +185,7 @@ static struct console kcons_info = {
 #define __RETCODE 0
 static int __init xen_console_init(void)
 {
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return __RETCODE;
 
        if (xen_start_info->flags & SIF_INITDOMAIN) {
@@ -566,7 +568,7 @@ static int __init xencons_init(void)
 {
        int rc;
 
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        if (xc_mode == XC_OFF)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Tue May 30 14:30:34 
2006 -0500
@@ -4,8 +4,9 @@
 
 obj-y   := evtchn.o reboot.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET)     += skbuff.o
-obj-$(CONFIG_SMP)     += smpboot.o
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)     += xen_proc.o
+obj-$(CONFIG_NET)         += skbuff.o
+obj-$(CONFIG_SMP)         += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Tue May 30 14:30:34 
2006 -0500
@@ -51,10 +51,10 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
 
 /* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ...  NR_EVENT_CHANNELS-1] = 
-1};
 
 /* Packed IRQ information: binding type, sub-type index, and event channel. */
 static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
 }
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
 
 /* Reference counts for bindings to IRQs. */
 static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
                BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
 
        /* Secondary CPUs must have no VIRQ or IPI bindings. */
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+       for_each_possible_cpu(cpu) {
+               if (cpu == 0)
+                       continue;
                for (virq = 0; virq < NR_VIRQS; virq++)
                        BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
                for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
 void __init xen_init_IRQ(void)
 {
        int i;
-       int cpu;
-
-       spin_lock_init(&irq_mapping_update_lock);
 
        init_evtchn_cpu_bindings();
 
-       /* No VIRQ or IPI bindings. */
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               for (i = 0; i < NR_VIRQS; i++)
-                       per_cpu(virq_to_irq, cpu)[i] = -1;
-               for (i = 0; i < NR_IPIS; i++)
-                       per_cpu(ipi_to_irq, cpu)[i] = -1;
-       }
-
-       /* No event-channel -> IRQ mappings. */
-       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-               evtchn_to_irq[i] = -1;
-               mask_evtchn(i); /* No event channels are 'live' right now. */
-       }
+       /* No event channels are 'live' right now. */
+       for (i = 0; i < NR_EVENT_CHANNELS; i++)
+               mask_evtchn(i);
 
        /* No IRQ -> event-channel mappings. */
        for (i = 0; i < NR_IRQS; i++)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Tue May 30 14:30:34 
2006 -0500
@@ -38,7 +38,6 @@
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
 #include <xen/interface/xen.h>
-#include <asm/fixmap.h>
 #include <asm/uaccess.h>
 #include <xen/gnttab.h>
 #include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
 static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
 
 static grant_entry_t *shared = NULL;
 
@@ -443,7 +442,7 @@ gnttab_init(void)
 {
        int i;
 
-       if (xen_init() < 0)
+       if (!is_running_on_xen())
                return -ENODEV;
 
        if (gnttab_resume() < 0)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c  Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c  Tue May 30 
14:30:34 2006 -0500
@@ -49,6 +49,9 @@ static struct kobj_type hyp_sysfs_kobj_t
 
 static int __init hypervisor_subsys_init(void)
 {
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
        return subsystem_register(&hypervisor_subsys);
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Tue May 30 14:30:34 
2006 -0500
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
 static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int  smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend()  (0)
-#define smp_resume()   ((void)0)
-#endif
 
 /* Ensure we run on the idle task page tables so that we will
    switch page tables before running user space. This is needed
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Tue May 30 14:30:34 
2006 -0500
@@ -23,6 +23,7 @@
 #include <asm/pgalloc.h>
 #include <xen/evtchn.h>
 #include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
 #include <xen/xenbus.h>
 
 #ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 #elif !defined(CONFIG_X86_IO_APIC)
 unsigned int maxcpus = NR_CPUS;
 #endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
 
 void __init prefill_possible_map(void)
 {
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
        cpu_idle();
 }
 
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
 {
        vcpu_guest_context_t ctxt;
-       struct task_struct *idle = idle_task(vcpu);
+       struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
-       struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+       struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
 #else
-       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
-       if (vcpu == 0)
+       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+       if (cpu == 0)
                return;
 
        memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
 
        ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
-       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
-       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
                cpu_set(cpu, cpu_present_map);
 #endif
 
-               vcpu_prepare(cpu);
-       }
-
-       xenbus_allowed_cpumask = cpu_present_map;
+               cpu_initialize_context(cpu);
+       }
+
+       init_xenbus_allowed_cpumask();
 
        /* Currently, Xen gives no dynamic NUMA/HT info. */
        for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
        cpu_online_map   = cpumask_of_cpu(0);
 }
 
-static int local_cpu_hotplug_request(void)
-{
-       /*
-        * We assume a CPU hotplug request comes from local admin if it is made
-        * via a userspace process (i.e., one with a real mm_struct).
-        */
-       return (current->mm != NULL);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void vcpu_hotplug(unsigned int cpu)
-{
-       int err;
-       char dir[32], state[32];
-
-       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
-               return;
-
-       sprintf(dir, "cpu/%d", cpu);
-       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
-       if (err != 1) {
-               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-               return;
-       }
-
-       if (strcmp(state, "online") == 0) {
-               cpu_set(cpu, xenbus_allowed_cpumask);
-               (void)cpu_up(cpu);
-       } else if (strcmp(state, "offline") == 0) {
-               cpu_clear(cpu, xenbus_allowed_cpumask);
-               (void)cpu_down(cpu);
-       } else {
-               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-                      state, cpu);
-       }
-}
-
-static void handle_vcpu_hotplug_event(
-       struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
-       int cpu;
-       char *cpustr;
-       const char *node = vec[XS_WATCH_PATH];
-
-       if ((cpustr = strstr(node, "cpu/")) != NULL) {
-               sscanf(cpustr, "cpu/%d", &cpu);
-               vcpu_hotplug(cpu);
-       }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
-                             unsigned long action, void *hcpu)
-{
-       int cpu = (long)hcpu;
-
-       /*
-        * We do this in a callback notifier rather than __cpu_disable()
-        * because local_cpu_hotplug_request() does not work in the latter
-        * as it's always executed from within a stopmachine kthread.
-        */
-       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
-               cpu_clear(cpu, local_allowed_cpumask);
-
-       return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
-                             unsigned long event, void *data)
-{
-       int i;
-
-       static struct xenbus_watch cpu_watch = {
-               .node = "cpu",
-               .callback = handle_vcpu_hotplug_event,
-               .flags = XBWF_new_thread };
-       (void)register_xenbus_watch(&cpu_watch);
-
-       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
-               for_each_cpu(i)
-                       vcpu_hotplug(i);
-               printk(KERN_INFO "Brought up %ld CPUs\n",
-                      (long)num_online_cpus());
-       }
-
-       return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
-       static struct notifier_block hotplug_cpu = {
-               .notifier_call = smpboot_cpu_notify };
-       static struct notifier_block xsn_cpu = {
-               .notifier_call = setup_cpu_watcher };
-
-       register_cpu_notifier(&hotplug_cpu);
-       register_xenstore_notifier(&xsn_cpu);
-
-       return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
-       int i, err;
-
-       lock_cpu_hotplug();
-
-       /*
-        * Take all other CPUs offline. We hold the hotplug mutex to
-        * avoid other processes bringing up CPUs under our feet.
-        */
-       while (num_online_cpus() > 1) {
-               unlock_cpu_hotplug();
-               for_each_online_cpu(i) {
-                       if (i == 0)
-                               continue;
-                       err = cpu_down(i);
-                       if (err) {
-                               printk(KERN_CRIT "Failed to take all CPUs "
-                                      "down: %d.\n", err);
-                               for_each_cpu(i)
-                                       vcpu_hotplug(i);
-                               return err;
-                       }
-               }
-               lock_cpu_hotplug();
-       }
-
-       return 0;
-}
-
-void smp_resume(void)
-{
-       int i;
-
-       for_each_cpu(i)
-               vcpu_prepare(i);
-
-       unlock_cpu_hotplug();
-
-       for_each_cpu(i)
-               vcpu_hotplug(i);
-}
-
 static void
 remove_siblinginfo(int cpu)
 {
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
 
 #else /* !CONFIG_HOTPLUG_CPU */
 
-int smp_suspend(void)
-{
-       if (num_online_cpus() > 1) {
-               printk(KERN_WARNING "Can't suspend SMP guests "
-                      "without CONFIG_HOTPLUG_CPU\n");
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-void smp_resume(void)
-{
-}
-
 int __cpu_disable(void)
 {
        return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
 {
        int rc;
 
-       if (local_cpu_hotplug_request()) {
-               cpu_set(cpu, local_allowed_cpumask);
-               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
-                       printk("%s: attempt to bring up CPU %u disallowed by "
-                              "remote admin.\n", __FUNCTION__, cpu);
-                       return -EBUSY;
-               }
-       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
-                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
-               return -EBUSY;
-       }
+       rc = cpu_up_check(cpu);
+       if (rc)
+               return rc;
 
 #ifdef CONFIG_SMP_ALTERNATIVES
        if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
        cpu_set(cpu, cpu_online_map);
 
        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-       if (rc != 0)
-               BUG();
+       BUG_ON(rc);
 
        return 0;
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Tue May 30 14:30:34 
2006 -0500
@@ -429,6 +429,9 @@ static int __init evtchn_init(void)
 {
        int err;
 
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        spin_lock_init(&port_user_lock);
        memset(port_user, 0, sizeof(port_user));
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Tue May 30 
14:30:34 2006 -0500
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
 
 static unsigned long alloc_mfn(void)
 {
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long 
 
 static void netif_idx_release(u16 pending_idx)
 {
-       static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(_lock);
        unsigned long flags;
 
        spin_lock_irqsave(&_lock, flags);
@@ -810,6 +810,9 @@ static int __init netback_init(void)
        int i;
        struct page *page;
 
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        /* We can increase reservation by this much in net_rx_action(). */
        balloon_update_driver_allowance(NET_RX_RING_SIZE);
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -17,13 +17,10 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-
 #include <stdarg.h>
 #include <linux/module.h>
 #include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
 #include "common.h"
-
 
 #if 0
 #undef DPRINTK
@@ -31,22 +28,19 @@
     printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 #endif
 
-
 struct backend_info
 {
        struct xenbus_device *dev;
        netif_t *netif;
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
-
 
 static int connect_rings(struct backend_info *);
 static void connect(struct backend_info *);
 static void maybe_connect(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
-
 
 static int netback_remove(struct xenbus_device *dev)
 {
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
 
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
        kfree(ratestr);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 static void connect(struct backend_info *be)
 {
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Tue May 30 
14:30:34 2006 -0500
@@ -60,12 +60,11 @@
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
 
 #define GRANT_INVALID_REF      0
 
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
 
 static inline void init_skb_shinfo(struct sk_buff *skb)
 {
@@ -80,20 +79,14 @@ struct netfront_info {
 
        struct net_device_stats stats;
 
-       netif_tx_front_ring_t tx;
-       netif_rx_front_ring_t rx;
+       struct netif_tx_front_ring tx;
+       struct netif_rx_front_ring rx;
 
        spinlock_t   tx_lock;
        spinlock_t   rx_lock;
 
        unsigned int handle;
        unsigned int evtchn, irq;
-
-       /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED       0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED    2
-       unsigned int backend_state;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -123,8 +116,8 @@ struct netfront_info {
        u8 mac[ETH_ALEN];
 
        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
-       multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-       mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+       struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+       struct mmu_update rx_mmu[NET_RX_RING_SIZE];
 };
 
 /*
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
        list[0] = list[id];
        return id;
 }
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
-       [BEST_CLOSED]       = "closed",
-       [BEST_DISCONNECTED] = "disconnected",
-       [BEST_CONNECTED]    = "connected",
-};
-#endif
 
 #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
                                        __FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
        return talk_to_backend(dev, info);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_backend(struct xenbus_device *dev,
@@ -317,8 +323,8 @@ again:
 
 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
 {
-       netif_tx_sring_t *txs;
-       netif_rx_sring_t *rxs;
+       struct netif_tx_sring *txs;
+       struct netif_rx_sring *rxs;
        int err;
        struct net_device *netdev = info->netdev;
 
@@ -328,13 +334,13 @@ static int setup_device(struct xenbus_de
        info->tx.sring = NULL;
        info->irq = 0;
 
-       txs = (netif_tx_sring_t *)__get_free_page(GFP_KERNEL);
+       txs = (struct netif_tx_sring *)__get_free_page(GFP_KERNEL);
        if (!txs) {
                err = -ENOMEM;
                xenbus_dev_fatal(dev, err, "allocating tx ring page");
                goto fail;
        }
-       rxs = (netif_rx_sring_t *)__get_free_page(GFP_KERNEL);
+       rxs = (struct netif_rx_sring *)__get_free_page(GFP_KERNEL);
        if (!rxs) {
                err = -ENOMEM;
                xenbus_dev_fatal(dev, err, "allocating rx ring page");
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
        }
        memset(txs, 0, PAGE_SIZE);
        memset(rxs, 0, PAGE_SIZE);
-       info->backend_state = BEST_DISCONNECTED;
 
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        DPRINTK("\n");
 
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       if (np->backend_state != BEST_CONNECTED)
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
        struct xen_memory_reservation reservation;
        grant_ref_t ref;
 
-       if (unlikely(np->backend_state != BEST_CONNECTED))
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        /*
@@ -638,7 +643,7 @@ static int network_start_xmit(struct sk_
 {
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
-       netif_tx_request_t *tx;
+       struct netif_tx_request *tx;
        RING_IDX i;
        grant_ref_t ref;
        unsigned long mfn;
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
 
        spin_lock_irq(&np->tx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }
@@ -736,10 +741,10 @@ static int netif_poll(struct net_device 
 {
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb, *nskb;
-       netif_rx_response_t *rx;
+       struct netif_rx_response *rx;
        RING_IDX i, rp;
-       mmu_update_t *mmu = np->rx_mmu;
-       multicall_entry_t *mcl = np->rx_mcl;
+       struct mmu_update *mmu = np->rx_mmu;
+       struct multicall_entry *mcl = np->rx_mcl;
        int work_done, budget, more_to_do = 1;
        struct sk_buff_head rxq;
        unsigned long flags;
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device 
 
        spin_lock(&np->rx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock(&np->rx_lock);
                return 0;
        }
@@ -962,7 +967,7 @@ static void network_connect(struct net_d
 {
        struct netfront_info *np;
        int i, requeue_idx;
-       netif_tx_request_t *tx;
+       struct netif_tx_request *tx;
        struct sk_buff *skb;
 
        np = netdev_priv(dev);
@@ -1041,11 +1046,9 @@ static void network_connect(struct net_d
         * domain a kick because we've probably just requeued some
         * packets.
         */
-       np->backend_state = BEST_CONNECTED;
+       netif_carrier_on(dev);
        notify_remote_via_irq(np->irq);
        network_tx_buf_gc(dev);
-
-       network_maybe_wake_tx(dev);
 
        spin_unlock(&np->rx_lock);
        spin_unlock_irq(&np->tx_lock);
@@ -1057,7 +1060,7 @@ static void show_device(struct netfront_
        if (np) {
                IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
                        np->handle,
-                       be_state_name[np->backend_state],
+                       netif_carrier_ok(np->netdev) ? "on" : "off",
                        netif_running(np->netdev) ? "open" : "closed",
                        np->evtchn,
                        np->tx,
@@ -1243,9 +1246,10 @@ static struct net_device * __devinit cre
        }
 
        np                = netdev_priv(netdev);
-       np->backend_state = BEST_CLOSED;
        np->handle        = handle;
        np->xbdev         = dev;
+
+       netif_carrier_off(netdev);
 
        spin_lock_init(&np->tx_lock);
        spin_lock_init(&np->rx_lock);
@@ -1394,7 +1398,7 @@ static void netif_disconnect_backend(str
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       info->backend_state = BEST_DISCONNECTED;
+       netif_carrier_off(info->netdev);
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
 
@@ -1454,6 +1458,9 @@ static struct notifier_block notifier_in
 
 static int __init netif_init(void)
 {
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        if (xen_start_info->flags & SIF_INITDOMAIN)
                return 0;
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
 }
 
 static void pciback_frontend_changed(struct xenbus_device *xdev,
-                                    XenbusState fe_state)
+                                    enum xenbus_state fe_state)
 {
        struct pciback_device *pdev = xdev->data;
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Tue May 30 
14:30:34 2006 -0500
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
 static int pcifront_try_disconnect(struct pcifront_device *pdev)
 {
        int err = 0;
-       XenbusState prev_state;
+       enum xenbus_state prev_state;
 
        spin_lock(&pdev->dev_lock);
 
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
 }
 
 static void pcifront_backend_changed(struct xenbus_device *xdev,
-                                    XenbusState be_state)
+                                    enum xenbus_state be_state)
 {
        struct pcifront_device *pdev = xdev->data;
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Tue May 30 
14:30:34 2006 -0500
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
        break;
 
        case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
-               mmu_update_t u;
-               uint64_t ptep;
-#endif
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
                for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
                        if (get_user(mfn, p))
                                return -EFAULT;
-#ifdef __ia64__
+
                        ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-                                                    mfn, 1 << PAGE_SHIFT,
+                                                    mfn, PAGE_SIZE,
                                                     vma->vm_page_prot, m.dom);
                        if (ret < 0)
-                           goto batch_err;
-#else
-
-                       ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
-                       if (ret)
-                               goto batch_err;
-
-                       u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
-                       u.ptr = ptep;
-
-                       if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
                                put_user(0xF0000000 | mfn, p);
-#endif
                }
 
                ret = 0;
@@ -271,6 +255,9 @@ static int capabilities_read(char *page,
 
 static int __init privcmd_init(void)
 {
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        /* Set of hypercalls that privileged applications may execute. */
        set_bit(__HYPERVISOR_acm_op,           hypercall_permission_map);
        set_bit(__HYPERVISOR_dom0_op,          hypercall_permission_map);
@@ -280,6 +267,9 @@ static int __init privcmd_init(void)
        set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
        set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
        set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
+       set_bit(__HYPERVISOR_sched_op_compat,  hypercall_permission_map);
+       set_bit(__HYPERVISOR_event_channel_op_compat,
+               hypercall_permission_map);
 
        privcmd_intf = create_xen_proc_entry("privcmd", 0400);
        if (privcmd_intf != NULL)
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 14:30:34 
2006 -0500
@@ -34,7 +34,7 @@ struct backend_info
 
        /* watch front end for changes */
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
 
 static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_watch *watch,
                             const char **vec, unsigned int len);
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state);
+                             enum xenbus_state frontend_state);
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
 
 
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Tue May 30 
14:30:34 2006 -0500
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 
 
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
        /* We check whether the state is currently set to the given value, and
           if not, then the state is set.  We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
 }
 
 
-XenbusState xenbus_read_driver_state(const char *path)
-{
-       XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+       enum xenbus_state result;
        int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
        if (err)
                result = XenbusStateClosed;
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Tue May 30 
14:30:34 2006 -0500
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
        struct xenbus_device *dev =
                container_of(watch, struct xenbus_device, otherend_watch);
        struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-       XenbusState state;
+       enum xenbus_state state;
 
        /* Protect us against watches firing on old details when the otherend
           details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
        size_t stringlen;
        char *tmpstring;
 
-       XenbusState state = xenbus_read_driver_state(nodename);
+       enum xenbus_state state = xenbus_read_driver_state(nodename);
 
        if (state != XenbusStateInitialising) {
                /* Device is not new, so ignore it.  This can happen if a
@@ -966,10 +966,8 @@ static int __init xenbus_probe_init(void
 
        DPRINTK("");
 
-       if (xen_init() < 0) {
-               DPRINTK("failed");
+       if (!is_running_on_xen())
                return -ENODEV;
-       }
 
        /* Register ourselves with the kernel bus subsystem */
        bus_register(&xenbus_frontend.bus);
@@ -1069,10 +1067,8 @@ static int __init wait_for_devices(void)
 {
        unsigned long timeout = jiffies + 10*HZ;
 
-       if (xen_init() < 0) {
-               DPRINTK("failed");
+       if (!is_running_on_xen())
                return -ENODEV;
-       }
 
        while (time_before(jiffies, timeout)) {
                if (all_devices_ready())
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h   Tue May 
30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h   Tue May 
30 14:30:34 2006 -0500
@@ -118,7 +118,7 @@ u64 jiffies_to_st(unsigned long jiffies)
 #define MULTI_UVMDOMID_INDEX 4
 #endif
 
-#define xen_init()     (0)
+#define is_running_on_xen() 1
 
 static inline int
 HYPERVISOR_yield(
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Tue May 
30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Tue May 
30 14:30:34 2006 -0500
@@ -10,10 +10,32 @@
 
 static char * __init machine_specific_memory_setup(void)
 {
-       unsigned long max_pfn = xen_start_info->nr_pages;
+       int rc;
+       struct xen_memory_map memmap;
+       /*
+        * This is rather large for a stack variable but this early in
+        * the boot process we know we have plenty slack space.
+        */
+       struct e820entry map[E820MAX];
 
-       e820.nr_map = 0;
-       add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
+       memmap.nr_entries = E820MAX;
+       set_xen_guest_handle(memmap.buffer, map);
+
+       rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+       if ( rc == -ENOSYS ) {
+               memmap.nr_entries = 1;
+               map[0].addr = 0ULL;
+               map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+               /* 8MB slack (to balance backend allocations). */
+               map[0].size += 8 << 20;
+               map[0].type = E820_RAM;
+               rc = 0;
+       }
+       BUG_ON(rc);
+
+       sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+       BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
 
        return "Xen";
 }
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 14:30:34 
2006 -0500
@@ -195,12 +195,42 @@ HYPERVISOR_multicall(
     return _hypercall2(int, multicall, call_list, nr_calls);
 }
 
+#ifndef CONFIG_XEN_IA64_DOM0_VP
 static inline int
 HYPERVISOR_memory_op(
     unsigned int cmd, void *arg)
 {
     return _hypercall2(int, memory_op, cmd, arg);
 }
+#else
+//XXX xen/ia64 copy_from_guest() is broken.
+//    This is a temporal work around until it is fixed.
+static inline int
+____HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg)
+{
+    return _hypercall2(int, memory_op, cmd, arg);
+}
+
+#include <xen/interface/memory.h>
+int ia64_xenmem_reservation_op(unsigned long op,
+                  struct xen_memory_reservation* reservation__);
+static inline int
+HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg)
+{
+    switch (cmd) {
+    case XENMEM_increase_reservation:
+    case XENMEM_decrease_reservation:
+    case XENMEM_populate_physmap:
+        return ia64_xenmem_reservation_op(cmd, 
+                                          (struct xen_memory_reservation*)arg);
+    default:
+        return ____HYPERVISOR_memory_op(cmd, arg);
+    }
+    /* NOTREACHED */
+}
+#endif
 
 static inline int
 HYPERVISOR_event_channel_op(
@@ -244,12 +274,19 @@ HYPERVISOR_physdev_op(
     return rc;
 }
 
-static inline int
-HYPERVISOR_grant_table_op(
+//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant.
+static inline int
+____HYPERVISOR_grant_table_op(
     unsigned int cmd, void *uop, unsigned int count)
 {
     return _hypercall3(int, grant_table_op, cmd, uop, count);
 }
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+#define HYPERVISOR_grant_table_op(cmd, uop, count) \
+       ____HYPERVISOR_grant_table_op((cmd), (uop), (count))
+#else
+int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+#endif
 
 static inline int
 HYPERVISOR_vcpu_op(
@@ -281,6 +318,7 @@ static inline void exit_idle(void) {}
 #define do_IRQ(irq, regs) __do_IRQ((irq), (regs))
 
 #ifdef CONFIG_XEN_IA64_DOM0_VP
+#include <linux/err.h>
 #include <asm/xen/privop.h>
 
 #define _hypercall_imm1(type, name, imm, a1)                   \
@@ -382,6 +420,10 @@ HYPERVISOR_ioremap(unsigned long ioaddr,
        unsigned long ret = ioaddr;
        if (running_on_xen) {
                ret = __HYPERVISOR_ioremap(ioaddr, size);
+               if (unlikely(IS_ERR_VALUE(ret)))
+                       panic("hypercall %s failed with %ld. "
+                             "Please check Xen and Linux config mismatch\n",
+                             __func__, -ret);
        }
        return ret;
 }
@@ -421,27 +463,6 @@ HYPERVISOR_machtophys(unsigned long mfn)
 }
 
 static inline unsigned long
-__HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
-                             unsigned int address_bits)
-{
-       return _hypercall_imm3(unsigned long, ia64_dom0vp_op,
-                              IA64_DOM0VP_populate_physmap, gpfn, 
-                              extent_order, address_bits);
-}
-
-static inline unsigned long
-HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
-                           unsigned int address_bits)
-{
-       unsigned long ret = 0;
-       if (running_on_xen) {
-               ret = __HYPERVISOR_populate_physmap(gpfn, extent_order,
-                                                   address_bits);
-       }
-       return ret;
-}
-
-static inline unsigned long
 __HYPERVISOR_zap_physmap(unsigned long gpfn, unsigned int extent_order)
 {
        return _hypercall_imm2(unsigned long, ia64_dom0vp_op,
@@ -466,6 +487,7 @@ __HYPERVISOR_add_physmap(unsigned long g
                               IA64_DOM0VP_add_physmap, gpfn, mfn, flags,
                               domid);
 }
+
 static inline unsigned long
 HYPERVISOR_add_physmap(unsigned long gpfn, unsigned long mfn,
                       unsigned int flags, domid_t domid)
@@ -477,13 +499,15 @@ HYPERVISOR_add_physmap(unsigned long gpf
        }
        return ret;
 }
+
+// for balloon driver
+#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
+
 #else
-#define HYPERVISOR_ioremap(ioaddr, size)               ({ioaddr;})
-#define HYPERVISOR_phystomach(gpfn)                    ({gpfn;})
-#define HYPERVISOR_machtophys(mfn)                     ({mfn;})
-#define HYPERVISOR_populate_physmap(gpfn, extent_order, address_bits) \
-                                                       ({0;})
-#define HYPERVISOR_zap_physmap(gpfn, extent_order)     ({0;})
-#define HYPERVISOR_add_physmap(gpfn, mfn, flags)       ({0;})
+#define HYPERVISOR_ioremap(ioaddr, size)               (ioaddr)
+#define HYPERVISOR_phystomach(gpfn)                    (gpfn)
+#define HYPERVISOR_machtophys(mfn)                     (mfn)
+#define HYPERVISOR_zap_physmap(gpfn, extent_order)     (0)
+#define HYPERVISOR_add_physmap(gpfn, mfn, flags)       (0)
 #endif
 #endif /* __HYPERCALL_H__ */
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Tue May 30 
14:30:34 2006 -0500
@@ -53,7 +53,7 @@ extern start_info_t *xen_start_info;
 
 void force_evtchn_callback(void);
 
-int xen_init(void);
+#define is_running_on_xen() running_on_xen
 
 /* Turn jiffies into Xen system time. XXX Implement me. */
 #define jiffies_to_st(j)       0
@@ -118,11 +118,22 @@ HYPERVISOR_poll(
 }
 
 // for drivers/xen/privcmd/privcmd.c
-#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e)
 #define machine_to_phys_mapping 0
 #ifndef CONFIG_XEN_IA64_DOM0_VP
+#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e)
 #define        pfn_to_mfn(x)   (x)
 #define        mfn_to_pfn(x)   (x)
+#else
+struct vm_area_struct;
+int direct_remap_pfn_range(struct vm_area_struct *vma,
+                          unsigned long address,
+                          unsigned long mfn,
+                          unsigned long size,
+                          pgprot_t prot,
+                          domid_t  domid);
+struct file;
+int privcmd_mmap(struct file * file, struct vm_area_struct * vma);
+#define HAVE_ARCH_PRIVCMD_MMAP
 #endif
 
 // for drivers/xen/balloon/balloon.c
@@ -147,7 +158,7 @@ xen_create_contiguous_region(unsigned lo
                              unsigned int order, unsigned int address_bits)
 {
        int ret = 0;
-       if (running_on_xen) {
+       if (is_running_on_xen()) {
                ret = __xen_create_contiguous_region(vstart, order,
                                                     address_bits);
        }
@@ -158,11 +169,24 @@ static inline void
 static inline void
 xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 {
-       if (running_on_xen)
+       if (is_running_on_xen())
                __xen_destroy_contiguous_region(vstart, order);
 }
+
+// for netfront.c, netback.c
+#define MULTI_UVMFLAGS_INDEX 0 //XXX any value
+
+static inline void
+MULTI_update_va_mapping(
+       multicall_entry_t *mcl, unsigned long va,
+       pte_t new_val, unsigned long flags)
+{
+       mcl->op = __HYPERVISOR_update_va_mapping;
+       mcl->result = 0;
+}
+
 #else
-#define xen_create_contiguous_region(vstart, order, address_bits)      ({0;})
+#define xen_create_contiguous_region(vstart, order, address_bits)      (0)
 #define xen_destroy_contiguous_region(vstart, order)   do {} while (0)
 #endif
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/page.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/page.h      Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h      Tue May 30 14:30:34 
2006 -0500
@@ -234,6 +234,43 @@ get_order (unsigned long size)
 
 #include <linux/kernel.h>
 #include <asm/hypervisor.h>
+#include <xen/features.h>      // to compile netback, netfront
+typedef unsigned long maddr_t; // to compile netback, netfront
+
+// XXX hack!
+//     Linux/IA64 uses PG_arch_1.
+//     This hack will be removed once PG_foreign bit is taken.
+//#include <xen/foreign_page.h>
+#ifdef __ASM_XEN_FOREIGN_PAGE_H__
+# error "don't include include/xen/foreign_page.h!"
+#endif
+
+extern struct address_space xen_ia64_foreign_dummy_mapping;
+#define PageForeign(page)      \
+       ((page)->mapping == &xen_ia64_foreign_dummy_mapping)
+
+#define SetPageForeign(page, dtor) do {                                \
+       set_page_private((page), (unsigned long)(dtor));        \
+       (page)->mapping = &xen_ia64_foreign_dummy_mapping;      \
+       smp_rmb();                                              \
+} while (0)
+
+#define ClearPageForeign(page) do {    \
+       (page)->mapping = NULL;         \
+       smp_rmb();                      \
+       set_page_private((page), 0);    \
+} while (0)
+
+#define PageForeignDestructor(page)    \
+       ( (void (*) (struct page *)) page_private(page) )
+
+#define arch_free_page(_page,_order)                   \
+({      int foreign = PageForeign(_page);               \
+       if (foreign)                                    \
+               (PageForeignDestructor(_page))(_page);  \
+       foreign;                                        \
+})
+#define HAVE_ARCH_FREE_PAGE
 
 //XXX xen page size != page size
 
@@ -279,11 +316,14 @@ machine_to_phys_for_dma(unsigned long ma
 #define set_phys_to_machine(pfn, mfn) do { } while (0)
 #define xen_machphys_update(mfn, pfn) do { } while (0)
 
-#define mfn_to_pfn(mfn)                        ({(mfn);})
-#define mfn_to_virt(mfn)               ({__va((mfn) << PAGE_SHIFT);})
-#define pfn_to_mfn(pfn)                        ({(pfn);})
-#define virt_to_mfn(virt)              ({__pa(virt) >> PAGE_SHIFT;})
-#define virt_to_machine(virt)          ({__pa(virt);}) // for tpmfront.c
+//XXX to compile set_phys_to_machine(vaddr, FOREIGN_FRAME(m))
+#define FOREIGN_FRAME(m)        (INVALID_P2M_ENTRY)
+
+#define mfn_to_pfn(mfn)                        (mfn)
+#define mfn_to_virt(mfn)               (__va((mfn) << PAGE_SHIFT))
+#define pfn_to_mfn(pfn)                        (pfn)
+#define virt_to_mfn(virt)              (__pa(virt) >> PAGE_SHIFT)
+#define virt_to_machine(virt)          __pa(virt) // for tpmfront.c
 
 #endif /* CONFIG_XEN_IA64_DOM0_VP */
 #endif /* CONFIG_XEN */
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/privop.h    Tue May 30 12:52:02 
2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/privop.h    Tue May 30 14:30:34 
2006 -0500
@@ -44,13 +44,14 @@
 #define ia64_ptcl                      __ia64_ptcl
 #define ia64_ptri                      __ia64_ptri
 #define ia64_ptrd                      __ia64_ptrd
-#define        ia64_get_psr_i                  __ia64_get_psr_i
+#define ia64_get_psr_i                 __ia64_get_psr_i
 #define ia64_intrin_local_irq_restore  __ia64_intrin_local_irq_restore
 #define ia64_pal_halt_light            __ia64_pal_halt_light
-#define        ia64_leave_kernel               __ia64_leave_kernel
-#define        ia64_leave_syscall              __ia64_leave_syscall
-#define        ia64_switch_to                  __ia64_switch_to
-#define        ia64_pal_call_static            __ia64_pal_call_static
+#define ia64_leave_kernel              __ia64_leave_kernel
+#define ia64_leave_syscall             __ia64_leave_syscall
+#define ia64_trace_syscall             __ia64_trace_syscall
+#define ia64_switch_to                 __ia64_switch_to
+#define ia64_pal_call_static           __ia64_pal_call_static
 
 #endif /* !IA64_PARAVIRTUALIZED */
 
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Tue May 30 
12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Tue May 30 
14:30:34 2006 -0500
@@ -11,45 +11,41 @@
 
 
 #include <asm/xen/asm-xsi-offsets.h>
+#include <xen/interface/arch-ia64.h>
 
 #define IA64_PARAVIRTUALIZED
 
 #ifdef __ASSEMBLY__
-#define        XEN_HYPER_RFI                   break 0x1
-#define        XEN_HYPER_RSM_PSR_DT            break 0x2
-#define        XEN_HYPER_SSM_PSR_DT            break 0x3
-#define        XEN_HYPER_COVER                 break 0x4
-#define        XEN_HYPER_ITC_D                 break 0x5
-#define        XEN_HYPER_ITC_I                 break 0x6
-#define        XEN_HYPER_SSM_I                 break 0x7
-#define        XEN_HYPER_GET_IVR               break 0x8
-#define        XEN_HYPER_GET_TPR               break 0x9
-#define        XEN_HYPER_SET_TPR               break 0xa
-#define        XEN_HYPER_EOI                   break 0xb
-#define        XEN_HYPER_SET_ITM               break 0xc
-#define        XEN_HYPER_THASH                 break 0xd
-#define        XEN_HYPER_PTC_GA                break 0xe
-#define        XEN_HYPER_ITR_D                 break 0xf
-#define        XEN_HYPER_GET_RR                break 0x10
-#define        XEN_HYPER_SET_RR                break 0x11
-#define        XEN_HYPER_SET_KR                break 0x12
-#define        XEN_HYPER_FC                    break 0x13
-#define        XEN_HYPER_GET_CPUID             break 0x14
-#define        XEN_HYPER_GET_PMD               break 0x15
-#define        XEN_HYPER_GET_EFLAG             break 0x16
-#define        XEN_HYPER_SET_EFLAG             break 0x17
+#define        XEN_HYPER_RFI                   break HYPERPRIVOP_RFI
+#define        XEN_HYPER_RSM_PSR_DT            break HYPERPRIVOP_RSM_DT
+#define        XEN_HYPER_SSM_PSR_DT            break HYPERPRIVOP_SSM_DT
+#define        XEN_HYPER_COVER                 break HYPERPRIVOP_COVER
+#define        XEN_HYPER_ITC_D                 break HYPERPRIVOP_ITC_D
+#define        XEN_HYPER_ITC_I                 break HYPERPRIVOP_ITC_I
+#define        XEN_HYPER_SSM_I                 break HYPERPRIVOP_SSM_I
+#define        XEN_HYPER_GET_IVR               break HYPERPRIVOP_GET_IVR
+#define        XEN_HYPER_GET_TPR               break HYPERPRIVOP_GET_TPR
+#define        XEN_HYPER_SET_TPR               break HYPERPRIVOP_SET_TPR
+#define        XEN_HYPER_EOI                   break HYPERPRIVOP_EOI
+#define        XEN_HYPER_SET_ITM               break HYPERPRIVOP_SET_ITM
+#define        XEN_HYPER_THASH                 break HYPERPRIVOP_THASH
+#define        XEN_HYPER_PTC_GA                break HYPERPRIVOP_PTC_GA
+#define        XEN_HYPER_ITR_D                 break HYPERPRIVOP_ITR_D
+#define        XEN_HYPER_GET_RR                break HYPERPRIVOP_GET_RR
+#define        XEN_HYPER_SET_RR                break HYPERPRIVOP_SET_RR
+#define        XEN_HYPER_SET_KR                break HYPERPRIVOP_SET_KR
+#define        XEN_HYPER_FC                    break HYPERPRIVOP_FC
+#define        XEN_HYPER_GET_CPUID             break HYPERPRIVOP_GET_CPUID
+#define        XEN_HYPER_GET_PMD               break HYPERPRIVOP_GET_PMD
+#define        XEN_HYPER_GET_EFLAG             break HYPERPRIVOP_GET_EFLAG
+#define        XEN_HYPER_SET_EFLAG             break HYPERPRIVOP_SET_EFLAG
 #endif
 
 #ifndef __ASSEMBLY__
-#ifdef MODULE
-extern int is_running_on_xen(void);
-#define running_on_xen (is_running_on_xen())
-#else
 extern int running_on_xen;
-#endif
-
-#define        XEN_HYPER_SSM_I                 asm("break 0x7");
-#define        XEN_HYPER_GET_IVR               asm("break 0x8");
+
+#define        XEN_HYPER_SSM_I         asm("break %0" : : "i" 
(HYPERPRIVOP_SSM_I))
+#define        XEN_HYPER_GET_IVR       asm("break %0" : : "i" 
(HYPERPRIVOP_GET_IVR))
 
 /************************************************/
 /* Instructions paravirtualized for correctness */
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 14:30:34 2006 -0500
@@ -75,7 +75,7 @@ struct xenbus_device {
        int otherend_id;
        struct xenbus_watch otherend_watch;
        struct device dev;
-       XenbusState state;
+       enum xenbus_state state;
        void *data;
 };
 
@@ -98,7 +98,7 @@ struct xenbus_driver {
        int (*probe)(struct xenbus_device *dev,
                     const struct xenbus_device_id *id);
        void (*otherend_changed)(struct xenbus_device *dev,
-                                XenbusState backend_state);
+                                enum xenbus_state backend_state);
        int (*remove)(struct xenbus_device *dev);
        int (*suspend)(struct xenbus_device *dev);
        int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
  * Return 0 on success, or -errno on error.  On error, the device will switch
  * to XenbusStateClosing, and the error will be saved in the store.
  */
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state 
new_state);
 
 
 /**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
  * Return the state of the driver rooted at the given store path, or
  * XenbusStateClosed if no state can be read.
  */
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
 
 
 /***
diff -r e74246451527 -r f54d38cea8ac tools/examples/network-bridge
--- a/tools/examples/network-bridge     Tue May 30 12:52:02 2006 -0500
+++ b/tools/examples/network-bridge     Tue May 30 14:30:34 2006 -0500
@@ -59,7 +59,7 @@ findCommand "$@"
 findCommand "$@"
 evalVariables "$@"
 
-vifnum=${vifnum:-$(ip route list | awk '/^default / { 
sub(/^(eth|xenbr)/,"",$NF); print $NF }')}
+vifnum=${vifnum:-$(ip route list | awk '/^default / { print $NF }' | sed 
's/^[^0-9]*//')}
 bridge=${bridge:-xenbr${vifnum}}
 netdev=${netdev:-eth${vifnum}}
 antispoof=${antispoof:-no}
diff -r e74246451527 -r f54d38cea8ac tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti      Tue May 30 12:52:02 2006 -0500
+++ b/tools/examples/xmexample.vti      Tue May 30 14:30:34 2006 -0500
@@ -36,7 +36,7 @@ name = "ExampleVTIDomain"
 # Random MACs are assigned if not given.
 #vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0' ]
 # type=ioemu specify the NIC is an ioemu device not netfront
-#vif = [ 'type=ioemu, bridge=xenbr0' ]
+vif = [ 'type=ioemu, bridge=xenbr0' ]
 # for multiple NICs in device model, 3 in this example
 #vif = [ 'type=ioemu, bridge=xenbr0', 'type=ioemu', 'type=ioemu']
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/Makefile
--- a/tools/libxc/Makefile      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/Makefile      Tue May 30 14:30:34 2006 -0500
@@ -20,6 +20,7 @@ SRCS       += xc_physdev.c
 SRCS       += xc_physdev.c
 SRCS       += xc_private.c
 SRCS       += xc_sedf.c
+SRCS       += xc_csched.c
 SRCS       += xc_tbuf.c
 
 ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_acm.c
--- a/tools/libxc/xc_acm.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_acm.c      Tue May 30 14:30:34 2006 -0500
@@ -1,13 +1,10 @@
 /******************************************************************************
+ * xc_acm.c
  *
- * Copyright (C) 2005 IBM Corporation
+ * Copyright (C) 2005, 2006 IBM Corporation, R Sailer
  *
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- *
- * Authors:
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Stefan Berger <stefanb@xxxxxxxxxxxxxx>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -17,29 +14,23 @@
 
 #include "xc_private.h"
 
-int xc_acm_op(int xc_handle, struct acm_op *op)
+
+int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size)
 {
     int ret = -1;
     DECLARE_HYPERCALL;
 
-    op->interface_version = ACM_INTERFACE_VERSION;
+    hypercall.op = __HYPERVISOR_acm_op;
+    hypercall.arg[0] = cmd;
+    hypercall.arg[1] = (unsigned long) arg;
 
-    hypercall.op = __HYPERVISOR_acm_op;
-    hypercall.arg[0] = (unsigned long) op;
-
-    if (mlock(op, sizeof(*op)) != 0) {
-        PERROR("Could not lock memory for Xen policy hypercall");
-        goto out1;
+    if (mlock(arg, arg_size) != 0) {
+        PERROR("xc_acm_op: arg mlock failed");
+        goto out;
     }
-
     ret = do_xen_hypercall(xc_handle, &hypercall);
-    ret = ioctl(xc_handle, IOCTL_PRIVCMD_HYPERCALL, &hypercall);
-    if (ret < 0) {
-        goto out2;
-    }
- out2:
-    safe_munlock(op, sizeof(*op));
- out1:
+    safe_munlock(arg, arg_size);
+ out:
     return ret;
 }
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ia64_stubs.c       Tue May 30 14:30:34 2006 -0500
@@ -50,7 +50,7 @@ xc_plan9_build(int xc_handle,
 }
 /*  
     VMM uses put_user to copy pfn_list to guest buffer, this maybe fail,
-    VMM don't handle this now.
+    VMM doesn't handle this now.
     This method will touch guest buffer to make sure the buffer's mapping
     is tracked by VMM,
  */
@@ -66,6 +66,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
     unsigned int __start_page, __nr_pages;
     unsigned long max_pfns;
     unsigned long *__pfn_buf;
+
     __start_page = start_page;
     __nr_pages = nr_pages;
     __pfn_buf = pfn_buf;
@@ -75,6 +76,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
         op.cmd = DOM0_GETMEMLIST;
         op.u.getmemlist.domain   = (domid_t)domid;
         op.u.getmemlist.max_pfns = max_pfns;
+        op.u.getmemlist.num_pfns = 0;
         set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
 
         if ( (max_pfns != -1UL)
@@ -723,7 +725,7 @@ int xc_hvm_build(int xc_handle,
 
     ctxt->flags = VGCF_VMX_GUEST;
     ctxt->regs.cr_iip = 0x80000000ffffffb0UL;
-    ctxt->vcpu.privregs = 0;
+    ctxt->privregs = 0;
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_linux_build.c      Tue May 30 14:30:34 2006 -0500
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count, nmfn;
+    unsigned long ppt_alloc, count;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
@@ -327,6 +316,13 @@ static int setup_pg_tables_pae(int xc_ha
                 *vl1e &= ~_PAGE_RW;
         }
         vl1e++;
+    }
+
+    /* Xen requires a mid-level pgdir mapping 0xC0000000 region. */
+    if ( (vl3tab[3] & _PAGE_PRESENT) == 0 )
+    {
+        alloc_pt(l2tab, vl2tab, pl2tab);
+        vl3tab[3] = l2tab | L3_PROT;
     }
 
     munmap(vl1tab, PAGE_SIZE);
@@ -727,25 +723,28 @@ static int setup_guest(int xc_handle,
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
             v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
-        if ( dsi.pae_kernel )
-        {
-            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
-            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
-                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
-                break;
-        }
-        else
-        {
-            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
-                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
-                break;
-        }
-#endif
-#if defined(__x86_64__)
 #define NR(_l,_h,_s) \
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
     ((_l) & ~((1UL<<(_s))-1))) >> (_s))
+#if defined(__i386__)
+        if ( dsi.pae_kernel )
+        {
+            if ( (1 + /* # L3 */
+                  NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
+                  NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT_PAE) + /* # L1 */
+                  /* Include a fourth mid-level page directory for Xen. */
+                  (v_end <= (3 << L3_PAGETABLE_SHIFT_PAE)))
+                  <= nr_pt_pages )
+                break;
+        }
+        else
+        {
+            if ( (1 + /* # L2 */
+                  NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+                 <= nr_pt_pages )
+                break;
+        }
+#elif defined(__x86_64__)
         if ( (1 + /* # L4 */
               NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
               NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
@@ -1116,7 +1115,7 @@ static int xc_linux_build_internal(int x
     ctxt->regs.ar_fpsr = xc_ia64_fpsr_default();
     /* currently done by hypervisor, should move here */
     /* ctxt->regs.r28 = dom_fw_setup(); */
-    ctxt->vcpu.privregs = 0;
+    ctxt->privregs = 0;
     ctxt->sys_pgnr = 3;
     i = 0; /* silence unused variable warning */
 #else /* x86 */
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_linux_restore.c    Tue May 30 14:30:34 2006 -0500
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int 
                 ** A page table page - need to 'uncanonicalize' it, i.e.
                 ** replace all the references to pfns with the corresponding
                 ** mfns for the new domain.
-                **
-                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-                ** so we may need to update the p2m after the main loop.
-                ** Hence we defer canonicalization of L1s until then.
                 */
-                if(pt_levels != 3 || pagetype != L1TAB) {
-
-                    if(!uncanonicalize_pagetable(pagetype, page)) {
-                        /*
-                        ** Failing to uncanonicalize a page table can be ok
-                        ** under live migration since the pages type may have
-                        ** changed by now (and we'll get an update later).
-                        */
-                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                                pagetype >> 28, pfn, mfn);
-                        nraces++;
-                        continue;
-                    }
-
+                if(!uncanonicalize_pagetable(pagetype, page)) {
+                    /*
+                    ** Failing to uncanonicalize a page table can be ok
+                    ** under live migration since the pages type may have
+                    ** changed by now (and we'll get an update later).
+                    */
+                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                            pagetype >> 28, pfn, mfn);
+                    nraces++;
+                    continue;
                 }
 
             } else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if(pt_levels == 3) {
-
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for (i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
-                    ERR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                p2m[i] = new_mfn;
-                if (xc_add_mmu_update(xc_handle, mmu,
-                                      (((unsigned long long)new_mfn)
-                                       << PAGE_SHIFT) |
-                                      MMU_MACHPHYS_UPDATE, i)) {
-                    ERR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for(i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
-                region_mfn[j] = p2m[i];
-                j++;
-            }
-
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
-                if (!(region_base = xc_map_foreign_batch(
-                          xc_handle, dom, PROT_READ | PROT_WRITE,
-                          region_mfn, j))) {
-                    ERR("map batch failed");
-                    goto out;
-                }
-
-                for(k = 0; k < j; k++) {
-                    if(!uncanonicalize_pagetable(L1TAB,
-                                                 region_base + k*PAGE_SIZE)) {
-                        ERR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-    }
 
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_load_elf.c Tue May 30 14:30:34 2006 -0500
@@ -59,10 +59,10 @@ static int parseelfimage(const char *ima
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
-    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base;
+    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off;
     const char *shstrtab;
     char *guestinfo=NULL, *p;
-    int h;
+    int h, virt_base_defined, elf_pa_off_defined;
 
     if ( !IS_ELF(*ehdr) )
     {
@@ -164,34 +164,40 @@ static int parseelfimage(const char *ima
 
     dsi->xen_guest_string = guestinfo;
 
-    if ( (p = strstr(guestinfo, "VIRT_BASE=")) == NULL )
-    {
-        ERROR("Malformed ELF image. No VIRT_BASE specified");
-        return -EINVAL;
-    }
-
-    virt_base = strtoul(p+10, &p, 0);
-
-    dsi->elf_paddr_offset = virt_base;
-    if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL )
-        dsi->elf_paddr_offset = strtoul(p+17, &p, 0);
+    /* Initial guess for virt_base is 0 if it is not explicitly defined. */
+    p = strstr(guestinfo, "VIRT_BASE=");
+    virt_base_defined = (p != NULL);
+    virt_base = virt_base_defined ? strtoul(p+10, &p, 0) : 0;
+
+    /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */
+    p = strstr(guestinfo, "ELF_PADDR_OFFSET=");
+    elf_pa_off_defined = (p != NULL);
+    elf_pa_off = elf_pa_off_defined ? strtoul(p+17, &p, 0) : virt_base;
+
+    if ( elf_pa_off_defined && !virt_base_defined )
+        goto bad_image;
 
     for ( h = 0; h < ehdr->e_phnum; h++ )
     {
         phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base;
+        vaddr = phdr->p_paddr - elf_pa_off + virt_base;
+        if ( (vaddr + phdr->p_memsz) < vaddr )
+            goto bad_image;
         if ( vaddr < kernstart )
             kernstart = vaddr;
         if ( (vaddr + phdr->p_memsz) > kernend )
             kernend = vaddr + phdr->p_memsz;
     }
 
-    if ( virt_base )
-        dsi->v_start = virt_base;
-    else
-        dsi->v_start = kernstart;
+    /*
+     * Legacy compatibility and images with no __xen_guest section: assume
+     * header addresses are virtual addresses, and that guest memory should be
+     * mapped starting at kernel load address.
+     */
+    dsi->v_start          = virt_base_defined  ? virt_base  : kernstart;
+    dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start;
 
     dsi->v_kernentry = ehdr->e_entry;
     if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL )
@@ -199,11 +205,9 @@ static int parseelfimage(const char *ima
 
     if ( (kernstart > kernend) ||
          (dsi->v_kernentry < kernstart) ||
-         (dsi->v_kernentry > kernend) )
-    {
-        ERROR("Malformed ELF image.");
-        return -EINVAL;
-    }
+         (dsi->v_kernentry > kernend) ||
+         (dsi->v_start > kernstart) )
+        goto bad_image;
 
     if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
         dsi->load_symtab = 1;
@@ -215,6 +219,10 @@ static int parseelfimage(const char *ima
     loadelfsymtab(image, 0, 0, NULL, dsi);
 
     return 0;
+
+ bad_image:
+    ERROR("Malformed ELF image.");
+    return -EINVAL;
 }
 
 static int
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_private.c  Tue May 30 14:30:34 2006 -0500
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
     return rc;
 }
 
-unsigned long xc_make_page_below_4G(
-    int xc_handle, uint32_t domid, unsigned long mfn)
-{
-    unsigned long new_mfn;
-
-    if ( xc_domain_memory_decrease_reservation(
-        xc_handle, domid, 1, 0, &mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    if ( xc_domain_memory_increase_reservation(
-        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    return new_mfn;
-}
-
 /*
  * Local variables:
  * mode: C
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace.c   Tue May 30 14:30:34 2006 -0500
@@ -185,61 +185,36 @@ map_domain_va_32(
     void *guest_va,
     int perm)
 {
-    unsigned long pde, page;
-    unsigned long va = (unsigned long)guest_va;
-
-    static unsigned long  cr3_phys[MAX_VIRT_CPUS];
-    static uint32_t *cr3_virt[MAX_VIRT_CPUS];
-    static unsigned long  pde_phys[MAX_VIRT_CPUS];
-    static uint32_t *pde_virt[MAX_VIRT_CPUS];
-    static unsigned long  page_phys[MAX_VIRT_CPUS];
-    static uint32_t *page_virt[MAX_VIRT_CPUS];
-    static int            prev_perm[MAX_VIRT_CPUS];
-
-   if (ctxt[cpu].ctrlreg[3] == 0)
-       return NULL;
-   if ( ctxt[cpu].ctrlreg[3] != cr3_phys[cpu] )
-    {
-        cr3_phys[cpu] = ctxt[cpu].ctrlreg[3];
-        if ( cr3_virt[cpu] )
-            munmap(cr3_virt[cpu], PAGE_SIZE);
-        cr3_virt[cpu] = xc_map_foreign_range(
-            xc_handle, current_domid, PAGE_SIZE, PROT_READ,
-            cr3_phys[cpu] >> PAGE_SHIFT);
-        if ( cr3_virt[cpu] == NULL )
-            return NULL;
-    }
-    pde = to_ma(cpu, cr3_virt[cpu][vtopdi(va)]);
-    if ( pde != pde_phys[cpu] )
-    {
-        pde_phys[cpu] = pde;
-        if ( pde_virt[cpu] )
-            munmap(pde_virt[cpu], PAGE_SIZE);
-        pde_virt[cpu] = xc_map_foreign_range(
-            xc_handle, current_domid, PAGE_SIZE, PROT_READ,
-            pde_phys[cpu] >> PAGE_SHIFT);
-        if ( pde_virt[cpu] == NULL )
-            return NULL;
-    }
-    page = to_ma(cpu, pde_virt[cpu][vtopti(va)]);
-
-    if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) )
-    {
-        page_phys[cpu] = page;
-        if ( page_virt[cpu] )
-            munmap(page_virt[cpu], PAGE_SIZE);
-        page_virt[cpu] = xc_map_foreign_range(
-            xc_handle, current_domid, PAGE_SIZE, perm,
-            page_phys[cpu] >> PAGE_SHIFT);
-        if ( page_virt[cpu] == NULL )
-        {
-            page_phys[cpu] = 0;
-            return NULL;
-        }
-        prev_perm[cpu] = perm;
-    }
-
-    return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
+    unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
+    uint32_t *l2, *l1;
+    static void *v[MAX_VIRT_CPUS];
+
+    l2 = xc_map_foreign_range(
+         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] 
>> PAGE_SHIFT);
+    if ( l2 == NULL )
+        return NULL;
+
+    l2e = l2[l2_table_offset_i386(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l1p >> PAGE_SHIFT);
+    if ( l1 == NULL )
+        return NULL;
+
+    l1e = l1[l1_table_offset_i386(va)];
+    munmap(l1, PAGE_SIZE);
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
+    if ( v[cpu] != NULL )
+        munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
+    if ( v[cpu] == NULL )
+        return NULL;
+
+    return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
 }
 
 
@@ -250,36 +225,45 @@ map_domain_va_pae(
     void *guest_va,
     int perm)
 {
-    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+    unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
     uint64_t *l3, *l2, *l1;
-    static void *v;
+    static void *v[MAX_VIRT_CPUS];
 
     l3 = xc_map_foreign_range(
         xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] 
>> PAGE_SHIFT);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+    l3e = l3[l3_table_offset_pae(va)];
+    munmap(l3, PAGE_SIZE);
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
-    munmap(l3, PAGE_SIZE);
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+    l2e = l2[l2_table_offset_pae(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p 
>> PAGE_SHIFT);
-    munmap(l2, PAGE_SIZE);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
-    if ( v != NULL )
-        munmap(v, PAGE_SIZE);
-    v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> 
PAGE_SHIFT);
+    l1e = l1[l1_table_offset_pae(va)];
     munmap(l1, PAGE_SIZE);
-    if ( v == NULL )
-        return NULL;
-
-    return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
+    if ( v[cpu] != NULL )
+        munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
+    if ( v[cpu] == NULL )
+        return NULL;
+
+    return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
 }
 
 #ifdef __x86_64__
@@ -290,9 +274,10 @@ map_domain_va_64(
     void *guest_va,
     int perm)
 {
-    unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned 
long)guest_va;
     uint64_t *l4, *l3, *l2, *l1;
-    static void *v;
+    static void *v[MAX_VIRT_CPUS];
+
 
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
@@ -302,41 +287,51 @@ map_domain_va_64(
     if ( l4 == NULL )
         return NULL;
 
-    l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+    l4e = l4[l4_table_offset(va)];
+    munmap(l4, PAGE_SIZE);
+    if ( !(l4e & _PAGE_PRESENT) )
+        return NULL;
+    l3p = to_ma(cpu, l4e);
     l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l3p >> PAGE_SHIFT);
-    munmap(l4, PAGE_SIZE);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+    l3e = l3[l3_table_offset(va)];
+    munmap(l3, PAGE_SIZE);
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
-    munmap(l3, PAGE_SIZE);
     if ( l2 == NULL )
         return NULL;
 
     l1 = NULL;
-    l1e = to_ma(cpu, l2[l2_table_offset(va)]);
-    l1p = l1e >> PAGE_SHIFT;
-    if (l1e & 0x80)  { /* 2M pages */
+    l2e = l2[l2_table_offset(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    if (l2e & 0x80)  { /* 2M pages */
         p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
     } else { /* 4K pages */
-        //l1p = to_ma(cpu, l1e[l1_table_offset(va)]);
         l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, 
l1p >> PAGE_SHIFT);
-        munmap(l2, PAGE_SIZE);
         if ( l1 == NULL )
             return NULL;
 
-        p = to_ma(cpu, l1[l1_table_offset(va)]);
-    }
-    if ( v != NULL )
-        munmap(v, PAGE_SIZE);
-    v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> 
PAGE_SHIFT);
+        l1e = l1[l1_table_offset(va)];
+        if ( !(l1e & _PAGE_PRESENT) )
+            return NULL;
+        p = to_ma(cpu, l1e);
+    }
+    if ( v[cpu] != NULL )
+        munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
     if (l1)
         munmap(l1, PAGE_SIZE);
-    if ( v == NULL )
-        return NULL;
-
-    return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+    if ( v[cpu] == NULL )
+        return NULL;
+
+    return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
 }
 #endif
 
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h   Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace.h   Tue May 30 14:30:34 2006 -0500
@@ -7,7 +7,6 @@
 #define X86_CR0_PE              0x00000001 /* Enable Protected Mode    (RW) */
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
 #define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT        22
 #define PSL_T  0x00000100 /* trace enable bit */
 
 #ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
 #endif
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
 #endif
 
 typedef void (*thr_ev_handler_t)(long);
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace_core.c      Tue May 30 14:30:34 2006 -0500
@@ -3,6 +3,7 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include "xc_private.h"
+#include "xg_private.h"
 #include "xc_ptrace.h"
 #include <time.h>
 
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd, 
         }
         cr3_virt[cpu] = v;
     }
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd, 
             return NULL;
         pde_virt[cpu] = v;
     }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd, 
             map_mtop_offset(page_phys[cpu]));
         if (v == MAP_FAILED)
         {
-            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
vtopti(va));
+            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
l1_table_offset_i386(va));
             page_phys[cpu] = 0;
             return NULL;
         }
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c     Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_tbuf.c     Tue May 30 14:30:34 2006 -0500
@@ -18,53 +18,57 @@
 
 static int tbuf_enable(int xc_handle, int enable)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  if (enable)
-    op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
-  else
-    op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if (enable)
+        op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
+    else
+        op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_set_size(int xc_handle, unsigned long size)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
-  op.u.tbufcontrol.size = size;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
+    op.u.tbufcontrol.size = size;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_get_size(int xc_handle, unsigned long *size)
 {
-  int rc;
-  DECLARE_DOM0_OP;
+    int rc;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
 
-  rc = xc_dom0_op(xc_handle, &op);
-  if (rc == 0)
-    *size = op.u.tbufcontrol.size;
-  return rc;
+    rc = xc_dom0_op(xc_handle, &op);
+    if (rc == 0)
+        *size = op.u.tbufcontrol.size;
+    return rc;
 }
 
 int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
-    unsigned long *size)
+                   unsigned long *size)
 {
     DECLARE_DOM0_OP;
     int rc;
 
-    if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
-        return -1;
+    /*
+     * Ignore errors (at least for now) as we get an error if size is already
+     * set (since trace buffers cannot be reallocated). If we really have no
+     * buffers at all then tbuf_enable() will fail, so this is safe.
+     */
+    (void)xc_tbuf_set_size(xc_handle, cnt);
 
     if ( tbuf_enable(xc_handle, 1) != 0 )
         return -1;
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xenctrl.h     Tue May 30 14:30:34 2006 -0500
@@ -359,6 +359,14 @@ int xc_sedf_domain_get(int xc_handle,
                        uint64_t *latency, uint16_t *extratime,
                        uint16_t *weight);
 
+int xc_sched_credit_domain_set(int xc_handle,
+                               uint32_t domid,
+                               struct sched_credit_adjdom *sdom);
+
+int xc_sched_credit_domain_get(int xc_handle,
+                               uint32_t domid,
+                               struct sched_credit_adjdom *sdom);
+
 typedef evtchn_status_t xc_evtchn_status_t;
 
 /*
@@ -449,9 +457,6 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long first_mfn,
                                unsigned long nr_mfns,
                                uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
-                                    unsigned long mfn);
 
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
@@ -599,6 +604,6 @@ int xc_add_mmu_update(int xc_handle, xc_
                    unsigned long long ptr, unsigned long long val);
 int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu);
 
-int xc_acm_op(int xc_handle, struct acm_op *op);
+int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
 
 #endif
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xg_private.h  Tue May 30 14:30:34 2006 -0500
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
 #define L2_PAGETABLE_SHIFT_PAE   21
 #define L3_PAGETABLE_SHIFT_PAE   30
 
+#define L2_PAGETABLE_SHIFT_I386  22
+
 #if defined(__i386__)
 #define L1_PAGETABLE_SHIFT       12
 #define L2_PAGETABLE_SHIFT       22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
 #define L1_PAGETABLE_ENTRIES_PAE  512
 #define L2_PAGETABLE_ENTRIES_PAE  512
 #define L3_PAGETABLE_ENTRIES_PAE    4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
 
 #if defined(__i386__)
 #define L1_PAGETABLE_ENTRIES   1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
 #define l3_table_offset_pae(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
 
+#define l1_table_offset_i386(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
 #if defined(__i386__)
 #define l1_table_offset(_a) \
           (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/acm/acm.c
--- a/tools/python/xen/lowlevel/acm/acm.c       Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/acm/acm.c       Tue May 30 14:30:34 2006 -0500
@@ -38,7 +38,7 @@ fprintf(stderr, "ERROR: " _m " (%d = %s)
 /* generic shared function */
 void * __getssid(int domid, uint32_t *buflen)
 {
-    struct acm_op op;
+    struct acm_getssid getssid;
     int xc_handle;
     #define SSID_BUFFER_SIZE    4096
     void *buf = NULL;
@@ -51,14 +51,13 @@ void * __getssid(int domid, uint32_t *bu
         goto out2;
     }
     memset(buf, 0, SSID_BUFFER_SIZE);
-    op.cmd = ACM_GETSSID;
-    op.interface_version = ACM_INTERFACE_VERSION;
-    op.u.getssid.ssidbuf = buf;
-    op.u.getssid.ssidbuf_size = SSID_BUFFER_SIZE;
-    op.u.getssid.get_ssid_by = DOMAINID;
-    op.u.getssid.id.domainid = domid;
-
-    if (xc_acm_op(xc_handle, &op) < 0) {
+    getssid.interface_version = ACM_INTERFACE_VERSION;
+    getssid.ssidbuf = buf;
+    getssid.ssidbuf_size = SSID_BUFFER_SIZE;
+    getssid.get_ssid_by = DOMAINID;
+    getssid.id.domainid = domid;
+
+    if (xc_acm_op(xc_handle, ACMOP_getssid, &getssid, sizeof(getssid)) < 0) {
         if (errno == EACCES)
             PERROR("ACM operation failed.");
         free(buf);
@@ -147,7 +146,7 @@ static PyObject *getdecision(PyObject * 
 static PyObject *getdecision(PyObject * self, PyObject * args)
 {
     char *arg1_name, *arg1, *arg2_name, *arg2, *decision = NULL;
-    struct acm_op op;
+    struct acm_getdecision getdecision;
     int xc_handle;
 
     if (!PyArg_ParseTuple(args, "ssss", &arg1_name, &arg1, &arg2_name, &arg2)) 
{
@@ -163,34 +162,33 @@ static PyObject *getdecision(PyObject * 
     (strcmp(arg2_name, "domid") && strcmp(arg2_name, "ssidref")))
         return NULL;
 
-    op.cmd = ACM_GETDECISION;
-    op.interface_version = ACM_INTERFACE_VERSION;
-    op.u.getdecision.hook = SHARING;
+    getdecision.interface_version = ACM_INTERFACE_VERSION;
+    getdecision.hook = SHARING;
     if (!strcmp(arg1_name, "domid")) {
-        op.u.getdecision.get_decision_by1 = DOMAINID;
-        op.u.getdecision.id1.domainid = atoi(arg1);
-    } else {
-        op.u.getdecision.get_decision_by1 = SSIDREF;
-        op.u.getdecision.id1.ssidref = atol(arg1);
+        getdecision.get_decision_by1 = DOMAINID;
+        getdecision.id1.domainid = atoi(arg1);
+    } else {
+        getdecision.get_decision_by1 = SSIDREF;
+        getdecision.id1.ssidref = atol(arg1);
     }
     if (!strcmp(arg2_name, "domid")) {
-        op.u.getdecision.get_decision_by2 = DOMAINID;
-        op.u.getdecision.id2.domainid = atoi(arg2);
-    } else {
-        op.u.getdecision.get_decision_by2 = SSIDREF;
-        op.u.getdecision.id2.ssidref = atol(arg2);
-    }
-
-    if (xc_acm_op(xc_handle, &op) < 0) {
+        getdecision.get_decision_by2 = DOMAINID;
+        getdecision.id2.domainid = atoi(arg2);
+    } else {
+        getdecision.get_decision_by2 = SSIDREF;
+        getdecision.id2.ssidref = atol(arg2);
+    }
+
+    if (xc_acm_op(xc_handle, ACMOP_getdecision, &getdecision, 
sizeof(getdecision)) < 0) {
         if (errno == EACCES)
             PERROR("ACM operation failed.");
     }
 
     xc_interface_close(xc_handle);
 
-    if (op.u.getdecision.acm_decision == ACM_ACCESS_PERMITTED)
+    if (getdecision.acm_decision == ACM_ACCESS_PERMITTED)
         decision = "PERMITTED";
-    else if (op.u.getdecision.acm_decision == ACM_ACCESS_DENIED)
+    else if (getdecision.acm_decision == ACM_ACCESS_DENIED)
         decision = "DENIED";
 
     return Py_BuildValue("s", decision);
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue May 30 14:30:34 2006 -0500
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
                          "weight",    weight);
 }
 
+static PyObject *pyxc_sched_credit_domain_set(XcObject *self,
+                                              PyObject *args,
+                                              PyObject *kwds)
+{
+    uint32_t domid;
+    uint16_t weight;
+    uint16_t cap;
+    static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+    static char kwd_type[] = "I|HH";
+    struct sched_credit_adjdom sdom;
+    
+    weight = 0;
+    cap = (uint16_t)~0U;
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
+                                     &domid, &weight, &cap) )
+        return NULL;
+
+    sdom.weight = weight;
+    sdom.cap = cap;
+
+    if ( xc_sched_credit_domain_set(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_sched_credit_domain_get(XcObject *self, PyObject *args)
+{
+    uint32_t domid;
+    struct sched_credit_adjdom sdom;
+    
+    if( !PyArg_ParseTuple(args, "I", &domid) )
+        return NULL;
+    
+    if ( xc_sched_credit_domain_get(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:H,s:H}",
+                         "weight",  sdom.weight,
+                         "cap",     sdom.cap);
+}
+
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
 {
     uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
       " slice     [long]: CPU reservation per period\n"
       " latency   [long]: domain's wakeup latency hint\n"
       " extratime [int]:  domain aware of extratime?\n"},
+    
+    { "sched_credit_domain_set",
+      (PyCFunction)pyxc_sched_credit_domain_set,
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to set\n"
+      " weight    [short]: domain's scheduling weight\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "sched_credit_domain_get",
+      (PyCFunction)pyxc_sched_credit_domain_get,
+      METH_VARARGS, "\n"
+      "Get the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to get\n"
+      "Returns:   [dict]\n"
+      " weight    [short]: domain's scheduling weight\n"},
 
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
@@ -1172,7 +1233,7 @@ PyXc_init(XcObject *self, PyObject *args
 PyXc_init(XcObject *self, PyObject *args, PyObject *kwds)
 {
     if ((self->xc_handle = xc_interface_open()) == -1) {
-        PyErr_SetFromErrno(PyExc_RuntimeError);
+        PyErr_SetFromErrno(xc_error);
         return -1;
     }
 
@@ -1245,7 +1306,7 @@ PyMODINIT_FUNC initxc(void)
     if (m == NULL)
       return;
 
-    xc_error = PyErr_NewException(PKG ".error", NULL, NULL);
+    xc_error = PyErr_NewException(PKG ".Error", PyExc_RuntimeError, NULL);
     zero = PyInt_FromLong(0);
 
     /* KAF: This ensures that we get debug output in a timely manner. */
@@ -1254,6 +1315,9 @@ PyMODINIT_FUNC initxc(void)
 
     Py_INCREF(&PyXcType);
     PyModule_AddObject(m, CLS, (PyObject *)&PyXcType);
+
+    Py_INCREF(xc_error);
+    PyModule_AddObject(m, "Error", xc_error);
 }
 
 
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/xs/xs.c Tue May 30 14:30:34 2006 -0500
@@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
 
     if (perms) {
         PyObject *val = PyList_New(perms_n);
-        for (i = 0; i < perms_n; i++, perms++) {
-            PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
-                                        "dom",  perms->id,
-                                        "read", perms->perms & XS_PERM_READ,
-                                        "write",perms->perms & XS_PERM_WRITE);
+        for (i = 0; i < perms_n; i++) {
+            PyObject *p =
+                Py_BuildValue("{s:i,s:i,s:i}",
+                              "dom",   perms[i].id,
+                              "read",  perms[i].perms & XS_PERM_READ,
+                              "write", perms[i].perms & XS_PERM_WRITE);
             PyList_SetItem(val, i, p);
         }
 
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/XendDomain.py       Tue May 30 14:30:34 2006 -0500
@@ -522,6 +522,28 @@ class XendDomain:
         except Exception, ex:
             raise XendError(str(ex))
 
+    def domain_sched_credit_get(self, domid):
+        """Get credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.sched_credit_domain_get(dominfo.getDomid())
+        except Exception, ex:
+            raise XendError(str(ex))
+    
+    def domain_sched_credit_set(self, domid, weight, cap):
+        """Set credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
+        except Exception, ex:
+            raise XendError(str(ex))
+
     def domain_maxmem_set(self, domid, mem):
         """Set the memory limit for a domain.
 
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py   Tue May 30 14:30:34 2006 -0500
@@ -29,6 +29,7 @@ import string
 import string
 import time
 import threading
+import os
 
 import xen.lowlevel.xc
 from xen.util import asserts
@@ -700,6 +701,16 @@ class XendDomainInfo:
         log.debug("Storing VM details: %s", to_store)
 
         self.writeVm(to_store)
+        self.setVmPermissions()
+
+
+    def setVmPermissions(self):
+        """Allow the guest domain to read its UUID.  We don't allow it to
+        access any other entry, for security."""
+        xstransact.SetPermissions('%s/uuid' % self.vmpath,
+                                  { 'dom' : self.domid,
+                                    'read' : True,
+                                    'write' : False })
 
 
     def storeDomDetails(self):
@@ -1264,7 +1275,14 @@ class XendDomainInfo:
             m = self.image.getDomainMemory(self.info['memory'] * 1024)
             balloon.free(m)
             xc.domain_setmaxmem(self.domid, m)
-            xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
+
+            init_reservation = self.info['memory'] * 1024
+            if os.uname()[4] == 'ia64':
+                # Workaround until ia64 properly supports ballooning.
+                init_reservation = m
+
+            xc.domain_memory_increase_reservation(self.domid, init_reservation,
+                                                  0, 0)
 
             self.createChannels()
 
@@ -1527,6 +1545,10 @@ class XendDomainInfo:
 
         self.configure_bootloader()
         config = self.sxpr()
+
+        if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
+            config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
+                                          self.info['cpus'])])
 
         if self.readVm(RESTART_IN_PROGRESS):
             log.error('Xend failed during restart of domain %d.  '
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/balloon.py  Tue May 30 14:30:34 2006 -0500
@@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB.  We need th
 BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
                       # rounded.
 RETRY_LIMIT = 10
+RETRY_LIMIT_INCR = 5
 ##
 # The time to sleep between retries grows linearly, using this value (in
 # seconds).  When the system is lightly loaded, memory should be scrubbed and
@@ -118,7 +119,8 @@ def free(required):
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
         last_new_alloc = None
-        while retries < RETRY_LIMIT:
+        rlimit = RETRY_LIMIT
+        while retries < rlimit:
             free_mem = xc.physinfo()['free_memory']
 
             if free_mem >= need_mem:
@@ -127,7 +129,9 @@ def free(required):
                 return
 
             if retries == 0:
-                log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
+                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
+                log.debug("Balloon: free %d; need %d; retries: %d.", 
+                          free_mem, need_mem, rlimit)
 
             if dom0_min_mem > 0:
                 dom0_alloc = get_dom0_current_alloc()
@@ -143,8 +147,9 @@ def free(required):
                     # Continue to retry, waiting for ballooning.
 
             time.sleep(sleep_time)
+            if retries < 2 * RETRY_LIMIT:
+                sleep_time += SLEEP_TIME_GROWTH
             retries += 1
-            sleep_time += SLEEP_TIME_GROWTH
 
         # Not enough memory; diagnose the problem.
         if dom0_min_mem == 0:
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/image.py    Tue May 30 14:30:34 2006 -0500
@@ -19,6 +19,7 @@
 
 import os, string
 import re
+import math
 
 import xen.lowlevel.xc
 from xen.xend import sxp
@@ -141,11 +142,16 @@ class ImageHandler:
                           % (self.ostype, self.vm.getDomid(), str(result)))
 
 
-    def getDomainMemory(self, mem):
+    def getDomainMemory(self, mem_kb):
         """@return The memory required, in KiB, by the domain to store the
-        given amount, also in KiB.  This is normally just mem, but HVM domains
-        have overheads to account for."""
-        return mem
+        given amount, also in KiB."""
+        if os.uname()[4] != 'ia64':
+            # A little extra because auto-ballooning is broken w.r.t. HVM
+            # guests. Also, slack is necessary for live migration since that
+            # uses shadow page tables.
+            if 'hvm' in xc.xeninfo()['xen_caps']:
+                mem_kb += 4*1024;
+        return mem_kb
 
     def buildDomain(self):
         """Build the domain. Define in subclass."""
@@ -377,15 +383,20 @@ class HVMImageHandler(ImageHandler):
         os.waitpid(self.pid, 0)
         self.pid = 0
 
-    def getDomainMemory(self, mem):
+    def getDomainMemory(self, mem_kb):
         """@see ImageHandler.getDomainMemory"""
-        page_kb = 4
-        extra_pages = 0
         if os.uname()[4] == 'ia64':
             page_kb = 16
             # ROM size for guest firmware, ioreq page and xenstore page
             extra_pages = 1024 + 2
-        return mem + extra_pages * page_kb
+        else:
+            page_kb = 4
+            # This was derived emperically:
+            #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
+            #   + 4 to avoid low-memory condition
+            extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+            extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+        return mem_kb + extra_pages * page_kb
 
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/server/SrvDomain.py Tue May 30 14:30:34 2006 -0500
@@ -129,6 +129,20 @@ class SrvDomain(SrvDir):
                     ['latency', 'int'],
                     ['extratime', 'int'],
                     ['weight', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+    
+    def op_domain_sched_credit_get(self, _, req):
+        fn = FormFn(self.xd.domain_sched_credit_get,
+                    [['dom', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+
+
+    def op_domain_sched_credit_set(self, _, req):
+        fn = FormFn(self.xd.domain_sched_credit_set,
+                    [['dom', 'int'],
+                     ['weight', 'int']])
         val = fn(req.args, {'dom': self.dom.domid})
         return val
 
diff -r e74246451527 -r f54d38cea8ac 
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Tue May 30 12:52:02 
2006 -0500
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Tue May 30 14:30:34 
2006 -0500
@@ -221,6 +221,34 @@ class xstransact:
                 xshandle().mkdir(self.transaction, self.prependPath(key))
 
 
+    def get_permissions(self, *args):
+        """If no arguments are given, return the permissions at this
+        transaction's path.  If one argument is given, treat that argument as
+        a subpath to this transaction's path, and return the permissions at
+        that path.  Otherwise, treat each argument as a subpath to this
+        transaction's path, and return a list composed of the permissions at
+        each of those instead.
+        """
+        if len(args) == 0:
+            return xshandle().get_permissions(self.transaction, self.path)
+        if len(args) == 1:
+            return self._get_permissions(args[0])
+        ret = []
+        for key in args:
+            ret.append(self._get_permissions(key))
+        return ret
+
+
+    def _get_permissions(self, key):
+        path = self.prependPath(key)
+        try:
+            return xshandle().get_permissions(self.transaction, path)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               '%s, while getting permissions from %s' %
+                               (ex.args[1], path))
+
+
     def set_permissions(self, *args):
         if len(args) == 0:
             raise TypeError
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xm/main.py       Tue May 30 14:30:34 2006 -0500
@@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
                                     specifies another way of setting a 
domain's\n\
                                     cpu period/slice."
 
+sched_credit_help = "sched-credit                           Set or get credit 
scheduler parameters"
 block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
                 [BackDomId]         Create a new virtual block device"""
 block_detach_help = """block-detach  <DomId> <DevId>    Destroy a domain's 
virtual block device,
@@ -174,6 +175,7 @@ host_commands = [
     ]
 
 scheduler_commands = [
+    "sched-credit",
     "sched-bvt",
     "sched-bvt-ctxallow",
     "sched-sedf",
@@ -735,6 +737,48 @@ def xm_sched_sedf(args):
         else:
             print_sedf(sedf_info)
 
+def xm_sched_credit(args):
+    usage_msg = """sched-credit:     Set or get credit scheduler parameters
+ Usage:
+
+        sched-credit -d domain [-w weight] [-c cap]
+    """
+    try:
+        opts, args = getopt.getopt(args[0:], "d:w:c:",
+            ["domain=", "weight=", "cap="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print usage_msg
+        sys.exit(1)
+
+    domain = None
+    weight = None
+    cap = None
+
+    for o, a in opts:
+        if o == "-d":
+            domain = a
+        elif o == "-w":
+            weight = int(a)
+        elif o == "-c":
+            cap = int(a);
+
+    if domain is None:
+        # place holder for system-wide scheduler parameters
+        print usage_msg
+        sys.exit(1)
+
+    if weight is None and cap is None:
+        print server.xend.domain.sched_credit_get(domain)
+    else:
+        if weight is None:
+            weight = int(0)
+        if cap is None:
+            cap = int(~0)
+
+        err = server.xend.domain.sched_credit_set(domain, weight, cap)
+        if err != 0:
+            print err
 
 def xm_info(args):
     arg_check(args, "info", 0)
@@ -1032,6 +1076,7 @@ commands = {
     "sched-bvt": xm_sched_bvt,
     "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
     "sched-sedf": xm_sched_sedf,
+    "sched-credit": xm_sched_credit,
     # block
     "block-attach": xm_block_attach,
     "block-detach": xm_block_detach,
diff -r e74246451527 -r f54d38cea8ac tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c      Tue May 30 12:52:02 2006 -0500
+++ b/tools/security/secpol_tool.c      Tue May 30 14:30:34 2006 -0500
@@ -231,14 +231,16 @@ uint8_t pull_buffer[PULL_CACHE_SIZE];
 uint8_t pull_buffer[PULL_CACHE_SIZE];
 int acm_domain_getpolicy(int xc_handle)
 {
-    struct acm_op op;
+    struct acm_getpolicy getpolicy;
     int ret;
 
     memset(pull_buffer, 0x00, sizeof(pull_buffer));
-    op.cmd = ACM_GETPOLICY;
-    op.u.getpolicy.pullcache = (void *) pull_buffer;
-    op.u.getpolicy.pullcache_size = sizeof(pull_buffer);
-    if ((ret = xc_acm_op(xc_handle, &op)) < 0) {
+    getpolicy.interface_version = ACM_INTERFACE_VERSION;
+    getpolicy.pullcache = (void *) pull_buffer;
+    getpolicy.pullcache_size = sizeof(pull_buffer);
+    ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy));
+
+    if (ret < 0) {
         printf("ACM operation failed: errno=%d\n", errno);
         if (errno == EACCES)
             fprintf(stderr, "ACM operation failed -- need to"
@@ -275,13 +277,13 @@ int acm_domain_loadpolicy(int xc_handle,
         goto free_out;
     }
     if (len == read(fd, buffer, len)) {
-        struct acm_op op;
+        struct acm_setpolicy setpolicy;
         /* dump it and then push it down into xen/acm */
         acm_dump_policy_buffer(buffer, len);
-        op.cmd = ACM_SETPOLICY;
-        op.u.setpolicy.pushcache = (void *) buffer;
-        op.u.setpolicy.pushcache_size = len;
-        ret = xc_acm_op(xc_handle, &op);
+        setpolicy.interface_version = ACM_INTERFACE_VERSION;
+        setpolicy.pushcache = (void *) buffer;
+        setpolicy.pushcache_size = len;
+        ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, 
sizeof(setpolicy));
 
         if (ret)
             printf
@@ -322,15 +324,15 @@ int acm_domain_dumpstats(int xc_handle)
 int acm_domain_dumpstats(int xc_handle)
 {
     uint8_t stats_buffer[PULL_STATS_SIZE];
-    struct acm_op op;
+    struct acm_dumpstats dumpstats;
     int ret;
     struct acm_stats_buffer *stats;
 
     memset(stats_buffer, 0x00, sizeof(stats_buffer));
-    op.cmd = ACM_DUMPSTATS;
-    op.u.dumpstats.pullcache = (void *) stats_buffer;
-    op.u.dumpstats.pullcache_size = sizeof(stats_buffer);
-    ret = xc_acm_op(xc_handle, &op);
+    dumpstats.interface_version = ACM_INTERFACE_VERSION;
+    dumpstats.pullcache = (void *) stats_buffer;
+    dumpstats.pullcache_size = sizeof(stats_buffer);
+    ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats));
 
     if (ret < 0) {
         printf
diff -r e74246451527 -r f54d38cea8ac tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Tue May 30 12:52:02 2006 -0500
+++ b/tools/tests/test_x86_emulator.c   Tue May 30 14:30:34 2006 -0500
@@ -17,7 +17,8 @@ static int read_any(
 static int read_any(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -32,7 +33,8 @@ static int write_any(
 static int write_any(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -48,7 +50,8 @@ static int cmpxchg_any(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -65,34 +68,38 @@ static int cmpxchg8b_any(
     unsigned long old_lo,
     unsigned long old_hi,
     unsigned long new_lo,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     ((unsigned long *)addr)[0] = new_lo;
     ((unsigned long *)addr)[1] = new_hi;
     return X86EMUL_CONTINUE;
 }
 
-static struct x86_mem_emulator emulops = {
+static struct x86_emulate_ops emulops = {
     read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
 };
 
 int main(int argc, char **argv)
 {
+    struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
     unsigned int res = 0x7FFFFFFF;
     u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
-    unsigned long cr2;
     int rc;
+
+    ctxt.regs = &regs;
+    ctxt.mode = X86EMUL_MODE_PROT32;
 
     printf("%-40s", "Testing addl %%ecx,(%%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
@@ -109,8 +116,8 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
@@ -124,8 +131,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
@@ -140,8 +147,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
@@ -156,8 +163,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x12345678) || 
          (regs.eflags != 0x200) ||
@@ -173,8 +180,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
@@ -192,8 +199,8 @@ int main(int argc, char **argv)
     regs.esi    = (unsigned long)&res + 0;
     regs.edi    = (unsigned long)&res + 2;
     regs.error_code = 0; /* read fault */
-    cr2         = regs.esi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.esi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x44554455) ||
          (regs.eflags != 0x200) ||
@@ -210,8 +217,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)&res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
@@ -228,8 +235,8 @@ int main(int argc, char **argv)
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -242,8 +249,8 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -258,9 +265,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
@@ -273,9 +280,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x1234aa82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/Makefile   Tue May 30 14:30:34 2006 -0500
@@ -27,6 +27,12 @@ CLIENTS += xenstore-write
 CLIENTS += xenstore-write
 CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
 
+XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+
+XENSTORED_Linux = xenstored_linux.o
+
+XENSTORED_OBJS += $(XENSTORED_$(OS))
+
 .PHONY: all
 all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control 
xenstore-ls
 
@@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
 .PHONY: testcode
 testcode: xs_test xenstored_test xs_random
 
-xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+xenstored: $(XENSTORED_OBJS)
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
 
 $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_core.c   Tue May 30 14:30:34 2006 -0500
@@ -451,6 +451,11 @@ static struct node *read_node(struct con
 
 static bool write_node(struct connection *conn, const struct node *node)
 {
+       /*
+        * conn will be null when this is called from manual_node.
+        * tdb_context copes with this.
+        */
+
        TDB_DATA key, data;
        void *p;
 
@@ -478,7 +483,7 @@ static bool write_node(struct connection
 
        /* TDB should set errno, but doesn't even set ecode AFAICT. */
        if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
-               corrupt(conn, "Write of %s = %s failed", key, data);
+               corrupt(conn, "Write of %s failed", key.dptr);
                goto error;
        }
        return true;
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_core.h   Tue May 30 14:30:34 2006 -0500
@@ -19,6 +19,8 @@
 
 #ifndef _XENSTORED_CORE_H
 #define _XENSTORED_CORE_H
+
+#include <xenctrl.h>
 
 #include <sys/types.h>
 #include <dirent.h>
@@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
 
 extern int event_fd;
 
+/* Map the kernel's xenstore page. */
+void *xenbus_map(void);
+
+/* Return the event channel used by xenbus. */
+evtchn_port_t xenbus_evtchn(void);
+
 #endif /* _XENSTORED_CORE_H */
 
 /*
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_domain.c Tue May 30 14:30:34 2006 -0500
@@ -33,12 +33,11 @@
 #include "talloc.h"
 #include "xenstored_core.h"
 #include "xenstored_domain.h"
-#include "xenstored_proc.h"
 #include "xenstored_watch.h"
 #include "xenstored_test.h"
 
 #include <xenctrl.h>
-#include <xen/linux/evtchn.h>
+#include <xen/sys/evtchn.h>
 
 static int *xc_handle;
 static evtchn_port_t virq_port;
@@ -476,44 +475,24 @@ void restore_existing_connections(void)
 
 static int dom0_init(void) 
 { 
-       int rc, fd;
-       evtchn_port_t port; 
-       char str[20]; 
-       struct domain *dom0; 
-
-       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
-       if (fd == -1)
+       evtchn_port_t port;
+       struct domain *dom0;
+
+       port = xenbus_evtchn();
+       if (port == -1)
                return -1;
 
-       rc = read(fd, str, sizeof(str)); 
-       if (rc == -1)
-               goto outfd;
-       str[rc] = '\0'; 
-       port = strtoul(str, NULL, 0); 
-
-       close(fd); 
-
        dom0 = new_domain(NULL, 0, port); 
 
-       fd = open(XENSTORED_PROC_KVA, O_RDWR);
-       if (fd == -1)
+       dom0->interface = xenbus_map();
+       if (dom0->interface == NULL)
                return -1;
 
-       dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
-                              MAP_SHARED, fd, 0);
-       if (dom0->interface == MAP_FAILED)
-               goto outfd;
-
-       close(fd);
-
        talloc_steal(dom0->conn, dom0); 
 
        evtchn_notify(dom0->port); 
 
        return 0; 
-outfd:
-       close(fd);
-       return -1;
 }
 
 
diff -r e74246451527 -r f54d38cea8ac tools/xentrace/xentrace_format
--- a/tools/xentrace/xentrace_format    Tue May 30 12:52:02 2006 -0500
+++ b/tools/xentrace/xentrace_format    Tue May 30 14:30:34 2006 -0500
@@ -89,7 +89,7 @@ CPUREC = "I"
 CPUREC = "I"
 TRCREC = "QLLLLLL"
 
-last_tsc = [0,0,0,0,0,0,0,0]
+last_tsc = [0]
 
 i=0
 
@@ -111,7 +111,9 @@ while not interrupted:
 
        #print i, tsc
 
-       if tsc < last_tsc[cpu]:
+        if cpu >= len(last_tsc):
+            last_tsc += [0] * (cpu - len(last_tsc) + 1)
+       elif tsc < last_tsc[cpu]:
            print "TSC stepped backward cpu %d !  %d %d" % 
(cpu,tsc,last_tsc[cpu])
 
        last_tsc[cpu] = tsc
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/configure.ac
--- a/tools/xm-test/configure.ac        Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/configure.ac        Tue May 30 14:30:34 2006 -0500
@@ -99,6 +99,7 @@ AC_CONFIG_FILES([
     tests/block-list/Makefile
     tests/block-create/Makefile
     tests/block-destroy/Makefile
+    tests/block-integrity/Makefile
     tests/console/Makefile
     tests/create/Makefile
     tests/destroy/Makefile
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/ramdisk/bin/create_disk_image
--- a/tools/xm-test/ramdisk/bin/create_disk_image       Tue May 30 12:52:02 
2006 -0500
+++ b/tools/xm-test/ramdisk/bin/create_disk_image       Tue May 30 14:30:34 
2006 -0500
@@ -207,6 +207,13 @@ function dd_rootfs_to_image()
        dd if="$ROOTFS" of="$LOOPP" > /dev/null 2>&1
        if [ $? -ne 0 ]; then
                die "Failed to dd $ROOTFS to $LOOPP."
+       fi
+
+       # Resize fs to use full partition
+       e2fsck -f $LOOPP 
+       resize2fs $LOOPP
+       if [ $? -ne 0 ]; then
+               die "Failed to resize rootfs on $LOOPP."
        fi
 }
 
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/Makefile.am
--- a/tools/xm-test/tests/Makefile.am   Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/Makefile.am   Tue May 30 14:30:34 2006 -0500
@@ -1,14 +1,15 @@ SUBDIRS =                     \
 SUBDIRS =                      \
                block-create    \
-               block-list      \
-                block-destroy   \
+               block-list      \
+               block-destroy   \
+               block-integrity \
                console         \
                create          \
                destroy         \
                dmesg           \
                domid           \
                domname         \
-               help            \
+               help            \
                info            \
                list            \
                memmax          \
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py
--- a/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py   
Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py   
Tue May 30 14:30:34 2006 -0500
@@ -65,13 +65,24 @@ if check_status and status != 0:
         FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
 
 # 5) check /proc/cpuinfo for cpu count
-cmd = "grep \"^processor\" /proc/cpuinfo | wc -l"
-status, output = traceCommand(cmd)
-if check_status and status != 0:
-    os.unsetenv("XEND_CONFIG")
-    restartXend()
-    FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
 
+# It takes some time for the CPU count to change, on multi-proc systems, so 
check the number of procs in a loop for 20 seconds. 
+#Sleep inside the loop for a second each time.
+timeout = 20
+starttime = time.time()
+while timeout + starttime > time.time():
+# Check /proc/cpuinfo
+    cmd = "grep \"^processor\" /proc/cpuinfo | wc -l"
+    status, output = traceCommand(cmd)
+    if check_status and status != 0:
+        os.unsetenv("XEND_CONFIG")
+        restartXend()
+        FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
+# Has it succeeded? If so, we can leave the loop
+    if output == str(enforce_dom0_cpus):
+        break
+# Sleep for 1 second before trying again
+    time.sleep(1)
 if output != str(enforce_dom0_cpus):
     os.unsetenv("XEND_CONFIG")
     restartXend()
@@ -94,7 +105,14 @@ if check_status and status != 0:
     FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
 
 # check restore worked
-num_online = int(getDomInfo("Domain-0", "VCPUs"))
+# Since this also takes time, we will do it in a loop with a 20 second timeout.
+timeout=20
+starttime=time.time()
+while timeout + starttime > time.time(): 
+    num_online = int(getDomInfo("Domain-0", "VCPUs"))
+    if num_online == dom0_online_vcpus:
+        break
+    time.sleep(1)
 if num_online != dom0_online_vcpus:
     os.unsetenv("XEND_CONFIG")
     restartXend()
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/03_network_local_tcp_pos.py
--- a/tools/xm-test/tests/network/03_network_local_tcp_pos.py   Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/03_network_local_tcp_pos.py   Tue May 30 
14:30:34 2006 -0500
@@ -44,7 +44,7 @@ try:
     lofails=""
     for size in trysizes:
         out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -q -c 20 " 
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             lofails += " " + str(size)
 
@@ -54,7 +54,7 @@ try:
     ip = netdev.getNetDevIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + ip + " -E /dev/urandom -q -c 20 "
-              + "--fast -d "+ str(size))
+              + "--fast -d "+ str(size) + " -N " + str(size))
         if out["return"]:
             eth0fails += " " + str(size) 
 except ConsoleError, e:
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/04_network_local_udp_pos.py
--- a/tools/xm-test/tests/network/04_network_local_udp_pos.py   Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/04_network_local_udp_pos.py   Tue May 30 
14:30:34 2006 -0500
@@ -43,7 +43,7 @@ try:
     lofails=""
     for size in trysizes:
         out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -2 -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             lofails += " " + str(size)
             print out["output"]
@@ -54,7 +54,7 @@ try:
     ip = netdev.getNetDevIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + ip + " -E /dev/urandom -2 -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             eth0fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/06_network_dom0_tcp_pos.py
--- a/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -44,7 +44,7 @@ try:
     dom0ip = netdev.getDom0AliasIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/07_network_dom0_udp_pos.py
--- a/tools/xm-test/tests/network/07_network_dom0_udp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/07_network_dom0_udp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -43,7 +43,7 @@ try:
     dom0ip = netdev.getDom0AliasIP()
     for size in trysizes:
         out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -2 -q -c 
20"
-             + " --fast -d " + str(size))
+             + " --fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/12_network_domU_tcp_pos.py
--- a/tools/xm-test/tests/network/12_network_domU_tcp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/12_network_domU_tcp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -50,7 +50,7 @@ try:
     ip2 = dst_netdev.getNetDevIP()
     for size in pingsizes:
         out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -q -c 20 "
-              + "--fast -d " + str(size))
+              + "--fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/network/13_network_domU_udp_pos.py
--- a/tools/xm-test/tests/network/13_network_domU_udp_pos.py    Tue May 30 
12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/13_network_domU_udp_pos.py    Tue May 30 
14:30:34 2006 -0500
@@ -50,7 +50,7 @@ try:
     ip2 = dst_netdev.getNetDevIP()
     for size in pingsizes:
         out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -2 -q "
-              + "-c 20 --fast -d " + str(size))
+              + "-c 20 --fast -d " + str(size) + " -N " + str(size))
         if out["return"]:
             fails += " " + str(size) 
             print out["output"]
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk    Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/Rules.mk    Tue May 30 14:30:34 2006 -0500
@@ -3,31 +3,31 @@
 
 HAS_ACPI := y
 VALIDATE_VT    ?= n
-xen_ia64_dom0_virtual_physical ?= n
+xen_ia64_dom0_virtual_physical ?= y
+no_warns ?= n
 
 ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
 CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
 endif
-AFLAGS  += -D__ASSEMBLY__ -nostdinc $(CPPFLAGS)
-AFLAGS  += -mconstant-gp
-CPPFLAGS  += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64        \
-             -I$(BASEDIR)/include/asm-ia64/linux                       \
-            -I$(BASEDIR)/include/asm-ia64/linux-xen                    \
-            -I$(BASEDIR)/include/asm-ia64/linux-null                   \
-             -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
+
+# Used only by linux/Makefile.
+AFLAGS_KERNEL  += -mconstant-gp
+
+# Note: .S -> .o rule uses AFLAGS and CFLAGS.
 
 CFLAGS  += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
 CFLAGS  += -mconstant-gp
 #CFLAGS  += -O3                # -O3 over-inlines making debugging tough!
 CFLAGS  += -O2         # but no optimization causes compile errors!
-#CFLAGS  += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE)
-CFLAGS  += -iwithprefix include -Wall
-CFLAGS  += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
-CFLAGS  += -I$(BASEDIR)/include/asm-ia64 -I$(BASEDIR)/include/asm-ia64/linux \
+CFLAGS  += -fomit-frame-pointer -D__KERNEL__
+CFLAGS  += -iwithprefix include
+CPPFLAGS+= -I$(BASEDIR)/include                                         \
+           -I$(BASEDIR)/include/asm-ia64                                \
            -I$(BASEDIR)/include/asm-ia64/linux                                 
\
            -I$(BASEDIR)/include/asm-ia64/linux-xen                     \
           -I$(BASEDIR)/include/asm-ia64/linux-null                     \
            -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
+CFLAGS += $(CPPFLAGS)
 #CFLAGS  += -Wno-pointer-arith -Wredundant-decls
 CFLAGS  += -DIA64 -DXEN -DLINUX_2_6 -DV_IOSAPIC_READY
 CFLAGS += -ffixed-r13 -mfixed-range=f2-f5,f12-f127
@@ -39,4 +39,8 @@ ifeq ($(xen_ia64_dom0_virtual_physical),
 ifeq ($(xen_ia64_dom0_virtual_physical),y)
 CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP
 endif
+ifeq ($(no_warns),y)
+CFLAGS += -Wa,--fatal-warnings
+endif
+
 LDFLAGS := -g
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/asm-offsets.c       Tue May 30 14:30:34 2006 -0500
@@ -50,8 +50,6 @@ void foo(void)
        DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, 
arch.metaphysical_saved_rr0));
        DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, 
arch.breakimm));
        DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
-       DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, 
arch.dtlb_pte));
-       DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, 
arch.itlb_pte));
        DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0]));
        DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3]));
        DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3]));
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/setup.c
--- a/xen/arch/ia64/linux-xen/setup.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/setup.c   Tue May 30 14:30:34 2006 -0500
@@ -800,8 +800,7 @@ cpu_init (void)
        cpu_data = per_cpu_init();
 
 #ifdef XEN
-       printf ("cpu_init: current=%p, current->domain->arch.mm=%p\n",
-               current, current->domain->arch.mm);
+       printf ("cpu_init: current=%p\n", current);
 #endif
 
        /*
@@ -872,12 +871,11 @@ cpu_init (void)
 #ifndef XEN
        current->active_mm = &init_mm;
 #endif
-#ifdef XEN
-       if (current->domain->arch.mm)
-#else
+#ifndef XEN
        if (current->mm)
-#endif
                BUG();
+#endif
+
 
 #ifdef XEN
        ia64_fph_enable();
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/smp.c
--- a/xen/arch/ia64/linux-xen/smp.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/smp.c     Tue May 30 14:30:34 2006 -0500
@@ -53,28 +53,6 @@
 #endif
 
 #ifdef XEN
-// FIXME: MOVE ELSEWHERE
-//Huh? This seems to be used on ia64 even if !CONFIG_SMP
-void flush_tlb_mask(cpumask_t mask)
-{
-    int cpu;
-
-    cpu = smp_processor_id();
-    if (cpu_isset (cpu, mask)) {
-        cpu_clear(cpu, mask);
-       local_flush_tlb_all ();
-    }
-
-#ifdef CONFIG_SMP
-    if (cpus_empty(mask))
-        return;
-
-    for (cpu = 0; cpu < NR_CPUS; ++cpu)
-        if (cpu_isset(cpu, mask))
-          smp_call_function_single
-            (cpu, (void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
-#endif
-}
 //#if CONFIG_SMP || IA64
 #if CONFIG_SMP
 //Huh? This seems to be used on ia64 even if !CONFIG_SMP
@@ -276,7 +254,6 @@ smp_send_reschedule (int cpu)
 {
        platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
 }
-#endif
 
 void
 smp_flush_tlb_all (void)
@@ -284,15 +261,6 @@ smp_flush_tlb_all (void)
        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
 }
 
-#ifdef XEN
-void
-smp_vhpt_flush_all(void)
-{
-       on_each_cpu((void (*)(void *))vhpt_flush, NULL, 1, 1);
-}
-#endif
-
-#ifndef XEN
 void
 smp_flush_tlb_mm (struct mm_struct *mm)
 {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/unaligned.c
--- a/xen/arch/ia64/linux-xen/unaligned.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/unaligned.c       Tue May 30 14:30:34 2006 -0500
@@ -377,7 +377,7 @@ get_rse_reg (struct pt_regs *regs, unsig
     if (ridx >= sof) {
         /* read of out-of-frame register returns an undefined value; 0 in our 
case.  */
         DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", 
r1, sof);
-        panic("wrong stack register number (iip=%p)\n", regs->cr_iip);
+        panic("wrong stack register number (iip=%lx)\n", regs->cr_iip);
     }
 
     if (ridx < sor)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/pal_emul.c
--- a/xen/arch/ia64/vmx/pal_emul.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/pal_emul.c      Tue May 30 14:30:34 2006 -0500
@@ -62,8 +62,8 @@ pal_cache_flush (VCPU *vcpu) {
 //             ia64_pal_call_static(gr28 ,gr29, gr30, 
 //                             result.v1,1LL);
 //     }
-       while (result.status != 0) {
-        panic("PAL_CACHE_FLUSH ERROR, status %ld", result.status);
+       if(result.status != 0) {
+               panic_domain(vcpu_regs(vcpu),"PAL_CACHE_FLUSH ERROR, status 
%ld", result.status);
        }
 
        return result;
@@ -445,7 +445,7 @@ pal_emul( VCPU *vcpu) {
                        break;
 
                default:
-                       panic("pal_emul(): guest call unsupported pal" );
+                       panic_domain(vcpu_regs(vcpu),"pal_emul(): guest call 
unsupported pal" );
   }
                set_pal_result (vcpu, result);
 }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vlsapic.c       Tue May 30 14:30:34 2006 -0500
@@ -568,7 +568,7 @@ int vmx_check_pending_irq(VCPU *vcpu)
     if (  vpsr.i && IRQ_NO_MASKED == mask ) {
         isr = vpsr.val & IA64_PSR_RI;
         if ( !vpsr.ic )
-            panic("Interrupt when IC=0\n");
+            panic_domain(regs,"Interrupt when IC=0\n");
         vmx_reflect_interruption(0,isr,0, 12, regs ); // EXT IRQ
         injected = 1;
     }
@@ -595,7 +595,8 @@ void guest_write_eoi(VCPU *vcpu)
     uint64_t  spsr;
 
     vec = highest_inservice_irq(vcpu);
-    if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
+    if ( vec == NULL_VECTOR ) 
+       panic_domain(vcpu_regs(vcpu),"Wrong vector to EOI\n");
     local_irq_save(spsr);
     VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
     local_irq_restore(spsr);
@@ -634,7 +635,7 @@ static void generate_exirq(VCPU *vcpu)
     update_vhpi(vcpu, NULL_VECTOR);
     isr = vpsr.val & IA64_PSR_RI;
     if ( !vpsr.ic )
-        panic("Interrupt when IC=0\n");
+        panic_domain(regs,"Interrupt when IC=0\n");
     vmx_reflect_interruption(0,isr,0, 12, regs); // EXT IRQ
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmmu.c  Tue May 30 14:30:34 2006 -0500
@@ -134,11 +134,11 @@ static void init_domain_vhpt(struct vcpu
     void * vbase;
     page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0);
     if ( page == NULL ) {
-        panic("No enough contiguous memory for init_domain_vhpt\n");
+        panic_domain(vcpu_regs(v),"No enough contiguous memory for 
init_domain_vhpt\n");
     }
     vbase = page_to_virt(page);
     memset(vbase, 0, VCPU_VHPT_SIZE);
-    printk("Allocate domain tlb at 0x%p\n", vbase);
+    printk("Allocate domain vhpt at 0x%p\n", vbase);
     
     VHPT(v,hash) = vbase;
     VHPT(v,hash_sz) = VCPU_VHPT_SIZE/2;
@@ -157,11 +157,11 @@ void init_domain_tlb(struct vcpu *v)
     init_domain_vhpt(v);
     page = alloc_domheap_pages (NULL, VCPU_VTLB_ORDER, 0);
     if ( page == NULL ) {
-        panic("No enough contiguous memory for init_domain_tlb\n");
+        panic_domain(vcpu_regs(v),"No enough contiguous memory for init_domain_tlb\n");
     }
     vbase = page_to_virt(page);
     memset(vbase, 0, VCPU_VTLB_SIZE);
-    printk("Allocate domain tlb at 0x%p\n", vbase);
+    printk("Allocate domain vtlb at 0x%p\n", vbase);
     
     VTLB(v,hash) = vbase;
     VTLB(v,hash_sz) = VCPU_VTLB_SIZE/2;
@@ -202,7 +202,7 @@ void machine_tlb_insert(struct vcpu *d, 
     mtlb.ppn = get_mfn(d->domain,tlb->ppn);
     mtlb_ppn=mtlb.ppn;
     if (mtlb_ppn == INVALID_MFN)
-    panic("Machine tlb insert with invalid mfn number.\n");
+        panic_domain(vcpu_regs(d),"Machine tlb insert with invalid mfn number.\n");
 
     psr = ia64_clear_ic();
     if ( cl == ISIDE_TLB ) {
@@ -325,12 +325,12 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod
     }
     if( gpip){
         mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
-       if( mfn == INVALID_MFN )  panic("fetch_code: invalid memory\n");
+       if( mfn == INVALID_MFN )  panic_domain(vcpu_regs(vcpu),"fetch_code: invalid memory\n");
        vpa =(u64 *)__va( (gip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT));
     }else{
        tlb = vhpt_lookup(gip);
        if( tlb == NULL)
-           panic("No entry found in ITLB and DTLB\n");
+           panic_domain(vcpu_regs(vcpu),"No entry found in ITLB and DTLB\n");
+       vpa =(u64 *)__va((tlb->ppn>>(PAGE_SHIFT-ARCH_PAGE_SHIFT)<<PAGE_SHIFT)|(gip&(PAGE_SIZE-1)));
     }
     *code1 = *vpa++;
@@ -347,7 +347,7 @@ IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UIN
     slot = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
     if (slot >=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     thash_purge_and_insert(vcpu, pte, itir, ifa);
@@ -363,7 +363,7 @@ IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UIN
     slot = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
     if (slot >=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT;
@@ -385,7 +385,7 @@ IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, u64
     index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
     if (index >=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     thash_purge_entries(vcpu, va, ps);
@@ -407,7 +407,7 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64
     index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
     if (index>=0) {
         // generate MCA.
-        panic("Tlb conflict!!");
+        panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
         return IA64_FAULT;
     }
     thash_purge_entries(vcpu, va, ps);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_entry.S
--- a/xen/arch/ia64/vmx/vmx_entry.S     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_entry.S     Tue May 30 14:30:34 2006 -0500
@@ -290,10 +290,59 @@ GLOBAL_ENTRY(ia64_leave_hypervisor)
     mov ar.ccv=r18
     ;;
 //rbs_switch
-    // loadrs has already been shifted
+    
+    shr.u r18=r20,16
+    ;;
+    movl r19= THIS_CPU(ia64_phys_stacked_size_p8)
+    ;;
+    ld4 r19=[r19]
+     
+vmx_dont_preserve_current_frame:
+/*
+    * To prevent leaking bits between the hypervisor and guest domain,
+    * we must clear the stacked registers in the "invalid" partition here.
+    * 5 registers/cycle on McKinley).
+    */
+#   define pRecurse    p6
+#   define pReturn     p7
+#   define Nregs       14
+    
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    shr.u loc1=r18,9           // RNaTslots <= floor(dirtySize / (64*8))
+    sub r19=r19,r18                    // r19 = (physStackedSize + 8) - dirtySize
+    ;;
+    mov ar.rsc=r20                     // load ar.rsc to be used for "loadrs"
+    shladd in0=loc1,3,r19
+    mov in1=0
+    ;;
+    TEXT_ALIGN(32)
+vmx_rse_clear_invalid:
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    cmp.lt pRecurse,p0=Nregs*8,in0     // if more than Nregs regs left to clear, (re)curse
+    add out0=-Nregs*8,in0
+    add out1=1,in1                     // increment recursion count
+    mov loc1=0
+    mov loc2=0
+    ;;
+    mov loc3=0
+    mov loc4=0
+    mov loc5=0
+    mov loc6=0
+    mov loc7=0
+(pRecurse) br.call.dptk.few b0=vmx_rse_clear_invalid
+    ;;
+    mov loc8=0
+    mov loc9=0
+    cmp.ne pReturn,p0=r0,in1   // if recursion count != 0, we need to do a br.ret
+    mov loc10=0
+    mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+#      undef pRecurse
+#      undef pReturn
+
+// loadrs has already been shifted
     alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-    ;;
-    mov ar.rsc=r20
     ;;
     loadrs
     ;;
@@ -315,7 +364,9 @@ vmx_dorfirfi_back:
     adds r18=IA64_VPD_BASE_OFFSET,r21
     ;;
     ld8 r18=[r18]   //vpd
-    ;;
+    adds r17=IA64_VCPU_ISR_OFFSET,r21
+    ;;
+    ld8 r17=[r17]
     adds r19=VPD(VPSR),r18
     ;;
     ld8 r19=[r19]        //vpsr
@@ -331,12 +382,14 @@ vmx_dorfirfi_back:
     mov b0=r16
     br.cond.sptk b0         // call the service
     ;;
+END(ia64_leave_hypervisor)
 switch_rr7:
 // fall through
 GLOBAL_ENTRY(ia64_vmm_entry)
 /*
  *  must be at bank 0
  *  parameter:
+ *  r17:cr.isr
  *  r18:vpd
  *  r19:vpsr
  *  r20:__vsa_base
@@ -348,13 +401,19 @@ GLOBAL_ENTRY(ia64_vmm_entry)
     tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
     ;;
     (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p1) br.sptk.many ia64_vmm_entry_out
+    ;;
+    tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT                //p1=cr.isr.ir
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
     (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
     ;;
+ia64_vmm_entry_out:    
     mov pr=r23,-2
     mov b0=r29
     ;;
     br.cond.sptk b0             // call pal service
-END(ia64_leave_hypervisor)
+END(ia64_vmm_entry)
 
 //r24 rfi_pfs
 //r17 address of rfi_pfs
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_init.c      Tue May 30 14:30:34 2006 -0500
@@ -208,8 +208,9 @@ vmx_create_vp(struct vcpu *v)
        ivt_base = (u64) &vmx_ia64_ivt;
        printk("ivt_base: 0x%lx\n", ivt_base);
        ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0);
-       if (ret != PAL_STATUS_SUCCESS)
-               panic("ia64_pal_vp_create failed. \n");
+       if (ret != PAL_STATUS_SUCCESS){
+               panic_domain(vcpu_regs(v),"ia64_pal_vp_create failed. \n");
+       }
 }
 
 /* Other non-context related tasks can be done in context switch */
@@ -220,8 +221,9 @@ vmx_save_state(struct vcpu *v)
 
        /* FIXME: about setting of pal_proc_vector... time consuming */
        status = ia64_pal_vp_save((u64 *)v->arch.privregs, 0);
-       if (status != PAL_STATUS_SUCCESS)
-               panic("Save vp status failed\n");
+       if (status != PAL_STATUS_SUCCESS){
+               panic_domain(vcpu_regs(v),"Save vp status failed\n");
+       }
 
 
        /* Need to save KR when domain switch, though HV itself doesn;t
@@ -244,8 +246,9 @@ vmx_load_state(struct vcpu *v)
        u64 status;
 
        status = ia64_pal_vp_restore((u64 *)v->arch.privregs, 0);
-       if (status != PAL_STATUS_SUCCESS)
-               panic("Restore vp status failed\n");
+       if (status != PAL_STATUS_SUCCESS){
+               panic_domain(vcpu_regs(v),"Restore vp status failed\n");
+       }
 
        ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
        ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
@@ -343,17 +346,18 @@ int vmx_build_physmap_table(struct domai
            for (j = io_ranges[i].start;
                 j < io_ranges[i].start + io_ranges[i].size;
                 j += PAGE_SIZE)
-               assign_domain_page(d, j, io_ranges[i].type);
+               __assign_domain_page(d, j, io_ranges[i].type);
        }
 
        /* Map normal memory below 3G */
        end = VMX_CONFIG_PAGES(d) << PAGE_SHIFT;
        tmp = end < MMIO_START ? end : MMIO_START;
        for (i = 0; (i < tmp) && (list_ent != &d->page_list); i += PAGE_SIZE) {
-           mfn = page_to_mfn(list_entry(
-               list_ent, struct page_info, list));
+           mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
+           list_ent = mfn_to_page(mfn)->list.next;
+           if (VGA_IO_START <= i && i < VGA_IO_START + VGA_IO_SIZE)
+               continue;
            assign_domain_page(d, i, mfn << PAGE_SHIFT);
-           list_ent = mfn_to_page(mfn)->list.next;
        }
        ASSERT(list_ent != &d->page_list);
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_interrupt.c
--- a/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 14:30:34 2006 -0500
@@ -91,8 +91,12 @@ inject_guest_interruption(VCPU *vcpu, u6
 {
     u64 viva;
     REGS *regs;
+    ISR pt_isr;
     regs=vcpu_regs(vcpu);
-
+    // clear cr.isr.ri 
+    pt_isr.val = VMX(vcpu,cr_isr);
+    pt_isr.ir = 0;
+    VMX(vcpu,cr_isr) = pt_isr.val;
     collect_interruption(vcpu);
 
     vmx_vcpu_get_iva(vcpu,&viva);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_ivt.S
--- a/xen/arch/ia64/vmx/vmx_ivt.S       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_ivt.S       Tue May 30 14:30:34 2006 -0500
@@ -143,35 +143,62 @@ ENTRY(vmx_itlb_miss)
     thash r17 = r16
     ;;
     ttag r20 = r16
+    mov r18 = r17      
     ;;
 vmx_itlb_loop:
     cmp.eq p6,p0 = r0, r17
-(p6) br vmx_itlb_out
-    ;;
-    adds r22 = VLE_TITAG_OFFSET, r17
-    adds r23 = VLE_CCHAIN_OFFSET, r17
-    ;;
-    ld8 r24 = [r22]
-    ld8 r25 = [r23]
-    ;;
-    lfetch [r25]
-    cmp.eq  p6,p7 = r20, r24
-    ;;
-(p7)    mov r17 = r25;
-(p7)    br.sptk vmx_itlb_loop
+(p6)br vmx_itlb_out
+    ;;
+    adds r16 = VLE_TITAG_OFFSET, r17
+    adds r19 = VLE_CCHAIN_OFFSET, r17
+    ;;
+    ld8 r22 = [r16]
+    ld8 r23 = [r19]
+    ;;
+    lfetch [r23]
+    cmp.eq  p6,p7 = r20, r22
+    ;;
+(p7)mov r17 = r23;
+(p7)br.sptk vmx_itlb_loop
     ;;
     adds r23 = VLE_PGFLAGS_OFFSET, r17
     adds r24 = VLE_ITIR_OFFSET, r17
     ;;
-    ld8 r26 = [r23]
-    ld8 r25 = [r24]
-    ;;
-    mov cr.itir = r25
-    ;;
-    itc.i r26
+    ld8 r25 = [r23]
+    ld8 r26 = [r24]
+    ;;
+    cmp.eq p6,p7=r18,r17
+(p6) br vmx_itlb_loop1
+    ;;
+    ld8 r27 = [r18]
+    ;;
+    extr.u r19 = r27, 56, 8
+    extr.u r20 = r25, 56, 8
+    ;;
+    dep r27 = r20, r27, 56, 8
+    dep r25 = r19, r25, 56, 8
+    ;;
+    st8 [r18] = r25,8
+    st8 [r23] = r27
+    ;;
+    ld8 r28 = [r18]
+    ;;
+    st8 [r18] = r26,8
+    st8 [r24] = r28
+    ;;
+    ld8 r30 = [r18]
+    ;;
+    st8 [r18] = r22
+    st8 [r16] = r30 
+    ;;
+vmx_itlb_loop1:
+    mov cr.itir = r26
+    ;;
+    itc.i r25
     ;;
     srlz.i
     ;;
+    mov r17=cr.isr
     mov r23=r31
     mov r22=b0
     adds r16=IA64_VPD_BASE_OFFSET,r21
@@ -201,42 +228,68 @@ ENTRY(vmx_dtlb_miss)
     mov r29=cr.ipsr;
     ;;
     tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-    (p6)br.sptk vmx_alt_dtlb_miss_1
-//(p6)br.sptk vmx_fault_2
+(p6)br.sptk vmx_alt_dtlb_miss_1
     mov r16 = cr.ifa
     ;;
     thash r17 = r16
     ;;
     ttag r20 = r16
+    mov r18 = r17      
     ;;
 vmx_dtlb_loop:
     cmp.eq p6,p0 = r0, r17
 (p6)br vmx_dtlb_out
     ;;
-    adds r22 = VLE_TITAG_OFFSET, r17
-    adds r23 = VLE_CCHAIN_OFFSET, r17
-    ;;
-    ld8 r24 = [r22]
-    ld8 r25 = [r23]
-    ;;
-    lfetch [r25]
-    cmp.eq  p6,p7 = r20, r24
-    ;;
-(p7)mov r17 = r25;
+    adds r16 = VLE_TITAG_OFFSET, r17
+    adds r19 = VLE_CCHAIN_OFFSET, r17
+    ;;
+    ld8 r22 = [r16]
+    ld8 r23 = [r19]
+    ;;
+    lfetch [r23]
+    cmp.eq  p6,p7 = r20, r22
+    ;;
+(p7)mov r17 = r23;
 (p7)br.sptk vmx_dtlb_loop
     ;;
     adds r23 = VLE_PGFLAGS_OFFSET, r17
     adds r24 = VLE_ITIR_OFFSET, r17
     ;;
-    ld8 r26 = [r23]
-    ld8 r25 = [r24]
-    ;;
-    mov cr.itir = r25
-    ;;
-    itc.d r26
+    ld8 r25 = [r23]
+    ld8 r26 = [r24]
+    ;;
+    cmp.eq p6,p7=r18,r17
+(p6) br vmx_dtlb_loop1
+    ;;
+    ld8 r27 = [r18]
+    ;;
+    extr.u r19 = r27, 56, 8
+    extr.u r20 = r25, 56, 8
+    ;;
+    dep r27 = r20, r27, 56, 8
+    dep r25 = r19, r25, 56, 8
+    ;;
+    st8 [r18] = r25,8
+    st8 [r23] = r27
+    ;;
+    ld8 r28 = [r18]
+    ;;
+    st8 [r18] = r26,8
+    st8 [r24] = r28
+    ;;
+    ld8 r30 = [r18]
+    ;;
+    st8 [r18] = r22
+    st8 [r16] = r30 
+    ;;
+vmx_dtlb_loop1:
+    mov cr.itir = r26
+    ;;
+    itc.d r25
     ;;
     srlz.d;
     ;;
+    mov r17=cr.isr
     mov r23=r31
     mov r22=b0
     adds r16=IA64_VPD_BASE_OFFSET,r21
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx/vmx_phy_mode.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_phy_mode.c  Tue May 30 14:30:34 2006 -0500
@@ -186,8 +186,10 @@ vmx_load_all_rr(VCPU *vcpu)
         * mode in same region
         */
        if (is_physical_mode(vcpu)) {
-               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-                       panic("Unexpected domain switch in phy emul\n");
+               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL){
+                       panic_domain(vcpu_regs(vcpu),
+                                    "Unexpected domain switch in phy emul\n");
+               }
                phy_rr.rrval = vcpu->arch.metaphysical_rr0;
                //phy_rr.ps = PAGE_SHIFT;
                phy_rr.ve = 1;
@@ -322,8 +324,7 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_
         break;
     default:
         /* Sanity check */
-    printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
-        panic("Unexpected virtual <--> physical mode transition");
+        panic_domain(vcpu_regs(vcpu),"Unexpected virtual <--> physical mode transition,old:%lx,new:%lx\n",old_psr.val,new_psr.val);
         break;
     }
     return;
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_process.c
--- a/xen/arch/ia64/vmx/vmx_process.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_process.c   Tue May 30 14:30:34 2006 -0500
@@ -338,7 +338,7 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r
     }
     if(vec == 1) type = ISIDE_TLB;
     else if(vec == 2) type = DSIDE_TLB;
-    else panic("wrong vec\n");
+    else panic_domain(regs,"wrong vec:%0xlx\n",vec);
 
 //    prepare_if_physical_mode(v);
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_support.c   Tue May 30 14:30:34 2006 -0500
@@ -92,12 +92,12 @@ void vmx_io_assist(struct vcpu *v)
      */
     vio = get_vio(v->domain, v->vcpu_id);
     if (!vio)
-       panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+       panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n", (unsigned long)vio);
 
     p = &vio->vp_ioreq;
 
     if (p->state == STATE_IORESP_HOOK)
-       panic("Not supported: No hook available for DM request\n");
+       panic_domain(vcpu_regs(v),"Not supported: No hook available for DM request\n");
 
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
        if (p->state != STATE_IORESP_READY) {
@@ -135,7 +135,7 @@ void vmx_intr_assist(struct vcpu *v)
      * out of vmx_wait_io, when guest is still waiting for response.
      */
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
-       panic("!!!Bad resume to guest before I/O emulation is done.\n");
+       panic_domain(vcpu_regs(v),"!!!Bad resume to guest before I/O emulation is done.\n");
 
     /* Clear indicator specific to interrupt delivered from DM */
     if (test_and_clear_bit(port,
@@ -154,7 +154,7 @@ void vmx_intr_assist(struct vcpu *v)
      */
     vio = get_vio(v->domain, v->vcpu_id);
     if (!vio)
-       panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+       panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n", (unsigned long)vio);
 
 #ifdef V_IOSAPIC_READY
     /* Confirm virtual interrupt line signals, and set pending bits in vpd */
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_vcpu.c
--- a/xen/arch/ia64/vmx/vmx_vcpu.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c      Tue May 30 14:30:34 2006 -0500
@@ -91,7 +91,7 @@ vmx_vcpu_set_psr(VCPU *vcpu, unsigned lo
      * Otherwise panic
      */
     if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
-        panic ("Setting unsupport guest psr!");
+        panic_domain (regs,"Setting unsupport guest psr!");
     }
 
     /*
@@ -206,7 +206,7 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI
     vcpu_get_rr(vcpu, reg, &oldrr.rrval);
     newrr.rrval=val;
     if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits))
-        panic_domain (NULL, "use of invalid rid %lx\n", newrr.rid);
+        panic_domain (NULL, "use of invalid rid %x\n", newrr.rid);
     if(oldrr.ps!=newrr.ps){
         thash_purge_all(vcpu);
     }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_virt.c
--- a/xen/arch/ia64/vmx/vmx_virt.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_virt.c      Tue May 30 14:30:34 2006 -0500
@@ -182,8 +182,9 @@ IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu
 IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
 {
     UINT64 val;
+
     if(vcpu_get_gr_nat(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
-       panic(" get_psr nat bit fault\n");
+       panic_domain(vcpu_regs(vcpu),"get_psr nat bit fault\n");
 
        val = (val & MASK(0, 32)) | (VCPU(vcpu, vpsr) & MASK(32, 32));
 #if 0
@@ -216,7 +217,7 @@ IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST6
     regs=vcpu_regs(vcpu);
     vpsr.val=regs->cr_ipsr;
     if ( vpsr.is == 1 ) {
-        panic ("We do not support IA32 instruction yet");
+        panic_domain(regs,"We do not support IA32 instruction yet");
     }
 
     return vmx_vcpu_rfi(vcpu);
@@ -715,8 +716,9 @@ IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *v
 {
     // I27 and M30 are identical for these fields
     UINT64  imm;
+
     if(inst.M30.ar3!=44){
-        panic("Can't support ar register other than itc");
+        panic_domain(vcpu_regs(vcpu),"Can't support ar register other than itc");
     }
 #ifdef  CHECK_FAULT
     IA64_PSR vpsr;
@@ -741,7 +743,7 @@ IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *v
     // I26 and M29 are identical for these fields
     u64 r2;
     if(inst.M29.ar3!=44){
-        panic("Can't support ar register other than itc");
+        panic_domain(vcpu_regs(vcpu),"Can't support ar register other than itc");
     }
     if(vcpu_get_gr_nat(vcpu,inst.M29.r2,&r2)){
 #ifdef  CHECK_FAULT
@@ -769,7 +771,7 @@ IA64FAULT vmx_emul_mov_from_ar_reg(VCPU 
     // I27 and M30 are identical for these fields
     u64 r1;
     if(inst.M31.ar3!=44){
-        panic("Can't support ar register other than itc");
+        panic_domain(vcpu_regs(vcpu),"Can't support ar register other than itc");
     }
 #ifdef  CHECK_FAULT
     if(check_target_register(vcpu,inst.M31.r1)){
@@ -1359,8 +1361,7 @@ if ( (cause == 0xff && opcode == 0x1e000
     slot_type = slot_types[bundle.template][slot];
     ia64_priv_decoder(slot_type, inst, &cause);
     if(cause==0){
-        printf("This instruction at 0x%lx slot %d can't be  virtualized", iip, slot);
-        panic("123456\n");
+        panic_domain(regs,"This instruction at 0x%lx slot %d can't be  virtualized", iip, slot);
     }
 #else
     inst.inst=opcode;
@@ -1494,12 +1495,8 @@ if ( (cause == 0xff && opcode == 0x1e000
        status=IA64_FAULT;
         break;
     default:
-        printf("unknown cause %ld, iip: %lx, ipsr: %lx\n", cause,regs->cr_iip,regs->cr_ipsr);
-        while(1);
-       /* For unknown cause, let hardware to re-execute */
-       status=IA64_RETRY;
-        break;
-//        panic("unknown cause in virtualization intercept");
+        panic_domain(regs,"unknown cause %ld, iip: %lx, ipsr: %lx\n", cause,regs->cr_iip,regs->cr_ipsr);
+        break;
     };
 
 #if 0
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vtlb.c  Tue May 30 14:30:34 2006 -0500
@@ -274,36 +274,36 @@ static void vtlb_purge(thash_cb_t *hcb, 
 static void vtlb_purge(thash_cb_t *hcb, u64 va, u64 ps)
 {
     thash_data_t *hash_table, *prev, *next;
-    u64 start, end, size, tag, rid;
+    u64 start, end, size, tag, rid, def_size;
     ia64_rr vrr;
     vcpu_get_rr(current, va, &vrr.rrval);
     rid = vrr.rid;
     size = PSIZE(ps);
     start = va & (-size);
     end = start + size;
+    def_size = PSIZE(vrr.ps);
     while(start < end){
         hash_table = vsa_thash(hcb->pta, start, vrr.rrval, &tag);
-//         tag = ia64_ttag(start);
         if(!INVALID_TLB(hash_table)){
-       if(hash_table->etag == tag){
-            __rem_hash_head(hcb, hash_table);
-       }
-           else{
-           prev=hash_table;
-               next=prev->next;
-               while(next){
-                       if(next->etag == tag){
-                           prev->next=next->next;
-                           cch_free(hcb,next);
-                           hash_table->len--;
-                           break;
-                       }
-                       prev=next;
-                   next=next->next;
-           }
-       }
-        }
-           start += PAGE_SIZE;
+            if(hash_table->etag == tag){
+                __rem_hash_head(hcb, hash_table);
+            }
+            else{
+                prev=hash_table;
+                next=prev->next;
+                while(next){
+                    if(next->etag == tag){
+                        prev->next=next->next;
+                        cch_free(hcb,next);
+                        hash_table->len--;
+                        break;
+                    }
+                    prev=next;
+                    next=next->next;
+                }
+            }
+        }
+        start += def_size;
     }
 //    machine_tlb_purge(va, ps);
 }
@@ -319,26 +319,26 @@ static void vhpt_purge(thash_cb_t *hcb, 
     start = va & (-size);
     end = start + size;
     while(start < end){
-       hash_table = (thash_data_t *)ia64_thash(start);
-           tag = ia64_ttag(start);
-       if(hash_table->etag == tag ){
+        hash_table = (thash_data_t *)ia64_thash(start);
+        tag = ia64_ttag(start);
+        if(hash_table->etag == tag ){
             __rem_hash_head(hcb, hash_table);
-       }
-           else{
-           prev=hash_table;
-               next=prev->next;
-               while(next){
-                       if(next->etag == tag){
-                           prev->next=next->next;
-                           cch_free(hcb,next);
-                           hash_table->len--;
-                           break;
-                       }
-                       prev=next;
-                   next=next->next;
-           }
-       }
-           start += PAGE_SIZE;
+        }
+        else{
+            prev=hash_table;
+            next=prev->next;
+            while(next){
+                if(next->etag == tag){
+                    prev->next=next->next;
+                    cch_free(hcb,next);
+                    hash_table->len--;
+                    break;
+                }
+                prev=next;
+                next=next->next;
+            }
+        }
+        start += PAGE_SIZE;
     }
     machine_tlb_purge(va, ps);
 }
@@ -390,9 +390,9 @@ void vtlb_insert(thash_cb_t *hcb, u64 pt
     vcpu_get_rr(current, va, &vrr.rrval);
     if (vrr.ps != ps) {
 //        machine_tlb_insert(hcb->vcpu, entry);
-       panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d ps=%d\n",
-                    va, vrr.ps, ps);
-       return;
+        panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d ps=%ld\n",
+                     va, vrr.ps, ps);
+        return;
     }
     hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
     if( INVALID_TLB(hash_table) ) {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/Makefile
--- a/xen/arch/ia64/xen/Makefile        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/Makefile        Tue May 30 14:30:34 2006 -0500
@@ -2,6 +2,7 @@ obj-y += dom0_ops.o
 obj-y += dom0_ops.o
 obj-y += domain.o
 obj-y += dom_fw.o
+obj-y += efi_emul.o
 obj-y += hpsimserial.o
 obj-y += hypercall.o
 obj-y += hyperprivop.o
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/dom0_ops.c      Tue May 30 14:30:34 2006 -0500
@@ -151,10 +151,7 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
         put_domain(d);
     }
     break;
-    /*
-     * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 -
-     * it actually allocates and maps pages.
-     */
+
     case DOM0_GETMEMLIST:
     {
         unsigned long i = 0;
@@ -198,7 +195,8 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
                 ret = -ENOMEM;
 
             op->u.getmemlist.num_pfns = i - start_page;
-            copy_to_guest(u_dom0_op, op, 1);
+            if (copy_to_guest(u_dom0_op, op, 1))
+                ret = -EFAULT;
             
             put_domain(d);
         }
@@ -264,10 +262,6 @@ do_dom0vp_op(unsigned long cmd,
         }
         ret = get_gpfn_from_mfn(arg0);
         break;
-    case IA64_DOM0VP_populate_physmap:
-        ret = dom0vp_populate_physmap(d, arg0,
-                                      (unsigned int)arg1, (unsigned int)arg2);
-        break;
     case IA64_DOM0VP_zap_physmap:
         ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1);
         break;
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom_fw.c
--- a/xen/arch/ia64/xen/dom_fw.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/dom_fw.c        Tue May 30 14:30:34 2006 -0500
@@ -462,7 +462,7 @@ static void print_md(efi_memory_desc_t *
 static void print_md(efi_memory_desc_t *md)
 {
 #if 1
-       printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+       printk("domain mem: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
                md->type, md->attribute, md->phys_addr,
                md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
                md->num_pages >> (20 - EFI_PAGE_SHIFT));
@@ -541,7 +541,7 @@ struct fake_acpi_tables {
        struct fadt_descriptor_rev2 fadt;
        struct facs_descriptor_rev2 facs;
        struct acpi_table_header dsdt;
-       u8 aml[16];
+       u8 aml[8 + 11 * MAX_VIRT_CPUS];
        struct acpi_table_madt madt;
        struct acpi_table_lsapic lsapic[MAX_VIRT_CPUS];
        u8 pm1a_evt_blk[4];
@@ -561,6 +561,7 @@ dom_fw_fake_acpi(struct domain *d, struc
        struct acpi_table_madt *madt = &tables->madt;
        struct acpi_table_lsapic *lsapic = tables->lsapic;
        int i;
+       int aml_len;
 
        memset(tables, 0, sizeof(struct fake_acpi_tables));
 
@@ -629,7 +630,6 @@ dom_fw_fake_acpi(struct domain *d, struc
        /* setup DSDT with trivial namespace. */ 
        strncpy(dsdt->signature, DSDT_SIG, 4);
        dsdt->revision = 1;
-       dsdt->length = sizeof(struct acpi_table_header) + sizeof(tables->aml);
        strcpy(dsdt->oem_id, "XEN");
        strcpy(dsdt->oem_table_id, "Xen/ia64");
        strcpy(dsdt->asl_compiler_id, "XEN");
@@ -637,15 +637,33 @@ dom_fw_fake_acpi(struct domain *d, struc
 
        /* Trivial namespace, avoids ACPI CA complaints */
        tables->aml[0] = 0x10; /* Scope */
-       tables->aml[1] = 0x12; /* length/offset to next object */
-       strncpy((char *)&tables->aml[2], "_SB_", 4);
+       tables->aml[1] = 0x40; /* length/offset to next object (patched) */
+       tables->aml[2] = 0x00;
+       strncpy((char *)&tables->aml[3], "_SB_", 4);
 
        /* The processor object isn't absolutely necessary, revist for SMP */
-       tables->aml[6] = 0x5b; /* processor object */
-       tables->aml[7] = 0x83;
-       tables->aml[8] = 0x0b; /* next */
-       strncpy((char *)&tables->aml[9], "CPU0", 4);
-
+       aml_len = 7;
+       for (i = 0; i < 3; i++) {
+               unsigned char *p = tables->aml + aml_len;
+               p[0] = 0x5b; /* processor object */
+               p[1] = 0x83;
+               p[2] = 0x0b; /* next */
+               p[3] = 'C';
+               p[4] = 'P';
+               snprintf ((char *)p + 5, 3, "%02x", i);
+               if (i < 16)
+                       p[5] = 'U';
+               p[7] = i;       /* acpi_id */
+               p[8] = 0;       /* pblk_addr */
+               p[9] = 0;
+               p[10] = 0;
+               p[11] = 0;
+               p[12] = 0;      /* pblk_len */
+               aml_len += 13;
+       }
+       tables->aml[1] = 0x40 + ((aml_len - 1) & 0x0f);
+       tables->aml[2] = (aml_len - 1) >> 4;
+       dsdt->length = sizeof(struct acpi_table_header) + aml_len;
        dsdt->checksum = generate_acpi_checksum(dsdt, dsdt->length);
 
        /* setup MADT */
@@ -662,6 +680,7 @@ dom_fw_fake_acpi(struct domain *d, struc
        for (i = 0; i < MAX_VIRT_CPUS; i++) {
                lsapic[i].header.type = ACPI_MADT_LSAPIC;
                lsapic[i].header.length = sizeof(struct acpi_table_lsapic);
+               lsapic[i].acpi_id = i;
                lsapic[i].id = i;
                lsapic[i].eid = 0;
                lsapic[i].flags.enabled = (d->vcpu[i] != NULL);
@@ -798,6 +817,9 @@ dom_fw_init (struct domain *d, const cha
        pfn         = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn);
        cmd_line    = (void *) cp;
 
+       /* Initialise for EFI_SET_VIRTUAL_ADDRESS_MAP emulation */
+       d->arch.efi_runtime = efi_runtime;
+
        if (args) {
                if (arglen >= 1024)
                        arglen = 1023;
@@ -959,7 +981,7 @@ dom_fw_init (struct domain *d, const cha
                MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX
 #endif
                /* hypercall patches live here, masquerade as reserved PAL memory */
-               MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
+               MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 0);
                
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE,
 0);//XXX make sure this doesn't overlap on i/o, runtime area.
 #ifndef CONFIG_XEN_IA64_DOM0_VP
 /* hack */     MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,last_start,last_end,1);
@@ -993,7 +1015,7 @@ dom_fw_init (struct domain *d, const cha
                MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 1);
 #endif
                /* hypercall patches live here, masquerade as reserved PAL memory */
-               MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 1);
+               MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 1);
                
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1);
                /* Create a dummy entry for IO ports, so that IO accesses are
                   trapped by Xen.  */
@@ -1009,7 +1031,7 @@ dom_fw_init (struct domain *d, const cha
        BUG_ON(i > NUM_MEM_DESCS);
        bp->efi_memmap_size = i * sizeof(efi_memory_desc_t);
        bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
-       bp->efi_memdesc_version = 1;
+       bp->efi_memdesc_version = EFI_MEMDESC_VERSION;
        bp->command_line = dom_pa((unsigned long) cmd_line);
        bp->console_info.num_cols = 80;
        bp->console_info.num_rows = 25;
@@ -1019,7 +1041,8 @@ dom_fw_init (struct domain *d, const cha
        if (d == dom0) {
                // XXX CONFIG_XEN_IA64_DOM0_VP
                // initrd_start address is hard coded in start_kernel()
-               bp->initrd_start = ia64_boot_param->initrd_start;
+               bp->initrd_start = (dom0_start+dom0_size) -
+                 (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
                bp->initrd_size = ia64_boot_param->initrd_size;
        }
        else {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/domain.c        Tue May 30 14:30:34 2006 -0500
@@ -77,36 +77,19 @@ static void init_switch_stack(struct vcp
 static void init_switch_stack(struct vcpu *v);
 void build_physmap_table(struct domain *d);
 
+static void try_to_clear_PGC_allocate(struct domain* d,
+                                      struct page_info* page);
+
 /* this belongs in include/asm, but there doesn't seem to be a suitable place 
*/
 void arch_domain_destroy(struct domain *d)
 {
-       struct page_info *page;
-       struct list_head *ent, *prev;
-
-       if (d->arch.mm->pgd != NULL)
-       {
-               list_for_each ( ent, &d->arch.mm->pt_list )
-               {
-                       page = list_entry(ent, struct page_info, list);
-                       prev = ent->prev;
-                       list_del(ent);
-                       free_xenheap_page(page_to_virt(page));
-                       ent = prev;
-               }
-               pgd_free(d->arch.mm->pgd);
-       }
-       if (d->arch.mm != NULL)
-               xfree(d->arch.mm);
+       BUG_ON(d->arch.mm.pgd != NULL);
        if (d->shared_info != NULL)
                free_xenheap_page(d->shared_info);
 
+       domain_flush_destroy (d);
+
        deallocate_rid_range(d);
-
-       /* It is really good in this? */
-       flush_tlb_all();
-
-       /* It is really good in this? */
-       vhpt_flush_all();
 }
 
 static void default_idle(void)
@@ -179,7 +162,6 @@ struct vcpu *alloc_vcpu_struct(struct do
                memset(&d->shared_info->evtchn_mask[0], 0xff,
                    sizeof(d->shared_info->evtchn_mask));
 
-           v->vcpu_info = &(d->shared_info->vcpu_info[0]);
            v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
            v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
            v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
@@ -239,7 +221,8 @@ int arch_domain_create(struct domain *d)
        // the following will eventually need to be negotiated dynamically
        d->xen_vastart = XEN_START_ADDR;
        d->xen_vaend = XEN_END_ADDR;
-       d->shared_info_va = SHAREDINFO_ADDR;
+       d->arch.shared_info_va = SHAREDINFO_ADDR;
+       d->arch.breakimm = 0x1000;
 
        if (is_idle_domain(d))
            return 0;
@@ -255,26 +238,20 @@ int arch_domain_create(struct domain *d)
         */
        if (!allocate_rid_range(d,0))
                goto fail_nomem;
-       d->arch.breakimm = 0x1000;
        d->arch.sys_pgnr = 0;
 
-       if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL)
-           goto fail_nomem;
-       memset(d->arch.mm, 0, sizeof(*d->arch.mm));
-       INIT_LIST_HEAD(&d->arch.mm->pt_list);
+       memset(&d->arch.mm, 0, sizeof(d->arch.mm));
 
        d->arch.physmap_built = 0;
-       if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL)
+       if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
            goto fail_nomem;
 
        printf ("arch_domain_create: domain=%p\n", d);
        return 0;
 
 fail_nomem:
-       if (d->arch.mm->pgd != NULL)
-           pgd_free(d->arch.mm->pgd);
-       if (d->arch.mm != NULL)
-           xfree(d->arch.mm);
+       if (d->arch.mm.pgd != NULL)
+           pgd_free(d->arch.mm.pgd);
        if (d->shared_info != NULL)
            free_xenheap_page(d->shared_info);
        return -ENOMEM;
@@ -282,11 +259,7 @@ fail_nomem:
 
 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
 {
-       struct pt_regs *regs = vcpu_regs (v);
-
-       c->regs = *regs;
-       c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
-
+       c->regs = *vcpu_regs (v);
        c->shared = v->domain->shared_info->arch;
 }
 
@@ -325,11 +298,10 @@ int arch_set_info_guest(struct vcpu *v, 
        }
        new_thread(v, regs->cr_iip, 0, 0);
 
-       v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
-       if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
-                          c->vcpu.privregs, sizeof(mapped_regs_t))) {
+       if ( c->privregs && copy_from_user(v->arch.privregs,
+                          c->privregs, sizeof(mapped_regs_t))) {
            printk("Bad ctxt address in arch_set_info_guest: %p\n",
-                  c->vcpu.privregs);
+                  c->privregs);
            return -EFAULT;
        }
 
@@ -394,19 +366,129 @@ static void relinquish_memory(struct dom
 
         /* Follow the list chain and /then/ potentially free the page. */
         ent = ent->next;
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#if 1
+        BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
+#else
+        //XXX this should be done at traversing the P2M table.
+        if (page_get_owner(page) == d)
+            set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+#endif
+#endif
         put_page(page);
     }
 
     spin_unlock_recursive(&d->page_alloc_lock);
 }
 
+static void
+relinquish_pte(struct domain* d, pte_t* pte)
+{
+    unsigned long mfn = pte_pfn(*pte);
+    struct page_info* page;
+
+    // vmx domain use bit[58:56] to distinguish io region from memory.
+    // see vmx_build_physmap_table() in vmx_init.c
+    if (((mfn << PAGE_SHIFT) & GPFN_IO_MASK) != GPFN_MEM)
+        return;
+
+    // domain might map IO space or acpi table pages. check it.
+    if (!mfn_valid(mfn))
+        return;
+    page = mfn_to_page(mfn);
+    // struct page_info corresponding to mfn may exist or not depending
+    // on CONFIG_VIRTUAL_FRAME_TABLE.
+    // This check is too easy.
+    // The right way is to check whether this page is of io area or acpi pages
+    if (page_get_owner(page) == NULL) {
+        BUG_ON(page->count_info != 0);
+        return;
+    }
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+    if (page_get_owner(page) == d) {
+        BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
+        set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+    }
+#endif
+    try_to_clear_PGC_allocate(d, page);
+    put_page(page);
+}
+
+static void
+relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
+{
+    unsigned long i;
+    pte_t* pte = pte_offset_map(pmd, offset);
+
+    for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+        if (!pte_present(*pte))
+            continue;
+        
+        relinquish_pte(d, pte);
+    }
+    pte_free_kernel(pte_offset_map(pmd, offset));
+}
+
+static void
+relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset)
+{
+    unsigned long i;
+    pmd_t *pmd = pmd_offset(pud, offset);
+    
+    for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+        if (!pmd_present(*pmd))
+            continue;
+        
+        relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT));
+    }
+    pmd_free(pmd_offset(pud, offset));
+}
+
+static void
+relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
+{
+    unsigned long i;
+    pud_t *pud = pud_offset(pgd, offset);
+
+    for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+        if (!pud_present(*pud))
+            continue;
+
+        relinquish_pud(d, pud, offset + (i << PUD_SHIFT));
+    }
+    pud_free(pud_offset(pgd, offset));
+}
+
+static void
+relinquish_mm(struct domain* d)
+{
+    struct mm_struct* mm = &d->arch.mm;
+    unsigned long i;
+    pgd_t* pgd;
+
+    if (mm->pgd == NULL)
+        return;
+
+    pgd = pgd_offset(mm, 0);
+    for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+        if (!pgd_present(*pgd))
+            continue;
+
+        relinquish_pgd(d, pgd, i << PGDIR_SHIFT);
+    }
+    pgd_free(mm->pgd);
+    mm->pgd = NULL;
+}
+
 void domain_relinquish_resources(struct domain *d)
 {
     /* Relinquish every page of memory. */
 
-    /* xenheap_list is not used in ia64. */
-    BUG_ON(!list_empty(&d->xenpage_list));
-
+    // relase page traversing d->arch.mm.
+    relinquish_mm(d);
+
+    relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
 }
 
@@ -483,11 +565,58 @@ void new_thread(struct vcpu *v,
        }
 }
 
+// stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c
+void
+share_xen_page_with_guest(struct page_info *page,
+                          struct domain *d, int readonly)
+{
+    if ( page_get_owner(page) == d )
+        return;
+
+#if 1
+    if (readonly) {
+        printk("%s:%d readonly is not supported yet\n", __func__, __LINE__);
+    }
+#endif
+
+    // alloc_xenheap_pages() doesn't initialize page owner.
+    //BUG_ON(page_get_owner(page) != NULL);
+#if 0
+    if (get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY) {
+        printk("%s:%d page 0x%p mfn 0x%lx gpfn 0x%lx\n", __func__, __LINE__,
+               page, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)));
+    }
+#endif
+    // grant_table_destroy() release these pages.
+    // but it doesn't clear m2p entry. So there might remain stale entry.
+    // We clear such a stale entry here.
+    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+
+    spin_lock(&d->page_alloc_lock);
+
+#ifndef __ia64__
+    /* The incremented type count pins as writable or read-only. */
+    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
+    page->u.inuse.type_info |= PGT_validated | 1;
+#endif
+
+    page_set_owner(page, d);
+    wmb(); /* install valid domain ptr before updating refcnt. */
+    ASSERT(page->count_info == 0);
+    page->count_info |= PGC_allocated | 1;
+
+    if ( unlikely(d->xenheap_pages++ == 0) )
+        get_knownalive_domain(d);
+    list_add_tail(&page->list, &d->xenpage_list);
+
+    spin_unlock(&d->page_alloc_lock);
+}
+
+//XXX !xxx_present() should be used instread of !xxx_none()?
 static pte_t*
 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
 {
-    struct page_info *pt;
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
@@ -496,22 +625,16 @@ lookup_alloc_domain_pte(struct domain* d
     pgd = pgd_offset(mm, mpaddr);
     if (pgd_none(*pgd)) {
         pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
-        pt = maddr_to_page(pgd_val(*pgd));
-        list_add_tail(&pt->list, &d->arch.mm->pt_list);
     }
 
     pud = pud_offset(pgd, mpaddr);
     if (pud_none(*pud)) {
         pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
-        pt = maddr_to_page(pud_val(*pud));
-        list_add_tail(&pt->list, &d->arch.mm->pt_list);
     }
 
     pmd = pmd_offset(pud, mpaddr);
     if (pmd_none(*pmd)) {
         pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
-        pt = maddr_to_page(pmd_val(*pmd));
-        list_add_tail(&pt->list, &d->arch.mm->pt_list);
     }
 
     return pte_offset_map(pmd, mpaddr);
@@ -521,7 +644,7 @@ static pte_t*
 static pte_t*
 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
 {
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
@@ -549,7 +672,7 @@ static pte_t*
 static pte_t*
 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
 {
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
@@ -581,6 +704,7 @@ __assign_new_domain_page(struct domain *
 {
     struct page_info *p = NULL;
     unsigned long maddr;
+    int ret;
 
     BUG_ON(!pte_none(*pte));
 
@@ -601,14 +725,13 @@ __assign_new_domain_page(struct domain *
 #endif
 
     p = alloc_domheap_page(d);
-    // zero out pages for security reasons
-    if (p)
-        clear_page(page_to_virt(p));
-
     if (unlikely(!p)) {
         printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
         return(p);
     }
+
+    // zero out pages for security reasons
+    clear_page(page_to_virt(p));
     maddr = page_to_maddr (p);
     if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
                  && maddr < __get_cpu_var(vhpt_pend))) {
@@ -618,13 +741,15 @@ __assign_new_domain_page(struct domain *
                maddr);
     }
 
+    ret = get_page(p, d);
+    BUG_ON(ret == 0);
     set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT,
                          __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
 
+    mb ();
     //XXX CONFIG_XEN_IA64_DOM0_VP
     //    TODO racy
-    if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
-        set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
+    set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
     return p;
 }
 
@@ -668,21 +793,38 @@ assign_new_domain0_page(struct domain *d
 }
 
 /* map a physical address to the specified metaphysical addr */
-void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long 
physaddr)
-{
-       pte_t *pte;
-
-       pte = lookup_alloc_domain_pte(d, mpaddr);
-       if (pte_none(*pte)) {
-               set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
-                       __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
-
-       //XXX CONFIG_XEN_IA64_DOM0_VP
-       //    TODO racy
-       if ((physaddr & GPFN_IO_MASK) == GPFN_MEM)
-               set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
-       }
-       else printk("assign_domain_page: mpaddr %lx already mapped!\n",mpaddr);
+void
+__assign_domain_page(struct domain *d,
+                     unsigned long mpaddr, unsigned long physaddr)
+{
+    pte_t *pte;
+
+    pte = lookup_alloc_domain_pte(d, mpaddr);
+    if (pte_none(*pte)) {
+        set_pte(pte,
+                pfn_pte(physaddr >> PAGE_SHIFT,
+                        __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+        mb ();
+    } else
+        printk("%s: mpaddr %lx already mapped!\n", __func__, mpaddr);
+}
+
+/* get_page() and map a physical address to the specified metaphysical addr */
+void
+assign_domain_page(struct domain *d,
+                   unsigned long mpaddr, unsigned long physaddr)
+{
+    struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
+    int ret;
+
+    BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
+    ret = get_page(page, d);
+    BUG_ON(ret == 0);
+    __assign_domain_page(d, mpaddr, physaddr);
+
+    //XXX CONFIG_XEN_IA64_DOM0_VP
+    //    TODO racy
+    set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
 }
 
 #ifdef CONFIG_XEN_IA64_DOM0_VP
@@ -693,8 +835,58 @@ assign_domain_same_page(struct domain *d
     //XXX optimization
     unsigned long end = mpaddr + size;
     for (; mpaddr < end; mpaddr += PAGE_SIZE) {
-        assign_domain_page(d, mpaddr, mpaddr);
-    }
+        __assign_domain_page(d, mpaddr, mpaddr);
+    }
+}
+
+static int
+efi_mmio(unsigned long physaddr, unsigned long size)
+{
+    void *efi_map_start, *efi_map_end;
+    u64 efi_desc_size;
+    void* p;
+
+    efi_map_start = __va(ia64_boot_param->efi_memmap);
+    efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+    efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+    for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+        efi_memory_desc_t* md = (efi_memory_desc_t *)p;
+        unsigned long start = md->phys_addr;
+        unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+        
+        if (start <= physaddr && physaddr < end) {
+            if ((physaddr + size) > end) {
+                DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
+                        __func__, __LINE__, physaddr, size);
+                return 0;
+            }
+
+            // for io space
+            if (md->type == EFI_MEMORY_MAPPED_IO ||
+                md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
+                return 1;
+            }
+
+            // for runtime
+            // see efi_enter_virtual_mode(void)
+            // in linux/arch/ia64/kernel/efi.c
+            if ((md->attribute & EFI_MEMORY_RUNTIME) &&
+                !(md->attribute & EFI_MEMORY_WB)) {
+                return 1;
+            }
+
+            DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
+                    __func__, __LINE__, physaddr, size);
+            return 0;
+        }
+
+        if (physaddr < start) {
+            break;
+        }
+    }
+
+    return 1;
 }
 
 unsigned long
@@ -704,6 +896,11 @@ assign_domain_mmio_page(struct domain *d
     if (size == 0) {
         DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
                 __func__, d, mpaddr, size);
+    }
+    if (!efi_mmio(mpaddr, size)) {
+        DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n",
+                __func__, __LINE__, d, mpaddr, size);
+        return -EINVAL;
     }
     assign_domain_same_page(d, mpaddr, size);
     return mpaddr;
@@ -723,23 +920,55 @@ domain_page_flush(struct domain* d, unsi
 domain_page_flush(struct domain* d, unsigned long mpaddr,
                   unsigned long old_mfn, unsigned long new_mfn)
 {
-    struct vcpu* v;
-    //XXX SMP
-    for_each_vcpu(d, v) {
-        vcpu_purge_tr_entry(&v->arch.dtlb);
-        vcpu_purge_tr_entry(&v->arch.itlb);
-    }
-
-    // flush vhpt
-    vhpt_flush();
-    // flush tlb
-    flush_tlb_all();
-}
-
+    domain_flush_vtlb_all();
+}
+#endif
+
+//XXX heavily depends on the struct page_info layout.
+//
+// if (page_get_owner(page) == d &&
+//     test_and_clear_bit(_PGC_allocated, &page->count_info)) {
+//     put_page(page);
+// }
 static void
-zap_domain_page_one(struct domain *d, unsigned long mpaddr)
-{
-    struct mm_struct *mm = d->arch.mm;
+try_to_clear_PGC_allocate(struct domain* d, struct page_info* page)
+{
+    u32 _d, _nd;
+    u64 x, nx, y;
+
+    _d = pickle_domptr(d);
+    y = *((u64*)&page->count_info);
+    do {
+        x = y;
+        _nd = x >> 32;
+        nx = x - 1;
+        __clear_bit(_PGC_allocated, &nx);
+
+        if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) {
+            struct domain* nd = unpickle_domptr(_nd);
+            if (nd == NULL) {
+                DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+                        "sd=%p 0x%x,"
+                        " caf=%016lx, taf=%" PRtype_info "\n",
+                        (void *) page_to_mfn(page),
+                        d, d->domain_id, _d,
+                        nd, _nd,
+                        x,
+                        page->u.inuse.type_info);
+            }
+            break;
+        }
+
+        BUG_ON((nx & PGC_count_mask) < 1);
+        y = cmpxchg((u64*)&page->count_info, x, nx);
+    } while (unlikely(y != x));
+}
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+static void
+zap_domain_page_one(struct domain *d, unsigned long mpaddr, int do_put_page)
+{
+    struct mm_struct *mm = &d->arch.mm;
     pte_t *pte;
     pte_t old_pte;
     unsigned long mfn;
@@ -755,6 +984,7 @@ zap_domain_page_one(struct domain *d, un
     old_pte = ptep_get_and_clear(mm, mpaddr, pte);
     mfn = pte_pfn(old_pte);
     page = mfn_to_page(mfn);
+    BUG_ON((page->count_info & PGC_count_mask) == 0);
 
     if (page_get_owner(page) == d) {
         BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
@@ -763,7 +993,10 @@ zap_domain_page_one(struct domain *d, un
 
     domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
 
-    put_page(page);
+    if (do_put_page) {
+        try_to_clear_PGC_allocate(d, page);
+        put_page(page);
+    }
 }
 #endif
 
@@ -867,66 +1100,6 @@ unsigned long lookup_domain_mpa(struct d
 #ifdef CONFIG_XEN_IA64_DOM0_VP
 //XXX SMP
 unsigned long
-dom0vp_populate_physmap(struct domain *d, unsigned long gpfn,
-                        unsigned int extent_order, unsigned int address_bits)
-{
-    unsigned long ret = 0;
-    int flags = 0;
-    unsigned long mpaddr = gpfn << PAGE_SHIFT;
-    unsigned long extent_size = 1UL << extent_order;
-    unsigned long offset;
-    struct page_info* page;
-    unsigned long physaddr;
-
-    if (extent_order > 0 && !multipage_allocation_permitted(d)) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (gpfn + (1 << extent_order) < gpfn) {
-        ret = -EINVAL;
-        goto out;
-    }
-    if (gpfn > d->max_pages || gpfn + (1 << extent_order) > d->max_pages) {
-        ret = -EINVAL;
-        goto out;
-    }
-    if ((extent_size << PAGE_SHIFT) < extent_size) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    //XXX check address_bits and set flags = ALLOC_DOM_DMA if needed
-
-    // check the rage is not populated yet.
-    //XXX loop optimization
-    for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
-        if (____lookup_domain_mpa(d, mpaddr + offset) != INVALID_MFN) {
-            ret = -EBUSY;
-            goto out;
-        }
-    }
-
-    page = alloc_domheap_pages(d, extent_order, flags);
-    if (page == NULL) {
-        ret = -ENOMEM;
-        DPRINTK("Could not allocate order=%d extent: id=%d flags=%x\n",
-                extent_order, d->domain_id, flags);
-        goto out;
-    }
-
-    //XXX loop optimization
-    physaddr = page_to_maddr(page);
-    for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
-        assign_domain_page(d, mpaddr + offset, physaddr + offset);
-    }
-
-out:
-    return ret;
-}
-
-//XXX SMP
-unsigned long
 dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
                    unsigned int extent_order)
 {
@@ -937,26 +1110,28 @@ dom0vp_zap_physmap(struct domain *d, uns
         goto out;
     }
 
-    zap_domain_page_one(d, gpfn << PAGE_SHIFT);
+    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
 
 out:
     return ret;
 }
 
+// caller must get_page(mfn_to_page(mfn)) before
+// caller must call set_gpfn_from_mfn().
 static void
 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
                            unsigned long mfn, unsigned int flags)
 {
-    struct mm_struct *mm = d->arch.mm;
+    struct mm_struct *mm = &d->arch.mm;
     pte_t* pte;
     pte_t old_pte;
+    pte_t npte;
 
     pte = lookup_alloc_domain_pte(d, mpaddr);
 
     // update pte
-    old_pte = ptep_get_and_clear(mm, mpaddr, pte);
-    set_pte(pte, pfn_pte(mfn,
-                         __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+    npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
+    old_pte = ptep_xchg(mm, mpaddr, pte, npte);
     if (!pte_none(old_pte)) {
         unsigned long old_mfn;
         struct page_info* old_page;
@@ -973,8 +1148,10 @@ assign_domain_page_replace(struct domain
 
         domain_page_flush(d, mpaddr, old_mfn, mfn);
 
+        try_to_clear_PGC_allocate(d, old_page);
         put_page(old_page);
     } else {
+        BUG_ON(!mfn_valid(mfn));
         BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
                get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
     }
@@ -1002,17 +1179,195 @@ dom0vp_add_physmap(struct domain* d, uns
     }
 
     assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* flags:XXX */);
+    //don't update p2m table because this page belongs to rd, not d.
 out1:
     put_domain(rd);
 out0:
     return error;
 }
+
+// grant table host mapping
+// mpaddr: host_addr: pseudo physical address
+// mfn: frame: machine page frame
+// flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte
+int
+create_grant_host_mapping(unsigned long gpaddr,
+                         unsigned long mfn, unsigned int flags)
+{
+    struct domain* d = current->domain;
+    struct page_info* page;
+    int ret;
+
+    if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
+        DPRINTK("%s: flags 0x%x\n", __func__, flags);
+        return GNTST_general_error;
+    }
+    if (flags & GNTMAP_readonly) {
+#if 0
+        DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n",
+                __func__, flags);
+#endif
+        flags &= ~GNTMAP_readonly;
+    }
+
+    page = mfn_to_page(mfn);
+    ret = get_page(page, page_get_owner(page));
+    BUG_ON(ret == 0);
+    assign_domain_page_replace(d, gpaddr, mfn, flags);
+
+    return GNTST_okay;
+}
+
+// grant table host unmapping
+int
+destroy_grant_host_mapping(unsigned long gpaddr,
+                          unsigned long mfn, unsigned int flags)
+{
+    struct domain* d = current->domain;
+    pte_t* pte;
+    pte_t old_pte;
+    unsigned long old_mfn = INVALID_MFN;
+    struct page_info* old_page;
+
+    if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
+        DPRINTK("%s: flags 0x%x\n", __func__, flags);
+        return GNTST_general_error;
+    }
+    if (flags & GNTMAP_readonly) {
+#if 0
+        DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n",
+                __func__, flags);
+#endif
+        flags &= ~GNTMAP_readonly;
+    }
+
+    pte = lookup_noalloc_domain_pte(d, gpaddr);
+    if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
+        return GNTST_general_error;//XXX GNTST_bad_pseudo_phys_addr
+
+    // update pte
+    old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
+    if (pte_present(old_pte)) {
+        old_mfn = pte_pfn(old_pte);//XXX
+    }
+    domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
+
+    old_page = mfn_to_page(old_mfn);
+    BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, page) 
is not needed.
+    put_page(old_page);
+
+    return GNTST_okay;
+}
+
+//XXX needs refcount patch
+//XXX heavily depends on the struct page layout.
+//XXX SMP
+int
+steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
+{
+#if 0 /* if big endian */
+# error "implement big endian version of steal_page_for_grant_transfer()"
+#endif
+    u32 _d, _nd;
+    u64 x, nx, y;
+    unsigned long mpaddr = get_gpfn_from_mfn(page_to_mfn(page)) << PAGE_SHIFT;
+    struct page_info *new;
+
+    zap_domain_page_one(d, mpaddr, 0);
+    put_page(page);
+
+    spin_lock(&d->page_alloc_lock);
+
+    /*
+     * The tricky bit: atomically release ownership while there is just one
+     * benign reference to the page (PGC_allocated). If that reference
+     * disappears then the deallocation routine will safely spin.
+     */
+    _d  = pickle_domptr(d);
+    y = *((u64*)&page->count_info);
+    do {
+        x = y;
+        nx = x & 0xffffffff;
+        // page->count_info: untouched
+        // page->u.inused._domain = 0;
+        _nd = x >> 32;
+
+        if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
+                     (1 | PGC_allocated)) ||
+            unlikely(_nd != _d)) {
+            struct domain* nd = unpickle_domptr(_nd);
+            if (nd == NULL) {
+                DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+                        "sd=%p 0x%x,"
+                        " caf=%016lx, taf=%" PRtype_info "\n",
+                        (void *) page_to_mfn(page),
+                        d, d->domain_id, _d,
+                        nd, _nd,
+                        x,
+                        page->u.inuse.type_info);
+            } else {
+                DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+                        "sd=%p(%u) 0x%x,"
+                        " caf=%016lx, taf=%" PRtype_info "\n",
+                        (void *) page_to_mfn(page),
+                        d, d->domain_id, _d,
+                        nd, nd->domain_id, _nd,
+                        x,
+                        page->u.inuse.type_info);
+            }
+            spin_unlock(&d->page_alloc_lock);
+            return -1;
+        }
+
+        y = cmpxchg((u64*)&page->count_info, x, nx);
+    } while (unlikely(y != x));
+
+    /*
+     * Unlink from 'd'. At least one reference remains (now anonymous), so
+     * noone else is spinning to try to delete this page from 'd'.
+     */
+    d->tot_pages--;
+    list_del(&page->list);
+
+    spin_unlock(&d->page_alloc_lock);
+
+#if 1
+    //XXX Until net_rx_action() fix
+    // assign new page for this mpaddr
+    new = assign_new_domain_page(d, mpaddr);
+    BUG_ON(new == NULL);//XXX
+#endif
+
+    return 0;
+}
+
+void
+guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+                       unsigned long mfn)
+{
+    int ret;
+
+    ret = get_page(mfn_to_page(mfn), d);
+    BUG_ON(ret == 0);
+    assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* XXX */);
+    set_gpfn_from_mfn(mfn, gpfn);//XXX SMP
+
+    //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> 
PAGE_SHIFT));
+}
+
+void
+guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+                          unsigned long mfn)
+{
+    BUG_ON(mfn == 0);//XXX
+    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
+}
 #endif
 
 /* Flush cache of domain d.  */
 void domain_cache_flush (struct domain *d, int sync_only)
 {
-       struct mm_struct *mm = d->arch.mm;
+       struct mm_struct *mm = &d->arch.mm;
        pgd_t *pgd = mm->pgd;
        unsigned long maddr;
        int i,j,k, l;
@@ -1478,9 +1833,9 @@ void domain_pend_keyboard_interrupt(int 
 
 void sync_vcpu_execstate(struct vcpu *v)
 {
-       __ia64_save_fpu(v->arch._thread.fph);
-       if (VMX_DOMAIN(v))
-               vmx_save_state(v);
+//     __ia64_save_fpu(v->arch._thread.fph);
+//     if (VMX_DOMAIN(v))
+//             vmx_save_state(v);
        // FIXME SMP: Anything else needed here for SMP?
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/hypercall.c     Tue May 30 14:30:34 2006 -0500
@@ -26,7 +26,6 @@
 #include <public/physdev.h>
 #include <xen/domain.h>
 
-extern unsigned long translate_domain_mpaddr(unsigned long);
 static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop);
 static long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg);
 /* FIXME: where these declarations should be there ? */
@@ -71,13 +70,39 @@ hypercall_t ia64_hypercall_table[] =
        (hypercall_t)do_ni_hypercall,           /*  */                          
/* 30 */
        (hypercall_t)do_ni_hypercall,           /*  */
        (hypercall_t)do_event_channel_op,
-       (hypercall_t)do_physdev_op
+       (hypercall_t)do_physdev_op,
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */                  /* 35 */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */                  /* 40 */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */                  /* 45 */
+       (hypercall_t)do_ni_hypercall,           /*  */
+       (hypercall_t)do_ni_hypercall,           /*  */
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+       (hypercall_t)do_dom0vp_op,                      /* dom0vp_op */
+#else
+       (hypercall_t)do_ni_hypercall,           /* arch_0 */
+#endif
+       (hypercall_t)do_ni_hypercall,           /* arch_1 */
+       (hypercall_t)do_ni_hypercall,           /* arch_2 */            /* 50 */
+       (hypercall_t)do_ni_hypercall,           /* arch_3 */
+       (hypercall_t)do_ni_hypercall,           /* arch_4 */
+       (hypercall_t)do_ni_hypercall,           /* arch_5 */
+       (hypercall_t)do_ni_hypercall,           /* arch_6 */
+       (hypercall_t)do_ni_hypercall            /* arch_7 */            /* 55 */
        };
 
 uint32_t nr_hypercalls =
        sizeof(ia64_hypercall_table) / sizeof(hypercall_t);
 
-static int
+static IA64FAULT
 xen_hypercall (struct pt_regs *regs)
 {
        uint32_t cmd = (uint32_t)regs->r2;
@@ -91,15 +116,9 @@ xen_hypercall (struct pt_regs *regs)
                        regs->r18,
                        regs->r19);
        else
-#ifdef CONFIG_XEN_IA64_DOM0_VP
-       if (cmd ==  __HYPERVISOR_ia64_dom0vp_op) 
-               regs->r8 = do_dom0vp_op(regs->r14, regs->r15, regs->r16,
-                                       regs->r17, regs->r18);
-       else
-#endif
                regs->r8 = -ENOSYS;
 
-       return 1;
+       return IA64_NO_FAULT;
 }
 
 
@@ -134,9 +153,6 @@ fw_hypercall_ipi (struct pt_regs *regs)
                c.regs.cr_iip = targ_regs->cr_iip;
                c.regs.r1 = targ_regs->r1;
                
-               /* Copy from vcpu 0.  */
-               c.vcpu.evtchn_vector =
-                       current->domain->vcpu[0]->vcpu_info->arch.evtchn_vector;
                if (arch_set_info_guest (targ, &c) != 0) {
                        printf ("arch_boot_vcpu: failure\n");
                        return;
@@ -162,14 +178,16 @@ fw_hypercall_ipi (struct pt_regs *regs)
        return;
 }
 
-static int
+static IA64FAULT
 fw_hypercall (struct pt_regs *regs)
 {
        struct vcpu *v = current;
        struct sal_ret_values x;
-       unsigned long *tv, *tc;
-
-       switch (regs->r2) {
+       efi_status_t efi_ret_value;
+       IA64FAULT fault; 
+       unsigned long index = regs->r2 & FW_HYPERCALL_NUM_MASK_HIGH;
+
+       switch (index) {
            case FW_HYPERCALL_PAL_CALL:
                //printf("*** PAL hypercall: index=%d\n",regs->r28);
                //FIXME: This should call a C routine
@@ -227,40 +245,10 @@ fw_hypercall (struct pt_regs *regs)
                regs->r8 = x.r8; regs->r9 = x.r9;
                regs->r10 = x.r10; regs->r11 = x.r11;
                break;
-           case FW_HYPERCALL_EFI_RESET_SYSTEM:
-               printf("efi.reset_system called ");
-               if (current->domain == dom0) {
-                       printf("(by dom0)\n ");
-                       (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
-               }
-               else
-                       domain_shutdown (current->domain, SHUTDOWN_reboot);
-               regs->r8 = EFI_UNSUPPORTED;
-               break;
-           case FW_HYPERCALL_EFI_GET_TIME:
-               tv = (unsigned long *) vcpu_get_gr(v,32);
-               tc = (unsigned long *) vcpu_get_gr(v,33);
-               //printf("efi_get_time(%p,%p) called...",tv,tc);
-               tv = (unsigned long *) __va(translate_domain_mpaddr((unsigned 
long) tv));
-               if (tc) tc = (unsigned long *) 
__va(translate_domain_mpaddr((unsigned long) tc));
-               regs->r8 = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t 
*) tc);
-               //printf("and returns %lx\n",regs->r8);
-               break;
-           case FW_HYPERCALL_EFI_SET_TIME:
-           case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
-           case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
-               // FIXME: need fixes in efi.h from 2.6.9
-           case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
-               // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
-               // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS 
-               // POINTER ARGUMENTS WILL BE VIRTUAL!!
-           case FW_HYPERCALL_EFI_GET_VARIABLE:
-               // FIXME: need fixes in efi.h from 2.6.9
-           case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
-           case FW_HYPERCALL_EFI_SET_VARIABLE:
-           case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
-               // FIXME: need fixes in efi.h from 2.6.9
-               regs->r8 = EFI_UNSUPPORTED;
+           case FW_HYPERCALL_EFI_CALL:
+               efi_ret_value = efi_emulator (regs, &fault);
+               if (fault != IA64_NO_FAULT) return fault;
+               regs->r8 = efi_ret_value;
                break;
            case FW_HYPERCALL_IPI:
                fw_hypercall_ipi (regs);
@@ -269,7 +257,7 @@ fw_hypercall (struct pt_regs *regs)
                printf("unknown ia64 fw hypercall %lx\n", regs->r2);
                regs->r8 = do_ni_hypercall();
        }
-       return 1;
+       return IA64_NO_FAULT;
 }
 
 /* opt_unsafe_hypercall: If true, unsafe debugging hypercalls are allowed.
@@ -277,7 +265,7 @@ static int opt_unsafe_hypercall = 0;
 static int opt_unsafe_hypercall = 0;
 boolean_param("unsafe_hypercall", opt_unsafe_hypercall);
 
-int
+IA64FAULT
 ia64_hypercall (struct pt_regs *regs)
 {
        struct vcpu *v = current;
@@ -307,7 +295,7 @@ ia64_hypercall (struct pt_regs *regs)
                        printf("unknown user xen/ia64 hypercall %lx\n", index);
                        regs->r8 = do_ni_hypercall();
            }
-           return 1;
+           return IA64_NO_FAULT;
        }
 
        /* Hypercalls are only allowed by kernel.
@@ -316,7 +304,7 @@ ia64_hypercall (struct pt_regs *regs)
            /* FIXME: Return a better error value ?
               Reflection ? Illegal operation ?  */
            regs->r8 = -1;
-           return 1;
+           return IA64_NO_FAULT;
        }
 
        if (index >= FW_HYPERCALL_FIRST_ARCH)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/hyperprivop.S   Tue May 30 14:30:34 2006 -0500
@@ -30,7 +30,7 @@
 #undef FAST_ITC        //XXX CONFIG_XEN_IA64_DOM0_VP
                //    TODO fast_itc doesn't suport dom0 vp yet.
 #else
-//#define FAST_ITC     // working but default off for now
+//#define FAST_ITC     // to be reviewed
 #endif
 #define FAST_BREAK
 #ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -46,27 +46,8 @@
 #undef RFI_TO_INTERRUPT // not working yet
 #endif
 
-#define    XEN_HYPER_RFI           0x1
-#define    XEN_HYPER_RSM_DT        0x2
-#define    XEN_HYPER_SSM_DT        0x3
-#define    XEN_HYPER_COVER         0x4
-#define    XEN_HYPER_ITC_D         0x5
-#define    XEN_HYPER_ITC_I         0x6
-#define    XEN_HYPER_SSM_I         0x7
-#define    XEN_HYPER_GET_IVR       0x8
-#define    XEN_HYPER_GET_TPR       0x9
-#define    XEN_HYPER_SET_TPR       0xa
-#define    XEN_HYPER_EOI           0xb
-#define    XEN_HYPER_SET_ITM       0xc
-#define    XEN_HYPER_THASH         0xd
-#define    XEN_HYPER_PTC_GA        0xe
-#define    XEN_HYPER_ITR_D         0xf
-#define    XEN_HYPER_GET_RR        0x10
-#define    XEN_HYPER_SET_RR        0x11
-#define    XEN_HYPER_SET_KR        0x12
-
 #ifdef CONFIG_SMP
-#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
+//#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
 #undef FAST_PTC_GA
 #endif
 
@@ -106,7 +87,7 @@ GLOBAL_ENTRY(fast_hyperprivop)
 #endif
        // HYPERPRIVOP_SSM_I?
        // assumes domain interrupts pending, so just do it
-       cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
 (p7)   br.sptk.many hyper_ssm_i;;
 
        // FIXME. This algorithm gives up (goes to the slow path) if there
@@ -127,75 +108,75 @@ 1:        // when we get to here r20=~=interrup
 1:     // when we get to here r20=~=interrupts pending
 
        // HYPERPRIVOP_RFI?
-       cmp.eq p7,p6=XEN_HYPER_RFI,r17
+       cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
 (p7)   br.sptk.many hyper_rfi;;
 
        // HYPERPRIVOP_GET_IVR?
-       cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_GET_IVR,r17
 (p7)   br.sptk.many hyper_get_ivr;;
 
        cmp.ne p7,p0=r20,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
 
        // HYPERPRIVOP_COVER?
-       cmp.eq p7,p6=XEN_HYPER_COVER,r17
+       cmp.eq p7,p6=HYPERPRIVOP_COVER,r17
 (p7)   br.sptk.many hyper_cover;;
 
        // HYPERPRIVOP_SSM_DT?
-       cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SSM_DT,r17
 (p7)   br.sptk.many hyper_ssm_dt;;
 
        // HYPERPRIVOP_RSM_DT?
-       cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
+       cmp.eq p7,p6=HYPERPRIVOP_RSM_DT,r17
 (p7)   br.sptk.many hyper_rsm_dt;;
 
        // HYPERPRIVOP_GET_TPR?
-       cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_GET_TPR,r17
 (p7)   br.sptk.many hyper_get_tpr;;
 
        // HYPERPRIVOP_SET_TPR?
-       cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_TPR,r17
 (p7)   br.sptk.many hyper_set_tpr;;
 
        // HYPERPRIVOP_EOI?
-       cmp.eq p7,p6=XEN_HYPER_EOI,r17
+       cmp.eq p7,p6=HYPERPRIVOP_EOI,r17
 (p7)   br.sptk.many hyper_eoi;;
 
        // HYPERPRIVOP_SET_ITM?
-       cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_ITM,r17
 (p7)   br.sptk.many hyper_set_itm;;
 
        // HYPERPRIVOP_SET_RR?
-       cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_RR,r17
 (p7)   br.sptk.many hyper_set_rr;;
 
        // HYPERPRIVOP_GET_RR?
-       cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_GET_RR,r17
 (p7)   br.sptk.many hyper_get_rr;;
 
        // HYPERPRIVOP_PTC_GA?
-       cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
+       cmp.eq p7,p6=HYPERPRIVOP_PTC_GA,r17
 (p7)   br.sptk.many hyper_ptc_ga;;
 
        // HYPERPRIVOP_ITC_D?
-       cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
+       cmp.eq p7,p6=HYPERPRIVOP_ITC_D,r17
 (p7)   br.sptk.many hyper_itc_d;;
 
        // HYPERPRIVOP_ITC_I?
-       cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
+       cmp.eq p7,p6=HYPERPRIVOP_ITC_I,r17
 (p7)   br.sptk.many hyper_itc_i;;
 
        // HYPERPRIVOP_THASH?
-       cmp.eq p7,p6=XEN_HYPER_THASH,r17
+       cmp.eq p7,p6=HYPERPRIVOP_THASH,r17
 (p7)   br.sptk.many hyper_thash;;
 
        // HYPERPRIVOP_SET_KR?
-       cmp.eq p7,p6=XEN_HYPER_SET_KR,r17
+       cmp.eq p7,p6=HYPERPRIVOP_SET_KR,r17
 (p7)   br.sptk.many hyper_set_kr;;
 
        // if not one of the above, give up for now and do it the slow way
        br.sptk.many dispatch_break_fault ;;
-
+END(fast_hyperprivop)
 
 // give up for now if: ipsr.be==1, ipsr.pp==1
 // from reflect_interruption, don't need to:
@@ -250,7 +231,7 @@ ENTRY(hyper_ssm_i)
        cmp.ne p7,p0=r21,r0
 (p7)   br.sptk.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_I);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -348,6 +329,7 @@ ENTRY(hyper_ssm_i)
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_ssm_i)
 
 // reflect domain clock interrupt
 //     r31 == pr
@@ -594,7 +576,7 @@ 1:
        adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;
        // fall through
-
+END(fast_break_reflect)
 
 // reflect to domain ivt+r20
 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
@@ -723,6 +705,7 @@ ENTRY(fast_reflect)
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(fast_reflect)
 
 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
 //     r16 == isr
@@ -762,6 +745,7 @@ GLOBAL_ENTRY(fast_access_reflect)
        and r22=~3,r22;;
        st8 [r23]=r22;;
        br.cond.sptk.many fast_reflect;;
+END(fast_access_reflect)
 
 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
 // is as it was at the time of original miss.  We want to preserve that
@@ -769,7 +753,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
 GLOBAL_ENTRY(fast_tlb_miss_reflect)
 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
        br.spnt.few page_fault ;;
-#endif
+#else
        mov r31=pr
        mov r30=cr.ipsr
        mov r29=cr.iip
@@ -957,6 +941,7 @@ 1:  // check the guest VHPT
        extr.u r24=r24,2,6;;
        // IFA already in PSCB
        br.cond.sptk.many fast_insert;;
+END(fast_tlb_miss_reflect)
 
 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
 ENTRY(recover_and_page_fault)
@@ -1007,6 +992,7 @@ 1: extr.u r25=r17,61,3;;
        mov r29=cr.iip
        mov r30=cr.ipsr
        br.sptk.many fast_reflect;;
+#endif
 END(fast_tlb_miss_reflect)
 
 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -1065,7 +1051,7 @@ 1:
 
 1:     // OK now, let's do an rfi.
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RFI);;
        ld8 r23=[r20];;
        adds r23=1,r23;;
        st8 [r20]=r23;;
@@ -1145,9 +1131,10 @@ 1:       mov pr=r31,-1
        ;;
        rfi
        ;;
-
+END(hyper_rfi)
+       
 #ifdef RFI_TO_INTERRUPT
-GLOBAL_ENTRY(rfi_check_extint)
+ENTRY(rfi_check_extint)
        //br.sptk.many dispatch_break_fault ;;
 
        // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
@@ -1214,11 +1201,12 @@ GLOBAL_ENTRY(rfi_check_extint)
        adds r29=15,r29;;
        cmp.ge p6,p0=r29,r26    // if tpr masks interrupt, just rfi
 (p6)   br.cond.spnt.few just_do_rfi;;
+END(rfi_check_extint)
 
 // this doesn't work yet (dies early after getting to user mode)
 // but happens relatively infrequently, so fix it later.
 // NOTE that these will be counted incorrectly for now (for privcnt output)
-GLOBAL_ENTRY(rfi_with_interrupt)
+ENTRY(rfi_with_interrupt)
 #if 1
        br.sptk.many dispatch_break_fault ;;
 #endif
@@ -1313,11 +1301,12 @@ GLOBAL_ENTRY(rfi_with_interrupt)
        st4 [r20]=r0 ;;
        mov pr=r31,-1 ;;
        rfi
+END(rfi_with_interrupt)
 #endif // RFI_TO_INTERRUPT
 
 ENTRY(hyper_cover)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_COVER);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1348,11 +1337,12 @@ ENTRY(hyper_cover)
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_cover)
 
 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
 ENTRY(hyper_ssm_dt)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_DT);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1384,11 +1374,12 @@ 1:      extr.u r26=r24,41,2 ;;
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_ssm_dt)
 
 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
 ENTRY(hyper_rsm_dt)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RSM_DT);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1421,10 +1412,11 @@ 1:      extr.u r26=r24,41,2 ;;
        mov pr=r31,-1 ;;
        rfi
        ;;
+END(hyper_rsm_dt)
 
 ENTRY(hyper_get_tpr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_TPR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1453,7 +1445,7 @@ END(hyper_get_tpr)
 // (or accidentally missing) delivering an interrupt
 ENTRY(hyper_set_tpr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_TPR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1481,7 +1473,7 @@ END(hyper_set_tpr)
 
 ENTRY(hyper_get_ivr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
+       movl r22=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_IVR);;
        ld8 r21=[r22];;
        adds r21=1,r21;;
        st8 [r22]=r21;;
@@ -1593,7 +1585,7 @@ ENTRY(hyper_eoi)
        cmp.ne p7,p0=r20,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_EOI);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1657,7 +1649,7 @@ ENTRY(hyper_set_itm)
        cmp.ne p7,p0=r20,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_ITM);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1698,7 +1690,7 @@ END(hyper_set_itm)
 
 ENTRY(hyper_get_rr)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_RR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1730,7 +1722,7 @@ ENTRY(hyper_set_rr)
        cmp.leu p7,p0=7,r25     // punt on setting rr7
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_RR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1788,7 +1780,7 @@ ENTRY(hyper_set_kr)
        cmp.ne p7,p0=r0,r25     // if kr# > 7, go slow way
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_KR);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1844,9 +1836,9 @@ END(hyper_set_kr)
 // On entry:
 //     r18 == XSI_PSR_IC
 //     r31 == pr
-GLOBAL_ENTRY(hyper_thash)
+ENTRY(hyper_thash)
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_THASH);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1915,7 +1907,7 @@ ENTRY(hyper_ptc_ga)
 #endif
        // FIXME: validate not flushing Xen addresses
 #ifdef FAST_HYPERPRIVOP_CNT
-       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+       movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_PTC_GA);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -1992,18 +1984,19 @@ ENTRY(recover_and_dispatch_break_fault)
 #endif
        mov b0=r29 ;;
        br.sptk.many dispatch_break_fault;;
+END(recover_and_dispatch_break_fault)
 
 //  Registers at entry
-//     r17 = break immediate (XEN_HYPER_ITC_D or I)
+//     r17 = break immediate (HYPERPRIVOP_ITC_D or I)
 //     r18 == XSI_PSR_IC_OFS
 //     r31 == pr
-GLOBAL_ENTRY(hyper_itc)
-ENTRY(hyper_itc_i)
+ENTRY(hyper_itc)
+hyper_itc_i:   
        // fall through, hyper_itc_d handles both i and d
-ENTRY(hyper_itc_d)
+hyper_itc_d:   
 #ifndef FAST_ITC
        br.sptk.many dispatch_break_fault ;;
-#endif
+#else
        // ensure itir.ps >= xen's pagesize
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r23=[r23];;
@@ -2027,9 +2020,9 @@ ENTRY(hyper_itc_d)
        cmp.ne p7,p0=r27,r28
 (p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
-       cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
-(p6)   movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);;
-(p7)   movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);;
+       cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
+(p6)   movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_D);;
+(p7)   movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_I);;
        ld8 r21=[r20];;
        adds r21=1,r21;;
        st8 [r20]=r21;;
@@ -2040,7 +2033,10 @@ ENTRY(hyper_itc_d)
        movl r30=recover_and_dispatch_break_fault ;;
        mov r16=r8;;
        // fall through
-
+#endif
+END(hyper_itc)
+
+#if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)
 
 // fast_insert(PSCB(ifa),r24=ps,r16=pte)
 //     r16 == pte
@@ -2050,7 +2046,7 @@ ENTRY(hyper_itc_d)
 //     r29 == saved value of b0 in case of recovery
 //     r30 == recovery ip if failure occurs
 //     r31 == pr
-GLOBAL_ENTRY(fast_insert)
+ENTRY(fast_insert)
        // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
        mov r19=1;;
        shl r20=r19,r24;;
@@ -2175,4 +2171,4 @@ no_inc_iip:
        rfi
        ;;
 END(fast_insert)
-
+#endif
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/ivt.S   Tue May 30 14:30:34 2006 -0500
@@ -100,6 +100,15 @@
        mov r19=n;;                     /* prepare to save predicates */        
        \
        br.sptk.many dispatch_to_fault_handler
 
+#define FAULT_OR_REFLECT(n)                                                    
        \
+       mov r31=pr;                                                             
        \
+       mov r20=cr.ipsr;;                                                       
        \
+       mov r19=n;      /* prepare to save predicates */                        
        \
+       extr.u r20=r20,IA64_PSR_CPL0_BIT,2;;                                    
        \
+       cmp.ne p6,p0=r0,r20;    /* cpl != 0?*/                                  
        \
+(p6)   br.dptk.many dispatch_reflection;                                       
        \
+       br.sptk.few dispatch_to_fault_handler
+
 #ifdef XEN
 #define REFLECT(n)                                                             
        \
        mov r31=pr;                                                             
        \
@@ -697,7 +706,7 @@ ENTRY(ikey_miss)
 ENTRY(ikey_miss)
        DBG_FAULT(6)
 #ifdef XEN
-       REFLECT(6)
+       FAULT_OR_REFLECT(6)
 #endif
        FAULT(6)
 END(ikey_miss)
@@ -746,7 +755,7 @@ ENTRY(dkey_miss)
 ENTRY(dkey_miss)
        DBG_FAULT(7)
 #ifdef XEN
-       REFLECT(7)
+       FAULT_OR_REFLECT(7)
 #endif
        FAULT(7)
 END(dkey_miss)
@@ -757,7 +766,7 @@ ENTRY(dirty_bit)
 ENTRY(dirty_bit)
        DBG_FAULT(8)
 #ifdef XEN
-       REFLECT(8)
+       FAULT_OR_REFLECT(8)
 #endif
        /*
         * What we do here is to simply turn on the dirty bit in the PTE.  We 
need to
@@ -1523,7 +1532,7 @@ ENTRY(page_not_present)
 ENTRY(page_not_present)
        DBG_FAULT(20)
 #ifdef XEN
-       REFLECT(20)
+       FAULT_OR_REFLECT(20)
 #endif
        mov r16=cr.ifa
        rsm psr.dt
@@ -1546,7 +1555,7 @@ ENTRY(key_permission)
 ENTRY(key_permission)
        DBG_FAULT(21)
 #ifdef XEN
-       REFLECT(21)
+       FAULT_OR_REFLECT(21)
 #endif
        mov r16=cr.ifa
        rsm psr.dt
@@ -1562,7 +1571,7 @@ ENTRY(iaccess_rights)
 ENTRY(iaccess_rights)
        DBG_FAULT(22)
 #ifdef XEN
-       REFLECT(22)
+       FAULT_OR_REFLECT(22)
 #endif
        mov r16=cr.ifa
        rsm psr.dt
@@ -1637,7 +1646,7 @@ ENTRY(disabled_fp_reg)
        mov pr=r20,-1
        ;;
 #endif
-       REFLECT(25)
+       FAULT_OR_REFLECT(25)
 //floating_panic:
 //     br.sptk.many floating_panic
        ;;
@@ -1656,7 +1665,7 @@ ENTRY(nat_consumption)
 ENTRY(nat_consumption)
        DBG_FAULT(26)
 #ifdef XEN
-       REFLECT(26)
+       FAULT_OR_REFLECT(26)
 #endif
        FAULT(26)
 END(nat_consumption)
@@ -1668,7 +1677,7 @@ ENTRY(speculation_vector)
        DBG_FAULT(27)
 #ifdef XEN
        // this probably need not reflect...
-       REFLECT(27)
+       FAULT_OR_REFLECT(27)
 #endif
        /*
         * A [f]chk.[as] instruction needs to take the branch to the recovery 
code but
@@ -1714,7 +1723,7 @@ ENTRY(debug_vector)
 ENTRY(debug_vector)
        DBG_FAULT(29)
 #ifdef XEN
-       REFLECT(29)
+       FAULT_OR_REFLECT(29)
 #endif
        FAULT(29)
 END(debug_vector)
@@ -1725,7 +1734,7 @@ ENTRY(unaligned_access)
 ENTRY(unaligned_access)
        DBG_FAULT(30)
 #ifdef XEN
-       REFLECT(30)
+       FAULT_OR_REFLECT(30)
 #endif
        mov r16=cr.ipsr
        mov r31=pr              // prepare to save predicates
@@ -1739,7 +1748,7 @@ ENTRY(unsupported_data_reference)
 ENTRY(unsupported_data_reference)
        DBG_FAULT(31)
 #ifdef XEN
-       REFLECT(31)
+       FAULT_OR_REFLECT(31)
 #endif
        FAULT(31)
 END(unsupported_data_reference)
@@ -1750,7 +1759,7 @@ ENTRY(floating_point_fault)
 ENTRY(floating_point_fault)
        DBG_FAULT(32)
 #ifdef XEN
-       REFLECT(32)
+       FAULT_OR_REFLECT(32)
 #endif
        FAULT(32)
 END(floating_point_fault)
@@ -1761,7 +1770,7 @@ ENTRY(floating_point_trap)
 ENTRY(floating_point_trap)
        DBG_FAULT(33)
 #ifdef XEN
-       REFLECT(33)
+       FAULT_OR_REFLECT(33)
 #endif
        FAULT(33)
 END(floating_point_trap)
@@ -1772,7 +1781,7 @@ ENTRY(lower_privilege_trap)
 ENTRY(lower_privilege_trap)
        DBG_FAULT(34)
 #ifdef XEN
-       REFLECT(34)
+       FAULT_OR_REFLECT(34)
 #endif
        FAULT(34)
 END(lower_privilege_trap)
@@ -1783,7 +1792,7 @@ ENTRY(taken_branch_trap)
 ENTRY(taken_branch_trap)
        DBG_FAULT(35)
 #ifdef XEN
-       REFLECT(35)
+       FAULT_OR_REFLECT(35)
 #endif
        FAULT(35)
 END(taken_branch_trap)
@@ -1794,7 +1803,7 @@ ENTRY(single_step_trap)
 ENTRY(single_step_trap)
        DBG_FAULT(36)
 #ifdef XEN
-       REFLECT(36)
+       FAULT_OR_REFLECT(36)
 #endif
        FAULT(36)
 END(single_step_trap)
@@ -1853,7 +1862,7 @@ ENTRY(ia32_exception)
 ENTRY(ia32_exception)
        DBG_FAULT(45)
 #ifdef XEN
-       REFLECT(45)
+       FAULT_OR_REFLECT(45)
 #endif
        FAULT(45)
 END(ia32_exception)
@@ -1864,7 +1873,7 @@ ENTRY(ia32_intercept)
 ENTRY(ia32_intercept)
        DBG_FAULT(46)
 #ifdef XEN
-       REFLECT(46)
+       FAULT_OR_REFLECT(46)
 #endif
 #ifdef CONFIG_IA32_SUPPORT
        mov r31=pr
@@ -1897,7 +1906,7 @@ ENTRY(ia32_interrupt)
 ENTRY(ia32_interrupt)
        DBG_FAULT(47)
 #ifdef XEN
-       REFLECT(47)
+       FAULT_OR_REFLECT(47)
 #endif
 #ifdef CONFIG_IA32_SUPPORT
        mov r31=pr
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/privop.c        Tue May 30 14:30:34 2006 -0500
@@ -793,33 +793,6 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
                printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr);
        return fault;
 }
-
-
-// FIXME: Move these to include/public/arch-ia64?
-#define HYPERPRIVOP_RFI                        0x1
-#define HYPERPRIVOP_RSM_DT             0x2
-#define HYPERPRIVOP_SSM_DT             0x3
-#define HYPERPRIVOP_COVER              0x4
-#define HYPERPRIVOP_ITC_D              0x5
-#define HYPERPRIVOP_ITC_I              0x6
-#define HYPERPRIVOP_SSM_I              0x7
-#define HYPERPRIVOP_GET_IVR            0x8
-#define HYPERPRIVOP_GET_TPR            0x9
-#define HYPERPRIVOP_SET_TPR            0xa
-#define HYPERPRIVOP_EOI                        0xb
-#define HYPERPRIVOP_SET_ITM            0xc
-#define HYPERPRIVOP_THASH              0xd
-#define HYPERPRIVOP_PTC_GA             0xe
-#define HYPERPRIVOP_ITR_D              0xf
-#define HYPERPRIVOP_GET_RR             0x10
-#define HYPERPRIVOP_SET_RR             0x11
-#define HYPERPRIVOP_SET_KR             0x12
-#define HYPERPRIVOP_FC                 0x13
-#define HYPERPRIVOP_GET_CPUID          0x14
-#define HYPERPRIVOP_GET_PMD            0x15
-#define HYPERPRIVOP_GET_EFLAG          0x16
-#define HYPERPRIVOP_SET_EFLAG          0x17
-#define HYPERPRIVOP_MAX                        0x17
 
 static const char * const hyperpriv_str[HYPERPRIVOP_MAX+1] = {
        0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/process.c       Tue May 30 14:30:34 2006 -0500
@@ -15,7 +15,6 @@
 #include <asm/ptrace.h>
 #include <xen/delay.h>
 
-#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
 #include <asm/sal.h>   /* FOR struct ia64_sal_retval */
 
 #include <asm/system.h>
@@ -40,7 +39,7 @@ extern void panic_domain(struct pt_regs 
 extern void panic_domain(struct pt_regs *, const char *, ...);
 extern long platform_is_hp_ski(void);
 extern int ia64_hyperprivop(unsigned long, REGS *);
-extern int ia64_hypercall(struct pt_regs *regs);
+extern IA64FAULT ia64_hypercall(struct pt_regs *regs);
 extern void vmx_do_launch(struct vcpu *);
 extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
 
@@ -195,10 +194,10 @@ void check_bad_nested_interruption(unsig
        }
        vector &= ~0xf;
        if (vector != IA64_DATA_TLB_VECTOR &&
-               vector != IA64_ALT_DATA_TLB_VECTOR &&
-               vector != IA64_VHPT_TRANS_VECTOR) {
-panic_domain(regs,"psr.ic off, delivering 
fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
-       vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip));
+           vector != IA64_ALT_DATA_TLB_VECTOR &&
+           vector != IA64_VHPT_TRANS_VECTOR) {
+               panic_domain(regs,"psr.ic off, delivering 
fault=%lx,ipsr=%lx,iip=%lx,ifa=%lx,isr=%lx,PSCB.iip=%lx\n",
+                            
vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip));
        }
 }
 
@@ -265,7 +264,8 @@ void deliver_pending_interrupt(struct pt
 }
 unsigned long lazy_cover_count = 0;
 
-int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
+static int
+handle_lazy_cover(struct vcpu *v, struct pt_regs *regs)
 {
        if (!PSCB(v,interrupt_collection_enabled)) {
                PSCB(v,ifs) = regs->cr_ifs;
@@ -285,7 +285,7 @@ void ia64_do_page_fault (unsigned long a
        unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
        IA64FAULT fault;
 
-       if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) 
return;
+       if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return;
        if ((isr & IA64_ISR_SP)
            || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == 
IA64_ISR_CODE_LFETCH))
        {
@@ -299,7 +299,7 @@ void ia64_do_page_fault (unsigned long a
        }
 
  again:
-       fault = vcpu_translate(current,address,is_data,0,&pteval,&itir,&iha);
+       fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
        if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
                u64 logps;
                pteval = translate_domain_pte(pteval, address, itir, &logps);
@@ -307,11 +307,7 @@ void ia64_do_page_fault (unsigned long a
                if (fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) {
                        /* dtlb has been purged in-between.  This dtlb was
                           matching.  Undo the work.  */
-#ifdef VHPT_GLOBAL
-                       vhpt_flush_address (address, 1);
-#endif
-                       ia64_ptcl(address, 1<<2);
-                       ia64_srlz_i();
+                       vcpu_flush_tlb_vhpt_range (address, 1);
                        goto again;
                }
                return;
@@ -357,7 +353,7 @@ ia64_fault (unsigned long vector, unsign
        struct pt_regs *regs = (struct pt_regs *) &stack;
        unsigned long code;
        char buf[128];
-       static const char * const reason[] = {
+       static const char *reason[] = {
                "IA-64 Illegal Operation fault",
                "IA-64 Privileged Operation fault",
                "IA-64 Privileged Register fault",
@@ -367,10 +363,10 @@ ia64_fault (unsigned long vector, unsign
                "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", 
"Unknown fault 12",
                "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
        };
-#if 0
-printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
- vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
-#endif
+
+       printf("ia64_fault, vector=0x%lx, ifa=0x%016lx, iip=0x%016lx, 
ipsr=0x%016lx, isr=0x%016lx\n",
+              vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
+
 
        if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == 
IA64_ISR_CODE_LFETCH)) {
                /*
@@ -383,15 +379,48 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
        }
 
        switch (vector) {
-             case 24: /* General Exception */
+           case 0:
+               printk("VHPT Translation.\n");
+               break;
+         
+           case 4:
+               printk("Alt DTLB.\n");
+               break;
+         
+           case 6:
+               printk("Instruction Key Miss.\n");
+               break;
+
+           case 7: 
+               printk("Data Key Miss.\n");
+               break;
+
+           case 8: 
+               printk("Dirty-bit.\n");
+               break;
+
+           case 20:
+               printk("Page Not Found.\n");
+               break;
+
+           case 21:
+               printk("Key Permission.\n");
+               break;
+
+           case 22:
+               printk("Instruction Access Rights.\n");
+               break;
+
+           case 24: /* General Exception */
                code = (isr >> 4) & 0xf;
                sprintf(buf, "General Exception: %s%s", reason[code],
-                       (code == 3) ? ((isr & (1UL << 37))
-                                      ? " (RSE access)" : " (data access)") : 
"");
+                       (code == 3) ? ((isr & (1UL << 37)) ? " (RSE access)" :
+                                      " (data access)") : "");
                if (code == 8) {
 # ifdef CONFIG_IA64_PRINT_HAZARDS
                        printk("%s[%d]: possible hazard @ ip=%016lx (pr = 
%016lx)\n",
-                              current->comm, current->pid, regs->cr_iip + 
ia64_psr(regs)->ri,
+                              current->comm, current->pid,
+                              regs->cr_iip + ia64_psr(regs)->ri,
                               regs->pr);
 # endif
                        printf("ia64_fault: returning on hazard\n");
@@ -399,162 +428,65 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
                }
                break;
 
-             case 25: /* Disabled FP-Register */
-               if (isr & 2) {
-                       //disabled_fph_fault(regs);
-                       //return;
-               }
-               sprintf(buf, "Disabled FPL fault---not supposed to happen!");
-               break;
-
-             case 26: /* NaT Consumption */
-               if (user_mode(regs)) {
-                       void *addr;
-
-                       if (((isr >> 4) & 0xf) == 2) {
-                               /* NaT page consumption */
-                               //sig = SIGSEGV;
-                               //code = SEGV_ACCERR;
-                               addr = (void *) ifa;
-                       } else {
-                               /* register NaT consumption */
-                               //sig = SIGILL;
-                               //code = ILL_ILLOPN;
-                               addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                       }
-                       //siginfo.si_signo = sig;
-                       //siginfo.si_code = code;
-                       //siginfo.si_errno = 0;
-                       //siginfo.si_addr = addr;
-                       //siginfo.si_imm = vector;
-                       //siginfo.si_flags = __ISR_VALID;
-                       //siginfo.si_isr = isr;
-                       //force_sig_info(sig, &siginfo, current);
-                       //return;
-               } //else if (ia64_done_with_exception(regs))
-                       //return;
-               sprintf(buf, "NaT consumption");
-               break;
-
-             case 31: /* Unsupported Data Reference */
-               if (user_mode(regs)) {
-                       //siginfo.si_signo = SIGILL;
-                       //siginfo.si_code = ILL_ILLOPN;
-                       //siginfo.si_errno = 0;
-                       //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                       //siginfo.si_imm = vector;
-                       //siginfo.si_flags = __ISR_VALID;
-                       //siginfo.si_isr = isr;
-                       //force_sig_info(SIGILL, &siginfo, current);
-                       //return;
-               }
-               sprintf(buf, "Unsupported data reference");
-               break;
-
-             case 29: /* Debug */
-             case 35: /* Taken Branch Trap */
-             case 36: /* Single Step Trap */
-               //if (fsys_mode(current, regs)) {}
-               switch (vector) {
-                     case 29:
-                       //siginfo.si_code = TRAP_HWBKPT;
-#ifdef CONFIG_ITANIUM
-                       /*
-                        * Erratum 10 (IFA may contain incorrect address) now 
has
-                        * "NoFix" status.  There are no plans for fixing this.
-                        */
-                       if (ia64_psr(regs)->is == 0)
-                         ifa = regs->cr_iip;
-#endif
-                       break;
-                     case 35: ifa = 0; break;
-                     case 36: ifa = 0; break;
-                     //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
-                     //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
-               }
-               //siginfo.si_signo = SIGTRAP;
-               //siginfo.si_errno = 0;
-               //siginfo.si_addr  = (void *) ifa;
-               //siginfo.si_imm   = 0;
-               //siginfo.si_flags = __ISR_VALID;
-               //siginfo.si_isr   = isr;
-               //force_sig_info(SIGTRAP, &siginfo, current);
-               //return;
-
-             case 32: /* fp fault */
-             case 33: /* fp trap */
-               //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
-               //if ((result < 0) || (current->thread.flags & 
IA64_THREAD_FPEMU_SIGFPE)) {
-                       //siginfo.si_signo = SIGFPE;
-                       //siginfo.si_errno = 0;
-                       //siginfo.si_code = FPE_FLTINV;
-                       //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                       //siginfo.si_flags = __ISR_VALID;
-                       //siginfo.si_isr = isr;
-                       //siginfo.si_imm = 0;
-                       //force_sig_info(SIGFPE, &siginfo, current);
-               //}
-               //return;
-               sprintf(buf, "FP fault/trap");
-               break;
-
-             case 34:
-               if (isr & 0x2) {
-                       /* Lower-Privilege Transfer Trap */
-                       /*
-                        * Just clear PSR.lp and then return immediately: all 
the
-                        * interesting work (e.g., signal delivery is done in 
the kernel
-                        * exit path).
-                        */
-                       //ia64_psr(regs)->lp = 0;
-                       //return;
-                       sprintf(buf, "Lower-Privilege Transfer trap");
-               } else {
-                       /* Unimplemented Instr. Address Trap */
-                       if (user_mode(regs)) {
-                               //siginfo.si_signo = SIGILL;
-                               //siginfo.si_code = ILL_BADIADDR;
-                               //siginfo.si_errno = 0;
-                               //siginfo.si_flags = 0;
-                               //siginfo.si_isr = 0;
-                               //siginfo.si_imm = 0;
-                               //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
-                               //force_sig_info(SIGILL, &siginfo, current);
-                               //return;
-                       }
-                       sprintf(buf, "Unimplemented Instruction Address fault");
-               }
-               break;
-
-             case 45:
-               printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
-               printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
-                      regs->cr_iip, ifa, isr);
-               //force_sig(SIGSEGV, current);
-               break;
-
-             case 46:
-               printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
-               printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 
0x%lx\n",
-                      regs->cr_iip, ifa, isr, iim);
-               //force_sig(SIGSEGV, current);
-               return;
-
-             case 47:
-               sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
-               break;
-
-             default:
-               sprintf(buf, "Fault %lu", vector);
-               break;
-       }
-       //die_if_kernel(buf, regs, error);
-printk("ia64_fault: %s: reflecting\n",buf);
-PSCB(current,itir) = vcpu_get_itir_on_fault(current,ifa);
-PSCB(current,ifa) = ifa;
-reflect_interruption(isr,regs,IA64_GENEX_VECTOR);
-//while(1);
-       //force_sig(SIGILL, current);
+           case 25:
+               printk("Disabled FP-Register.\n");
+               break;
+
+           case 26:
+               printk("NaT consumption.\n");
+               break;
+
+           case 29:
+               printk("Debug.\n");
+               break;
+
+           case 30:
+               printk("Unaligned Reference.\n");
+               break;
+
+           case 31:
+               printk("Unsupported data reference.\n");
+               break;
+
+           case 32:
+               printk("Floating-Point Fault.\n");
+               break;
+
+           case 33:
+               printk("Floating-Point Trap.\n");
+               break;
+
+           case 34:
+               printk("Lower Privilege Transfer Trap.\n");
+               break;
+
+           case 35:
+               printk("Taken Branch Trap.\n");
+               break;
+
+           case 36:
+               printk("Single Step Trap.\n");
+               break;
+    
+           case 45:
+               printk("IA-32 Exception.\n");
+               break;
+
+           case 46:
+               printk("IA-32 Intercept.\n");
+               break;
+
+           case 47:
+               printk("IA-32 Interrupt.\n");
+               break;
+
+           default:
+               printk("Fault %lu\n", vector);
+               break;
+       }
+
+       show_registers(regs);
+       panic("Fault in Xen.\n");
 }
 
 unsigned long running_on_sim = 0;
@@ -679,6 +611,7 @@ ia64_handle_break (unsigned long ifa, st
 {
        struct domain *d = current->domain;
        struct vcpu *v = current;
+       IA64FAULT vector;
 
        if (first_break) {
                if (platform_is_hp_ski()) running_on_sim = 1;
@@ -699,9 +632,11 @@ ia64_handle_break (unsigned long ifa, st
                /* by default, do not continue */
                v->arch.hypercall_continuation = 0;
 
-               if (ia64_hypercall(regs) &&
-                   !PSCBX(v, hypercall_continuation))
-                       vcpu_increment_iip(current);
+               if ((vector = ia64_hypercall(regs)) == IA64_NO_FAULT) {
+                       if (!PSCBX(v, hypercall_continuation))
+                               vcpu_increment_iip(current);
+               }
+               else reflect_interruption(isr, regs, vector);
        }
        else if (!PSCB(v,interrupt_collection_enabled)) {
                if (ia64_hyperprivop(iim,regs))
@@ -813,7 +748,7 @@ printf("*** Handled privop masquerading 
                while(vector);
                return;
        }
-       if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, 
isr, regs)) return;
+       if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, 
regs)) return;
        PSCB(current,ifa) = ifa;
        PSCB(current,itir) = vcpu_get_itir_on_fault(v,ifa);
        reflect_interruption(isr,regs,vector);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/regionreg.c     Tue May 30 14:30:34 2006 -0500
@@ -17,9 +17,7 @@
 #include <asm/vcpu.h>
 
 /* Defined in xemasm.S  */
-extern void ia64_new_rr7(unsigned long rid,void *shared_info, void 
*shared_arch_info, unsigned long p_vhpt, unsigned long v_pal);
-
-extern void *pal_vaddr;
+extern void ia64_new_rr7(unsigned long rid, void *shared_info, void 
*shared_arch_info, unsigned long shared_info_va, unsigned long p_vhpt);
 
 /* RID virtualization mechanism is really simple:  domains have less rid bits
    than the host and the host rid space is shared among the domains.  (Values
@@ -260,9 +258,9 @@ int set_one_rr(unsigned long rr, unsigne
                if (!PSCB(v,metaphysical_mode))
                        set_rr(rr,newrrv.rrval);
        } else if (rreg == 7) {
-               ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
-                            v->arch.privregs, __get_cpu_var(vhpt_paddr),
-                            (unsigned long) pal_vaddr);
+               ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
+                            v->arch.privregs, v->domain->arch.shared_info_va,
+                            __get_cpu_var(vhpt_paddr));
        } else {
                set_rr(rr,newrrv.rrval);
        }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/vcpu.c  Tue May 30 14:30:34 2006 -0500
@@ -28,8 +28,6 @@ extern void setfpreg (unsigned long regn
 
 extern void panic_domain(struct pt_regs *, const char *, ...);
 extern unsigned long translate_domain_mpaddr(unsigned long);
-extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
-
 
 typedef        union {
        struct ia64_psr ia64_psr;
@@ -682,9 +680,9 @@ UINT64 vcpu_check_pending_interrupts(VCP
         */
 check_start:
        if (event_pending(vcpu) && 
-               !test_bit(vcpu->vcpu_info->arch.evtchn_vector,
+               !test_bit(vcpu->domain->shared_info->arch.evtchn_vector,
                        &PSCBX(vcpu, insvc[0])))
-               vcpu_pend_interrupt(vcpu, vcpu->vcpu_info->arch.evtchn_vector);
+               vcpu_pend_interrupt(vcpu, 
vcpu->domain->shared_info->arch.evtchn_vector);
 
        p = &PSCBX(vcpu,irr[3]);
        r = &PSCBX(vcpu,insvc[3]);
@@ -1290,8 +1288,7 @@ static inline int vcpu_match_tr_entry(TR
        return trp->pte.p && vcpu_match_tr_entry_no_p(trp, ifa, rid);
 }
 
-// in_tpa is not used when CONFIG_XEN_IA64_DOM0_VP
-IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, BOOLEAN 
in_tpa, UINT64 *pteval, UINT64 *itir, UINT64 *iha)
+IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 
*pteval, UINT64 *itir, UINT64 *iha)
 {
        unsigned long region = address >> 61;
        unsigned long pta, rid, rr;
@@ -1368,12 +1365,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN
        pte = trp->pte;
        if (/* is_data && */ pte.p
            && vcpu_match_tr_entry_no_p(trp,address,rid)) {
-#ifndef CONFIG_XEN_IA64_DOM0_VP
-               if (vcpu->domain==dom0 && !in_tpa)
-                       *pteval = pte.val;
-               else
-#endif
-               *pteval = vcpu->arch.dtlb_pte;
+               *pteval = pte.val;
                *itir = trp->itir;
                dtlb_translate_count++;
                return IA64_USE_TLB;
@@ -1422,7 +1414,7 @@ IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 va
        UINT64 pteval, itir, mask, iha;
        IA64FAULT fault;
 
-       fault = vcpu_translate(vcpu, vadr, TRUE, TRUE, &pteval, &itir, &iha);
+       fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
        if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB)
        {
                mask = itir_mask(itir);
@@ -1708,11 +1700,6 @@ IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT6
  VCPU translation register access routines
 **************************************************************************/
 
-void vcpu_purge_tr_entry(TR_ENTRY *trp)
-{
-       trp->pte.val = 0;
-}
-
 static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 
ifa)
 {
        UINT64 ps;
@@ -1800,12 +1787,10 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
        if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
                return;
        if (IorD & 0x1) {
-               vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
-               PSCBX(vcpu,itlb_pte) = mp_pte;
+               vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
        }
        if (IorD & 0x2) {
-               vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
-               PSCBX(vcpu,dtlb_pte) = mp_pte;
+               vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),mp_pte,ps<<2,vaddr);
        }
 }
 
@@ -1875,20 +1860,14 @@ IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vad
        return fault;
 }
 
-int ptce_count = 0;
 IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
 {
        // Note that this only needs to be called once, i.e. the
        // architected loop to purge the entire TLB, should use
        //  base = stride1 = stride2 = 0, count0 = count 1 = 1
 
-#ifdef VHPT_GLOBAL
-       vhpt_flush();   // FIXME: This is overdoing it
-#endif
-       local_flush_tlb_all();
-       // just invalidate the "whole" tlb
-       vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
-       vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+       vcpu_flush_vtlb_all ();
+
        return IA64_NO_FAULT;
 }
 
@@ -1905,33 +1884,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 
        // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
 //printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
 
-#ifdef CONFIG_XEN_SMP
-       struct domain *d = vcpu->domain;
-       struct vcpu *v;
-
-       for_each_vcpu (d, v) {
-               if (v == vcpu)
-                       continue;
-
-               /* Purge TC entries.
-                  FIXME: clear only if match.  */
-               vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
-               vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
-
-#ifdef VHPT_GLOBAL
-               /* Invalidate VHPT entries.  */
-               vhpt_flush_address_remote (v->processor, vadr, addr_range);
-#endif
-       }
-#endif
-
-#ifdef VHPT_GLOBAL
-       vhpt_flush_address(vadr,addr_range);
-#endif
-       ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
-       /* Purge tc.  */
-       vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
-       vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+       domain_flush_vtlb_range (vcpu->domain, vadr, addr_range);
+
        return IA64_NO_FAULT;
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/vhpt.c  Tue May 30 14:30:34 2006 -0500
@@ -12,32 +12,31 @@
 #include <asm/system.h>
 #include <asm/pgalloc.h>
 #include <asm/page.h>
-#include <asm/dma.h>
 #include <asm/vhpt.h>
+#include <asm/vcpu.h>
+
+/* Defined in tlb.c  */
+extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
 
 extern long running_on_sim;
 
 DEFINE_PER_CPU (unsigned long, vhpt_paddr);
 DEFINE_PER_CPU (unsigned long, vhpt_pend);
 
-void vhpt_flush(void)
-{
-       struct vhpt_lf_entry *v =__va(__ia64_per_cpu_var(vhpt_paddr));
-       int i;
-#if 0
-static int firsttime = 2;
-
-if (firsttime) firsttime--;
-else {
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-}
-#endif
+static void vhpt_flush(void)
+{
+       struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
+       int i;
+
+       for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
+               v->ti_tag = INVALID_TI_TAG;
+}
+
+static void vhpt_erase(void)
+{
+       struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
+       int i;
+
        for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
@@ -47,51 +46,6 @@ printf("vhpt_flush: ********************
        // initialize cache too???
 }
 
-#ifdef VHPT_GLOBAL
-void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
-{
-       struct vhpt_lf_entry *vlfe;
-
-       if ((vadr >> 61) == 7) {
-               // no vhpt for region 7 yet, see vcpu_itc_no_srlz
-               printf("vhpt_flush_address: region 7, spinning...\n");
-               while(1);
-       }
-#if 0
-       // this only seems to occur at shutdown, but it does occur
-       if ((!addr_range) || addr_range & (addr_range - 1)) {
-               printf("vhpt_flush_address: weird range, spinning...\n");
-               while(1);
-       }
-//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
-#endif
-       while ((long)addr_range > 0) {
-               vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
-               // FIXME: for now, just blow it away even if it belongs to
-               // another domain.  Later, use ttag to check for match
-//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
-//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
-//}
-               vlfe->ti_tag |= INVALID_TI_TAG;
-               addr_range -= PAGE_SIZE;
-               vadr += PAGE_SIZE;
-       }
-}
-
-void vhpt_flush_address_remote(int cpu,
-                              unsigned long vadr, unsigned long addr_range)
-{
-       while ((long)addr_range > 0) {
-               /* Get the VHPT entry.  */
-               unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
-               volatile struct vhpt_lf_entry *v;
-               v =__va(per_cpu(vhpt_paddr, cpu) + off);
-               v->ti_tag = INVALID_TI_TAG;
-               addr_range -= PAGE_SIZE;
-               vadr += PAGE_SIZE;
-       }
-}
-#endif
 
 static void vhpt_map(unsigned long pte)
 {
@@ -147,17 +101,11 @@ void vhpt_multiple_insert(unsigned long 
 
 void vhpt_init(void)
 {
-       unsigned long vhpt_total_size, vhpt_alignment;
        unsigned long paddr, pte;
        struct page_info *page;
 #if !VHPT_ENABLED
        return;
 #endif
-       // allocate a huge chunk of physical memory.... how???
-       vhpt_total_size = 1 << VHPT_SIZE_LOG2;  // 4MB, 16MB, 64MB, or 256MB
-       vhpt_alignment = 1 << VHPT_SIZE_LOG2;   // 4MB, 16MB, 64MB, or 256MB
-       printf("vhpt_init: vhpt size=0x%lx, align=0x%lx\n",
-               vhpt_total_size, vhpt_alignment);
        /* This allocation only holds true if vhpt table is unique for
         * all domains. Or else later new vhpt table should be allocated
         * from domain heap when each domain is created. Assume xen buddy
@@ -167,17 +115,135 @@ void vhpt_init(void)
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
+       if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
+               panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
-       __get_cpu_var(vhpt_pend) = paddr + vhpt_total_size - 1;
+       __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
                paddr, __get_cpu_var(vhpt_pend));
        pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
        vhpt_map(pte);
        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                VHPT_ENABLED);
-       vhpt_flush();
-}
-
+       vhpt_erase();
+}
+
+
+void vcpu_flush_vtlb_all (void)
+{
+       struct vcpu *v = current;
+
+       /* First VCPU tlb.  */
+       vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+       vcpu_purge_tr_entry(&PSCBX(v,itlb));
+
+       /* Then VHPT.  */
+       vhpt_flush ();
+
+       /* Then mTLB.  */
+       local_flush_tlb_all ();
+
+       /* We could clear bit in d->domain_dirty_cpumask only if domain d in
+          not running on this processor.  There is currently no easy way to
+          check this.  */
+}
+
+void domain_flush_vtlb_all (void)
+{
+       int cpu = smp_processor_id ();
+       struct vcpu *v;
+
+       for_each_vcpu (current->domain, v)
+               if (v->processor == cpu)
+                       vcpu_flush_vtlb_all ();
+               else
+                       smp_call_function_single
+                               (v->processor,
+                                (void(*)(void *))vcpu_flush_vtlb_all,
+                                NULL,1,1);
+}
+
+static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
+{
+       void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
+
+       while ((long)addr_range > 0) {
+               /* Get the VHPT entry.  */
+               unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
+               volatile struct vhpt_lf_entry *v;
+               v = vhpt_base + off;
+               v->ti_tag = INVALID_TI_TAG;
+               addr_range -= PAGE_SIZE;
+               vadr += PAGE_SIZE;
+       }
+}
+
+void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
+{
+       cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
+       ia64_ptcl(vadr, log_range << 2);
+       ia64_srlz_i();
+}
+
+void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
+{
+       struct vcpu *v;
+
+#if 0
+       // this only seems to occur at shutdown, but it does occur
+       if ((!addr_range) || addr_range & (addr_range - 1)) {
+               printf("vhpt_flush_address: weird range, spinning...\n");
+               while(1);
+       }
+#endif
+
+       for_each_vcpu (d, v) {
+               /* Purge TC entries.
+                  FIXME: clear only if match.  */
+               vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+               vcpu_purge_tr_entry(&PSCBX(v,itlb));
+
+               /* Invalidate VHPT entries.  */
+               cpu_flush_vhpt_range (v->processor, vadr, addr_range);
+       }
+
+       /* ptc.ga  */
+       ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
+}
+
+static void flush_tlb_vhpt_all (struct domain *d)
+{
+       /* First VHPT.  */
+       vhpt_flush ();
+
+       /* Then mTLB.  */
+       local_flush_tlb_all ();
+}
+
+void domain_flush_destroy (struct domain *d)
+{
+       /* Very heavy...  */
+       on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+       cpus_clear (d->domain_dirty_cpumask);
+}
+
+void flush_tlb_mask(cpumask_t mask)
+{
+    int cpu;
+
+    cpu = smp_processor_id();
+    if (cpu_isset (cpu, mask)) {
+        cpu_clear(cpu, mask);
+        flush_tlb_vhpt_all (NULL);
+    }
+
+    if (cpus_empty(mask))
+        return;
+
+    for_each_cpu_mask (cpu, mask)
+        smp_call_function_single
+            (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
+}
 
 void zero_vhpt_stats(void)
 {
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenasm.S
--- a/xen/arch/ia64/xen/xenasm.S        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xenasm.S        Tue May 30 14:30:34 2006 -0500
@@ -11,242 +11,160 @@
 #include <asm/pgtable.h>
 #include <asm/vhpt.h>
 
-#if 0
-// FIXME: there's gotta be a better way...
-// ski and spaski are different... moved to xenmisc.c
-#define RunningOnHpSki(rx,ry,pn)                       \
-       addl rx = 2, r0;                                \
-       addl ry = 3, r0;                                \
-       ;;                                              \
-       mov rx = cpuid[rx];                             \
-       mov ry = cpuid[ry];                             \
-       ;;                                              \
-       cmp.eq pn,p0 = 0, rx;                           \
-       ;;                                              \
-       (pn) movl rx = 0x7000004 ;                      \
-       ;;                                              \
-       (pn) cmp.ge pn,p0 = ry, rx;                     \
-       ;;
-
-//int platform_is_hp_ski(void)
-GLOBAL_ENTRY(platform_is_hp_ski)
-       mov r8 = 0
-       RunningOnHpSki(r3,r9,p8)
-(p8)   mov r8 = 1
-       br.ret.sptk.many b0
-END(platform_is_hp_ski)
-#endif
-
 // Change rr7 to the passed value while ensuring
 // Xen is mapped into the new region.
-//   in0: new rr7 value
-//   in1: Xen virtual address of shared info (to be pinned)
 #define PSR_BITS_TO_CLEAR                                              \
        (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |         \
         IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |        \
-        IA64_PSR_DFL | IA64_PSR_DFH)
+        IA64_PSR_DFL | IA64_PSR_DFH | IA64_PSR_IC)
 // FIXME? Note that this turns off the DB bit (debug)
 #define PSR_BITS_TO_SET        IA64_PSR_BN
 
-//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void 
*shared_arch_info, unsigned long p_vhpt, unsigned long v_pal);
+//extern void ia64_new_rr7(unsigned long rid,           /* in0 */
+//                         void *shared_info,           /* in1 */
+//                         void *shared_arch_info,      /* in2 */
+//                         unsigned long shared_info_va, /* in3 */
+//                         unsigned long p_vhpt)        /* in4 */
+//Local usage:
+//  loc0=rp, loc1=ar.pfs, loc2=percpu_paddr, loc3=psr, loc4=ar.rse
+//  loc5=pal_vaddr, loc6=xen_paddr, loc7=shared_archinfo_paddr,
 GLOBAL_ENTRY(ia64_new_rr7)
        // not sure this unwind statement is correct...
        .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
-       alloc loc1 = ar.pfs, 5, 9, 0, 0
+       alloc loc1 = ar.pfs, 5, 8, 0, 0
+       movl loc2=PERCPU_ADDR
 1:     {
-         mov r28  = in0                // copy procedure index
+         mov loc3 = psr                // save psr     
+         mov loc0 = rp                 // save rp
          mov r8   = ip                 // save ip to compute branch
-         mov loc0 = rp                 // save rp
        };;
        .body
-       movl loc2=PERCPU_ADDR
-       ;;
        tpa loc2=loc2                   // grab this BEFORE changing rr7
-       ;;
-       dep loc8=0,in4,60,4
-       ;;
-#if VHPT_ENABLED
-       mov loc6=in3
-       ;;
-       //tpa loc6=loc6                 // grab this BEFORE changing rr7
-       ;;
-#endif
-       mov loc5=in1
-       ;;
-       tpa loc5=loc5                   // grab this BEFORE changing rr7
-       ;;
-       mov loc7=in2                    // arch_vcpu_info_t
-       ;;
-       tpa loc7=loc7                   // grab this BEFORE changing rr7
-       ;;
-       mov loc3 = psr                  // save psr
-       adds r8  = 1f-1b,r8             // calculate return address for call
-       ;;
+       tpa in1=in1                     // grab shared_info BEFORE changing rr7
+       adds r8 = 1f-1b,r8              // calculate return address for call
+       ;;
+       tpa loc7=in2                    // grab arch_vcpu_info BEFORE chg rr7
+       movl r17=PSR_BITS_TO_SET
+       mov loc4=ar.rsc                 // save RSE configuration
+       movl r16=PSR_BITS_TO_CLEAR
+       ;; 
        tpa r8=r8                       // convert rp to physical
-       ;;
-       mov loc4=ar.rsc                 // save RSE configuration
-       ;;
        mov ar.rsc=0                    // put RSE in enforced lazy, LE mode
-       movl r16=PSR_BITS_TO_CLEAR
-       movl r17=PSR_BITS_TO_SET
-       ;;
        or loc3=loc3,r17                // add in psr the bits to set
        ;;
        andcm r16=loc3,r16              // removes bits to clear from psr
+       dep loc6=0,r8,0,KERNEL_TR_PAGE_SHIFT // Xen code paddr
        br.call.sptk.many rp=ia64_switch_mode_phys
 1:
        // now in physical mode with psr.i/ic off so do rr7 switch
-       dep     r16=-1,r0,61,3
-       ;;
+       movl r16=pal_vaddr              // Note: belong to region 7!
+       ;; 
        mov     rr[r16]=in0
+       ;; 
        srlz.d
-       ;;
+       dep     r16=0,r16,60,4          // Get physical address.
+       ;;
+       ld8 loc5=[r16]                  // read pal_vaddr
+       movl    r26=PAGE_KERNEL
+       ;; 
 
        // re-pin mappings for kernel text and data
-       mov r18=KERNEL_TR_PAGE_SHIFT<<2
+       mov r24=KERNEL_TR_PAGE_SHIFT<<2
        movl r17=KERNEL_START
        ;;
-       rsm psr.i | psr.ic
-       ;;
-       srlz.i
-       ;;
-       ptr.i   r17,r18
-       ptr.d   r17,r18
-       ;;
-       mov cr.itir=r18
+       ptr.i   r17,r24
+       ptr.d   r17,r24
+       mov r16=IA64_TR_KERNEL
+       mov cr.itir=r24
        mov cr.ifa=r17
-       mov r16=IA64_TR_KERNEL
-       //mov r3=ip
-       movl r18=PAGE_KERNEL
-       ;;
-       dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
-       ;;
-       or r18=r2,r18
-       ;;
-       srlz.i
+       or r18=loc6,r26
        ;;
        itr.i itr[r16]=r18
-       ;;
+       ;; 
        itr.d dtr[r16]=r18
-       ;;
-
-       // re-pin mappings for stack (current), per-cpu, vhpt, and shared info
+
+       // re-pin mappings for stack (current)
 
        // unless overlaps with KERNEL_TR
        dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT
        ;;
        cmp.eq p7,p0=r17,r18
 (p7)   br.cond.sptk    .stack_overlaps
-       ;;
-       movl r25=PAGE_KERNEL
+       mov r25=IA64_GRANULE_SHIFT<<2
        dep r21=0,r13,60,4              // physical address of "current"
        ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r25=IA64_GRANULE_SHIFT<<2
-       ;;
        ptr.d   r13,r25
-       ;;
+       or r23=r21,r26                  // construct PA | page properties
        mov cr.itir=r25
        mov cr.ifa=r13                  // VA of next task...
-       ;;
-       mov r25=IA64_TR_CURRENT_STACK
+       mov r21=IA64_TR_CURRENT_STACK
+       ;;
+       itr.d dtr[r21]=r23              // wire in new mapping...
+
+       //  Per-cpu     
+.stack_overlaps:
+       mov r24=PERCPU_PAGE_SHIFT<<2
+       movl r22=PERCPU_ADDR
+       ;;
+       ptr.d   r22,r24
+       or r23=loc2,r26                 // construct PA | page properties
+       mov cr.itir=r24
+       mov cr.ifa=r22
+       mov r25=IA64_TR_PERCPU_DATA
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-.stack_overlaps:
-
-       movl r22=PERCPU_ADDR
-       ;;
-       movl r25=PAGE_KERNEL
-       ;;
-       mov r21=loc2                    // saved percpu physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=PERCPU_PAGE_SHIFT<<2
+
+       // VHPT
+#if VHPT_ENABLED
+       mov r24=VHPT_SIZE_LOG2<<2
+       movl r22=VHPT_ADDR
+       mov r21=IA64_TR_VHPT
        ;;
        ptr.d   r22,r24
-       ;;
+       or r23=in4,r26                  // construct PA | page properties
        mov cr.itir=r24
        mov cr.ifa=r22
        ;;
-       mov r25=IA64_TR_PERCPU_DATA
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-
-#if VHPT_ENABLED
-       movl r22=VHPT_ADDR
-       ;;
-       movl r25=PAGE_KERNEL
-       ;;
-       mov r21=loc6                    // saved vhpt physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=VHPT_SIZE_LOG2<<2
+       itr.d dtr[r21]=r23              // wire in new mapping...
+#endif
+
+       //  Shared info
+       mov r24=PAGE_SHIFT<<2
+       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+       ;;
+       ptr.d   in3,r24
+       or r23=in1,r25                  // construct PA | page properties
+       mov cr.itir=r24
+       mov cr.ifa=in3
+       mov r21=IA64_TR_SHARED_INFO
+       ;;
+       itr.d dtr[r21]=r23              // wire in new mapping...
+       
+       // Map for arch_vcpu_info_t
+       movl r22=XSI_OFS
+       mov r24=PAGE_SHIFT<<2
+       ;; 
+       add r22=r22,in3
        ;;
        ptr.d   r22,r24
-       ;;
+       or r23=loc7,r25                 // construct PA | page properties
        mov cr.itir=r24
        mov cr.ifa=r22
-       ;;
-       mov r25=IA64_TR_VHPT
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-#endif
-
-       movl r22=SHAREDINFO_ADDR
-       ;;
-       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
-       ;;
-       mov r21=loc5                    // saved sharedinfo physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=PAGE_SHIFT<<2
-       ;;
-       ptr.d   r22,r24
-       ;;
-       mov cr.itir=r24
-       mov cr.ifa=r22
-       ;;
-       mov r25=IA64_TR_SHARED_INFO
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-       // Map for arch_vcpu_info_t
-       movl r22=SHARED_ARCHINFO_ADDR
-       ;;
-       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
-       ;;
-       mov r21=loc7                    // saved sharedinfo physical address
-       ;;
-       or r23=r25,r21                  // construct PA | page properties
-       mov r24=PAGE_SHIFT<<2
-       ;;
-       ptr.d   r22,r24
-       ;;
-       mov cr.itir=r24
-       mov cr.ifa=r22
-       ;;
-       mov r25=IA64_TR_ARCH_INFO
-       ;;
-       itr.d dtr[r25]=r23              // wire in new mapping...
-       ;;
-
-       //Purge/insert PAL TR
+       mov r21=IA64_TR_ARCH_INFO
+       ;;
+       itr.d dtr[r21]=r23              // wire in new mapping...
+
+       // Purge/insert PAL TR
        mov r24=IA64_TR_PALCODE
-       movl r25=PAGE_KERNEL
-       ;;
-       or loc8=r25,loc8
        mov r23=IA64_GRANULE_SHIFT<<2
-       ;;
-       ptr.i   in4,r23
-       ;;
+       dep r25=0,loc5,60,4             // convert pal vaddr to paddr
+       ;;
+       ptr.i   loc5,r23
+       or r25=r25,r26          // construct PA | page properties
        mov cr.itir=r23
-       mov cr.ifa=in4
-       ;;
-       itr.i itr[r24]=loc8
-       ;;
+       mov cr.ifa=loc5
+       ;;
+       itr.i itr[r24]=r25
 
        // done, switch back to virtual and return
        mov r16=loc3                    // r16= original psr
@@ -261,6 +179,7 @@ 1:
        br.ret.sptk.many rp
 END(ia64_new_rr7)
 
+#if 0 /* Not used */
 #include "minstate.h"
 
 GLOBAL_ENTRY(ia64_prepare_handle_privop)
@@ -301,6 +220,7 @@ GLOBAL_ENTRY(ia64_prepare_handle_reflect
        DO_LOAD_SWITCH_STACK
        br.cond.sptk.many rp                    // goes to ia64_leave_kernel
 END(ia64_prepare_handle_reflection)
+#endif
 
 GLOBAL_ENTRY(__get_domain_bundle)
        EX(.failure_in_get_bundle,ld8 r8=[r32],8)
@@ -331,80 +251,9 @@ GLOBAL_ENTRY(dorfirfi)
         mov cr.ipsr=r17
         mov cr.ifs=r18
        ;;
-        // fall through
+        rfi
+       ;;
 END(dorfirfi)
-
-GLOBAL_ENTRY(dorfi)
-        rfi
-       ;;
-END(dorfirfi)
-
-//
-// Long's Peak UART Offsets
-//
-#define COM_TOP 0xff5e0000
-#define COM_BOT 0xff5e2000
-
-// UART offsets        
-#define UART_TX                0       /* Out: Transmit buffer (DLAB=0) */
-#define UART_INT_ENB   1       /* interrupt enable (DLAB=0) */ 
-#define UART_INT_ID    2       /* Interrupt ID register */
-#define UART_LINE_CTL  3       /* Line control register */
-#define UART_MODEM_CTL 4       /* Modem Control Register */
-#define UART_LSR       5       /* In:  Line Status Register */
-#define UART_MSR       6       /* Modem status register */     
-#define UART_DLATCH_LOW UART_TX
-#define UART_DLATCH_HIGH UART_INT_ENB
-#define COM1   0x3f8
-#define COM2   0x2F8
-#define COM3   0x3E8
-
-/* interrupt enable bits (offset 1) */
-#define DATA_AVAIL_INT 1
-#define XMIT_HOLD_EMPTY_INT 2
-#define LINE_STAT_INT 4
-#define MODEM_STAT_INT 8
-
-/* line status bits (offset 5) */
-#define REC_DATA_READY 1
-#define OVERRUN 2
-#define PARITY_ERROR 4
-#define FRAMING_ERROR 8
-#define BREAK_INTERRUPT 0x10
-#define XMIT_HOLD_EMPTY 0x20
-#define XMIT_SHIFT_EMPTY 0x40
-
-// Write a single character
-// input: r32 = character to be written
-// output: none
-GLOBAL_ENTRY(longs_peak_putc)  
-       rsm psr.dt
-        movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR
-       ;;
-       srlz.i
-       ;;
-
-.Chk_THRE_p:
-        ld1.acq r18=[r16]
-        ;;
-       
-       and r18 = XMIT_HOLD_EMPTY, r18
-       ;;
-       cmp4.eq p6,p0=0,r18
-       ;;
-       
-(p6)    br .Chk_THRE_p
-       ;;
-        movl r16 = 0x8000000000000000 + COM_TOP + UART_TX
-       ;;
-       st1.rel [r16]=r32
-       ;;
-       ssm psr.dt
-       ;;
-       srlz.i
-       ;;
-       br.ret.sptk.many b0
-END(longs_peak_putc)   
 
 /* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
 GLOBAL_ENTRY(pal_emulator_static)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xenmisc.c       Tue May 30 14:30:34 2006 -0500
@@ -267,6 +267,9 @@ void context_switch(struct vcpu *prev, s
            vmx_load_state(next);
     /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
     prev = ia64_switch_to(next);
+
+    //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);
+
     if (!VMX_DOMAIN(current)){
            vcpu_set_next_timer(current);
     }
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xensetup.c      Tue May 30 14:30:34 2006 -0500
@@ -415,8 +415,7 @@ printk("About to call domain_create()\n"
     printk("About to call construct_dom0()\n");
     dom0_memory_start = (unsigned long) __va(initial_images_start);
     dom0_memory_size = ia64_boot_param->domain_size;
-    dom0_initrd_start = (unsigned long) __va(initial_images_start +
-                            PAGE_ALIGN(ia64_boot_param->domain_size));
+    dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start);
     dom0_initrd_size = ia64_boot_param->initrd_size;
  
     if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_size,
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/dom0_ops.c   Tue May 30 14:30:34 2006 -0500
@@ -404,27 +404,6 @@ long arch_do_dom0_op(struct dom0_op *op,
     }
     break;
 
-    case DOM0_PHYSICAL_MEMORY_MAP:
-    {
-        struct dom0_memory_map_entry entry;
-        int i;
-
-        for ( i = 0; i < e820.nr_map; i++ )
-        {
-            if ( i >= op->u.physical_memory_map.max_map_entries )
-                break;
-            entry.start  = e820.map[i].addr;
-            entry.end    = e820.map[i].addr + e820.map[i].size;
-            entry.is_ram = (e820.map[i].type == E820_RAM);
-            (void)copy_to_guest_offset(
-                op->u.physical_memory_map.memory_map, i, &entry, 1);
-        }
-
-        op->u.physical_memory_map.nr_map_entries = i;
-        (void)copy_to_guest(u_dom0_op, op, 1);
-    }
-    break;
-
     case DOM0_HYPERCALL_INIT:
     {
         struct domain *d; 
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/domain.c     Tue May 30 14:30:34 2006 -0500
@@ -146,6 +146,8 @@ struct vcpu *alloc_vcpu_struct(struct do
     v->arch.guest_vl4table = __linear_l4_table;
 #endif
 
+    pae_l3_cache_init(&v->arch.pae_l3_cache);
+
     return v;
 }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/domain_build.c       Tue May 30 14:30:34 2006 -0500
@@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
     if ( (1UL << order) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
-    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+    /*
+     * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
+     * mapping covers the allocation.
+     */
     if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
         panic("Not enough RAM for domain 0 allocation.\n");
     alloc_spfn = page_to_mfn(page);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/hvm.c    Tue May 30 14:30:34 2006 -0500
@@ -185,11 +185,16 @@ void hvm_setup_platform(struct domain* d
 void hvm_setup_platform(struct domain* d)
 {
     struct hvm_domain *platform;
-
-    if ( !hvm_guest(current) || (current->vcpu_id != 0) )
+    struct vcpu *v=current;
+
+    if ( !hvm_guest(v) || (v->vcpu_id != 0) )
         return;
 
-    shadow_direct_map_init(d);
+    if ( shadow_direct_map_init(d) == 0 )
+    {
+        printk("Can not allocate shadow direct map for HVM domain.\n");
+        domain_crash_synchronous();
+    }
 
     hvm_map_io_shared_page(d);
     hvm_get_info(d);
@@ -204,7 +209,8 @@ void hvm_setup_platform(struct domain* d
         hvm_vioapic_init(d);
     }
 
-    pit_init(&platform->vpit, current);
+    init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, 
v->processor);
+    pit_init(v, cpu_khz);
 }
 
 void pic_irq_request(void *data, int level)
@@ -234,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v)
         } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
         do_pic_irqs(pic, irqs);
     }
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+    u64    host_tsc;
+    
+    rdtscll(host_tsc);
+    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 int cpu_get_interrupt(struct vcpu *v, int *type)
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/i8254.c  Tue May 30 14:30:34 2006 -0500
@@ -22,11 +22,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-/* Edwin Zhai <edwin.zhai@xxxxxxxxx>
+/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>
  * Ported to xen:
- * use actimer for intr generation;
+ * Add a new layer of periodic time on top of PIT;
  * move speaker io access to hypervisor;
- * use new method for counter/intrs calculation
  */
 
 #include <xen/config.h>
@@ -42,184 +41,117 @@
 #include <asm/hvm/vpit.h>
 #include <asm/current.h>
 
-/*#define DEBUG_PIT*/
+/* Enable DEBUG_PIT may cause guest calibration inaccuracy */
+/* #define DEBUG_PIT */
 
 #define RW_STATE_LSB 1
 #define RW_STATE_MSB 2
 #define RW_STATE_WORD0 3
 #define RW_STATE_WORD1 4
 
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000ULL)
-#endif
-
-#ifndef TIMER_SLOP 
-#define TIMER_SLOP (50*1000) /* ns */
-#endif
-
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
-
-s_time_t hvm_get_clock(void)
-{
-    /* TODO: add pause/unpause support */
-    return NOW();
+#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
+static int handle_pit_io(ioreq_t *p);
+static int handle_speaker_io(ioreq_t *p);
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    union {
+        uint64_t ll;
+        struct {
+#ifdef WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif            
+        } l;
+    } u, res;
+    uint64_t rl, rh;
+
+    u.ll = a;
+    rl = (uint64_t)u.l.low * (uint64_t)b;
+    rh = (uint64_t)u.l.high * (uint64_t)b;
+    rh += (rl >> 32);
+    res.l.high = rh / c;
+    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return res.ll;
+}
+
+/*
+ * get processor time.
+ * unit: TSC
+ */
+int64_t hvm_get_clock(struct vcpu *v)
+{
+    uint64_t  gtsc;
+    gtsc = hvm_get_guest_time(v);
+    return gtsc;
 }
 
 static int pit_get_count(PITChannelState *s)
 {
-    u64 d;
-    u64 counter;
-
-    d = hvm_get_clock() - s->count_load_time;
+    uint64_t d;
+    int  counter;
+
+    d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     case 0:
     case 1:
     case 4:
     case 5:
-        counter = (s->period - d) & 0xffff;
+        counter = (s->count - d) & 0xffff;
         break;
     case 3:
         /* XXX: may be incorrect for odd counts */
-        counter = s->period - ((2 * d) % s->period);
+        counter = s->count - ((2 * d) % s->count);
         break;
     default:
-        /* mod 2 counter handle */
-        d = hvm_get_clock() - s->hvm_time->count_point;
-        d += s->hvm_time->count_advance;
-        counter = s->period - (d % s->period);
-        break;
-    }
-    /* change from ns to pit counter */
-    counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
+        counter = s->count - (d % s->count);
+        break;
+    }
     return counter;
 }
 
 /* get pit output bit */
-static int pit_get_out1(PITChannelState *s, s64 current_time)
-{
-    u64 d;
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+    uint64_t d;
     int out;
 
-    d = current_time - s->count_load_time;
+    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     default:
     case 0:
-        out = (d >= s->period);
+        out = (d >= s->count);
         break;
     case 1:
-        out = (d < s->period);
+        out = (d < s->count);
         break;
     case 2:
-        /* mod2 out is no meaning, since intr are generated in background */
-        if ((d % s->period) == 0 && d != 0)
+        if ((d % s->count) == 0 && d != 0)
             out = 1;
         else
             out = 0;
         break;
     case 3:
-        out = (d % s->period) < ((s->period + 1) >> 1);
+        out = (d % s->count) < ((s->count + 1) >> 1);
         break;
     case 4:
     case 5:
-        out = (d == s->period);
+        out = (d == s->count);
         break;
     }
     return out;
 }
 
-int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
 {
     PITChannelState *s = &pit->channels[channel];
     return pit_get_out1(s, current_time);
 }
 
-static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
-{
-    struct hvm_time_info *hvm_time = s->hvm_time;
-    struct domain *d = (void *) s - 
-        offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
-
-    /* ticks from current time(expected time) to NOW */ 
-    int missed_ticks;
-    /* current_time is expected time for next intr, check if it's true
-     * (actimer has a TIMER_SLOP in advance)
-     */
-    s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
-
-    if (missed_time >= 0) {
-        missed_ticks = missed_time/(s_time_t)s->period + 1;
-        if (test_bit(_DOMF_debugging, &d->domain_flags)) {
-            hvm_time->pending_intr_nr++;
-        } else {
-            hvm_time->pending_intr_nr += missed_ticks;
-        }
-        s->next_transition_time = current_time + (missed_ticks ) * s->period;
-    }
-
-    return s->next_transition_time;
-}
-
-/* only rearm the actimer when return value > 0
- *  -2: init state
- *  -1: the mode has expired
- *   0: current VCPU is not running
- *  >0: the next fired time
- */
-s64 pit_get_next_transition_time(PITChannelState *s, 
-                                            s64 current_time)
-{
-    s64 d, next_time, base;
-    int period2;
-    struct hvm_time_info *hvm_time = s->hvm_time;
-
-    d = current_time - s->count_load_time;
-    switch(s->mode) {
-    default:
-    case 0:
-    case 1:
-        if (d < s->period)
-            next_time = s->period;
-        else
-            return -1;
-        break;
-    case 2:
-        next_time = missed_ticks(s, current_time);
-        if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
-            return 0;
-        break;
-    case 3:
-        base = (d / s->period) * s->period;
-        period2 = ((s->period + 1) >> 1);
-        if ((d - base) < period2) 
-            next_time = base + period2;
-        else
-            next_time = base + s->period;
-        break;
-    case 4:
-    case 5:
-        if (d < s->period)
-            next_time = s->period;
-        else if (d == s->period)
-            next_time = s->period + 1;
-        else
-            return -1;
-        break;
-    case 0xff:
-        return -2;      /* for init state */ 
-        break;
-    }
-    /* XXX: better solution: use a clock at PIT_FREQ Hz */
-    if (next_time <= current_time){
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:next_time <= current_time. next=0x%llx, 
current=0x%llx!\n",next_time, current_time);
-#endif
-        next_time = current_time + 1;
-    }
-    return next_time;
-}
-
 /* val must be 0 or 1 */
-void pit_set_gate(hvm_virpit *pit, int channel, int val)
+void pit_set_gate(PITState *pit, int channel, int val)
 {
     PITChannelState *s = &pit->channels[channel];
 
@@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
     case 5:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         break;
     case 2:
     case 3:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         /* XXX: disable/enable counting */
         break;
@@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
     s->gate = val;
 }
 
-int pit_get_gate(hvm_virpit *pit, int channel)
+int pit_get_gate(PITState *pit, int channel)
 {
     PITChannelState *s = &pit->channels[channel];
     return s->gate;
@@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
 
 static inline void pit_load_count(PITChannelState *s, int val)
 {
+    u32   period;
     if (val == 0)
         val = 0x10000;
-
-    s->count_load_time = hvm_get_clock();
+    s->count_load_time = hvm_get_clock(s->vcpu);
     s->count = val;
-    s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
+    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
 
 #ifdef DEBUG_PIT
-    printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, 
load_time=%lld\n",
+    printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, 
load_time=%lld\n",
+            s,
             val,
-            s->period / 1000,
+            period,
             s->mode,
-            s->count_load_time);
+            (long long)s->count_load_time);
 #endif
 
-    if (s->mode == HVM_PIT_ACCEL_MODE) {
-        if (!s->hvm_time) {
-            printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
-            return;
-        }
-        s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
-        s->hvm_time->first_injected = 0;
-
-        if (s->period < 900000) { /* < 0.9 ms */
-            printk("HVM_PIT: guest programmed too small an count: %x\n",
-                    s->count);
-            s->period = 1000000;
-        }
-    }
-        
-    pit_irq_timer_update(s, s->count_load_time);
+    switch (s->mode) {
+        case 2:
+            /* create periodic time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 0);
+            break;
+        case 1:
+            /* create one shot time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 1);
+#ifdef DEBUG_PIT
+            printk("HVM_PIT: create one shot time.\n");
+#endif
+            break;
+        default:
+            break;
+    }
 }
 
 /* if already latched, do not latch again */
@@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
     }
 }
 
-static void pit_ioport_write(void *opaque, u32 addr, u32 val)
-{
-    hvm_virpit *pit = opaque;
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PITState *pit = opaque;
     int channel, access;
     PITChannelState *s;
     val &= 0xff;
@@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
                     if (!(val & 0x10) && !s->status_latched) {
                         /* status latch */
                         /* XXX: add BCD and null count */
-                        s->status =  (pit_get_out1(s, hvm_get_clock()) << 7) |
+                        s->status =  (pit_get_out1(s, hvm_get_clock(s->vcpu)) 
<< 7) |
                             (s->rw_mode << 4) |
                             (s->mode << 1) |
                             s->bcd;
@@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
     }
 }
 
-static u32 pit_ioport_read(void *opaque, u32 addr)
-{
-    hvm_virpit *pit = opaque;
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+    PITState *pit = opaque;
     int ret, count;
     PITChannelState *s;
     
@@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
     return ret;
 }
 
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
-{
-    s64 expire_time;
-    int irq_level;
-    struct vcpu *v = current;
-    struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
-
-    if (!s->hvm_time || s->mode == 0xff)
-        return;
-
-    expire_time = pit_get_next_transition_time(s, current_time);
-    /* not generate intr by direct pic_set_irq in mod 2
-     * XXX:mod 3 should be same as mod 2
-     */
-    if (s->mode != HVM_PIT_ACCEL_MODE) {
-        irq_level = pit_get_out1(s, current_time);
-        pic_set_irq(pic, s->irq, irq_level);
-        s->next_transition_time = expire_time;
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
-                irq_level, 
-                (expire_time - current_time));
-#endif
-    }
-
-    if (expire_time > 0)
-        set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-
-}
-
-static void pit_irq_timer(void *data)
-{
-    PITChannelState *s = data;
-
-    pit_irq_timer_update(s, s->next_transition_time);
-}
-
 static void pit_reset(void *opaque)
 {
-    hvm_virpit *pit = opaque;
+    PITState *pit = opaque;
     PITChannelState *s;
     int i;
 
     for(i = 0;i < 3; i++) {
         s = &pit->channels[i];
+        if ( s -> pt ) {
+            destroy_periodic_time (s->pt);
+            s->pt = NULL;
+        }
         s->mode = 0xff; /* the init mode */
         s->gate = (i != 2);
         pit_load_count(s, 0);
     }
 }
 
-/* hvm_io_assist light-weight version, specific to PIT DM */ 
-static void resume_pit_io(ioreq_t *p)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax = regs->eax;
-    p->state = STATE_INVALID;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        BUG();
-    }
+void pit_init(struct vcpu *v, unsigned long cpu_khz)
+{
+    PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
+    PITChannelState *s;
+
+    s = &pit->channels[0];
+    /* the timer 0 is connected to an IRQ */
+    s->vcpu = v;
+    s++; s->vcpu = v;
+    s++; s->vcpu = v;
+
+    register_portio_handler(PIT_BASE, 4, handle_pit_io);
+    /* register the speaker port */
+    register_portio_handler(0x61, 1, handle_speaker_io);
+    ticks_per_sec(v) = cpu_khz * (int64_t)1000; 
+#ifdef DEBUG_PIT
+    printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
+#endif
+    pit_reset(pit);
+    return;
 }
 
 /* the intercept action for PIT DM retval:0--not handled; 1--handled */  
-int handle_pit_io(ioreq_t *p)
+static int handle_pit_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
     if (p->dir == 0) {/* write */
         pit_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) { /* read */
-        p->u.data = pit_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
-    }
-
-    /* always return 1, since PIT sit in HV now */
+        if ( (p->addr & 3) != 3 ) {
+            p->u.data = pit_ioport_read(vpit, p->addr);
+        } else {
+            printk("HVM_PIT: read A1:A0=3!\n");
+        }
+    }
     return 1;
 }
 
 static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
-    hvm_virpit *pit = opaque;
-    val &= 0xff;
+    PITState *pit = opaque;
     pit->speaker_data_on = (val >> 1) & 1;
     pit_set_gate(pit, 2, val & 1);
 }
@@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void
 static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
 {
     int out;
-    hvm_virpit *pit = opaque;
-    out = pit_get_out(pit, 2, hvm_get_clock());
+    PITState *pit = opaque;
+    out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
     pit->dummy_refresh_clock ^= 1;
 
     return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
       (pit->dummy_refresh_clock << 4);
 }
 
-int handle_speaker_io(ioreq_t *p)
+static int handle_speaker_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
         speaker_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) {/* read */
         p->u.data = speaker_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
     }
 
     return 1;
 }
-
-/* pick up missed timer ticks at deactive time */
-void pickup_deactive_ticks(struct hvm_virpit *vpit)
-{
-    s64 next_time;
-    PITChannelState *s = &(vpit->channels[0]);
-    if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
-        next_time = pit_get_next_transition_time(s, s->next_transition_time); 
-        if (next_time >= 0)
-            set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-    }
-}
-
-void pit_init(struct hvm_virpit *pit, struct vcpu *v)
-{
-    PITChannelState *s;
-    struct hvm_time_info *hvm_time;
-
-    s = &pit->channels[0];
-    /* the timer 0 is connected to an IRQ */
-    s->irq = 0;
-    /* channel 0 need access the related time info for intr injection */
-    hvm_time = s->hvm_time = &pit->time_info;
-    hvm_time->vcpu = v;
-
-    init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
-
-    register_portio_handler(PIT_BASE, 4, handle_pit_io);
-
-    /* register the speaker port */
-    register_portio_handler(0x61, 1, handle_speaker_io);
-
-    pit_reset(pit);
-
-    return;
-
-}
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/intercept.c      Tue May 30 14:30:34 2006 -0500
@@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
     evtchn_set_pending(v, iopacket_port(v));
 }
 
+static __inline__ void missed_ticks(struct periodic_time *pt)
+{
+    int missed_ticks;
+
+    missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
+    if ( missed_ticks++ >= 0 ) {
+        if ( missed_ticks > 1000 ) {
+            /* TODO: Adjust guest time togther */
+            pt->pending_intr_nr ++;
+        }
+        else {
+            pt->pending_intr_nr += missed_ticks;
+        }
+        pt->scheduled += missed_ticks * pt->period;
+    }
+}
+
+/* hook function for the platform periodic time */
+void pt_timer_fn(void *data)
+{
+    struct vcpu *v = data;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    /* pick up missed timer tick */
+    missed_ticks(pt);
+    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/* pick up missed timer ticks at deactive time */
+void pickup_deactive_ticks(struct periodic_time *pt)
+{
+    if ( !active_timer(&(pt->timer)) ) {
+        missed_ticks(pt);
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/*
+ * period: fire frequency in ns.
+ */
+struct periodic_time * create_periodic_time(
+        struct vcpu *v, 
+        u32 period, 
+        char irq,
+        char one_shot)
+{
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+    if ( pt->enabled ) {
+        if ( v->vcpu_id != 0 ) {
+            printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
+        }
+        stop_timer (&pt->timer);
+        pt->enabled = 0;
+    }
+    pt->pending_intr_nr = 0;
+    pt->first_injected = 0;
+    if (period < 900000) { /* < 0.9 ms */
+        printk("HVM_PlatformTime: program too small period %u\n",period);
+        period = 900000;   /* force to 0.9ms */
+    }
+    pt->period = period;
+    pt->irq = irq;
+    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+    pt->one_shot = one_shot;
+    if ( one_shot ) {
+        printk("HVM_PL: No support for one shot platform time yet\n");
+    }
+    pt->scheduled = NOW() + period;
+    set_timer (&pt->timer,pt->scheduled);
+    pt->enabled = 1;
+    return pt;
+}
+
+void destroy_periodic_time(struct periodic_time *pt)
+{
+    if ( pt->enabled ) {
+        stop_timer(&pt->timer);
+        pt->enabled = 0;
+    }
+}
 
 /*
  * Local variables:
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/intr.c       Tue May 30 14:30:34 2006 -0500
@@ -44,45 +44,33 @@
  */
 #define BSP_CPU(v)    (!(v->vcpu_id))
 
-u64 svm_get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void svm_set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct  periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = svm_get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            svm_set_guest_time(v, pt->last_plt_gtime);
         }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        svm_set_guest_time(v, time_info->last_pit_gtime);
     }
 
     switch(type)
@@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain; 
-    struct hvm_virpit *vpit = &plat->vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     int intr_type = VLAPIC_DELIV_MODE_EXT;
     int intr_vector = -1;
@@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
       if ( cpu_has_pending_irq(v) ) {
            intr_vector = cpu_get_interrupt(v, &intr_type);
       }
-      else  if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-          pic_set_irq(pic, 0, 0);
-          pic_set_irq(pic, 0, 1);
+      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+          pic_set_irq(pic, pt->irq, 0);
+          pic_set_irq(pic, pt->irq, 1);
           intr_vector = cpu_get_interrupt(v, &intr_type);
       }
     }
@@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
             /* Re-injecting a PIT interrupt? */
             if (re_injecting && 
                 is_pit_irq(v, intr_vector, intr_type)) {
-                    ++time_info->pending_intr_nr;
+                    ++pt->pending_intr_nr;
             }
             /* let's inject this interrupt */
             TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue May 30 14:30:34 2006 -0500
@@ -51,13 +51,6 @@
 
 #define SVM_EXTRA_DEBUG
 
-#ifdef TRACE_BUFFER
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
-#else
-#define TRACE_VMEXIT(index,value) ((void)0)
-#endif
-
 /* Useful define */
 #define MAX_INST_SIZE  15
 
@@ -458,6 +451,9 @@ int start_svm(void)
     
     if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
         return 0;
+    svm_globals[cpu].hsa = alloc_host_save_area();
+    if (! svm_globals[cpu].hsa)
+        return 0;
     
     rdmsr(MSR_EFER, eax, edx);
     eax |= EFER_SVME;
@@ -466,7 +462,6 @@ int start_svm(void)
     printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
 
     /* Initialize the HSA for this core */
-    svm_globals[cpu].hsa = alloc_host_save_area();
     phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa ); 
     phys_hsa_lo = (u32) phys_hsa;
     phys_hsa_hi = (u32) (phys_hsa >> 32);    
@@ -670,12 +665,11 @@ static void arch_svm_do_launch(struct vc
 
 static void svm_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -752,7 +746,7 @@ static void svm_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -782,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
 
 void svm_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer( &pt->timer, v->processor );
+        migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC( v ))
         migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
 }
@@ -814,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
 
         handle_mmio(va, va);
-        TRACE_VMEXIT(2,2);
         return 1;
     }
 
@@ -840,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
         }
 
-        TRACE_VMEXIT (2,2);
         handle_mmio(va, gpa);
 
         return 1;
@@ -852,8 +846,6 @@ static int svm_do_page_fault(unsigned lo
         /* Let's make sure that the Guest TLB is flushed */
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
-
-    TRACE_VMEXIT (2,result);
 
     return result;
 }
@@ -1899,14 +1891,8 @@ static inline void svm_do_msr_access(str
         regs->edx = 0;
         switch (regs->ecx) {
         case MSR_IA32_TIME_STAMP_COUNTER:
-        {
-            struct hvm_time_info *time_info;
-
-            rdtscll(msr_content);
-            time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-            msr_content += time_info->cache_tsc_offset;
+            msr_content = hvm_get_guest_time(v);
             break;
-        }
         case MSR_IA32_SYSENTER_CS:
             msr_content = vmcb->sysenter_cs;
             break;
@@ -1973,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     s_time_t  next_pit = -1, next_wakeup;
 
     __update_guest_eip(vmcb, 1);
@@ -1983,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
        return; 
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v, vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Tue May 30 14:30:34 2006 -0500
@@ -139,17 +139,20 @@ static int construct_vmcb_controls(struc
 
     /* The following is for I/O and MSR permission map */
     iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE));
-
-    ASSERT(iopm);
-    memset(iopm, 0xff, IOPM_SIZE);
-    clear_bit(PC_DEBUG_PORT, iopm);
+    if (iopm)
+    {
+        memset(iopm, 0xff, IOPM_SIZE);
+        clear_bit(PC_DEBUG_PORT, iopm);
+    }
     msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
-
-    ASSERT(msrpm);
-    memset(msrpm, 0xff, MSRPM_SIZE);
+    if (msrpm)
+        memset(msrpm, 0xff, MSRPM_SIZE);
 
     arch_svm->iopm = iopm;
     arch_svm->msrpm = msrpm;
+
+    if (! iopm || ! msrpm)
+        return 1;
 
     vmcb->iopm_base_pa = (u64) virt_to_maddr(iopm);
     vmcb->msrpm_base_pa = (u64) virt_to_maddr(msrpm);
@@ -439,19 +442,17 @@ void svm_do_resume(struct vcpu *v)
 void svm_do_resume(struct vcpu *v) 
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
 
     svm_stts(v);
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            time_info->count_point = NOW();
-            v->domain->arch.hvm_domain.guest_time = 0;
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
         }
-        pickup_deactive_ticks(vpit);
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/io.c Tue May 30 14:30:34 2006 -0500
@@ -49,45 +49,33 @@ void __set_tsc_offset(u64  offset)
 #endif
 }
 
-u64 get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    __set_tsc_offset(time_info->cache_tsc_offset);
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
-        }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        set_guest_time(v, time_info->last_pit_gtime);
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            set_guest_time(v, pt->last_plt_gtime);
+        }
     }
 
     switch(type)
@@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
     unsigned long eflags;
     struct vcpu *v = current;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain;
-    struct hvm_time_info *time_info = &plat->vpit.time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     unsigned int idtv_info_field;
     unsigned long inst_len;
@@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
     if ( v->vcpu_id == 0 )
         hvm_pic_assist(v);
 
-    if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-        pic_set_irq(pic, 0, 0);
-        pic_set_irq(pic, 0, 1);
+    if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+        pic_set_irq(pic, pt->irq, 0);
+        pic_set_irq(pic, pt->irq, 1);
     }
 
     has_ext_irq = cpu_has_pending_irq(v);
@@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v)
 void vmx_do_resume(struct vcpu *v)
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
 
     vmx_stts();
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            time_info->count_point = NOW();
-            set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            v->domain->arch.hvm_domain.guest_time = 0;
-        }
-        pickup_deactive_ticks(vpit);
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
+        }
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue May 30 14:30:34 2006 -0500
@@ -47,7 +47,7 @@
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
-static unsigned long trace_values[NR_CPUS][4];
+static unsigned long trace_values[NR_CPUS][5];
 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
@@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
 
 static void vmx_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
 
 void vmx_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer(&pt->timer, v->processor);
+        migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC(v))
         migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
 }
@@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
                 (unsigned long)regs->edx);
     switch (regs->ecx) {
     case MSR_IA32_TIME_STAMP_COUNTER:
-    {
-        struct hvm_time_info *time_info;
-
-        rdtscll(msr_content);
-        time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
-        msr_content += time_info->cache_tsc_offset;
-        break;
-    }
+        msr_content = hvm_get_guest_time(v);
+        break;
     case MSR_IA32_SYSENTER_CS:
         __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
         break;
@@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void)
 void vmx_vmexit_do_hlt(void)
 {
     struct vcpu *v=current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
     s_time_t   next_pit=-1,next_wakeup;
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v,vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/mm.c Tue May 30 14:30:34 2006 -0500
@@ -260,9 +260,82 @@ void share_xen_page_with_privileged_gues
     share_xen_page_with_guest(page, dom_xen, readonly);
 }
 
+#if defined(CONFIG_X86_PAE)
+
+#ifdef NDEBUG
+/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
+#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
+#else
+/* In debug builds we aggressively shadow PDPTs to exercise code paths. */
+#define l3tab_needs_shadow(mfn) ((mfn << PAGE_SHIFT) != __pa(idle_pg_table))
+#endif
+
+static l1_pgentry_t *fix_pae_highmem_pl1e;
+
+/* Cache the address of PAE high-memory fixmap page tables. */
+static int __init cache_pae_fixmap_address(void)
+{
+    unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
+    l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
+    fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
+    return 0;
+}
+__initcall(cache_pae_fixmap_address);
+
+static void __write_ptbase(unsigned long mfn)
+{
+    l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
+    struct pae_l3_cache *cache = &current->arch.pae_l3_cache;
+    unsigned int cpu = smp_processor_id();
+
+    /* Fast path 1: does this mfn need a shadow at all? */
+    if ( !l3tab_needs_shadow(mfn) )
+    {
+        write_cr3(mfn << PAGE_SHIFT);
+        return;
+    }
+
+    /* Caching logic is not interrupt safe. */
+    ASSERT(!in_irq());
+
+    /* Fast path 2: is this mfn already cached? */
+    if ( cache->high_mfn == mfn )
+    {
+        write_cr3(__pa(cache->table[cache->inuse_idx]));
+        return;
+    }
+
+    /* Protects against pae_flush_pgd(). */
+    spin_lock(&cache->lock);
+
+    cache->inuse_idx ^= 1;
+    cache->high_mfn   = mfn;
+
+    /* Map the guest L3 table and copy to the chosen low-memory cache. */
+    *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+    highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
+    lowmem_l3tab  = cache->table[cache->inuse_idx];
+    memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
+    *(fix_pae_highmem_pl1e - cpu) = l1e_empty();
+
+    /* Install the low-memory L3 table in CR3. */
+    write_cr3(__pa(lowmem_l3tab));
+
+    spin_unlock(&cache->lock);
+}
+
+#else /* !CONFIG_X86_PAE */
+
+static void __write_ptbase(unsigned long mfn)
+{
+    write_cr3(mfn << PAGE_SHIFT);
+}
+
+#endif /* !CONFIG_X86_PAE */
+
 void write_ptbase(struct vcpu *v)
 {
-    write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+    __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
 }
 
 void invalidate_shadow_ldt(struct vcpu *v)
@@ -401,6 +474,7 @@ static int get_page_and_type_from_pagenr
     return 1;
 }
 
+#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -456,6 +530,7 @@ get_linear_pagetable(
 
     return 1;
 }
+#endif /* !CONFIG_X86_PAE */
 
 int
 get_page_from_l1e(
@@ -564,10 +639,6 @@ get_page_from_l3e(
     rc = get_page_and_type_from_pagenr(
         l3e_get_pfn(l3e),
         PGT_l2_page_table | vaddr, d);
-#if CONFIG_PAGING_LEVELS == 3
-    if ( unlikely(!rc) )
-        rc = get_linear_pagetable(l3e, pfn, d);
-#endif
     return rc;
 }
 #endif /* 3 level */
@@ -773,6 +844,41 @@ static int create_pae_xen_mappings(l3_pg
     return 1;
 }
 
+/* Flush a pgdir update into low-memory caches. */
+static void pae_flush_pgd(
+    unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
+{
+    struct domain *d = page_get_owner(mfn_to_page(mfn));
+    struct vcpu   *v;
+    intpte_t       _ol3e, _nl3e, _pl3e;
+    l3_pgentry_t  *l3tab_ptr;
+    struct pae_l3_cache *cache;
+
+    /* If below 4GB then the pgdir is not shadowed in low memory. */
+    if ( !l3tab_needs_shadow(mfn) )
+        return;
+
+    for_each_vcpu ( d, v )
+    {
+        cache = &v->arch.pae_l3_cache;
+
+        spin_lock(&cache->lock);
+
+        if ( cache->high_mfn == mfn )
+        {
+            l3tab_ptr = &cache->table[cache->inuse_idx][idx];
+            _ol3e = l3e_get_intpte(*l3tab_ptr);
+            _nl3e = l3e_get_intpte(nl3e);
+            _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
+            BUG_ON(_pl3e != _ol3e);
+        }
+
+        spin_unlock(&cache->lock);
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
 {
@@ -787,6 +893,7 @@ static inline int l1_backptr(
 
 #elif CONFIG_X86_64
 # define create_pae_xen_mappings(pl3e) (1)
+# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
 
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
@@ -886,14 +993,6 @@ static int alloc_l3_table(struct page_in
 
     ASSERT(!shadow_mode_refcounts(d));
 
-#ifdef CONFIG_X86_PAE
-    if ( pfn >= 0x100000 )
-    {
-        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
-        return 0;
-    }
-#endif
-
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
@@ -1240,6 +1339,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     okay = create_pae_xen_mappings(pl3e);
     BUG_ON(!okay);
+
+    pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
     put_page_from_l3e(ol3e, pfn);
     return 1;
@@ -2811,6 +2912,8 @@ long do_update_descriptor(u64 pa, u64 de
     return ret;
 }
 
+typedef struct e820entry e820entry_t;
+DEFINE_XEN_GUEST_HANDLE(e820entry_t);
 
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
@@ -2869,6 +2972,39 @@ long arch_memory_op(int op, XEN_GUEST_HA
         break;
     }
 
+    case XENMEM_memory_map:
+    {
+        return -ENOSYS;
+    }
+
+    case XENMEM_machine_memory_map:
+    {
+        struct xen_memory_map memmap;
+        XEN_GUEST_HANDLE(e820entry_t) buffer;
+        int count;
+
+        if ( !IS_PRIV(current->domain) )
+            return -EINVAL;
+
+        if ( copy_from_guest(&memmap, arg, 1) )
+            return -EFAULT;
+        if ( memmap.nr_entries < e820.nr_map + 1 )
+            return -EINVAL;
+
+        buffer = guest_handle_cast(memmap.buffer, e820entry_t);
+
+        count = min((unsigned int)e820.nr_map, memmap.nr_entries);
+        if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+            return -EFAULT;
+
+        memmap.nr_entries = count;
+
+        if ( copy_to_guest(arg, &memmap, 1) )
+            return -EFAULT;
+
+        return 0;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
@@ -3074,7 +3210,7 @@ void ptwr_flush(struct domain *d, const 
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
         /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
-        write_cr3(pagetable_get_paddr(
+        __write_ptbase(pagetable_get_pfn(
             d->arch.ptwr[which].vcpu->arch.guest_table));
     else
         TOGGLE_MODE();
@@ -3185,15 +3321,16 @@ static int ptwr_emulated_update(
     /* Turn a sub-word access into a full-word access. */
     if ( bytes != sizeof(paddr_t) )
     {
-        int           rc;
-        paddr_t    full;
-        unsigned int  offset = addr & (sizeof(paddr_t)-1);
+        paddr_t      full;
+        unsigned int offset = addr & (sizeof(paddr_t)-1);
 
         /* Align address; read full word. */
         addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
-                                        sizeof(paddr_t))) )
-            return rc; 
+        if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
+        {
+            propagate_page_fault(addr, 4); /* user mode, read fault */
+            return X86EMUL_PROPAGATE_FAULT;
+        }
         /* Mask out bits provided by caller. */
         full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
         /* Shift the caller value and OR in the missing bits. */
@@ -3271,7 +3408,8 @@ static int ptwr_emulated_write(
 static int ptwr_emulated_write(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, 0, val, bytes, 0);
 }
@@ -3280,7 +3418,8 @@ static int ptwr_emulated_cmpxchg(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, old, new, bytes, 1);
 }
@@ -3290,7 +3429,8 @@ static int ptwr_emulated_cmpxchg8b(
     unsigned long old,
     unsigned long old_hi,
     unsigned long new,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( CONFIG_PAGING_LEVELS == 2 )
         return X86EMUL_UNHANDLEABLE;
@@ -3299,7 +3439,7 @@ static int ptwr_emulated_cmpxchg8b(
             addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
 }
 
-static struct x86_mem_emulator ptwr_mem_emulator = {
+static struct x86_emulate_ops ptwr_emulate_ops = {
     .read_std           = x86_emulate_read_std,
     .write_std          = x86_emulate_write_std,
     .read_emulated      = x86_emulate_read_std,
@@ -3318,6 +3458,7 @@ int ptwr_do_page_fault(struct domain *d,
     l2_pgentry_t    *pl2e, l2e;
     int              which, flags;
     unsigned long    l2_idx;
+    struct x86_emulate_ctxt emul_ctxt;
 
     if ( unlikely(shadow_mode_enabled(d)) )
         return 0;
@@ -3472,8 +3613,10 @@ int ptwr_do_page_fault(struct domain *d,
     return EXCRET_fault_fixed;
 
  emulate:
-    if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
-                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
+    emul_ctxt.regs = guest_cpu_user_regs();
+    emul_ctxt.cr2  = addr;
+    emul_ctxt.mode = X86EMUL_MODE_HOST;
+    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
         return 0;
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
@@ -3596,11 +3739,10 @@ int map_pages_to_xen(
 }
 
 void __set_fixmap(
-    enum fixed_addresses idx, unsigned long p, unsigned long flags)
-{
-    if ( unlikely(idx >= __end_of_fixed_addresses) )
-        BUG();
-    map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags);
+    enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
+{
+    BUG_ON(idx >= __end_of_fixed_addresses);
+    map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
 }
 
 #ifdef MEMORY_GUARD
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow.c     Tue May 30 14:30:34 2006 -0500
@@ -430,7 +430,8 @@ no_shadow_page:
            perfc_value(shadow_l2_pages),
            perfc_value(hl2_table_pages),
            perfc_value(snapshot_pages));
-    BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+    /* XXX FIXME: try a shadow flush to free up some memory. */
+    domain_crash_synchronous();
 
     return 0;
 }
@@ -3064,7 +3065,8 @@ static inline unsigned long init_bl2(
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
     {
         printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, 
gmfn);
-        BUG(); /* XXX Deal gracefully with failure. */
+        /* XXX Deal gracefully with failure. */
+        domain_crash_synchronous();
     }
 
     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow32.c   Tue May 30 14:30:34 2006 -0500
@@ -246,7 +246,8 @@ alloc_shadow_page(struct domain *d,
                perfc_value(shadow_l2_pages),
                perfc_value(hl2_table_pages),
                perfc_value(snapshot_pages));
-        BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+        /* XXX FIXME: try a shadow flush to free up some memory. */
+        domain_crash_synchronous();
     }
 
     smfn = page_to_mfn(page);
@@ -983,6 +984,11 @@ alloc_p2m_table(struct domain *d)
     else
     {
         page = alloc_domheap_page(NULL);
+        if (!page)
+        {
+            printk("Alloc p2m table fail\n");
+            domain_crash(d);
+        }
 
         l1tab = map_domain_page(page_to_mfn(page));
         memset(l1tab, 0, PAGE_SIZE);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow_public.c      Tue May 30 14:30:34 2006 -0500
@@ -324,6 +324,11 @@ static void alloc_monitor_pagetable(stru
 
     mmfn_info = alloc_domheap_page(NULL);
     ASSERT( mmfn_info );
+    if (!mmfn_info)
+    {
+        printk("Fail to allocate monitor pagetable\n");
+        domain_crash(v->domain);
+    }
 
     mmfn = page_to_mfn(mmfn_info);
     mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/traps.c      Tue May 30 14:30:34 2006 -0500
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
                     PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                 break;
             }
-            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
 
         case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
-            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
         }
 
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/x86_emulate.c        Tue May 30 14:30:34 2006 -0500
@@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (              
 #endif /* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip) \
-({ unsigned long _x; \
-   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
-       goto done; \
-   (_eip) += (_size); \
-   (_type)_x; \
+#define insn_fetch(_type, _size, _eip)                                  \
+({ unsigned long _x;                                                    \
+   rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt);       \
+   if ( rc != 0 )                                                       \
+       goto done;                                                       \
+   (_eip) += (_size);                                                   \
+   (_type)_x;                                                           \
 })
 
 /* Access/update address held in a register, based on addressing mode. */
@@ -426,12 +427,10 @@ decode_register(
     return p;
 }
 
-int 
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode)
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops)
 {
     uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
@@ -439,9 +438,11 @@ x86_emulate_memop(
     unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
+    unsigned long cr2 = ctxt->cr2;
+    int mode = ctxt->mode;
 
     /* Shadow copy of register state. Committed on successful emulation. */
-    struct cpu_user_regs _regs = *regs;
+    struct cpu_user_regs _regs = *ctxt->regs;
 
     switch ( mode )
     {
@@ -628,7 +629,7 @@ x86_emulate_memop(
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
              ((rc = ops->read_emulated((unsigned long)dst.ptr,
-                                       &dst.val, dst.bytes)) != 0) )
+                                       &dst.val, dst.bytes, ctxt)) != 0) )
              goto done;
         break;
     }
@@ -670,7 +671,7 @@ x86_emulate_memop(
         src.type  = OP_MEM;
         src.ptr   = (unsigned long *)cr2;
         if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
-                                      &src.val, src.bytes)) != 0 )
+                                      &src.val, src.bytes, ctxt)) != 0 )
             goto done;
         src.orig_val = src.val;
         break;
@@ -776,7 +777,7 @@ x86_emulate_memop(
         if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
         if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
-                                 &dst.val, dst.bytes)) != 0 )
+                                 &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(_regs.esp, dst.bytes);
         break;
@@ -854,12 +855,12 @@ x86_emulate_memop(
             {
                 dst.bytes = 8;
                 if ( (rc = ops->read_std((unsigned long)dst.ptr,
-                                         &dst.val, 8)) != 0 )
+                                         &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
-            register_address_increment(_regs.esp, -dst.bytes);
+            register_address_increment(_regs.esp, -(int)dst.bytes);
             if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
-                                      dst.val, dst.bytes)) != 0 )
+                                      dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -887,10 +888,11 @@ x86_emulate_memop(
         case OP_MEM:
             if ( lock_prefix )
                 rc = ops->cmpxchg_emulated(
-                    (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.orig_val,
+                    dst.val, dst.bytes, ctxt);
             else
                 rc = ops->write_emulated(
-                    (unsigned long)dst.ptr, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
             if ( rc != 0 )
                 goto done;
         default:
@@ -899,7 +901,7 @@ x86_emulate_memop(
     }
 
     /* Commit shadow register state. */
-    *regs = _regs;
+    *ctxt->regs = _regs;
 
  done:
     return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
@@ -911,11 +913,11 @@ x86_emulate_memop(
     {
         if ( _regs.ecx == 0 )
         {
-            regs->eip = _regs.eip;
+            ctxt->regs->eip = _regs.eip;
             goto done;
         }
         _regs.ecx--;
-        _regs.eip = regs->eip;
+        _regs.eip = ctxt->regs->eip;
     }
     switch ( b )
     {
@@ -928,20 +930,21 @@ x86_emulate_memop(
             dst.ptr = (unsigned long *)cr2;
             if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
                                                       _regs.esi),
-                                     &dst.val, dst.bytes)) != 0 )
+                                     &dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         else
         {
             /* Read fault: source is special memory. */
             dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
-            if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+            if ( (rc = ops->read_emulated(cr2, &dst.val,
+                                          dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -952,16 +955,16 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.ptr   = (unsigned long *)&_regs.eax;
-        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
@@ -1074,8 +1077,8 @@ x86_emulate_memop(
 #if defined(__i386__)
     {
         unsigned long old_lo, old_hi;
-        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
-             ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
+             ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
             goto done;
         if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
         {
@@ -1090,8 +1093,8 @@ x86_emulate_memop(
         }
         else
         {
-            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
-                                               _regs.ebx, _regs.ecx)) != 0 )
+            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
+                                               _regs.ecx, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1100,7 +1103,7 @@ x86_emulate_memop(
 #elif defined(__x86_64__)
     {
         unsigned long old, new;
-        if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
             goto done;
         if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
              ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1112,7 +1115,7 @@ x86_emulate_memop(
         else
         {
             new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1136,7 +1139,8 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
@@ -1151,7 +1155,8 @@ x86_emulate_write_std(
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
diff -r e74246451527 -r f54d38cea8ac xen/common/Makefile
--- a/xen/common/Makefile       Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/Makefile       Tue May 30 14:30:34 2006 -0500
@@ -13,6 +13,7 @@ obj-y += page_alloc.o
 obj-y += page_alloc.o
 obj-y += rangeset.o
 obj-y += sched_bvt.o
+obj-y += sched_credit.o
 obj-y += sched_sedf.o
 obj-y += schedule.o
 obj-y += softirq.o
diff -r e74246451527 -r f54d38cea8ac xen/common/acm_ops.c
--- a/xen/common/acm_ops.c      Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/acm_ops.c      Tue May 30 14:30:34 2006 -0500
@@ -32,100 +32,94 @@
 
 #ifndef ACM_SECURITY
 
-long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op)
+
+long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg)
 {
     return -ENOSYS;
 }
 
+
 #else
 
-enum acm_operation {
-    POLICY,                     /* access to policy interface (early drop) */
-    GETPOLICY,                  /* dump policy cache */
-    SETPOLICY,                  /* set policy cache (controls security) */
-    DUMPSTATS,                  /* dump policy statistics */
-    GETSSID,                    /* retrieve ssidref for domain id (decide 
inside authorized domains) */
-    GETDECISION                 /* retrieve ACM decision from authorized 
domains */
-};
-
-int acm_authorize_acm_ops(struct domain *d, enum acm_operation pops)
+
+int acm_authorize_acm_ops(struct domain *d)
 {
     /* currently, policy management functions are restricted to privileged 
domains */
     if (!IS_PRIV(d))
         return -EPERM;
-
     return 0;
 }
 
-long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op)
-{
-    long ret = 0;
-    struct acm_op curop, *op = &curop;
-
-    if (acm_authorize_acm_ops(current->domain, POLICY))
+
+long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg)
+{
+    long rc = -EFAULT;
+
+    if (acm_authorize_acm_ops(current->domain))
         return -EPERM;
 
-    if (copy_from_guest(op, u_acm_op, 1))
-        return -EFAULT;
-
-    if (op->interface_version != ACM_INTERFACE_VERSION)
-        return -EACCES;
-
-    switch (op->cmd)
+    switch ( cmd )
     {
-    case ACM_SETPOLICY:
-    {
-        ret = acm_authorize_acm_ops(current->domain, SETPOLICY);
-        if (!ret)
-            ret = acm_set_policy(op->u.setpolicy.pushcache,
-                                 op->u.setpolicy.pushcache_size, 1);
-    }
-    break;
-
-    case ACM_GETPOLICY:
-    {
-        ret = acm_authorize_acm_ops(current->domain, GETPOLICY);
-        if (!ret)
-            ret = acm_get_policy(op->u.getpolicy.pullcache,
-                                 op->u.getpolicy.pullcache_size);
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
-
-    case ACM_DUMPSTATS:
-    {
-        ret = acm_authorize_acm_ops(current->domain, DUMPSTATS);
-        if (!ret)
-            ret = acm_dump_statistics(op->u.dumpstats.pullcache,
-                                      op->u.dumpstats.pullcache_size);
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
-
-    case ACM_GETSSID:
-    {
+
+    case ACMOP_setpolicy: {
+        struct acm_setpolicy setpolicy;
+        if (copy_from_guest(&setpolicy, arg, 1) != 0)
+            return -EFAULT;
+        if (setpolicy.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        rc = acm_set_policy(setpolicy.pushcache,
+                            setpolicy.pushcache_size, 1);
+        break;
+    }
+
+    case ACMOP_getpolicy: {
+        struct acm_getpolicy getpolicy;
+        if (copy_from_guest(&getpolicy, arg, 1) != 0)
+            return -EFAULT;
+        if (getpolicy.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        rc = acm_get_policy(getpolicy.pullcache,
+                            getpolicy.pullcache_size);
+        break;
+    }
+
+    case ACMOP_dumpstats: {
+        struct acm_dumpstats dumpstats;
+        if (copy_from_guest(&dumpstats, arg, 1) != 0)
+            return -EFAULT;
+        if (dumpstats.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        rc = acm_dump_statistics(dumpstats.pullcache,
+                                 dumpstats.pullcache_size);
+        break;
+    }
+
+    case ACMOP_getssid: {
+        struct acm_getssid getssid;
         ssidref_t ssidref;
 
-        ret = acm_authorize_acm_ops(current->domain, GETSSID);
-        if (ret)
-            break;
-
-        if (op->u.getssid.get_ssid_by == SSIDREF)
-            ssidref = op->u.getssid.id.ssidref;
-        else if (op->u.getssid.get_ssid_by == DOMAINID)
-        {
-            struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid);
-            if (!subj)
-            {
-                ret = -ESRCH; /* domain not found */
-                break;
-            }
-            if (subj->ssid == NULL)
-            {
-                put_domain(subj);
-                ret = -ESRCH;
+        if (copy_from_guest(&getssid, arg, 1) != 0)
+            return -EFAULT;
+        if (getssid.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        if (getssid.get_ssid_by == SSIDREF)
+            ssidref = getssid.id.ssidref;
+        else if (getssid.get_ssid_by == DOMAINID)
+        {
+            struct domain *subj = find_domain_by_id(getssid.id.domainid);
+            if (!subj)
+            {
+                rc = -ESRCH; /* domain not found */
+                break;
+            }
+            if (subj->ssid == NULL)
+            {
+                put_domain(subj);
+                rc = -ESRCH;
                 break;
             }
             ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -133,39 +127,36 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
         }
         else
         {
-            ret = -ESRCH;
-            break;
-        }
-        ret = acm_get_ssid(ssidref,
-                           op->u.getssid.ssidbuf,
-                           op->u.getssid.ssidbuf_size);
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
-
-    case ACM_GETDECISION:
-    {
+            rc = -ESRCH;
+            break;
+        }
+        rc = acm_get_ssid(ssidref, getssid.ssidbuf, getssid.ssidbuf_size);
+        break;
+    }
+
+    case ACMOP_getdecision: {
+        struct acm_getdecision getdecision;
         ssidref_t ssidref1, ssidref2;
 
-        ret = acm_authorize_acm_ops(current->domain, GETDECISION);
-        if (ret)
-            break;
-
-        if (op->u.getdecision.get_decision_by1 == SSIDREF)
-            ssidref1 = op->u.getdecision.id1.ssidref;
-        else if (op->u.getdecision.get_decision_by1 == DOMAINID)
-        {
-            struct domain *subj = 
find_domain_by_id(op->u.getdecision.id1.domainid);
-            if (!subj)
-            {
-                ret = -ESRCH; /* domain not found */
-                break;
-            }
-            if (subj->ssid == NULL)
-            {
-                put_domain(subj);
-                ret = -ESRCH;
+        if (copy_from_guest(&getdecision, arg, 1) != 0)
+            return -EFAULT;
+        if (getdecision.interface_version != ACM_INTERFACE_VERSION)
+            return -EACCES;
+
+        if (getdecision.get_decision_by1 == SSIDREF)
+            ssidref1 = getdecision.id1.ssidref;
+        else if (getdecision.get_decision_by1 == DOMAINID)
+        {
+            struct domain *subj = find_domain_by_id(getdecision.id1.domainid);
+            if (!subj)
+            {
+                rc = -ESRCH; /* domain not found */
+                break;
+            }
+            if (subj->ssid == NULL)
+            {
+                put_domain(subj);
+                rc = -ESRCH;
                 break;
             }
             ssidref1 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -173,23 +164,23 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
         }
         else
         {
-            ret = -ESRCH;
-            break;
-        }
-        if (op->u.getdecision.get_decision_by2 == SSIDREF)
-            ssidref2 = op->u.getdecision.id2.ssidref;
-        else if (op->u.getdecision.get_decision_by2 == DOMAINID)
-        {
-            struct domain *subj = 
find_domain_by_id(op->u.getdecision.id2.domainid);
-            if (!subj)
-            {
-                ret = -ESRCH; /* domain not found */
+            rc = -ESRCH;
+            break;
+        }
+        if (getdecision.get_decision_by2 == SSIDREF)
+            ssidref2 = getdecision.id2.ssidref;
+        else if (getdecision.get_decision_by2 == DOMAINID)
+        {
+            struct domain *subj = find_domain_by_id(getdecision.id2.domainid);
+            if (!subj)
+            {
+                rc = -ESRCH; /* domain not found */
                 break;;
             }
             if (subj->ssid == NULL)
             {
                 put_domain(subj);
-                ret = -ESRCH;
+                rc = -ESRCH;
                 break;
             }
             ssidref2 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -197,34 +188,35 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
         }
         else
         {
-            ret = -ESRCH;
-            break;
-        }
-        ret = acm_get_decision(ssidref1, ssidref2, op->u.getdecision.hook);
-
-        if (ret == ACM_ACCESS_PERMITTED)
-        {
-            op->u.getdecision.acm_decision = ACM_ACCESS_PERMITTED;
-            ret = 0;
-        }
-        else if  (ret == ACM_ACCESS_DENIED)
-        {
-            op->u.getdecision.acm_decision = ACM_ACCESS_DENIED;
-            ret = 0;
-        }
-        else
-            ret = -ESRCH;
-
-        if (!ret)
-            copy_to_guest(u_acm_op, op, 1);
-    }
-    break;
+            rc = -ESRCH;
+            break;
+        }
+        rc = acm_get_decision(ssidref1, ssidref2, getdecision.hook);
+
+        if (rc == ACM_ACCESS_PERMITTED)
+        {
+            getdecision.acm_decision = ACM_ACCESS_PERMITTED;
+            rc = 0;
+        }
+        else if  (rc == ACM_ACCESS_DENIED)
+        {
+            getdecision.acm_decision = ACM_ACCESS_DENIED;
+            rc = 0;
+        }
+        else
+            rc = -ESRCH;
+
+        if ( (rc == 0) && (copy_to_guest(arg, &getdecision, 1) != 0) )
+            rc = -EFAULT;
+        break;
+    }
 
     default:
-        ret = -ESRCH;
-    }
-
-    return ret;
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
 }
 
 #endif
diff -r e74246451527 -r f54d38cea8ac xen/common/elf.c
--- a/xen/common/elf.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/elf.c  Tue May 30 14:30:34 2006 -0500
@@ -23,10 +23,10 @@ int parseelfimage(struct domain_setup_in
     Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
-    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base;
+    unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off;
     char *shstrtab, *guestinfo=NULL, *p;
     char *elfbase = (char *)dsi->image_addr;
-    int h;
+    int h, virt_base_defined, elf_pa_off_defined;
 
     if ( !elf_sanity_check(ehdr) )
         return -EINVAL;
@@ -84,29 +84,40 @@ int parseelfimage(struct domain_setup_in
     if ( guestinfo == NULL )
         guestinfo = "";
 
-    virt_base = 0;
-    if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
-        virt_base = simple_strtoul(p+10, &p, 0);
-    dsi->elf_paddr_offset = virt_base;
-    if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL )
-        dsi->elf_paddr_offset = simple_strtoul(p+17, &p, 0);
+    /* Initial guess for virt_base is 0 if it is not explicitly defined. */
+    p = strstr(guestinfo, "VIRT_BASE=");
+    virt_base_defined = (p != NULL);
+    virt_base = virt_base_defined ? simple_strtoul(p+10, &p, 0) : 0;
+
+    /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */
+    p = strstr(guestinfo, "ELF_PADDR_OFFSET=");
+    elf_pa_off_defined = (p != NULL);
+    elf_pa_off = elf_pa_off_defined ? simple_strtoul(p+17, &p, 0) : virt_base;
+
+    if ( elf_pa_off_defined && !virt_base_defined )
+        goto bad_image;
 
     for ( h = 0; h < ehdr->e_phnum; h++ )
     {
         phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base;
+        vaddr = phdr->p_paddr - elf_pa_off + virt_base;
+        if ( (vaddr + phdr->p_memsz) < vaddr )
+            goto bad_image;
         if ( vaddr < kernstart )
             kernstart = vaddr;
         if ( (vaddr + phdr->p_memsz) > kernend )
             kernend = vaddr + phdr->p_memsz;
     }
 
-    if ( virt_base )
-        dsi->v_start = virt_base;
-    else
-        dsi->v_start = kernstart;
+    /*
+     * Legacy compatibility and images with no __xen_guest section: assume
+     * header addresses are virtual addresses, and that guest memory should be
+     * mapped starting at kernel load address.
+     */
+    dsi->v_start          = virt_base_defined  ? virt_base  : kernstart;
+    dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start;
 
     dsi->v_kernentry = ehdr->e_entry;
     if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL )
@@ -114,11 +125,9 @@ int parseelfimage(struct domain_setup_in
 
     if ( (kernstart > kernend) || 
          (dsi->v_kernentry < kernstart) ||
-         (dsi->v_kernentry > kernend) )
-    {
-        printk("Malformed ELF image.\n");
-        return -EINVAL;
-    }
+         (dsi->v_kernentry > kernend) ||
+         (dsi->v_start > kernstart) )
+        goto bad_image;
 
     if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
             dsi->load_symtab = 1;
@@ -130,6 +139,10 @@ int parseelfimage(struct domain_setup_in
     loadelfsymtab(dsi, 0);
 
     return 0;
+
+ bad_image:
+    printk("Malformed ELF image.\n");
+    return -EINVAL;
 }
 
 int loadelfimage(struct domain_setup_info *dsi)
diff -r e74246451527 -r f54d38cea8ac xen/common/grant_table.c
--- a/xen/common/grant_table.c  Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/grant_table.c  Tue May 30 14:30:34 2006 -0500
@@ -505,15 +505,12 @@ gnttab_setup_table(
         goto out;
     }
 
-    if ( op.nr_frames <= NR_GRANT_FRAMES )
-    {
-        ASSERT(d->grant_table != NULL);
-        op.status = GNTST_okay;
-        for ( i = 0; i < op.nr_frames; i++ )
-        {
-            gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
-            (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
-        }
+    ASSERT(d->grant_table != NULL);
+    op.status = GNTST_okay;
+    for ( i = 0; i < op.nr_frames; i++ )
+    {
+        gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
+        (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
     }
 
     put_domain(d);
diff -r e74246451527 -r f54d38cea8ac xen/common/kernel.c
--- a/xen/common/kernel.c       Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/kernel.c       Tue May 30 14:30:34 2006 -0500
@@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = 0;
+            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( shadow_mode_translate(current->domain) )
                 fi.submap |= 
                     (1U << XENFEAT_writable_page_tables) |
-                    (1U << XENFEAT_auto_translated_physmap) |
-                    (1U << XENFEAT_pae_pgdir_above_4gb);
+                    (1U << XENFEAT_auto_translated_physmap);
             if ( supervisor_mode_kernel )
                 fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
             break;
diff -r e74246451527 -r f54d38cea8ac xen/common/schedule.c
--- a/xen/common/schedule.c     Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/schedule.c     Tue May 30 14:30:34 2006 -0500
@@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
 
 extern struct scheduler sched_bvt_def;
 extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
 static struct scheduler *schedulers[] = { 
     &sched_bvt_def,
     &sched_sedf_def,
+    &sched_credit_def,
     NULL
 };
 
@@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
 
     page_scrub_schedule_work();
 
+    SCHED_OP(tick, cpu);
+
     set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
 }
 
@@ -681,6 +685,7 @@ void __init scheduler_init(void)
         printk("Could not find scheduler: %s\n", opt_sched);
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+    SCHED_OP(init);
 
     if ( idle_vcpu[0] != NULL )
     {
diff -r e74246451527 -r f54d38cea8ac xen/common/trace.c
--- a/xen/common/trace.c        Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/trace.c        Tue May 30 14:30:34 2006 -0500
@@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
     if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
     {
         printk("Xen trace buffers: memory allocation failed\n");
+        opt_tbuf_size = 0;
         return -EINVAL;
     }
 
@@ -135,10 +136,7 @@ static int tb_set_size(int size)
 
     opt_tbuf_size = size;
     if ( alloc_trace_bufs() != 0 )
-    {
-        opt_tbuf_size = 0;
-        return -EINVAL;
-    }
+        return -EINVAL;
 
     printk("Xen trace buffers: initialized\n");
     return 0;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/config.h     Tue May 30 14:30:34 2006 -0500
@@ -97,6 +97,13 @@ extern char _end[]; /* standard ELF symb
 //#define HZ 1000
 // FIXME SMP: leave SMP for a later time
 
+/* A power-of-two value greater than or equal to number of hypercalls. */
+#define NR_hypercalls 64
+
+#if NR_hypercalls & (NR_hypercalls - 1)
+#error "NR_hypercalls must be a power-of-two value"
+#endif
+
 ///////////////////////////////////////////////////////////////
 // xen/include/asm/config.h
 // Natural boundary upon TR size to define xenheap space
@@ -239,6 +246,10 @@ void dummy_called(char *function);
 // these declarations got moved at some point, find a better place for them
 extern int ht_per_core;
 
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#define CONFIG_SHADOW  1
+#endif
+
 // xen/include/asm/config.h
 /******************************************************************************
  * config.h
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/dom_fw.h
--- a/xen/include/asm-ia64/dom_fw.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/dom_fw.h     Tue May 30 14:30:34 2006 -0500
@@ -5,7 +5,7 @@
  *     Dan Magenheimer (dan.magenheimer@xxxxxx)
  */
 
-extern unsigned long dom_fw_setup(struct domain *, const char *, int);
+#include <linux/efi.h>
 
 #ifndef MB
 #define MB (1024*1024)
@@ -55,7 +55,7 @@ extern unsigned long dom_fw_setup(struct
 
 #define FW_HYPERCALL_SAL_CALL_INDEX    0x82UL
 #define FW_HYPERCALL_SAL_CALL_PADDR    
FW_HYPERCALL_PADDR(FW_HYPERCALL_SAL_CALL_INDEX)
-#define FW_HYPERCALL_SAL_CALL          0x1001UL
+#define FW_HYPERCALL_SAL_CALL          0x1100UL
 
 /*
  * EFI is accessed via the EFI system table, which contains:
@@ -94,6 +94,7 @@ extern unsigned long dom_fw_setup(struct
 #define FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX            9UL
 
 /* these are hypercall numbers */
+#define FW_HYPERCALL_EFI_CALL                          0x300UL
 #define FW_HYPERCALL_EFI_GET_TIME                      0x300UL
 #define FW_HYPERCALL_EFI_SET_TIME                      0x301UL
 #define FW_HYPERCALL_EFI_GET_WAKEUP_TIME               0x302UL
@@ -125,7 +126,7 @@ extern unsigned long dom_fw_setup(struct
 */
 #define FW_HYPERCALL_FIRST_ARCH                0x300UL
 
-#define FW_HYPERCALL_IPI               0x380UL
+#define FW_HYPERCALL_IPI               0x400UL
 
 /* Xen/ia64 user hypercalls.  Only used for debugging.  */
 #define FW_HYPERCALL_FIRST_USER                0xff00UL
@@ -133,9 +134,16 @@ extern unsigned long dom_fw_setup(struct
 /* Interrupt vector used for os boot rendez vous.  */
 #define XEN_SAL_BOOT_RENDEZ_VEC        0xF3
 
+#define FW_HYPERCALL_NUM_MASK_HIGH     ~0xffUL
+#define FW_HYPERCALL_NUM_MASK_LOW       0xffUL
+
+#define EFI_MEMDESC_VERSION            1
+
 extern struct ia64_pal_retval xen_pal_emulator(UINT64, u64, u64, u64);
 extern struct sal_ret_values sal_emulator (long index, unsigned long in1, 
unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5, 
unsigned long in6, unsigned long in7);
 extern struct ia64_pal_retval pal_emulator_static (unsigned long);
+extern unsigned long dom_fw_setup (struct domain *, const char *, int);
+extern efi_status_t efi_emulator (struct pt_regs *regs, unsigned long *fault);
 
 extern void build_pal_hypercall_bundles(unsigned long *imva, unsigned long 
brkimm, unsigned long hypnum);
 extern void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, 
UINT64 ret);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/domain.h     Tue May 30 14:30:34 2006 -0500
@@ -22,8 +22,13 @@ extern void panic_domain(struct pt_regs 
 extern void panic_domain(struct pt_regs *, const char *, ...)
      __attribute__ ((noreturn, format (printf, 2, 3)));
 
+struct mm_struct {
+       pgd_t * pgd;
+    // atomic_t mm_users;                      /* How many users with user 
space? */
+};
+
 struct arch_domain {
-    struct mm_struct *mm;
+    struct mm_struct mm;
     unsigned long metaphysical_rr0;
     unsigned long metaphysical_rr4;
 
@@ -54,10 +59,11 @@ struct arch_domain {
     unsigned long initrd_start;
     unsigned long initrd_len;
     char *cmdline;
+    int efi_virt_mode;         /* phys : 0 , virt : 1 */
+    void *efi_runtime;
 };
 #define xen_vastart arch.xen_vastart
 #define xen_vaend arch.xen_vaend
-#define shared_info_va arch.shared_info_va
 #define INT_ENABLE_OFFSET(v)             \
     (sizeof(vcpu_info_t) * (v)->vcpu_id + \
     offsetof(vcpu_info_t, evtchn_upcall_mask))
@@ -69,8 +75,6 @@ struct arch_vcpu {
        TR_ENTRY dtlb;
        unsigned int itr_regions;
        unsigned int dtr_regions;
-       unsigned long itlb_pte;
-       unsigned long dtlb_pte;
        unsigned long irr[4];
        unsigned long insvc[4];
        unsigned long tc_regions;
@@ -106,27 +110,15 @@ struct arch_vcpu {
     struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
 };
 
-//#define thread arch._thread
-
-// FOLLOWING FROM linux-2.6.7/include/sched.h
-
-struct mm_struct {
-       pgd_t * pgd;
-    // atomic_t mm_users;                      /* How many users with user 
space? */
-       struct list_head pt_list;               /* List of pagetable */
-};
-
-extern struct mm_struct init_mm;
-
 struct page_info * assign_new_domain_page(struct domain *d, unsigned long 
mpaddr);
 void assign_new_domain0_page(struct domain *d, unsigned long mpaddr);
+void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned 
long physaddr);
 void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long 
physaddr);
 void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned 
long flags);
 #ifdef CONFIG_XEN_IA64_DOM0_VP
 unsigned long assign_domain_mmio_page(struct domain *d, unsigned long mpaddr, 
unsigned long size);
 unsigned long assign_domain_mach_page(struct domain *d, unsigned long mpaddr, 
unsigned long size);
 unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned 
long arg1, unsigned long arg2, unsigned long arg3);
-unsigned long dom0vp_populate_physmap(struct domain *d, unsigned long gpfn, 
unsigned int extent_order, unsigned int address_bits);
 unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, 
unsigned int extent_order);
 unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, 
unsigned long mfn, unsigned int flags, domid_t domid);
 #endif
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/event.h
--- a/xen/include/asm-ia64/event.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/event.h      Tue May 30 14:30:34 2006 -0500
@@ -29,7 +29,7 @@ static inline void evtchn_notify(struct 
         smp_send_event_check_cpu(v->processor);
 
     if(!VMX_DOMAIN(v))
-       vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector);
+       vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
 }
 
 /* Note: Bitwise operations result in fast code with no branches. */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/grant_table.h        Tue May 30 14:30:34 2006 -0500
@@ -7,12 +7,33 @@
 
 #define ORDER_GRANT_FRAMES 0
 
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+// for grant map/unmap
 #define create_grant_host_mapping(a, f, fl)  0
 #define destroy_grant_host_mapping(a, f, fl) 0
 
+// for grant transfer
 #define steal_page_for_grant_transfer(d, p)  0
 
-#define gnttab_create_shared_page(d, t, i) ((void)0)
+#else
+// for grant map/unmap
+int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, 
unsigned int flags);
+int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, 
unsigned int flags);
+
+// for grant transfer
+int steal_page_for_grant_transfer(struct domain *d, struct page_info *page);
+void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
+
+#endif
+
+// for grant table shared page
+#define gnttab_create_shared_page(d, t, i)                              \
+    do {                                                                \
+        share_xen_page_with_guest(                                      \
+            virt_to_page((char *)(t)->shared + ((i) << PAGE_SHIFT)),    \
+            (d), XENSHARE_writable);                                    \
+    } while (0)
+
 
 /* Guest physical address of the grant table.  */
 #define IA64_GRANT_TABLE_PADDR (1UL << 40)
@@ -20,13 +41,21 @@
 #define gnttab_shared_maddr(d, t, i)                        \
     virt_to_maddr((char*)(t)->shared + ((i) << PAGE_SHIFT))
 
-#define gnttab_shared_gmfn(d, t, i)                                          \
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+# define gnttab_shared_gmfn(d, t, i)                                         \
     ({ ((d) == dom0) ?                                                       \
             (virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i):                \
             assign_domain_page((d),                                          \
                                IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \
                                gnttab_shared_maddr(d, t, i)),                \
             (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);})
+#else
+# define gnttab_shared_gmfn(d, t, i)                                    \
+    ({ assign_domain_page((d),                                          \
+                          IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \
+                          gnttab_shared_maddr((d), (t), (i)));          \
+        (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);})
+#endif
 
 #define gnttab_log_dirty(d, f) ((void)0)
 
diff -r e74246451527 -r f54d38cea8ac 
xen/include/asm-ia64/linux-xen/asm/pgalloc.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h      Tue May 30 12:52:02 
2006 -0500
+++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h      Tue May 30 14:30:34 
2006 -0500
@@ -139,12 +139,14 @@ static inline void pte_free(struct page 
 {
        pgtable_quicklist_free(page_address(pte));
 }
+#endif
 
 static inline void pte_free_kernel(pte_t * pte)
 {
        pgtable_quicklist_free(pte);
 }
 
+#ifndef XEN
 #define __pte_free_tlb(tlb, pte)       pte_free(pte)
 #endif
 
diff -r e74246451527 -r f54d38cea8ac 
xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Tue May 30 12:52:02 
2006 -0500
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Tue May 30 14:30:34 
2006 -0500
@@ -383,6 +383,7 @@ ptep_test_and_clear_dirty (struct vm_are
        return 1;
 #endif
 }
+#endif
 
 static inline pte_t
 ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -396,6 +397,19 @@ ptep_get_and_clear(struct mm_struct *mm,
 #endif
 }
 
+static inline pte_t
+ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte)
+{
+#ifdef CONFIG_SMP
+       return __pte(xchg((long *) ptep, pte_val(npte)));
+#else
+       pte_t pte = *ptep;
+       set_pte (ptep, npte);
+       return pte;
+#endif
+}
+
+#ifndef XEN
 static inline void
 ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/mm.h Tue May 30 14:30:34 2006 -0500
@@ -12,7 +12,7 @@
 
 #include <asm/processor.h>
 #include <asm/atomic.h>
-#include <asm/flushtlb.h>
+#include <asm/tlbflush.h>
 #include <asm/io.h>
 
 #include <public/xen.h>
@@ -128,8 +128,10 @@ static inline u32 pickle_domptr(struct d
 #define page_get_owner(_p)     (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d))
 
-/* Dummy now */
-#define share_xen_page_with_guest(p, d, r) do { } while (0)
+#define XENSHARE_writable 0
+#define XENSHARE_readonly 1
+void share_xen_page_with_guest(struct page_info *page,
+                               struct domain *d, int readonly);
 #define share_xen_page_with_privileged_guests(p, r) do { } while (0)
 
 extern struct page_info *frame_table;
@@ -471,6 +473,4 @@ extern unsigned long ____lookup_domain_m
 /* Arch-specific portion of memory_op hypercall. */
 #define arch_memory_op(op, arg) (-ENOSYS)
 
-extern void assign_domain_page(struct domain *d, unsigned long mpaddr,
-                              unsigned long physaddr);
 #endif /* __ASM_IA64_MM_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/shadow.h     Tue May 30 14:30:34 2006 -0500
@@ -1,2 +1,57 @@
-/* empty */
+/******************************************************************************
+ * include/asm-ia64/shadow.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
 
+#ifndef _XEN_SHADOW_H
+#define _XEN_SHADOW_H
+
+#include <xen/config.h>
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#ifndef CONFIG_SHADOW
+# error "CONFIG_SHADOW must be defined"
+#endif
+
+#define shadow_drop_references(d, p)          ((void)0)
+
+// this is only used by x86-specific code
+//#define shadow_sync_and_drop_references(d, p) ((void)0)
+
+#define shadow_mode_translate(d)              (1)
+
+// for granttab transfer. XENMEM_populate_physmap
+void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
+// for balloon driver. XENMEM_decrease_reservation
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
+#endif
+
+#endif // _XEN_SHADOW_H
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vcpu.h
--- a/xen/include/asm-ia64/vcpu.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vcpu.h       Tue May 30 14:30:34 2006 -0500
@@ -135,7 +135,10 @@ extern IA64FAULT vcpu_set_pkr(VCPU *vcpu
 extern IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val);
 extern IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key);
 /* TLB */
-extern void vcpu_purge_tr_entry(TR_ENTRY *trp);
+static inline void vcpu_purge_tr_entry(TR_ENTRY *trp)
+{
+       trp->pte.val = 0;
+}
 extern IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 padr,
                UINT64 itir, UINT64 ifa);
 extern IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 padr,
@@ -148,8 +151,7 @@ extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu,
 extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
 extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
 extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
-extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address,
-                               BOOLEAN is_data, BOOLEAN in_tpa,
+extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data,
                                UINT64 *pteval, UINT64 *itir, UINT64 *iha);
 extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
 extern IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vhpt.h       Tue May 30 14:30:34 2006 -0500
@@ -4,18 +4,17 @@
 #define VHPT_ENABLED 1
 
 /* Size of the VHPT.  */
-#define        VHPT_SIZE_LOG2                  24
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+// XXX workaround to avoid triggering xenLinux software lockup detection.
+# define       VHPT_SIZE_LOG2                  16      // 64KB
+#else
+# define       VHPT_SIZE_LOG2                  24      // 16MB default
+#endif
 
 /* Number of entries in the VHPT.  The size of an entry is 4*8B == 32B */
 #define        VHPT_NUM_ENTRIES                (1 << (VHPT_SIZE_LOG2 - 5))
 
-#ifdef CONFIG_SMP
-# define vhpt_flush_all()      smp_vhpt_flush_all()
-#else
-# define vhpt_flush_all()      vhpt_flush()
-#endif
 // FIXME: These should be automatically generated
-
 #define        VLE_PGFLAGS_OFFSET              0
 #define        VLE_ITIR_OFFSET                 8
 #define        VLE_TITAG_OFFSET                16
@@ -37,15 +36,10 @@ extern void vhpt_init (void);
 extern void vhpt_init (void);
 extern void zero_vhpt_stats(void);
 extern int dump_vhpt_stats(char *buf);
-extern void vhpt_flush_address(unsigned long vadr, unsigned long addr_range);
-extern void vhpt_flush_address_remote(int cpu, unsigned long vadr,
-                                     unsigned long addr_range);
 extern void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                                 unsigned long logps);
 extern void vhpt_insert (unsigned long vadr, unsigned long pte,
                         unsigned long logps);
-extern void vhpt_flush(void);
-extern void smp_vhpt_flush_all(void);
 
 /* Currently the VHPT is allocated per CPU.  */
 DECLARE_PER_CPU (unsigned long, vhpt_paddr);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h   Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vmx_vcpu.h   Tue May 30 14:30:34 2006 -0500
@@ -359,7 +359,7 @@ IA64FAULT vmx_vcpu_get_cpuid(VCPU *vcpu,
     // TODO: unimplemented DBRs return a reserved register fault
     // TODO: Should set Logical CPU state, not just physical
     if(reg > 4){
-        panic("there are only five cpuid registers");
+        panic_domain(vcpu_regs(vcpu),"there are only five cpuid registers");
     }
     *pval=VCPU(vcpu,vcpuid[reg]);
     return (IA64_NO_FAULT);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/domain.h      Tue May 30 14:30:34 2006 -0500
@@ -114,11 +114,32 @@ struct arch_domain
     unsigned long first_reserved_pfn;
 } __cacheline_aligned;
 
+#ifdef CONFIG_X86_PAE
+struct pae_l3_cache {
+    /*
+     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
+     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
+     * an L3 table while we are currently running on it (without using
+     * expensive atomic 64-bit operations).
+     */
+    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
+    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
+    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
+    spinlock_t    lock;
+};
+#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
+#else /* !CONFIG_X86_PAE */
+struct pae_l3_cache { };
+#define pae_l3_cache_init(c) ((void)0)
+#endif
+
 struct arch_vcpu
 {
     /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */
     struct vcpu_guest_context guest_context
     __attribute__((__aligned__(16)));
+
+    struct pae_l3_cache pae_l3_cache;
 
     unsigned long      flags; /* TF_ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/fixmap.h      Tue May 30 14:30:34 2006 -0500
@@ -25,6 +25,10 @@
  * from the end of virtual memory backwards.
  */
 enum fixed_addresses {
+#ifdef CONFIG_X86_PAE
+    FIX_PAE_HIGHMEM_0,
+    FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
+#endif
     FIX_APIC_BASE,
     FIX_IO_APIC_BASE_0,
     FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
@@ -40,13 +44,13 @@ enum fixed_addresses {
 #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
 
 extern void __set_fixmap(
-    enum fixed_addresses idx, unsigned long p, unsigned long flags);
+    enum fixed_addresses idx, unsigned long mfn, unsigned long flags);
 
 #define set_fixmap(idx, phys) \
-    __set_fixmap(idx, phys, PAGE_HYPERVISOR)
+    __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR)
 
 #define set_fixmap_nocache(idx, phys) \
-    __set_fixmap(idx, phys, PAGE_HYPERVISOR_NOCACHE)
+    __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE)
 
 #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/domain.h  Tue May 30 14:30:34 2006 -0500
@@ -35,9 +35,9 @@ struct hvm_domain {
     unsigned int           nr_vcpus;
     unsigned int           apic_enabled;
     unsigned int           pae_enabled;
-
-    struct hvm_virpit      vpit;
-    u64                    guest_time;
+    s64                    tsc_frequency;
+    struct pl_time         pl_time;
+    
     struct hvm_virpic      vpic;
     struct hvm_vioapic     vioapic;
     struct hvm_io_handler  io_handler;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/intr.h
--- a/xen/include/asm-x86/hvm/svm/intr.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/svm/intr.h        Tue May 30 14:30:34 2006 -0500
@@ -21,7 +21,6 @@
 #ifndef __ASM_X86_HVM_SVM_INTR_H__
 #define __ASM_X86_HVM_SVM_INTR_H__
 
-extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
 extern void svm_intr_assist(void);
 extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
 extern void svm_intr_assist_test_valid(struct vcpu *v, 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 14:30:34 2006 -0500
@@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v
 extern void svm_do_launch(struct vcpu *v);
 extern void svm_do_resume(struct vcpu *v);
 extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 svm_get_guest_time(struct vcpu *v);
 extern void arch_svm_do_resume(struct vcpu *v);
 extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
 /* For debugging. Remove when no longer needed. */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vcpu.h    Tue May 30 14:30:34 2006 -0500
@@ -32,6 +32,9 @@ struct hvm_vcpu {
     unsigned long   ioflags;
     struct mmio_op  mmio_op;
     struct vlapic   *vlapic;
+    s64             cache_tsc_offset;
+    u64             guest_time;
+
     /* For AP startup */
     unsigned long   init_sipi_sipi_state;
 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 14:30:34 2006 -0500
@@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
 extern void set_guest_time(struct vcpu *v, u64 gtime);
-extern u64  get_guest_time(struct vcpu *v);
 
 extern unsigned int cpu_rev;
 
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vpit.h
--- a/xen/include/asm-x86/hvm/vpit.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vpit.h    Tue May 30 14:30:34 2006 -0500
@@ -29,9 +29,7 @@
 #include <asm/hvm/vpic.h>
 
 #define PIT_FREQ 1193181
-
-#define PIT_BASE 0x40
-#define HVM_PIT_ACCEL_MODE 2
+#define PIT_BASE        0x40
 
 typedef struct PITChannelState {
     int count; /* can be 65536 */
@@ -48,47 +46,56 @@ typedef struct PITChannelState {
     u8 gate; /* timer start */
     s64 count_load_time;
     /* irq handling */
-    s64 next_transition_time;
-    int irq;
-    struct hvm_time_info *hvm_time;
-    u32 period; /* period(ns) based on count */
+    struct vcpu      *vcpu;
+    struct periodic_time *pt;
 } PITChannelState;
-
-struct hvm_time_info {
-    /* extra info for the mode 2 channel */
-    struct timer pit_timer;
-    struct vcpu *vcpu;          /* which vcpu the ac_timer bound to */
-    u64 period_cycles;          /* pit frequency in cpu cycles */
-    s_time_t count_advance;     /* accumulated count advance since last fire */
-    s_time_t count_point;        /* last point accumulating count advance */
-    unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
-    int first_injected;         /* flag to prevent shadow window */
-    s64 cache_tsc_offset;       /* cache of VMCS TSC_OFFSET offset */
-    u64 last_pit_gtime;         /* guest time when last pit is injected */
+   
+/*
+ * Abstraction layer for periodic time; also supports one-shot timers.
+ */
+struct periodic_time {
+    char enabled;               /* enabled */
+    char one_shot;              /* one shot time */
+    char irq;
+    char first_injected;        /* flag to prevent shadow window */
+    u32 pending_intr_nr;        /* the counter for pending timer interrupts */
+    u32 period;                 /* frequency in ns */
+    u64 period_cycles;          /* frequency in cpu cycles */
+    s_time_t scheduled;         /* scheduled timer interrupt */
+    u64 last_plt_gtime;         /* platform time when last IRQ is injected */
+    struct timer timer;         /* ac_timer */
 };
 
-typedef struct hvm_virpit {
+typedef struct PITState {
     PITChannelState channels[3];
-    struct hvm_time_info time_info;
     int speaker_data_on;
     int dummy_refresh_clock;
-}hvm_virpit;
+} PITState;
 
+struct pl_time {    /* platform time */
+    struct periodic_time periodic_tm;
+    struct PITState      vpit;
+    /* TODO: RTC/ACPI time */
+};
 
-static __inline__ s_time_t get_pit_scheduled(
-    struct vcpu *v,
-    struct hvm_virpit *vpit)
+static __inline__ s_time_t get_scheduled(
+    struct vcpu *v, int irq,
+    struct periodic_time *pt)
 {
-    struct PITChannelState *s = &(vpit->channels[0]);
-    if ( is_irq_enabled(v, 0) ) {
-        return s->next_transition_time;
+    if ( is_irq_enabled(v, irq) ) {
+        return pt->scheduled;
     }
     else
         return -1;
 }
 
 /* to hook the ioreq packet to get the PIT initialization info */
-extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
-extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void pickup_deactive_ticks(struct periodic_time *vpit);
+extern u64 hvm_get_guest_time(struct vcpu *v);
+extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, 
char irq, char one_shot);
+extern void destroy_periodic_time(struct periodic_time *pt);
+void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pt_timer_fn(void *data);
 
 #endif /* __ASM_X86_HVM_VPIT_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/string.h      Tue May 30 14:30:34 2006 -0500
@@ -2,152 +2,6 @@
 #define __X86_STRING_H__
 
 #include <xen/config.h>
-
-#define __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dest, const char *src)
-{
-    long d0, d1, d2;
-    __asm__ __volatile__ (
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2)
-        : "0" (src), "1" (dest) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "1: dec  %2        \n"
-        "   js   2f        \n"
-        "   lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        "   rep ; stosb    \n"
-        "2:                \n"
-        : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-        : "0" (src), "1" (dest), "2" (count) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char *strcat(char *dest, const char *src)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   dec  %1        \n"
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char *strncat(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb   \n"
-        "   dec  %1         \n"
-        "   mov  %8,%3      \n"
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   stosb           \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   stosb"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
-        : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char *cs, const char *ct)
-{
-    long d0, d1;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: lodsb           \n"
-        "   scasb           \n"
-        "   jne  2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   xor  %%eax,%%eax\n"
-        "   jmp  3f         \n"
-        "2: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "3:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1)
-        : "1" (cs), "2" (ct) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char *cs, const char *ct, size_t count)
-{
-    long d0, d1, d2;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   scasb           \n"
-        "   jne  3f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   jmp  4f         \n"
-        "3: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "4:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-        : "1" (cs), "2" (ct), "3" (count) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char *strchr(const char *s, int c)
-{
-    long d0;
-    register char *__res;
-    __asm__ __volatile__ (
-        "   mov  %%al,%%ah  \n"
-        "1: lodsb           \n"
-        "   cmp  %%ah,%%al  \n"
-        "   je   2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   mov  $1,%1      \n"
-        "2: mov  %1,%0      \n"
-        "   dec  %0         \n"
-        : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char *s)
-{
-    long d0;
-    register int __res;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   notl %0        \n"
-        "   decl %0        \n"
-        : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
-    return __res;
-}
 
 static inline void *__variable_memcpy(void *to, const void *from, size_t n)
 {
@@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
 #define __HAVE_ARCH_MEMCMP
 #define memcmp __builtin_memcmp
 
-#define __HAVE_ARCH_MEMCHR
-static inline void *memchr(const void *cs, int c, size_t count)
-{
-    long d0;
-    register void *__res;
-    if ( count == 0 )
-        return NULL;
-    __asm__ __volatile__ (
-        "   repne ; scasb\n"
-        "   je   1f      \n"
-        "   mov  $1,%0   \n"
-        "1: dec  %0      \n"
-        : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
-    return __res;
-}
-
 static inline void *__memset_generic(void *s, char c, size_t count)
 {
     long d0, d1;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/x86_emulate.h Tue May 30 14:30:34 2006 -0500
@@ -9,8 +9,10 @@
 #ifndef __X86_EMULATE_H__
 #define __X86_EMULATE_H__
 
-/*
- * x86_mem_emulator:
+struct x86_emulate_ctxt;
+
+/*
+ * x86_emulate_ops:
  * 
  * These operations represent the instruction emulator's interface to memory.
  * There are two categories of operation: those that act on ordinary memory
@@ -47,7 +49,7 @@
 #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
 #define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
-struct x86_mem_emulator
+struct x86_emulate_ops
 {
     /*
      * read_std: Read bytes of standard (non-emulated/special) memory.
@@ -59,7 +61,8 @@ struct x86_mem_emulator
     int (*read_std)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +74,8 @@ struct x86_mem_emulator
     int (*write_std)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +86,8 @@ struct x86_mem_emulator
     int (*read_emulated)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_emulated: Read bytes from emulated/special memory area.
@@ -93,7 +98,8 @@ struct x86_mem_emulator
     int (*write_emulated)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -107,11 +113,12 @@ struct x86_mem_emulator
         unsigned long addr,
         unsigned long old,
         unsigned long new,
-        unsigned int bytes);
-
-    /*
-     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
-     *                   emulated/special memory area.
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
+     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+     *                     emulated/special memory area.
      *  @addr:  [IN ] Linear address to access.
      *  @old:   [IN ] Value expected to be current at @addr.
      *  @new:   [IN ] Value to write to @addr.
@@ -126,7 +133,8 @@ struct x86_mem_emulator
         unsigned long old_lo,
         unsigned long old_hi,
         unsigned long new_lo,
-        unsigned long new_hi);
+        unsigned long new_hi,
+        struct x86_emulate_ctxt *ctxt);
 };
 
 /* Standard reader/writer functions that callers may wish to use. */
@@ -134,14 +142,28 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 extern int
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 
 struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+    /* Register state before/after emulation. */
+    struct cpu_user_regs   *regs;
+
+    /* Linear faulting address (if emulating a page-faulting instruction). */
+    unsigned long           cr2;
+
+    /* Emulated execution mode, represented by an X86EMUL_MODE value. */
+    int                     mode;
+};
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0 /* Real mode.             */
@@ -159,25 +181,19 @@ struct cpu_user_regs;
 /*
  * x86_emulate_memop: Emulate an instruction that faulted attempting to
  *                    read/write a 'special' memory area.
- *  @regs: Register state at time of fault.
- *  @cr2:  Linear faulting address within an emulated/special memory area.
- *  @ops:  Interface to access special memory.
- *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
  * Returns -1 on failure, 0 on success.
  */
-extern int
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode);
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops);
 
 /*
  * Given the 'reg' portion of a ModRM byte, and a register block, return a
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-extern void *
+void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/acm_ops.h
--- a/xen/include/public/acm_ops.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/acm_ops.h      Tue May 30 14:30:34 2006 -0500
@@ -2,7 +2,7 @@
  * acm_ops.h: Xen access control module hypervisor commands
  *
  * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Copyright (c) 2005, International Business Machines Corporation.
+ * Copyright (c) 2005,2006 International Business Machines Corporation.
  */
 
 #ifndef __XEN_PUBLIC_ACM_OPS_H__
@@ -17,36 +17,50 @@
  * This makes sure that old versions of acm tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define ACM_INTERFACE_VERSION   0xAAAA0006
+#define ACM_INTERFACE_VERSION   0xAAAA0007
 
 /************************************************************************/
 
-#define ACM_SETPOLICY         4
+/*
+ * Prototype for this hypercall is:
+ *  int acm_op(int cmd, void *args)
+ * @cmd  == ACMOP_??? (access control module operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+
+#define ACMOP_setpolicy         1
 struct acm_setpolicy {
-    /* OUT variables */
+    /* IN */
+    uint32_t interface_version;
     void *pushcache;
     uint32_t pushcache_size;
 };
 
 
-#define ACM_GETPOLICY         5
+#define ACMOP_getpolicy         2
 struct acm_getpolicy {
-    /* OUT variables */
+    /* IN */
+    uint32_t interface_version;
     void *pullcache;
     uint32_t pullcache_size;
 };
 
 
-#define ACM_DUMPSTATS         6
+#define ACMOP_dumpstats         3
 struct acm_dumpstats {
+    /* IN */
+    uint32_t interface_version;
     void *pullcache;
     uint32_t pullcache_size;
 };
 
 
-#define ACM_GETSSID           7
+#define ACMOP_getssid           4
 enum get_type {UNSET=0, SSIDREF, DOMAINID};
 struct acm_getssid {
+    /* IN */
+    uint32_t interface_version;
     enum get_type get_ssid_by;
     union {
         domaintype_t domainid;
@@ -56,9 +70,11 @@ struct acm_getssid {
     uint32_t ssidbuf_size;
 };
 
-#define ACM_GETDECISION        8
+#define ACMOP_getdecision      5
 struct acm_getdecision {
-    enum get_type get_decision_by1; /* in */
+    /* IN */
+    uint32_t interface_version;
+    enum get_type get_decision_by1;
     enum get_type get_decision_by2;
     union {
         domaintype_t domainid;
@@ -69,23 +85,11 @@ struct acm_getdecision {
         ssidref_t    ssidref;
     } id2;
     enum acm_hook_type hook;
-    int acm_decision;           /* out */
+    /* OUT */
+    int acm_decision;
 };
 
-typedef struct acm_op {
-    uint32_t cmd;
-    uint32_t interface_version;      /* ACM_INTERFACE_VERSION */
-    union {
-        struct acm_setpolicy setpolicy;
-        struct acm_getpolicy getpolicy;
-        struct acm_dumpstats dumpstats;
-        struct acm_getssid getssid;
-        struct acm_getdecision getdecision;
-    } u;
-} acm_op_t;
-DEFINE_XEN_GUEST_HANDLE(acm_op_t);
-
-#endif                          /* __XEN_PUBLIC_ACM_OPS_H__ */
+#endif /* __XEN_PUBLIC_ACM_OPS_H__ */
 
 /*
  * Local variables:
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-ia64.h    Tue May 30 14:30:34 2006 -0500
@@ -38,15 +38,17 @@ DEFINE_XEN_GUEST_HANDLE(void);
 #ifndef __ASSEMBLY__
 
 #define MAX_NR_SECTION  32  /* at most 32 memory holes */
-typedef struct {
+struct mm_section {
     unsigned long start;  /* start of memory hole */
     unsigned long end;    /* end of memory hole */
-} mm_section_t;
-
-typedef struct {
+};
+typedef struct mm_section mm_section_t;
+
+struct pmt_entry {
     unsigned long mfn : 56;
     unsigned long type: 8;
-} pmt_entry_t;
+};
+typedef struct pmt_entry pmt_entry_t;
 
 #define GPFN_MEM          (0UL << 56) /* Guest pfn is normal mem */
 #define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */
@@ -93,10 +95,11 @@ typedef struct {
  * NB. This may become a 64-bit count with no shift. If this happens then the 
  * structure size will still be 8 bytes, so no other alignments will change.
  */
-typedef struct {
+struct tsc_timestamp {
     unsigned int  tsc_bits;      /* 0: 32 bits read from the CPU's TSC. */
     unsigned int  tsc_bitshift;  /* 4: 'tsc_bits' uses N:N+31 of TSC.   */
-} tsc_timestamp_t; /* 8 bytes */
+}; /* 8 bytes */
+typedef struct tsc_timestamp tsc_timestamp_t;
 
 struct pt_fpreg {
     union {
@@ -105,7 +108,7 @@ struct pt_fpreg {
     } u;
 };
 
-typedef struct cpu_user_regs{
+struct cpu_user_regs {
     /* The following registers are saved by SAVE_MIN: */
     unsigned long b6;  /* scratch */
     unsigned long b7;  /* scratch */
@@ -179,9 +182,10 @@ typedef struct cpu_user_regs{
     unsigned long eml_unat;    /* used for emulating instruction */
     unsigned long rfi_pfs;     /* used for elulating rfi */
 
-}cpu_user_regs_t;
-
-typedef union {
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+
+union vac {
     unsigned long value;
     struct {
         int a_int:1;
@@ -193,9 +197,10 @@ typedef union {
         int a_bsw:1;
         long reserved:57;
     };
-} vac_t;
-
-typedef union {
+};
+typedef union vac vac_t;
+
+union vdc {
     unsigned long value;
     struct {
         int d_vmsw:1;
@@ -206,11 +211,12 @@ typedef union {
         int d_itm:1;
         long reserved:58;
     };
-} vdc_t;
-
-typedef struct {
-    vac_t   vac;
-    vdc_t   vdc;
+};
+typedef union vdc vdc_t;
+
+struct mapped_regs {
+    union vac   vac;
+    union vdc   vdc;
     unsigned long  virt_env_vaddr;
     unsigned long  reserved1[29];
     unsigned long  vhpi;
@@ -290,27 +296,32 @@ typedef struct {
     unsigned long  reserved6[3456];
     unsigned long  vmm_avail[128];
     unsigned long  reserved7[4096];
-} mapped_regs_t;
-
-typedef struct {
-    mapped_regs_t *privregs;
-    int evtchn_vector;
-} arch_vcpu_info_t;
+};
+typedef struct mapped_regs mapped_regs_t;
+
+struct arch_vcpu_info {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
 
 typedef mapped_regs_t vpd_t;
 
-typedef struct {
+struct arch_shared_info {
     unsigned int flags;
     unsigned long start_info_pfn;
-} arch_shared_info_t;
-
-typedef struct {
+
+    /* Interrupt vector for event channel.  */
+    int evtchn_vector;
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_initrd_info {
     unsigned long start;
     unsigned long size;
-} arch_initrd_info_t;
+};
+typedef struct arch_initrd_info arch_initrd_info_t;
 
 #define IA64_COMMAND_LINE_SIZE 512
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
 #define VGCF_FPU_VALID (1<<0)
 #define VGCF_VMX_GUEST (1<<1)
 #define VGCF_IN_KERNEL (1<<2)
@@ -320,19 +331,17 @@ typedef struct vcpu_guest_context {
     unsigned long sys_pgnr;    /* System pages out of domain memory */
     unsigned long vm_assist;   /* VMASST_TYPE_* bitmap, now none on IPF */
 
-    cpu_user_regs_t regs;
-    arch_vcpu_info_t vcpu;
-    arch_shared_info_t shared;
-    arch_initrd_info_t initrd;
+    struct cpu_user_regs regs;
+    struct mapped_regs *privregs;
+    struct arch_shared_info shared;
+    struct arch_initrd_info initrd;
     char cmdline[IA64_COMMAND_LINE_SIZE];
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
 // dom0 vp op
-#define __HYPERVISOR_ia64_dom0vp_op     256 // XXX sufficient large
-                                            // TODO
-                                            // arch specific hypercall
-                                            // number conversion
+#define __HYPERVISOR_ia64_dom0vp_op     __HYPERVISOR_arch_0
 #define IA64_DOM0VP_ioremap             0       // map io space in machine
                                                 // address to dom0 physical
                                                 // address space.
@@ -352,10 +361,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
                                                 // to the corresponding
                                                 // pseudo physical page frame
                                                 // number of the caller domain
-#define IA64_DOM0VP_populate_physmap    16      // allocate machine-contigusous
-                                                // memory region and
-                                                // map it to pseudo physical
-                                                // address
 #define IA64_DOM0VP_zap_physmap         17      // unmap and free pages
                                                 // contained in the specified
                                                 // pseudo physical region
@@ -364,6 +369,32 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
                                                 // address space.
 
 #endif /* !__ASSEMBLY__ */
+
+/* Hyperprivops.  */
+#define HYPERPRIVOP_RFI                        0x1
+#define HYPERPRIVOP_RSM_DT             0x2
+#define HYPERPRIVOP_SSM_DT             0x3
+#define HYPERPRIVOP_COVER              0x4
+#define HYPERPRIVOP_ITC_D              0x5
+#define HYPERPRIVOP_ITC_I              0x6
+#define HYPERPRIVOP_SSM_I              0x7
+#define HYPERPRIVOP_GET_IVR            0x8
+#define HYPERPRIVOP_GET_TPR            0x9
+#define HYPERPRIVOP_SET_TPR            0xa
+#define HYPERPRIVOP_EOI                        0xb
+#define HYPERPRIVOP_SET_ITM            0xc
+#define HYPERPRIVOP_THASH              0xd
+#define HYPERPRIVOP_PTC_GA             0xe
+#define HYPERPRIVOP_ITR_D              0xf
+#define HYPERPRIVOP_GET_RR             0x10
+#define HYPERPRIVOP_SET_RR             0x11
+#define HYPERPRIVOP_SET_KR             0x12
+#define HYPERPRIVOP_FC                 0x13
+#define HYPERPRIVOP_GET_CPUID          0x14
+#define HYPERPRIVOP_GET_PMD            0x15
+#define HYPERPRIVOP_GET_EFLAG          0x16
+#define HYPERPRIVOP_SET_EFLAG          0x17
+#define HYPERPRIVOP_MAX                        0x17
 
 #endif /* __HYPERVISOR_IF_IA64_H__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-x86_32.h  Tue May 30 14:30:34 2006 -0500
@@ -95,15 +95,16 @@ DEFINE_XEN_GUEST_HANDLE(void);
 #define TI_GET_IF(_ti)       ((_ti)->flags & 4)
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct trap_info {
+struct trap_info {
     uint8_t       vector;  /* exception vector                              */
     uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
     uint16_t      cs;      /* code selector                                 */
     unsigned long address; /* code offset                                   */
-} trap_info_t;
+};
+typedef struct trap_info trap_info_t;
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
-typedef struct cpu_user_regs {
+struct cpu_user_regs {
     uint32_t ebx;
     uint32_t ecx;
     uint32_t edx;
@@ -124,7 +125,8 @@ typedef struct cpu_user_regs {
     uint16_t ds, _pad3;
     uint16_t fs, _pad4;
     uint16_t gs, _pad5;
-} cpu_user_regs_t;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
 DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
@@ -133,14 +135,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT
  * The following is all CPU context. Note that the fpu_ctxt block is filled 
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
  */
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
 #define VGCF_I387_VALID (1<<0)
 #define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
     unsigned long flags;                    /* VGCF_* flags                 */
-    cpu_user_regs_t user_regs;              /* User-level CPU registers     */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
@@ -152,25 +154,29 @@ typedef struct vcpu_guest_context {
     unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
     unsigned long failsafe_callback_eip;
     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
-typedef struct arch_shared_info {
+struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
-} arch_shared_info_t;
-
-typedef struct {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_vcpu_info {
     unsigned long cr2;
     unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
-} arch_vcpu_info_t;
-
-typedef struct {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
     unsigned long cs;
     unsigned long eip;
-} xen_callback_t;
+};
+typedef struct xen_callback xen_callback_t;
 
 #endif /* !__ASSEMBLY__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-x86_64.h  Tue May 30 14:30:34 2006 -0500
@@ -150,12 +150,13 @@ struct iret_context {
 #define TI_GET_IF(_ti)       ((_ti)->flags & 4)
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct trap_info {
+struct trap_info {
     uint8_t       vector;  /* exception vector                              */
     uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
     uint16_t      cs;      /* code selector                                 */
     unsigned long address; /* code offset                                   */
-} trap_info_t;
+};
+typedef struct trap_info trap_info_t;
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
 #ifdef __GNUC__
@@ -166,7 +167,7 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 #define __DECL_REG(name) uint64_t r ## name
 #endif
 
-typedef struct cpu_user_regs {
+struct cpu_user_regs {
     uint64_t r15;
     uint64_t r14;
     uint64_t r13;
@@ -195,7 +196,8 @@ typedef struct cpu_user_regs {
     uint16_t ds, _pad4[3];
     uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
     uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
-} cpu_user_regs_t;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
 DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
 
 #undef __DECL_REG
@@ -206,14 +208,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT
  * The following is all CPU context. Note that the fpu_ctxt block is filled 
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
  */
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
 #define VGCF_I387_VALID (1<<0)
 #define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
     unsigned long flags;                    /* VGCF_* flags                 */
-    cpu_user_regs_t user_regs;              /* User-level CPU registers     */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
@@ -228,20 +230,23 @@ typedef struct vcpu_guest_context {
     uint64_t      fs_base;
     uint64_t      gs_base_kernel;
     uint64_t      gs_base_user;
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 
-typedef struct arch_shared_info {
+struct arch_shared_info {
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list;
     unsigned long nmi_reason;
-} arch_shared_info_t;
-
-typedef struct {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_vcpu_info {
     unsigned long cr2;
     unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
-} arch_vcpu_info_t;
+};
+typedef struct arch_vcpu_info  arch_vcpu_info_t;
 
 typedef unsigned long xen_callback_t;
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/callback.h
--- a/xen/include/public/callback.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/callback.h     Tue May 30 14:30:34 2006 -0500
@@ -32,10 +32,11 @@
  * Register a callback.
  */
 #define CALLBACKOP_register                0
-typedef struct callback_register {
+struct callback_register {
      int type;
      xen_callback_t address;
-} callback_register_t;
+};
+typedef struct callback_register callback_register_t;
 DEFINE_XEN_GUEST_HANDLE(callback_register_t);
 
 /*
@@ -45,9 +46,10 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
  * you attempt to unregister such a callback.
  */
 #define CALLBACKOP_unregister              1
-typedef struct callback_unregister {
+struct callback_unregister {
      int type;
-} callback_unregister_t;
+};
+typedef struct callback_unregister callback_unregister_t;
 DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
 
 #endif /* __XEN_PUBLIC_CALLBACK_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/dom0_ops.h     Tue May 30 14:30:34 2006 -0500
@@ -24,14 +24,15 @@
 /************************************************************************/
 
 #define DOM0_GETMEMLIST        2
-typedef struct dom0_getmemlist {
+struct dom0_getmemlist {
     /* IN variables. */
     domid_t       domain;
     unsigned long max_pfns;
     XEN_GUEST_HANDLE(ulong) buffer;
     /* OUT variables. */
     unsigned long num_pfns;
-} dom0_getmemlist_t;
+};
+typedef struct dom0_getmemlist dom0_getmemlist_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
 
 #define DOM0_SCHEDCTL          6
@@ -45,39 +46,43 @@ DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t
 DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t);
 
 #define DOM0_CREATEDOMAIN      8
-typedef struct dom0_createdomain {
+struct dom0_createdomain {
     /* IN parameters */
     uint32_t ssidref;
     xen_domain_handle_t handle;
     /* IN/OUT parameters. */
     /* Identifier for new domain (auto-allocate if zero is specified). */
     domid_t domain;
-} dom0_createdomain_t;
+};
+typedef struct dom0_createdomain dom0_createdomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_createdomain_t);
 
 #define DOM0_DESTROYDOMAIN     9
-typedef struct dom0_destroydomain {
-    /* IN variables. */
-    domid_t domain;
-} dom0_destroydomain_t;
+struct dom0_destroydomain {
+    /* IN variables. */
+    domid_t domain;
+};
+typedef struct dom0_destroydomain dom0_destroydomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_destroydomain_t);
 
 #define DOM0_PAUSEDOMAIN      10
-typedef struct dom0_pausedomain {
+struct dom0_pausedomain {
     /* IN parameters. */
     domid_t domain;
-} dom0_pausedomain_t;
+};
+typedef struct dom0_pausedomain dom0_pausedomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_pausedomain_t);
 
 #define DOM0_UNPAUSEDOMAIN    11
-typedef struct dom0_unpausedomain {
+struct dom0_unpausedomain {
     /* IN parameters. */
     domid_t domain;
-} dom0_unpausedomain_t;
+};
+typedef struct dom0_unpausedomain dom0_unpausedomain_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_unpausedomain_t);
 
 #define DOM0_GETDOMAININFO    12
-typedef struct dom0_getdomaininfo {
+struct dom0_getdomaininfo {
     /* IN variables. */
     domid_t  domain;                  /* NB. IN/OUT variable. */
     /* OUT variables. */
@@ -99,21 +104,23 @@ typedef struct dom0_getdomaininfo {
     uint32_t max_vcpu_id;         /* Maximum VCPUID in use by this domain. */
     uint32_t ssidref;
     xen_domain_handle_t handle;
-} dom0_getdomaininfo_t;
+};
+typedef struct dom0_getdomaininfo dom0_getdomaininfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfo_t);
 
 #define DOM0_SETVCPUCONTEXT   13
-typedef struct dom0_setvcpucontext {
+struct dom0_setvcpucontext {
     /* IN variables. */
     domid_t               domain;
     uint32_t              vcpu;
     /* IN/OUT parameters */
     XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt;
-} dom0_setvcpucontext_t;
+};
+typedef struct dom0_setvcpucontext dom0_setvcpucontext_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setvcpucontext_t);
 
 #define DOM0_MSR              15
-typedef struct dom0_msr {
+struct dom0_msr {
     /* IN variables. */
     uint32_t write;
     cpumap_t cpu_mask;
@@ -123,7 +130,8 @@ typedef struct dom0_msr {
     /* OUT variables. */
     uint32_t out1;
     uint32_t out2;
-} dom0_msr_t;
+};
+typedef struct dom0_msr dom0_msr_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
 
 /*
@@ -131,12 +139,13 @@ DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
  * 1 January, 1970 if the current system time was <system_time>.
  */
 #define DOM0_SETTIME          17
-typedef struct dom0_settime {
+struct dom0_settime {
     /* IN variables. */
     uint32_t secs;
     uint32_t nsecs;
     uint64_t system_time;
-} dom0_settime_t;
+};
+typedef struct dom0_settime dom0_settime_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
 
 #define DOM0_GETPAGEFRAMEINFO 18
@@ -151,44 +160,47 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
 #define LTAB_MASK XTAB
 #define LTABTYPE_MASK (0x7<<LTAB_SHIFT)
 
-typedef struct dom0_getpageframeinfo {
+struct dom0_getpageframeinfo {
     /* IN variables. */
     unsigned long mfn;     /* Machine page frame number to query.       */
     domid_t domain;        /* To which domain does the frame belong?    */
     /* OUT variables. */
     /* Is the page PINNED to a type? */
     uint32_t type;         /* see above type defs */
-} dom0_getpageframeinfo_t;
+};
+typedef struct dom0_getpageframeinfo dom0_getpageframeinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo_t);
 
 /*
  * Read console content from Xen buffer ring.
  */
 #define DOM0_READCONSOLE      19
-typedef struct dom0_readconsole {
+struct dom0_readconsole {
     /* IN variables. */
     uint32_t clear;            /* Non-zero -> clear after reading. */
     /* IN/OUT variables. */
     XEN_GUEST_HANDLE(char) buffer; /* In: Buffer start; Out: Used buffer start */
     uint32_t count;            /* In: Buffer size;  Out: Used buffer size  */
-} dom0_readconsole_t;
+};
+typedef struct dom0_readconsole dom0_readconsole_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_readconsole_t);
 
 /*
  * Set which physical cpus a vcpu can execute on.
  */
 #define DOM0_SETVCPUAFFINITY  20
-typedef struct dom0_setvcpuaffinity {
+struct dom0_setvcpuaffinity {
     /* IN variables. */
     domid_t   domain;
     uint32_t  vcpu;
     cpumap_t  cpumap;
-} dom0_setvcpuaffinity_t;
+};
+typedef struct dom0_setvcpuaffinity dom0_setvcpuaffinity_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setvcpuaffinity_t);
 
 /* Get trace buffers machine base address */
 #define DOM0_TBUFCONTROL       21
-typedef struct dom0_tbufcontrol {
+struct dom0_tbufcontrol {
     /* IN variables */
 #define DOM0_TBUF_GET_INFO     0
 #define DOM0_TBUF_SET_CPU_MASK 1
@@ -203,14 +215,15 @@ typedef struct dom0_tbufcontrol {
     /* OUT variables */
     unsigned long buffer_mfn;
     uint32_t size;
-} dom0_tbufcontrol_t;
+};
+typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_tbufcontrol_t);
 
 /*
  * Get physical information about the host machine
  */
 #define DOM0_PHYSINFO         22
-typedef struct dom0_physinfo {
+struct dom0_physinfo {
     uint32_t threads_per_core;
     uint32_t cores_per_socket;
     uint32_t sockets_per_node;
@@ -219,17 +232,19 @@ typedef struct dom0_physinfo {
     unsigned long total_pages;
     unsigned long free_pages;
     uint32_t hw_cap[8];
-} dom0_physinfo_t;
+};
+typedef struct dom0_physinfo dom0_physinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_physinfo_t);
 
 /*
  * Get the ID of the current scheduler.
  */
 #define DOM0_SCHED_ID        24
-typedef struct dom0_sched_id {
+struct dom0_sched_id {
     /* OUT variable */
     uint32_t sched_id;
-} dom0_sched_id_t;
+};
+typedef struct dom0_sched_id dom0_sched_id_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t);
 
 /*
@@ -246,15 +261,16 @@ DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t)
 #define DOM0_SHADOW_CONTROL_OP_CLEAN       11
 #define DOM0_SHADOW_CONTROL_OP_PEEK        12
 
-typedef struct dom0_shadow_control_stats {
+struct dom0_shadow_control_stats {
     uint32_t fault_count;
     uint32_t dirty_count;
     uint32_t dirty_net_count;
     uint32_t dirty_block_count;
-} dom0_shadow_control_stats_t;
+};
+typedef struct dom0_shadow_control_stats dom0_shadow_control_stats_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_stats_t);
 
-typedef struct dom0_shadow_control {
+struct dom0_shadow_control {
     /* IN variables. */
     domid_t        domain;
     uint32_t       op;
@@ -262,26 +278,29 @@ typedef struct dom0_shadow_control {
     /* IN/OUT variables. */
     unsigned long  pages;        /* size of buffer, updated with actual size */
     /* OUT variables. */
-    dom0_shadow_control_stats_t stats;
-} dom0_shadow_control_t;
+    struct dom0_shadow_control_stats stats;
+};
+typedef struct dom0_shadow_control dom0_shadow_control_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_t);
 
 #define DOM0_SETDOMAINMAXMEM   28
-typedef struct dom0_setdomainmaxmem {
+struct dom0_setdomainmaxmem {
     /* IN variables. */
     domid_t       domain;
     unsigned long max_memkb;
-} dom0_setdomainmaxmem_t;
+};
+typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
 
 #define DOM0_GETPAGEFRAMEINFO2 29   /* batched interface */
-typedef struct dom0_getpageframeinfo2 {
+struct dom0_getpageframeinfo2 {
     /* IN variables. */
     domid_t        domain;
     unsigned long  num;
     /* IN/OUT variables. */
     XEN_GUEST_HANDLE(ulong) array;
-} dom0_getpageframeinfo2_t;
+};
+typedef struct dom0_getpageframeinfo2 dom0_getpageframeinfo2_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo2_t);
 
 /*
@@ -292,7 +311,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
  * (x86-specific).
  */
 #define DOM0_ADD_MEMTYPE         31
-typedef struct dom0_add_memtype {
+struct dom0_add_memtype {
     /* IN variables. */
     unsigned long mfn;
     unsigned long nr_mfns;
@@ -300,7 +319,8 @@ typedef struct dom0_add_memtype {
     /* OUT variables. */
     uint32_t      handle;
     uint32_t      reg;
-} dom0_add_memtype_t;
+};
+typedef struct dom0_add_memtype dom0_add_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
 
 /*
@@ -311,23 +331,25 @@ DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype
  * (x86-specific).
  */
 #define DOM0_DEL_MEMTYPE         32
-typedef struct dom0_del_memtype {
+struct dom0_del_memtype {
     /* IN variables. */
     uint32_t handle;
     uint32_t reg;
-} dom0_del_memtype_t;
+};
+typedef struct dom0_del_memtype dom0_del_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_del_memtype_t);
 
 /* Read current type of an MTRR (x86-specific). */
 #define DOM0_READ_MEMTYPE        33
-typedef struct dom0_read_memtype {
+struct dom0_read_memtype {
     /* IN variables. */
     uint32_t reg;
     /* OUT variables. */
     unsigned long mfn;
     unsigned long nr_mfns;
     uint32_t type;
-} dom0_read_memtype_t;
+};
+typedef struct dom0_read_memtype dom0_read_memtype_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_read_memtype_t);
 
 /* Interface for controlling Xen software performance counters. */
@@ -335,50 +357,56 @@ DEFINE_XEN_GUEST_HANDLE(dom0_read_memtyp
 /* Sub-operations: */
 #define DOM0_PERFCCONTROL_OP_RESET 1   /* Reset all counters to zero. */
 #define DOM0_PERFCCONTROL_OP_QUERY 2   /* Get perfctr information. */
-typedef struct dom0_perfc_desc {
+struct dom0_perfc_desc {
     char         name[80];             /* name of perf counter */
     uint32_t     nr_vals;              /* number of values for this counter */
     uint32_t     vals[64];             /* array of values */
-} dom0_perfc_desc_t;
+};
+typedef struct dom0_perfc_desc dom0_perfc_desc_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_perfc_desc_t);
-typedef struct dom0_perfccontrol {
+
+struct dom0_perfccontrol {
     /* IN variables. */
     uint32_t       op;                /*  DOM0_PERFCCONTROL_OP_??? */
     /* OUT variables. */
     uint32_t       nr_counters;       /*  number of counters */
     XEN_GUEST_HANDLE(dom0_perfc_desc_t) desc; /*  counter information (or NULL) */
-} dom0_perfccontrol_t;
+};
+typedef struct dom0_perfccontrol dom0_perfccontrol_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_perfccontrol_t);
 
 #define DOM0_MICROCODE           35
-typedef struct dom0_microcode {
+struct dom0_microcode {
     /* IN variables. */
     XEN_GUEST_HANDLE(void) data;          /* Pointer to microcode data */
     uint32_t length;                  /* Length of microcode data. */
-} dom0_microcode_t;
+};
+typedef struct dom0_microcode dom0_microcode_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_microcode_t);
 
 #define DOM0_IOPORT_PERMISSION   36
-typedef struct dom0_ioport_permission {
+struct dom0_ioport_permission {
     domid_t  domain;                  /* domain to be affected */
     uint32_t first_port;              /* first port int range */
     uint32_t nr_ports;                /* size of port range */
     uint8_t  allow_access;            /* allow or deny access to range? */
-} dom0_ioport_permission_t;
+};
+typedef struct dom0_ioport_permission dom0_ioport_permission_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_ioport_permission_t);
 
 #define DOM0_GETVCPUCONTEXT      37
-typedef struct dom0_getvcpucontext {
+struct dom0_getvcpucontext {
     /* IN variables. */
     domid_t  domain;                  /* domain to be affected */
     uint32_t vcpu;                    /* vcpu # */
     /* OUT variables. */
     XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt;
-} dom0_getvcpucontext_t;
+};
+typedef struct dom0_getvcpucontext dom0_getvcpucontext_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getvcpucontext_t);
 
 #define DOM0_GETVCPUINFO         43
-typedef struct dom0_getvcpuinfo {
+struct dom0_getvcpuinfo {
     /* IN variables. */
     domid_t  domain;                  /* domain to be affected */
     uint32_t vcpu;                    /* vcpu # */
@@ -389,92 +417,104 @@ typedef struct dom0_getvcpuinfo {
     uint64_t cpu_time;                /* total cpu time consumed (ns) */
     uint32_t cpu;                     /* current mapping   */
     cpumap_t cpumap;                  /* allowable mapping */
-} dom0_getvcpuinfo_t;
+};
+typedef struct dom0_getvcpuinfo dom0_getvcpuinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getvcpuinfo_t);
 
 #define DOM0_GETDOMAININFOLIST   38
-typedef struct dom0_getdomaininfolist {
+struct dom0_getdomaininfolist {
     /* IN variables. */
     domid_t               first_domain;
     uint32_t              max_domains;
     XEN_GUEST_HANDLE(dom0_getdomaininfo_t) buffer;
     /* OUT variables. */
     uint32_t              num_domains;
-} dom0_getdomaininfolist_t;
+};
+typedef struct dom0_getdomaininfolist dom0_getdomaininfolist_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfolist_t);
 
 #define DOM0_PLATFORM_QUIRK      39
 #define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */
 #define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */
 #define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */
-typedef struct dom0_platform_quirk {
+struct dom0_platform_quirk {
     /* IN variables. */
     uint32_t quirk_id;
-} dom0_platform_quirk_t;
+};
+typedef struct dom0_platform_quirk dom0_platform_quirk_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_platform_quirk_t);
 
-#define DOM0_PHYSICAL_MEMORY_MAP 40
-typedef struct dom0_memory_map_entry {
+#define DOM0_PHYSICAL_MEMORY_MAP 40   /* Unimplemented from 3.0.3 onwards */
+struct dom0_memory_map_entry {
     uint64_t start, end;
     uint32_t flags; /* reserved */
     uint8_t  is_ram;
-} dom0_memory_map_entry_t;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
-typedef struct dom0_physical_memory_map {
+
+struct dom0_physical_memory_map {
     /* IN variables. */
     uint32_t max_map_entries;
     /* OUT variables. */
     uint32_t nr_map_entries;
     XEN_GUEST_HANDLE(dom0_memory_map_entry_t) memory_map;
-} dom0_physical_memory_map_t;
+};
+typedef struct dom0_physical_memory_map dom0_physical_memory_map_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_physical_memory_map_t);
 
 #define DOM0_MAX_VCPUS 41
-typedef struct dom0_max_vcpus {
+struct dom0_max_vcpus {
     domid_t  domain;        /* domain to be affected */
     uint32_t max;           /* maximum number of vcpus */
-} dom0_max_vcpus_t;
+};
+typedef struct dom0_max_vcpus dom0_max_vcpus_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_max_vcpus_t);
 
 #define DOM0_SETDOMAINHANDLE 44
-typedef struct dom0_setdomainhandle {
+struct dom0_setdomainhandle {
     domid_t domain;
     xen_domain_handle_t handle;
-} dom0_setdomainhandle_t;
+};
+typedef struct dom0_setdomainhandle dom0_setdomainhandle_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdomainhandle_t);
 
 #define DOM0_SETDEBUGGING 45
-typedef struct dom0_setdebugging {
+struct dom0_setdebugging {
     domid_t domain;
     uint8_t enable;
-} dom0_setdebugging_t;
+};
+typedef struct dom0_setdebugging dom0_setdebugging_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_setdebugging_t);
 
 #define DOM0_IRQ_PERMISSION 46
-typedef struct dom0_irq_permission {
+struct dom0_irq_permission {
     domid_t domain;          /* domain to be affected */
     uint8_t pirq;
     uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
-} dom0_irq_permission_t;
+};
+typedef struct dom0_irq_permission dom0_irq_permission_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_irq_permission_t);
 
 #define DOM0_IOMEM_PERMISSION 47
-typedef struct dom0_iomem_permission {
+struct dom0_iomem_permission {
     domid_t  domain;          /* domain to be affected */
     unsigned long first_mfn;  /* first page (physical page number) in range */
     unsigned long nr_mfns;    /* number of pages in range (>0) */
     uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
-} dom0_iomem_permission_t;
+};
+typedef struct dom0_iomem_permission dom0_iomem_permission_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permission_t);
 
 #define DOM0_HYPERCALL_INIT   48
-typedef struct dom0_hypercall_init {
+struct dom0_hypercall_init {
     domid_t  domain;          /* domain to be affected */
     unsigned long mfn;        /* machine frame to be initialised */
-} dom0_hypercall_init_t;
+};
+typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
 
-typedef struct dom0_op {
+struct dom0_op {
     uint32_t cmd;
     uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
     union {
@@ -517,7 +557,8 @@ typedef struct dom0_op {
         struct dom0_hypercall_init    hypercall_init;
         uint8_t                       pad[128];
     } u;
-} dom0_op_t;
+};
+typedef struct dom0_op dom0_op_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
 
 #endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/event_channel.h
--- a/xen/include/public/event_channel.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/event_channel.h        Tue May 30 14:30:34 2006 -0500
@@ -28,12 +28,13 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
  *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
  */
 #define EVTCHNOP_alloc_unbound    6
-typedef struct evtchn_alloc_unbound {
+struct evtchn_alloc_unbound {
     /* IN parameters */
     domid_t dom, remote_dom;
     /* OUT parameters */
     evtchn_port_t port;
-} evtchn_alloc_unbound_t;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
 
 /*
  * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
@@ -45,13 +46,14 @@ typedef struct evtchn_alloc_unbound {
  *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
  */
 #define EVTCHNOP_bind_interdomain 0
-typedef struct evtchn_bind_interdomain {
+struct evtchn_bind_interdomain {
     /* IN parameters. */
     domid_t remote_dom;
     evtchn_port_t remote_port;
     /* OUT parameters. */
     evtchn_port_t local_port;
-} evtchn_bind_interdomain_t;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
 
 /*
  * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
@@ -66,13 +68,14 @@ typedef struct evtchn_bind_interdomain {
  *     binding cannot be changed.
  */
 #define EVTCHNOP_bind_virq        1
-typedef struct evtchn_bind_virq {
+struct evtchn_bind_virq {
     /* IN parameters. */
     uint32_t virq;
     uint32_t vcpu;
     /* OUT parameters. */
     evtchn_port_t port;
-} evtchn_bind_virq_t;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
 
 /*
  * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
@@ -81,14 +84,15 @@ typedef struct evtchn_bind_virq {
  *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
  */
 #define EVTCHNOP_bind_pirq        2
-typedef struct evtchn_bind_pirq {
+struct evtchn_bind_pirq {
     /* IN parameters. */
     uint32_t pirq;
 #define BIND_PIRQ__WILL_SHARE 1
     uint32_t flags; /* BIND_PIRQ__* */
     /* OUT parameters. */
     evtchn_port_t port;
-} evtchn_bind_pirq_t;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
 
 /*
  * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
@@ -97,11 +101,12 @@ typedef struct evtchn_bind_pirq {
  *     may not be changed.
  */
 #define EVTCHNOP_bind_ipi         7
-typedef struct evtchn_bind_ipi {
-    uint32_t vcpu;
-    /* OUT parameters. */
-    evtchn_port_t port;
-} evtchn_bind_ipi_t;
+struct evtchn_bind_ipi {
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
 
 /*
  * EVTCHNOP_close: Close a local event channel <port>. If the channel is
@@ -109,20 +114,22 @@ typedef struct evtchn_bind_ipi {
  * (EVTCHNSTAT_unbound), awaiting a new connection.
  */
 #define EVTCHNOP_close            3
-typedef struct evtchn_close {
-    /* IN parameters. */
-    evtchn_port_t port;
-} evtchn_close_t;
+struct evtchn_close {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
 
 /*
  * EVTCHNOP_send: Send an event to the remote end of the channel whose local
  * endpoint is <port>.
  */
 #define EVTCHNOP_send             4
-typedef struct evtchn_send {
-    /* IN parameters. */
-    evtchn_port_t port;
-} evtchn_send_t;
+struct evtchn_send {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
 
 /*
  * EVTCHNOP_status: Get the current status of the communication channel which
@@ -133,7 +140,7 @@ typedef struct evtchn_send {
  *     channel for which <dom> is not DOMID_SELF.
  */
 #define EVTCHNOP_status           5
-typedef struct evtchn_status {
+struct evtchn_status {
     /* IN parameters */
     domid_t  dom;
     evtchn_port_t port;
@@ -157,7 +164,8 @@ typedef struct evtchn_status {
         uint32_t pirq;      /* EVTCHNSTAT_pirq        */
         uint32_t virq;      /* EVTCHNSTAT_virq        */
     } u;
-} evtchn_status_t;
+};
+typedef struct evtchn_status evtchn_status_t;
 
 /*
  * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
@@ -172,41 +180,44 @@ typedef struct evtchn_status {
  *     has its binding reset to vcpu0).
  */
 #define EVTCHNOP_bind_vcpu        8
-typedef struct evtchn_bind_vcpu {
-    /* IN parameters. */
-    evtchn_port_t port;
-    uint32_t vcpu;
-} evtchn_bind_vcpu_t;
+struct evtchn_bind_vcpu {
+    /* IN parameters. */
+    evtchn_port_t port;
+    uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
 
 /*
  * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
  * a notification to the appropriate VCPU if an event is pending.
  */
 #define EVTCHNOP_unmask           9
-typedef struct evtchn_unmask {
-    /* IN parameters. */
-    evtchn_port_t port;
-} evtchn_unmask_t;
+struct evtchn_unmask {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
 
 /*
  * Argument to event_channel_op_compat() hypercall. Superceded by new
  * event_channel_op() hypercall since 0x00030202.
  */
-typedef struct evtchn_op {
+struct evtchn_op {
     uint32_t cmd; /* EVTCHNOP_* */
     union {
-        evtchn_alloc_unbound_t    alloc_unbound;
-        evtchn_bind_interdomain_t bind_interdomain;
-        evtchn_bind_virq_t        bind_virq;
-        evtchn_bind_pirq_t        bind_pirq;
-        evtchn_bind_ipi_t         bind_ipi;
-        evtchn_close_t            close;
-        evtchn_send_t             send;
-        evtchn_status_t           status;
-        evtchn_bind_vcpu_t        bind_vcpu;
-        evtchn_unmask_t           unmask;
+        struct evtchn_alloc_unbound    alloc_unbound;
+        struct evtchn_bind_interdomain bind_interdomain;
+        struct evtchn_bind_virq        bind_virq;
+        struct evtchn_bind_pirq        bind_pirq;
+        struct evtchn_bind_ipi         bind_ipi;
+        struct evtchn_close            close;
+        struct evtchn_send             send;
+        struct evtchn_status           status;
+        struct evtchn_bind_vcpu        bind_vcpu;
+        struct evtchn_unmask           unmask;
     } u;
-} evtchn_op_t;
+};
+typedef struct evtchn_op evtchn_op_t;
 DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
 
 #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/grant_table.h  Tue May 30 14:30:34 2006 -0500
@@ -71,7 +71,7 @@
  * [XEN]: This field is written by Xen and read by the sharing guest.
  * [GST]: This field is written by the guest and read by Xen.
  */
-typedef struct grant_entry {
+struct grant_entry {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
 #if defined(__powerpc__)
     ulong flags;
@@ -85,7 +85,8 @@ typedef struct grant_entry {
      * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
      */
     uint32_t frame;
-} grant_entry_t;
+};
+typedef struct grant_entry grant_entry_t;
 
 /*
  * Type of grant entry.
@@ -160,7 +161,7 @@ typedef uint32_t grant_handle_t;
  *     to be accounted to the correct grant reference!
  */
 #define GNTTABOP_map_grant_ref        0
-typedef struct gnttab_map_grant_ref {
+struct gnttab_map_grant_ref {
     /* IN parameters. */
     uint64_t host_addr;
     uint32_t flags;               /* GNTMAP_* */
@@ -170,7 +171,8 @@ typedef struct gnttab_map_grant_ref {
     int16_t  status;              /* GNTST_* */
     grant_handle_t handle;
     uint64_t dev_bus_addr;
-} gnttab_map_grant_ref_t;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
 
 /*
@@ -185,14 +187,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant
  *     mappings will remain in the device or host TLBs.
  */
 #define GNTTABOP_unmap_grant_ref      1
-typedef struct gnttab_unmap_grant_ref {
+struct gnttab_unmap_grant_ref {
     /* IN parameters. */
     uint64_t host_addr;
     uint64_t dev_bus_addr;
     grant_handle_t handle;
     /* OUT parameters. */
     int16_t  status;              /* GNTST_* */
-} gnttab_unmap_grant_ref_t;
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
 
 /*
@@ -205,14 +208,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_gra
  *  3. Xen may not support more than a single grant-table page per domain.
  */
 #define GNTTABOP_setup_table          2
-typedef struct gnttab_setup_table {
+struct gnttab_setup_table {
     /* IN parameters. */
     domid_t  dom;
     uint32_t nr_frames;
     /* OUT parameters. */
     int16_t  status;              /* GNTST_* */
     XEN_GUEST_HANDLE(ulong) frame_list;
-} gnttab_setup_table_t;
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
 
 /*
@@ -220,12 +224,13 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_setup_tab
  * xen console. Debugging use only.
  */
 #define GNTTABOP_dump_table           3
-typedef struct gnttab_dump_table {
+struct gnttab_dump_table {
     /* IN parameters. */
     domid_t dom;
     /* OUT parameters. */
     int16_t status;               /* GNTST_* */
-} gnttab_dump_table_t;
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
 
 /*
@@ -237,14 +242,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
  * to the calling domain *unless* the error is GNTST_bad_page.
  */
 #define GNTTABOP_transfer                4
-typedef struct gnttab_transfer {
+struct gnttab_transfer {
     /* IN parameters. */
     unsigned long mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
     int16_t       status;
-} gnttab_transfer_t;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/hvm/ioreq.h    Tue May 30 14:30:34 2006 -0500
@@ -41,7 +41,7 @@
  * prepare this structure and notify service OS and DM by sending
  * virq
  */
-typedef struct {
+struct ioreq {
     uint64_t addr;          /*  physical address            */
     uint64_t size;          /*  size in bytes               */
     uint64_t count;         /*  for rep prefixes            */
@@ -55,31 +55,35 @@ typedef struct {
     uint8_t df:1;
     uint8_t type;           /* I/O type                     */
     uint64_t io_count;      /* How many IO done on a vcpu   */
-} ioreq_t;
+};
+typedef struct ioreq ioreq_t;
 
 #define MAX_VECTOR      256
 #define BITS_PER_BYTE   8
 #define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
 #define INTR_LEN_32     (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
 
-typedef struct {
+struct global_iodata {
     uint16_t    pic_elcr;
     uint16_t    pic_irr;
     uint16_t    pic_last_irr;
     uint16_t    pic_clear_irr;
-} global_iodata_t;
+};
+typedef struct global_iodata global_iodata_t;
 
-typedef struct {
-    ioreq_t         vp_ioreq;
+struct vcpu_iodata {
+    struct ioreq         vp_ioreq;
     /* Event channel port */
     unsigned int    vp_eport;   /* VMX vcpu uses this to notify DM */
     unsigned int    dm_eport;   /* DM uses this to notify VMX vcpu */
-} vcpu_iodata_t;
+};
+typedef struct vcpu_iodata vcpu_iodata_t;
 
-typedef struct {
-    global_iodata_t sp_global;
-    vcpu_iodata_t   vcpu_iodata[1];
-} shared_iopage_t;
+struct shared_iopage {
+    struct global_iodata sp_global;
+    struct vcpu_iodata   vcpu_iodata[1];
+};
+typedef struct shared_iopage shared_iopage_t;
 
 #endif /* _IOREQ_H_ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/vmx_assist.h
--- a/xen/include/public/hvm/vmx_assist.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/hvm/vmx_assist.h       Tue May 30 14:30:34 2006 -0500
@@ -37,7 +37,7 @@ union vmcs_arbytes {
 /*
  * World switch state
  */
-typedef struct vmx_assist_context {
+struct vmx_assist_context {
     uint32_t  eip;        /* execution pointer */
     uint32_t  esp;        /* stack pointer */
     uint32_t  eflags;     /* flags register */
@@ -80,7 +80,8 @@ typedef struct vmx_assist_context {
     uint32_t  ldtr_limit;
     uint32_t  ldtr_base;
     union vmcs_arbytes ldtr_arbytes;
-} vmx_assist_context_t;
+};
+typedef struct vmx_assist_context vmx_assist_context_t;
 
 #endif /* __ASSEMBLY__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/blkif.h     Tue May 30 14:30:34 2006 -0500
@@ -39,7 +39,7 @@
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
-typedef struct blkif_request {
+struct blkif_request {
     uint8_t        operation;    /* BLKIF_OP_???                         */
     uint8_t        nr_segments;  /* number of segments                   */
     blkif_vdev_t   handle;       /* only for read/write requests         */
@@ -51,13 +51,15 @@ typedef struct blkif_request {
         /* @last_sect: last sector in frame to transfer (inclusive).     */
         uint8_t     first_sect, last_sect;
     } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-} blkif_request_t;
+};
+typedef struct blkif_request blkif_request_t;
 
-typedef struct blkif_response {
+struct blkif_response {
     uint64_t        id;              /* copied from request */
     uint8_t         operation;       /* copied from request */
     int16_t         status;          /* BLKIF_RSP_???       */
-} blkif_response_t;
+};
+typedef struct blkif_response blkif_response_t;
 
 #define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
 #define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
@@ -66,7 +68,7 @@ typedef struct blkif_response {
  * Generate blkif ring structures and types.
  */
 
-DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
 
 #define VDISK_CDROM        0x1
 #define VDISK_REMOVABLE    0x2
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/netif.h     Tue May 30 14:30:34 2006 -0500
@@ -13,10 +13,10 @@
 #include "../grant_table.h"
 
 /*
- * Note that there is *never* any need to notify the backend when enqueuing
- * receive requests (netif_rx_request_t). Notifications after enqueuing any
- * other type of message should be conditional on the appropriate req_event
- * or rsp_event field in the shared ring.
+ * Note that there is *never* any need to notify the backend when
+ * enqueuing receive requests (struct netif_rx_request). Notifications
+ * after enqueuing any other type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
  */
 
 /* Protocol checksum field is blank in the packet (hardware offload)? */
@@ -27,23 +27,26 @@
 #define _NETTXF_data_validated (1)
 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
 
-typedef struct netif_tx_request {
+struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
     uint16_t offset;       /* Offset within buffer page */
     uint16_t flags;        /* NETTXF_* */
     uint16_t id;           /* Echoed in response message. */
     uint16_t size;         /* Packet size in bytes.       */
-} netif_tx_request_t;
+};
+typedef struct netif_tx_request netif_tx_request_t;
 
-typedef struct netif_tx_response {
+struct netif_tx_response {
     uint16_t id;
     int16_t  status;       /* NETIF_RSP_* */
-} netif_tx_response_t;
+};
+typedef struct netif_tx_response netif_tx_response_t;
 
-typedef struct {
+struct netif_rx_request {
     uint16_t    id;        /* Echoed in response message.        */
     grant_ref_t gref;      /* Reference to incoming granted frame */
-} netif_rx_request_t;
+};
+typedef struct netif_rx_request netif_rx_request_t;
 
 /* Packet data has been validated against protocol checksum. */
 #define _NETRXF_data_validated (0)
@@ -53,19 +56,20 @@ typedef struct {
 #define _NETRXF_csum_blank     (1)
 #define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
 
-typedef struct {
+struct netif_rx_response {
     uint16_t id;
     uint16_t offset;       /* Offset in page of start of received packet  */
     uint16_t flags;        /* NETRXF_* */
     int16_t  status;       /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
-} netif_rx_response_t;
+};
+typedef struct netif_rx_response netif_rx_response_t;
 
 /*
  * Generate netif ring structures and types.
  */
 
-DEFINE_RING_TYPES(netif_tx, netif_tx_request_t, netif_tx_response_t);
-DEFINE_RING_TYPES(netif_rx, netif_rx_request_t, netif_rx_response_t);
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
 
 #define NETIF_RSP_DROPPED         -2
 #define NETIF_RSP_ERROR           -1
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/tpmif.h
--- a/xen/include/public/io/tpmif.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/tpmif.h     Tue May 30 14:30:34 2006 -0500
@@ -18,12 +18,13 @@
 
 #include "../grant_table.h"
 
-typedef struct {
+struct tpmif_tx_request {
     unsigned long addr;   /* Machine address of packet.   */
     grant_ref_t ref;      /* grant table access reference */
     uint16_t unused;
     uint16_t size;        /* Packet size in bytes.        */
-} tpmif_tx_request_t;
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
 
 /*
  * The TPMIF_TX_RING_SIZE defines the number of pages the
@@ -35,13 +36,15 @@ typedef uint32_t TPMIF_RING_IDX;
 
 /* This structure must fit in a memory page. */
 
-typedef struct {
-    tpmif_tx_request_t req;
-} tpmif_ring_t;
+struct tpmif_ring {
+    struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
 
-typedef struct {
-    tpmif_ring_t ring[TPMIF_TX_RING_SIZE];
-} tpmif_tx_interface_t;
+struct tpmif_tx_interface {
+    struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
 
 #endif
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/xenbus.h    Tue May 30 14:30:34 2006 -0500
@@ -9,34 +9,37 @@
 #ifndef _XEN_PUBLIC_IO_XENBUS_H
 #define _XEN_PUBLIC_IO_XENBUS_H
 
-/* The state of either end of the Xenbus, i.e. the current communication
-   status of initialisation across the bus.  States here imply nothing about
-   the state of the connection between the driver and the kernel's device
-   layers.  */
-typedef enum
-{
-  XenbusStateUnknown      = 0,
-  XenbusStateInitialising = 1,
-  XenbusStateInitWait     = 2,  /* Finished early initialisation, but waiting
-                                   for information from the peer or hotplug
-                                  scripts. */
-  XenbusStateInitialised  = 3,  /* Initialised and waiting for a connection
-                                  from the peer. */
-  XenbusStateConnected    = 4,
-  XenbusStateClosing      = 5,  /* The device is being closed due to an error
-                                  or an unplug event. */
-  XenbusStateClosed       = 6
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
 
-} XenbusState;
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed       = 6
+};
+typedef enum xenbus_state XenbusState;
 
 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/memory.h
--- a/xen/include/public/memory.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/memory.h       Tue May 30 14:30:34 2006 -0500
@@ -17,7 +17,7 @@
 #define XENMEM_increase_reservation 0
 #define XENMEM_decrease_reservation 1
 #define XENMEM_populate_physmap     6
-typedef struct xen_memory_reservation {
+struct xen_memory_reservation {
 
     /*
      * XENMEM_increase_reservation:
@@ -49,7 +49,8 @@ typedef struct xen_memory_reservation {
      */
     domid_t        domid;
 
-} xen_memory_reservation_t;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
 
 /*
@@ -74,7 +75,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reser
  * arg == addr of xen_machphys_mfn_list_t.
  */
 #define XENMEM_machphys_mfn_list    5
-typedef struct xen_machphys_mfn_list {
+struct xen_machphys_mfn_list {
     /*
      * Size of the 'extent_start' array. Fewer entries will be filled if the
      * machphys table is smaller than max_extents * 2MB.
@@ -93,7 +94,8 @@ typedef struct xen_machphys_mfn_list {
      * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
      */
     unsigned int nr_extents;
-} xen_machphys_mfn_list_t;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
 
 /*
@@ -102,7 +104,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn
  * arg == addr of xen_add_to_physmap_t.
  */
 #define XENMEM_add_to_physmap      7
-typedef struct xen_add_to_physmap {
+struct xen_add_to_physmap {
     /* Which domain to change the mapping for. */
     domid_t domid;
 
@@ -116,7 +118,8 @@ typedef struct xen_add_to_physmap {
 
     /* GPFN where the source mapping page should appear. */
     unsigned long gpfn;
-} xen_add_to_physmap_t;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
 
 /*
@@ -124,7 +127,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_add_to_physm
  * code on failure. This call only works for auto-translated guests.
  */
 #define XENMEM_translate_gpfn_list  8
-typedef struct xen_translate_gpfn_list {
+struct xen_translate_gpfn_list {
     /* Which domain to translate for? */
     domid_t domid;
 
@@ -139,8 +142,37 @@ typedef struct xen_translate_gpfn_list {
      * list (in which case each input GPFN is overwritten with the output MFN).
      */
     XEN_GUEST_HANDLE(ulong) mfn_list;
-} xen_translate_gpfn_list_t;
+};
+typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries;
+
+    /*
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
+     */
+    XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ */
+#define XENMEM_machine_memory_map      10
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/nmi.h
--- a/xen/include/public/nmi.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/nmi.h  Tue May 30 14:30:34 2006 -0500
@@ -34,10 +34,11 @@
  * arg == pointer to xennmi_callback structure.
  */
 #define XENNMI_register_callback   0
-typedef struct xennmi_callback {
+struct xennmi_callback {
     unsigned long handler_address;
     unsigned long pad;
-} xennmi_callback_t;
+};
+typedef struct xennmi_callback xennmi_callback_t;
 DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/physdev.h      Tue May 30 14:30:34 2006 -0500
@@ -14,10 +14,11 @@
  * @arg == pointer to physdev_eoi structure.
  */
 #define PHYSDEVOP_eoi                   12
-typedef struct physdev_eoi {
+struct physdev_eoi {
     /* IN */
     uint32_t irq;
-} physdev_eoi_t;
+};
+typedef struct physdev_eoi physdev_eoi_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
 
 /*
@@ -25,12 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
  * @arg == pointer to physdev_irq_status_query structure.
  */
 #define PHYSDEVOP_irq_status_query       5
-typedef struct physdev_irq_status_query {
+struct physdev_irq_status_query {
     /* IN */
     uint32_t irq;
     /* OUT */
     uint32_t flags; /* XENIRQSTAT_* */
-} physdev_irq_status_query_t;
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
 
 /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
@@ -42,10 +44,11 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_stat
  * @arg == pointer to physdev_set_iopl structure.
  */
 #define PHYSDEVOP_set_iopl               6
-typedef struct physdev_set_iopl {
+struct physdev_set_iopl {
     /* IN */
     uint32_t iopl;
-} physdev_set_iopl_t;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
 
 /*
@@ -53,11 +56,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl
  * @arg == pointer to physdev_set_iobitmap structure.
  */
 #define PHYSDEVOP_set_iobitmap           7
-typedef struct physdev_set_iobitmap {
+struct physdev_set_iobitmap {
     /* IN */
     uint8_t *bitmap;
     uint32_t nr_ports;
-} physdev_set_iobitmap_t;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
 
 /*
@@ -66,13 +70,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iobi
  */
 #define PHYSDEVOP_apic_read              8
 #define PHYSDEVOP_apic_write             9
-typedef struct physdev_apic {
+struct physdev_apic {
     /* IN */
     unsigned long apic_physbase;
     uint32_t reg;
     /* IN or OUT */
     uint32_t value;
-} physdev_apic_t;
+};
+typedef struct physdev_apic physdev_apic_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
 
 /*
@@ -81,28 +86,30 @@ DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
  */
 #define PHYSDEVOP_alloc_irq_vector      10
 #define PHYSDEVOP_free_irq_vector       11
-typedef struct physdev_irq {
+struct physdev_irq {
     /* IN */
     uint32_t irq;
     /* IN or OUT */
     uint32_t vector;
-} physdev_irq_t;
+};
+typedef struct physdev_irq physdev_irq_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 
 /*
  * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
  * hypercall since 0x00030202.
  */
-typedef struct physdev_op {
+struct physdev_op {
     uint32_t cmd;
     union {
-        physdev_irq_status_query_t      irq_status_query;
-        physdev_set_iopl_t              set_iopl;
-        physdev_set_iobitmap_t          set_iobitmap;
-        physdev_apic_t                  apic_op;
-        physdev_irq_t                   irq_op;
+        struct physdev_irq_status_query      irq_status_query;
+        struct physdev_set_iopl              set_iopl;
+        struct physdev_set_iobitmap          set_iobitmap;
+        struct physdev_apic                  apic_op;
+        struct physdev_irq                   irq_op;
     } u;
-} physdev_op_t;
+};
+typedef struct physdev_op physdev_op_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched.h
--- a/xen/include/public/sched.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/sched.h        Tue May 30 14:30:34 2006 -0500
@@ -46,9 +46,10 @@
  * @arg == pointer to sched_shutdown structure.
  */
 #define SCHEDOP_shutdown    2
-typedef struct sched_shutdown {
+struct sched_shutdown {
     unsigned int reason; /* SHUTDOWN_* */
-} sched_shutdown_t;
+};
+typedef struct sched_shutdown sched_shutdown_t;
 DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
 
 /*
@@ -57,11 +58,12 @@ DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t
  * @arg == pointer to sched_poll structure.
  */
 #define SCHEDOP_poll        3
-typedef struct sched_poll {
+struct sched_poll {
     XEN_GUEST_HANDLE(evtchn_port_t) ports;
     unsigned int nr_ports;
     uint64_t timeout;
-} sched_poll_t;
+};
+typedef struct sched_poll sched_poll_t;
 DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
 
 /*
@@ -71,10 +73,11 @@ DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
  * @arg == pointer to sched_remote_shutdown structure.
  */
 #define SCHEDOP_remote_shutdown        4
-typedef struct sched_remote_shutdown {
+struct sched_remote_shutdown {
     domid_t domain_id;         /* Remote domain ID */
     unsigned int reason;       /* SHUTDOWN_xxx reason */
-} sched_remote_shutdown_t;
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
 DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
 
 /*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched_ctl.h
--- a/xen/include/public/sched_ctl.h    Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/sched_ctl.h    Tue May 30 14:30:34 2006 -0500
@@ -10,6 +10,7 @@
 /* Scheduler types. */
 #define SCHED_BVT      0
 #define SCHED_SEDF     4
+#define SCHED_CREDIT   5
 
 /* Set or get info? */
 #define SCHED_INFO_PUT 0
@@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
             uint32_t extratime;
             uint32_t weight;
         } sedf;
+        struct sched_credit_adjdom {
+            uint16_t weight;
+            uint16_t cap;
+        } credit;
     } u;
 };
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/vcpu.h Tue May 30 14:30:34 2006 -0500
@@ -56,7 +56,7 @@
  * @extra_arg == pointer to vcpu_runstate_info structure.
  */
 #define VCPUOP_get_runstate_info    4
-typedef struct vcpu_runstate_info {
+struct vcpu_runstate_info {
     /* VCPU's current state (RUNSTATE_*). */
     int      state;
     /* When was current state entered (system time, ns)? */
@@ -66,7 +66,8 @@ typedef struct vcpu_runstate_info {
      * guaranteed not to drift from system time.
      */
     uint64_t time[4];
-} vcpu_runstate_info_t;
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
 
 /* VCPU is currently running on a physical CPU. */
 #define RUNSTATE_running  0
@@ -99,12 +100,13 @@ typedef struct vcpu_runstate_info {
  * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
  */
 #define VCPUOP_register_runstate_memory_area 5
-typedef struct vcpu_register_runstate_memory_area {
+struct vcpu_register_runstate_memory_area {
     union {
         struct vcpu_runstate_info *v;
         uint64_t p;
     } addr;
-} vcpu_register_runstate_memory_area_t;
+};
+typedef struct vcpu_register_runstate_memory_area 
vcpu_register_runstate_memory_area_t;
 
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
diff -r e74246451527 -r f54d38cea8ac xen/include/public/version.h
--- a/xen/include/public/version.h      Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/version.h      Tue May 30 14:30:34 2006 -0500
@@ -22,12 +22,13 @@ typedef char xen_extraversion_t[16];
 
 /* arg == xen_compile_info_t. */
 #define XENVER_compile_info 2
-typedef struct xen_compile_info {
+struct xen_compile_info {
     char compiler[64];
     char compile_by[16];
     char compile_domain[32];
     char compile_date[32];
-} xen_compile_info_t;
+};
+typedef struct xen_compile_info xen_compile_info_t;
 
 #define XENVER_capabilities 3
 typedef char xen_capabilities_info_t[1024];
@@ -38,15 +39,17 @@ typedef char xen_changeset_info_t[64];
 #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
 
 #define XENVER_platform_parameters 5
-typedef struct xen_platform_parameters {
+struct xen_platform_parameters {
     unsigned long virt_start;
-} xen_platform_parameters_t;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
 
 #define XENVER_get_features 6
-typedef struct xen_feature_info {
+struct xen_feature_info {
     unsigned int submap_idx;    /* IN: which 32-bit submap to return */
     uint32_t     submap;        /* OUT: 32-bit submap */
-} xen_feature_info_t;
+};
+typedef struct xen_feature_info xen_feature_info_t;
 
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
diff -r e74246451527 -r f54d38cea8ac xen/include/public/xen.h
--- a/xen/include/public/xen.h  Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/xen.h  Tue May 30 14:30:34 2006 -0500
@@ -195,7 +195,7 @@
 #define MMUEXT_NEW_USER_BASEPTR 15
 
 #ifndef __ASSEMBLY__
-typedef struct mmuext_op {
+struct mmuext_op {
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
@@ -209,7 +209,8 @@ typedef struct mmuext_op {
         /* TLB_FLUSH_MULTI, INVLPG_MULTI */
         void *vcpumask;
     } arg2;
-} mmuext_op_t;
+};
+typedef struct mmuext_op mmuext_op_t;
 DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #endif
 
@@ -273,20 +274,22 @@ typedef uint16_t domid_t;
  * Send an array of these to HYPERVISOR_mmu_update().
  * NB. The fields are natural pointer/address size for this architecture.
  */
-typedef struct mmu_update {
+struct mmu_update {
     uint64_t ptr;       /* Machine address of PTE. */
     uint64_t val;       /* New contents of PTE.    */
-} mmu_update_t;
+};
+typedef struct mmu_update mmu_update_t;
 DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
 
 /*
  * Send an array of these to HYPERVISOR_multicall().
  * NB. The fields are natural register size for this architecture.
  */
-typedef struct multicall_entry {
+struct multicall_entry {
     unsigned long op, result;
     unsigned long args[6];
-} multicall_entry_t;
+};
+typedef struct multicall_entry multicall_entry_t;
 DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
 
 /*
@@ -295,7 +298,7 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_
  */
 #define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
 
-typedef struct vcpu_time_info {
+struct vcpu_time_info {
     /*
      * Updates to the following values are preceded and followed by an
      * increment of 'version'. The guest can therefore detect updates by
@@ -319,9 +322,10 @@ typedef struct vcpu_time_info {
     uint32_t tsc_to_system_mul;
     int8_t   tsc_shift;
     int8_t   pad1[3];
-} vcpu_time_info_t; /* 32 bytes */
-
-typedef struct vcpu_info {
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+struct vcpu_info {
     /*
      * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
      * a pending notification for a particular VCPU. It is then cleared 
@@ -354,16 +358,17 @@ typedef struct vcpu_info {
 #endif
     uint8_t evtchn_upcall_mask;
     unsigned long evtchn_pending_sel;
-    arch_vcpu_info_t arch;
-    vcpu_time_info_t time;
-} vcpu_info_t; /* 64 bytes (x86) */
+    struct arch_vcpu_info arch;
+    struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+typedef struct vcpu_info vcpu_info_t;
 
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
  * NB. We expect that this struct is smaller than a page.
  */
-typedef struct shared_info {
-    vcpu_info_t vcpu_info[MAX_VIRT_CPUS];
+struct shared_info {
+    struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
 
     /*
      * A domain can create "event channels" on which it can send and receive
@@ -407,9 +412,10 @@ typedef struct shared_info {
     uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
     uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
-    arch_shared_info_t arch;
-
-} shared_info_t;
+    struct arch_shared_info arch;
+
+};
+typedef struct shared_info shared_info_t;
 
 /*
  * Start-of-day memory layout for the initial domain (DOM0):
@@ -437,7 +443,7 @@ typedef struct shared_info {
  */
 
 #define MAX_GUEST_CMDLINE 1024
-typedef struct start_info {
+struct start_info {
     /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
     char magic[32];             /* "xen-<version>-<platform>".            */
     unsigned long nr_pages;     /* Total pages allocated to this domain.  */
@@ -454,7 +460,8 @@ typedef struct start_info {
     unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
     unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
     int8_t cmd_line[MAX_GUEST_CMDLINE];
-} start_info_t;
+};
+typedef struct start_info start_info_t;
 
 /* These flags are passed in the 'flags' field of start_info_t. */
 #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/xenoprof.h
--- a/xen/include/public/xenoprof.h     Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/xenoprof.h     Tue May 30 14:30:34 2006 -0500
@@ -41,7 +41,7 @@ struct event_log {
 };
 
 /* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
-typedef struct xenoprof_buf {
+struct xenoprof_buf {
     uint32_t event_head;
     uint32_t event_tail;
     uint32_t event_size;
@@ -51,10 +51,11 @@ typedef struct xenoprof_buf {
     uint64_t user_samples;
     uint64_t lost_samples;
     struct event_log event_log[1];
-} xenoprof_buf_t;
+};
+typedef struct xenoprof_buf xenoprof_buf_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
 
-typedef struct xenoprof_init {
+struct xenoprof_init {
     int32_t  max_samples;
     int32_t  num_events;
     int32_t  is_primary;
@@ -62,10 +63,11 @@ typedef struct xenoprof_init {
     int32_t  bufsize;
     uint64_t buf_maddr;
     char cpu_type[XENOPROF_CPU_TYPE_SIZE];
-} xenoprof_init_t;
+};
+typedef struct xenoprof_init xenoprof_init_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
 
-typedef struct xenoprof_counter {
+struct xenoprof_counter {
     uint32_t ind;
     uint64_t count;
     uint32_t enabled;
@@ -74,7 +76,8 @@ typedef struct xenoprof_counter {
     uint32_t kernel;
     uint32_t user;
     uint64_t unit_mask;
-} xenoprof_counter_t;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
 
 
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h       Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/hypercall.h       Tue May 30 14:30:34 2006 -0500
@@ -80,7 +80,7 @@ do_vcpu_op(
 
 extern long
 do_acm_op(
-    XEN_GUEST_HANDLE(acm_op_t) u_acm_op);
+    int cmd, XEN_GUEST_HANDLE(void) arg);
 
 extern long
 do_nmi_op(
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/sched-if.h        Tue May 30 14:30:34 2006 -0500
@@ -58,6 +58,8 @@ struct scheduler {
     char *opt_name;         /* option name for this scheduler    */
     unsigned int sched_id;  /* ID for this scheduler             */
 
+    void         (*init)           (void);
+    void         (*tick)           (unsigned int cpu);
     int          (*alloc_task)     (struct vcpu *);
     void         (*add_task)       (struct vcpu *);
     void         (*free_task)      (struct domain *);
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/softirq.h Tue May 30 14:30:34 2006 -0500
@@ -26,6 +26,19 @@ asmlinkage void do_softirq(void);
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, softirq_handler handler);
 
+static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
+{
+    int cpu;
+
+    for_each_cpu_mask(cpu, mask)
+    {
+        if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
+            cpu_clear(cpu, mask);
+    }
+
+    smp_send_event_check_mask(mask);
+}
+
 static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
 {
     if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/util.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,115 @@
+/******************************************************************************
+ * arch/ia64/xen/util.c
+ * This file is the ia64 counterpart of drivers/xen/util.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <xen/driver_util.h>
+
+struct vm_struct *alloc_vm_area(unsigned long size)
+{
+       int order;
+       unsigned long virt;
+       unsigned long nr_pages;
+       struct vm_struct* area;
+       
+       order = get_order(size);
+       virt = __get_free_pages(GFP_KERNEL, order);
+       if (virt == 0) {
+               goto err0;
+       }
+       nr_pages = 1 << order;
+       scrub_pages(virt, nr_pages);
+       
+       area = kmalloc(sizeof(*area), GFP_KERNEL);
+       if (area == NULL) {
+               goto err1;
+       }
+       
+        area->flags = VM_IOREMAP;//XXX
+        area->addr = (void*)virt;
+        area->size = size;
+        area->pages = NULL; //XXX
+        area->nr_pages = nr_pages;
+        area->phys_addr = __pa(virt);
+
+       return area;
+
+err1:
+       free_pages(virt, order);
+err0:
+       return NULL;
+       
+}
+EXPORT_SYMBOL_GPL(alloc_vm_area);
+
+void free_vm_area(struct vm_struct *area)
+{
+       unsigned int order = get_order(area->size);
+       unsigned long i;
+
+       // This area is used for foreign page mapping.
+       // So underlying machine page may not be assigned.
+       for (i = 0; i < (1 << order); i++) {
+               unsigned long ret;
+               unsigned long gpfn = (area->phys_addr >> PAGE_SHIFT) + i;
+               struct xen_memory_reservation reservation = {
+                       .nr_extents   = 1,
+                       .address_bits = 0,
+                       .extent_order = 0,
+                       .domid        = DOMID_SELF
+               };
+               set_xen_guest_handle(reservation.extent_start, &gpfn);
+               ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
+                                          &reservation);
+               BUG_ON(ret != 1);
+       }
+       free_pages((unsigned long)area->addr, order);
+       kfree(area);
+}
+EXPORT_SYMBOL_GPL(free_vm_area);
+
+void lock_vm_area(struct vm_struct *area)
+{
+       // nothing
+}
+EXPORT_SYMBOL_GPL(lock_vm_area);
+
+void unlock_vm_area(struct vm_struct *area)
+{
+       // nothing
+}
+EXPORT_SYMBOL_GPL(unlock_vm_area);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Tue May 30 
14:30:34 2006 -0500
@@ -0,0 +1,185 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
+static int local_cpu_hotplug_request(void)
+{
+       /*
+        * We assume a CPU hotplug request comes from local admin if it is made
+        * via a userspace process (i.e., one with a real mm_struct).
+        */
+       return (current->mm != NULL);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+       int err;
+       char dir[32], state[32];
+
+       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
+               return;
+
+       sprintf(dir, "cpu/%d", cpu);
+       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
+       if (err != 1) {
+               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+               return;
+       }
+
+       if (strcmp(state, "online") == 0) {
+               cpu_set(cpu, xenbus_allowed_cpumask);
+               (void)cpu_up(cpu);
+       } else if (strcmp(state, "offline") == 0) {
+               cpu_clear(cpu, xenbus_allowed_cpumask);
+               (void)cpu_down(cpu);
+       } else {
+               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+                      state, cpu);
+       }
+}
+
+static void handle_vcpu_hotplug_event(
+       struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+       int cpu;
+       char *cpustr;
+       const char *node = vec[XS_WATCH_PATH];
+
+       if ((cpustr = strstr(node, "cpu/")) != NULL) {
+               sscanf(cpustr, "cpu/%d", &cpu);
+               vcpu_hotplug(cpu);
+       }
+}
+
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+                             unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+
+       /*
+        * We do this in a callback notifier rather than __cpu_disable()
+        * because local_cpu_hotplug_request() does not work in the latter
+        * as it's always executed from within a stopmachine kthread.
+        */
+       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+               cpu_clear(cpu, local_allowed_cpumask);
+
+       return NOTIFY_OK;
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+                             unsigned long event, void *data)
+{
+       int i;
+
+       static struct xenbus_watch cpu_watch = {
+               .node = "cpu",
+               .callback = handle_vcpu_hotplug_event,
+               .flags = XBWF_new_thread };
+       (void)register_xenbus_watch(&cpu_watch);
+
+       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+               for_each_cpu(i)
+                       vcpu_hotplug(i);
+               printk(KERN_INFO "Brought up %ld CPUs\n",
+                      (long)num_online_cpus());
+       }
+
+       return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+       static struct notifier_block hotplug_cpu = {
+               .notifier_call = smpboot_cpu_notify };
+       static struct notifier_block xsn_cpu = {
+               .notifier_call = setup_cpu_watcher };
+
+       register_cpu_notifier(&hotplug_cpu);
+       register_xenstore_notifier(&xsn_cpu);
+
+       return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
+int smp_suspend(void)
+{
+       int i, err;
+
+       lock_cpu_hotplug();
+
+       /*
+        * Take all other CPUs offline. We hold the hotplug mutex to
+        * avoid other processes bringing up CPUs under our feet.
+        */
+       while (num_online_cpus() > 1) {
+               unlock_cpu_hotplug();
+               for_each_online_cpu(i) {
+                       if (i == 0)
+                               continue;
+                       err = cpu_down(i);
+                       if (err) {
+                               printk(KERN_CRIT "Failed to take all CPUs "
+                                      "down: %d.\n", err);
+                               for_each_cpu(i)
+                                       vcpu_hotplug(i);
+                               return err;
+                       }
+               }
+               lock_cpu_hotplug();
+       }
+
+       return 0;
+}
+
+void smp_resume(void)
+{
+       int cpu;
+
+       for_each_cpu(cpu)
+               cpu_initialize_context(cpu);
+
+       unlock_cpu_hotplug();
+
+       for_each_cpu(cpu)
+               vcpu_hotplug(cpu);
+}
+
+int cpu_up_check(unsigned int cpu)
+{
+       int rc = 0;
+
+       if (local_cpu_hotplug_request()) {
+               cpu_set(cpu, local_allowed_cpumask);
+               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+                       printk("%s: attempt to bring up CPU %u disallowed by "
+                              "remote admin.\n", __FUNCTION__, cpu);
+                       rc = -EBUSY;
+               }
+       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+               rc = -EBUSY;
+       }
+
+       return rc;
+}
+
+void init_xenbus_allowed_cpumask(void)
+{
+       xenbus_allowed_cpumask = cpu_present_map;
+}
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h       Tue May 
30 14:30:34 2006 -0500
@@ -0,0 +1,63 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, setup.S populates a scratch table in the
+ * empty_zero_block that contains a list of usable address/size
+ * duples.  In setup.c, this information is transferred into the e820map,
+ * and in init.c/numa.c, that new information is used to mark pages
+ * reserved or not.
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#include <linux/mmzone.h>
+
+#define E820MAP        0x2d0           /* our map */
+#define E820MAX        128             /* number of entries in E820MAP */
+#define E820NR 0x1e8           /* # entries in E820MAP */
+
+#define E820_RAM       1
+#define E820_RESERVED  2
+#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS       4
+
+#define HIGH_MEMORY    (1024*1024)
+
+#define LOWMEMSIZE()   (0x9f000)
+
+#ifndef __ASSEMBLY__
+struct e820entry {
+       u64 addr;       /* start of memory segment */
+       u64 size;       /* size of memory segment */
+       u32 type;       /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+    int nr_map;
+       struct e820entry map[E820MAX];
+};
+
+extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
+                                   unsigned size);
+extern void add_memory_region(unsigned long start, unsigned long size, 
+                             int type);
+extern void setup_memory_region(void);
+extern void contig_e820_setup(void); 
+extern unsigned long e820_end_of_ram(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
+extern void e820_print_map(char *who);
+extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+
+extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned 
long end);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+                                   unsigned long end_pfn);
+
+extern void __init parse_memopt(char *p, char **end);
+extern void __init parse_memmapopt(char *p, char **end);
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h    Tue May 30 14:30:34 
2006 -0500
@@ -0,0 +1,42 @@
+#ifndef __XEN_CPU_HOTPLUG_H__
+#define __XEN_CPU_HOTPLUG_H__
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+#if defined(CONFIG_HOTPLUG_CPU)
+
+#if defined(CONFIG_X86)
+void cpu_initialize_context(unsigned int cpu);
+#else
+#define cpu_initialize_context(cpu)    ((void)0)
+#endif
+
+int cpu_up_check(unsigned int cpu);
+void init_xenbus_allowed_cpumask(void);
+int smp_suspend(void);
+void smp_resume(void);
+
+#else /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#define cpu_up_check(cpu)              (0)
+#define init_xenbus_allowed_cpumask()  ((void)0)
+
+static inline int smp_suspend(void)
+{
+       if (num_online_cpus() > 1) {
+               printk(KERN_WARNING "Can't suspend SMP guests "
+                      "without CONFIG_HOTPLUG_CPU\n");
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static inline void smp_resume(void)
+{
+}
+
+#endif /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#endif /* __XEN_CPU_HOTPLUG_H__ */
diff -r e74246451527 -r f54d38cea8ac 
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Tue May 30 14:30:34 
2006 -0500
@@ -0,0 +1,18 @@
+diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 
./drivers/ide/ide-lib.c
+--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c  2006-05-02 
22:38:44.000000000 +0100
++++ ./drivers/ide/ide-lib.c    2006-05-24 18:37:05.000000000 +0100
+@@ -410,10 +410,10 @@
+ {
+       u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
+ 
+-      if (!PCI_DMA_BUS_IS_PHYS) {
+-              addr = BLK_BOUNCE_ANY;
+-      } else if (on && drive->media == ide_disk) {
+-              if (HWIF(drive)->pci_dev)
++      if (on && drive->media == ide_disk) {
++              if (!PCI_DMA_BUS_IS_PHYS)
++                      addr = BLK_BOUNCE_ANY;
++              else if (HWIF(drive)->pci_dev)
+                       addr = HWIF(drive)->pci_dev->dma_mask;
+       }
+ 
diff -r e74246451527 -r f54d38cea8ac patches/linux-2.6.16.13/xen-hotplug.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/xen-hotplug.patch Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,11 @@
+--- ../pristine-linux-2.6.16.13/fs/proc/proc_misc.c    2006-05-02 
22:38:44.000000000 +0100
++++ ./fs/proc/proc_misc.c      2006-05-22 15:29:34.000000000 +0100
+@@ -433,7 +433,7 @@ static int show_stat(struct seq_file *p,
+               (unsigned long long)cputime64_to_clock_t(irq),
+               (unsigned long long)cputime64_to_clock_t(softirq),
+               (unsigned long long)cputime64_to_clock_t(steal));
+-      for_each_online_cpu(i) {
++      for_each_cpu(i) {
+ 
+               /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
+               user = kstat_cpu(i).cpustat.user;
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_csched.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_csched.c   Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: xc_csched.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: XC Interface to the credit scheduler
+ *
+ */
+#include "xc_private.h"
+
+
+int
+xc_sched_credit_domain_set(
+    int xc_handle,
+    uint32_t domid,
+    struct sched_credit_adjdom *sdom)
+{
+    DECLARE_DOM0_OP;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_PUT;
+    op.u.adjustdom.u.credit = *sdom;
+
+    return do_dom0_op(xc_handle, &op);
+}
+
+int
+xc_sched_credit_domain_get(
+    int xc_handle,
+    uint32_t domid,
+    struct sched_credit_adjdom *sdom)
+{
+    DECLARE_DOM0_OP;
+    int err;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_GET;
+
+    err = do_dom0_op(xc_handle, &op);
+    if ( err == 0 )
+        *sdom = op.u.adjustdom.u.credit;
+
+    return err;
+}
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_linux.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenstore/xenstored_linux.c  Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "xenstored_core.h"
+
+#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
+#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
+
+evtchn_port_t xenbus_evtchn(void)
+{
+       int fd;
+       int rc;
+       evtchn_port_t port; 
+       char str[20]; 
+
+       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
+       if (fd == -1)
+               return -1;
+
+       rc = read(fd, str, sizeof(str)); 
+       if (rc == -1)
+       {
+               int err = errno;
+               close(fd);
+               errno = err;
+               return -1;
+       }
+
+       str[rc] = '\0'; 
+       port = strtoul(str, NULL, 0); 
+
+       close(fd); 
+       return port;
+}
+
+void *xenbus_map(void)
+{
+       int fd;
+       void *addr;
+
+       fd = open(XENSTORED_PROC_KVA, O_RDWR);
+       if (fd == -1)
+               return NULL;
+
+       addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+               MAP_SHARED, fd, 0);
+
+       if (addr == MAP_FAILED)
+               addr = NULL;
+
+       close(fd);
+
+       return addr;
+}
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/block-integrity/01_block_device_read_verify.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py        
Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+
+# Copyright (C) International Business Machines Corp., 2006
+# Author: Harry Butterworth <butterwo@xxxxxxxxxx>
+
+# This test initialises a ram disk in dom0 with data from /dev/urandom and
+# then imports the ram disk device as a physical device into a domU. The md5
+# checksum of the data in the ramdisk is calculated in dom0 and also
+# calculated by the domU reading the data through the blk frontend and
+# backend drivers.  The test succeeds if the checksums match indicating that
+# the domU successfully read all the correct data from the device.
+
+import re
+
+from XmTestLib import *
+from XmTestLib.block_utils import *
+
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
+
+domain = XmTestDomain()
+
+try:
+    console = domain.start()
+except DomainError, e:
+    FAIL(str(e))
+
+console.setHistorySaveCmds(value=True)
+
+traceCommand("cat /dev/urandom > /dev/ram1")
+
+s, o = traceCommand("md5sum /dev/ram1")
+
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+
+block_attach(domain, "phy:ram1", "hda1")
+
+try:
+    run = console.runCmd("md5sum /dev/hda1")
+except ConsoleError, e:
+    FAIL(str(e))
+
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+
+domain.closeConsole()
+
+domain.stop()
+
+if dom0_md5sum_match == None:
+    FAIL("Failed to get md5sum of test ram disk in dom0.")
+
+if domU_md5sum_match == None:
+    FAIL("Failed to get md5sum of test ram disk in domU.")
+
+if verbose:
+    print "md5sum dom0:"
+    print dom0_md5sum_match.group()
+    print "md5sum domU:"
+    print domU_md5sum_match.group()
+
+if dom0_md5sum_match.group() != domU_md5sum_match.group():
+    FAIL("MISCOMPARE: data read in domU did not match data provided by domO.")
diff -r e74246451527 -r f54d38cea8ac 
tools/xm-test/tests/block-integrity/Makefile.am
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xm-test/tests/block-integrity/Makefile.am   Tue May 30 14:30:34 
2006 -0500
@@ -0,0 +1,21 @@
+
+SUBDIRS =
+
+TESTS = 01_block_device_read_verify.test
+
+XFAIL_TESTS = 
+
+EXTRA_DIST = $(TESTS) $(XFAIL_TESTS)
+
+TESTS_ENVIRONMENT=@TENV@
+
+# Each .py test script is copied to an executable .test wrapper so the
+# automake test harness can run it directly.
+%.test: %.py
+       cp $< $@
+       chmod +x $@
+
+clean-local: am_config_clean-local
+
+am_config_clean-local:
+       rm -f *test
+       rm -f *log
+       rm -f *~
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/tools/sparse-merge
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/tools/sparse-merge  Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,144 @@
+#!/bin/bash
+# Generate a patch for each of the ia64 files in the linux-2.6-xen-sparse tree
+
+# Path to mercurial tree of upstream Linux
+# WARNING: This will do an 'hg up -C' on the upstream Linux tree, you
+#          will lose data if there's anything there you care about.
+: ${LINUXPATH:=/tmp/linux-2.6}
+# Tag of current base upstream image for Xen files
+: ${OLDTAG:=v$(awk '/^LINUX_VER/{print $NF}' buildconfigs/mk.linux-2.6-xen)}
+# Tag of new upstream base to go to
+: ${NEWTAG:=v$(wget -O- -o/dev/null http://kernel.org/kdist/finger_banner \
+    | awk '/latest stable/{print $NF}')}
+# Restrict merge to specific arch (set to . for all)
+: ${ARCH:=ia64}
+
+SPARSEDIR=linux-2.6-xen-sparse
+WD=$PWD
+
+if [ ! -d $SPARSEDIR ]; then
+       echo "Can't find $SPARSEDIR directory."
+       exit
+fi
+
+# Check for modified files in the sparse tree before starting
+if hg st $SPARSEDIR | head | grep .; then
+    echo
+    echo "$SPARSEDIR contains modifications, please clean it up first"
+    exit
+fi
+
+# We want the linux upstream tree to be at the OLDTAG to get the OLDTAG-Xen 
diff.
+# Save current revision to restore when done
+cd $LINUXPATH || exit 1
+OLDCSET=$(hg parents | awk '/^changeset:/{print($2)}' | cut -f 1 -d :)
+# Make sure both tags exist locally, fetching/committing them via ketchup
+# if they do not.
+for t in $OLDTAG $NEWTAG; do
+    if ! hg tags | cut -f1 -d' ' | grep -Fx $t; then
+       echo "Tag $t not found, ketching up"
+       hg up -C ${t%.*} || exit 1
+       ketchup ${t#v} || exit 1
+       hg addremove
+       hg ci -m $t
+       hg tag -l $t
+    fi
+done
+hg up -C $OLDTAG || exit 1
+
+cd $WD
+# For each sparse-tree file present upstream: generate the upstream
+# OLDTAG->NEWTAG diff, try to apply it, and when Xen-local changes
+# conflict, rebase by reverting the Xen delta, applying upstream, then
+# re-applying the Xen delta.
+for i in $(hg manifest | awk '{print($3)}' | grep $SPARSEDIR | grep "$ARCH"); 
do
+       cd $WD
+
+       FILENAME=$(basename $i)
+       DIRNAME=$(dirname $i)
+       DIFFPATH=$(echo $i | sed -e "s,^$SPARSEDIR,$LINUXPATH,")
+
+       if [ ! -d $DIRNAME ]; then
+               echo "Hmm, something bad happened parsing directory name: $i"
+               continue
+       fi
+
+       if [ ! -e $DIFFPATH ]; then
+               continue
+       fi
+
+       echo -n "$i ... "
+
+       cd $DIRNAME
+       XENDIR=$(pwd)
+
+       ORIGPATH=$(echo $i | sed -e "s/^$SPARSEDIR/./")
+       APATH=$(echo $i | sed -e "s/^$SPARSEDIR/a/")
+       BPATH=$(echo $i | sed -e "s/^$SPARSEDIR/b/")
+       cd $LINUXPATH
+       hg diff -r $OLDTAG -r $NEWTAG $ORIGPATH | \
+           sed -e "s,^--- $APATH,--- $FILENAME," \
+               -e "s,^+++ $BPATH,+++ $FILENAME," \
+           > $XENDIR/$FILENAME-$OLDTAG-$NEWTAG.diff
+       cd $XENDIR
+
+       # Do we have a diff file?  Did anything change?
+       if [ ! -s $FILENAME-$OLDTAG-$NEWTAG.diff ]; then
+               echo "SUCCESS (Upstream unchanged)"
+               continue
+       fi
+
+       if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then
+               # It failed, how badly?
+               if [ ! -e ${FILENAME}.rej ]; then
+                       echo "ERROR, Hmm, no .rej file, but diff failed, fix 
manually"
+                       continue
+               fi
+               TONEWREJ=$(wc -l ${FILENAME}.rej | \
+                          awk '{print($1)}')
+               hg st $FILENAME | grep -q . && hg revert $FILENAME
+               rm -f ${FILENAME}.rej ${FILENAME}.orig
+               diff -uN $DIFFPATH $FILENAME | \
+                   sed -e "s,^--- $DIFFPATH,--- $FILENAME," \
+                   > $FILENAME-$OLDTAG-Xen.diff
+
+               if [ ! -e $FILENAME-$OLDTAG-Xen.diff ]; then
+                       echo "ERROR, failed to create patch file"
+                       continue
+               fi
+
+               if ! patch -R -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; 
then
+                       echo "ERROR, reverting Xen changes failed"
+                       hg revert $FILENAME
+                       continue
+               fi
+
+               if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 
2>&1; then
+                       echo "ERROR, new upstream patch failed on reverted file"
+                       hg revert $FILENAME
+                       continue
+               fi
+
+               if ! patch -f -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; 
then
+                       if [ ! -e ${FILENAME}.rej ]; then
+                               echo "ERROR, Hmm, no .rej file, but diff 
failed, fix manually"
+                               continue
+                       fi
+                       TOXENREJ=$(wc -l ${FILENAME}.rej | \
+                                  awk '{print($1)}')
+
+                       # Keep whichever application produced the smaller
+                       # reject file; the other needs hand-merging.
+                       if  [ $TOXENREJ -gt $TONEWREJ ]; then
+                               hg revert $FILENAME
+                               rm -f ${FILENAME}.rej ${FILENAME}.orig
+                               patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > 
/dev/null 2>&1
+                               echo "MANUAL MERGE REQUIRED (Upstream reject)"
+                       else
+                               echo "MANUAL MERGE REQUIRED (Xen reject)"
+                       fi
+
+               else
+                       rm -f ${FILENAME}.rej ${FILENAME}.orig
+                       echo "SUCCESS (Re-applied Xen patch)"
+               fi
+       else
+                       rm -f ${FILENAME}.rej ${FILENAME}.orig
+                       echo "SUCCESS (Upstream applied)"
+       fi
+done
+# Drop empty diffs and restore the upstream tree to where we found it.
+find $SPARSEDIR -name \*.diff -empty | xargs -r rm -f
+cd $LINUXPATH
+hg up -C $OLDCSET
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/efi_emul.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/efi_emul.c      Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,180 @@
+/*
+ * efi_emul.c:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/compile.h>
+#include <asm/pgalloc.h>
+#include <asm/vcpu.h>
+#include <asm/dom_fw.h>
+#include <public/sched.h>
+
+extern unsigned long translate_domain_mpaddr(unsigned long);
+extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
+
+// given a current domain (virtual or metaphysical) address, return the 
virtual address
+// If the vcpu is in EFI virtual mode the address is first translated to a
+// metaphysical one via vcpu_tpa(); on a translation fault, *fault carries
+// the fault code and 0 is returned -- callers must check *fault before
+// using the result.
+static unsigned long
+efi_translate_domain_addr(unsigned long domain_addr, IA64FAULT *fault)
+{
+       struct vcpu *v = current;
+       unsigned long mpaddr = domain_addr;
+       *fault = IA64_NO_FAULT;
+
+       if (v->domain->arch.efi_virt_mode) {
+               *fault = vcpu_tpa(v, domain_addr, &mpaddr);
+               if (*fault != IA64_NO_FAULT) return 0;
+       }
+
+       return ((unsigned long) __va(translate_domain_mpaddr(mpaddr)));
+}
+
+// Emulate EFI GetTime() for the guest: translate the guest's time and
+// (optional, may be 0) capability buffer addresses into Xen virtual
+// addresses, then invoke the real EFI runtime service.  On a translation
+// fault, *fault is set and 0 is returned.
+static efi_status_t
+efi_emulate_get_time(
+       unsigned long tv_addr, unsigned long tc_addr,
+       IA64FAULT *fault)
+{
+       unsigned long tv = 0, tc = 0;
+       efi_status_t status;
+
+       //printf("efi_get_time(%016lx,%016lx) called\n", tv_addr, tc_addr);
+       tv = efi_translate_domain_addr(tv_addr, fault);
+       if (*fault != IA64_NO_FAULT) return 0;
+       if (tc_addr) {
+               tc = efi_translate_domain_addr(tc_addr, fault);
+               if (*fault != IA64_NO_FAULT) return 0;
+       }
+       //printf("efi_get_time(%016lx,%016lx) translated to xen virtual 
address\n", tv, tc);
+       status = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc);
+       //printf("efi_get_time returns %lx\n", status);
+       return status;
+}
+
+// Emulate EFI SetVirtualAddressMap().  Validates the descriptor version
+// and size, then walks the guest-supplied memory map: for each PAL_CODE
+// descriptor (which here also covers the EFI runtime region) the domain's
+// EFI runtime-service stubs are patched to point at their new virtual
+// addresses.  Per the EFI spec this may only succeed once; subsequent
+// calls return EFI_UNSUPPORTED because efi_virt_mode is already set.
+static efi_status_t
+efi_emulate_set_virtual_address_map(
+       unsigned long memory_map_size, unsigned long descriptor_size,
+       u32 descriptor_version, efi_memory_desc_t *virtual_map)
+{
+       void *efi_map_start, *efi_map_end, *p;
+       efi_memory_desc_t entry, *md = &entry;
+       u64 efi_desc_size;
+
+       unsigned long *vfn;
+       struct domain *d = current->domain;
+       efi_runtime_services_t *efi_runtime = d->arch.efi_runtime;
+
+       if (descriptor_version != EFI_MEMDESC_VERSION) {
+               printf ("efi_emulate_set_virtual_address_map: memory descriptor 
version unmatched\n");
+               return EFI_INVALID_PARAMETER;
+       }
+
+       if (descriptor_size != sizeof(efi_memory_desc_t)) {
+               printf ("efi_emulate_set_virtual_address_map: memory descriptor 
size unmatched\n");
+               return EFI_INVALID_PARAMETER;
+       }
+
+       if (d->arch.efi_virt_mode) return EFI_UNSUPPORTED;
+
+       efi_map_start = virtual_map;
+       efi_map_end   = efi_map_start + memory_map_size;
+       efi_desc_size = sizeof(efi_memory_desc_t);
+
+       /* Each descriptor is copied from guest memory before use. */
+       for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+               if (copy_from_user(&entry, p, sizeof(efi_memory_desc_t))) {
+                       printf ("efi_emulate_set_virtual_address_map: 
copy_from_user() fault. addr=0x%p\n", p);
+                       return EFI_UNSUPPORTED;
+               }
+
+               /* skip over non-PAL_CODE memory descriptors; EFI_RUNTIME is 
included in PAL_CODE. */
+                if (md->type != EFI_PAL_CODE)
+                        continue;
+
+/* Rewrite one hypercall stub (function-pointer pair) to its new virtual
+ * address within the relocated PAL_CODE region. */
+#define EFI_HYPERCALL_PATCH_TO_VIRT(tgt,call) \
+       do { \
+               vfn = (unsigned long *) domain_mpa_to_imva(d, tgt); \
+               *vfn++ = FW_HYPERCALL_##call##_INDEX * 16UL + md->virt_addr; \
+               *vfn++ = 0; \
+       } while (0)
+
+               EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_time,EFI_GET_TIME);
+               EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_time,EFI_SET_TIME);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_variable,EFI_GET_VARIABLE);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_variable,EFI_SET_VARIABLE);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
+               
EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->reset_system,EFI_RESET_SYSTEM);
+       }
+
+       /* The virtual address map has been applied. */
+       d->arch.efi_virt_mode = 1;
+
+       return EFI_SUCCESS;
+}
+
+/* Top-level dispatcher for guest EFI runtime-service hypercalls.  The
+ * service index arrives in regs->r2; arguments are fetched from general
+ * registers r32+.  dom0's reset_system is forwarded to real EFI (warm
+ * reset); other domains are simply rebooted.  Services not yet emulated
+ * return EFI_UNSUPPORTED. */
+efi_status_t
+efi_emulator (struct pt_regs *regs, IA64FAULT *fault)
+{
+       struct vcpu *v = current;
+       efi_status_t status;
+
+       *fault = IA64_NO_FAULT;
+
+       switch (regs->r2) {
+           case FW_HYPERCALL_EFI_RESET_SYSTEM:
+               printf("efi.reset_system called ");
+               if (current->domain == dom0) {
+                       printf("(by dom0)\n ");
+                       (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+               }
+               else
+                       domain_shutdown (current->domain, SHUTDOWN_reboot);
+               status = EFI_UNSUPPORTED;
+               break;
+           case FW_HYPERCALL_EFI_GET_TIME:
+               status = efi_emulate_get_time (
+                               vcpu_get_gr(v,32),
+                               vcpu_get_gr(v,33),
+                               fault);
+               break;
+           case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+               status = efi_emulate_set_virtual_address_map (
+                               vcpu_get_gr(v,32),
+                               vcpu_get_gr(v,33),
+                               (u32) vcpu_get_gr(v,34),
+                               (efi_memory_desc_t *) vcpu_get_gr(v,35));
+               break;
+           case FW_HYPERCALL_EFI_SET_TIME:
+           case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+           case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+               // FIXME: need fixes in efi.h from 2.6.9
+           case FW_HYPERCALL_EFI_GET_VARIABLE:
+               // FIXME: need fixes in efi.h from 2.6.9
+           case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+           case FW_HYPERCALL_EFI_SET_VARIABLE:
+           case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+               // FIXME: need fixes in efi.h from 2.6.9
+               status = EFI_UNSUPPORTED;
+               break;
+           default:
+               printf("unknown ia64 fw hypercall %lx\n", regs->r2);
+               status = EFI_UNSUPPORTED;
+       }
+
+       return status;
+}
diff -r e74246451527 -r f54d38cea8ac xen/common/sched_credit.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/sched_credit.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,1233 @@
+/****************************************************************************
+ * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: common/csched_credit.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: Credit-based SMP CPU scheduler
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/time.h>
+#include <xen/perfc.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+#include <asm/atomic.h>
+
+
+/*
+ * CSCHED_STATS
+ *
+ * Manage very basic counters and stats.
+ *
+ * Useful for debugging live systems. The stats are displayed
+ * with runq dumps ('r' on the Xen console).
+ */
+#define CSCHED_STATS
+
+
+/*
+ * Basic constants
+ */
+#define CSCHED_TICK             10      /* milliseconds */
+#define CSCHED_TSLICE           30      /* milliseconds */
+#define CSCHED_ACCT_NTICKS      3
+#define CSCHED_ACCT_PERIOD      (CSCHED_ACCT_NTICKS * CSCHED_TICK)
+#define CSCHED_DEFAULT_WEIGHT   256
+
+
+/*
+ * Priorities
+ */
+#define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
+#define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
+#define CSCHED_PRI_IDLE         -64     /* idle */
+#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
+
+
+/*
+ * Useful macros
+ */
+#define CSCHED_PCPU(_c)     ((struct csched_pcpu 
*)schedule_data[_c].sched_priv)
+#define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
+#define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
+#define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
+
+
+/*
+ * Stats
+ */
+#ifdef CSCHED_STATS
+
+#define CSCHED_STAT(_X)         (csched_priv.stats._X)
+#define CSCHED_STAT_DEFINE(_X)  uint32_t _X;
+#define CSCHED_STAT_PRINTK(_X)                                  \
+    do                                                          \
+    {                                                           \
+        printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X));  \
+    } while ( 0 );
+
+/* X-macro lists: each _MACRO(name) expands once per counter, used both to
+ * declare the uint32_t fields and to print them. */
+#define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
+    _MACRO(vcpu_alloc)                      \
+    _MACRO(vcpu_add)                        \
+    _MACRO(vcpu_sleep)                      \
+    _MACRO(vcpu_wake_running)               \
+    _MACRO(vcpu_wake_onrunq)                \
+    _MACRO(vcpu_wake_runnable)              \
+    _MACRO(vcpu_wake_not_runnable)          \
+    _MACRO(dom_free)                        \
+    _MACRO(schedule)                        \
+    _MACRO(tickle_local_idler)              \
+    _MACRO(tickle_local_over)               \
+    _MACRO(tickle_local_under)              \
+    _MACRO(tickle_local_other)              \
+    _MACRO(acct_run)                        \
+    _MACRO(acct_no_work)                    \
+    _MACRO(acct_balance)                    \
+    _MACRO(acct_reorder)                    \
+    _MACRO(acct_min_credit)                 \
+    _MACRO(acct_vcpu_active)                \
+    _MACRO(acct_vcpu_idle)                  \
+    _MACRO(acct_vcpu_credit_min)
+
+#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    _MACRO(vcpu_migrate)                                \
+    _MACRO(load_balance_idle)                           \
+    _MACRO(load_balance_over)                           \
+    _MACRO(load_balance_other)                          \
+    _MACRO(steal_trylock_failed)                        \
+    _MACRO(steal_peer_down)                             \
+    _MACRO(steal_peer_idle)                             \
+    _MACRO(steal_peer_running)                          \
+    _MACRO(steal_peer_pinned)                           \
+    _MACRO(tickle_idlers_none)                          \
+    _MACRO(tickle_idlers_some)
+
+#ifndef NDEBUG
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
+    _MACRO(vcpu_check)
+#else
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+#endif
+
+#define CSCHED_STATS_EXPAND(_MACRO)                 \
+    CSCHED_STATS_EXPAND_SCHED(_MACRO)               \
+    CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+
+#define CSCHED_STATS_RESET()                                        \
+    do                                                              \
+    {                                                               \
+        memset(&csched_priv.stats, 0, sizeof(csched_priv.stats));   \
+    } while ( 0 )
+
+#define CSCHED_STATS_DEFINE()                   \
+    struct                                      \
+    {                                           \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
+    } stats
+
+#define CSCHED_STATS_PRINTK()                   \
+    do                                          \
+    {                                           \
+        printk("stats:\n");                     \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
+    } while ( 0 )
+
+#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
+
+#else /* CSCHED_STATS */
+
+/* NOTE(review): CSCHED_STATS_DEFINE() is expanded inside struct
+ * csched_private; the do/while form below would not compile in that
+ * context if CSCHED_STATS were ever undefined -- confirm before
+ * disabling stats. */
+#define CSCHED_STATS_RESET()    do {} while ( 0 )
+#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
+#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
+
+#endif /* CSCHED_STATS */
+
+
+/*
+ * Physical CPU
+ */
+struct csched_pcpu {
+    struct list_head runq;              /* local run queue */
+    uint32_t runq_sort_last;            /* runq_sort epoch last applied */
+};
+
+/*
+ * Virtual CPU
+ */
+struct csched_vcpu {
+    struct list_head runq_elem;         /* node on a pcpu runq */
+    struct list_head active_vcpu_elem;  /* node on sdom->active_vcpu */
+    struct csched_dom *sdom;            /* NULL for idle VCPUs */
+    struct vcpu *vcpu;
+    atomic_t credit;
+    int credit_last;
+    uint32_t credit_incr;
+    uint32_t state_active;              /* times moved to active list */
+    uint32_t state_idle;                /* times retired from active list */
+    int16_t pri;                        /* CSCHED_PRI_* */
+};
+
+/*
+ * Domain
+ */
+struct csched_dom {
+    struct list_head active_vcpu;       /* this domain's active VCPUs */
+    struct list_head active_sdom_elem;  /* node on csched_priv.active_sdom */
+    struct domain *dom;
+    uint16_t active_vcpu_count;
+    uint16_t weight;                    /* proportional share */
+    uint16_t cap;                       /* 0 == uncapped */
+};
+
+/*
+ * System-wide private data
+ */
+struct csched_private {
+    spinlock_t lock;                    /* protects the fields below */
+    struct list_head active_sdom;       /* domains with active VCPUs */
+    uint32_t ncpus;
+    unsigned int master;
+    cpumask_t idlers;                   /* currently idling CPUs */
+    uint32_t weight;                    /* sum of active domain weights */
+    uint32_t credit;
+    int credit_balance;
+    uint32_t runq_sort;                 /* runq sort epoch counter */
+    CSCHED_STATS_DEFINE();
+};
+
+
+/*
+ * Global variables
+ */
+static struct csched_private csched_priv;
+
+
+
+/* True iff the VCPU is currently linked on some run queue. */
+static inline int
+__vcpu_on_runq(struct csched_vcpu *svc)
+{
+    return !list_empty(&svc->runq_elem);
+}
+
+/* Convert a runq list node back to its containing csched_vcpu. */
+static inline struct csched_vcpu *
+__runq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct csched_vcpu, runq_elem);
+}
+
+/* Insert svc into cpu's runq, which is kept sorted by descending
+ * priority; a VCPU is queued after existing entries of equal priority. */
+static inline void
+__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+{
+    const struct list_head * const runq = RUNQ(cpu);
+    struct list_head *iter;
+
+    BUG_ON( __vcpu_on_runq(svc) );
+    BUG_ON( cpu != svc->vcpu->processor );
+
+    list_for_each( iter, runq )
+    {
+        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
+        if ( svc->pri > iter_svc->pri )
+            break;
+    }
+
+    list_add_tail(&svc->runq_elem, iter);
+}
+
+/* Unlink svc from its runq; the node is re-initialised so that
+ * __vcpu_on_runq() subsequently returns false. */
+static inline void
+__runq_remove(struct csched_vcpu *svc)
+{
+    BUG_ON( !__vcpu_on_runq(svc) );
+    list_del_init(&svc->runq_elem);
+}
+
+/*
+ * Called after 'new' was placed on cpu's runq: raise SCHEDULE_SOFTIRQ on
+ * cpu itself when 'new' strictly outranks the VCPU running there, and on
+ * all idle CPUs when this CPU now has runnable work queued behind a
+ * non-idle current VCPU.
+ */
+static inline void
+__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
+{
+    struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
+    cpumask_t mask;
+
+    ASSERT(cur);
+    cpus_clear(mask);
+
+    /* If strictly higher priority than current VCPU, signal the CPU */
+    if ( new->pri > cur->pri )
+    {
+        if ( cur->pri == CSCHED_PRI_IDLE )
+            CSCHED_STAT_CRANK(tickle_local_idler);
+        else if ( cur->pri == CSCHED_PRI_TS_OVER )
+            CSCHED_STAT_CRANK(tickle_local_over);
+        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
+            CSCHED_STAT_CRANK(tickle_local_under);
+        else
+            CSCHED_STAT_CRANK(tickle_local_other);
+
+        cpu_set(cpu, mask);
+    }
+
+    /*
+     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
+     * let them know there is runnable work in the system...
+     */
+    if ( cur->pri > CSCHED_PRI_IDLE )
+    {
+        if ( cpus_empty(csched_priv.idlers) )
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_none);
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_some);
+            cpus_or(mask, mask, csched_priv.idlers);
+        }
+    }
+
+    /* Send scheduler interrupts to designated CPUs */
+    if ( !cpus_empty(mask) )
+        cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
+}
+
+/*
+ * Lazily allocate and initialise per-CPU scheduler state for 'cpu', grow
+ * the system-wide credit pool, and (re)elect the master CPU if needed.
+ * Takes csched_priv.lock itself.
+ */
+static void
+csched_pcpu_init(int cpu)
+{
+    struct csched_pcpu *spc;
+    unsigned long flags;
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    /* Initialize/update system-wide config */
+    csched_priv.credit += CSCHED_ACCT_PERIOD;
+    if ( csched_priv.ncpus <= cpu )
+        csched_priv.ncpus = cpu + 1;
+    if ( csched_priv.master >= csched_priv.ncpus )
+        csched_priv.master = cpu;
+
+    /* Allocate per-PCPU info */
+    spc = xmalloc(struct csched_pcpu);
+    BUG_ON( spc == NULL );
+    INIT_LIST_HEAD(&spc->runq);
+    spc->runq_sort_last = csched_priv.runq_sort;
+    schedule_data[cpu].sched_priv = spc;
+
+    /* Start off idling... */
+    BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
+    cpu_set(cpu, csched_priv.idlers);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+/* Debug-build sanity checks on the VCPU <-> domain scheduler linkage:
+ * idle VCPUs must have no csched_dom, non-idle ones must point back at
+ * their own domain.  Compiles away entirely when NDEBUG is set. */
+#ifndef NDEBUG
+static inline void
+__csched_vcpu_check(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( svc->vcpu != vc );
+    BUG_ON( sdom != CSCHED_DOM(vc->domain) );
+    if ( sdom )
+    {
+        BUG_ON( is_idle_vcpu(vc) );
+        BUG_ON( sdom->dom != vc->domain );
+    }
+    else
+    {
+        BUG_ON( !is_idle_vcpu(vc) );
+    }
+
+    CSCHED_STAT_CRANK(vcpu_check);
+}
+#define CSCHED_VCPU_CHECK(_vc)  (__csched_vcpu_check(_vc))
+#else
+#define CSCHED_VCPU_CHECK(_vc)
+#endif
+
+/* Return 1 iff vc may be migrated to local_cpu by the load balancer. */
+static inline int
+__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+    /*
+     * Don't pick up work that's in the peer's scheduling tail. Also only pick
+     * up work that's allowed to run on our CPU.
+     */
+    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_running);
+        return 0;
+    }
+
+    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_pinned);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Debit credit_dec credits from svc and, if it was idling, move it (and
+ * its domain, when this is the domain's first active VCPU) onto the
+ * active accounting lists.  The list_empty() check is re-done under
+ * csched_priv.lock to close the race with concurrent accounting.
+ */
+static void
+csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+{
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    /* Update credits */
+    atomic_sub(credit_dec, &svc->credit);
+
+    /* Put this VCPU and domain back on the active list if it was idling */
+    if ( list_empty(&svc->active_vcpu_elem) )
+    {
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( list_empty(&svc->active_vcpu_elem) )
+        {
+            CSCHED_STAT_CRANK(acct_vcpu_active);
+            svc->state_active++;
+
+            sdom->active_vcpu_count++;
+            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+            if ( list_empty(&sdom->active_sdom_elem) )
+            {
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+                csched_priv.weight += sdom->weight;
+            }
+        }
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+}
+
+/* Caller must hold csched_priv.lock: retire svc from the active lists
+ * and zero its credits; when the domain's last active VCPU goes idle the
+ * domain itself is dropped from the active list and its weight removed
+ * from the system total. */
+static inline void
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+    svc->state_idle++;
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+    }
+
+    atomic_set(&svc->credit, 0);
+}
+
+/*
+ * Allocate per-VCPU (and, on a domain's first VCPU, per-domain) scheduler
+ * state.  Idle VCPUs get no csched_dom and CSCHED_PRI_IDLE priority;
+ * others start as TS_UNDER with the default weight and no cap.
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int
+csched_vcpu_alloc(struct vcpu *vc)
+{
+    struct domain * const dom = vc->domain;
+    struct csched_dom *sdom;
+    struct csched_vcpu *svc;
+    int16_t pri;
+
+    CSCHED_STAT_CRANK(vcpu_alloc);
+
+    /* Allocate, if appropriate, per-domain info */
+    if ( is_idle_vcpu(vc) )
+    {
+        sdom = NULL;
+        pri = CSCHED_PRI_IDLE;
+    }
+    else if ( CSCHED_DOM(dom) )
+    {
+        sdom = CSCHED_DOM(dom);
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+    else 
+    {
+        sdom = xmalloc(struct csched_dom);
+        if ( !sdom )
+            return -1;
+
+        /* Initialize credit and weight */
+        INIT_LIST_HEAD(&sdom->active_vcpu);
+        sdom->active_vcpu_count = 0;
+        INIT_LIST_HEAD(&sdom->active_sdom_elem);
+        sdom->dom = dom;
+        sdom->weight = CSCHED_DEFAULT_WEIGHT;
+        sdom->cap = 0U;
+        dom->sched_priv = sdom;
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+
+    /* Allocate per-VCPU info */
+    svc = xmalloc(struct csched_vcpu);
+    if ( !svc )
+        return -1;
+
+    INIT_LIST_HEAD(&svc->runq_elem);
+    INIT_LIST_HEAD(&svc->active_vcpu_elem);
+    svc->sdom = sdom;
+    svc->vcpu = vc;
+    atomic_set(&svc->credit, 0);
+    svc->credit_last = 0;
+    svc->credit_incr = 0U;
+    svc->state_active = 0U;
+    svc->state_idle = 0U;
+    svc->pri = pri;
+    vc->sched_priv = svc;
+
+    CSCHED_VCPU_CHECK(vc);
+
+    /* Attach fair-share VCPUs to the accounting list */
+    if ( likely(sdom != NULL) )
+        csched_vcpu_acct(svc, 0);
+
+    return 0;
+}
+
+/* Second-phase VCPU setup: ensure the VCPU's processor has its per-CPU
+ * scheduler state initialised (allocated lazily on first use). */
+static void
+csched_vcpu_add(struct vcpu *vc) 
+{
+    CSCHED_STAT_CRANK(vcpu_add);
+
+    /* Allocate per-PCPU info */
+    if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+        csched_pcpu_init(vc->processor);
+
+    CSCHED_VCPU_CHECK(vc);
+}
+
+/* Release per-VCPU scheduler state.  The VCPU must not be on a runq, and
+ * idle VCPUs (sdom == NULL) are never passed here.  Retires the VCPU
+ * from the active accounting lists under csched_priv.lock first. */
+static void
+csched_vcpu_free(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    BUG_ON( sdom == NULL );
+    BUG_ON( !list_empty(&svc->runq_elem) );
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    if ( !list_empty(&svc->active_vcpu_elem) )
+        __csched_vcpu_acct_idle_locked(svc);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    xfree(svc);
+}
+
+/* Put a VCPU to sleep: if it is the one currently running on its CPU,
+ * force a reschedule there; otherwise just dequeue it from the runq. */
+static void
+csched_vcpu_sleep(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+
+    CSCHED_STAT_CRANK(vcpu_sleep);
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( schedule_data[vc->processor].curr == vc )
+        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
+    else if ( __vcpu_on_runq(svc) )
+        __runq_remove(svc);
+}
+
+/* Wake a VCPU: no-op when it is already running or already queued;
+ * otherwise enqueue it on its processor's runq and tickle CPUs that
+ * should pick it up. */
+static void
+csched_vcpu_wake(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    const unsigned int cpu = vc->processor;
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( unlikely(schedule_data[cpu].curr == vc) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_running);
+        return;
+    }
+    if ( unlikely(__vcpu_on_runq(svc)) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_onrunq);
+        return;
+    }
+
+    if ( likely(vcpu_runnable(vc)) )
+        CSCHED_STAT_CRANK(vcpu_wake_runnable);
+    else
+        CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
+
+    /* Put the VCPU on the runq and tickle CPUs */
+    __runq_insert(cpu, svc);
+    __runq_tickle(cpu, svc);
+}
+
+/*
+ * Update vc's CPU affinity.  The currently-running VCPU can only accept
+ * affinities that still include its current processor (-EBUSY otherwise).
+ * Other VCPUs are paused, moved (reassigning vc->processor under the old
+ * CPU's schedule lock when the current CPU is excluded), and unpaused.
+ */
+static int
+csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
+{
+    unsigned long flags;
+    int lcpu;
+
+    if ( vc == current )
+    {
+        /* No locking needed but also can't move on the spot... */
+        if ( !cpu_isset(vc->processor, *affinity) )
+            return -EBUSY;
+
+        vc->cpu_affinity = *affinity;
+    }
+    else
+    {
+        /* Pause, modify, and unpause. */
+        vcpu_pause(vc);
+
+        vc->cpu_affinity = *affinity;
+        if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
+        {
+            /*
+             * We must grab the scheduler lock for the CPU currently owning
+             * this VCPU before changing its ownership.
+             */
+            vcpu_schedule_lock_irqsave(vc, flags);
+            lcpu = vc->processor;
+
+            vc->processor = first_cpu(vc->cpu_affinity);
+
+            spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
+        }
+
+        vcpu_unpause(vc);
+    }
+
+    return 0;
+}
+
+static int
+csched_dom_cntl(
+    struct domain *d,
+    struct sched_adjdom_cmd *cmd)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(d);
+    unsigned long flags;
+
+    if ( cmd->direction == SCHED_INFO_GET )
+    {
+        cmd->u.credit.weight = sdom->weight;
+        cmd->u.credit.cap = sdom->cap;
+    }
+    else
+    {
+        ASSERT( cmd->direction == SCHED_INFO_PUT );
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( cmd->u.credit.weight != 0 )
+        {
+            csched_priv.weight -= sdom->weight;
+            sdom->weight = cmd->u.credit.weight;
+            csched_priv.weight += sdom->weight;
+        }
+
+        if ( cmd->u.credit.cap != (uint16_t)~0U )
+            sdom->cap = cmd->u.credit.cap;
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
+    return 0;
+}
+
+static void
+csched_dom_free(struct domain *dom)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(dom);
+    int i;
+
+    CSCHED_STAT_CRANK(dom_free);
+
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        if ( dom->vcpu[i] )
+            csched_vcpu_free(dom->vcpu[i]);
+    }
+
+    xfree(sdom);
+}
+
+/*
+ * This is an O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
+ * through the runq and move up any UNDERs that are preceded by OVERs. We
+ * remember the last UNDER to make the move up operation O(1).
+ */
+static void
+csched_runq_sort(unsigned int cpu)
+{
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+    struct list_head *runq, *elem, *next, *last_under;
+    struct csched_vcpu *svc_elem;
+    unsigned long flags;
+    int sort_epoch;
+
+    sort_epoch = csched_priv.runq_sort;
+    if ( sort_epoch == spc->runq_sort_last )
+        return;
+
+    spc->runq_sort_last = sort_epoch;
+
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+    runq = &spc->runq;
+    elem = runq->next;
+    last_under = runq;
+
+    while ( elem != runq )
+    {
+        next = elem->next;
+        svc_elem = __runq_elem(elem);
+
+        if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_under )
+            {
+                list_del(elem);
+                list_add(elem, last_under);
+            }
+            last_under = elem;
+        }
+
+        elem = next;
+    }
+
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+}
+
+static void
+csched_acct(void)
+{
+    unsigned long flags;
+    struct list_head *iter_vcpu, *next_vcpu;
+    struct list_head *iter_sdom, *next_sdom;
+    struct csched_vcpu *svc;
+    struct csched_dom *sdom;
+    uint32_t credit_total;
+    uint32_t weight_total;
+    uint32_t weight_left;
+    uint32_t credit_fair;
+    uint32_t credit_peak;
+    int credit_balance;
+    int credit_xtra;
+    int credit;
+
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    weight_total = csched_priv.weight;
+    credit_total = csched_priv.credit;
+
+    /* Converge balance towards 0 when it drops negative */
+    if ( csched_priv.credit_balance < 0 )
+    {
+        credit_total -= csched_priv.credit_balance;
+        CSCHED_STAT_CRANK(acct_balance);
+    }
+
+    if ( unlikely(weight_total == 0) )
+    {
+        csched_priv.credit_balance = 0;
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        CSCHED_STAT_CRANK(acct_no_work);
+        return;
+    }
+
+    CSCHED_STAT_CRANK(acct_run);
+
+    weight_left = weight_total;
+    credit_balance = 0;
+    credit_xtra = 0;
+
+    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    {
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        BUG_ON( is_idle_domain(sdom->dom) );
+        BUG_ON( sdom->active_vcpu_count == 0 );
+        BUG_ON( sdom->weight == 0 );
+        BUG_ON( sdom->weight > weight_left );
+
+        weight_left -= sdom->weight;
+
+        /*
+         * A domain's fair share is computed using its weight in competition
+         * with that of all other active domains.
+         *
+         * At most, a domain can use credits to run all its active VCPUs
+         * for one full accounting period. We allow a domain to earn more
+         * only when the system-wide credit balance is negative.
+         */
+        credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+        if ( csched_priv.credit_balance < 0 )
+        {
+            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+                             (weight_total - 1)
+                           ) / weight_total;
+        }
+        if ( sdom->cap != 0U )
+        {
+            uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100;
+            if ( credit_cap < credit_peak )
+                credit_peak = credit_cap;
+        }
+
+        credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+                      ) / weight_total;
+
+        if ( credit_fair < credit_peak )
+        {
+            credit_xtra = 1;
+        }
+        else
+        {
+            if ( weight_left != 0U )
+            {
+                /* Give other domains a chance at unused credits */
+                credit_total += ( ( ( credit_fair - credit_peak
+                                    ) * weight_total
+                                  ) + ( weight_left - 1 )
+                                ) / weight_left;
+            }
+
+            if ( credit_xtra )
+            {
+                /*
+                 * Lazily keep domains with extra credits at the head of
+                 * the queue to give others a chance at them in future
+                 * accounting periods.
+                 */
+                CSCHED_STAT_CRANK(acct_reorder);
+                list_del(&sdom->active_sdom_elem);
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            }
+
+            credit_fair = credit_peak;
+        }
+
+        /* Compute fair share per VCPU */
+        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+                      ) / sdom->active_vcpu_count;
+
+
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            BUG_ON( sdom != svc->sdom );
+
+            /* Increment credit */
+            atomic_add(credit_fair, &svc->credit);
+            credit = atomic_read(&svc->credit);
+
+            /*
+             * Recompute priority or, if VCPU is idling, remove it from
+             * the active list.
+             */
+            if ( credit < 0 )
+            {
+                if ( sdom->cap == 0U )
+                    svc->pri = CSCHED_PRI_TS_OVER;
+                else
+                    svc->pri = CSCHED_PRI_TS_PARKED;
+
+                if ( credit < -CSCHED_TSLICE )
+                {
+                    CSCHED_STAT_CRANK(acct_min_credit);
+                    credit = -CSCHED_TSLICE;
+                    atomic_set(&svc->credit, credit);
+                }
+            }
+            else
+            {
+                svc->pri = CSCHED_PRI_TS_UNDER;
+
+                if ( credit > CSCHED_TSLICE )
+                    __csched_vcpu_acct_idle_locked(svc);
+            }
+
+            svc->credit_last = credit;
+            svc->credit_incr = credit_fair;
+            credit_balance += credit;
+        }
+    }
+
+    csched_priv.credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    /* Inform each CPU that its runq needs to be sorted */
+    csched_priv.runq_sort++;
+}
+
+static void
+csched_tick(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+    struct csched_dom * const sdom = svc->sdom;
+
+    /*
+     * Accounting for running VCPU
+     *
+     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+     */
+    if ( likely(sdom != NULL) )
+    {
+        csched_vcpu_acct(svc, CSCHED_TICK);
+    }
+
+    /*
+     * Accounting duty
+     *
+     * Note: Currently, this is always done by the master boot CPU. Eventually,
+     * we could distribute or at the very least cycle the duty.
+     */
+    if ( (csched_priv.master == cpu) &&
+         (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+    {
+        csched_acct();
+    }
+
+    /*
+     * Check if runq needs to be sorted
+     *
+     * Every physical CPU resorts the runq after the accounting master has
+     * modified priorities. This is a special O(n) sort and runs at most
+     * once per accounting period (currently 30 milliseconds).
+     */
+    csched_runq_sort(cpu);
+}
+
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+    struct list_head *iter;
+    struct csched_vcpu *speer;
+    struct vcpu *vc;
+
+    list_for_each( iter, &spc->runq )
+    {
+        speer = __runq_elem(iter);
+
+        /*
+         * If next available VCPU here is not of higher priority than ours,
+         * this PCPU is useless to us.
+         */
+        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+        {
+            CSCHED_STAT_CRANK(steal_peer_idle);
+            break;
+        }
+
+        /* Is this VCPU runnable on our PCPU? */
+        vc = speer->vcpu;
+        BUG_ON( is_idle_vcpu(vc) );
+
+        if ( __csched_vcpu_is_stealable(cpu, vc) )
+        {
+            /* We got a candidate. Grab it! */
+            __runq_remove(speer);
+            vc->processor = cpu;
+
+            return speer;
+        }
+    }
+
+    return NULL;
+}
+
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+    struct csched_pcpu *spc;
+    struct csched_vcpu *speer;
+    int peer_cpu;
+
+    if ( snext->pri == CSCHED_PRI_IDLE )
+        CSCHED_STAT_CRANK(load_balance_idle);
+    else if ( snext->pri == CSCHED_PRI_TS_OVER )
+        CSCHED_STAT_CRANK(load_balance_over);
+    else
+        CSCHED_STAT_CRANK(load_balance_other);
+
+    peer_cpu = cpu;
+    BUG_ON( peer_cpu != snext->vcpu->processor );
+
+    while ( 1 )
+    {
+        /* For each PCPU in the system starting with our neighbour... */
+        peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+        if ( peer_cpu == cpu )
+            break;
+
+        BUG_ON( peer_cpu >= csched_priv.ncpus );
+        BUG_ON( peer_cpu == cpu );
+
+        /*
+         * Get ahold of the scheduler lock for this peer CPU.
+         *
+         * Note: We don't spin on this lock but simply try it. Spinning could
+         * cause a deadlock if the peer CPU is also load balancing and trying
+         * to lock this CPU.
+         */
+        if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+        {
+
+            spc = CSCHED_PCPU(peer_cpu);
+            if ( unlikely(spc == NULL) )
+            {
+                CSCHED_STAT_CRANK(steal_peer_down);
+                speer = NULL;
+            }
+            else
+            {
+                speer = csched_runq_steal(spc, cpu, snext->pri);
+            }
+
+            spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+            /* Got one! */
+            if ( speer )
+            {
+                CSCHED_STAT_CRANK(vcpu_migrate);
+                return speer;
+            }
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(steal_trylock_failed);
+        }
+    }
+
+
+    /* Failed to find more important work */
+    __runq_remove(snext);
+    return snext;
+}
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+    const int cpu = smp_processor_id();
+    struct list_head * const runq = RUNQ(cpu);
+    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+    struct csched_vcpu *snext;
+    struct task_slice ret;
+
+    CSCHED_STAT_CRANK(schedule);
+    CSCHED_VCPU_CHECK(current);
+
+    /*
+     * Select next runnable local VCPU (ie top of local runq)
+     */
+    if ( vcpu_runnable(current) )
+        __runq_insert(cpu, scurr);
+    else
+        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+    snext = __runq_elem(runq->next);
+
+    /*
+     * SMP Load balance:
+     *
+     * If the next highest priority local runnable VCPU has already eaten
+     * through its credits, look on other PCPUs to see if we have more
+     * urgent work... If not, csched_load_balance() will return snext, but
+     * already removed from the runq.
+     */
+    if ( snext->pri > CSCHED_PRI_TS_OVER )
+        __runq_remove(snext);
+    else
+        snext = csched_load_balance(cpu, snext);
+
+    /*
+     * Update idlers mask if necessary. When we're idling, other CPUs
+     * will tickle us when they get extra work.
+     */
+    if ( snext->pri == CSCHED_PRI_IDLE )
+    {
+        if ( !cpu_isset(cpu, csched_priv.idlers) )
+            cpu_set(cpu, csched_priv.idlers);
+    }
+    else if ( cpu_isset(cpu, csched_priv.idlers) )
+    {
+        cpu_clear(cpu, csched_priv.idlers);
+    }
+
+    /*
+     * Return task to run next...
+     */
+    ret.time = MILLISECS(CSCHED_TSLICE);
+    ret.task = snext->vcpu;
+
+    CSCHED_VCPU_CHECK(ret.task);
+    BUG_ON( !vcpu_runnable(ret.task) );
+
+    return ret;
+}
+
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    printk("[%i.%i] pri=%i cpu=%i",
+            svc->vcpu->domain->domain_id,
+            svc->vcpu->vcpu_id,
+            svc->pri,
+            svc->vcpu->processor);
+
+    if ( sdom )
+    {
+        printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+            atomic_read(&svc->credit),
+            svc->credit_last,
+            svc->credit_incr,
+            svc->state_active,
+            svc->state_idle,
+            sdom->weight);
+    }
+
+    printk("\n");
+}
+
+static void
+csched_dump_pcpu(int cpu)
+{
+    struct list_head *runq, *iter;
+    struct csched_pcpu *spc;
+    struct csched_vcpu *svc;
+    int loop;
+
+    spc = CSCHED_PCPU(cpu);
+    runq = &spc->runq;
+
+    printk(" tick=%lu, sort=%d\n",
+            schedule_data[cpu].tick,
+            spc->runq_sort_last);
+
+    /* current VCPU */
+    svc = CSCHED_VCPU(schedule_data[cpu].curr);
+    if ( svc )
+    {
+        printk("\trun: ");
+        csched_dump_vcpu(svc);
+    }
+
+    loop = 0;
+    list_for_each( iter, runq )
+    {
+        svc = __runq_elem(iter);
+        if ( svc )
+        {
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_dump(void)
+{
+    struct list_head *iter_sdom, *iter_svc;
+    int loop;
+
+    printk("info:\n"
+           "\tncpus              = %u\n"
+           "\tmaster             = %u\n"
+           "\tcredit             = %u\n"
+           "\tcredit balance     = %d\n"
+           "\tweight             = %u\n"
+           "\trunq_sort          = %u\n"
+           "\ttick               = %dms\n"
+           "\ttslice             = %dms\n"
+           "\taccounting period  = %dms\n"
+           "\tdefault-weight     = %d\n",
+           csched_priv.ncpus,
+           csched_priv.master,
+           csched_priv.credit,
+           csched_priv.credit_balance,
+           csched_priv.weight,
+           csched_priv.runq_sort,
+           CSCHED_TICK,
+           CSCHED_TSLICE,
+           CSCHED_ACCT_PERIOD,
+           CSCHED_DEFAULT_WEIGHT);
+
+    printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+    CSCHED_STATS_PRINTK();
+
+    printk("active vcpus:\n");
+    loop = 0;
+    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    {
+        struct csched_dom *sdom;
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        list_for_each( iter_svc, &sdom->active_vcpu )
+        {
+            struct csched_vcpu *svc;
+            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_init(void)
+{
+    spin_lock_init(&csched_priv.lock);
+    INIT_LIST_HEAD(&csched_priv.active_sdom);
+    csched_priv.ncpus = 0;
+    csched_priv.master = UINT_MAX;
+    cpus_clear(csched_priv.idlers);
+    csched_priv.weight = 0U;
+    csched_priv.credit = 0U;
+    csched_priv.credit_balance = 0;
+    csched_priv.runq_sort = 0U;
+    CSCHED_STATS_RESET();
+}
+
+
+struct scheduler sched_credit_def = {
+    .name           = "SMP Credit Scheduler",
+    .opt_name       = "credit",
+    .sched_id       = SCHED_CREDIT,
+
+    .alloc_task     = csched_vcpu_alloc,
+    .add_task       = csched_vcpu_add,
+    .sleep          = csched_vcpu_sleep,
+    .wake           = csched_vcpu_wake,
+    .set_affinity   = csched_vcpu_set_affinity,
+
+    .adjdom         = csched_dom_cntl,
+    .free_task      = csched_dom_free,
+
+    .tick           = csched_tick,
+    .do_schedule    = csched_schedule,
+
+    .dump_cpu_state = csched_dump_pcpu,
+    .dump_settings  = csched_dump,
+    .init           = csched_init,
+};
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/tlbflush.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/tlbflush.h   Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,37 @@
+#ifndef __FLUSHTLB_H__
+#define __FLUSHTLB_H__
+
+#include <xen/sched.h>
+
+/* TLB flushes can be either local (current vcpu only) or domain wide (on
+   all vcpus).
+   TLB flushes can be either all-flush or range only.
+
+   vTLB flushing means flushing VCPU virtual TLB + machine TLB + machine VHPT.
+*/
+
+/* Local all flush of vTLB.  */
+void vcpu_flush_vtlb_all (void);
+
+/* Local range flush of machine TLB only (not full VCPU virtual TLB!!!)  */
+void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range);
+
+/* Global all flush of vTLB  */
+void domain_flush_vtlb_all (void);
+
+/* Global range-flush of vTLB.  */
+void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
+
+/* Final vTLB flush on all dirty cpus.  */
+void domain_flush_destroy (struct domain *d);
+
+/* Flush v-tlb on cpus set in mask for current domain.  */
+void flush_tlb_mask(cpumask_t mask);
+
+/* Flush local machine TLB.  */
+void local_flush_tlb_all (void);
+
+#define tlbflush_current_time() 0
+#define tlbflush_filter(x,y) ((void)0)
+
+#endif
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c    Tue May 30 12:52:02 
2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-/*
- * Architecture-specific kernel symbols
- *
- * Don't put any exports here unless it's defined in an assembler file.
- * All other exports should be put directly after the definition.
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-extern int is_running_on_xen(void);
-EXPORT_SYMBOL(is_running_on_xen);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c        Tue May 30 
12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
-       char *s;
-       int i;
-       char *e;
-       char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
-       if (IS_ERR(macstr))
-               return PTR_ERR(macstr);
-       s = macstr;
-       for (i = 0; i < ETH_ALEN; i++) {
-               mac[i] = simple_strtoul(s, &e, 16);
-               if (s == e || (e[0] != ':' && e[0] != 0)) {
-                       kfree(macstr);
-                       return -ENOENT;
-               }
-               s = &e[1];
-       }
-       kfree(macstr);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
diff -r e74246451527 -r f54d38cea8ac 
linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h        Tue May 30 
12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse that
- * as colon-separated octets, placing result the given mac array.  mac must be
- * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
- * Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h   Tue May 30 12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/* 
-    Copyright (C) 2005 XenSource Ltd
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/flushtlb.h
--- a/xen/include/asm-ia64/flushtlb.h   Tue May 30 12:52:02 2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#ifndef __FLUSHTLB_H__
-#define __FLUSHTLB_H__
-
-#include <asm/tlbflush.h>
-
-#define tlbflush_current_time() 0
-#define tlbflush_filter(x,y) ((void)0)
-
-#endif
diff -r e74246451527 -r f54d38cea8ac 
xen/include/asm-ia64/linux-xen/asm/tlbflush.h
--- a/xen/include/asm-ia64/linux-xen/asm/tlbflush.h     Tue May 30 12:52:02 
2006 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-#ifndef _ASM_IA64_TLBFLUSH_H
-#define _ASM_IA64_TLBFLUSH_H
-
-/*
- * Copyright (C) 2002 Hewlett-Packard Co
- *     David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-
-#include <linux/config.h>
-
-#include <linux/mm.h>
-
-#include <asm/intrinsics.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-
-/*
- * Now for some TLB flushing routines.  This is the kind of stuff that
- * can be very expensive, so try to avoid them whenever possible.
- */
-
-/*
- * Flush everything (kernel mapping may also have changed due to
- * vmalloc/vfree).
- */
-extern void local_flush_tlb_all (void);
-
-#ifdef CONFIG_SMP
-  extern void smp_flush_tlb_all (void);
-  extern void smp_flush_tlb_mm (struct mm_struct *mm);
-# define flush_tlb_all()       smp_flush_tlb_all()
-#else
-# define flush_tlb_all()       local_flush_tlb_all()
-#endif
-
-#ifndef XEN
-static inline void
-local_finish_flush_tlb_mm (struct mm_struct *mm)
-{
-#ifndef XEN
-// FIXME SMP?
-       if (mm == current->active_mm)
-               activate_context(mm);
-#endif
-}
-
-/*
- * Flush a specified user mapping.  This is called, e.g., as a result of 
fork() and
- * exit().  fork() ends up here because the copy-on-write mechanism needs to 
write-protect
- * the PTEs of the parent task.
- */
-static inline void
-flush_tlb_mm (struct mm_struct *mm)
-{
-       if (!mm)
-               return;
-
-#ifndef XEN
-// FIXME SMP?
-       mm->context = 0;
-#endif
-
-       if (atomic_read(&mm->mm_users) == 0)
-               return;         /* happens as a result of exit_mmap() */
-
-#ifdef CONFIG_SMP
-       smp_flush_tlb_mm(mm);
-#else
-       local_finish_flush_tlb_mm(mm);
-#endif
-}
-
-extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, 
unsigned long end);
-
-/*
- * Page-granular tlb flush.
- */
-static inline void
-flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
-{
-#ifdef CONFIG_SMP
-       flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + 
PAGE_SIZE);
-#else
-#ifdef XEN
-       if (vma->vm_mm == current->domain->arch.mm)
-#else
-       if (vma->vm_mm == current->active_mm)
-#endif
-               ia64_ptcl(addr, (PAGE_SHIFT << 2));
-#ifndef XEN
-// FIXME SMP?
-       else
-               vma->vm_mm->context = 0;
-#endif
-#endif
-}
-
-/*
- * Flush the TLB entries mapping the virtually mapped linear page
- * table corresponding to address range [START-END).
- */
-static inline void
-flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long 
end)
-{
-       /*
-        * Deprecated.  The virtual page table is now flushed via the normal 
gather/flush
-        * interface (see tlb.h).
-        */
-}
-
-
-#define flush_tlb_kernel_range(start, end)     flush_tlb_all() /* XXX fix me */
-#endif /* XEN */
-
-#ifdef XEN
-extern void flush_tlb_mask(cpumask_t mask);
-#endif
-
-#endif /* _ASM_IA64_TLBFLUSH_H */

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel

<Prev in Thread] Current Thread [Next in Thread>