WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 15 Dec 2006 10:50:15 +0000
Delivery-date: Fri, 15 Dec 2006 02:49:52 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User awilliam@xxxxxxxxxxxx
# Node ID 4762d73ced42da37b957cd465b191b4f9c8ea3b7
# Parent  ed56ef3e9716c407351918424e2c1054a249c4f9
# Parent  35c724302bdd1339e17dad43085c841917a5dd88
merge with xen-unstable.hg
---
 xen/arch/powerpc/boot/boot32.S                     |   75 --
 xen/arch/powerpc/boot/start.S                      |   51 -
 xen/arch/powerpc/delay.c                           |   37 -
 xen/arch/powerpc/mambo.S                           |   64 --
 xen/include/asm-powerpc/misc.h                     |   33 -
 xen/include/asm-powerpc/uaccess.h                  |   38 -
 .hgignore                                          |   13 
 config/powerpc64.mk                                |    2 
 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c      |   10 
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c    |   25 
 tools/blktap/drivers/blktapctrl.c                  |   49 +
 tools/examples/external-device-migrate             |    4 
 tools/ioemu/target-i386-dm/exec-dm.c               |   11 
 tools/ioemu/vl.c                                   |    7 
 tools/libaio/src/syscall-ppc.h                     |    6 
 tools/libxc/powerpc64/Makefile                     |    4 
 tools/libxc/powerpc64/flatdevtree.c                |   23 
 tools/libxc/powerpc64/flatdevtree.h                |    2 
 tools/libxc/powerpc64/utils.c                      |  211 ++++++
 tools/libxc/powerpc64/utils.h                      |   38 +
 tools/libxc/powerpc64/xc_linux_build.c             |  292 ++-------
 tools/libxc/powerpc64/xc_prose_build.c             |  323 ++++++++++
 tools/libxc/xc_linux_build.c                       |   16 
 tools/libxc/xc_load_elf.c                          |   28 
 tools/libxc/xenctrl.h                              |    4 
 tools/libxc/xenguest.h                             |   15 
 tools/libxc/xg_private.h                           |    1 
 tools/libxen/include/xen_console.h                 |    4 
 tools/libxen/include/xen_host.h                    |    4 
 tools/libxen/include/xen_host_cpu.h                |    4 
 tools/libxen/include/xen_network.h                 |    4 
 tools/libxen/include/xen_pif.h                     |    4 
 tools/libxen/include/xen_sr.h                      |    4 
 tools/libxen/include/xen_user.h                    |    4 
 tools/libxen/include/xen_vdi.h                     |    4 
 tools/libxen/include/xen_vif.h                     |    4 
 tools/libxen/include/xen_vm.h                      |   66 +-
 tools/libxen/include/xen_vtpm.h                    |    4 
 tools/libxen/src/xen_vm.c                          |  119 +++
 tools/python/xen/lowlevel/xc/xc.c                  |   83 ++
 tools/python/xen/xend/FlatDeviceTree.py            |   94 ++
 tools/python/xen/xend/XendDomain.py                |    4 
 tools/python/xen/xend/XendDomainInfo.py            |   30 
 tools/python/xen/xend/image.py                     |   68 ++
 tools/python/xen/xend/server/DevController.py      |   35 +
 tools/python/xen/xend/server/blkif.py              |    6 
 tools/python/xen/xm/main.py                        |   18 
 tools/xenstore/xenstored_domain.c                  |    2 
 xen/arch/powerpc/Makefile                          |   69 --
 xen/arch/powerpc/backtrace.c                       |   34 -
 xen/arch/powerpc/bitops.c                          |  124 +--
 xen/arch/powerpc/boot_of.c                         |  621 +++++++++++++------
 xen/arch/powerpc/cmdline.c                         |   24 
 xen/arch/powerpc/crash.c                           |    1 
 xen/arch/powerpc/dart.c                            |   13 
 xen/arch/powerpc/dart_u4.c                         |    7 
 xen/arch/powerpc/domain.c                          |   33 -
 xen/arch/powerpc/domain_build.c                    |    3 
 xen/arch/powerpc/domctl.c                          |    6 
 xen/arch/powerpc/exceptions.c                      |   34 -
 xen/arch/powerpc/exceptions.h                      |    7 
 xen/arch/powerpc/external.c                        |   30 
 xen/arch/powerpc/gdbstub.c                         |    1 
 xen/arch/powerpc/iommu.c                           |   34 -
 xen/arch/powerpc/machine_kexec.c                   |    6 
 xen/arch/powerpc/memory.c                          |  104 ++-
 xen/arch/powerpc/mm.c                              |  235 ++++++-
 xen/arch/powerpc/mpic.c                            |  127 +---
 xen/arch/powerpc/mpic_init.c                       |   54 +
 xen/arch/powerpc/numa.c                            |    1 
 xen/arch/powerpc/of-devtree.h                      |   40 -
 xen/arch/powerpc/of-devwalk.c                      |   14 
 xen/arch/powerpc/of_handler/console.c              |   12 
 xen/arch/powerpc/ofd_fixup.c                       |   12 
 xen/arch/powerpc/ofd_fixup_memory.c                |   18 
 xen/arch/powerpc/papr/xlate.c                      |  259 ++++----
 xen/arch/powerpc/powerpc64/exceptions.S            |   18 
 xen/arch/powerpc/powerpc64/io.S                    |   65 +-
 xen/arch/powerpc/powerpc64/ppc970.c                |   71 +-
 xen/arch/powerpc/powerpc64/ppc970_machinecheck.c   |    7 
 xen/arch/powerpc/powerpc64/ppc970_scom.c           |  175 +++--
 xen/arch/powerpc/powerpc64/scom.h                  |   39 +
 xen/arch/powerpc/powerpc64/traps.c                 |    4 
 xen/arch/powerpc/rtas.c                            |   84 ++
 xen/arch/powerpc/rtas.h                            |   34 +
 xen/arch/powerpc/setup.c                           |  144 ++--
 xen/arch/powerpc/shadow.c                          |    7 
 xen/arch/powerpc/smp.c                             |  192 +++++-
 xen/arch/powerpc/smpboot.c                         |   29 
 xen/arch/powerpc/start.S                           |   62 +
 xen/arch/powerpc/systemsim.S                       |   64 ++
 xen/arch/powerpc/time.c                            |    3 
 xen/arch/powerpc/usercopy.c                        |  248 -------
 xen/arch/powerpc/xen.lds.S                         |    8 
 xen/arch/x86/crash.c                               |    4 
 xen/arch/x86/domain_build.c                        |    8 
 xen/arch/x86/mm.c                                  |   12 
 xen/arch/x86/mm/shadow/common.c                    |    4 
 xen/arch/x86/mm/shadow/multi.c                     |    3 
 xen/arch/x86/numa.c                                |    2 
 xen/common/Makefile                                |    2 
 xen/common/domain.c                                |   25 
 xen/common/elf.c                                   |   27 
 xen/common/gdbstub.c                               |    1 
 xen/common/kexec.c                                 |   14 
 xen/common/sched_credit.c                          |  663 +++++++++------------
 xen/common/xencomm.c                               |  316 ++++++++++
 xen/include/asm-powerpc/acpi.h                     |    2 
 xen/include/asm-powerpc/cache.h                    |    1 
 xen/include/asm-powerpc/config.h                   |    4 
 xen/include/asm-powerpc/debugger.h                 |   70 +-
 xen/include/asm-powerpc/delay.h                    |   16 
 xen/include/asm-powerpc/domain.h                   |    5 
 xen/include/asm-powerpc/flushtlb.h                 |    1 
 xen/include/asm-powerpc/grant_table.h              |   12 
 xen/include/asm-powerpc/guest_access.h             |   78 --
 xen/include/asm-powerpc/mach-default/irq_vectors.h |   22 
 xen/include/asm-powerpc/mm.h                       |  100 ++-
 xen/include/asm-powerpc/msr.h                      |    4 
 xen/include/asm-powerpc/numa.h                     |    2 
 xen/include/asm-powerpc/page.h                     |    5 
 xen/include/asm-powerpc/powerpc64/string.h         |    3 
 xen/include/asm-powerpc/processor.h                |  108 ++-
 xen/include/asm-powerpc/smp.h                      |   22 
 xen/include/asm-powerpc/spinlock.h                 |   33 -
 xen/include/asm-powerpc/xenoprof.h                 |   26 
 xen/include/asm-x86/numa.h                         |    2 
 xen/include/asm-x86/page.h                         |   36 -
 xen/include/asm-x86/shadow.h                       |    3 
 xen/include/asm-x86/x86_32/page-2level.h           |    6 
 xen/include/asm-x86/x86_32/page-3level.h           |   29 
 xen/include/asm-x86/x86_64/page.h                  |    6 
 xen/include/public/arch-powerpc.h                  |    2 
 xen/include/public/domctl.h                        |    8 
 xen/include/public/io/fbif.h                       |   88 +-
 xen/include/public/io/kbdif.h                      |   70 +-
 xen/include/public/io/pciif.h                      |   44 -
 xen/include/public/io/xenbus.h                     |   12 
 xen/include/public/memory.h                        |    2 
 xen/include/public/sysctl.h                        |    2 
 xen/include/public/trace.h                         |    2 
 xen/include/public/xenoprof.h                      |    2 
 xen/include/xen/elfcore.h                          |    4 
 xen/include/xen/sched.h                            |    2 
 xen/include/xen/xencomm.h                          |  115 +++
 145 files changed, 4717 insertions(+), 2437 deletions(-)

diff -r ed56ef3e9716 -r 4762d73ced42 .hgignore
--- a/.hgignore Thu Dec 14 08:54:54 2006 -0700
+++ b/.hgignore Thu Dec 14 08:57:36 2006 -0700
@@ -53,6 +53,8 @@
 ^docs/user/labels\.pl$
 ^docs/user/user\.css$
 ^docs/user/user\.html$
+^docs/xen-api/vm_lifecycle.eps$
+^docs/xen-api/xenapi-datamodel-graph.eps$
 ^extras/mini-os/h/hypervisor-ifs$
 ^extras/mini-os/h/xen-public$
 ^extras/mini-os/mini-os\..*$
@@ -98,17 +100,15 @@
 ^tools/firmware/.*\.bin$
 ^tools/firmware/.*\.sym$
 ^tools/firmware/.*bios/.*bios.*\.txt$
+^tools/firmware/hvmloader/acpi/acpigen$
 ^tools/firmware/hvmloader/hvmloader$
 ^tools/firmware/hvmloader/roms\.h$
 ^tools/firmware/rombios/BIOS-bochs-[^/]*$
 ^tools/firmware/rombios/_rombios[^/]*_\.c$
 ^tools/firmware/rombios/rombios[^/]*\.s$
-^tools/firmware/vmxassist/acpi\.h$
 ^tools/firmware/vmxassist/gen$
 ^tools/firmware/vmxassist/offsets\.h$
-^tools/firmware/vmxassist/roms\.h$
 ^tools/firmware/vmxassist/vmxassist$
-^tools/firmware/vmxassist/vmxloader$
 ^tools/ioemu/\.pc/.*$
 ^tools/ioemu/config-host\.h$
 ^tools/ioemu/config-host\.mak$
@@ -220,10 +220,11 @@
 ^xen/arch/powerpc/dom0\.bin$
 ^xen/arch/powerpc/asm-offsets\.s$
 ^xen/arch/powerpc/firmware$
-^xen/arch/powerpc/firmware_image$
+^xen/arch/powerpc/firmware_image.bin$
 ^xen/arch/powerpc/xen\.lds$
-^xen/arch/powerpc/.xen-syms$
-^xen/arch/powerpc/xen-syms.S$
+^xen/arch/powerpc/\.xen-syms$
+^xen/arch/powerpc/xen-syms\.S$
+^xen/arch/powerpc/cmdline.dep$
 ^unmodified_drivers/linux-2.6/\.tmp_versions
 ^unmodified_drivers/linux-2.6/.*\.cmd$
 ^unmodified_drivers/linux-2.6/.*\.ko$
diff -r ed56ef3e9716 -r 4762d73ced42 config/powerpc64.mk
--- a/config/powerpc64.mk       Thu Dec 14 08:54:54 2006 -0700
+++ b/config/powerpc64.mk       Thu Dec 14 08:57:36 2006 -0700
@@ -1,5 +1,7 @@ CONFIG_POWERPC := y
 CONFIG_POWERPC := y
 CONFIG_POWERPC_$(XEN_OS) := y
 
+CONFIG_XENCOMM := y
+
 CFLAGS += -DELFSIZE=64
 LIBDIR := lib
diff -r ed56ef3e9716 -r 4762d73ced42 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c     Thu Dec 14 08:57:36 2006 -0700
@@ -43,17 +43,17 @@ fastcall void do_fixup_4gb_segment(struc
        char info[100];
        int i;
 
-       if (test_and_set_bit(0, &printed))
+       /* Ignore statically-linked init. */
+       if (current->tgid == 1)
                return;
-
-        if (current->tgid == 1) /* Ignore statically linked init */
-                return; 
             
        HYPERVISOR_vm_assist(
                VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
 
+       if (test_and_set_bit(0, &printed))
+               return;
+
        sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
-
 
        DP("");
        DP("***************************************************************");
diff -r ed56ef3e9716 -r 4762d73ced42 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Thu Dec 14 08:57:36 2006 -0700
@@ -110,6 +110,18 @@ set_cpu_sibling_map(int cpu)
        cpu_data[cpu].booted_cores = 1;
 }
 
+static void
+remove_siblinginfo(int cpu)
+{
+       phys_proc_id[cpu] = BAD_APICID;
+       cpu_core_id[cpu]  = BAD_APICID;
+
+       cpus_clear(cpu_sibling_map[cpu]);
+       cpus_clear(cpu_core_map[cpu]);
+
+       cpu_data[cpu].booted_cores = 0;
+}
+
 static int xen_smp_intr_init(unsigned int cpu)
 {
        int rc;
@@ -358,18 +370,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void
-remove_siblinginfo(int cpu)
-{
-       phys_proc_id[cpu] = BAD_APICID;
-       cpu_core_id[cpu]  = BAD_APICID;
-
-       cpus_clear(cpu_sibling_map[cpu]);
-       cpus_clear(cpu_core_map[cpu]);
-
-       cpu_data[cpu].booted_cores = 0;
-}
-
 int __cpu_disable(void)
 {
        cpumask_t map = cpu_online_map;
@@ -432,7 +432,6 @@ int __devinit __cpu_up(unsigned int cpu)
        /* This must be done before setting cpu_online_map */
        set_cpu_sibling_map(cpu);
        wmb();
-
 
        rc = xen_smp_intr_init(cpu);
        if (rc) {
diff -r ed56ef3e9716 -r 4762d73ced42 tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/blktap/drivers/blktapctrl.c Thu Dec 14 08:57:36 2006 -0700
@@ -57,6 +57,8 @@
 #include "blktapctrl.h"
 #include "tapdisk.h"
 
+#define PIDFILE "/var/run/blktapctrl.pid"
+
 #define NUM_POLL_FDS 2
 #define MSG_SIZE 4096
 #define MAX_TIMEOUT 10
@@ -622,6 +624,42 @@ static void print_drivers(void)
                DPRINTF("Found driver: [%s]\n",dtypes[i]->name);
 } 
 
+static void write_pidfile(long pid)
+{
+       char buf[100];
+       int len;
+       int fd;
+       int flags;
+
+       fd = open(PIDFILE, O_RDWR | O_CREAT, 0600);
+       if (fd == -1) {
+               DPRINTF("Opening pid file failed (%d)\n", errno);
+               exit(1);
+       }
+
+       /* We exit silently if daemon already running. */
+       if (lockf(fd, F_TLOCK, 0) == -1)
+               exit(0);
+
+       /* Set FD_CLOEXEC, so that tapdisk doesn't get this file
+          descriptor. */
+       if ((flags = fcntl(fd, F_GETFD)) == -1) {
+               DPRINTF("F_GETFD failed (%d)\n", errno);
+               exit(1);
+       }
+       flags |= FD_CLOEXEC;
+       if (fcntl(fd, F_SETFD, flags) == -1) {
+               DPRINTF("F_SETFD failed (%d)\n", errno);
+               exit(1);
+       }
+
+       len = sprintf(buf, "%ld\n", pid);
+       if (write(fd, buf, len) != len) {
+               DPRINTF("Writing pid file failed (%d)\n", errno);
+               exit(1);
+       }
+}
+
 int main(int argc, char *argv[])
 {
        char *devname;
@@ -681,6 +719,7 @@ int main(int argc, char *argv[])
        ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
 
        process = getpid();
+       write_pidfile(process);
        ret = ioctl(ctlfd, BLKTAP_IOCTL_SENDPID, process );
 
        /*Static pollhooks*/
@@ -716,3 +755,13 @@ int main(int argc, char *argv[])
        closelog();
        return -1;
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 tools/examples/external-device-migrate
--- a/tools/examples/external-device-migrate    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/examples/external-device-migrate    Thu Dec 14 08:57:36 2006 -0700
@@ -60,8 +60,8 @@ function evaluate_params()
                -step)          step=$2; shift 2;;
                -host)          host=$2; shift 2;;
                -domname)       domname=$2; shift 2;;
-               -type)          type=$2; shift 2;;
-               -subtype)       subtype=$2; shift 2;;
+               -type)          typ=$2; shift 2;;
+               -subtype)       stype=$2; shift 2;;
                -recover)       recover=1; shift;;
                -help)          ext_dev_migrate_usage; exit 0;;
                *)              break;;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Thu Dec 14 08:57:36 2006 -0700
@@ -439,7 +439,12 @@ void cpu_physical_memory_rw(target_phys_
     int l, io_index;
     uint8_t *ptr;
     uint32_t val;
-    
+
+#if defined(__i386__) || defined(__x86_64__)
+    static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+    pthread_mutex_lock(&mutex);
+#endif
+
     while (len > 0) {
         /* How much can we copy before the next page boundary? */
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); 
@@ -504,6 +509,10 @@ void cpu_physical_memory_rw(target_phys_
         buf += l;
         addr += l;
     }
+
+#if defined(__i386__) || defined(__x86_64__)
+    pthread_mutex_unlock(&mutex);
+#endif
 }
 #endif
 
diff -r ed56ef3e9716 -r 4762d73ced42 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/ioemu/vl.c  Thu Dec 14 08:57:36 2006 -0700
@@ -5820,8 +5820,8 @@ static int qemu_map_cache_init(unsigned 
     if (nr_pages < max_pages)
         max_pages = nr_pages;
 
-    nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT;
-
+    nr_buckets   = max_pages + (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1;
+    nr_buckets >>= (MCACHE_BUCKET_SHIFT - PAGE_SHIFT);
     fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
 
     mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache));
@@ -5857,8 +5857,7 @@ uint8_t *qemu_map_cache(target_phys_addr
 
     entry = &mapcache_entry[address_index % nr_buckets];
 
-    if (entry->vaddr_base == NULL || entry->paddr_index != address_index)
-    { 
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index) {
         /* We need to remap a bucket. */
         uint8_t *vaddr_base;
         unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libaio/src/syscall-ppc.h
--- a/tools/libaio/src/syscall-ppc.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libaio/src/syscall-ppc.h    Thu Dec 14 08:57:36 2006 -0700
@@ -1,3 +1,6 @@
+#include <asm/unistd.h>
+#include <errno.h>
+
 #define __NR_io_setup          227
 #define __NR_io_destroy                228
 #define __NR_io_getevents      229
@@ -9,7 +12,7 @@
  * "sc; bnslr" sequence) and CR (where only CR0.SO is clobbered to signal
  * an error return status).
  */
-
+#ifndef __syscall_nr
 #define __syscall_nr(nr, type, name, args...)                          \
        unsigned long __sc_ret, __sc_err;                               \
        {                                                               \
@@ -37,6 +40,7 @@
        }                                                               \
        if (__sc_err & 0x10000000) return -((int)__sc_ret);             \
        return (type) __sc_ret
+#endif
 
 #define __sc_loadargs_0(name, dummy...)                                        \
        __sc_0 = __NR_##name
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/Makefile
--- a/tools/libxc/powerpc64/Makefile    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/Makefile    Thu Dec 14 08:57:36 2006 -0700
@@ -1,4 +1,6 @@ GUEST_SRCS-y += powerpc64/xc_linux_build
+GUEST_SRCS-y += powerpc64/flatdevtree.c
 GUEST_SRCS-y += powerpc64/xc_linux_build.c
-GUEST_SRCS-y += powerpc64/flatdevtree.c
+GUEST_SRCS-y += powerpc64/xc_prose_build.c
+GUEST_SRCS-y += powerpc64/utils.c
 
 CTRL_SRCS-y += powerpc64/xc_memory.c
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/flatdevtree.c
--- a/tools/libxc/powerpc64/flatdevtree.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/flatdevtree.c       Thu Dec 14 08:57:36 2006 -0700
@@ -220,6 +220,29 @@ void ft_add_rsvmap(struct ft_cxt *cxt, u
        cxt->p_anchor = cxt->pres + 16; /* over the terminator */
 }
 
+int ft_set_rsvmap(void *bphp, int m, u64 physaddr, u64 size)
+{
+       const struct boot_param_header *bph = bphp;
+       u64 *p_rsvmap = (u64 *)
+               ((char *)bph + be32_to_cpu(bph->off_mem_rsvmap));
+       u32 i;
+
+       for (i = 0;; i++) {
+               u64 addr, sz;
+
+               addr = be64_to_cpu(p_rsvmap[i * 2]);
+               sz = be64_to_cpu(p_rsvmap[i * 2 + 1]);
+               if (addr == 0 && size == 0)
+                       break;
+               if (m == i) {
+                       p_rsvmap[i * 2] = cpu_to_be64(physaddr);
+                       p_rsvmap[i * 2 + 1] = cpu_to_be64(size);
+                       return 0;
+               }
+       }
+       return -1;
+}
+
 void ft_begin_tree(struct ft_cxt *cxt)
 {
        cxt->p_begin = cxt->p_anchor;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/flatdevtree.h
--- a/tools/libxc/powerpc64/flatdevtree.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/flatdevtree.h       Thu Dec 14 08:57:36 2006 -0700
@@ -66,8 +66,10 @@ void ft_prop_int(struct ft_cxt *cxt, con
 void ft_prop_int(struct ft_cxt *cxt, const char *name, unsigned int val);
 void ft_begin(struct ft_cxt *cxt, void *blob, unsigned int max_size);
 void ft_add_rsvmap(struct ft_cxt *cxt, u64 physaddr, u64 size);
+int ft_set_rsvmap(void *bphp, int m, u64 physaddr, u64 size);
 
 void ft_dump_blob(const void *bphp);
+void ft_backtrack_node(struct ft_cxt *cxt);
 void ft_merge_blob(struct ft_cxt *cxt, void *blob);
 
 void *ft_find_node(const void *bphp, const char *srch_path);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/xc_linux_build.c
--- a/tools/libxc/powerpc64/xc_linux_build.c    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/powerpc64/xc_linux_build.c    Thu Dec 14 08:57:36 2006 -0700
@@ -35,60 +35,10 @@
 
 #include "flatdevtree_env.h"
 #include "flatdevtree.h"
+#include "utils.h"
 
 #define INITRD_ADDR (24UL << 20)
 #define DEVTREE_ADDR (16UL << 20)
-
-#define ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
-
-#define max(x,y) ({ \
-        const typeof(x) _x = (x);       \
-        const typeof(y) _y = (y);       \
-        (void) (&_x == &_y);            \
-        _x > _y ? _x : _y; })
-
-static void *load_file(const char *path, unsigned long *filesize)
-{
-    void *img;
-    ssize_t size;
-    int fd;
-
-    DPRINTF("load_file(%s)\n", path);
-
-    fd = open(path, O_RDONLY);
-    if (fd < 0) {
-        perror(path);
-        return NULL;
-    }
-
-    size = lseek(fd, 0, SEEK_END);
-    if (size < 0) {
-        perror(path);
-        close(fd);
-        return NULL;
-    }
-    lseek(fd, 0, SEEK_SET);
-
-    img = malloc(size);
-    if (img == NULL) {
-        perror(path);
-        close(fd);
-        return NULL;
-    }
-
-    size = read(fd, img, size);
-    if (size <= 0) {
-        perror(path);
-        close(fd);
-        free(img);
-        return NULL;
-    }
-
-    if (filesize)
-        *filesize = size;
-    close(fd);
-    return img;
-}
 
 static int init_boot_vcpu(
     int xc_handle,
@@ -128,37 +78,6 @@ static int init_boot_vcpu(
     return rc;
 }
 
-static int install_image(
-        int xc_handle,
-        int domid,
-        xen_pfn_t *page_array,
-        void *image,
-        unsigned long paddr,
-        unsigned long size)
-{
-    uint8_t *img = image;
-    int i;
-    int rc = 0;
-
-    if (paddr & ~PAGE_MASK) {
-        printf("*** unaligned address\n");
-        return -1;
-    }
-
-    for (i = 0; i < size; i += PAGE_SIZE) {
-        void *page = img + i;
-        xen_pfn_t pfn = (paddr + i) >> PAGE_SHIFT;
-        xen_pfn_t mfn = page_array[pfn];
-
-        rc = xc_copy_to_domain_page(xc_handle, domid, mfn, page);
-        if (rc < 0) {
-            perror("xc_copy_to_domain_page");
-            break;
-        }
-    }
-    return rc;
-}
-
 static int load_devtree(
     int xc_handle,
     int domid,
@@ -167,10 +86,10 @@ static int load_devtree(
     unsigned long devtree_addr,
     uint64_t initrd_base,
     unsigned long initrd_len,
-    start_info_t *si,
-    unsigned long si_addr)
-{
-    uint32_t start_info[4] = {0, si_addr, 0, 0x1000};
+    start_info_t *start_info __attribute__((unused)),
+    unsigned long start_info_addr)
+{
+    uint32_t si[4] = {0, start_info_addr, 0, 0x1000};
     struct boot_param_header *header;
     void *chosen;
     void *xen;
@@ -208,9 +127,14 @@ static int load_devtree(
         return rc;
     }
 
+    rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len);
+    if (rc < 0) {
+        DPRINTF("couldn't set initrd reservation\n");
+        return ~0UL;
+    }
+
     /* start-info (XXX being removed soon) */
-    rc = ft_set_prop(&devtree, xen, "start-info",
-            start_info, sizeof(start_info));
+    rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si));
     if (rc < 0) {
         DPRINTF("couldn't set /xen/start-info\n");
         return rc;
@@ -218,91 +142,19 @@ static int load_devtree(
 
     header = devtree;
     devtree_size = header->totalsize;
+    {
+        static const char dtb[] = "/tmp/xc_domU.dtb";
+        int dfd = creat(dtb, 0666);
+        if (dfd != -1) {
+            write(dfd, devtree, devtree_size);
+            close(dfd);
+        } else
+            DPRINTF("could not open(\"%s\")\n", dtb);
+    }
 
     DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, 
devtree_size);
     return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR,
                        devtree_size);
-}
-
-unsigned long spin_list[] = {
-#if 0
-    0x100,
-    0x200,
-    0x300,
-    0x380,
-    0x400,
-    0x480,
-    0x500,
-    0x700,
-    0x900,
-    0xc00,
-#endif
-    0
-};
-
-/* XXX yes, this is a hack */
-static void hack_kernel_img(char *img)
-{
-    const off_t file_offset = 0x10000;
-    unsigned long *addr = spin_list;
-
-    while (*addr) {
-        uint32_t *instruction = (uint32_t *)(img + *addr + file_offset);
-        printf("installing spin loop at %lx (%x)\n", *addr, *instruction);
-        *instruction = 0x48000000;
-        addr++;
-    }
-}
-
-static int load_kernel(
-    int xc_handle,
-    int domid,
-    const char *kernel_path,
-    struct domain_setup_info *dsi,
-    xen_pfn_t *page_array)
-{
-    struct load_funcs load_funcs;
-    char *kernel_img;
-    unsigned long kernel_size;
-    int rc;
-
-    /* load the kernel ELF file */
-    kernel_img = load_file(kernel_path, &kernel_size);
-    if (kernel_img == NULL) {
-        rc = -1;
-        goto out;
-    }
-
-    hack_kernel_img(kernel_img);
-
-    DPRINTF("probe_elf\n");
-    rc = probe_elf(kernel_img, kernel_size, &load_funcs);
-    if (rc < 0) {
-        rc = -1;
-        printf("%s is not an ELF file\n", kernel_path);
-        goto out;
-    }
-
-    DPRINTF("parseimage\n");
-    rc = (load_funcs.parseimage)(kernel_img, kernel_size, dsi);
-    if (rc < 0) {
-        rc = -1;
-        goto out;
-    }
-
-    DPRINTF("loadimage\n");
-    (load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid,
-            page_array, dsi);
-
-    DPRINTF("  v_start     %016"PRIx64"\n", dsi->v_start);
-    DPRINTF("  v_end       %016"PRIx64"\n", dsi->v_end);
-    DPRINTF("  v_kernstart %016"PRIx64"\n", dsi->v_kernstart);
-    DPRINTF("  v_kernend   %016"PRIx64"\n", dsi->v_kernend);
-    DPRINTF("  v_kernentry %016"PRIx64"\n", dsi->v_kernentry);
-
-out:
-    free(kernel_img);
-    return rc;
 }
 
 static int load_initrd(
@@ -334,49 +186,38 @@ out:
     return rc;
 }
 
-static unsigned long create_start_info(start_info_t *si,
+static unsigned long create_start_info(
+       void *devtree, start_info_t *start_info,
         unsigned int console_evtchn, unsigned int store_evtchn,
-        unsigned long nr_pages)
-{
-    unsigned long si_addr;
-
-    memset(si, 0, sizeof(*si));
-    snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0);
-
-    si->nr_pages = nr_pages;
-    si->shared_info = (nr_pages - 1) << PAGE_SHIFT;
-    si->store_mfn = si->nr_pages - 2;
-    si->store_evtchn = store_evtchn;
-    si->console.domU.mfn = si->nr_pages - 3;
-    si->console.domU.evtchn = console_evtchn;
-    si_addr = (si->nr_pages - 4) << PAGE_SHIFT;
-
-    return si_addr;
-}
-
-static int get_page_array(int xc_handle, int domid, xen_pfn_t **page_array,
-                          unsigned long *nr_pages)
-{
+       unsigned long nr_pages, unsigned long rma_pages)
+{
+    unsigned long start_info_addr;
+    uint64_t rma_top;
     int rc;
 
-    DPRINTF("xc_get_tot_pages\n");
-    *nr_pages = xc_get_tot_pages(xc_handle, domid);
-    DPRINTF("  0x%lx\n", *nr_pages);
-
-    *page_array = malloc(*nr_pages * sizeof(xen_pfn_t));
-    if (*page_array == NULL) {
-        perror("malloc");
-        return -1;
-    }
-
-    DPRINTF("xc_get_pfn_list\n");
-    rc = xc_get_pfn_list(xc_handle, domid, *page_array, *nr_pages);
-    if (rc != *nr_pages) {
-        perror("Could not get the page frame list");
-        return -1;
-    }
-
-    return 0;
+    memset(start_info, 0, sizeof(*start_info));
+    snprintf(start_info->magic, sizeof(start_info->magic),
+             "xen-%d.%d-powerpc64HV", 3, 0);
+
+    rma_top = rma_pages << PAGE_SHIFT;
+    DPRINTF("RMA top = 0x%"PRIX64"\n", rma_top);
+
+    start_info->nr_pages = nr_pages;
+    start_info->shared_info = rma_top - PAGE_SIZE;
+    start_info->store_mfn = (rma_top >> PAGE_SHIFT) - 2;
+    start_info->store_evtchn = store_evtchn;
+    start_info->console.domU.mfn = (rma_top >> PAGE_SHIFT) - 3;
+    start_info->console.domU.evtchn = console_evtchn;
+    start_info_addr = rma_top - 4*PAGE_SIZE;
+
+    rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE);
+    if (rc < 0) {
+        DPRINTF("couldn't set start_info reservation\n");
+        return ~0UL;
+    }
+
+
+    return start_info_addr;
 }
 
 static void free_page_array(xen_pfn_t *page_array)
@@ -388,6 +229,7 @@ static void free_page_array(xen_pfn_t *p
 
 int xc_linux_build(int xc_handle,
                    uint32_t domid,
+                   unsigned int mem_mb,
                    const char *image_name,
                    const char *initrd_name,
                    const char *cmdline,
@@ -399,7 +241,7 @@ int xc_linux_build(int xc_handle,
                    unsigned long *console_mfn,
                    void *devtree)
 {
-    start_info_t si;
+    start_info_t start_info;
     struct domain_setup_info dsi;
     xen_pfn_t *page_array = NULL;
     unsigned long nr_pages;
@@ -407,18 +249,28 @@ int xc_linux_build(int xc_handle,
     unsigned long kern_addr;
     unsigned long initrd_base = 0;
     unsigned long initrd_len = 0;
-    unsigned long si_addr;
+    unsigned long start_info_addr;
+    unsigned long rma_pages;
     int rc = 0;
 
     DPRINTF("%s\n", __func__);
 
-    if (get_page_array(xc_handle, domid, &page_array, &nr_pages)) {
+    nr_pages = mem_mb << (20 - PAGE_SHIFT);
+    DPRINTF("nr_pages 0x%lx\n", nr_pages);
+
+    rma_pages = get_rma_pages(devtree);
+    if (rma_pages == 0) {
+           rc = -1;
+           goto out;
+    }
+
+    if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) {
         rc = -1;
         goto out;
     }
 
     DPRINTF("loading image '%s'\n", image_name);
-    if (load_kernel(xc_handle, domid, image_name, &dsi, page_array)) {
+    if (load_elf_kernel(xc_handle, domid, image_name, &dsi, page_array)) {
         rc = -1;
         goto out;
     }
@@ -434,11 +286,12 @@ int xc_linux_build(int xc_handle,
     }
 
     /* start_info stuff: about to be removed  */
-    si_addr = create_start_info(&si, console_evtchn, store_evtchn, nr_pages);
-    *console_mfn = page_array[si.console.domU.mfn];
-    *store_mfn = page_array[si.store_mfn];
-    if (install_image(xc_handle, domid, page_array, &si, si_addr,
-                sizeof(start_info_t))) {
+    start_info_addr = create_start_info(devtree, &start_info, console_evtchn,
+                                        store_evtchn, nr_pages, rma_pages);
+    *console_mfn = page_array[start_info.console.domU.mfn];
+    *store_mfn = page_array[start_info.store_mfn];
+    if (install_image(xc_handle, domid, page_array, &start_info,
+                      start_info_addr, sizeof(start_info_t))) {
         rc = -1;
         goto out;
     }
@@ -447,7 +300,8 @@ int xc_linux_build(int xc_handle,
         DPRINTF("loading flattened device tree\n");
         devtree_addr = DEVTREE_ADDR;
         if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr,
-                     initrd_base, initrd_len, &si, si_addr)) {
+                         initrd_base, initrd_len, &start_info,
+                         start_info_addr)) {
             DPRINTF("couldn't load flattened device tree.\n");
             rc = -1;
             goto out;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xc_linux_build.c      Thu Dec 14 08:57:36 2006 -0700
@@ -596,15 +596,21 @@ static int compat_check(int xc_handle, s
     }
 
     if (strstr(xen_caps, "xen-3.0-x86_32p")) {
-        if (dsi->pae_kernel == PAEKERN_no) {
+        if (dsi->pae_kernel == PAEKERN_bimodal) {
+            dsi->pae_kernel = PAEKERN_extended_cr3;
+        } else if (dsi->pae_kernel == PAEKERN_no) {
             xc_set_error(XC_INVALID_KERNEL,
                          "Non PAE-kernel on PAE host.");
             return 0;
         }
-    } else if (dsi->pae_kernel != PAEKERN_no) {
-        xc_set_error(XC_INVALID_KERNEL,
-                     "PAE-kernel on non-PAE host.");
-        return 0;
+    } else {
+        if (dsi->pae_kernel == PAEKERN_bimodal) {
+            dsi->pae_kernel = PAEKERN_no;
+        } else if (dsi->pae_kernel != PAEKERN_no) {
+            xc_set_error(XC_INVALID_KERNEL,
+                         "PAE-kernel on non-PAE host.");
+            return 0;
+        }
     }
 
     return 1;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xc_load_elf.c Thu Dec 14 08:57:36 2006 -0700
@@ -325,17 +325,6 @@ static int parseelfimage(const char *ima
         return -EINVAL;
     }
 
-    /* Find the section-header strings table. */
-    if ( ehdr->e_shstrndx == SHN_UNDEF )
-    {
-        xc_set_error(XC_INVALID_KERNEL,
-                     "ELF image has no section-header strings table (shstrtab).");
-        return -EINVAL;
-    }
-    shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
-                        (ehdr->e_shstrndx*ehdr->e_shentsize));
-    shstrtab = image + shdr->sh_offset;
-
     dsi->__elfnote_section = NULL;
     dsi->__xen_guest_string = NULL;
 
@@ -354,6 +343,17 @@ static int parseelfimage(const char *ima
     /* Fall back to looking for the special '__xen_guest' section. */
     if ( dsi->__elfnote_section == NULL )
     {
+        /* Find the section-header strings table. */
+        if ( ehdr->e_shstrndx == SHN_UNDEF )
+        {
+            xc_set_error(XC_INVALID_KERNEL,
+                         "ELF image has no section-header strings table.");
+            return -EINVAL;
+        }
+        shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
+                            (ehdr->e_shstrndx*ehdr->e_shentsize));
+        shstrtab = image + shdr->sh_offset;
+
         for ( h = 0; h < ehdr->e_shnum; h++ )
         {
             shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize));
@@ -400,6 +400,8 @@ static int parseelfimage(const char *ima
     }
 
     /*
+     * A "bimodal" ELF note indicates the kernel will adjust to the
+     * current paging mode, including handling extended cr3 syntax.
      * If we have ELF notes then PAE=yes implies that we must support
      * the extended cr3 syntax. Otherwise we need to find the
      * [extended-cr3] syntax in the __xen_guest string.
@@ -408,7 +410,9 @@ static int parseelfimage(const char *ima
     if ( dsi->__elfnote_section )
     {
         p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE);
-        if ( p != NULL && strncmp(p, "yes", 3) == 0 )
+        if ( p != NULL && strncmp(p, "bimodal", 7) == 0 )
+            dsi->pae_kernel = PAEKERN_bimodal;
+        else if ( p != NULL && strncmp(p, "yes", 3) == 0 )
             dsi->pae_kernel = PAEKERN_extended_cr3;
 
     }
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xenctrl.h     Thu Dec 14 08:57:36 2006 -0700
@@ -728,4 +728,8 @@ const char *xc_error_code_to_desc(int co
  */
 xc_error_handler xc_set_error_handler(xc_error_handler handler);
 
+/* PowerPC specific. */
+int xc_alloc_real_mode_area(int xc_handle,
+                            uint32_t domid,
+                            unsigned int log);
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xenguest.h    Thu Dec 14 08:57:36 2006 -0700
@@ -122,4 +122,19 @@ int xc_get_hvm_param(
 int xc_get_hvm_param(
     int handle, domid_t dom, int param, unsigned long *value);
 
+/* PowerPC specific. */
+int xc_prose_build(int xc_handle,
+                   uint32_t domid,
+                   unsigned int mem_mb,
+                   const char *image_name,
+                   const char *ramdisk_name,
+                   const char *cmdline,
+                   const char *features,
+                   unsigned long flags,
+                   unsigned int store_evtchn,
+                   unsigned long *store_mfn,
+                   unsigned int console_evtchn,
+                   unsigned long *console_mfn,
+                   void *arch_args);
+
 #endif /* XENGUEST_H */
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxc/xg_private.h  Thu Dec 14 08:57:36 2006 -0700
@@ -132,6 +132,7 @@ struct domain_setup_info
 #define PAEKERN_no           0
 #define PAEKERN_yes          1
 #define PAEKERN_extended_cr3 2
+#define PAEKERN_bimodal      3
     unsigned int  pae_kernel;
 
     unsigned int  load_symtab;
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_console.h
--- a/tools/libxen/include/xen_console.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_console.h        Thu Dec 14 08:57:36 2006 -0700
@@ -149,14 +149,14 @@ xen_console_record_opt_set_free(xen_cons
 
 
 /**
- * Get the current state of the given console.
+ * Get a record containing the current state of the given console.
  */
 extern bool
 xen_console_get_record(xen_session *session, xen_console_record **result, xen_console console);
 
 
 /**
- * Get a reference to the object with the specified UUID.
+ * Get a reference to the console instance with the specified UUID.
  */
 extern bool
 xen_console_get_by_uuid(xen_session *session, xen_console *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_host.h
--- a/tools/libxen/include/xen_host.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_host.h   Thu Dec 14 08:57:36 2006 -0700
@@ -154,14 +154,14 @@ xen_host_record_opt_set_free(xen_host_re
 
 
 /**
- * Get the current state of the given host.  !!!
+ * Get a record containing the current state of the given host.
  */
 extern bool
 xen_host_get_record(xen_session *session, xen_host_record **result, xen_host host);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the host instance with the specified UUID.
  */
 extern bool
 xen_host_get_by_uuid(xen_session *session, xen_host *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_host_cpu.h
--- a/tools/libxen/include/xen_host_cpu.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_host_cpu.h       Thu Dec 14 08:57:36 2006 -0700
@@ -153,14 +153,14 @@ xen_host_cpu_record_opt_set_free(xen_hos
 
 
 /**
- * Get the current state of the given host_cpu.  !!!
+ * Get a record containing the current state of the given host_cpu.
  */
 extern bool
 xen_host_cpu_get_record(xen_session *session, xen_host_cpu_record **result, xen_host_cpu host_cpu);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the host_cpu instance with the specified UUID.
  */
 extern bool
 xen_host_cpu_get_by_uuid(xen_session *session, xen_host_cpu *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_network.h
--- a/tools/libxen/include/xen_network.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_network.h        Thu Dec 14 08:57:36 2006 -0700
@@ -152,14 +152,14 @@ xen_network_record_opt_set_free(xen_netw
 
 
 /**
- * Get the current state of the given network.  !!!
+ * Get a record containing the current state of the given network.
  */
 extern bool
 xen_network_get_record(xen_session *session, xen_network_record **result, xen_network network);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the network instance with the specified UUID.
  */
 extern bool
 xen_network_get_by_uuid(xen_session *session, xen_network *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_pif.h
--- a/tools/libxen/include/xen_pif.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_pif.h    Thu Dec 14 08:57:36 2006 -0700
@@ -155,14 +155,14 @@ xen_pif_record_opt_set_free(xen_pif_reco
 
 
 /**
- * Get the current state of the given PIF.  !!!
+ * Get a record containing the current state of the given PIF.
  */
 extern bool
 xen_pif_get_record(xen_session *session, xen_pif_record **result, xen_pif pif);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the PIF instance with the specified UUID.
  */
 extern bool
 xen_pif_get_by_uuid(xen_session *session, xen_pif *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_sr.h
--- a/tools/libxen/include/xen_sr.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_sr.h     Thu Dec 14 08:57:36 2006 -0700
@@ -153,14 +153,14 @@ xen_sr_record_opt_set_free(xen_sr_record
 
 
 /**
- * Get the current state of the given SR.  !!!
+ * Get a record containing the current state of the given SR.
  */
 extern bool
 xen_sr_get_record(xen_session *session, xen_sr_record **result, xen_sr sr);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the SR instance with the specified UUID.
  */
 extern bool
 xen_sr_get_by_uuid(xen_session *session, xen_sr *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_user.h
--- a/tools/libxen/include/xen_user.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_user.h   Thu Dec 14 08:57:36 2006 -0700
@@ -146,14 +146,14 @@ xen_user_record_opt_set_free(xen_user_re
 
 
 /**
- * Get the current state of the given user.  !!!
+ * Get a record containing the current state of the given user.
  */
 extern bool
 xen_user_get_record(xen_session *session, xen_user_record **result, xen_user user);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the user instance with the specified UUID.
  */
 extern bool
 xen_user_get_by_uuid(xen_session *session, xen_user *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vdi.h
--- a/tools/libxen/include/xen_vdi.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vdi.h    Thu Dec 14 08:57:36 2006 -0700
@@ -159,14 +159,14 @@ xen_vdi_record_opt_set_free(xen_vdi_reco
 
 
 /**
- * Get the current state of the given VDI.  !!!
+ * Get a record containing the current state of the given VDI.
  */
 extern bool
 xen_vdi_get_record(xen_session *session, xen_vdi_record **result, xen_vdi vdi);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VDI instance with the specified UUID.
  */
 extern bool
 xen_vdi_get_by_uuid(xen_session *session, xen_vdi *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vif.h
--- a/tools/libxen/include/xen_vif.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vif.h    Thu Dec 14 08:57:36 2006 -0700
@@ -156,14 +156,14 @@ xen_vif_record_opt_set_free(xen_vif_reco
 
 
 /**
- * Get the current state of the given VIF.  !!!
+ * Get a record containing the current state of the given VIF.
  */
 extern bool
 xen_vif_get_record(xen_session *session, xen_vif_record **result, xen_vif vif);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VIF instance with the specified UUID.
  */
 extern bool
 xen_vif_get_by_uuid(xen_session *session, xen_vif *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vm.h
--- a/tools/libxen/include/xen_vm.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vm.h     Thu Dec 14 08:57:36 2006 -0700
@@ -79,6 +79,7 @@ typedef struct xen_vm_record
     char *name_description;
     int64_t user_version;
     bool is_a_template;
+    bool auto_power_on;
     struct xen_host_record_opt *resident_on;
     int64_t memory_static_max;
     int64_t memory_dynamic_max;
@@ -198,14 +199,14 @@ xen_vm_record_opt_set_free(xen_vm_record
 
 
 /**
- * Get the current state of the given VM.  !!!
+ * Get a record containing the current state of the given VM.
  */
 extern bool
 xen_vm_get_record(xen_session *session, xen_vm_record **result, xen_vm vm);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VM instance with the specified UUID.
  */
 extern bool
 xen_vm_get_by_uuid(xen_session *session, xen_vm *result, char *uuid);
@@ -277,6 +278,13 @@ xen_vm_get_is_a_template(xen_session *se
 
 
 /**
+ * Get the auto_power_on field of the given VM.
+ */
+extern bool
+xen_vm_get_auto_power_on(xen_session *session, bool *result, xen_vm vm);
+
+
+/**
  * Get the resident_on field of the given VM.
  */
 extern bool
@@ -564,6 +572,13 @@ xen_vm_set_is_a_template(xen_session *se
 
 
 /**
+ * Set the auto_power_on field of the given VM.
+ */
+extern bool
+xen_vm_set_auto_power_on(xen_session *session, xen_vm vm, bool auto_power_on);
+
+
+/**
  * Set the memory/dynamic_max field of the given VM.
  */
 extern bool
@@ -592,6 +607,13 @@ xen_vm_set_vcpus_params(xen_session *ses
 
 
 /**
+ * Set the VCPUs/number field of the given VM.
+ */
+extern bool
+xen_vm_set_vcpus_number(xen_session *session, xen_vm vm, int64_t number);
+
+
+/**
  * Set the VCPUs/features/force_on field of the given VM.
  */
 extern bool
@@ -599,10 +621,42 @@ xen_vm_set_vcpus_features_force_on(xen_s
 
 
 /**
+ * Add the given value to the VCPUs/features/force_on field of the
+ * given VM.  If the value is already in that Set, then do nothing.
+ */
+extern bool
+xen_vm_add_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
+
+
+/**
+ * Remove the given value from the VCPUs/features/force_on field of the
+ * given VM.  If the value is not in that Set, then do nothing.
+ */
+extern bool
+xen_vm_remove_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
+
+
+/**
  * Set the VCPUs/features/force_off field of the given VM.
  */
 extern bool
 xen_vm_set_vcpus_features_force_off(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_off);
+
+
+/**
+ * Add the given value to the VCPUs/features/force_off field of the
+ * given VM.  If the value is already in that Set, then do nothing.
+ */
+extern bool
+xen_vm_add_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
+
+
+/**
+ * Remove the given value from the VCPUs/features/force_off field of
+ * the given VM.  If the value is not in that Set, then do nothing.
+ */
+extern bool
+xen_vm_remove_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value);
 
 
 /**
@@ -817,12 +871,4 @@ xen_vm_get_all(xen_session *session, str
 xen_vm_get_all(xen_session *session, struct xen_vm_set **result);
 
 
-/**
- * Destroy the specified VM.  The VM is completely removed from the system.
- * This function can only be called when the VM is in the Halted State.
- */
-extern bool
-xen_vm_destroy(xen_session *session, xen_vm vm);
-
-
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vtpm.h
--- a/tools/libxen/include/xen_vtpm.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/include/xen_vtpm.h   Thu Dec 14 08:57:36 2006 -0700
@@ -151,14 +151,14 @@ xen_vtpm_record_opt_set_free(xen_vtpm_re
 
 
 /**
- * Get the current state of the given VTPM.  !!!
+ * Get a record containing the current state of the given VTPM.
  */
 extern bool
 xen_vtpm_get_record(xen_session *session, xen_vtpm_record **result, xen_vtpm vtpm);
 
 
 /**
- * Get a reference to the object with the specified UUID.  !!!
+ * Get a reference to the VTPM instance with the specified UUID.
  */
 extern bool
 xen_vtpm_get_by_uuid(xen_session *session, xen_vtpm *result, char *uuid);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/src/xen_vm.c
--- a/tools/libxen/src/xen_vm.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/libxen/src/xen_vm.c Thu Dec 14 08:57:36 2006 -0700
@@ -67,6 +67,9 @@ static const struct_member xen_vm_record
         { .key = "is_a_template",
           .type = &abstract_type_bool,
           .offset = offsetof(xen_vm_record, is_a_template) },
+        { .key = "auto_power_on",
+          .type = &abstract_type_bool,
+          .offset = offsetof(xen_vm_record, auto_power_on) },
         { .key = "resident_on",
           .type = &abstract_type_ref,
           .offset = offsetof(xen_vm_record, resident_on) },
@@ -399,6 +402,22 @@ xen_vm_get_is_a_template(xen_session *se
 
 
 bool
+xen_vm_get_auto_power_on(xen_session *session, bool *result, xen_vm vm)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm }
+        };
+
+    abstract_type result_type = abstract_type_bool;
+
+    XEN_CALL_("VM.get_auto_power_on");
+    return session->ok;
+}
+
+
+bool
 xen_vm_get_resident_on(xen_session *session, xen_host *result, xen_vm vm)
 {
     abstract_value param_values[] =
@@ -1082,6 +1101,22 @@ xen_vm_set_is_a_template(xen_session *se
 
 
 bool
+xen_vm_set_auto_power_on(xen_session *session, xen_vm vm, bool auto_power_on)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &abstract_type_bool,
+              .u.bool_val = auto_power_on }
+        };
+
+    xen_call_(session, "VM.set_auto_power_on", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_memory_dynamic_max(xen_session *session, xen_vm vm, int64_t dynamic_max)
 {
     abstract_value param_values[] =
@@ -1146,6 +1181,22 @@ xen_vm_set_vcpus_params(xen_session *ses
 
 
 bool
+xen_vm_set_vcpus_number(xen_session *session, xen_vm vm, int64_t number)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &abstract_type_int,
+              .u.int_val = number }
+        };
+
+    xen_call_(session, "VM.set_VCPUs_number", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_vcpus_features_force_on(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_on)
 {
     abstract_value param_values[] =
@@ -1162,6 +1213,38 @@ xen_vm_set_vcpus_features_force_on(xen_s
 
 
 bool
+xen_vm_add_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.add_VCPUs_features_force_on", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
+xen_vm_remove_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.remove_VCPUs_features_force_on", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_vcpus_features_force_off(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_off)
 {
     abstract_value param_values[] =
@@ -1178,6 +1261,38 @@ xen_vm_set_vcpus_features_force_off(xen_
 
 
 bool
+xen_vm_add_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.add_VCPUs_features_force_off", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
+xen_vm_remove_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm },
+            { .type = &xen_cpu_feature_abstract_type_,
+              .u.string_val = xen_cpu_feature_to_string(value) }
+        };
+
+    xen_call_(session, "VM.remove_VCPUs_features_force_off", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_set_actions_after_shutdown(xen_session *session, xen_vm vm, enum xen_on_normal_exit after_shutdown)
 {
     abstract_value param_values[] =
@@ -1268,7 +1383,7 @@ xen_vm_set_platform_std_vga(xen_session 
               .u.bool_val = std_vga }
         };
 
-    xen_call_(session, "VM.set_platform_std_vga", param_values, 2, NULL, NULL);
+    xen_call_(session, "VM.set_platform_std_VGA", param_values, 2, NULL, NULL);
     return session->ok;
 }
 
@@ -1444,7 +1559,7 @@ xen_vm_set_otherconfig(xen_session *sess
               .u.set_val = (arbitrary_set *)otherconfig }
         };
 
-    xen_call_(session, "VM.set_otherconfig", param_values, 2, NULL, NULL);
+    xen_call_(session, "VM.set_otherConfig", param_values, 2, NULL, NULL);
     return session->ok;
 }
 
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Dec 14 08:57:36 2006 -0700
@@ -919,6 +919,68 @@ static PyObject *dom_op(XcObject *self, 
     return zero;
 }
 
+#ifdef __powerpc__
+static PyObject *pyxc_alloc_real_mode_area(XcObject *self,
+                                           PyObject *args,
+                                           PyObject *kwds)
+{
+    uint32_t dom;
+    unsigned int log;
+
+    static char *kwd_list[] = { "dom", "log", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list, 
+                                      &dom, &log) )
+        return NULL;
+
+    if ( xc_alloc_real_mode_area(self->xc_handle, dom, log) )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_prose_build(XcObject *self,
+                                  PyObject *args,
+                                  PyObject *kwds)
+{
+    uint32_t dom;
+    char *image, *ramdisk = NULL, *cmdline = "", *features = NULL;
+    int flags = 0;
+    int store_evtchn, console_evtchn;
+    unsigned long store_mfn = 0;
+    unsigned long console_mfn = 0;
+    void *arch_args = NULL;
+    int unused;
+
+    static char *kwd_list[] = { "dom", "store_evtchn",
+                                "console_evtchn", "image",
+                                /* optional */
+                                "ramdisk", "cmdline", "flags",
+                                "features", "arch_args", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssiss#", kwd_list,
+                                      &dom, &store_evtchn,
+                                      &console_evtchn, &image,
+                                      /* optional */
+                                      &ramdisk, &cmdline, &flags,
+                                      &features, &arch_args, &unused) )
+        return NULL;
+
+    if ( xc_prose_build(self->xc_handle, dom, image,
+                        ramdisk, cmdline, features, flags,
+                        store_evtchn, &store_mfn,
+                        console_evtchn, &console_mfn,
+                        arch_args) != 0 ) {
+        if (!errno)
+             errno = EINVAL;
+        return PyErr_SetFromErrno(xc_error);
+    }
+    return Py_BuildValue("{s:i,s:i}", 
+                         "store_mfn", store_mfn,
+                         "console_mfn", console_mfn);
+}
+#endif /* powerpc */
 
 static PyMethodDef pyxc_methods[] = {
     { "handle",
@@ -1224,6 +1286,27 @@ static PyMethodDef pyxc_methods[] = {
       "Set a domain's time offset to Dom0's localtime\n"
       " dom        [int]: Domain whose time offset is being set.\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
+
+#ifdef __powerpc__
+    { "arch_alloc_real_mode_area", 
+      (PyCFunction)pyxc_alloc_real_mode_area, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Allocate a domain's real mode area.\n"
+      " dom [int]: Identifier of domain.\n"
+      " log [int]: Specifies the area's size.\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "arch_prose_build", 
+      (PyCFunction)pyxc_prose_build, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Build a new Linux guest OS.\n"
+      " dom     [int]:      Identifier of domain to build into.\n"
+      " image   [str]:      Name of kernel image file. May be gzipped.\n"
+      " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+      " vcpus   [int, 1]:   Number of virtual CPUs in domain.\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+#endif /* __powerpc */
 
     { NULL, NULL, 0, NULL }
 };
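For reference, the two PowerPC-only bindings registered above are reached from Python through the xc module. A minimal sketch of a caller, assuming an existing domain; the domain id, event channels and kernel path below are made-up values, not taken from this changeset:

    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.xc()

    # Reserve a real mode area of 2^26 bytes (64 MiB) for the domain.
    xc.arch_alloc_real_mode_area(dom=5, log=26)

    # Build a PROSE guest; arch_args would normally carry the flattened
    # device tree produced by FlatDeviceTree.build().
    result = xc.arch_prose_build(dom            = 5,
                                 store_evtchn   = 1,
                                 console_evtchn = 2,
                                 image          = '/boot/prose-kernel',
                                 cmdline        = 'console=hvc0',
                                 arch_args      = '')
    print result['store_mfn'], result['console_mfn']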
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/FlatDeviceTree.py
--- a/tools/python/xen/xend/FlatDeviceTree.py   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/FlatDeviceTree.py   Thu Dec 14 08:57:36 2006 -0700
@@ -22,6 +22,10 @@ import struct
 import struct
 import stat
 import re
+import glob
+import math
+
+_host_devtree_root = '/proc/device-tree'
 
 _OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning
 _OF_DT_BEGIN_NODE = 0x1
@@ -33,8 +37,10 @@ def _bincat(seq, separator=''):
     '''Concatenate the contents of seq into a bytestream.'''
     strs = []
     for item in seq:
-        if type(item) == type(0):
+        if isinstance(item, int):
             strs.append(struct.pack(">I", item))
+        elif isinstance(item, long):
+            strs.append(struct.pack(">Q", item))
         else:
             try:
                 strs.append(item.to_bin())
@@ -231,37 +237,50 @@ class Tree(_Node):
         header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8)
         return _pad(header.to_bin(), 8) + payload
 
-_host_devtree_root = '/proc/device-tree'
-def _getprop(propname):
-    '''Extract a property from the system's device tree.'''
-    f = file(os.path.join(_host_devtree_root, propname), 'r')
+def _readfile(fullpath):
+    '''Return full contents of a file.'''
+    f = file(fullpath, 'r')
     data = f.read()
     f.close()
     return data
 
+def _find_first_cpu(dirpath):
+    '''Find the first node of type 'cpu' in a directory tree.'''
+    cpulist = glob.glob(os.path.join(dirpath, 'cpus', '*'))
+    for node in cpulist:
+        try:
+            data = _readfile(os.path.join(node, 'device_type'))
+        except IOError:
+            continue
+        if 'cpu' in data:
+            return node
+    raise IOError("couldn't find any CPU nodes under " + dirpath)
+
 def _copynode(node, dirpath, propfilter):
-    '''Extract all properties from a node in the system's device tree.'''
+    '''Copy all properties and children nodes from a directory tree.'''
     dirents = os.listdir(dirpath)
     for dirent in dirents:
         fullpath = os.path.join(dirpath, dirent)
         st = os.lstat(fullpath)
         if stat.S_ISDIR(st.st_mode):
             child = node.addnode(dirent)
-            _copytree(child, fullpath, propfilter)
+            _copynode(child, fullpath, propfilter)
         elif stat.S_ISREG(st.st_mode) and propfilter(fullpath):
-            node.addprop(dirent, _getprop(fullpath))
-
-def _copytree(node, dirpath, propfilter):
-    path = os.path.join(_host_devtree_root, dirpath)
-    _copynode(node, path, propfilter)
+            node.addprop(dirent, _readfile(fullpath))
 
 def build(imghandler):
     '''Construct a device tree by combining the domain's configuration and
     the host's device tree.'''
     root = Tree()
 
-    # 4 pages: start_info, console, store, shared_info
+    # 1st reservation entry used for start_info, console, store, shared_info
     root.reserve(0x3ffc000, 0x4000)
+
+    # 2nd reservation entry used for the initrd. Later, when we load the
+    # initrd, we may fill this in with zeroes, which signifies the end
+    # of the reservation map. To avoid adding a zero map now, we
+    # put some bogus yet sensible numbers here.
+    root.reserve(0x1000000, 0x1000)
 
     root.addprop('device_type', 'chrp-but-not-really\0')
     root.addprop('#size-cells', 2)
@@ -270,35 +289,52 @@ def build(imghandler):
     root.addprop('compatible', 'Momentum,Maple\0')
 
     xen = root.addnode('xen')
-    xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000)
+    xen.addprop('start-info', long(0x3ffc000), long(0x1000))
     xen.addprop('version', 'Xen-3.0-unstable\0')
-    xen.addprop('reg', 0, imghandler.vm.domid, 0, 0)
+    xen.addprop('reg', long(imghandler.vm.domid), long(0))
     xen.addprop('domain-name', imghandler.vm.getName() + '\0')
     xencons = xen.addnode('console')
     xencons.addprop('interrupts', 1, 0)
 
-    # XXX split out RMA node
-    mem = root.addnode('memory@0')
+    # add memory nodes
     totalmem = imghandler.vm.getMemoryTarget() * 1024
-    mem.addprop('reg', 0, 0, 0, totalmem)
-    mem.addprop('device_type', 'memory\0')
-
+    rma_log = 26 ### imghandler.vm.info.get('powerpc_rma_log')
+    rma_bytes = 1 << rma_log
+
+    # RMA node
+    rma = root.addnode('memory@0')
+    rma.addprop('reg', long(0), long(rma_bytes))
+    rma.addprop('device_type', 'memory\0')
+
+    # all the rest in a single node
+    remaining = totalmem - rma_bytes
+    if remaining > 0:
+        mem = root.addnode('memory@1')
+        mem.addprop('reg', long(rma_bytes), long(remaining))
+        mem.addprop('device_type', 'memory\0')
+
+    # add CPU nodes
     cpus = root.addnode('cpus')
     cpus.addprop('smp-enabled')
     cpus.addprop('#size-cells', 0)
     cpus.addprop('#address-cells', 1)
 
     # Copy all properties the system firmware gave us, except for 'linux,'
-    # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are
-    # identical...
+    # properties, from the first CPU node in the device tree. Do this once for
+    # every vcpu. Hopefully all cpus are identical...
     cpu0 = None
+    cpu0path = _find_first_cpu(_host_devtree_root)
     def _nolinuxprops(fullpath):
         return not os.path.basename(fullpath).startswith('linux,')
     for i in range(imghandler.vm.getVCpuCount()):
-        cpu = cpus.addnode('PowerPC,970@0')
-        _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops)
-        # and then overwrite what we need to
-        pft_size = imghandler.vm.info.get('pft-size', 0x14)
+        # create new node and copy all properties
+        cpu = cpus.addnode('PowerPC,970@%d' % i)
+        _copynode(cpu, cpu0path, _nolinuxprops)
+
+        # overwrite what we need to
+        shadow_mb = imghandler.vm.info.get('shadow_memory', 1)
+        shadow_mb_log = int(math.log(shadow_mb, 2))
+        pft_size = shadow_mb_log + 20
         cpu.setprop('ibm,pft-size', 0, pft_size)
 
         # set default CPU
@@ -307,13 +343,13 @@ def build(imghandler):
 
     chosen = root.addnode('chosen')
     chosen.addprop('cpu', cpu0.get_phandle())
-    chosen.addprop('memory', mem.get_phandle())
+    chosen.addprop('memory', rma.get_phandle())
     chosen.addprop('linux,stdout-path', '/xen/console\0')
     chosen.addprop('interrupt-controller', xen.get_phandle())
     chosen.addprop('bootargs', imghandler.cmdline + '\0')
     # xc_linux_load.c will overwrite these 64-bit properties later
-    chosen.addprop('linux,initrd-start', 0, 0)
-    chosen.addprop('linux,initrd-end', 0, 0)
+    chosen.addprop('linux,initrd-start', long(0))
+    chosen.addprop('linux,initrd-end', long(0))
 
     if 1:
         f = file('/tmp/domU.dtb', 'w')
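As a rough illustration of the arithmetic introduced above (the RMA/remainder memory split and the hash-table size derived from shadow memory), here is a standalone sketch assuming a 256 MiB guest, the hard-coded rma_log of 26 and the default 1 MiB of shadow memory:

    import math

    totalmem  = 256 * 1024 * 1024     # illustrative guest memory, in bytes
    rma_log   = 26                    # hard-coded above
    rma_bytes = 1 << rma_log          # 64 MiB  -> 'memory@0' (the RMA)
    remaining = totalmem - rma_bytes  # 192 MiB -> 'memory@1'

    shadow_mb     = 1                           # 'shadow_memory' default above
    shadow_mb_log = int(math.log(shadow_mb, 2))
    pft_size      = shadow_mb_log + 20          # log2 of the hash table size

    print rma_bytes, remaining, pft_size        # 67108864 201326592 20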
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/XendDomain.py       Thu Dec 14 08:57:36 2006 -0700
@@ -591,7 +591,9 @@ class XendDomain:
         try:
             self.domains_lock.acquire()
             result = [d.get_uuid() for d in self.domains.values()]
-            result += self.managed_domains.keys()
+            for d in self.managed_domains.keys():
+                if d not in result:
+                    result.append(d)
             return result
         finally:
             self.domains_lock.release()
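The XendDomain change above simply avoids listing a managed domain's UUID twice when that domain is also running; a toy illustration with made-up UUIDs:

    running = ['uuid-a', 'uuid-b']              # instantiated domains
    managed = {'uuid-b': None, 'uuid-c': None}  # managed domains

    result = running[:]
    for d in managed.keys():
        if d not in result:
            result.append(d)
    print result    # ['uuid-a', 'uuid-b', 'uuid-c'] -- 'uuid-b' listed once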
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Dec 14 08:57:36 2006 -0700
@@ -167,7 +167,7 @@ def recreate(info, priv):
 
     @param xeninfo: Parsed configuration
     @type  xeninfo: Dictionary
-    @param priv: TODO, unknown, something to do with memory
+    @param priv: Is a privileged domain (Dom 0)
     @type  priv: bool
 
     @rtype:  XendDomainInfo
@@ -381,7 +381,7 @@ class XendDomainInfo:
         @type    dompath: string
         @keyword augment: Augment given info with xenstored VM info
         @type    augment: bool
-        @keyword priv: Is a privledged domain (Dom 0) (TODO: really?)
+        @keyword priv: Is a privileged domain (Dom 0)
         @type    priv: bool
         @keyword resume: Is this domain being resumed?
         @type    resume: bool
@@ -563,7 +563,7 @@ class XendDomainInfo:
         for devclass in XendDevices.valid_devices():
             self.getDeviceController(devclass).waitForDevices()
 
-    def destroyDevice(self, deviceClass, devid):
+    def destroyDevice(self, deviceClass, devid, force=None):
         try:
             devid = int(devid)
         except ValueError:
@@ -578,7 +578,7 @@ class XendDomainInfo:
                     devid = entry
                     break
                 
-        return self.getDeviceController(deviceClass).destroyDevice(devid)
+        return self.getDeviceController(deviceClass).destroyDevice(devid, force)
 
 
 
@@ -647,6 +647,8 @@ class XendDomainInfo:
         if priv:
             augment_entries.remove('memory')
             augment_entries.remove('maxmem')
+            augment_entries.remove('vcpus')
+            augment_entries.remove('vcpu_avail')
 
         vm_config = self._readVMDetails([(k, XendConfig.LEGACY_CFG_TYPES[k])
                                          for k in augment_entries])
@@ -663,6 +665,14 @@ class XendDomainInfo:
                     self.info[xapiarg] = val
                 else:
                     self.info[arg] = val
+
+        # For dom0, we ignore any stored value for the vcpus fields, and
+        # read the current value from Xen instead.  This allows boot-time
+        # settings to take precedence over any entries in the store.
+        if priv:
+            xeninfo = dom_get(self.domid)
+            self.info['vcpus_number'] = xeninfo['online_vcpus']
+            self.info['vcpu_avail'] = (1 << xeninfo['online_vcpus']) - 1
 
         # read image value
         image_sxp = self._readVm('image')
@@ -895,6 +905,10 @@ class XendDomainInfo:
     def getMemoryTarget(self):
         """Get this domain's target memory size, in KB."""
         return self.info['memory_static_min'] * 1024
+
+    def getMemoryMaximum(self):
+        """Get this domain's maximum memory size, in KB."""
+        return self.info['memory_static_max'] * 1024
 
     def getResume(self):
         return str(self._resume)
@@ -1363,9 +1377,9 @@ class XendDomainInfo:
             # Use architecture- and image-specific calculations to determine
             # the various headrooms necessary, given the raw configured
             # values. maxmem, memory, and shadow are all in KiB.
+            memory = self.image.getRequiredAvailableMemory(
+                self.info['memory_static_min'] * 1024)
             maxmem = self.image.getRequiredAvailableMemory(
-                self.info['memory_static_min'] * 1024)
-            memory = self.image.getRequiredAvailableMemory(
                 self.info['memory_static_max'] * 1024)
             shadow = self.image.getRequiredShadowMemory(
                 self.info['shadow_memory'] * 1024,
@@ -1727,7 +1741,7 @@ class XendDomainInfo:
             raise VmError("VM name '%s' already exists%s" %
                           (name,
                            dom.domid is not None and
-                           ("as domain %s" % str(dom.domid)) or ""))
+                           (" as domain %s" % str(dom.domid)) or ""))
         
 
     def update(self, info = None, refresh = True):
@@ -2031,7 +2045,7 @@ class XendDomainInfo:
         if not dev_uuid:
             raise XendError('Failed to create device')
         
-        if self.state in (DOM_STATE_HALTED,):
+        if self.state in (XEN_API_VM_POWER_STATE_RUNNING,):
             sxpr = self.info.device_sxpr(dev_uuid)
             devid = self.getDeviceController('vif').createDevice(sxpr)
             raise XendError("Device creation failed")
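The dom0 handling added above derives the vcpu availability mask from the live count reported by Xen rather than from the store; a small worked example, with an illustrative count of 4:

    online_vcpus = 4                         # illustrative; read from Xen in the patch
    vcpus_number = online_vcpus
    vcpu_avail   = (1 << online_vcpus) - 1   # bitmask: vcpus 0-3 available
    print vcpus_number, hex(vcpu_avail)      # 4 0xf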
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/image.py    Thu Dec 14 08:57:36 2006 -0700
@@ -145,6 +145,14 @@ class ImageHandler:
         add headroom where necessary."""
         return self.getRequiredAvailableMemory(self.vm.getMemoryTarget())
 
+    def getRequiredMaximumReservation(self):
+        """@param mem_kb The maximum possible memory, in KiB.
+        @return The corresponding required amount of memory to be free, also
+        in KiB. This is normally the same as getRequiredAvailableMemory, but
+        architecture- or image-specific code may override this to
+        add headroom where necessary."""
+        return self.getRequiredAvailableMemory(self.vm.getMemoryMaximum())
+
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
         """@param shadow_mem_kb The configured shadow memory, in KiB.
         @param maxmem_kb The configured maxmem, in KiB.
@@ -234,6 +242,60 @@ class PPC_LinuxImageHandler(LinuxImageHa
                               ramdisk        = self.ramdisk,
                               features       = self.vm.getFeatures(),
                               arch_args      = devtree.to_bin())
+
+    def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
+        """@param shadow_mem_kb The configured shadow memory, in KiB.
+        @param maxmem_kb The configured maxmem, in KiB.
+        @return The corresponding required amount of shadow memory, also in
+        KiB.
+        PowerPC currently uses "shadow memory" to refer to the hash table."""
+        return max(maxmem_kb / 64, shadow_mem_kb)
+
+
+class PPC_ProseImageHandler(LinuxImageHandler):
+
+    ostype = "prose"
+
+    def configure(self, imageConfig, deviceConfig):
+        LinuxImageHandler.configure(self, imageConfig, deviceConfig)
+        self.imageConfig = imageConfig
+
+    def buildDomain(self):
+        store_evtchn = self.vm.getStorePort()
+        console_evtchn = self.vm.getConsolePort()
+
+        mem_mb = self.getRequiredInitialReservation() / 1024
+
+        log.debug("dom            = %d", self.vm.getDomid())
+        log.debug("memsize        = %d", mem_mb)
+        log.debug("image          = %s", self.kernel)
+        log.debug("store_evtchn   = %d", store_evtchn)
+        log.debug("console_evtchn = %d", console_evtchn)
+        log.debug("cmdline        = %s", self.cmdline)
+        log.debug("ramdisk        = %s", self.ramdisk)
+        log.debug("vcpus          = %d", self.vm.getVCpuCount())
+        log.debug("features       = %s", self.vm.getFeatures())
+
+        devtree = FlatDeviceTree.build(self)
+
+        return xc.arch_prose_build(dom            = self.vm.getDomid(),
+                                   memsize        = mem_mb,
+                                   image          = self.kernel,
+                                   store_evtchn   = store_evtchn,
+                                   console_evtchn = console_evtchn,
+                                   cmdline        = self.cmdline,
+                                   ramdisk        = self.ramdisk,
+                                   features       = self.vm.getFeatures(),
+                                   arch_args      = devtree.to_bin())
+
+    def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
+        """@param shadow_mem_kb The configured shadow memory, in KiB.
+        @param maxmem_kb The configured maxmem, in KiB.
+        @return The corresponding required amount of shadow memory, also in
+        KiB.
+        PowerPC currently uses "shadow memory" to refer to the hash table."""
+        return max(maxmem_kb / 64, shadow_mem_kb)
+
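The getRequiredShadowMemory override shared by both PowerPC handlers above sizes the hash table at 1/64th of maxmem unless the configured shadow memory is larger; a quick check with made-up numbers:

    def ppc_required_shadow_kb(shadow_mem_kb, maxmem_kb):
        # same rule as the PowerPC handlers above
        return max(maxmem_kb / 64, shadow_mem_kb)

    print ppc_required_shadow_kb(0, 512 * 1024)          # 8192 (8 MiB for 512 MiB maxmem)
    print ppc_required_shadow_kb(16 * 1024, 512 * 1024)  # 16384 (configured value wins)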
 
 class HVMImageHandler(ImageHandler):
 
@@ -539,6 +601,9 @@ class X86_HVM_ImageHandler(HVMImageHandl
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
 
+    def getRequiredMaximumReservation(self):
+        return self.vm.getMemoryMaximum()
+
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
         # 256 pages (1MB) per vcpu,
         # plus 1 page per MiB of RAM for the P2M map,
@@ -553,13 +618,14 @@ class X86_Linux_ImageHandler(LinuxImageH
     def buildDomain(self):
         # set physical mapping limit
         # add an 8MB slack to balance backend allocations.
-        mem_kb = self.getRequiredInitialReservation() + (8 * 1024)
+        mem_kb = self.getRequiredMaximumReservation() + (8 * 1024)
         xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb)
         return LinuxImageHandler.buildDomain(self)
 
 _handlers = {
     "powerpc": {
         "linux": PPC_LinuxImageHandler,
+        "prose": PPC_ProseImageHandler,
     },
     "ia64": {
         "linux": LinuxImageHandler,
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/server/DevController.py     Thu Dec 14 08:57:36 2006 -0700
@@ -19,12 +19,14 @@ from threading import Event
 from threading import Event
 import types
 
-from xen.xend import sxp
+from xen.xend import sxp, XendRoot
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
 
 from xen.xend.xenstore.xstransact import xstransact, complete
 from xen.xend.xenstore.xswatch import xswatch
+
+import os
 
 DEVICE_CREATE_TIMEOUT = 100
 HOTPLUG_STATUS_NODE = "hotplug-status"
@@ -47,6 +49,8 @@ xenbusState = {
     'Closing'      : 5,
     'Closed'       : 6,
     }
+
+xroot = XendRoot.instance()
 
 xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
 
@@ -191,7 +195,7 @@ class DevController:
         raise VmError('%s devices may not be reconfigured' % self.deviceClass)
 
 
-    def destroyDevice(self, devid):
+    def destroyDevice(self, devid, force):
         """Destroy the specified device.
 
         @param devid The device ID, or something device-specific from which
@@ -211,6 +215,13 @@ class DevController:
         # drivers, so this ordering avoids a race).
         self.writeBackend(devid, 'online', "0")
         self.writeBackend(devid, 'state', str(xenbusState['Closing']))
+
+        if force:
+            frontpath = self.frontendPath(devid)
+            backpath = xstransact.Read(frontpath, "backend")
+            if backpath:
+                xstransact.Remove(backpath)
+            xstransact.Remove(frontpath)
 
 
     def configurations(self):
@@ -313,6 +324,16 @@ class DevController:
                       Make sure that the migration has finished and only
                       then return from the call.
         """
+        tool = xroot.get_external_migration_tool()
+        if tool:
+            log.info("Calling external migration tool for step %d" % step)
+            fd = os.popen("%s -type %s -step %d -host %s -domname %s" %
+                          (tool, self.deviceClass, step, dst, domName))
+            for line in fd:
+                log.info(line.rstrip())
+            rc = fd.close()
+            if rc:
+                raise VmError('Migration tool returned %d' % (rc >> 8))
         return 0
 
 
@@ -320,6 +341,16 @@ class DevController:
         """ Recover from device migration. The given step was the
             last one that was successfully executed.
         """
+        tool = xroot.get_external_migration_tool()
+        if tool:
+            log.info("Calling external migration tool")
+            fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" %
+                          (tool, self.deviceClass, step, dst, domName))
+            for line in fd:
+                log.info(line.rstrip())
+            rc = fd.close()
+            if rc:
+                raise VmError('Migration tool returned %d' % (rc >> 8))
         return 0
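Both hooks above shell out to the same external tool with a fixed argument convention; a sketch of the command line that gets built, where the tool path and the remaining values are hypothetical (the tool itself comes from xroot.get_external_migration_tool()):

    tool     = '/etc/xen/scripts/external-device-migrate'   # hypothetical path
    devclass = 'vbd'
    step     = 2
    dst      = 'desthost'
    domName  = 'mydomain'

    cmd = "%s -type %s -step %d -host %s -domname %s" % (tool, devclass, step, dst, domName)
    print cmd
    # /etc/xen/scripts/external-device-migrate -type vbd -step 2 -host desthost -domname mydomain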
 
 
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xend/server/blkif.py     Thu Dec 14 08:57:36 2006 -0700
@@ -133,7 +133,7 @@ class BlkifController(DevController):
 
         return config
 
-    def destroyDevice(self, devid):
+    def destroyDevice(self, devid, force):
         """@see DevController.destroyDevice"""
 
         # If we are given a device name, then look up the device ID from it,
@@ -142,13 +142,13 @@ class BlkifController(DevController):
         # superclass's method.
 
         try:
-            DevController.destroyDevice(self, int(devid))
+            DevController.destroyDevice(self, int(devid), force)
         except ValueError:
             devid_end = type(devid) is str and devid.split('/')[-1] or None
 
             for i in self.deviceIDs():
                 d = self.readBackend(i, 'dev')
                 if d == devid or (devid_end and d == devid_end):
-                    DevController.destroyDevice(self, i)
+                    DevController.destroyDevice(self, i, force)
                     return
             raise VmError("Device %s not connected" % devid)
diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/python/xen/xm/main.py       Thu Dec 14 08:57:36 2006 -0700
@@ -142,14 +142,14 @@ SUBCOMMAND_HELP = {
                         'Create a new virtual block device.'),
     'block-configure': ('<Domain> <BackDev> <FrontDev> <Mode> [BackDomain]',
                         'Change block device configuration'),
-    'block-detach'  :  ('<Domain> <DevId>',
+    'block-detach'  :  ('<Domain> <DevId> [-f|--force]',
                         'Destroy a domain\'s virtual block device.'),
     'block-list'    :  ('<Domain> [--long]',
                         'List virtual block devices for a domain.'),
     'network-attach':  ('<Domain> [--script=<script>] [--ip=<ip>] '
                         '[--mac=<mac>]',
                         'Create a new virtual network device.'),
-    'network-detach':  ('<Domain> <DevId>',
+    'network-detach':  ('<Domain> <DevId> [-f|--force]',
                         'Destroy a domain\'s virtual network device.'),
     'network-list'  :  ('<Domain> [--long]',
                         'List virtual network interfaces for a domain.'),
@@ -1493,16 +1493,24 @@ def xm_network_attach(args):
 
 
 def detach(args, command, deviceClass):
-    arg_check(args, command, 2)
+    arg_check(args, command, 2, 3)
 
     dom = args[0]
     dev = args[1]
-
-    server.xend.domain.destroyDevice(dom, deviceClass, dev)
+    try:
+        force = args[2]
+        if (force != "--force") and (force != "-f"):
+            print "Ignoring option %s"%(force)
+            force = None
+    except IndexError:
+        force = None
+
+    server.xend.domain.destroyDevice(dom, deviceClass, dev, force)
 
 
 def xm_block_detach(args):
     detach(args, 'block-detach', 'vbd')
+    detach(args, 'block-detach', 'tap')
 
 
 def xm_network_detach(args):
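With the force flag plumbed through above, the option parsing in detach() behaves roughly as in this standalone sketch (domain and device names are examples only):

    args = ['mydomain', 'xvda', '--force']   # as in: xm block-detach mydomain xvda --force
    dom = args[0]
    dev = args[1]
    try:
        force = args[2]
        if (force != "--force") and (force != "-f"):
            print "Ignoring option %s" % force
            force = None
    except IndexError:
        force = None
    print dom, dev, force
    # xend then receives destroyDevice(dom, deviceClass, dev, force)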
diff -r ed56ef3e9716 -r 4762d73ced42 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Dec 14 08:54:54 2006 -0700
+++ b/tools/xenstore/xenstored_domain.c Thu Dec 14 08:57:36 2006 -0700
@@ -459,6 +459,8 @@ static int dom0_init(void)
                return -1;
 
        dom0 = new_domain(NULL, 0, port); 
+       if (dom0 == NULL)
+               return -1;
 
        dom0->interface = xenbus_map();
        if (dom0->interface == NULL)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/Makefile
--- a/xen/arch/powerpc/Makefile Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/Makefile Thu Dec 14 08:57:36 2006 -0700
@@ -9,10 +9,10 @@ obj-y += backtrace.o
 obj-y += backtrace.o
 obj-y += bitops.o
 obj-y += boot_of.o
+obj-y += cmdline.o
 obj-y += dart.o
 obj-y += dart_u3.o
 obj-y += dart_u4.o
-obj-y += delay.o
 obj-y += domctl.o
 obj-y += domain_build.o
 obj-y += domain.o
@@ -22,11 +22,12 @@ obj-y += hcalls.o
 obj-y += hcalls.o
 obj-y += iommu.o
 obj-y += irq.o
-obj-y += mambo.o
+obj-y += systemsim.o
 obj-y += memory.o
 obj-y += mm.o
 obj-y += mpic.o
 obj-y += mpic_init.o
+obj-y += numa.o
 obj-y += of-devtree.o
 obj-y += of-devwalk.o
 obj-y += ofd_fixup.o
@@ -36,6 +37,7 @@ obj-y += setup.o
 obj-y += setup.o
 obj-y += shadow.o
 obj-y += smp.o
+obj-y += smpboot.o
 obj-y += smp-tbsync.o
 obj-y += sysctl.o
 obj-y += time.o
@@ -57,11 +59,6 @@ PPC_C_WARNINGS += -Wshadow
 PPC_C_WARNINGS += -Wshadow
 CFLAGS += $(PPC_C_WARNINGS)
 
-LINK=0x400000
-boot32_link_base = $(LINK)
-xen_link_offset  = 100
-xen_link_base    = $(patsubst %000,%$(xen_link_offset),$(LINK))
-
 #
 # The following flags are fed to gcc in order to link several
 # objects into a single ELF segment and to not link in any additional
@@ -72,34 +69,39 @@ firmware: of_handler/built_in.o $(TARGET
 firmware: of_handler/built_in.o $(TARGET_SUBARCH)/memcpy.o of-devtree.o
        $(CC) $(CFLAGS) $(OMAGIC) -e __ofh_start -Wl,-Ttext,0x0 $^ -o $@
 
-firmware_image: firmware
+firmware_image.bin: firmware
        $(CROSS_COMPILE)objcopy --output-target=binary $< $@
-
-firmware_image.o: firmware_image
-       $(CROSS_COMPILE)objcopy --input-target=binary \
-               --output-target=elf64-powerpc \
-               --binary-architecture=powerpc \
-               --redefine-sym _binary_$<_start=$(@:%.o=%)_start \
-               --redefine-sym _binary_$<_end=$(@:%.o=%)_end \
-               --redefine-sym _binary_$<_size=$(@:%.o=%)_size  $< $@
 
 #
 # Hacks for included C files
 #
 irq.o: ../x86/irq.c
 physdev.o: ../x86/physdev.c
+numa.o: ../x86/numa.c
 
 HDRS += $(wildcard *.h)
 
+ifneq ($(CMDLINE),)
 # The first token in the arguments will be silently dropped.
-IMAGENAME = xen
-CMDLINE = ""
-boot_of.o: CFLAGS += -DCMDLINE="\"$(IMAGENAME) $(CMDLINE)\""
+FULL_CMDLINE := xen $(CMDLINE)
+endif
 
-start.o: boot/start.S
-       $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+ifeq ($(wildcard cmdline.dep),)
+cmdline.dep:
+       echo $(FULL_CMDLINE) > cmdline.dep
+else
+ifneq ($(FULL_CMDLINE),$(shell cat cmdline.dep))
+cmdline.dep::
+       echo $(FULL_CMDLINE) > cmdline.dep
+else
+cmdline.dep:
+endif
+endif
 
-TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
+cmdline.o: cmdline.dep
+cmdline.o: CFLAGS += -DCMDLINE="\"$(FULL_CMDLINE)\""
+
+TARGET_OPTS = $(OMAGIC) -Wl,-T,xen.lds
 TARGET_OPTS += start.o $(ALL_OBJS)
 
 .xen-syms: start.o $(ALL_OBJS) xen.lds
@@ -122,22 +124,10 @@ xen-syms.o: xen-syms.S
 $(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds
        $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@
 
-$(TARGET).bin: $(TARGET)-syms
-       $(CROSS_COMPILE)objcopy --output-target=binary $< $@
-
-$(TARGET).bin.o: $(TARGET).bin
-       $(CROSS_COMPILE)objcopy --input-target=binary \
-               --output-target=elf32-powerpc \
-               --binary-architecture=powerpc  $< $@
-
-boot32.o: boot/boot32.S
-       $(CC) -m32 -Wa,-a32,-mppc64bridge \
-               -D__ASSEMBLY__ -D__BRIDGE64__ $(CFLAGS) -c $< -o $@
-
-$(TARGET): boot32.o $(TARGET).bin.o
-       $(CC) -m32 -N -Wl,-melf32ppclinux -static -nostdlib \
-               -Wl,-Ttext,$(boot32_link_base)  -Wl,-Tdata,$(xen_link_base) \
-               $(CFLAGS) $^ -o $@
+# our firmware only loads 32-bit ELF files
+OCPYFLAGS := --input-target=elf64-powerpc --output-target=elf32-powerpc
+$(TARGET): $(TARGET)-syms
+       $(CROSS_COMPILE)objcopy $(OCPYFLAGS) $^ $@
 
 asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
        $(CC) $(CFLAGS) -S -o $@ $<
@@ -150,4 +140,5 @@ dom0.bin: $(DOM0_IMAGE)
 
 clean::
        $(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean
-       rm -f firmware firmware_image dom0.bin .xen-syms
+       rm -f firmware firmware_image.bin dom0.bin .xen-syms xen-syms.S \
+               xen.lds asm-offsets.s cmdline.dep
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/backtrace.c
--- a/xen/arch/powerpc/backtrace.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/backtrace.c      Thu Dec 14 08:57:36 2006 -0700
@@ -14,6 +14,7 @@
 #include <xen/console.h>
 #include <xen/sched.h>
 #include <xen/symbols.h>
+#include <asm/debugger.h>
 
 static char namebuf[KSYM_NAME_LEN+1];
 
@@ -192,6 +193,19 @@ void show_backtrace(ulong sp, ulong lr, 
     console_end_sync();
 }
 
+void show_backtrace_regs(struct cpu_user_regs *regs)
+{
+    console_start_sync();
+    
+    show_registers(regs);
+    printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
+    printk("hid4 0x%016lx\n", regs->hid4);
+    printk("---[ backtrace ]---\n");
+    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
+
+    console_end_sync();
+}
+
 void __warn(char *file, int line)
 {
     ulong sp;
@@ -202,9 +216,19 @@ void __warn(char *file, int line)
 
     sp = (ulong)__builtin_frame_address(0);
     lr = (ulong)__builtin_return_address(0);
-
     backtrace(sp, lr, lr);
-    console_end_sync();
-}
-
-    
+
+    console_end_sync();
+}
+
+void dump_execution_state(void)
+{
+    struct vcpu *v = current;
+    struct cpu_user_regs *regs = &v->arch.ctxt;
+
+    show_registers(regs);
+    if (regs->msr & MSR_HV) {
+        printk("In Xen:\n");
+        show_backtrace(regs->gprs[1], regs->pc, regs->lr);
+    }
+}
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/bitops.c
--- a/xen/arch/powerpc/bitops.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/bitops.c Thu Dec 14 08:57:36 2006 -0700
@@ -12,42 +12,42 @@
  * @size: The maximum size to search
  */
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-                           unsigned long offset)
+                            unsigned long offset)
 {
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
 
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp &= (~0UL << offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if ((tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
+    if (offset >= size)
+        return size;
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp &= (~0UL << offset);
+        if (size < BITS_PER_LONG)
+            goto found_first;
+        if (tmp)
+            goto found_middle;
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if ((tmp = *(p++)))
+            goto found_middle;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+    tmp = *p;
 
 found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
-               return result + size;   /* Nope. */
+    tmp &= (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL)        /* Are any bits set? */
+        return result + size;    /* Nope. */
 found_middle:
-       return result + __ffs(tmp);
+    return result + __ffs(tmp);
 }
 
 /*
@@ -55,40 +55,40 @@ found_middle:
  * Linus' asm-alpha/bitops.h.
  */
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-                                unsigned long offset)
+                                 unsigned long offset)
 {
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
 
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp |= ~0UL >> (BITS_PER_LONG - offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (~tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if (~(tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
+    if (offset >= size)
+        return size;
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp |= ~0UL >> (BITS_PER_LONG - offset);
+        if (size < BITS_PER_LONG)
+            goto found_first;
+        if (~tmp)
+            goto found_middle;
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if (~(tmp = *(p++)))
+            goto found_middle;
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size)
+        return result;
+    tmp = *p;
 
 found_first:
-       tmp |= ~0UL << size;
-       if (tmp == ~0UL)        /* Are any bits zero? */
-               return result + size;   /* Nope. */
+    tmp |= ~0UL << size;
+    if (tmp == ~0UL)    /* Are any bits zero? */
+        return result + size;    /* Nope. */
 found_middle:
-       return result + ffz(tmp);
+    return result + ffz(tmp);
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/boot_of.c
--- a/xen/arch/powerpc/boot_of.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/boot_of.c        Thu Dec 14 08:57:36 2006 -0700
@@ -16,6 +16,7 @@
  * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ *          Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -32,6 +33,7 @@
 #include "exceptions.h"
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
 
 /* Secondary processors use this for handshaking with main processor.  */
 volatile unsigned int __spin_ack;
@@ -39,20 +41,27 @@ static ulong of_vec;
 static ulong of_vec;
 static ulong of_msr;
 static int of_out;
-static char bootargs[256];
-
-#define COMMAND_LINE_SIZE 512
-static char builtin_cmdline[COMMAND_LINE_SIZE]
-    __attribute__((section("__builtin_cmdline"))) = CMDLINE;
-
+static ulong eomem;
+
+#define MEM_AVAILABLE_PAGES ((32 << 20) >> PAGE_SHIFT)
+static DECLARE_BITMAP(mem_available_pages, MEM_AVAILABLE_PAGES);
+
+extern char builtin_cmdline[];
 extern struct ns16550_defaults ns16550;
 
 #undef OF_DEBUG
+#undef OF_DEBUG_LOW
 
 #ifdef OF_DEBUG
 #define DBG(args...) of_printf(args)
 #else
 #define DBG(args...)
+#endif
+
+#ifdef OF_DEBUG_LOW
+#define DBG_LOW(args...) of_printf(args)
+#else
+#define DBG_LOW(args...)
 #endif
 
 #define of_panic(MSG...) \
@@ -68,7 +77,6 @@ static int bof_chosen;
 static int bof_chosen;
 
 static struct of_service s;
-extern s32 prom_call(void *arg, ulong rtas_base, ulong func, ulong msr);
 
 static int __init of_call(
     const char *service, u32 nargs, u32 nrets, s32 rets[], ...)
@@ -78,7 +86,6 @@ static int __init of_call(
     if (of_vec != 0) {
         va_list args;
         int i;
-
         memset(&s, 0, sizeof (s));
         s.ofs_service = (ulong)service;
         s.ofs_nargs = nargs;
@@ -189,7 +196,7 @@ static int __init of_finddevice(const ch
         DBG("finddevice %s -> FAILURE %d\n",devspec,rets[0]);
         return OF_FAILURE;
     }
-    DBG("finddevice %s -> %d\n",devspec, rets[0]);
+    DBG_LOW("finddevice %s -> %d\n",devspec, rets[0]);
     return rets[0];
 }
 
@@ -200,11 +207,11 @@ static int __init of_getprop(int ph, con
     of_call("getprop", 4, 1, rets, ph, name, buf, buflen);
 
     if (rets[0] == OF_FAILURE) {
-        DBG("getprop 0x%x %s -> FAILURE\n", ph, name);
+        DBG_LOW("getprop 0x%x %s -> FAILURE\n", ph, name);
         return OF_FAILURE;
     }
 
-    DBG("getprop 0x%x %s -> 0x%x (%s)\n", ph, name, rets[0], (char *)buf);
+    DBG_LOW("getprop 0x%x %s -> 0x%x (%s)\n", ph, name, rets[0], (char *)buf);
     return rets[0];
 }
 
@@ -220,7 +227,7 @@ static int __init of_setprop(
         return OF_FAILURE;
     }
 
-    DBG("setprop 0x%x %s -> %s\n", ph, name, (char *)buf);
+    DBG_LOW("setprop 0x%x %s -> %s\n", ph, name, (char *)buf);
     return rets[0];
 }
 
@@ -232,7 +239,7 @@ static int __init of_getchild(int ph)
     int rets[1] = { OF_FAILURE };
 
     of_call("child", 1, 1, rets, ph);
-    DBG("getchild 0x%x -> 0x%x\n", ph, rets[0]);
+    DBG_LOW("getchild 0x%x -> 0x%x\n", ph, rets[0]);
 
     return rets[0];
 }
@@ -245,7 +252,7 @@ static int __init of_getpeer(int ph)
     int rets[1] = { OF_FAILURE };
 
     of_call("peer", 1, 1, rets, ph);
-    DBG("getpeer 0x%x -> 0x%x\n", ph, rets[0]);
+    DBG_LOW("getpeer 0x%x -> 0x%x\n", ph, rets[0]);
 
     return rets[0];
 }
@@ -259,7 +266,7 @@ static int __init of_getproplen(int ph, 
         DBG("getproplen 0x%x %s -> FAILURE\n", ph, name);
         return OF_FAILURE;
     }
-    DBG("getproplen 0x%x %s -> 0x%x\n", ph, name, rets[0]);
+    DBG_LOW("getproplen 0x%x %s -> 0x%x\n", ph, name, rets[0]);
     return rets[0];
 }
 
@@ -272,7 +279,7 @@ static int __init of_package_to_path(int
         DBG("%s 0x%x -> FAILURE\n", __func__, ph);
         return OF_FAILURE;
     }
-    DBG("%s 0x%x %s -> 0x%x\n", __func__, ph, buffer, rets[0]);
+    DBG_LOW("%s 0x%x %s -> 0x%x\n", __func__, ph, buffer, rets[0]);
     if (rets[0] <= buflen)
         buffer[rets[0]] = '\0';
     return rets[0];
@@ -289,7 +296,7 @@ static int __init of_nextprop(int ph, co
         return OF_FAILURE;
     }
 
-    DBG("nextprop 0x%x %s -> %s\n", ph, name, (char *)buf);
+    DBG_LOW("nextprop 0x%x %s -> %s\n", ph, name, (char *)buf);
     return rets[0];
 }
 
@@ -336,7 +343,7 @@ static int __init of_claim(u32 virt, u32
         return OF_FAILURE;
     }
 
-    DBG("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
+    DBG_LOW("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
         rets[0]);
     return rets[0];
 }
@@ -358,29 +365,194 @@ static int __init of_getparent(int ph)
 
     of_call("parent", 1, 1, rets, ph);
 
-    DBG("getparent 0x%x -> 0x%x\n", ph, rets[0]);
-    return rets[0];
-}
-
-static void boot_of_probemem(multiboot_info_t *mbi)
+    DBG_LOW("getparent 0x%x -> 0x%x\n", ph, rets[0]);
+    return rets[0];
+}
+
+static int __init of_open(const char *devspec)
+{
+    int rets[1] = { OF_FAILURE };
+
+    of_call("open", 1, 1, rets, devspec);
+    return rets[0];
+}
+
+static void boot_of_alloc_init(int m, uint addr_cells, uint size_cells)
+{
+    int rc;
+    uint pg;
+    uint a[64];
+    int tst;
+    u64 start;
+    u64 size;
+
+    rc = of_getprop(m, "available", a, sizeof (a));
+    if (rc > 0) {
+        int l =  rc / sizeof(a[0]);
+        int r = 0;
+
+#ifdef OF_DEBUG
+        { 
+            int i;
+            of_printf("avail:\n");
+            for (i = 0; i < l; i += 4)
+                of_printf("  0x%x%x, 0x%x%x\n",
+                          a[i], a[i + 1],
+                          a[i + 2] ,a[i + 3]);
+        }
+#endif
+            
+        pg = 0;
+        while (pg < MEM_AVAILABLE_PAGES && r < l) {
+            ulong end;
+
+            start = a[r++];
+            if (addr_cells == 2 && (r < l) )
+                start = (start << 32) | a[r++];
+            
+            size = a[r++];
+            if (size_cells == 2 && (r < l) )
+                size = (size << 32) | a[r++];
+                
+            end = ALIGN_DOWN(start + size, PAGE_SIZE);
+
+            start = ALIGN_UP(start, PAGE_SIZE);
+
+            DBG("%s: marking 0x%x - 0x%lx\n", __func__,
+                pg << PAGE_SHIFT, start);
+
+            start >>= PAGE_SHIFT;
+            while (pg < MEM_AVAILABLE_PAGES && pg < start) {
+                set_bit(pg, mem_available_pages);
+                pg++;
+            }
+
+            pg = end  >> PAGE_SHIFT;
+        }
+    }
+
+    /* Now make sure we mark our own memory */
+    pg =  (ulong)_start >> PAGE_SHIFT;
+    start = (ulong)_end >> PAGE_SHIFT;
+
+    DBG("%s: marking 0x%x - 0x%lx\n", __func__,
+        pg << PAGE_SHIFT, start << PAGE_SHIFT);
+
+    /* Let's try and detect if our image has stepped on something. It
+     * is possible that FW has already subtracted our image from
+     * available memory so we must make sure that the previous bits
+     * are the same for the whole image */
+    tst = test_and_set_bit(pg, mem_available_pages);
+    ++pg;
+    while (pg <= start) {
+        if (test_and_set_bit(pg, mem_available_pages) != tst)
+            of_panic("%s: pg :0x%x of our image is different\n",
+                     __func__, pg);
+        ++pg;
+    }
+
+    DBG("%s: marking 0x%x - 0x%x\n", __func__,
+        0 << PAGE_SHIFT, 3 << PAGE_SHIFT);
+    /* First four pages (where the vectors are) should be left alone as well */
+    set_bit(0, mem_available_pages);
+    set_bit(1, mem_available_pages);
+    set_bit(2, mem_available_pages);
+    set_bit(3, mem_available_pages);
+}
+
+#ifdef BOOT_OF_FREE
+/* this is here in case we ever need a free call at a later date */
+static void boot_of_free(ulong addr, ulong size)
+{
+    ulong bits;
+    ulong pos;
+    ulong i;
+
+    size = ALIGN_UP(size, PAGE_SIZE);
+    bits = size >> PAGE_SHIFT;
+    pos = addr >> PAGE_SHIFT;
+
+    for (i = 0; i < bits; i++) {
+        if (!test_and_clear_bit(pos + i, mem_available_pages))
+            of_panic("%s: pg :0x%lx was never allocated\n",
+                     __func__, pos + i);
+    }
+}
+#endif
+
+static ulong boot_of_alloc(ulong size)
+{
+    ulong bits;
+    ulong pos;
+
+    if (size == 0)
+        return 0;
+
+    DBG("%s(0x%lx)\n", __func__, size);
+
+    size = ALIGN_UP(size, PAGE_SIZE);
+    bits = size >> PAGE_SHIFT;
+    pos = 0;
+    for (;;) {
+        ulong i;
+
+        pos = find_next_zero_bit(mem_available_pages,
+                                 MEM_AVAILABLE_PAGES, pos);
+        DBG("%s: found start bit at: 0x%lx\n", __func__, pos);
+
+        /* found nothing */
+        if ((pos + bits) > MEM_AVAILABLE_PAGES) {
+            of_printf("%s: allocation of size: 0x%lx failed\n",
+                     __func__, size);
+            return 0;
+        }
+
+        /* find a set that fits */
+        DBG("%s: checking for 0x%lx bits: 0x%lx\n", __func__, bits, pos);
+
+        i = find_next_bit(mem_available_pages, MEM_AVAILABLE_PAGES, pos);  
+        if (i - pos >= bits) {
+            uint addr = pos << PAGE_SHIFT;
+
+            /* make sure OF is happy with our choice */
+            if (of_claim(addr, size, 0) != OF_FAILURE) {
+                for (i = 0; i < bits; i++)
+                    set_bit(pos + i, mem_available_pages);
+
+                DBG("%s: 0x%lx is good returning 0x%x\n",
+                    __func__, pos, addr);
+                return addr;
+            }
+            /* if OF did not like the address then simply start from
+             * the next bit */
+            i = 1;
+        }
+
+        pos = pos + i;
+    }
+}
+
+static ulong boot_of_mem_init(void)
 {
     int root;
     int p;
-    u32 addr_cells = 1;
-    u32 size_cells = 1;
     int rc;
-    int mcount = 0;
-    static memory_map_t mmap[16];
+    uint addr_cells;
+    uint size_cells;
 
     root = of_finddevice("/");
     p = of_getchild(root);
 
     /* code is writen to assume sizes of 1 */
-    of_getprop(root, "#address-cells", &addr_cells, sizeof (addr_cells));
-    of_getprop(root, "#size-cells", &size_cells, sizeof (size_cells));
+    of_getprop(root, "#address-cells", &addr_cells,
+               sizeof (addr_cells));
+    of_getprop(root, "#size-cells", &size_cells,
+               sizeof (size_cells));
     DBG("%s: address_cells=%d  size_cells=%d\n",
                     __func__, addr_cells, size_cells);
-    
+
+    /* We do real memory discovery later; for now we only want to find
+     * the first LMB */
     do {
         const char memory[] = "memory";
         char type[32];
@@ -389,82 +561,69 @@ static void boot_of_probemem(multiboot_i
 
         of_getprop(p, "device_type", type, sizeof (type));
         if (strncmp(type, memory, sizeof (memory)) == 0) {
-            u32 reg[48];  
-            u32 al, ah, ll, lh;
+            uint reg[48];  
+            u64 start;
+            u64 size;
             int r;
+            int l;
 
             rc = of_getprop(p, "reg", reg, sizeof (reg));
             if (rc == OF_FAILURE) {
                 of_panic("no reg property for memory node: 0x%x.\n", p);
             }
-            int l = rc/sizeof(u32); /* number reg element */
+
+            l = rc / sizeof(reg[0]); /* number reg element */
             DBG("%s: number of bytes in property 'reg' %d\n",
                             __func__, rc);
             
             r = 0;
             while (r < l) {
-                al = ah = ll = lh = 0;
-                if (addr_cells == 2) {
-                    ah = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip  */
-                    al = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip */
-                } else {
-                    al = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip */
+                start = reg[r++];
+                if (addr_cells == 2 && (r < l) )
+                    start = (start << 32) | reg[r++];
+
+                if (r >= l)
+                    break;  /* partial line.  Skip */
+
+                if (start > 0) {
+                    /* this is not the first LMB so we skip it */
+                    break;
                 }
-                if (size_cells == 2) {
-                    lh = reg[r++];
-                    if (r >= l)
-                        break;  /* partial line.  Skip */
-                    ll = reg[r++];
-                } else {
-                    ll = reg[r++];
-                }
-
-                if ((ll != 0) || (lh != 0)) {
-                    mmap[mcount].size = 20; /* - size field */
-                    mmap[mcount].type = 1; /* Regular ram */
-                    mmap[mcount].length_high = lh;
-                    mmap[mcount].length_low = ll;
-                    mmap[mcount].base_addr_high = ah;
-                    mmap[mcount].base_addr_low = al;
-                    of_printf("%s: memory 0x%016lx[0x%08lx]\n",
-                      __func__,
-                      (u64)(((u64)mmap[mcount].base_addr_high << 32)
-                            | mmap[mcount].base_addr_low),
-                      (u64)(((u64)mmap[mcount].length_high << 32)
-                            | mmap[mcount].length_low));
-                    ++mcount;
-                }
+
+                size = reg[r++];
+                if (size_cells == 2 && (r < l) )
+                    size = (size << 32) | reg[r++];
+                
+                if (r > l)
+                    break;  /* partial line.  Skip */
+
+                boot_of_alloc_init(p, addr_cells, size_cells);
+                
+                eomem = size;
+                return size;
             }
         }
         p = of_getpeer(p);
     } while (p != OF_FAILURE && p != 0);
 
-    if (mcount > 0) {
-        mbi->flags |= MBI_MEMMAP;
-        mbi->mmap_length = sizeof (mmap[0]) * mcount;
-        mbi->mmap_addr = (ulong)mmap;
-    }
+    return 0;
 }
 
 static void boot_of_bootargs(multiboot_info_t *mbi)
 {
     int rc;
 
-    rc = of_getprop(bof_chosen, "bootargs", &bootargs, sizeof (bootargs));
-    if (rc == OF_FAILURE || bootargs[0] == '\0') {
-        strlcpy(bootargs, builtin_cmdline, sizeof(bootargs));
+    if (builtin_cmdline[0] == '\0') {
+        rc = of_getprop(bof_chosen, "bootargs", builtin_cmdline,
+                CONFIG_CMDLINE_SIZE);
+        if (rc > CONFIG_CMDLINE_SIZE)
+            of_panic("bootargs[] not big enough for /chosen/bootargs\n");
     }
 
     mbi->flags |= MBI_CMDLINE;
-    mbi->cmdline = (u32)bootargs;
-
-    of_printf("bootargs = %s\n", bootargs);
+    mbi->cmdline = (ulong)builtin_cmdline;
+
+    of_printf("bootargs = %s\n", builtin_cmdline);
 }
 
 static int save_props(void *m, ofdn_t n, int pkg)
@@ -500,7 +659,8 @@ static int save_props(void *m, ofdn_t n,
                     of_panic("obj array not big enough for 0x%x\n", sz);
                 }
                 actual = of_getprop(pkg, name, obj, sz);
-                if (actual > sz) of_panic("obj too small");
+                if (actual > sz)
+                    of_panic("obj too small");
             }
 
             if (strncmp(name, name_str, sizeof(name_str)) == 0) {
@@ -512,7 +672,8 @@ static int save_props(void *m, ofdn_t n,
             }
 
             pos = ofd_prop_add(m, n, name, obj, actual);
-            if (pos == 0) of_panic("prop_create");
+            if (pos == 0)
+                of_panic("prop_create");
         }
 
         result = of_nextprop(pkg, name, name);
@@ -536,10 +697,12 @@ retry:
 
     if (pnext != 0) {
         sz = of_package_to_path(pnext, path, psz);
-        if (sz == OF_FAILURE) of_panic("bad path\n");
+        if (sz == OF_FAILURE)
+            of_panic("bad path\n");
 
         nnext = ofd_node_child_create(m, n, path, sz);
-        if (nnext == 0) of_panic("out of mem\n");
+        if (nnext == 0)
+            of_panic("out of mem\n");
 
         do_pkg(m, nnext, pnext, path, psz);
     }
@@ -551,7 +714,8 @@ retry:
         sz = of_package_to_path(pnext, path, psz);
 
         nnext = ofd_node_peer_create(m, n, path, sz);
-        if (nnext <= 0) of_panic("out of space in OFD tree.\n");
+        if (nnext <= 0)
+            of_panic("out of space in OFD tree.\n");
 
         n = nnext;
         p = pnext;
@@ -559,7 +723,7 @@ retry:
     }
 }
 
-static int pkg_save(void *mem)
+static long pkg_save(void *mem)
 {
     int root;
     char path[256];
@@ -570,11 +734,12 @@ static int pkg_save(void *mem)
 
     /* get root */
     root = of_getpeer(0);
-    if (root == OF_FAILURE) of_panic("no root package\n");
+    if (root == OF_FAILURE)
+        of_panic("no root package\n");
 
     do_pkg(mem, OFD_ROOT, root, path, sizeof(path));
 
-    r = (((ofdn_t *)mem)[1] + 1) * sizeof (u64);
+    r = ofd_size(mem);
 
     of_printf("%s: saved device tree in 0x%x bytes\n", __func__, r);
 
@@ -604,7 +769,8 @@ static int boot_of_fixup_refs(void *mem)
             char ofpath[256];
 
             path = ofd_node_path(mem, c);
-            if (path == NULL) of_panic("no path to found prop: %s\n", name);
+            if (path == NULL)
+                of_panic("no path to found prop: %s\n", name);
 
             rp = of_finddevice(path);
             if (rp == OF_FAILURE)
@@ -629,13 +795,15 @@ static int boot_of_fixup_refs(void *mem)
                          "ref 0x%x\n", name, path, rp, ref);
 
             dp = ofd_node_find(mem, ofpath);
-            if (dp <= 0) of_panic("no ofd node for OF node[0x%x]: %s\n",
-                                  ref, ofpath);
+            if (dp <= 0)
+                of_panic("no ofd node for OF node[0x%x]: %s\n",
+                         ref, ofpath);
 
             ref = dp;
 
             upd = ofd_prop_add(mem, c, name, &ref, sizeof(ref));
-            if (upd <= 0) of_panic("update failed: %s\n", name);
+            if (upd <= 0)
+                of_panic("update failed: %s\n", name);
 
 #ifdef DEBUG
             of_printf("%s: %s/%s -> %s\n", __func__,
@@ -658,7 +826,8 @@ static int boot_of_fixup_chosen(void *me
     char ofpath[256];
 
     ch = of_finddevice("/chosen");
-    if (ch == OF_FAILURE) of_panic("/chosen not found\n");
+    if (ch == OF_FAILURE)
+        of_panic("/chosen not found\n");
 
     rc = of_getprop(ch, "cpu", &val, sizeof (val));
 
@@ -667,16 +836,19 @@ static int boot_of_fixup_chosen(void *me
 
         if (rc > 0) {
             dn = ofd_node_find(mem, ofpath);
-            if (dn <= 0) of_panic("no node for: %s\n", ofpath);
+            if (dn <= 0)
+                of_panic("no node for: %s\n", ofpath);
 
             ofd_boot_cpu = dn;
             val = dn;
 
             dn = ofd_node_find(mem, "/chosen");
-            if (dn <= 0) of_panic("no /chosen node\n");
+            if (dn <= 0)
+                of_panic("no /chosen node\n");
 
             dc = ofd_prop_add(mem, dn, "cpu", &val, sizeof (val));
-            if (dc <= 0) of_panic("could not fix /chosen/cpu\n");
+            if (dc <= 0)
+                of_panic("could not fix /chosen/cpu\n");
             rc = 1;
         } else {
             of_printf("*** can't find path to booting cpu, "
@@ -685,56 +857,6 @@ static int boot_of_fixup_chosen(void *me
         }
     }
     return rc;
-}
-
-static ulong space_base;
-
-/*
- * The following function is necessary because we cannot depend on all
- * FW to actually allocate us any space, so we look for it _hoping_
- * that at least is will fail if we try to claim something that
- * belongs to FW.  This hope does not seem to be true on some version
- * of PIBS.
- */
-static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi)
-{
-    memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr);
-    ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low;
-    ulong base;
-
-    if (size == 0)
-        return 0;
-
-    if (align == 0)
-        of_panic("cannot call %s() with align of 0\n", __func__);
-
-#ifdef BROKEN_CLAIM_WORKAROUND
-    {
-        static int broken_claim;
-        if (!broken_claim) {
-            /* just try and claim it to the FW chosen address */
-            base = of_claim(0, size, align);
-            if (base != OF_FAILURE)
-                return base;
-            of_printf("%s: Firmware does not allocate memory for you\n",
-                      __func__);
-            broken_claim = 1;
-        }
-    }
-#endif
-
-    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%x\n",
-                    __func__, space_base, eomem, size, align);
-    base = ALIGN_UP(space_base, PAGE_SIZE);
-
-    while ((base + size) < rma_size(cpu_default_rma_order_pages())) {
-        if (of_claim(base, size, 0) != OF_FAILURE) {
-            space_base = base + size;
-            return base;
-        }
-        base += (PAGE_SIZE >  align) ? PAGE_SIZE : align;
-    }
-    of_panic("Cannot find memory in the RMA\n");
 }
 
 /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges
@@ -798,8 +920,10 @@ static int __init boot_of_serial(void *o
             of_panic("package-to-path failed\n");
 
         rc = of_getprop(p, "device_type", type, sizeof (type));
-        if (rc == OF_FAILURE)
-            of_panic("fetching device type failed\n");
+        if (rc == OF_FAILURE) {
+            of_printf("%s: fetching type of `%s' failed\n", __func__, buf);
+            continue;
+        }
 
         if (strcmp(type, "serial") != 0)
             continue;
@@ -855,17 +979,104 @@ static int __init boot_of_serial(void *o
     return 1;
 }
 
-static void boot_of_module(ulong r3, ulong r4, multiboot_info_t *mbi)
-{
-    static module_t mods[3];
+static int __init boot_of_rtas(module_t *mod, multiboot_info_t *mbi)
+{
+    int rtas_node;
+    int rtas_instance;
+    uint size = 0;
+    int res[2];
+    int mem;
+    int ret;
+
+    rtas_node = of_finddevice("/rtas");
+
+    if (rtas_node <= 0) {
+        of_printf("No RTAS, Xen has no power control\n");
+        return 0;
+    }
+    of_getprop(rtas_node, "rtas-size", &size, sizeof (size));
+    if (size == 0) {
+        of_printf("RTAS, has no size\n");
+        return 0;
+    }
+
+    rtas_instance = of_open("/rtas");
+    if (rtas_instance == OF_FAILURE) {
+        of_printf("RTAS, could not open\n");
+        return 0;
+    }
+
+    size = ALIGN_UP(size, PAGE_SIZE);
+    
+    mem = boot_of_alloc(size);
+    if (mem == 0)
+        of_panic("Could not allocate RTAS tree\n");
+
+    of_printf("instantiating RTAS at: 0x%x\n", mem);
+
+    ret = of_call("call-method", 3, 2, res,
+                  "instantiate-rtas", rtas_instance, mem);
+    if (ret == OF_FAILURE) {
+        of_printf("RTAS, could not open\n");
+        return 0;
+    }
+    
+    rtas_entry = res[1];
+    rtas_base = mem;
+    rtas_end = mem + size;
+    rtas_msr = of_msr;
+
+    mod->mod_start = rtas_base;
+    mod->mod_end = rtas_end;
+    return 1;
+}
+
+static void * __init boot_of_devtree(module_t *mod, multiboot_info_t *mbi)
+{
     void *oft;
     ulong oft_sz = 48 * PAGE_SIZE;
+
+    /* snapshot the tree */
+    oft = (void *)boot_of_alloc(oft_sz);
+    if (oft == NULL)
+        of_panic("Could not allocate OFD tree\n");
+
+    of_printf("creating oftree at: 0x%p\n", oft);
+    of_test("package-to-path");
+    oft = ofd_create(oft, oft_sz);
+    pkg_save(oft);
+
+    if (ofd_size(oft) > oft_sz)
+         of_panic("Could not fit all of native devtree\n");
+
+    boot_of_fixup_refs(oft);
+    boot_of_fixup_chosen(oft);
+
+    if (ofd_size(oft) > oft_sz)
+         of_panic("Could not fit all devtree fixups\n");
+
+    ofd_walk(oft, __func__, OFD_ROOT, /* add_hype_props */ NULL, 2);
+
+    mod->mod_start = (ulong)oft;
+    mod->mod_end = mod->mod_start + oft_sz;
+    of_printf("%s: devtree mod @ 0x%016x - 0x%016x\n", __func__,
+              mod->mod_start, mod->mod_end);
+
+    return oft;
+}
+
+static void * __init boot_of_module(ulong r3, ulong r4, multiboot_info_t *mbi)
+{
+    static module_t mods[4];
     ulong mod0_start;
     ulong mod0_size;
-    static const char sepr[] = " -- ";
+    static const char * sepr[] = {" -- ", " || "};
+    int sepr_index;
     extern char dom0_start[] __attribute__ ((weak));
     extern char dom0_size[] __attribute__ ((weak));
-    const char *p;
+    const char *p = NULL;
+    int mod;
+    void *oft;
 
     if ((r3 > 0) && (r4 > 0)) {
         /* was it handed to us in registers ? */
@@ -908,57 +1119,50 @@ static void boot_of_module(ulong r3, ulo
         of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]);
     }
 
-    space_base = (ulong)_end;
-    mods[0].mod_start = mod0_start;
-    mods[0].mod_end = mod0_start + mod0_size;
-
-    of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__,
-              mods[0].mod_start, mods[0].mod_end);
-    p = strstr((char *)(ulong)mbi->cmdline, sepr);
+    mod = 0;
+    mods[mod].mod_start = mod0_start;
+    mods[mod].mod_end = mod0_start + mod0_size;
+
+    of_printf("%s: dom0 mod @ 0x%016x[0x%x]\n", __func__,
+              mods[mod].mod_start, mods[mod].mod_end);
+
+    /* look for delimiter: "--" or "||" */
+    for (sepr_index = 0; sepr_index < ARRAY_SIZE(sepr); sepr_index++){
+        p = strstr((char *)(ulong)mbi->cmdline, sepr[sepr_index]);
+        if (p != NULL)
+            break;
+    }
+
     if (p != NULL) {
-        p += sizeof (sepr) - 1;
-        mods[0].string = (u32)(ulong)p;
-        of_printf("%s: mod[0].string: %s\n", __func__, p);
-    }
-
-    /* snapshot the tree */
-    oft = (void*)find_space(oft_sz, PAGE_SIZE, mbi);
-    if (oft == 0)
-        of_panic("Could not allocate OFD tree\n");
-
-    of_printf("creating oft\n");
-    of_test("package-to-path");
-    oft = ofd_create(oft, oft_sz);
-    pkg_save(oft);
-
-    if (ofd_size(oft) > oft_sz)
-         of_panic("Could not fit all of native devtree\n");
-
-    boot_of_fixup_refs(oft);
-    boot_of_fixup_chosen(oft);
-
-    if (ofd_size(oft) > oft_sz)
-         of_panic("Could not fit all devtree fixups\n");
-
-    ofd_walk(oft, OFD_ROOT, /* add_hype_props */ NULL, 2);
-
-    mods[1].mod_start = (ulong)oft;
-    mods[1].mod_end = mods[1].mod_start + oft_sz;
-    of_printf("%s: mod[1] @ 0x%016x[0x%x]\n", __func__,
-              mods[1].mod_start, mods[1].mod_end);
-
+        /* Xen proper should never know about the dom0 args.  */
+        *(char *)p = '\0';
+        p += strlen(sepr[sepr_index]);
+        mods[mod].string = (u32)(ulong)p;
+        of_printf("%s: dom0 mod string: %s\n", __func__, p);
+    }
+
+    ++mod;
+    if (boot_of_rtas(&mods[mod], mbi))
+        ++mod;
+
+    oft = boot_of_devtree(&mods[mod], mbi);
+    if (oft == NULL)
+        of_panic("%s: boot_of_devtree failed\n", __func__);
+
+    ++mod;
 
     mbi->flags |= MBI_MODULES;
-    mbi->mods_count = 2;
+    mbi->mods_count = mod;
     mbi->mods_addr = (u32)mods;
 
-    boot_of_serial(oft);
+    return oft;
 }
 
 static int __init boot_of_cpus(void)
 {
-    int cpus_node;
-    int cpu_node, bootcpu_node, logical;
+    int cpus_node, cpu_node;
+    int bootcpu_instance, bootcpu_node;
+    int logical;
     int result;
     s32 cpuid;
     u32 cpu_clock[2];
@@ -967,9 +1171,13 @@ static int __init boot_of_cpus(void)
     /* Look up which CPU we are running on right now and get all info
      * from there */
     result = of_getprop(bof_chosen, "cpu",
-                        &bootcpu_node, sizeof (bootcpu_node));
+                        &bootcpu_instance, sizeof (bootcpu_instance));
     if (result == OF_FAILURE)
-        of_panic("Failed to look up boot cpu\n");
+        of_panic("Failed to look up boot cpu instance\n");
+
+    bootcpu_node = of_instance_to_package(bootcpu_instance);
+    if (bootcpu_node == OF_FAILURE)
+        of_panic("Failed to look up boot cpu package\n");
 
     cpu_node = bootcpu_node;
 
@@ -1070,15 +1278,12 @@ static int __init boot_of_cpus(void)
     return 1;
 }
 
-static int __init boot_of_rtas(void)
-{
-    return 1;
-}
-
 multiboot_info_t __init *boot_of_init(
         ulong r3, ulong r4, ulong vec, ulong r6, ulong r7, ulong orig_msr)
 {
     static multiboot_info_t mbi;
+    void *oft;
+    int r;
 
     of_vec = vec;
     of_msr = orig_msr;
@@ -1098,18 +1303,20 @@ multiboot_info_t __init *boot_of_init(
             r3, r4, vec, r6, r7, orig_msr);
 
     if ((vec >= (ulong)_start) && (vec <= (ulong)_end)) {
-        of_printf("Hmm.. OF[0x%lx] seems to have stepped on our image "
-                "that ranges: %p .. %p.\n HANG!\n",
+        of_panic("Hmm.. OF[0x%lx] seems to have stepped on our image "
+                "that ranges: %p .. %p.\n",
                 vec, _start, _end);
     }
     of_printf("%s: _start %p _end %p 0x%lx\n", __func__, _start, _end, r6);
 
     boot_of_fix_maple();
-    boot_of_probemem(&mbi);
+    r = boot_of_mem_init();
+    if (r == 0)
+        of_panic("failure to initialize memory allocator");
     boot_of_bootargs(&mbi);
-    boot_of_module(r3, r4, &mbi);
+    oft = boot_of_module(r3, r4, &mbi);
     boot_of_cpus();
-    boot_of_rtas();
+    boot_of_serial(oft);
 
     /* end of OF */
     of_printf("Quiescing Open Firmware ...\n");
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/crash.c
--- a/xen/arch/powerpc/crash.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/crash.c  Thu Dec 14 08:57:36 2006 -0700
@@ -1,5 +1,6 @@
 #include <xen/lib.h>       /* for printk() used in stub */
 #include <xen/types.h>
+#include <xen/kexec.h>
 #include <public/kexec.h>
 
 void machine_crash_shutdown(void)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/dart.c
--- a/xen/arch/powerpc/dart.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/dart.c   Thu Dec 14 08:57:36 2006 -0700
@@ -60,8 +60,8 @@ union dart_entry {
     u32 de_word;
     struct {
         u32 de_v:1;             /* valid */
-        u32 de_rp:1;             /* read protected*/
-        u32 de_wp:1;             /* write protected*/
+        u32 de_rp:1;             /* read protected */
+        u32 de_wp:1;             /* write protected */
         u32 _de_res:5;
         u32 de_ppn:24;         /* 24 bit Physical Page Number
                                  * representing address [28:51] */
@@ -98,7 +98,6 @@ static u32 dart_encode(int perm, ulong r
     if (perm & DART_WRITE) {
         e.de_bits.de_wp = 0;
     }
-
     return e.de_word;
 }
 
@@ -190,10 +189,8 @@ static int find_dart(struct dart_info *d
     ofdn_t n;
     char compat[128];
 
-
-    if (on_mambo()) {
-        /* mambo has no dart */
-        DBG("%s: Mambo does not support a dart\n", __func__);
+    if (on_systemsim()) {
+        DBG("%s: systemsim does not support a dart\n", __func__);
         return -1;
     }
 
@@ -263,7 +260,7 @@ static int init_dart(void)
 
     /* Linux uses a dummy page, filling "empty" DART entries with a
        reference to this page to capture stray DMA's */
-    dummy_page = (ulong)alloc_xenheap_pages(1);
+    dummy_page = (ulong)alloc_xenheap_pages(0);
     clear_page((void *)dummy_page);
     dummy_page >>= PAGE_SHIFT;
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/dart_u4.c
--- a/xen/arch/powerpc/dart_u4.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/dart_u4.c        Thu Dec 14 08:57:36 2006 -0700
@@ -19,6 +19,7 @@
  */
 
 #undef DEBUG
+#define INVALIDATE_ALL
 
 #include <xen/config.h>
 #include <xen/types.h>
@@ -123,8 +124,13 @@ static void u4_inv_all(void)
 
 static void u4_inv_entry(ulong pgn)
 {
+#ifdef INVALIDATE_ALL
+    return u4_inv_all();
+#else
     union dart_ctl dc;
     ulong retries = 0;
+
+    return u4_inv_all();
 
     dc.dc_word = in_32(&dart->d_dartcntl.dc_word);
     dc.dc_bits.dc_ilpn = pgn;
@@ -139,6 +145,7 @@ static void u4_inv_entry(ulong pgn)
         if (retries > 1000000)
             panic("WAY! too long\n");
     } while (dc.dc_bits.dc_ione != 0);
+#endif
 }
 
 static struct dart_ops u4_ops = {
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/domain.c Thu Dec 14 08:57:36 2006 -0700
@@ -33,6 +33,8 @@
 #include <asm/htab.h>
 #include <asm/current.h>
 #include <asm/hcalls.h>
+#include "rtas.h"
+#include "exceptions.h"
 
 #define next_arg(fmt, args) ({                                              \
     unsigned long __arg;                                                    \
@@ -46,7 +48,6 @@
     }                                                                       \
     __arg;                                                                  \
 })
-extern void idle_loop(void);
 
 unsigned long hypercall_create_continuation(unsigned int op,
         const char *format, ...)
@@ -87,26 +88,44 @@ int arch_domain_create(struct domain *d)
 
     INIT_LIST_HEAD(&d->arch.extent_list);
 
+    d->arch.foreign_mfn_count = 1024;
+    d->arch.foreign_mfns = xmalloc_array(uint, d->arch.foreign_mfn_count);
+    BUG_ON(d->arch.foreign_mfns == NULL);
+
+    memset(d->arch.foreign_mfns, -1, d->arch.foreign_mfn_count * sizeof(uint));
+
     return 0;
 }
 
 void arch_domain_destroy(struct domain *d)
 {
     shadow_teardown(d);
-}
-
+    /* shared_info is part of the RMA so no need to release it */
+}
+
+static void machine_fail(const char *s)
+{
+    printk("%s failed, manual powercycle required!\n", s);
+    for (;;)
+        sleep();
+}
 void machine_halt(void)
 {
     printk("machine_halt called: spinning....\n");
     console_start_sync();
-    while(1);
+    printk("%s called\n", __func__);
+    rtas_halt();
+
+    machine_fail(__func__);
 }
 
 void machine_restart(char * __unused)
 {
     printk("machine_restart called: spinning....\n");
     console_start_sync();
-    while(1);
+    printk("%s called\n", __func__);
+    rtas_reboot();
+    machine_fail(__func__);
 }
 
 struct vcpu *alloc_vcpu_struct(void)
@@ -222,6 +241,7 @@ void context_switch(struct vcpu *prev, s
 
     mtsdr1(next->domain->arch.htab.sdr1);
     local_flush_tlb(); /* XXX maybe flush_tlb_mask? */
+    cpu_flush_icache();
 
     if (is_idle_vcpu(next)) {
         reset_stack_and_jump(idle_loop);
@@ -278,8 +298,10 @@ static void relinquish_memory(struct dom
 
 void domain_relinquish_resources(struct domain *d)
 {
+    relinquish_memory(d, &d->xenpage_list);
     relinquish_memory(d, &d->page_list);
     free_extents(d);
+    xfree(d->arch.foreign_mfns);
     return;
 }
 
@@ -291,7 +313,6 @@ void arch_dump_vcpu_info(struct vcpu *v)
 {
 }
 
-extern void sleep(void);
 static void safe_halt(void)
 {
     int cpu = smp_processor_id();
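Earlier in this file's diff, arch_domain_create() allocates a foreign-MFN table and memsets it with -1, which fills every byte with 0xff so each 32-bit entry starts out as an obviously invalid value. A standalone sketch of that initialisation trick, using plain malloc() rather than Xen's xmalloc_array():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    unsigned int count = 1024;      /* mirrors d->arch.foreign_mfn_count */
    unsigned int *mfns = malloc(count * sizeof(*mfns));

    if (mfns == NULL)
        return 1;

    /* memset with -1 sets every byte to 0xff, so each entry reads ~0U. */
    memset(mfns, -1, count * sizeof(*mfns));

    printf("entry 0 = 0x%x (invalid marker)\n", mfns[0]);
    free(mfns);
    return 0;
}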
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domain_build.c
--- a/xen/arch/powerpc/domain_build.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/domain_build.c   Thu Dec 14 08:57:36 2006 -0700
@@ -178,8 +178,7 @@ int construct_dom0(struct domain *d,
         shadow_set_allocation(d, opt_dom0_shadow, &preempt);
     } while (preempt);
     if (shadow_get_allocation(d) == 0)
-        panic("shadow allocation failed 0x%x < 0x%x\n",
-              shadow_get_allocation(d), opt_dom0_shadow);
+        panic("shadow allocation failed: %dMib\n", opt_dom0_shadow);
 
     ASSERT( image_len < rma_sz );
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domctl.c
--- a/xen/arch/powerpc/domctl.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/domctl.c Thu Dec 14 08:57:36 2006 -0700
@@ -96,14 +96,14 @@ long arch_do_domctl(struct xen_domctl *d
     case XEN_DOMCTL_real_mode_area:
     {
         struct domain *d;
-        unsigned int log = domctl->u.real_mode_area.log;
+        unsigned int order = domctl->u.real_mode_area.log - PAGE_SHIFT;
 
         ret = -ESRCH;
         d = find_domain_by_id(domctl->domain);
         if (d != NULL) {
             ret = -EINVAL;
-            if (cpu_rma_valid(log))
-                ret = allocate_rma(d, log - PAGE_SHIFT);
+            if (cpu_rma_valid(order))
+                ret = allocate_rma(d, order);
             put_domain(d);
         }
     }
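The domctl above now converts the RMA size, which the caller passes as log2 of the size in bytes, into a page-allocation order before handing it to allocate_rma(). A tiny worked example of that conversion, assuming 4KiB pages (PAGE_SHIFT of 12) purely for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assumed 4KiB pages, for illustration only */

int main(void)
{
    unsigned int log = 26;                  /* a 64MiB RMA: 2^26 bytes */
    unsigned int order = log - PAGE_SHIFT;  /* allocation order in pages */

    printf("log=%u -> order=%u: %lu pages, %lu MiB\n",
           log, order, 1UL << order, (1UL << log) >> 20);
    return 0;
}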
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/exceptions.c
--- a/xen/arch/powerpc/exceptions.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/exceptions.c     Thu Dec 14 08:57:36 2006 -0700
@@ -25,8 +25,10 @@
 #include <xen/serial.h>
 #include <xen/gdbstub.h>
 #include <xen/console.h>
+#include <xen/shutdown.h>
 #include <asm/time.h>
 #include <asm/processor.h>
+#include <asm/debugger.h>
 
 #undef DEBUG
 
@@ -56,25 +58,19 @@ void do_dec(struct cpu_user_regs *regs)
 
 void program_exception(struct cpu_user_regs *regs, unsigned long cookie)
 {
+    if (cookie == 0x200) {
+        if (cpu_machinecheck(regs))
+            return;
+
+        printk("%s: machine check\n", __func__);
+    } else {
 #ifdef CRASH_DEBUG
-    __trap_to_gdb(regs, cookie);
-#else /* CRASH_DEBUG */
-    int recover = 0;
+        if (__trap_to_gdb(regs, cookie) == 0)
+            return;
+#endif /* CRASH_DEBUG */
 
-    console_start_sync();
-
-    show_registers(regs);
-    printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
-    printk("hid4 0x%016lx\n", regs->hid4);
-    printk("---[ backtrace ]---\n");
-    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
-
-    if (cookie == 0x200)
-        recover = cpu_machinecheck(regs);
-
-    if (!recover)
-        panic("%s: 0x%lx\n", __func__, cookie);
-
-    console_end_sync();
-#endif /* CRASH_DEBUG */
+        printk("%s: type: 0x%lx\n", __func__, cookie);
+        show_backtrace_regs(regs);
+    }
+    machine_halt();
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/exceptions.h
--- a/xen/arch/powerpc/exceptions.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/exceptions.h     Thu Dec 14 08:57:36 2006 -0700
@@ -43,13 +43,14 @@ extern void program_exception(
     struct cpu_user_regs *regs, unsigned long cookie);
 
 extern long xen_hvcall_jump(struct cpu_user_regs *regs, ulong address);
-extern void *mambo_memset(void *, int, ulong);
-extern void *mambo_memcpy(void *, const void *, ulong);
+
+extern void sleep(void);
+extern void idle_loop(void);
 
 extern ulong *__hypercall_table[];
 
 extern char exception_vectors[];
 extern char exception_vectors_end[];
 extern int spin_start[];
-extern int secondary_cpu_init(int cpuid, unsigned long r4);
+extern void secondary_cpu_init(int cpuid, unsigned long r4);
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/external.c       Thu Dec 14 08:57:36 2006 -0700
@@ -82,7 +82,14 @@ void do_external(struct cpu_user_regs *r
 
     vec = xen_mpic_get_irq(regs);
 
-    if (vec != -1) {
+    if (irq_desc[vec].status & IRQ_PER_CPU) {
+        /* x86 do_IRQ does not respect the per cpu flag.  */
+        irq_desc_t *desc = &irq_desc[vec];
+        regs->entry_vector = vec;
+        desc->handler->ack(vec);
+        desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
+        desc->handler->end(vec);
+    } else if (vec != -1) {
         DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
         regs->entry_vector = vec;
         do_IRQ(regs);
@@ -253,3 +260,24 @@ int ioapic_guest_write(unsigned long phy
     BUG_ON(val != val);
     return 0;
 }
+
+void send_IPI_mask(cpumask_t mask, int vector)
+{
+    unsigned int cpus;
+    int const bits = 8 * sizeof(cpus);
+
+    switch(vector) {
+    case CALL_FUNCTION_VECTOR:
+    case EVENT_CHECK_VECTOR:
+        break;
+    default:
+        BUG();
+        return;
+    }
+
+    BUG_ON(NR_CPUS > bits);
+    BUG_ON(fls(mask.bits[0]) > bits);
+
+    cpus = mask.bits[0];
+    mpic_send_ipi(vector, cpus);
+}
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/gdbstub.c
--- a/xen/arch/powerpc/gdbstub.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/gdbstub.c        Thu Dec 14 08:57:36 2006 -0700
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/bitops.h>
 #include <asm/cache.h>
+#include <asm/debugger.h>
 #include <asm/processor.h>
 
 asm(".globl trap_instruction\n"
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/iommu.c
--- a/xen/arch/powerpc/iommu.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/iommu.c  Thu Dec 14 08:57:36 2006 -0700
@@ -32,6 +32,12 @@
 #include "tce.h"
 #include "iommu.h"
 
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
 struct iommu_funcs {
     int (*iommu_put)(ulong, union tce);
 };
@@ -46,17 +52,31 @@ int iommu_put(u32 buid, ulong ioba, unio
     struct domain *d = v->domain;
 
     if (buid < iommu_phbs_num && iommu_phbs[buid].iommu_put != NULL) {
-        ulong pfn;
+        ulong gmfn;
         ulong mfn;
         int mtype;
 
-        pfn = tce.tce_bits.tce_rpn;
-        mfn = pfn2mfn(d, pfn, &mtype);
+        gmfn = tce.tce_bits.tce_rpn;
+
+
+        mfn = pfn2mfn(d, gmfn, &mtype);
         if (mfn != INVALID_MFN) {
-#ifdef DEBUG
-            printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
-                   ioba, pfn, mfn);
-#endif
+            switch (mtype) {
+            case PFN_TYPE_RMA:
+            case PFN_TYPE_LOGICAL:
+                break;
+            case PFN_TYPE_FOREIGN:
+                DBG("%s: assigning to Foriegn page: "
+                    "gmfn: 0x%lx mfn: 0x%lx\n",  __func__, gmfn, mfn);
+                break;
+            default:
+                printk("%s: unsupported type[%d]: gmfn: 0x%lx mfn: 0x%lx\n",
+                       __func__, mtype, gmfn, mfn);
+                return -1;
+                break;
+            }
+            DBG("%s: ioba=0x%lx gmfn=0x%lx mfn=0x%lx\n", __func__,
+                ioba, gmfn, mfn);
             tce.tce_bits.tce_rpn = mfn;
             return iommu_phbs[buid].iommu_put(ioba, tce);
         }
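iommu_put() above now checks what kind of guest frame is being mapped before rewriting the TCE: RMA and logical pages pass, foreign pages pass with a debug message, and anything else is rejected. A simplified, self-contained sketch of that filtering; the enum and helper below are stand-ins, not Xen's definitions:

#include <stdio.h>

enum pfn_type { PFN_RMA, PFN_LOGICAL, PFN_FOREIGN, PFN_IO };

/* Returns 1 if a TCE may be programmed for a frame of this type. */
static int tce_allowed(enum pfn_type t)
{
    switch (t) {
    case PFN_RMA:
    case PFN_LOGICAL:
        return 1;
    case PFN_FOREIGN:
        return 1;   /* allowed, but worth a debug message when mapping */
    default:
        return 0;   /* unsupported types are refused */
    }
}

int main(void)
{
    printf("foreign frame allowed? %d\n", tce_allowed(PFN_FOREIGN));
    printf("io frame allowed?      %d\n", tce_allowed(PFN_IO));
    return 0;
}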
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/machine_kexec.c
--- a/xen/arch/powerpc/machine_kexec.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/machine_kexec.c  Thu Dec 14 08:57:36 2006 -0700
@@ -1,5 +1,6 @@
 #include <xen/lib.h>       /* for printk() used in stubs */
 #include <xen/types.h>
+#include <xen/kexec.h>
 #include <public/kexec.h>
 
 int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
@@ -9,11 +10,6 @@ int machine_kexec_load(int type, int slo
 }
 
 void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
-{
-    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
-}
-
-void machine_kexec(xen_kexec_image_t *image)
 {
     printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/memory.c
--- a/xen/arch/powerpc/memory.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/memory.c Thu Dec 14 08:57:36 2006 -0700
@@ -20,10 +20,31 @@
  */
 #include <xen/sched.h>
 #include <xen/mm.h>
+#include <xen/numa.h>
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
+ * page_info table and allocation bitmap.
+ */
+static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
+integer_param("xenheap_megabytes", opt_xenheap_megabytes);
 
 unsigned long xenheap_phys_end;
+static uint nr_pages;
+static ulong xenheap_size;
+static ulong save_start;
+static ulong save_end;
+
 struct membuf {
     ulong start;
     ulong size;
@@ -36,15 +57,20 @@ static ulong free_xenheap(ulong start, u
     start = ALIGN_UP(start, PAGE_SIZE);
     end = ALIGN_DOWN(end, PAGE_SIZE);
 
-    printk("%s: 0x%lx - 0x%lx\n", __func__, start, end);
-
-    if (oftree <= end && oftree >= start) {
-        printk("%s:     Go around the devtree: 0x%lx - 0x%lx\n",
-               __func__, oftree, oftree_end);
-        init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE));
-        init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end);
+    DBG("%s: 0x%lx - 0x%lx\n", __func__, start, end);
+
+    /* need to do this better */
+    if (save_start <= end && save_start >= start) {
+        DBG("%s:     Go around the saved area: 0x%lx - 0x%lx\n",
+               __func__, save_start, save_end);
+        init_xenheap_pages(start, ALIGN_DOWN(save_start, PAGE_SIZE));
+        xenheap_size += ALIGN_DOWN(save_start, PAGE_SIZE) - start;
+
+        init_xenheap_pages(ALIGN_UP(save_end, PAGE_SIZE), end);
+        xenheap_size += end - ALIGN_UP(save_end, PAGE_SIZE);
     } else {
         init_xenheap_pages(start, end);
+        xenheap_size += end - start;
     }
 
     return ALIGN_UP(end, PAGE_SIZE);
@@ -57,8 +83,10 @@ static void set_max_page(struct membuf *
     for (i = 0; i < entries; i++) {
         ulong end_page;
 
+        printk("  %016lx: %016lx\n", mb[i].start, mb[i].size);
+        nr_pages += mb[i].size >> PAGE_SHIFT;
+
         end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT;
-
         if (end_page > max_page)
             max_page = end_page;
     }
@@ -71,11 +99,11 @@ static void heap_init(struct membuf *mb,
     ulong start_blk;
     ulong end_blk = 0;
 
-       for (i = 0; i < entries; i++) {
-           start_blk = mb[i].start;
-           end_blk = start_blk + mb[i].size;
-
-           if (start_blk < xenheap_phys_end) {
+    for (i = 0; i < entries; i++) {
+        start_blk = mb[i].start;
+        end_blk = start_blk + mb[i].size;
+
+        if (start_blk < xenheap_phys_end) {
             if (xenheap_phys_end > end_blk) {
                 panic("xenheap spans LMB\n");
             }
@@ -87,7 +115,7 @@ static void heap_init(struct membuf *mb,
 
         init_boot_pages(start_blk, end_blk);
         total_pages += (end_blk - start_blk) >> PAGE_SHIFT;
-       }
+    }
 }
 
 static void ofd_walk_mem(void *m, walk_mem_fn fn)
@@ -123,7 +151,7 @@ static void setup_xenheap(module_t *mod,
     for (i = 0; i < mcount; i++) {
         u32 s;
 
-        if(mod[i].mod_end == mod[i].mod_start)
+        if (mod[i].mod_end == mod[i].mod_start)
             continue;
 
         s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE);
@@ -149,19 +177,42 @@ void memory_init(module_t *mod, int mcou
 void memory_init(module_t *mod, int mcount)
 {
     ulong eomem;
-    ulong heap_start, heap_size;
-
-    printk("Physical RAM map:\n");
+    ulong heap_start;
+    ulong xh_pages;
 
     /* lets find out how much memory there is and set max_page */
     max_page = 0;
+    printk("Physical RAM map:\n");
     ofd_walk_mem((void *)oftree, set_max_page);
     eomem = max_page << PAGE_SHIFT;
 
     if (eomem == 0){
         panic("ofd_walk_mem() failed\n");
     }
-    printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
+
+    /* find the portion of memory we need to keep safe */
+    save_start = oftree;
+    save_end = oftree_end;
+    if (rtas_base) {
+        if (save_start > rtas_base)
+            save_start = rtas_base;
+        if (save_end < rtas_end)
+            save_end = rtas_end;
+    }
+
+    /* minimum heap has to reach to the end of all Xen required memory */
+    xh_pages = ALIGN_UP(save_end, PAGE_SIZE) >> PAGE_SHIFT;
+    xh_pages += opt_xenheap_megabytes << (20 - PAGE_SHIFT);
+
+    /* While we are allocating HTABS from The Xen Heap we need it to
+     * be larger */
+    xh_pages  += nr_pages >> 5;
+
+    xenheap_phys_end = xh_pages << PAGE_SHIFT;
+    printk("End of Xen Area: %luMiB (%luKiB)\n",
+           xenheap_phys_end >> 20, xenheap_phys_end >> 10);
+
+    printk("End of RAM: %luMiB (%luKiB)\n", eomem >> 20, eomem >> 10);
 
     /* Architecturally the first 4 pages are exception handlers, we
      * will also be copying down some code there */
@@ -185,22 +236,23 @@ void memory_init(module_t *mod, int mcou
         panic("total_pages > max_page: 0x%lx > 0x%lx\n",
               total_pages, max_page);
 
-    printk("total_pages: 0x%016lx\n", total_pages);
+    DBG("total_pages: 0x%016lx\n", total_pages);
 
     init_frametable();
+
+    numa_initmem_init(0, max_page);
+
     end_boot_allocator();
 
     /* Add memory between the beginning of the heap and the beginning
-     * of out text */
+     * of our text */
     free_xenheap(heap_start, (ulong)_start);
-
-    heap_size = xenheap_phys_end - heap_start;
-    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
-
     setup_xenheap(mod, mcount);
+    printk("Xen Heap: %luMiB (%luKiB)\n",
+           xenheap_size >> 20, xenheap_size >> 10);
 
     eomem = avail_domheap_pages();
-    printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem,
+    printk("Dom Heap: %luMiB (%luKiB)\n",
            (eomem << PAGE_SHIFT) >> 20,
            (eomem << PAGE_SHIFT) >> 10);
 }
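memory_init() above now sizes the Xen heap from three terms: enough pages to cover the saved firmware/RTAS area, the xenheap_megabytes boot option, and roughly 1/32 of total RAM as headroom while HTABs are still allocated from the Xen heap. A worked example with made-up numbers:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    /* Sample values only; a real machine reports these at boot. */
    unsigned long save_end = 0x01234000;        /* end of saved OF/RTAS area */
    unsigned long xenheap_megabytes = 32;       /* xenheap_megabytes option */
    unsigned long nr_pages = 1UL << 18;         /* 1GiB of RAM in 4KiB pages */
    unsigned long xh_pages;

    xh_pages  = ALIGN_UP(save_end, PAGE_SIZE) >> PAGE_SHIFT;
    xh_pages += xenheap_megabytes << (20 - PAGE_SHIFT);
    xh_pages += nr_pages >> 5;                  /* HTAB headroom */

    printf("xenheap_phys_end = 0x%lx (%lu MiB)\n",
           xh_pages << PAGE_SHIFT, (xh_pages << PAGE_SHIFT) >> 20);
    return 0;
}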
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/mm.c     Thu Dec 14 08:57:36 2006 -0700
@@ -25,9 +25,9 @@
 #include <xen/kernel.h>
 #include <xen/sched.h>
 #include <xen/perfc.h>
-#include <asm/misc.h>
 #include <asm/init.h>
 #include <asm/page.h>
+#include <asm/string.h>
 
 #ifdef VERBOSE
 #define MEM_LOG(_f, _a...)                                  \
@@ -42,18 +42,129 @@ unsigned long max_page;
 unsigned long max_page;
 unsigned long total_pages;
 
+void __init init_frametable(void)
+{
+    unsigned long p;
+    unsigned long nr_pages;
+    int i;
+
+    nr_pages = PFN_UP(max_page * sizeof(struct page_info));
+
+    p = alloc_boot_pages(nr_pages, 1);
+    if (p == 0)
+        panic("Not enough memory for frame table\n");
+
+    frame_table = (struct page_info *)(p << PAGE_SHIFT);
+    for (i = 0; i < nr_pages; i += 1)
+        clear_page((void *)((p + i) << PAGE_SHIFT));
+}
+
+void share_xen_page_with_guest(
+    struct page_info *page, struct domain *d, int readonly)
+{
+    if ( page_get_owner(page) == d )
+        return;
+
+    /* this causes us to leak pages in the Domain and results in
+     * Zombie domains; I think we are missing a piece, and until we find
+     * it we disable the following code */
+    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+
+    spin_lock(&d->page_alloc_lock);
+
+    /* The incremented type count pins as writable or read-only. */
+    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
+    page->u.inuse.type_info |= PGT_validated | 1;
+
+    page_set_owner(page, d);
+    wmb(); /* install valid domain ptr before updating refcnt. */
+    ASSERT(page->count_info == 0);
+    page->count_info |= PGC_allocated | 1;
+
+    if ( unlikely(d->xenheap_pages++ == 0) )
+        get_knownalive_domain(d);
+    list_add_tail(&page->list, &d->xenpage_list);
+
+    spin_unlock(&d->page_alloc_lock);
+}
+
+void share_xen_page_with_privileged_guests(
+    struct page_info *page, int readonly)
+{
+        unimplemented();
+}
+
+static ulong foreign_to_mfn(struct domain *d, ulong pfn)
+{
+
+    pfn -= 1UL << cpu_foreign_map_order();
+
+    BUG_ON(pfn >= d->arch.foreign_mfn_count);
+
+    return d->arch.foreign_mfns[pfn];
+}
+
+static int set_foreign(struct domain *d, ulong pfn, ulong mfn)
+{
+    pfn -= 1UL << cpu_foreign_map_order();
+
+    BUG_ON(pfn >= d->arch.foreign_mfn_count);
+    d->arch.foreign_mfns[pfn] = mfn;
+
+    return 0;
+}
+
+static int create_grant_va_mapping(
+    unsigned long va, unsigned long frame, struct vcpu *v)
+{
+    if (v->domain->domain_id != 0) {
+        printk("only Dom0 can map a grant entry\n");
+        BUG();
+        return GNTST_permission_denied;
+    }
+    set_foreign(v->domain, va >> PAGE_SHIFT, frame);
+    return GNTST_okay;
+}
+
+static int destroy_grant_va_mapping(
+    unsigned long addr, unsigned long frame, struct domain *d)
+{
+    if (d->domain_id != 0) {
+        printk("only Dom0 can map a grant entry\n");
+        BUG();
+        return GNTST_permission_denied;
+    }
+    set_foreign(d, addr >> PAGE_SHIFT, ~0UL);
+    return GNTST_okay;
+}
+
 int create_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags)
 {
-    panic("%s called\n", __func__);
-    return 1;
+    if (flags & GNTMAP_application_map) {
+        printk("%s: GNTMAP_application_map not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+    if (flags & GNTMAP_contains_pte) {
+        printk("%s: GNTMAP_contains_pte not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+    return create_grant_va_mapping(addr, frame, current);
 }
 
 int destroy_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags)
 {
-    panic("%s called\n", __func__);
-    return 1;
+    if (flags & GNTMAP_contains_pte) {
+        printk("%s: GNTMAP_contains_pte not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+
+    /* may have to force the remove here */
+    return destroy_grant_va_mapping(addr, frame, current->domain);
 }
 
 int steal_page(struct domain *d, struct page_info *page, unsigned int memflags)
@@ -139,7 +250,7 @@ int get_page_type(struct page_info *page
         {
             return 0;
         }
-        if ( unlikely(!(x & PGT_validated)) )
+        else if ( unlikely(!(x & PGT_validated)) )
         {
             /* Someone else is updating validation of this page. Wait... */
             while ( (y = page->u.inuse.type_info) == x )
@@ -158,25 +269,6 @@ int get_page_type(struct page_info *page
     return 1;
 }
 
-void __init init_frametable(void)
-{
-    unsigned long p;
-    unsigned long nr_pages;
-    int i;
-
-    nr_pages = PFN_UP(max_page * sizeof(struct page_info));
-    nr_pages = min(nr_pages, (4UL << (20 - PAGE_SHIFT)));
-    
-
-    p = alloc_boot_pages(nr_pages, 1);
-    if (p == 0)
-        panic("Not enough memory for frame table\n");
-
-    frame_table = (struct page_info *)(p << PAGE_SHIFT);
-    for (i = 0; i < nr_pages; i += 1)
-        clear_page((void *)((p + i) << PAGE_SHIFT));
-}
-
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
     printk("%s: no PPC specific memory ops\n", __func__);
@@ -185,29 +277,28 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
 extern void copy_page(void *dp, void *sp)
 {
-    if (on_mambo()) {
-        extern void *mambo_memcpy(void *,const void *,__kernel_size_t);
-        mambo_memcpy(dp, sp, PAGE_SIZE);
+    if (on_systemsim()) {
+        systemsim_memcpy(dp, sp, PAGE_SIZE);
     } else {
         memcpy(dp, sp, PAGE_SIZE);
     }
 }
 
+/* XXX should probably replace with faster data structure */
 static uint add_extent(struct domain *d, struct page_info *pg, uint order)
 {
     struct page_extents *pe;
 
     pe = xmalloc(struct page_extents);
     if (pe == NULL)
-        return 0;
+        return -ENOMEM;
 
     pe->pg = pg;
     pe->order = order;
-    pe->pfn = page_to_mfn(pg);
 
     list_add_tail(&pe->pe_list, &d->arch.extent_list);
 
-    return pe->pfn;
+    return 0;
 }
 
 void free_extents(struct domain *d)
@@ -246,7 +337,7 @@ uint allocate_extents(struct domain *d, 
         if (pg == NULL)
             return total_nrpages;
 
-        if (add_extent(d, pg, ext_order) == 0) {
+        if (add_extent(d, pg, ext_order) < 0) {
             free_domheap_pages(pg, ext_order);
             return total_nrpages;
         }
@@ -299,13 +390,13 @@ int allocate_rma(struct domain *d, unsig
 
     return 0;
 }
+
 void free_rma_check(struct page_info *page)
 {
     if (test_bit(_PGC_page_RMA, &page->count_info) &&
         !test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags))
         panic("Attempt to free an RMA page: 0x%lx\n", page_to_mfn(page));
 }
-
 
 ulong pfn2mfn(struct domain *d, ulong pfn, int *type)
 {
@@ -314,9 +405,17 @@ ulong pfn2mfn(struct domain *d, ulong pf
     struct page_extents *pe;
     ulong mfn = INVALID_MFN;
     int t = PFN_TYPE_NONE;
+    ulong foreign_map_pfn = 1UL << cpu_foreign_map_order();
 
     /* quick tests first */
-    if (d->is_privileged && cpu_io_mfn(pfn)) {
+    if (pfn & foreign_map_pfn) {
+        t = PFN_TYPE_FOREIGN;
+        mfn = foreign_to_mfn(d, pfn);
+    } else if (pfn >= max_page && pfn < (max_page + NR_GRANT_FRAMES)) {
+        /* Its a grant table access */
+        t = PFN_TYPE_GNTTAB;
+        mfn = gnttab_shared_mfn(d, d->grant_table, (pfn - max_page));
+    } else if (d->is_privileged && cpu_io_mfn(pfn)) {
         t = PFN_TYPE_IO;
         mfn = pfn;
     } else {
@@ -324,17 +423,32 @@ ulong pfn2mfn(struct domain *d, ulong pf
             t = PFN_TYPE_RMA;
             mfn = pfn + rma_base_mfn;
         } else {
+            ulong cur_pfn = rma_size_mfn;
+
             list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
-                uint end_pfn = pe->pfn + (1 << pe->order);
-
-                if (pfn >= pe->pfn && pfn < end_pfn) {
+                uint pe_pages = 1UL << pe->order;
+                uint end_pfn = cur_pfn + pe_pages;
+
+                if (pfn >= cur_pfn && pfn < end_pfn) {
                     t = PFN_TYPE_LOGICAL;
-                    mfn = page_to_mfn(pe->pg) + (pfn - pe->pfn);
+                    mfn = page_to_mfn(pe->pg) + (pfn - cur_pfn);
                     break;
                 }
+                cur_pfn += pe_pages;
             }
         }
-        BUG_ON(t != PFN_TYPE_NONE && page_get_owner(mfn_to_page(mfn)) != d);
+#ifdef DEBUG
+        if (t != PFN_TYPE_NONE &&
+            (d->domain_flags & DOMF_dying) &&
+            page_get_owner(mfn_to_page(mfn)) != d) {
+            printk("%s: page type: %d owner Dom[%d]:%p expected Dom[%d]:%p\n",
+                   __func__, t,
+                   page_get_owner(mfn_to_page(mfn))->domain_id,
+                   page_get_owner(mfn_to_page(mfn)),
+                   d->domain_id, d);
+            BUG();
+        }
+#endif
     }
 
     if (t == PFN_TYPE_NONE) {
@@ -368,6 +482,42 @@ ulong pfn2mfn(struct domain *d, ulong pf
     return mfn;
 }
 
+unsigned long mfn_to_gmfn(struct domain *d, unsigned long mfn)
+{
+    struct page_extents *pe;
+    ulong cur_pfn;
+    ulong gnttab_mfn;
+    ulong rma_mfn;
+
+    /* grant? */
+    gnttab_mfn = gnttab_shared_mfn(d, d->grant_table, 0);
+    if (mfn >= gnttab_mfn && mfn < (gnttab_mfn + NR_GRANT_FRAMES))
+        return max_page + (mfn - gnttab_mfn);
+
+    /* IO? */
+    if (d->is_privileged && cpu_io_mfn(mfn))
+        return mfn;
+
+    rma_mfn = page_to_mfn(d->arch.rma_page);
+    if (mfn >= rma_mfn &&
+        mfn < (rma_mfn + (1 << d->arch.rma_order)))
+        return mfn - rma_mfn;
+
+    /* Extent? */
+    cur_pfn = 1UL << d->arch.rma_order;
+    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+        uint pe_pages = 1UL << pe->order;
+        uint b_mfn = page_to_mfn(pe->pg);
+        uint e_mfn = b_mfn + pe_pages;
+
+        if (mfn >= b_mfn && mfn < e_mfn) {
+            return cur_pfn + (mfn - b_mfn);
+        }
+        cur_pfn += pe_pages;
+    }
+    return INVALID_M2P_ENTRY;
+}
+
 void guest_physmap_add_page(
     struct domain *d, unsigned long gpfn, unsigned long mfn)
 {
@@ -382,3 +532,10 @@ void shadow_drop_references(
     struct domain *d, struct page_info *page)
 {
 }
+
+int arch_domain_add_extent(struct domain *d, struct page_info *page, int order)
+{
+    if (add_extent(d, page, order) < 0)
+        return -ENOMEM;
+    return 0;
+}
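With the pfn field dropped from struct page_extents, pfn2mfn() and the new mfn_to_gmfn() both reconstruct the guest pfn layout by walking the extent list and accumulating 1 << order pages per entry, starting just past the RMA. A simplified lookup over stand-in structures (not Xen's types):

#include <stdio.h>

struct extent { unsigned long base_mfn; unsigned int order; };

/* Map a guest pfn to a machine frame by walking extents in list order. */
static long lookup_mfn(unsigned long pfn, unsigned long rma_pages,
                       const struct extent *ext, int n)
{
    unsigned long cur_pfn = rma_pages;   /* extents begin right after the RMA */
    int i;

    for (i = 0; i < n; i++) {
        unsigned long pages = 1UL << ext[i].order;

        if (pfn >= cur_pfn && pfn < cur_pfn + pages)
            return ext[i].base_mfn + (pfn - cur_pfn);
        cur_pfn += pages;
    }
    return -1;                           /* not a logical extent page */
}

int main(void)
{
    struct extent ext[] = { { 0x10000, 4 }, { 0x20000, 4 } };
    unsigned long rma_pages = 0x4000;    /* e.g. a 64MiB RMA of 4KiB pages */

    printf("pfn 0x4003 -> mfn 0x%lx\n",
           (unsigned long)lookup_mfn(0x4003, rma_pages, ext, 2));
    printf("pfn 0x4013 -> mfn 0x%lx\n",
           (unsigned long)lookup_mfn(0x4013, rma_pages, ext, 2));
    return 0;
}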
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/mpic.c   Thu Dec 14 08:57:36 2006 -0700
@@ -15,22 +15,18 @@
 /* XXX Xen hacks ... */
 /* make this generic */
 
-#define le32_to_cpu(x) \
-({ \
-       __u32 __x = (x); \
-       ((__u32)( \
-               (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
-               (((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) | \
-               (((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) | \
-               (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
-})
+#define le32_to_cpu(x)                                          \
+    ({                                                          \
+        __u32 __x = (x);                                        \
+        ((__u32)(                                               \
+             (((__u32)(__x) & (__u32)0x000000ffUL) << 24) |     \
+             (((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) |     \
+             (((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) |     \
+             (((__u32)(__x) & (__u32)0xff000000UL) >> 24) ));   \
+    })
 
 
 #define alloc_bootmem(x) xmalloc_bytes(x)
-#define request_irq(irq, handler, f, devname, dev_id) \
-    panic("IPI requested: %d: %p: %s: %p\n", irq, handler, devname, dev_id)
-
-typedef int irqreturn_t;
 
 #define IRQ_NONE       (0)
 #define IRQ_HANDLED    (1)
@@ -97,11 +93,6 @@ typedef int irqreturn_t;
 #include <asm/mpic.h>
 #include <asm/smp.h>
 
-static inline void smp_message_recv(int msg, struct pt_regs *regs)
-{
-    return;
-}
-
 #ifdef DEBUG
 #define DBG(fmt...) printk(fmt)
 #else
@@ -126,7 +117,7 @@ static DEFINE_SPINLOCK(mpic_lock);
 
 
 static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base,
-                           unsigned int reg)
+                             unsigned int reg)
 {
        if (be)
                return in_be32(base + (reg >> 2));
@@ -135,7 +126,7 @@ static inline u32 _mpic_read(unsigned in
 }
 
 static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base,
-                             unsigned int reg, u32 value)
+                               unsigned int reg, u32 value)
 {
        if (be)
                out_be32(base + (reg >> 2), value);
@@ -186,17 +177,17 @@ static inline u32 _mpic_irq_read(struct 
        unsigned int    idx = src_no & mpic->isu_mask;
 
        return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
-                         reg + (idx * MPIC_IRQ_STRIDE));
+                      reg + (idx * MPIC_IRQ_STRIDE));
 }
 
 static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
-                                  unsigned int reg, u32 value)
+                                   unsigned int reg, u32 value)
 {
        unsigned int    isu = src_no >> mpic->isu_shift;
        unsigned int    idx = src_no & mpic->isu_mask;
 
        _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
-                   reg + (idx * MPIC_IRQ_STRIDE), value);
+                reg + (idx * MPIC_IRQ_STRIDE), value);
 }
 
 #define mpic_read(b,r)         _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r))
@@ -261,7 +252,7 @@ static inline void mpic_ht_end_irq(struc
 }
 
 static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source,
-                                     unsigned int irqflags)
+                                      unsigned int irqflags)
 {
        struct mpic_irq_fixup *fixup = &mpic->fixups[source];
        unsigned long flags;
@@ -284,7 +275,7 @@ static void mpic_startup_ht_interrupt(st
 }
 
 static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source,
-                                      unsigned int irqflags)
+                                       unsigned int irqflags)
 {
        struct mpic_irq_fixup *fixup = &mpic->fixups[source];
        unsigned long flags;
@@ -305,7 +296,7 @@ static void mpic_shutdown_ht_interrupt(s
 }
 
 static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase,
-                                   unsigned int devfn, u32 vdid)
+                                    unsigned int devfn, u32 vdid)
 {
        int i, irq, n;
        u8 __iomem *base;
@@ -485,8 +476,8 @@ static void mpic_enable_irq(unsigned int
        DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src);
 
        mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
-                      mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) &
-                      ~MPIC_VECPRI_MASK);
+                   mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) &
+                   ~MPIC_VECPRI_MASK);
 
        /* make sure mask gets to controller before we return to user */
        do {
@@ -532,8 +523,8 @@ static void mpic_disable_irq(unsigned in
        DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
 
        mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
-                      mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) |
-                      MPIC_VECPRI_MASK);
+                   mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) |
+                   MPIC_VECPRI_MASK);
 
        /* make sure mask gets to controller before we return to user */
        do {
@@ -623,7 +614,7 @@ static void mpic_set_affinity(unsigned i
        cpus_and(tmp, cpumask, cpu_online_map);
 
        mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION,
-                      mpic_physmask(cpus_addr(tmp)[0]));       
+                   mpic_physmask(cpus_addr(tmp)[0]));  
 }
 
 
@@ -633,14 +624,14 @@ static void mpic_set_affinity(unsigned i
 
 
 struct mpic * __init mpic_alloc(unsigned long phys_addr,
-                               unsigned int flags,
-                               unsigned int isu_size,
-                               unsigned int irq_offset,
-                               unsigned int irq_count,
-                               unsigned int ipi_offset,
-                               unsigned char *senses,
-                               unsigned int senses_count,
-                               const char *name)
+                                unsigned int flags,
+                                unsigned int isu_size,
+                                unsigned int irq_offset,
+                                unsigned int irq_count,
+                                unsigned int ipi_offset,
+                                unsigned char *senses,
+                                unsigned int senses_count,
+                                const char *name)
 {
        struct mpic     *mpic;
        u32             reg;
@@ -687,8 +678,8 @@ struct mpic * __init mpic_alloc(unsigned
        /* Reset */
        if (flags & MPIC_WANTS_RESET) {
                mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
-                          mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
-                          | MPIC_GREG_GCONF_RESET);
+                   mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+                   | MPIC_GREG_GCONF_RESET);
                while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
                       & MPIC_GREG_GCONF_RESET)
                        mb();
@@ -700,15 +691,15 @@ struct mpic * __init mpic_alloc(unsigned
         */
        reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0);
        mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK)
-                         >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1;
+                      >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1;
        if (isu_size == 0)
                mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK)
-                                    >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1;
+                             >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1;
 
        /* Map the per-CPU registers */
        for (i = 0; i < mpic->num_cpus; i++) {
                mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE +
-                                          i * MPIC_CPU_STRIDE, 0x1000);
+                                   i * MPIC_CPU_STRIDE, 0x1000);
                BUG_ON(mpic->cpuregs[i] == NULL);
        }
 
@@ -716,7 +707,7 @@ struct mpic * __init mpic_alloc(unsigned
        if (mpic->isu_size == 0) {
                mpic->isu_size = mpic->num_sources;
                mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE,
-                                       MPIC_IRQ_STRIDE * mpic->isu_size);
+                                MPIC_IRQ_STRIDE * mpic->isu_size);
                BUG_ON(mpic->isus[0] == NULL);
        }
        mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
@@ -752,7 +743,7 @@ struct mpic * __init mpic_alloc(unsigned
 }
 
 void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
-                           unsigned long phys_addr)
+                            unsigned long phys_addr)
 {
        unsigned int isu_first = isu_num * mpic->isu_size;
 
@@ -764,7 +755,7 @@ void __init mpic_assign_isu(struct mpic 
 }
 
 void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler,
-                              void *data)
+                               void *data)
 {
        struct mpic *mpic = mpic_find(irq, NULL);
        unsigned long flags;
@@ -799,20 +790,20 @@ void __init mpic_init(struct mpic *mpic)
        /* Initialize timers: just disable them all */
        for (i = 0; i < 4; i++) {
                mpic_write(mpic->tmregs,
-                          i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0);
+                   i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0);
                mpic_write(mpic->tmregs,
-                          i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI,
-                          MPIC_VECPRI_MASK |
-                          (MPIC_VEC_TIMER_0 + i));
+                   i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI,
+                   MPIC_VECPRI_MASK |
+                   (MPIC_VEC_TIMER_0 + i));
        }
 
        /* Initialize IPIs to our reserved vectors and mark them disabled for now */
        mpic_test_broken_ipi(mpic);
        for (i = 0; i < 4; i++) {
                mpic_ipi_write(i,
-                              MPIC_VECPRI_MASK |
-                              (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
-                              (MPIC_VEC_IPI_0 + i));
+                       MPIC_VECPRI_MASK |
+                       (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
+                       (MPIC_VEC_IPI_0 + i));
 #ifdef CONFIG_SMP
                if (!(mpic->flags & MPIC_PRIMARY))
                        continue;
@@ -859,7 +850,7 @@ void __init mpic_init(struct mpic *mpic)
 #ifdef CONFIG_MPIC_BROKEN_U3
                        if (mpic_is_ht_interrupt(mpic, i)) {
                                vecpri &= ~(MPIC_VECPRI_SENSE_MASK |
-                                           MPIC_VECPRI_POLARITY_MASK);
+                            MPIC_VECPRI_POLARITY_MASK);
                                vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
                        }
 #else
@@ -873,7 +864,7 @@ void __init mpic_init(struct mpic *mpic)
                /* init hw */
                mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
                mpic_irq_write(i, MPIC_IRQ_DESTINATION,
-                              1 << hard_smp_processor_id());
+                       1 << hard_smp_processor_id());
 
                /* init linux descriptors */
                if (i < mpic->irq_count) {
@@ -887,8 +878,8 @@ void __init mpic_init(struct mpic *mpic)
 
        /* Disable 8259 passthrough */
        mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
-                  mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
-                  | MPIC_GREG_GCONF_8259_PTHROU_DIS);
+               mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+               | MPIC_GREG_GCONF_8259_PTHROU_DIS);
 
        /* Set current processor priority to 0 */
        mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0);
@@ -908,12 +899,12 @@ void mpic_irq_set_priority(unsigned int 
                reg = mpic_ipi_read(irq - mpic->ipi_offset) &
                        ~MPIC_VECPRI_PRIORITY_MASK;
                mpic_ipi_write(irq - mpic->ipi_offset,
-                              reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+                       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
        } else {
                reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI)
                        & ~MPIC_VECPRI_PRIORITY_MASK;
                mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI,
-                              reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+                       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
        }
        spin_unlock_irqrestore(&mpic_lock, flags);
 }
@@ -956,7 +947,7 @@ void mpic_setup_this_cpu(void)
        if (distribute_irqs) {
                for (i = 0; i < mpic->num_sources ; i++)
                        mpic_irq_write(i, MPIC_IRQ_DESTINATION,
-                               mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk);
+                           mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk);
        }
 
        /* Set current processor priority to 0 */
@@ -1001,7 +992,7 @@ void mpic_teardown_this_cpu(int secondar
        /* let the mpic know we don't want intrs.  */
        for (i = 0; i < mpic->num_sources ; i++)
                mpic_irq_write(i, MPIC_IRQ_DESTINATION,
-                       mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
+                       mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
 
        /* Set current processor priority to max */
        mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
@@ -1021,7 +1012,7 @@ void mpic_send_ipi(unsigned int ipi_no, 
 #endif
 
        mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10,
-                      mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
+                   mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
 }
 
 int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
@@ -1049,7 +1040,7 @@ int mpic_get_one_irq(struct mpic *mpic, 
                return irq + mpic->irq_offset;
        }
 #ifdef DEBUG_IPI
-               DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
+    DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
 #endif
        return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
 }
@@ -1075,13 +1066,13 @@ void mpic_request_ipis(void)
 
        /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
        request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT,
-                   "IPI0 (call function)", mpic);
+                "IPI0 (call function)", mpic);
        request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT,
-                  "IPI1 (reschedule)", mpic);
+                "IPI1 (reschedule)", mpic);
        request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT,
-                  "IPI2 (unused)", mpic);
+                "IPI2 (unused)", mpic);
        request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT,
-                  "IPI3 (debugger break)", mpic);
+                "IPI3 (debugger break)", mpic);
 
        printk("IPIs requested... \n");
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/mpic_init.c      Thu Dec 14 08:57:36 2006 -0700
@@ -22,6 +22,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <asm/mpic.h>
+#include <errno.h>
 #include "mpic_init.h"
 #include "oftree.h"
 #include "of-devtree.h"
@@ -74,7 +75,7 @@ static unsigned long reg2(void *oft_p, o
     rc = ofd_getprop(oft_p, c, "reg", &isa_reg, sizeof(isa_reg));
 
     DBG("%s: reg property address=0x%08x  size=0x%08x\n", __func__,
-                    isa_reg.address, isa_reg.size);
+        isa_reg.address, isa_reg.size);
     return isa_reg.address;
 }
 
@@ -92,7 +93,7 @@ static unsigned long reg1(void *oft_p, o
     rc = ofd_getprop(oft_p, c, "reg", &reg, sizeof(reg));
 
     DBG("%s: reg property address=0x%08x  size=0x%08x\n", __func__,
-                        reg.address, reg.size);
+        reg.address, reg.size);
     return reg.address;
 }
 
@@ -173,15 +174,15 @@ static unsigned long find_ranges_addr_fr
         break;
     case 2:
         ranges_addr = (((u64)ranges[ranges_i]) << 32) |
-                      ranges[ranges_i + 1];
+            ranges[ranges_i + 1];
         break;
     case 3:  /* the G5 case, how to squeeze 96 bits into 64 */
         ranges_addr = (((u64)ranges[ranges_i+1]) << 32) |
-                      ranges[ranges_i + 2];
+            ranges[ranges_i + 2];
         break;
     case 4:
         ranges_addr = (((u64)ranges[ranges_i+2]) << 32) |
-                      ranges[ranges_i + 4];
+            ranges[ranges_i + 4];
         break;
     default:
         PANIC("#address-cells out of range\n");
@@ -266,7 +267,7 @@ static int find_mpic_canonical_probe(voi
      * We select the one without an 'interrupt' property.
      */
     c = ofd_node_find_by_prop(oft_p, OFD_ROOT, "device_type", mpic_type,
-                                        sizeof(mpic_type));
+                              sizeof(mpic_type));
     while (c > 0) {
         int int_len;
         int good_mpic;
@@ -357,6 +358,42 @@ static struct hw_interrupt_type *share_m
 #define share_mpic(M,X) (M)
 
 #endif
+
+static unsigned int mpic_startup_ipi(unsigned int irq)
+{
+    mpic->hc_ipi.enable(irq);
+    return 0;
+}
+
+int request_irq(unsigned int irq,
+                irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+                unsigned long irqflags, const char * devname, void *dev_id)
+{
+    int retval;
+    struct irqaction *action;
+    void (*func)(int, void *, struct cpu_user_regs *);
+
+    action = xmalloc(struct irqaction);
+    if (!action) {
+        BUG();
+        return -ENOMEM;
+    }
+
+    /* Xen's handler prototype is slightly different than Linux's.  */
+    func = (void (*)(int, void *, struct cpu_user_regs *))handler;
+
+    action->handler = func;
+    action->name = devname;
+    action->dev_id = dev_id;
+
+    retval = setup_irq(irq, action);
+    if (retval) {
+        BUG();
+        xfree(action);
+    }
+
+    return retval;
+}
 
 struct hw_interrupt_type *xen_mpic_init(struct hw_interrupt_type *xen_irq)
 {
@@ -397,6 +434,11 @@ struct hw_interrupt_type *xen_mpic_init(
     hit = share_mpic(&mpic->hc_irq, xen_irq);
 
     printk("%s: success\n", __func__);
+
+    mpic->hc_ipi.ack = xen_irq->ack;
+    mpic->hc_ipi.startup = mpic_startup_ipi;
+    mpic_request_ipis();
+
     return hit;
 }
 
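The request_irq() introduced above is a thin adapter: it wraps a Linux-style handler in an irqaction and hands it to Xen's setup_irq(), casting away the irqreturn_t. Below is a standalone sketch of the same adapter idea; all names are hypothetical, and it keeps the handler's original type and drops the return value in a dispatcher rather than casting the pointer.

    /* Standalone sketch of the adapter idea behind the request_irq() above.
     * All names here are hypothetical; the real code stores the handler in a
     * Xen irqaction and registers it with setup_irq(). */
    #include <stdio.h>
    #include <stdlib.h>

    typedef int irqreturn_t;

    /* Linux-style prototype: the handler reports whether it handled the IRQ. */
    typedef irqreturn_t (*linux_handler_t)(int irq, void *dev_id);

    struct irqaction {
        linux_handler_t handler;    /* kept with its original type */
        const char     *name;
        void           *dev_id;
    };

    static struct irqaction *actions[16];

    static int request_irq_sketch(int irq, linux_handler_t handler,
                                  const char *name, void *dev_id)
    {
        struct irqaction *a = malloc(sizeof(*a));
        if (a == NULL)
            return -1;
        a->handler = handler;
        a->name    = name;
        a->dev_id  = dev_id;
        actions[irq] = a;           /* stand-in for setup_irq() */
        return 0;
    }

    /* Dispatcher with a void result: the Linux-style return value is dropped
     * here instead of casting the function pointer. */
    static void dispatch_irq(int irq)
    {
        struct irqaction *a = actions[irq];
        if (a != NULL && a->handler != NULL)
            (void)a->handler(irq, a->dev_id);
    }

    static irqreturn_t ipi_action(int irq, void *dev_id)
    {
        printf("IPI %d: %s\n", irq, (const char *)dev_id);
        return 1;
    }

    int main(void)
    {
        request_irq_sketch(3, ipi_action, "IPI3", "debugger break");
        dispatch_irq(3);
        return 0;
    }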
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of-devtree.h
--- a/xen/arch/powerpc/of-devtree.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/of-devtree.h     Thu Dec 14 08:57:36 2006 -0700
@@ -33,15 +33,15 @@ union of_pci_hi {
 union of_pci_hi {
     u32 word;
     struct {
-        u32    opa_n: 1; /* relocatable */
-        u32    opa_p: 1; /* prefetchable */
-        u32    opa_t: 1; /* aliased */
+        u32 opa_n: 1; /* relocatable */
+        u32 opa_p: 1; /* prefetchable */
+        u32 opa_t: 1; /* aliased */
         u32 _opa_res: 3;
-        u32    opa: 2; /* space code */
+        u32 opa: 2; /* space code */
         u32  opa_b: 8; /* bus number */
-        u32    opa_d: 5; /* device number */
-        u32    opa_f: 3; /* function number */
-        u32    opa_r: 8; /* register number */
+        u32 opa_d: 5; /* device number */
+        u32 opa_f: 3; /* function number */
+        u32 opa_r: 8; /* register number */
     } bits;
 };
 
@@ -79,9 +79,9 @@ typedef s32 ofdn_t;
 typedef s32 ofdn_t;
 
 #define OFD_ROOT 1
-#define OFD_DUMP_NAMES 0x1
-#define OFD_DUMP_VALUES        0x2
-#define OFD_DUMP_ALL   (OFD_DUMP_VALUES|OFD_DUMP_NAMES)
+#define OFD_DUMP_NAMES 0x1
+#define OFD_DUMP_VALUES 0x2
+#define OFD_DUMP_ALL (OFD_DUMP_VALUES|OFD_DUMP_NAMES)
 
 extern void *ofd_create(void *mem, size_t sz);
 extern ofdn_t ofd_node_parent(void *mem, ofdn_t n);
@@ -90,9 +90,9 @@ extern const char *ofd_node_path(void *m
 extern const char *ofd_node_path(void *mem, ofdn_t p);
 extern int ofd_node_to_path(void *mem, ofdn_t p, void *buf, size_t sz);
 extern ofdn_t ofd_node_child_create(void *mem, ofdn_t parent,
-                                   const char *path, size_t pathlen);
+                                    const char *path, size_t pathlen);
 extern ofdn_t ofd_node_peer_create(void *mem, ofdn_t sibling,
-                                  const char *path, size_t pathlen);
+                                   const char *path, size_t pathlen);
 extern ofdn_t ofd_node_find(void *mem, const char *devspec);
 extern ofdn_t ofd_node_add(void *m, ofdn_t n, const char *path, size_t sz);
 extern int ofd_node_prune(void *m, ofdn_t n);
@@ -102,23 +102,23 @@ extern ofdn_t ofd_nextprop(void *mem, of
 extern ofdn_t ofd_nextprop(void *mem, ofdn_t n, const char *prev, char *name);
 extern ofdn_t ofd_prop_find(void *mem, ofdn_t n, const char *name);
 extern int ofd_getprop(void *mem, ofdn_t n, const char *name,
-                       void *buf, size_t sz);
+                       void *buf, size_t sz);
 extern int ofd_getproplen(void *mem, ofdn_t n, const char *name);
 
 extern int ofd_setprop(void *mem, ofdn_t n, const char *name,
-                       const void *buf, size_t sz);
+                       const void *buf, size_t sz);
 extern void ofd_prop_remove(void *mem, ofdn_t node, ofdn_t prop);
 extern ofdn_t ofd_prop_add(void *mem, ofdn_t n, const char *name,
-                          const void *buf, size_t sz);
+                           const void *buf, size_t sz);
 extern ofdn_t ofd_io_create(void *m, ofdn_t node, u64 open);
 extern u32 ofd_io_open(void *mem, ofdn_t n);
 extern void ofd_io_close(void *mem, ofdn_t n);
 
 
-typedef void (*walk_fn)(void *m, ofdn_t p, int arg);
-extern void ofd_dump_props(void *m, ofdn_t p, int dump);
+typedef void (*walk_fn)(void *m, const char *pre, ofdn_t p, int arg);
+extern void ofd_dump_props(void *m, const char *pre, ofdn_t p, int dump);
 
-extern void ofd_walk(void *m, ofdn_t p, walk_fn fn, int arg);
+extern void ofd_walk(void *m, const char *pre, ofdn_t p, walk_fn fn, int arg);
 
 
 /* Recursively look up #address_cells and #size_cells properties */
@@ -129,10 +129,10 @@ extern size_t ofd_space(void *mem);
 extern size_t ofd_space(void *mem);
 
 extern void ofd_prop_print(const char *head, const char *path,
-                          const char *name, const char *prop, size_t sz);
+                           const char *name, const char *prop, size_t sz);
 
 extern ofdn_t ofd_node_find_by_prop(void *mem, ofdn_t n, const char *name,
-                                   const void *val, size_t sz);
+                                    const void *val, size_t sz);
 extern ofdn_t ofd_node_find_next(void *mem, ofdn_t n);
 extern ofdn_t ofd_node_find_prev(void *mem, ofdn_t n);
 extern void ofd_init(int (*write)(const char *, size_t len));
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of-devwalk.c
--- a/xen/arch/powerpc/of-devwalk.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/of-devwalk.c     Thu Dec 14 08:57:36 2006 -0700
@@ -80,7 +80,7 @@ void ofd_prop_print(
 #endif
 }
 
-void ofd_dump_props(void *mem, ofdn_t n, int dump)
+void ofd_dump_props(void *mem, const char *pre, ofdn_t n, int dump)
 {
     ofdn_t p;
     char name[128];
@@ -95,7 +95,7 @@ void ofd_dump_props(void *mem, ofdn_t n,
     }
 
     if (dump & OFD_DUMP_NAMES) {
-        printk("of_walk: %s: phandle 0x%x\n", path, n);
+        printk("%s: %s: phandle 0x%x\n", pre, path, n);
     }
 
     p = ofd_nextprop(mem, n, NULL, name);
@@ -106,30 +106,30 @@ void ofd_dump_props(void *mem, ofdn_t n,
         }
 
         if ( dump & OFD_DUMP_VALUES ) {
-            ofd_prop_print("of_walk", path, name, prop, sz);
+            ofd_prop_print(pre, path, name, prop, sz);
         }
 
         p = ofd_nextprop(mem, n, name, name);
     }
 }
 
-void ofd_walk(void *m, ofdn_t p, walk_fn fn, int arg)
+void ofd_walk(void *m, const char *pre, ofdn_t p, walk_fn fn, int arg)
 {
     ofdn_t n;
 
     if ( fn != NULL ) {
-        (*fn)(m, p, arg);
+        (*fn)(m, pre, p, arg);
     }
 
     /* child */
     n = ofd_node_child(m, p);
     if ( n != 0 ) {
-        ofd_walk(m, n, fn, arg);
+        ofd_walk(m, pre, n, fn, arg);
     }
 
     /* peer */
     n = ofd_node_peer(m, p);
     if ( n != 0 ) {
-        ofd_walk(m, n, fn, arg);
+        ofd_walk(m, pre, n, fn, arg);
     }
 }
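The ofd_walk()/ofd_dump_props() change above only threads a caller-supplied prefix string through the child/peer recursion so the dump output can name its caller. A self-contained sketch of that walk-with-prefix shape over a toy tree (hypothetical structures, not the Xen device-tree code):

    /* Illustrative only: a child/peer tree walk that threads a caller-supplied
     * prefix string through the recursion, mirroring the ofd_walk() signature
     * change above.  Names are hypothetical. */
    #include <stdio.h>

    struct node {
        const char  *name;
        struct node *child;   /* first child */
        struct node *peer;    /* next sibling */
    };

    typedef void (*walk_fn)(const char *pre, struct node *n, int arg);

    static void walk(struct node *n, const char *pre, walk_fn fn, int arg)
    {
        if (n == NULL)
            return;
        if (fn != NULL)
            fn(pre, n, arg);
        walk(n->child, pre, fn, arg);   /* descend first */
        walk(n->peer, pre, fn, arg);    /* then visit siblings */
    }

    static void dump(const char *pre, struct node *n, int arg)
    {
        printf("%s: %s (flags 0x%x)\n", pre, n->name, arg);
    }

    int main(void)
    {
        struct node cpu  = { "cpus/PowerPC,970", NULL, NULL };
        struct node cpus = { "cpus", &cpu, NULL };
        struct node mem  = { "memory@0", NULL, &cpus };
        struct node root = { "/", &mem, NULL };

        walk(&root, __func__, dump, 0x3);
        return 0;
    }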
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of_handler/console.c
--- a/xen/arch/powerpc/of_handler/console.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/of_handler/console.c     Thu Dec 14 08:57:36 2006 -0700
@@ -113,7 +113,7 @@ static s32 ofh_xen_dom0_read(s32 chan, v
             return ret;
 
         rc = xen_hvcall(XEN_MARK(__HYPERVISOR_console_io), CONSOLEIO_read,
-                count, desc);
+                        count, desc);
         if (rc <= 0) {
             return ret;
         }
@@ -139,7 +139,7 @@ static s32 ofh_xen_dom0_write(s32 chan, 
             return ret;
 
         rc = xen_hvcall(XEN_MARK(__HYPERVISOR_console_io), CONSOLEIO_write,
-                count, desc);
+                        count, desc);
         if (rc <= 0) {
             return ret;
         }
@@ -157,8 +157,8 @@ static s32 ofh_xen_domu_read(s32 chan, v
 static s32 ofh_xen_domu_read(s32 chan, void *buf, u32 count, s32 *actual,
                              ulong b)
 {
-       struct xencons_interface *intf;
-       XENCONS_RING_IDX cons, prod;
+    struct xencons_interface *intf;
+    XENCONS_RING_IDX cons, prod;
     s32 ret;
 
     intf = DRELA(ofh_ihp, b)->ofi_intf;
@@ -180,8 +180,8 @@ static s32 ofh_xen_domu_write(s32 chan, 
 static s32 ofh_xen_domu_write(s32 chan, const void *buf, u32 count,
                               s32 *actual, ulong b)
 {
-       struct xencons_interface *intf;
-       XENCONS_RING_IDX cons, prod;
+    struct xencons_interface *intf;
+    XENCONS_RING_IDX cons, prod;
     s32 ret;
 
     intf = DRELA(ofh_ihp, b)->ofi_intf;
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/ofd_fixup.c
--- a/xen/arch/powerpc/ofd_fixup.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/ofd_fixup.c      Thu Dec 14 08:57:36 2006 -0700
@@ -25,6 +25,7 @@
 #include <public/xen.h>
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
 
 #undef RTAS
 
@@ -347,6 +348,15 @@ static ofdn_t ofd_xen_props(void *m, str
         val[0] =  rma_size(d->arch.rma_order) - val[1];
         ofd_prop_add(m, n, "reserved", val, sizeof (val));
 
+        /* tell dom0 that Xen depends on it to have power control */
+        if (!rtas_entry)
+            ofd_prop_add(m, n, "power-control", NULL, 0);
+
+        /* tell dom0 where granted pages go in the linear map */
+        val[0] = cpu_foreign_map_order();
+        val[1] = d->arch.foreign_mfn_count;
+        ofd_prop_add(m, n, "foreign-map", val, sizeof (val));
+
         n = ofd_node_add(m, n, console, sizeof (console));
         if (n > 0) {
             val[0] = 0;
@@ -417,7 +427,7 @@ int ofd_dom0_fixup(struct domain *d, ulo
 
 
 #ifdef DEBUG
-    ofd_walk(m, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
+    ofd_walk(m, __func__, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
 #endif
     return 1;
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/ofd_fixup_memory.c
--- a/xen/arch/powerpc/ofd_fixup_memory.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/ofd_fixup_memory.c       Thu Dec 14 08:57:36 2006 -0700
@@ -68,6 +68,8 @@ static ofdn_t ofd_memory_node_create(
     reg.sz = size;
     ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
 
+    printk("Dom0: %s: %016lx, %016lx\n", path, start, size);
+
     return n;
 }
 
@@ -86,17 +88,19 @@ static void ofd_memory_extent_nodes(void
     ulong size;
     ofdn_t n;
     struct page_extents *pe;
+    ulong cur_pfn = 1UL << d->arch.rma_order;
 
+    start = cur_pfn << PAGE_SHIFT;
+    size = 0;
     list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
 
-        start = pe->pfn << PAGE_SHIFT;
-        size = 1UL << (pe->order + PAGE_SHIFT);
-
-        n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
-                                    start, size);
-
-        BUG_ON(n <= 0);
+        size += 1UL << (pe->order + PAGE_SHIFT);
+        if (pe->order != cpu_extent_order())
+            panic("we don't handle this yet\n");
     }
+    n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
+                               start, size);
+    BUG_ON(n <= 0);
 }
 
 void ofd_memory_props(void *m, struct domain *d)
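The hunk above stops creating one memory node per extent and instead advertises a single region that starts just past the RMA and spans the sum of all extents (panicking on any extent order it cannot handle yet). The arithmetic, sketched standalone with invented sizes:

    /* Illustrative sketch of the coalescing arithmetic above, with made-up
     * RMA/extent sizes; not the Xen code itself. */
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long rma_order = 14;              /* 2^14 pages = 64 MiB RMA */
        unsigned long cur_pfn   = 1UL << rma_order;
        unsigned long start     = cur_pfn << PAGE_SHIFT; /* first byte past RMA */
        unsigned long size      = 0;

        /* pretend dom0 was handed three extents of order 12 (16 MiB each) */
        unsigned int extent_orders[] = { 12, 12, 12 };
        for (unsigned int i = 0; i < 3; i++)
            size += 1UL << (extent_orders[i] + PAGE_SHIFT);

        printf("memory@%lx: start=0x%016lx size=0x%016lx\n", start, start, size);
        return 0;
    }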
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/papr/xlate.c
--- a/xen/arch/powerpc/papr/xlate.c     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/papr/xlate.c     Thu Dec 14 08:57:36 2006 -0700
@@ -19,7 +19,7 @@
  */
 
 #undef DEBUG
-#undef DEBUG_FAIL
+#undef DEBUG_LOW
 
 #include <xen/config.h>
 #include <xen/types.h>
@@ -30,6 +30,17 @@
 #include <asm/papr.h>
 #include <asm/hcalls.h>
 
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) printk(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
 #ifdef USE_PTE_INSERT
 static inline void pte_insert(union pte volatile *pte,
         ulong vsid, ulong rpn, ulong lrpn)
@@ -106,11 +117,8 @@ static void pte_tlbie(union pte volatile
 
 }
 
-static void h_enter(struct cpu_user_regs *regs)
-{
-    ulong flags = regs->gprs[4];
-    ulong ptex = regs->gprs[5];
-
+long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn)
+{
     union pte pte;
     union pte volatile *ppte;
     struct domain_htab *htab;
@@ -129,14 +137,13 @@ static void h_enter(struct cpu_user_regs
 
     htab = &d->arch.htab;
     if (ptex > (1UL << htab->log_num_ptes)) {
-        regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
-        return;
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
+        return H_Parameter;
     }
 
     /* use local HPTE to avoid manual shifting & masking */
-    pte.words.vsid = regs->gprs[6];
-    pte.words.rpn = regs->gprs[7];
+    pte.words.vsid = vsid;
+    pte.words.rpn = rpn;
 
     if ( pte.bits.l ) {        /* large page? */
         /* figure out the page size for the selected large page */
@@ -150,10 +157,9 @@ static void h_enter(struct cpu_user_regs
         }
 
         if ( lp_size >= d->arch.large_page_sizes ) {
-            printk("%s: attempt to use unsupported lp_size %d\n",
-                   __func__, lp_size);
-            regs->gprs[3] = H_Parameter;
-            return;
+            DBG("%s: attempt to use unsupported lp_size %d\n",
+                __func__, lp_size);
+            return H_Parameter;
         }
 
         /* get correct pgshift value */
@@ -168,31 +174,32 @@ static void h_enter(struct cpu_user_regs
 
     mfn = pfn2mfn(d, pfn, &mtype);
     if (mfn == INVALID_MFN) {
-        regs->gprs[3] =  H_Parameter;
-        return;
-    }
-
+        DBG("%s: Bad PFN: 0x%lx\n", __func__, pfn);
+        return H_Parameter;
+    }
+
+    if (mtype == PFN_TYPE_IO && !d->is_privileged) {
+        /* only a privileged dom can access outside IO space */
+        DBG("%s: unprivileged access to physical page: 0x%lx\n",
+            __func__, pfn);
+        return H_Privilege;
+    }
     if (mtype == PFN_TYPE_IO) {
-        /* only a privilaged dom can access outside IO space */
-        if ( !d->is_privileged ) {
-            regs->gprs[3] =  H_Privilege;
-            printk("%s: unprivileged access to physical page: 0x%lx\n",
-                   __func__, pfn);
-            return;
-        }
-
         if ( !((pte.bits.w == 0)
              && (pte.bits.i == 1)
              && (pte.bits.g == 1)) ) {
-#ifdef DEBUG_FAIL
-            printk("%s: expecting an IO WIMG "
-                   "w=%x i=%d m=%d, g=%d\n word 0x%lx\n", __func__,
-                   pte.bits.w, pte.bits.i, pte.bits.m, pte.bits.g,
-                   pte.words.rpn);
-#endif
-            regs->gprs[3] =  H_Parameter;
-            return;
-        }
+            DBG("%s: expecting an IO WIMG "
+                "w=%x i=%d m=%d, g=%d\n word 0x%lx\n", __func__,
+                pte.bits.w, pte.bits.i, pte.bits.m, pte.bits.g,
+                pte.words.rpn);
+            return H_Parameter;
+        }
+    }
+    if (mtype == PFN_TYPE_GNTTAB) {
+        DBG("%s: Dom[%d] mapping grant table: 0x%lx\n",
+            __func__, d->domain_id, pfn << PAGE_SHIFT);
+        pte.bits.i = 0;
+        pte.bits.g = 0;
     }
     /* fixup the RPN field of our local PTE copy */
     pte.bits.rpn = mfn | lp_bits;
@@ -213,13 +220,13 @@ static void h_enter(struct cpu_user_regs
         BUG_ON(f == d);
 
         if (unlikely(!get_domain(f))) {
-            regs->gprs[3] = H_Rescinded;
-            return;
+            DBG("%s: Rescinded, no domain: 0x%lx\n",  __func__, pfn);
+            return H_Rescinded;
         }
         if (unlikely(!get_page(pg, f))) {
             put_domain(f);
-            regs->gprs[3] = H_Rescinded;
-            return;
+            DBG("%s: Rescinded, no page: 0x%lx\n",  __func__, pfn);
+            return H_Rescinded;
         }
     }
 
@@ -276,17 +283,12 @@ static void h_enter(struct cpu_user_regs
                 : "b" (ppte), "r" (pte.words.rpn), "r" (pte.words.vsid)
                 : "memory");
 
-            regs->gprs[3] = H_Success;
-            regs->gprs[4] = idx;
-
-            return;
-        }
-    }
-
-#ifdef DEBUG
+            return idx;
+        }
+    }
+
     /* If the PTEG is full then no additional values are returned. */
-    printk("%s: PTEG FULL\n", __func__);
-#endif
+    DBG("%s: PTEG FULL\n", __func__);
 
     if (pg != NULL)
         put_page(pg);
@@ -294,7 +296,24 @@ static void h_enter(struct cpu_user_regs
     if (f != NULL)
         put_domain(f);
 
-    regs->gprs[3] = H_PTEG_Full;
+    return H_PTEG_Full;
+}
+
+static void h_enter(struct cpu_user_regs *regs)
+{
+    ulong flags = regs->gprs[4];
+    ulong ptex = regs->gprs[5];
+    ulong vsid = regs->gprs[6];
+    ulong rpn = regs->gprs[7];
+    long ret;
+
+    ret = pte_enter(flags, ptex, vsid, rpn);
+
+    if (ret >= 0) {
+        regs->gprs[3] = H_Success;
+        regs->gprs[4] = ret;
+    } else
+        regs->gprs[3] = ret;
 }
 
 static void h_protect(struct cpu_user_regs *regs)
@@ -308,13 +327,11 @@ static void h_protect(struct cpu_user_re
     union pte volatile *ppte;
     union pte lpte;
 
-#ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
-           flags, ptex, avpn);
-#endif
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
+            flags, ptex, avpn);
     if ( ptex > (1UL << htab->log_num_ptes) ) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     ppte = &htab->map[ptex];
@@ -324,10 +341,8 @@ static void h_protect(struct cpu_user_re
 
     /* the AVPN param occupies the bit-space of the word */
     if ( (flags & H_AVPN) && lpte.bits.avpn != avpn >> 7 ) {
-#ifdef DEBUG_FAIL
-        printk("%s: %p: AVPN check failed: 0x%lx, 0x%lx\n", __func__,
-                ppte, lpte.words.vsid, lpte.words.rpn);
-#endif
+        DBG_LOW("%s: %p: AVPN check failed: 0x%lx, 0x%lx\n", __func__,
+            ppte, lpte.words.vsid, lpte.words.rpn);
         regs->gprs[3] = H_Not_Found;
         return;
     }
@@ -337,9 +352,7 @@ static void h_protect(struct cpu_user_re
          * we invalidate entires where the PAPR says to 0 the whole hi
          * dword, so the AVPN should catch this first */
 
-#ifdef DEBUG_FAIL
-        printk("%s: pte invalid\n", __func__);
-#endif
+        DBG("%s: pte invalid\n", __func__);
         regs->gprs[3] =  H_Not_Found;
         return;
     }
@@ -374,7 +387,6 @@ static void h_protect(struct cpu_user_re
 
 static void h_clear_ref(struct cpu_user_regs *regs)
 {
-    ulong flags = regs->gprs[4];
     ulong ptex = regs->gprs[5];
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
@@ -382,20 +394,20 @@ static void h_clear_ref(struct cpu_user_
     union pte volatile *pte;
     union pte lpte;
 
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
+            regs->gprs[4], ptex);
+
 #ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
-           flags, ptex);
-#endif
-
-    if (flags != 0) {
-        printk("WARNING: %s: "
-                "flags are undefined and should be 0: 0x%lx\n",
-                __func__, flags);
-    }
+    if (regs->gprs[4] != 0) {
+        DBG("WARNING: %s: "
+            "flags are undefined and should be 0: 0x%lx\n",
+            __func__, regs->gprs[4]);
+    }
+#endif
 
     if (ptex > (1UL << htab->log_num_ptes)) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     pte = &htab->map[ptex];
@@ -417,7 +429,6 @@ static void h_clear_ref(struct cpu_user_
 
 static void h_clear_mod(struct cpu_user_regs *regs)
 {
-    ulong flags = regs->gprs[4];
     ulong ptex = regs->gprs[5];
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
@@ -425,19 +436,20 @@ static void h_clear_mod(struct cpu_user_
     union pte volatile *pte;
     union pte lpte;
 
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
+          regs->gprs[4], ptex);
+
 #ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx\n", __func__,
-           flags, ptex);
-#endif
-    if (flags != 0) {
-        printk("WARNING: %s: "
-                "flags are undefined and should be 0: 0x%lx\n",
-                __func__, flags);
-    }
-    
+    if (regs->gprs[4] != 0) {
+        DBG("WARNING: %s: "
+            "flags are undefined and should be 0: 0x%lx\n",
+            __func__, regs->gprs[4]);
+    }
+#endif
+
     if (ptex > (1UL << htab->log_num_ptes)) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     pte = &htab->map[ptex];
@@ -466,63 +478,53 @@ static void h_clear_mod(struct cpu_user_
     }
 }
 
-static void h_remove(struct cpu_user_regs *regs)
-{
-    ulong flags = regs->gprs[4];
-    ulong ptex = regs->gprs[5];
-    ulong avpn = regs->gprs[6];
+long pte_remove(ulong flags, ulong ptex, ulong avpn, ulong *hi, ulong *lo)
+{
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
     struct domain_htab *htab = &d->arch.htab;
     union pte volatile *pte;
     union pte lpte;
 
-#ifdef DEBUG
-    printk("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
-           flags, ptex, avpn);
-#endif
+    DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__,
+            flags, ptex, avpn);
+
     if ( ptex > (1UL << htab->log_num_ptes) ) {
-        regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
-        return;
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
+        return H_Parameter;
     }
     pte = &htab->map[ptex];
     lpte.words.vsid = pte->words.vsid;
     lpte.words.rpn = pte->words.rpn;
 
     if ((flags & H_AVPN) && lpte.bits.avpn != (avpn >> 7)) {
-#ifdef DEBUG_FAIL
-        printk("%s: avpn doesn not match\n", __func__);
-#endif
-        regs->gprs[3] = H_Not_Found;
-        return;
+        DBG_LOW("%s: AVPN does not match\n", __func__);
+        return H_Not_Found;
     }
 
     if ((flags & H_ANDCOND) && ((avpn & pte->words.vsid) != 0)) {
-#ifdef DEBUG_FAIL
-        printk("%s: andcond does not match\n", __func__);
-#endif
-        regs->gprs[3] = H_Not_Found;
-        return;
-    }
-
-    regs->gprs[3] = H_Success;
+        DBG("%s: andcond does not match\n", __func__);
+        return H_Not_Found;
+    }
+
     /* return old PTE in regs 4 and 5 */
-    regs->gprs[4] = lpte.words.vsid;
-    regs->gprs[5] = lpte.words.rpn;
-
+    *hi = lpte.words.vsid;
+    *lo = lpte.words.rpn;
+
+#ifdef DEBUG_LOW
     /* XXX - I'm very skeptical of doing ANYTHING if not bits.v */
     /* XXX - I think the spec should be questioned in this case (MFM) */
     if (lpte.bits.v == 0) {
-        printk("%s: removing invalid entry\n", __func__);
-    }
+        DBG_LOW("%s: removing invalid entry\n", __func__);
+    }
+#endif
 
     if (lpte.bits.v) {
         ulong mfn = lpte.bits.rpn;
         if (!cpu_io_mfn(mfn)) {
             struct page_info *pg = mfn_to_page(mfn);
             struct domain *f = page_get_owner(pg);
-
+            
             if (f != d) {
                 put_domain(f);
                 put_page(pg);
@@ -536,6 +538,27 @@ static void h_remove(struct cpu_user_reg
             : "memory");
 
     pte_tlbie(&lpte, ptex);
+
+    return H_Success;
+}
+
+static void h_remove(struct cpu_user_regs *regs)
+{
+    ulong flags = regs->gprs[4];
+    ulong ptex = regs->gprs[5];
+    ulong avpn = regs->gprs[6];
+    ulong hi, lo;
+    long ret;
+
+    ret = pte_remove(flags, ptex, avpn, &hi, &lo);
+
+    regs->gprs[3] = ret;
+
+    if (ret == H_Success) {
+        regs->gprs[4] = hi;
+        regs->gprs[5] = lo;
+    }
+    return;
 }
 
 static void h_read(struct cpu_user_regs *regs)
@@ -547,12 +570,12 @@ static void h_read(struct cpu_user_regs 
     struct domain_htab *htab = &d->arch.htab;
     union pte volatile *pte;
 
-       if (flags & H_READ_4)
+    if (flags & H_READ_4)
         ptex &= ~0x3UL;
 
     if (ptex > (1UL << htab->log_num_ptes)) {
+        DBG("%s: bad ptex: 0x%lx\n", __func__, ptex);
         regs->gprs[3] = H_Parameter;
-        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
         return;
     }
     pte = &htab->map[ptex];
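The xlate.c rework above splits each hcall handler into a core routine that returns a status (a negative H_* error, or a PTE index on success) and a thin wrapper that unpacks and repacks guest GPRs. A reduced sketch of that split, using hypothetical names and invented return-code values rather than Xen's structures:

    /* Illustrative sketch of the handler/wrapper split used above; the names,
     * the register structure and the H_* values are stand-ins. */
    #include <stdio.h>

    #define H_Success    0
    #define H_Parameter (-4)

    struct regs { unsigned long gprs[8]; };

    /* Core routine: pure logic, reports status via its return value.
     * A non-negative result means success and carries the chosen PTE index. */
    static long pte_enter_sketch(unsigned long flags, unsigned long ptex)
    {
        (void)flags;
        if (ptex >= 256)            /* pretend the HTAB has 256 slots */
            return H_Parameter;
        return (long)ptex;          /* index where the PTE landed */
    }

    /* Wrapper: unpacks arguments from guest GPRs and packs the result back. */
    static void h_enter_sketch(struct regs *r)
    {
        long ret = pte_enter_sketch(r->gprs[4], r->gprs[5]);

        if (ret >= 0) {
            r->gprs[3] = H_Success;
            r->gprs[4] = (unsigned long)ret;
        } else {
            r->gprs[3] = (unsigned long)ret;
        }
    }

    int main(void)
    {
        struct regs r = { .gprs = { 0, 0, 0, 0, /*flags*/ 0, /*ptex*/ 42 } };
        h_enter_sketch(&r);
        printf("status=%ld index=%lu\n", (long)r.gprs[3], r.gprs[4]);
        return 0;
    }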
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/exceptions.S   Thu Dec 14 08:57:36 2006 -0700
@@ -564,6 +564,22 @@ _GLOBAL(sleep)
  */    
     .globl spin_start
 spin_start:
+    /* We discovered by experiment that the ERAT must be flushed early.  */
+    isync
+    slbia
+    isync
+       
+    /* Do a cache flush for our text, in case the loader didn't */
+    LOADADDR(r9, _start)
+    LOADADDR(r8, _etext)
+4:  dcbf r0,r9
+    icbi r0,r9
+    addi r9,r9,0x20            /* up to a 4 way set per line */
+    cmpld cr0,r9,r8
+    blt        4b
+    sync
+    isync
+
     /* Write our processor number as an acknowledgment that we're alive.  */
     LOADADDR(r14, __spin_ack)
     stw r3, 0(r14)
@@ -575,7 +591,7 @@ spin_start:
     b .
     /* Find our index in the array of processor_area struct pointers.  */
 2:  LOADADDR(r14, global_cpu_table)
-    muli r15, r3, 8
+    mulli r15, r3, 8
     add r14, r14, r15
     /* Spin until the pointer for our processor goes valid.  */
 1:  ld r15, 0(r14)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/io.S
--- a/xen/arch/powerpc/powerpc64/io.S   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/io.S   Thu Dec 14 08:57:36 2006 -0700
@@ -23,6 +23,11 @@
 #include <asm/processor.h>
 #include <asm/percpu.h>
 
+/* There is no reason why I can't use a tlbie, which should be less
+ * "destructive", but using SLBIE proves to give a more stable result.
+ */
+#define INVALIDATE_ERAT_WITH_SLBIE
+
 /* Xen runs in real mode (i.e. untranslated, MMU disabled). This avoids TLB
  * flushes and also makes it easy to access all domains' memory. However, on
  * PowerPC real mode accesses are cacheable, which is good for general
@@ -34,12 +39,14 @@
  * make the access, then re-enable it...
  */
 
+#ifdef INVALIDATE_ERAT_WITH_SLBIE
 /* Not all useful assemblers understand 'tlbiel'.
  * 'addr' is a GPR containing the address being accessed.
  */
 .macro tlbiel addr
        .long 0x7c000224 | (\addr << 11)
 .endm
+#endif
 
 .macro DISABLE_DCACHE addr
        mfmsr r8
@@ -48,29 +55,53 @@
        ori r6, r6, MSR_EE
        andc r5, r8, r6
        mtmsr r5
+       sync
 
-       /* set HID4.RM_CI */
+#ifdef INVALIDATE_ERAT_WITH_SLBIE 
+       /* create an SLBIE entry for the IO address, setting a high-order bit
+        * so we do not invalidate any important SLB entries */
+       extldi r0, \addr, 36, 0 
+#endif
+       /* setup HID4.RM_CI */
        mfspr r9, SPRN_HID4
        li r6, 0x100
        sldi r6, r6, 32
-       or r5, r9, r6
-       tlbiel \addr /* invalidate the ERAT entry */
-       sync
-       mtspr SPRN_HID4, r5
+       or r10, r9, r6
+
+       /* Mark the processor as "in CI mode" */
+       li r7,0
+       mfspr r5, SPRN_PIR
+       li r6, MCK_CPU_STAT_CI
+       /* store that we are in a CI routine */
+       stb r6, MCK_CPU_STAT_BASE(r5)
+       /* r7 = MCK_CPU_STAT_CI IO in progress */
+       mr r7, r5
+       lwsync
+
+       /* switch modes */
+       mtspr SPRN_HID4, r10
+       /* invalidate the ERAT entry */
+#ifdef INVALIDATE_ERAT_WITH_SLBIE
+       slbie r0
+#else
+       tlbiel \addr
+#endif
        isync
 
-       /* Mark the processor as "in CI mode" */
-       mfspr r5, SPRN_PIR
-       li r6, MCK_CPU_STAT_CI
-       stb r6, MCK_CPU_STAT_BASE(r5)
-       sync
 .endm
 
 .macro ENABLE_DCACHE addr
-       /* re-zero HID4.RM_CI */
+       /* r7 = 0, IO is complete */
+       li r7, 0
+       lwsync
+       /* restore HID4.RM_CI */
+       mtspr SPRN_HID4, r9
+       /* invalidate the ERAT entry */
+#ifdef INVALIDATE_ERAT_WITH_SLBIE
+       slbie r0
+#else
        tlbiel \addr /* invalidate the ERAT entry */
-       sync
-       mtspr SPRN_HID4, r9
+#endif
        isync
 
        /* Mark the processor as "out of CI mode" */
@@ -83,9 +114,13 @@
        mtmsr r8
 .endm
 
-/* The following assembly cannot use r8 or r9 since they hold original
- * values of msr and hid4 repectively
+/* The following assembly cannot use some registers since they hold original
+ * values we need to keep
  */
+#undef r0
+#define r0 do_not_use_r0
+#undef r7
+#define r7 do_not_use_r7
 #undef r8
 #define r8 do_not_use_r8
 #undef r9
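The #undef/#define block above poisons the reserved register names so any later use in the file fails at build time instead of silently clobbering the saved MSR/HID4 values. The same trick, shown with an ordinary C identifier (illustrative only):

    /* Illustrative sketch of the "poison an identifier" trick used at the end
     * of io.S above, applied to a plain C variable instead of a GPR name. */
    #include <stdio.h>

    static int scratch = 42;

    static void uses_scratch(void)
    {
        printf("scratch = %d\n", scratch);   /* legitimate use, before the poison */
    }

    /* From this point on, any direct use of `scratch` breaks the build. */
    #define scratch do_not_use_scratch

    int main(void)
    {
        uses_scratch();
        /* printf("%d\n", scratch);  <-- would expand to do_not_use_scratch
         *                               and fail to compile */
        return 0;
    }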
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/ppc970.c       Thu Dec 14 08:57:36 2006 -0700
@@ -30,6 +30,7 @@
 #include <asm/powerpc64/procarea.h>
 #include <asm/powerpc64/processor.h>
 #include <asm/powerpc64/ppc970-hid.h>
+#include "scom.h"
 
 #undef DEBUG
 #undef SERIALIZE
@@ -38,48 +39,77 @@ struct cpu_caches cpu_caches = {
     .dline_size = 0x80,
     .log_dline_size = 7,
     .dlines_per_page = PAGE_SIZE >> 7,
+    .isize = (64 << 10),        /* 64 KiB */
     .iline_size = 0x80,
     .log_iline_size = 7,
     .ilines_per_page = PAGE_SIZE >> 7,
 };
 
+
+void cpu_flush_icache(void)
+{
+    union hid1 hid1;
+    ulong flags;
+    ulong ea;
+
+    local_irq_save(flags);
+
+    /* uses special processor mode that forces a real address match on
+     * the whole line */
+    hid1.word = mfhid1();
+    hid1.bits.en_icbi = 1;
+    mthid1(hid1.word);
+
+    for (ea = 0; ea < cpu_caches.isize; ea += cpu_caches.iline_size)
+        icbi(ea);
+
+    sync();
+
+    hid1.bits.en_icbi = 0;
+    mthid1(hid1.word);
+
+    local_irq_restore(flags);
+}
+
+
 struct rma_settings {
-    int order;
+    int log;
     int rmlr_0;
     int rmlr_1_2;
 };
 
-static struct rma_settings rma_orders[] = {
-    { .order = 26, .rmlr_0 = 0, .rmlr_1_2 = 3, }, /*  64 MB */
-    { .order = 27, .rmlr_0 = 1, .rmlr_1_2 = 3, }, /* 128 MB */
-    { .order = 28, .rmlr_0 = 1, .rmlr_1_2 = 0, }, /* 256 MB */
-    { .order = 30, .rmlr_0 = 0, .rmlr_1_2 = 2, }, /*   1 GB */
-    { .order = 34, .rmlr_0 = 0, .rmlr_1_2 = 1, }, /*  16 GB */
-    { .order = 38, .rmlr_0 = 0, .rmlr_1_2 = 0, }, /* 256 GB */
+static struct rma_settings rma_logs[] = {
+    { .log = 26, .rmlr_0 = 0, .rmlr_1_2 = 3, }, /*  64 MB */
+    { .log = 27, .rmlr_0 = 1, .rmlr_1_2 = 3, }, /* 128 MB */
+    { .log = 28, .rmlr_0 = 1, .rmlr_1_2 = 0, }, /* 256 MB */
+    { .log = 30, .rmlr_0 = 0, .rmlr_1_2 = 2, }, /*   1 GB */
+    { .log = 34, .rmlr_0 = 0, .rmlr_1_2 = 1, }, /*  16 GB */
+    { .log = 38, .rmlr_0 = 0, .rmlr_1_2 = 0, }, /* 256 GB */
 };
 
 static uint log_large_page_sizes[] = {
     4 + 20, /* (1 << 4) == 16M */
 };
 
-static struct rma_settings *cpu_find_rma(unsigned int order)
+static struct rma_settings *cpu_find_rma(unsigned int log)
 {
     int i;
-    for (i = 0; i < ARRAY_SIZE(rma_orders); i++) {
-        if (rma_orders[i].order == order)
-            return &rma_orders[i];
+
+    for (i = 0; i < ARRAY_SIZE(rma_logs); i++) {
+        if (rma_logs[i].log == log)
+            return &rma_logs[i];
     }
     return NULL;
 }
 
 unsigned int cpu_default_rma_order_pages(void)
 {
-    return rma_orders[0].order - PAGE_SHIFT;
-}
-
-int cpu_rma_valid(unsigned int log)
-{
-    return cpu_find_rma(log) != NULL;
+    return rma_logs[0].log - PAGE_SHIFT;
+}
+
+int cpu_rma_valid(unsigned int order)
+{
+    return cpu_find_rma(order + PAGE_SHIFT) != NULL;
 }
 
 unsigned int cpu_large_page_orders(uint *sizes, uint max)
@@ -163,8 +193,11 @@ void cpu_initialize(int cpuid)
     mtdec(timebase_freq);
     mthdec(timebase_freq);
 
-    hid0.bits.nap = 1;      /* NAP */
+    /* FIXME Do not set the NAP bit in HID0 until we have had a chance
+     * to audit the safe halt and idle loop code. */
+    hid0.bits.nap = 0;      /* NAP */
     hid0.bits.dpm = 1;      /* Dynamic Power Management */
+
     hid0.bits.nhr = 1;      /* Not Hard Reset */
     hid0.bits.hdice_en = 1; /* enable HDEC */
     hid0.bits.en_therm = 0; /* ! Enable ext thermal ints */
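With the renaming above, cpu_rma_valid() now takes an order in pages and converts it back to a log2 byte size before looking it up in the table of RMA sizes the 970 supports. A toy version of that lookup (the table values mirror the patch; the function name is invented):

    /* Illustrative sketch of the RMA-size validity check above. */
    #include <stdio.h>

    #define PAGE_SHIFT 12

    static const int rma_logs[] = { 26, 27, 28, 30, 34, 38 };  /* 64MB..256GB */

    static int rma_valid_order(unsigned int order_in_pages)
    {
        unsigned int log = order_in_pages + PAGE_SHIFT;   /* pages -> bytes */
        for (unsigned int i = 0; i < sizeof(rma_logs)/sizeof(rma_logs[0]); i++)
            if (rma_logs[i] == (int)log)
                return 1;
        return 0;
    }

    int main(void)
    {
        printf("order 14 (64 MiB):  %s\n", rma_valid_order(14) ? "valid" : "invalid");
        printf("order 15 (128 MiB): %s\n", rma_valid_order(15) ? "valid" : "invalid");
        printf("order 17 (512 MiB): %s\n", rma_valid_order(17) ? "valid" : "invalid");
        return 0;
    }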
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970_machinecheck.c
--- a/xen/arch/powerpc/powerpc64/ppc970_machinecheck.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/ppc970_machinecheck.c  Thu Dec 14 08:57:36 2006 -0700
@@ -24,6 +24,8 @@
 #include <public/xen.h>
 #include <asm/processor.h>
 #include <asm/percpu.h>
+#include <asm/debugger.h>
+#include "scom.h"
 
 #define MCK_SRR1_INSN_FETCH_UNIT    0x0000000000200000 /* 42 */
 #define MCK_SRR1_LOAD_STORE         0x0000000000100000 /* 43 */
@@ -54,6 +56,8 @@ int cpu_machinecheck(struct cpu_user_reg
     if (mck_cpu_stats[mfpir()] != 0)
         printk("While in CI IO\n");
 
+    show_backtrace_regs(regs);
+
     printk("SRR1: 0x%016lx\n", regs->msr);
     if (regs->msr & MCK_SRR1_INSN_FETCH_UNIT)
         printk("42: Exception caused by Instruction Fetch Unit (IFU)\n"
@@ -67,6 +71,7 @@ int cpu_machinecheck(struct cpu_user_reg
     case 0:
         printk("0b00: Likely caused by an asynchronous machine check,\n"
                "      see SCOM Asynchronous Machine Check Register\n");
+        cpu_scom_AMCR();
         break;
     case MCK_SRR1_CAUSE_SLB_PAR:
         printk("0b01: Exception caused by an SLB parity error detected\n"
@@ -116,5 +121,5 @@ int cpu_machinecheck(struct cpu_user_reg
         dump_segments(0);
     }
 
-    return 0; /* for now lets not recover; */
+    return 0; /* for now let's not recover */
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970_scom.c
--- a/xen/arch/powerpc/powerpc64/ppc970_scom.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/ppc970_scom.c  Thu Dec 14 08:57:36 2006 -0700
@@ -22,33 +22,17 @@
 #include <xen/types.h>
 #include <xen/lib.h>
 #include <xen/console.h>
+#include <xen/errno.h>
+#include <asm/delay.h>
+#include <asm/processor.h>
+#include "scom.h"
+
+#undef CONFIG_SCOM
 
 #define SPRN_SCOMC 276
 #define SPRN_SCOMD 277
-
-static inline void mtscomc(ulong scomc)
-{
-    __asm__ __volatile__ ("mtspr %1, %0" : : "r" (scomc), "i"(SPRN_SCOMC));
-}
-
-static inline ulong mfscomc(void)
-{
-    ulong scomc;
-    __asm__ __volatile__ ("mfspr %0, %1" : "=r" (scomc): "i"(SPRN_SCOMC));
-    return scomc;
-}
-
-static inline void mtscomd(ulong scomd)
-{
-    __asm__ __volatile__ ("mtspr %1, %0" : : "r" (scomd), "i"(SPRN_SCOMD));
-}
-
-static inline ulong mfscomd(void)
-{
-    ulong scomd;
-    __asm__ __volatile__ ("mfspr %0, %1" : "=r" (scomd): "i"(SPRN_SCOMD));
-    return scomd;
-}
+#define SCOMC_READ 1
+#define SCOMC_WRITE (!(SCOMC_READ))
 
 union scomc {
     struct scomc_bits {
@@ -68,50 +52,133 @@ union scomc {
 };
 
 
-static inline ulong read_scom(ulong addr)
+int cpu_scom_read(uint addr, ulong *d)
 {
     union scomc c;
-    ulong d;
+    ulong flags;
 
-    c.word = 0;
-    c.bits.addr = addr;
-    c.bits.RW = 0;
+    /* drop the low 8bits (including parity) */
+    addr >>= 8;
 
-    mtscomc(c.word);
-    d = mfscomd();
-    c.word = mfscomc();
-    if (c.bits.failure)
-        panic("scom status: 0x%016lx\n", c.word);
+    /* these give iface errors because the addresses are not software
+     * accessible */
+    BUG_ON(addr & 0x8000);
 
-    return d;
+    for (;;) {
+        c.word = 0;
+        c.bits.addr = addr;
+        c.bits.RW = SCOMC_READ;
+
+        local_irq_save(flags);
+        asm volatile (
+            "sync         \n\t"
+            "mtspr %2, %0 \n\t"
+            "isync        \n\t"
+            "mfspr %1, %3 \n\t"
+            "isync        \n\t"
+            "mfspr %0, %2 \n\t"
+            "isync        \n\t"
+            : "+r" (c.word), "=r" (*d)
+            : "i"(SPRN_SCOMC), "i"(SPRN_SCOMD));
+
+        local_irq_restore(flags);
+        /* WARNING! older 970s (pre FX) shift the bits right 1 position */
+
+        if (!c.bits.failure)
+            return 0;
+
+        /* deal with errors */
+        /* has SCOM been disabled? */
+        if (c.bits.disabled)
+            return -ENOSYS;
+
+        /* we were passed a bad addr, return -EINVAL */
+        if (c.bits.addr_error)
+            return -EINVAL;
+
+        /* this is way bad and we will checkstop soon */
+        BUG_ON(c.bits.proto_error);
+
+        if (c.bits.iface_error)
+            udelay(10);
+    }
 }
 
-static inline void write_scom(ulong addr, ulong val)
+int cpu_scom_write(uint addr, ulong d)
 {
     union scomc c;
+    ulong flags;
 
-    c.word = 0;
-    c.bits.addr = addr;
-    c.bits.RW = 1;
+    /* drop the low 8bits (including parity) */
+    addr >>= 8;
 
-    mtscomd(val);
-    mtscomc(c.word);
-    c.word = mfscomc();
-    if (c.bits.failure)
-        panic("scom status: 0x%016lx\n", c.word);
+    /* these give iface errors because the addresses are not software
+     * accessible */
+    BUG_ON(addr & 0x8000);
+
+    for (;;) {
+        c.word = 0;
+        c.bits.addr = addr;
+        c.bits.RW = SCOMC_WRITE;
+
+        local_irq_save(flags);
+        asm volatile(
+            "sync         \n\t"
+            "mtspr %3, %1 \n\t"
+            "isync        \n\t"
+            "mtspr %2, %0 \n\t"
+            "isync        \n\t"
+            "mfspr %0, %2 \n\t"
+            "isync        \n\t"
+            : "+r" (c.word)
+            : "r" (d), "i"(SPRN_SCOMC), "i"(SPRN_SCOMD));
+        local_irq_restore(flags);
+
+        if (!c.bits.failure)
+            return 0;
+
+        /* has SCOM been disabled? */
+        if (c.bits.disabled)
+            return -ENOSYS;
+
+        /* we were passed a bad addr, return -EINVAL */
+        if (c.bits.addr_error)
+            return -EINVAL;
+
+        /* this is way bad and we will checkstop soon */
+        BUG_ON(c.bits.proto_error);
+
+        /* check for iface and retry */
+        if (c.bits.iface_error)
+            udelay(10);
+    }
 }
-
-#define SCOM_AMCS_REG      0x022601
-#define SCOM_AMCS_AND_MASK 0x022700
-#define SCOM_AMCS_OR_MASK  0x022800
-#define SCOM_CMCE          0x030901
-#define SCOM_PMCR          0x400801
 
 void cpu_scom_init(void)
 {
-#ifdef not_yet
-    console_start_sync();
-    printk("scom PMCR: 0x%016lx\n", read_scom(SCOM_PMCR));
-    console_end_sync();
+#ifdef CONFIG_SCOM
+    ulong val;
+    if (PVR_REV(mfpvr()) == 0x0300) {
+        /* these addresses are only good for 970FX */
+        console_start_sync();
+        if (!cpu_scom_read(SCOM_PTSR, &val))
+            printk("SCOM PTSR: 0x%016lx\n", val);
+
+        console_end_sync();
+    }
 #endif
 }
+
+void cpu_scom_AMCR(void)
+{
+#ifdef CONFIG_SCOM
+    ulong val;
+
+    if (PVR_REV(mfpvr()) == 0x0300) {
+        /* these addresses are only good for 970FX */
+        cpu_scom_read(SCOM_AMC_REG, &val);
+        printk("SCOM AMCR: 0x%016lx\n", val);
+    }
+#endif
+}
+
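The rewritten SCOM accessors above drop the low (parity) bits of the address, retry on transient interface errors, and map the remaining failure bits to errno values instead of panicking. A standalone sketch of that retry/classification loop, with the SPR access replaced by a stub that fails twice before succeeding; everything here is hypothetical:

    /* Illustrative sketch of the retry loop in cpu_scom_read()/cpu_scom_write(). */
    #include <stdio.h>
    #include <errno.h>

    struct scomc_status {
        int failure;       /* any failure bit set            */
        int disabled;      /* SCOM access disabled           */
        int addr_error;    /* address rejected               */
        int proto_error;   /* protocol error (fatal)         */
        int iface_error;   /* transient interface error      */
    };

    static int attempts;

    /* Stand-in for the mtspr/mfspr sequence: transient errors, then success. */
    static struct scomc_status do_scom_access(unsigned int addr, unsigned long *d)
    {
        struct scomc_status s = { 0, 0, 0, 0, 0 };
        if (++attempts < 3) {
            s.failure = 1;
            s.iface_error = 1;      /* transient: caller should retry */
        } else {
            *d = 0xdeadbeefUL ^ addr;
        }
        return s;
    }

    static int scom_read_sketch(unsigned int addr, unsigned long *d)
    {
        addr >>= 8;                 /* drop the low (parity) bits, as in the patch */

        for (;;) {
            struct scomc_status s = do_scom_access(addr, d);

            if (!s.failure)
                return 0;
            if (s.disabled)
                return -ENOSYS;     /* SCOM not available at all */
            if (s.addr_error)
                return -EINVAL;     /* bad address, no point retrying */
            if (s.proto_error)
                return -EIO;        /* the real code BUG()s here */
            if (s.iface_error)
                continue;           /* transient; the real code udelay(10)s first */
        }
    }

    int main(void)
    {
        unsigned long val;
        int rc = scom_read_sketch(0x022601, &val);
        printf("rc=%d val=0x%lx\n", rc, val);
        return 0;
    }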
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/traps.c
--- a/xen/arch/powerpc/powerpc64/traps.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/powerpc64/traps.c        Thu Dec 14 08:57:36 2006 -0700
@@ -48,7 +48,3 @@ void show_registers(struct cpu_user_regs
     console_end_sync();
 }
 
-void show_execution_state(struct cpu_user_regs *regs)
-{
-    show_registers(regs);
-}
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/rtas.c
--- a/xen/arch/powerpc/rtas.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/rtas.c   Thu Dec 14 08:57:36 2006 -0700
@@ -13,12 +13,90 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2006
  *
  * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include "of-devtree.h"
+#include "rtas.h"
 
-int rtas_halt = -1;
-int rtas_reboot = -1;
+static int rtas_halt_token = -1;
+static int rtas_reboot_token = -1;
+int rtas_entry;
+unsigned long rtas_msr;
+unsigned long rtas_base;
+unsigned long rtas_end;
+
+struct rtas_args {
+    int ra_token;
+    int ra_nargs;
+    int ra_nrets;
+    int ra_args[10];
+} __attribute__ ((aligned(8)));
+
+static int rtas_call(struct rtas_args *r)
+{
+    if (rtas_entry == 0)
+        return -ENOSYS;
+
+    return prom_call(r, rtas_base, rtas_entry, rtas_msr);
+}
+
+int __init rtas_init(void *m)
+{
+    static const char halt[] = "power-off";
+    static const char reboot[] = "system-reboot";
+    ofdn_t n;
+
+    if (rtas_entry == 0)
+        return -ENOSYS;
+
+    n = ofd_node_find(m, "/rtas");
+    if (n <= 0)
+        return -ENOSYS;
+
+    ofd_getprop(m, n, halt,
+                &rtas_halt_token, sizeof (rtas_halt_token));
+    ofd_getprop(m, n, reboot,
+                &rtas_reboot_token, sizeof (rtas_reboot_token));
+    return 1;
+}
+
+int
+rtas_halt(void)
+{
+    struct rtas_args r;
+
+    if (rtas_halt_token == -1)
+        return -1;
+
+    r.ra_token = rtas_halt_token;
+    r.ra_nargs = 2;
+    r.ra_nrets = 1;
+    r.ra_args[0] = 0;
+    r.ra_args[1] = 0;
+
+    return rtas_call(&r);
+}
+
+int
+rtas_reboot(void)
+{
+    struct rtas_args r;
+
+    if (rtas_reboot_token == -1)
+        return -ENOSYS;
+
+    r.ra_token = rtas_reboot_token;
+    r.ra_nargs = 2;
+    r.ra_nrets = 1;
+    r.ra_args[0] = 0;
+    r.ra_args[1] = 0;
+
+    return rtas_call(&r);
+}
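rtas.c now resolves the "power-off" and "system-reboot" tokens from the /rtas node at boot and issues calls through a small aligned argument block. A standalone sketch of that token/argument-block convention, with the firmware entry point stubbed out and the token values invented:

    /* Illustrative sketch of the RTAS calling convention used above.
     * rtas_call_stub() stands in for the real firmware entry point. */
    #include <stdio.h>
    #include <errno.h>

    struct rtas_args {
        int token;      /* which RTAS service to invoke        */
        int nargs;      /* number of input arguments           */
        int nrets;      /* number of return slots              */
        int args[10];   /* inputs followed by return slots     */
    } __attribute__((aligned(8)));

    static int rtas_entry = 0x1234;        /* nonzero: firmware gave us an entry */
    static int rtas_reboot_token = 7;      /* would come from ofd_getprop()      */

    static int rtas_call_stub(struct rtas_args *r)
    {
        if (rtas_entry == 0)
            return -ENOSYS;                /* no RTAS: caller must cope */
        printf("RTAS call: token=%d nargs=%d nrets=%d\n",
               r->token, r->nargs, r->nrets);
        return 0;
    }

    static int rtas_reboot_sketch(void)
    {
        struct rtas_args r;

        if (rtas_reboot_token == -1)
            return -ENOSYS;                /* property was never found */

        r.token = rtas_reboot_token;
        r.nargs = 2;
        r.nrets = 1;
        r.args[0] = 0;
        r.args[1] = 0;
        return rtas_call_stub(&r);
    }

    int main(void)
    {
        printf("rtas_reboot_sketch() = %d\n", rtas_reboot_sketch());
        return 0;
    }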
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/setup.c  Thu Dec 14 08:57:36 2006 -0700
@@ -1,8 +1,8 @@
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -35,8 +35,10 @@
 #include <xen/gdbstub.h>
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
+#include <xen/numa.h>
 #include <acm/acm_hooks.h>
 #include <public/version.h>
+#include <asm/mpic.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
 #include <asm/cache.h>
@@ -47,6 +49,7 @@
 #include "exceptions.h"
 #include "of-devtree.h"
 #include "oftree.h"
+#include "rtas.h"
 
 #define DEBUG
 
@@ -75,10 +78,7 @@ ulong oftree_end;
 ulong oftree_end;
 
 uint cpu_hard_id[NR_CPUS] __initdata;
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
 cpumask_t cpu_present_map;
-cpumask_t cpu_possible_map;
 
 /* XXX get this from ISA node in device tree */
 char *vgabase;
@@ -87,6 +87,8 @@ struct ns16550_defaults ns16550;
 
 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
 
+static struct domain *idle_domain;
+
 volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
 
 int is_kernel_text(unsigned long addr)
@@ -110,12 +112,28 @@ static void __init do_initcalls(void)
     }
 }
 
-static void hw_probe_attn(unsigned char key, struct cpu_user_regs *regs)
+
+void noinline __attn(void)
 {
     /* To continue the probe will step over the ATTN instruction.  The
      * NOP is there to make sure there is something sane to "step
      * over" to. */
-    asm volatile(".long 0x00000200; nop");
+    console_start_sync();
+    asm volatile(".long 0x200;nop");
+    console_end_sync();
+}
+
+static void key_hw_probe_attn(unsigned char key)
+{
+    __attn();
+}
+
+static void key_ofdump(unsigned char key)
+{
+    printk("ofdump:\n");
+    /* make sure the OF devtree is good */
+    ofd_walk((void *)oftree, "devtree", OFD_ROOT,
+             ofd_dump_props, OFD_DUMP_ALL);
 }
 
 static void percpu_init_areas(void)
@@ -150,8 +168,6 @@ static void percpu_free_unused_areas(voi
 
 static void __init start_of_day(void)
 {
-    struct domain *idle_domain;
-
     init_IRQ();
 
     scheduler_init();
@@ -166,36 +182,19 @@ static void __init start_of_day(void)
     /* for some reason we need to set our own bit in the thread map */
     cpu_set(0, cpu_sibling_map[0]);
 
-    percpu_free_unused_areas();
-
-    {
-        /* FIXME: Xen assumes that an online CPU is a schedualable
-         * CPU, but we just are not there yet. Remove this fragment when
-         * scheduling processors actually works. */
-        int cpuid;
-
-        printk("WARNING!: Taking all secondary CPUs offline\n");
-
-        for_each_online_cpu(cpuid) {
-            if (cpuid == 0)
-                continue;
-            cpu_clear(cpuid, cpu_online_map);
-        }
-    }
-
     initialize_keytable();
     /* Register another key that will allow for the the Harware Probe
      * to be contacted, this works with RiscWatch probes and should
      * work with Chronos and FSPs */
-    register_irq_keyhandler('^', hw_probe_attn,   "Trap to Hardware Probe");
+    register_keyhandler('^', key_hw_probe_attn, "Trap to Hardware Probe");
+
+    /* allow the dumping of the devtree */
+    register_keyhandler('D', key_ofdump , "Dump OF Devtree");
 
     timer_init();
     serial_init_postirq();
     do_initcalls();
-    schedulers_start();
-}
-
-extern void idle_loop(void);
+}
 
 void startup_cpu_idle_loop(void)
 {
@@ -208,6 +207,15 @@ void startup_cpu_idle_loop(void)
     /* Finally get off the boot stack. */
     reset_stack_and_jump(idle_loop);
 }
+
+/* The boot_pa is enough "parea" for the boot CPU to get through
+ * initialization; it will ultimately be replaced later */
+static __init void init_boot_cpu(void)
+{
+    static struct processor_area boot_pa;
+    boot_pa.whoami = 0;
+    parea = &boot_pa;
+}    
 
 static void init_parea(int cpuid)
 {
@@ -227,6 +235,7 @@ static void init_parea(int cpuid)
     pa->whoami = cpuid;
     pa->hard_id = cpu_hard_id[cpuid];
     pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+    mb();
 
     /* This store has the effect of invoking secondary_cpu_init.  */
     global_cpu_table[cpuid] = pa;
@@ -248,18 +257,34 @@ static int kick_secondary_cpus(int maxcp
         /* wait for it */
         while (!cpu_online(cpuid))
             cpu_relax();
+
+        numa_set_node(cpuid, 0);
+        numa_add_cpu(cpuid);
     }
 
     return 0;
 }
 
 /* This is the first C code that secondary processors invoke.  */
-int secondary_cpu_init(int cpuid, unsigned long r4)
-{
+void secondary_cpu_init(int cpuid, unsigned long r4)
+{
+    struct vcpu *vcpu;
+
     cpu_initialize(cpuid);
     smp_generic_take_timebase();
+
+    /* If we are online, we must be able to ACK IPIs.  */
+    mpic_setup_this_cpu();
     cpu_set(cpuid, cpu_online_map);
-    while(1);
+
+    vcpu = alloc_vcpu(idle_domain, cpuid, cpuid);
+    BUG_ON(vcpu == NULL);
+
+    set_current(idle_domain->vcpu[cpuid]);
+    idle_vcpu[cpuid] = current;
+    startup_cpu_idle_loop();
+
+    panic("should never get here\n");
 }
 
 static void __init __start_xen(multiboot_info_t *mbi)
@@ -277,6 +302,9 @@ static void __init __start_xen(multiboot
     /* Parse the command-line options. */
     if ((mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0))
         cmdline_parse(__va((ulong)mbi->cmdline));
+
+    /* we need to be able to identify this CPU early on */
+    init_boot_cpu();
 
     /* We initialise the serial devices very early so we can get debugging. */
     ns16550.io_base = 0x3f8;
@@ -286,20 +314,12 @@ static void __init __start_xen(multiboot
     serial_init_preirq();
 
     init_console();
-#ifdef CONSOLE_SYNC
+    /* let's stay synchronous until we really get going */
     console_start_sync();
-#endif
-
-    /* we give the first RMA to the hypervisor */
-    xenheap_phys_end = rma_size(cpu_default_rma_order_pages());
 
     /* Check that we have at least one Multiboot module. */
     if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) {
         panic("FATAL ERROR: Require at least one Multiboot module.\n");
-    }
-
-    if (!(mbi->flags & MBI_MEMMAP)) {
-        panic("FATAL ERROR: Bootloader provided no memory information.\n");
     }
 
     /* OF dev tree is the last module */
@@ -312,14 +332,18 @@ static void __init __start_xen(multiboot
     mod[mbi->mods_count-1].mod_end = 0;
     --mbi->mods_count;
 
+    if (rtas_entry) {
+        rtas_init((void *)oftree);
+        /* remove rtas module from consideration */
+        mod[mbi->mods_count-1].mod_start = 0;
+        mod[mbi->mods_count-1].mod_end = 0;
+        --mbi->mods_count;
+    }
     memory_init(mod, mbi->mods_count);
 
 #ifdef OF_DEBUG
-    printk("ofdump:\n");
-    /* make sure the OF devtree is good */
-    ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
+    key_ofdump(0);
 #endif
-
     percpu_init_areas();
 
     init_parea(0);
@@ -330,6 +354,10 @@ static void __init __start_xen(multiboot
     if (opt_earlygdb)
         debugger_trap_immediate();
 #endif
+
+    start_of_day();
+
+    mpic_setup_this_cpu();
 
     /* Deal with secondary processors.  */
     if (opt_nosmp || ofd_boot_cpu == -1) {
@@ -339,7 +367,11 @@ static void __init __start_xen(multiboot
         kick_secondary_cpus(max_cpus);
     }
 
-    start_of_day();
+    /* Secondary processors must be online before we call this.  */
+    schedulers_start();
+
+    /* This cannot be called before secondary cpus are marked online.  */
+    percpu_free_unused_areas();
 
     /* Create initial domain 0. */
     dom0 = domain_create(0, 0);
@@ -383,10 +415,10 @@ static void __init __start_xen(multiboot
     }
 
     init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE),
-                 ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
+                       ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
     if (initrd_start)
         init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE),
-                     ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
+                           ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
 
     init_trace_bufs();
 
@@ -395,8 +427,12 @@ static void __init __start_xen(multiboot
     /* Hide UART from DOM0 if we're using it */
     serial_endboot();
 
+    console_end_sync();
+
     domain_unpause_by_systemcontroller(dom0);
-
+#ifdef DEBUG_IPI
+    ipi_torture_test();
+#endif
     startup_cpu_idle_loop();
 }
 
@@ -414,7 +450,7 @@ void __init __start_xen_ppc(
 
     } else {
         /* booted by someone else that hopefully has a trap handler */
-        trap();
+        __builtin_trap();
     }
 
     __start_xen(mbi);
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/shadow.c
--- a/xen/arch/powerpc/shadow.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/shadow.c Thu Dec 14 08:57:36 2006 -0700
@@ -101,9 +101,6 @@ unsigned int shadow_set_allocation(struc
 
     addr = htab_alloc(d, order);
 
-    printk("%s: ibm,fpt-size should be: 0x%x\n", __func__,
-           d->arch.htab.log_num_ptes + LOG_PTE_SIZE);
-
     if (addr == 0)
         return -ENOMEM;
 
@@ -115,8 +112,8 @@ unsigned int shadow_set_allocation(struc
 }
 
 int shadow_domctl(struct domain *d, 
-                                 xen_domctl_shadow_op_t *sc,
-                                 XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+                  xen_domctl_shadow_op_t *sc,
+                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
 {
     if ( unlikely(d == current->domain) )
     {
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.c    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/smp.c    Thu Dec 14 08:57:36 2006 -0700
@@ -13,15 +13,18 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005,2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ * Authors: Amos Waterland <apw@xxxxxxxxxx>
  */
 
-#include <asm/misc.h>
 #include <xen/cpumask.h>
 #include <xen/smp.h>
 #include <asm/flushtlb.h>
+#include <asm/debugger.h>
+#include <asm/mpic.h>
+#include <asm/mach-default/irq_vectors.h>
 
 int smp_num_siblings = 1;
 int smp_num_cpus = 1;
@@ -29,25 +32,56 @@ int ht_per_core = 1;
 
 void __flush_tlb_mask(cpumask_t mask, unsigned long addr)
 {
-    unimplemented();
-}
-
-void smp_send_event_check_mask(cpumask_t cpu_mask)
-{
-    unimplemented();
-}
-
-int smp_call_function(void (*func) (void *info), void *info, int unused,
-        int wait)
-{
-    unimplemented();
-    return 0;
+    if (cpu_isset(smp_processor_id(), mask)) {
+        cpu_clear(smp_processor_id(), mask);
+        if (cpus_empty(mask)) {
+            /* only local */
+            if (addr == FLUSH_ALL_ADDRS)
+                local_flush_tlb();
+            else
+                local_flush_tlb_one(addr);
+            return;
+        }
+    }
+    /* if we are still here and the mask is non-empty, then we need to
+     * flush other TLBs, so we flush them all */
+    if (!cpus_empty(mask))
+        unimplemented();
+}
+
+void smp_send_event_check_mask(cpumask_t mask)
+{
+    cpu_clear(smp_processor_id(), mask);
+    if (!cpus_empty(mask))
+        send_IPI_mask(mask, EVENT_CHECK_VECTOR);
+}
+
+
+int smp_call_function(void (*func) (void *info), void *info, int retry,
+                      int wait)
+{
+    cpumask_t allbutself = cpu_online_map;
+    cpu_clear(smp_processor_id(), allbutself);
+
+    return on_selected_cpus(allbutself, func, info, retry, wait);
 }
 
 void smp_send_stop(void)
 {
-    unimplemented();
-}
+    BUG();
+}
+
+struct call_data_struct {
+    void (*func) (void *info);
+    void *info;
+    int wait;
+    atomic_t started;
+    atomic_t finished;
+    cpumask_t selected;
+};
+
+static DEFINE_SPINLOCK(call_lock);
+static struct call_data_struct call_data;
 
 int on_selected_cpus(
     cpumask_t selected,
@@ -56,5 +90,125 @@ int on_selected_cpus(
     int retry,
     int wait)
 {
-    return 0;
-}
+    int retval = 0, nr_cpus = cpus_weight(selected);
+    unsigned long start, stall = SECONDS(1);
+
+    spin_lock(&call_lock);
+
+    call_data.func = func;
+    call_data.info = info;
+    call_data.wait = wait;
+    atomic_set(&call_data.started, 0);
+    atomic_set(&call_data.finished, 0);
+    mb();
+
+    send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
+
+    /* We always wait for an initiation ACK from remote CPU.  */
+    for (start = NOW(); atomic_read(&call_data.started) != nr_cpus; ) {
+        if (NOW() > start + stall) {
+            printk("IPI start stall: %d ACKS to %d SYNS\n", 
+                   atomic_read(&call_data.started), nr_cpus);
+            start = NOW();
+        }
+    }
+
+    /* If told to, we wait for a completion ACK from remote CPU.  */
+    if (wait) {
+        for (start = NOW(); atomic_read(&call_data.finished) != nr_cpus; ) {
+            if (NOW() > start + stall) {
+                printk("IPI finish stall: %d ACKS to %d SYNS\n", 
+                       atomic_read(&call_data.finished), nr_cpus);
+                start = NOW();
+            }
+        }
+    }
+
+    spin_unlock(&call_lock);
+
+    return retval;
+}
+
+void smp_call_function_interrupt(struct cpu_user_regs *regs)
+{
+
+    void (*func)(void *info) = call_data.func;
+    void *info = call_data.info;
+    int wait = call_data.wait;
+
+    atomic_inc(&call_data.started);
+    mb();
+    (*func)(info);
+    mb();
+
+    if (wait)
+        atomic_inc(&call_data.finished);
+
+    return;
+}
+
+void smp_event_check_interrupt(void)
+{
+    /* We are knocked out of NAP state at least.  */
+    return;
+}
+
+void smp_message_recv(int msg, struct cpu_user_regs *regs)
+{
+    switch(msg) {
+    case CALL_FUNCTION_VECTOR:
+        smp_call_function_interrupt(regs);
+        break;
+    case EVENT_CHECK_VECTOR:
+        smp_event_check_interrupt();
+        break;
+    default:
+        BUG();
+        break;
+    }
+}
+
+#ifdef DEBUG_IPI
+static void debug_ipi_ack(void *info)
+{
+    if (info) {
+        unsigned long start, stall = SECONDS(5);
+        for (start = NOW(); NOW() < start + stall; );
+        printk("IPI recv on cpu #%d: %s\n", smp_processor_id(), (char *)info);
+    }
+    return;
+}
+
+void ipi_torture_test(void)
+{
+    int cpu;
+    unsigned long before, after, delta;
+    unsigned long min = ~0, max = 0, mean = 0, sum = 0, trials = 0;
+    cpumask_t mask;
+
+    cpus_clear(mask);
+
+    while (trials < 1000000) {
+        for_each_online_cpu(cpu) {
+            cpu_set(cpu, mask);
+            before = mftb();
+            on_selected_cpus(mask, debug_ipi_ack, NULL, 1, 1);
+            after = mftb();
+            cpus_clear(mask);
+
+            delta = after - before;
+            if (delta > max) max = delta;
+            if (delta < min) min = delta;
+            sum += delta;
+            trials++;
+        }
+    }
+
+    mean = tb_to_ns(sum / trials);
+
+    printk("IPI latency: min = %ld ticks, max = %ld ticks, mean = %ldns\n",
+           min, max, mean);
+
+    smp_call_function(debug_ipi_ack, "Hi", 0, 1);
+}
+#endif
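
The path above implements cross-CPU calls with one shared call_data slot guarded by call_lock: the initiator fills in func/info/wait, sends CALL_FUNCTION_VECTOR to the selected mask, and spins until every target has bumped the started counter (and, when wait is set, the finished counter). A minimal caller sketch, using only interfaces added in this changeset; the flush helpers themselves are hypothetical examples, not part of the patch:

    /* Hypothetical example: run cpu_flush_icache() on a set of CPUs via
     * the on_selected_cpus() implementation above. */
    static void do_flush(void *unused)
    {
        cpu_flush_icache();   /* runs from smp_call_function_interrupt() */
    }

    static void flush_icache_on(cpumask_t cpus)
    {
        /* wait=1: return only after every selected CPU has run do_flush()
         * and incremented call_data.finished. */
        on_selected_cpus(cpus, do_flush, NULL, 1 /* retry */, 1 /* wait */);
    }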
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/time.c
--- a/xen/arch/powerpc/time.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/time.c   Thu Dec 14 08:57:36 2006 -0700
@@ -25,7 +25,7 @@
 #include <xen/sched.h>
 #include <asm/processor.h>
 #include <asm/current.h>
-#include <asm/misc.h>
+#include <asm/debugger.h>
 
 #define Dprintk(x...) printk(x)
 
@@ -93,5 +93,4 @@ void do_settime(unsigned long secs, unsi
 
 void update_vcpu_system_time(struct vcpu *v)
 {
-    unimplemented();
 }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/usercopy.c
--- a/xen/arch/powerpc/usercopy.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/usercopy.c       Thu Dec 14 08:57:36 2006 -0700
@@ -18,267 +18,33 @@
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
 
-#include <xen/config.h>
-#include <xen/mm.h>
 #include <xen/sched.h>
+#include <xen/lib.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
-#include <public/xen.h>
-#include <public/xencomm.h>
-
-#undef DEBUG
-#ifdef DEBUG
-static int xencomm_debug = 1; /* extremely verbose */
-#else
-#define xencomm_debug 0
-#endif
+#include <asm/page.h>
+#include <asm/debugger.h>
 
 /* XXX need to return error, not panic, if domain passed a bad pointer */
-static unsigned long paddr_to_maddr(unsigned long paddr)
+unsigned long paddr_to_maddr(unsigned long paddr)
 {
     struct vcpu *v = get_current();
     struct domain *d = v->domain;
-    int mtype;
-    ulong pfn;
+    ulong gpfn;
     ulong offset;
     ulong pa = paddr;
 
     offset = pa & ~PAGE_MASK;
-    pfn = pa >> PAGE_SHIFT;
+    gpfn = pa >> PAGE_SHIFT;
 
-    pa = pfn2mfn(d, pfn, &mtype);
+    pa = gmfn_to_mfn(d, gpfn);
     if (pa == INVALID_MFN) {
         printk("%s: Dom:%d bad paddr: 0x%lx\n",
                __func__, d->domain_id, paddr);
         return 0;
     }
-    switch (mtype) {
-    case PFN_TYPE_RMA:
-    case PFN_TYPE_LOGICAL:
-        break;
 
-    case PFN_TYPE_FOREIGN:
-        /* I don't think this should ever happen, but I suppose it
-         * could be possible */
-        printk("%s: Dom:%d paddr: 0x%lx type: FOREIGN\n",
-               __func__, d->domain_id, paddr);
-        WARN();
-        break;
-
-    case PFN_TYPE_IO:
-    default:
-        printk("%s: Dom:%d paddr: 0x%lx bad type: 0x%x\n",
-               __func__, d->domain_id, paddr, mtype);
-        WARN();
-        return 0;
-    }
     pa <<= PAGE_SHIFT;
     pa |= offset;
 
     return pa;
 }
-
-/**
- * xencomm_copy_from_guest: Copy a block of data from domain space.
- * @to:   Machine address.
- * @from: Physical address to a xencomm buffer descriptor.
- * @n:    Number of bytes to copy.
- * @skip: Number of bytes from the start to skip.
- *
- * Copy data from domain to hypervisor.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long
-xencomm_copy_from_guest(void *to, const void *from, unsigned int n,
-        unsigned int skip)
-{
-    struct xencomm_desc *desc;
-    unsigned int from_pos = 0;
-    unsigned int to_pos = 0;
-    unsigned int i = 0;
-
-    /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)from);
-    if (desc == NULL)
-        return n;
-
-    if (desc->magic != XENCOMM_MAGIC) {
-        printk("%s: error: %p magic was 0x%x\n",
-               __func__, desc, desc->magic);
-        return n;
-    }
-
-    /* iterate through the descriptor, copying up to a page at a time */
-    while ((to_pos < n) && (i < desc->nr_addrs)) {
-        unsigned long src_paddr = desc->address[i];
-        unsigned int pgoffset;
-        unsigned int chunksz;
-        unsigned int chunk_skip;
-
-        if (src_paddr == XENCOMM_INVALID) {
-            i++;
-            continue;
-        }
-
-        pgoffset = src_paddr % PAGE_SIZE;
-        chunksz = PAGE_SIZE - pgoffset;
-
-        chunk_skip = min(chunksz, skip);
-        from_pos += chunk_skip;
-        chunksz -= chunk_skip;
-        skip -= chunk_skip;
-
-        if (skip == 0) {
-            unsigned long src_maddr;
-            unsigned long dest = (unsigned long)to + to_pos;
-            unsigned int bytes = min(chunksz, n - to_pos);
-
-            src_maddr = paddr_to_maddr(src_paddr + chunk_skip);
-            if (src_maddr == 0)
-                return n - to_pos;
-
-            if (xencomm_debug)
-                printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest);
-            memcpy((void *)dest, (void *)src_maddr, bytes);
-            from_pos += bytes;
-            to_pos += bytes;
-        }
-
-        i++;
-    }
-
-    return n - to_pos;
-}
-
-/**
- * xencomm_copy_to_guest: Copy a block of data to domain space.
- * @to:     Physical address to xencomm buffer descriptor.
- * @from:   Machine address.
- * @n:      Number of bytes to copy.
- * @skip: Number of bytes from the start to skip.
- *
- * Copy data from hypervisor to domain.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long
-xencomm_copy_to_guest(void *to, const void *from, unsigned int n,
-        unsigned int skip)
-{
-    struct xencomm_desc *desc;
-    unsigned int from_pos = 0;
-    unsigned int to_pos = 0;
-    unsigned int i = 0;
-
-    /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)to);
-    if (desc == NULL)
-        return n;
-
-    if (desc->magic != XENCOMM_MAGIC) {
-        printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
-        return n;
-    }
-
-    /* iterate through the descriptor, copying up to a page at a time */
-    while ((from_pos < n) && (i < desc->nr_addrs)) {
-        unsigned long dest_paddr = desc->address[i];
-        unsigned int pgoffset;
-        unsigned int chunksz;
-        unsigned int chunk_skip;
-
-        if (dest_paddr == XENCOMM_INVALID) {
-            i++;
-            continue;
-        }
-
-        pgoffset = dest_paddr % PAGE_SIZE;
-        chunksz = PAGE_SIZE - pgoffset;
-
-        chunk_skip = min(chunksz, skip);
-        to_pos += chunk_skip;
-        chunksz -= chunk_skip;
-        skip -= chunk_skip;
-
-        if (skip == 0) {
-            unsigned long dest_maddr;
-            unsigned long source = (unsigned long)from + from_pos;
-            unsigned int bytes = min(chunksz, n - from_pos);
-
-            dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip);
-            if (dest_maddr == 0)
-                return -1;
-
-            if (xencomm_debug)
-                printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr);
-            memcpy((void *)dest_maddr, (void *)source, bytes);
-            from_pos += bytes;
-            to_pos += bytes;
-        }
-
-        i++;
-    }
-
-    return n - from_pos;
-}
-
-/* Offset page addresses in 'handle' to skip 'bytes' bytes. Set completely
- * exhausted pages to XENCOMM_INVALID. */
-int xencomm_add_offset(void *handle, unsigned int bytes)
-{
-    struct xencomm_desc *desc;
-    int i = 0;
-
-    /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)handle);
-    if (desc == NULL)
-        return -1;
-
-    if (desc->magic != XENCOMM_MAGIC) {
-        printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
-        return -1;
-    }
-
-    /* iterate through the descriptor incrementing addresses */
-    while ((bytes > 0) && (i < desc->nr_addrs)) {
-        unsigned long dest_paddr = desc->address[i];
-        unsigned int pgoffset;
-        unsigned int chunksz;
-        unsigned int chunk_skip;
-
-        if (dest_paddr == XENCOMM_INVALID) {
-            i++;
-            continue;
-        }
-
-        pgoffset = dest_paddr % PAGE_SIZE;
-        chunksz = PAGE_SIZE - pgoffset;
-
-        chunk_skip = min(chunksz, bytes);
-        if (chunk_skip == chunksz) {
-            /* exhausted this page */
-            desc->address[i] = XENCOMM_INVALID;
-        } else {
-            desc->address[i] += chunk_skip;
-        }
-        bytes -= chunk_skip;
-
-       i++;
-    }
-    return 0;
-}
-
-int xencomm_handle_is_null(void *ptr)
-{
-    struct xencomm_desc *desc;
-
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)ptr);
-    if (desc == NULL)
-        return 1;
-
-    return (desc->nr_addrs == 0);
-}
-
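
With the xencomm copy routines gone from this file (they move to common code; see the CONFIG_XENCOMM hunk in xen/common/Makefile below), the remaining exported helper is a plain frame translation. A condensed restatement of the paddr_to_maddr() logic above, with the INVALID_MFN error path elided:

    unsigned long offset = paddr & ~PAGE_MASK;    /* byte offset within page */
    unsigned long gpfn   = paddr >> PAGE_SHIFT;   /* guest frame number      */
    unsigned long mfn    = gmfn_to_mfn(d, gpfn);  /* guest -> machine frame  */
    return (mfn << PAGE_SHIFT) | offset;          /* machine address         */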
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/xen.lds.S
--- a/xen/arch/powerpc/xen.lds.S        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/powerpc/xen.lds.S        Thu Dec 14 08:57:36 2006 -0700
@@ -12,12 +12,12 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_
    __DYNAMIC = 0;    */
 PHDRS
 {
-  text PT_LOAD FILEHDR PHDRS;
+  text PT_LOAD;
 }   
 SECTIONS
 {
+  . = 0x00400000;
   /* Read-only sections, merged into text segment: */
-  PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS;
   .interp         : { *(.interp) } :text
   .hash           : { *(.hash) }
   .dynsym         : { *(.dynsym) }
@@ -111,8 +111,6 @@ SECTIONS
     SORT(CONSTRUCTORS)
   }
 
-  /* Xen addition */
-
   . = ALIGN(32);
   __setup_start = .;
   .setup.init : { *(.setup.init) }
@@ -130,8 +128,6 @@ SECTIONS
   . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
   . = ALIGN(STACK_SIZE);
   __per_cpu_end = .;
-
-  /* end Xen addition */
 
   .data1          : { *(.data1) }
   .tdata         : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/crash.c
--- a/xen/arch/x86/crash.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/crash.c      Thu Dec 14 08:57:36 2006 -0700
@@ -58,9 +58,9 @@ static void smp_send_nmi_allbutself(void
 static void smp_send_nmi_allbutself(void)
 {
     cpumask_t allbutself = cpu_online_map;
-
     cpu_clear(smp_processor_id(), allbutself);
-    send_IPI_mask(allbutself, APIC_DM_NMI);
+    if ( !cpus_empty(allbutself) )
+        send_IPI_mask(allbutself, APIC_DM_NMI);
 }
 
 static void nmi_shootdown_cpus(void)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/domain_build.c       Thu Dec 14 08:57:36 2006 -0700
@@ -321,8 +321,11 @@ int construct_dom0(struct domain *d,
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
 
-    dom0_pae = (dsi.pae_kernel != PAEKERN_no);
     xen_pae  = (CONFIG_PAGING_LEVELS == 3);
+    if (dsi.pae_kernel == PAEKERN_bimodal)
+        dom0_pae = xen_pae; 
+    else
+        dom0_pae = (dsi.pae_kernel != PAEKERN_no);
     if ( dom0_pae != xen_pae )
     {
         printk("PAE mode mismatch between Xen and DOM0 (xen=%s, dom0=%s)\n",
@@ -330,7 +333,8 @@ int construct_dom0(struct domain *d,
         return -EINVAL;
     }
 
-    if ( xen_pae && dsi.pae_kernel == PAEKERN_extended_cr3 )
+    if ( xen_pae && (dsi.pae_kernel == PAEKERN_extended_cr3 ||
+            dsi.pae_kernel == PAEKERN_bimodal) )
             set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
 
     if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL )
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/mm.c Thu Dec 14 08:57:36 2006 -0700
@@ -2951,7 +2951,17 @@ long arch_memory_op(int op, XEN_GUEST_HA
         guest_physmap_add_page(d, xatp.gpfn, mfn);
 
         UNLOCK_BIGLOCK(d);
-        
+
+        /* If we're doing FAST_FAULT_PATH, then shadow mode may have
+           cached the fact that this is an mmio region in the shadow
+           page tables.  Blow the tables away to remove the cache.
+           This is pretty heavy handed, but this is a rare operation
+           (it might happen a dozen times during boot and then never
+           again), so it doesn't matter too much. */
+        shadow_lock(d);
+        shadow_blow_tables(d);
+        shadow_unlock(d);
+
         put_domain(d);
 
         break;
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Dec 14 08:57:36 2006 -0700
@@ -791,7 +791,7 @@ void shadow_prealloc(struct domain *d, u
 
 /* Deliberately free all the memory we can: this will tear down all of
  * this domain's shadows */
-static void shadow_blow_tables(struct domain *d) 
+void shadow_blow_tables(struct domain *d) 
 {
     struct list_head *l, *t;
     struct shadow_page_info *sp;
@@ -3123,7 +3123,7 @@ static int shadow_log_dirty_op(
  out:
     shadow_unlock(d);
     domain_unpause(d);
-    return 0;
+    return rv;
 }
 
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Dec 14 08:57:36 2006 -0700
@@ -3488,6 +3488,9 @@ sh_update_cr3(struct vcpu *v)
                                        ? SH_type_l2h_shadow 
                                        : SH_type_l2_shadow);
             }
+            else
+                /* The guest is not present: clear out the shadow. */
+                sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
         }
     }
 #elif GUEST_PAGING_LEVELS == 4
diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/arch/x86/numa.c       Thu Dec 14 08:57:36 2006 -0700
@@ -214,7 +214,7 @@ void __init numa_initmem_init(unsigned l
 
 __cpuinit void numa_add_cpu(int cpu)
 {
-       set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+       cpu_set(cpu, node_to_cpumask[cpu_to_node(cpu)]);
 } 
 
 void __cpuinit numa_set_node(int cpu, int node)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/Makefile
--- a/xen/common/Makefile       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/Makefile       Thu Dec 14 08:57:36 2006 -0700
@@ -32,5 +32,7 @@ obj-$(crash_debug) += gdbstub.o
 obj-$(crash_debug) += gdbstub.o
 obj-$(xenoprof)    += xenoprof.o
 
+obj-$(CONFIG_XENCOMM) += xencomm.o
+
 # Object file contains changeset and compiler information.
 version.o: $(BASEDIR)/include/xen/compile.h
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/domain.c
--- a/xen/common/domain.c       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/domain.c       Thu Dec 14 08:57:36 2006 -0700
@@ -238,7 +238,11 @@ void domain_kill(struct domain *d)
 
 void __domain_crash(struct domain *d)
 {
-    if ( d == current->domain )
+    if ( test_bit(_DOMF_shutdown, &d->domain_flags) )
+    {
+        /* Print nothing: the domain is already shutting down. */
+    }
+    else if ( d == current->domain )
     {
         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
                d->domain_id, current->vcpu_id, smp_processor_id());
@@ -346,16 +350,25 @@ void domain_destroy(struct domain *d)
     send_guest_global_virq(dom0, VIRQ_DOM_EXC);
 }
 
-void vcpu_pause(struct vcpu *v)
-{
-    ASSERT(v != current);
-
+static void vcpu_pause_setup(struct vcpu *v)
+{
     spin_lock(&v->pause_lock);
     if ( v->pause_count++ == 0 )
         set_bit(_VCPUF_paused, &v->vcpu_flags);
     spin_unlock(&v->pause_lock);
-
+}
+
+void vcpu_pause(struct vcpu *v)
+{
+    ASSERT(v != current);
+    vcpu_pause_setup(v);
     vcpu_sleep_sync(v);
+}
+
+void vcpu_pause_nosync(struct vcpu *v)
+{
+    vcpu_pause_setup(v);
+    vcpu_sleep_nosync(v);
 }
 
 void vcpu_unpause(struct vcpu *v)
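
The refactoring above moves the pause bookkeeping into vcpu_pause_setup() so a non-blocking variant can share it: vcpu_pause() still asserts v != current and waits, via vcpu_sleep_sync(), until the target is actually descheduled, whereas vcpu_pause_nosync() only marks the VCPU paused and requests a deschedule. A hypothetical caller sketch (the function name is illustrative only):

    static void poke_vcpu_state(struct vcpu *v)
    {
        vcpu_pause(v);        /* blocks until v is fully off the CPU */
        /* ... v's saved state may be examined or modified here ... */
        vcpu_unpause(v);
    }
    /* vcpu_pause_nosync(v) would return immediately instead; the credit
     * scheduler's new park path in sched_credit.c below relies on that. */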
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/elf.c
--- a/xen/common/elf.c  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/elf.c  Thu Dec 14 08:57:36 2006 -0700
@@ -216,16 +216,6 @@ int parseelfimage(struct domain_setup_in
         return -EINVAL;
     }
 
-    /* Find the section-header strings table. */
-    if ( ehdr->e_shstrndx == SHN_UNDEF )
-    {
-        printk("ELF image has no section-header strings table (shstrtab).\n");
-        return -EINVAL;
-    }
-    shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
-                        (ehdr->e_shstrndx*ehdr->e_shentsize));
-    shstrtab = image + shdr->sh_offset;
-
     dsi->__elfnote_section = NULL;
     dsi->__xen_guest_string = NULL;
 
@@ -244,6 +234,16 @@ int parseelfimage(struct domain_setup_in
     /* Fall back to looking for the special '__xen_guest' section. */
     if ( dsi->__elfnote_section == NULL )
     {
+        /* Find the section-header strings table. */
+        if ( ehdr->e_shstrndx == SHN_UNDEF )
+        {
+            printk("ELF image has no section-header strings table.\n");
+            return -EINVAL;
+        }
+        shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
+                            (ehdr->e_shstrndx*ehdr->e_shentsize));
+        shstrtab = image + shdr->sh_offset;
+
         for ( h = 0; h < ehdr->e_shnum; h++ )
         {
             shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize));
@@ -286,6 +286,8 @@ int parseelfimage(struct domain_setup_in
     }
 
     /*
+     * A "bimodal" ELF note indicates the kernel will adjust to the
+     * current paging mode, including handling extended cr3 syntax.
      * If we have ELF notes then PAE=yes implies that we must support
      * the extended cr3 syntax. Otherwise we need to find the
      * [extended-cr3] syntax in the __xen_guest string.
@@ -294,9 +296,10 @@ int parseelfimage(struct domain_setup_in
     if ( dsi->__elfnote_section )
     {
         p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE);
-        if ( p != NULL && strncmp(p, "yes", 3) == 0 )
+        if ( p != NULL && strncmp(p, "bimodal", 7) == 0 )
+            dsi->pae_kernel = PAEKERN_bimodal;
+        else if ( p != NULL && strncmp(p, "yes", 3) == 0 )
             dsi->pae_kernel = PAEKERN_extended_cr3;
-
     }
     else
     {
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/gdbstub.c
--- a/xen/common/gdbstub.c      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/gdbstub.c      Thu Dec 14 08:57:36 2006 -0700
@@ -42,6 +42,7 @@
 #include <xen/init.h>
 #include <xen/smp.h>
 #include <xen/console.h>
+#include <xen/errno.h>
 
 /* Printk isn't particularly safe just after we've trapped to the
    debugger. so avoid it. */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/kexec.c
--- a/xen/common/kexec.c        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/kexec.c        Thu Dec 14 08:57:36 2006 -0700
@@ -140,13 +140,21 @@ void machine_crash_kexec(void)
 
 static void do_crashdump_trigger(unsigned char key)
 {
-       printk("triggering crashdump\n");
-       machine_crash_kexec();
+    int pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0);
+    if ( test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
+    {
+        printk("'%c' pressed -> triggering crashdump\n", key);
+        machine_crash_kexec();
+    }
+    else
+    {
+        printk("'%c' pressed -> no crash kernel loaded -- not triggering 
crashdump\n", key);
+    }
 }
 
 static __init int register_crashdump_trigger(void)
 {
-       register_keyhandler('c', do_crashdump_trigger, "trigger a crashdump");
+       register_keyhandler('C', do_crashdump_trigger, "trigger a crashdump");
        return 0;
 }
 __initcall(register_crashdump_trigger);
diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/common/sched_credit.c Thu Dec 14 08:57:36 2006 -0700
@@ -56,7 +56,12 @@
 #define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
 #define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
 #define CSCHED_PRI_IDLE         -64     /* idle */
-#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
+
+
+/*
+ * Flags
+ */
+#define CSCHED_FLAG_VCPU_PARKED 0x0001  /* VCPU over capped credits */
 
 
 /*
@@ -100,26 +105,21 @@
     _MACRO(vcpu_wake_onrunq)                \
     _MACRO(vcpu_wake_runnable)              \
     _MACRO(vcpu_wake_not_runnable)          \
+    _MACRO(vcpu_park)                       \
+    _MACRO(vcpu_unpark)                     \
     _MACRO(tickle_local_idler)              \
     _MACRO(tickle_local_over)               \
     _MACRO(tickle_local_under)              \
     _MACRO(tickle_local_other)              \
     _MACRO(tickle_idlers_none)              \
     _MACRO(tickle_idlers_some)              \
-    _MACRO(vcpu_migrate)                    \
     _MACRO(load_balance_idle)               \
     _MACRO(load_balance_over)               \
     _MACRO(load_balance_other)              \
     _MACRO(steal_trylock_failed)            \
-    _MACRO(steal_peer_down)                 \
     _MACRO(steal_peer_idle)                 \
-    _MACRO(steal_peer_running)              \
-    _MACRO(steal_peer_pinned)               \
-    _MACRO(steal_peer_migrating)            \
-    _MACRO(steal_peer_best_idler)           \
-    _MACRO(steal_loner_candidate)           \
-    _MACRO(steal_loner_signal)              \
-    _MACRO(cpu_pick)                        \
+    _MACRO(migrate_queued)                  \
+    _MACRO(migrate_running)                 \
     _MACRO(dom_init)                        \
     _MACRO(dom_destroy)                     \
     _MACRO(vcpu_init)                       \
@@ -146,7 +146,7 @@
     struct                                      \
     {                                           \
         CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
-    } stats
+    } stats;
 
 #define CSCHED_STATS_PRINTK()                   \
     do                                          \
@@ -155,14 +155,27 @@
         CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
     } while ( 0 )
 
-#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
+#define CSCHED_STAT_CRANK(_X)               (CSCHED_STAT(_X)++)
+
+#define CSCHED_VCPU_STATS_RESET(_V)                     \
+    do                                                  \
+    {                                                   \
+        memset(&(_V)->stats, 0, sizeof((_V)->stats));   \
+    } while ( 0 )
+
+#define CSCHED_VCPU_STAT_CRANK(_V, _X)      (((_V)->stats._X)++)
+
+#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    (((_V)->stats._X) = (_Y))
 
 #else /* CSCHED_STATS */
 
-#define CSCHED_STATS_RESET()    do {} while ( 0 )
-#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
-#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
-#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
+#define CSCHED_STATS_RESET()                do {} while ( 0 )
+#define CSCHED_STATS_DEFINE()
+#define CSCHED_STATS_PRINTK()               do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X)               do {} while ( 0 )
+#define CSCHED_VCPU_STATS_RESET(_V)         do {} while ( 0 )
+#define CSCHED_VCPU_STAT_CRANK(_V, _X)      do {} while ( 0 )
+#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    do {} while ( 0 )
 
 #endif /* CSCHED_STATS */
 
@@ -184,14 +197,18 @@ struct csched_vcpu {
     struct csched_dom *sdom;
     struct vcpu *vcpu;
     atomic_t credit;
+    uint16_t flags;
     int16_t pri;
+#ifdef CSCHED_STATS
     struct {
         int credit_last;
         uint32_t credit_incr;
         uint32_t state_active;
         uint32_t state_idle;
-        uint32_t migrate;
+        uint32_t migrate_q;
+        uint32_t migrate_r;
     } stats;
+#endif
 };
 
 /*
@@ -219,7 +236,7 @@ struct csched_private {
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
-    CSCHED_STATS_DEFINE();
+    CSCHED_STATS_DEFINE()
 };
 
 
@@ -229,6 +246,15 @@ static struct csched_private csched_priv
 static struct csched_private csched_priv;
 
 
+
+static inline int
+__cycle_cpu(int cpu, const cpumask_t *mask)
+{
+    int nxt = next_cpu(cpu, *mask);
+    if (nxt == NR_CPUS)
+        nxt = first_cpu(*mask);
+    return nxt;
+}
 
 static inline int
 __vcpu_on_runq(struct csched_vcpu *svc)
@@ -375,118 +401,138 @@ __csched_vcpu_check(struct vcpu *vc)
 #define CSCHED_VCPU_CHECK(_vc)
 #endif
 
-/*
- * Indicates which of two given idlers is most efficient to run
- * an additional VCPU.
- *
- * Returns:
- *  0:           They are the same.
- *  negative:    One is less efficient than Two.
- *  positive:    One is more efficient than Two.
- */
-static int
-csched_idler_compare(int one, int two)
-{
-    cpumask_t idlers;
-    cpumask_t one_idlers;
-    cpumask_t two_idlers;
-
-    idlers = csched_priv.idlers;
-    cpu_clear(one, idlers);
-    cpu_clear(two, idlers);
-
-    if ( cpu_isset(one, cpu_core_map[two]) )
-    {
-        cpus_and(one_idlers, idlers, cpu_sibling_map[one]);
-        cpus_and(two_idlers, idlers, cpu_sibling_map[two]);
-    }
-    else
-    {
-        cpus_and(one_idlers, idlers, cpu_core_map[one]);
-        cpus_and(two_idlers, idlers, cpu_core_map[two]);
-    }
-
-    return cpus_weight(one_idlers) - cpus_weight(two_idlers);
-}
-
 static inline int
-__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
 {
     /*
      * Don't pick up work that's in the peer's scheduling tail. Also only pick
      * up work that's allowed to run on our CPU.
      */
-    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_running);
-        return 0;
-    }
-
-    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_pinned);
-        return 0;
-    }
-
-    return 1;
-}
-
-static inline int
-__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
-{
-    BUG_ON( is_idle_vcpu(vc) );
-
-    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_pinned);
-        return 0;
-    }
-
-    if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) )
-    {
-        CSCHED_STAT_CRANK(steal_peer_migrating);
-        return 0;
-    }
-
-    if ( csched_idler_compare(local_cpu, vc->processor) <= 0 )
-    {
-        CSCHED_STAT_CRANK(steal_peer_best_idler);
-        return 0;
-    }
-
-    return 1;
-}
-
-static void
-csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+    return !test_bit(_VCPUF_running, &vc->vcpu_flags) &&
+           cpu_isset(dest_cpu, vc->cpu_affinity);
+}
+
+static int
+csched_cpu_pick(struct vcpu *vc)
+{
+    cpumask_t cpus;
+    cpumask_t idlers;
+    int cpu;
+
+    /*
+     * Pick from online CPUs in VCPU's affinity mask, giving a
+     * preference to its current processor if it's in there.
+     */
+    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+    cpu = cpu_isset(vc->processor, cpus)
+            ? vc->processor
+            : __cycle_cpu(vc->processor, &cpus);
+    ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
+
+    /*
+     * Try to find an idle processor within the above constraints.
+     *
+     * In multi-core and multi-threaded CPUs, not all idle execution
+     * vehicles are equal!
+     *
+     * We give preference to the idle execution vehicle with the most
+     * idling neighbours in its grouping. This distributes work across
+     * distinct cores first and guarantees we don't do something stupid
+     * like run two VCPUs on co-hyperthreads while there are idle cores
+     * or sockets.
+     */
+    idlers = csched_priv.idlers;
+    cpu_set(cpu, idlers);
+    cpus_and(cpus, cpus, idlers);
+    cpu_clear(cpu, cpus);
+
+    while ( !cpus_empty(cpus) )
+    {
+        cpumask_t cpu_idlers;
+        cpumask_t nxt_idlers;
+        int nxt;
+
+        nxt = __cycle_cpu(cpu, &cpus);
+
+        if ( cpu_isset(cpu, cpu_core_map[nxt]) )
+        {
+            ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
+            cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
+            cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
+        }
+        else
+        {
+            ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
+            cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
+            cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
+        }
+
+        if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) )
+        {
+            cpu = nxt;
+            cpu_clear(cpu, cpus);
+        }
+        else
+        {
+            cpus_andnot(cpus, cpus, nxt_idlers);
+        }
+    }
+
+    return cpu;
+}
+
+static inline void
+__csched_vcpu_acct_start(struct csched_vcpu *svc)
 {
     struct csched_dom * const sdom = svc->sdom;
     unsigned long flags;
 
-    /* Update credits */
-    atomic_sub(credit_dec, &svc->credit);
-
-    /* Put this VCPU and domain back on the active list if it was idling */
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
     if ( list_empty(&svc->active_vcpu_elem) )
     {
-        spin_lock_irqsave(&csched_priv.lock, flags);
-
-        if ( list_empty(&svc->active_vcpu_elem) )
-        {
-            CSCHED_STAT_CRANK(acct_vcpu_active);
-            svc->stats.state_active++;
-
-            sdom->active_vcpu_count++;
-            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
-            if ( list_empty(&sdom->active_sdom_elem) )
-            {
-                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
-                csched_priv.weight += sdom->weight;
-            }
-        }
-
-        spin_unlock_irqrestore(&csched_priv.lock, flags);
-    }
+        CSCHED_VCPU_STAT_CRANK(svc, state_active);
+        CSCHED_STAT_CRANK(acct_vcpu_active);
+
+        sdom->active_vcpu_count++;
+        list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+        if ( list_empty(&sdom->active_sdom_elem) )
+        {
+            list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            csched_priv.weight += sdom->weight;
+        }
+    }
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+static inline void
+__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_VCPU_STAT_CRANK(svc, state_idle);
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+    }
+}
+
+static void
+csched_vcpu_acct(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+
+    ASSERT( current->processor == cpu );
+    ASSERT( svc->sdom != NULL );
 
     /*
      * If this VCPU's priority was boosted when it last awoke, reset it.
@@ -495,25 +541,30 @@ csched_vcpu_acct(struct csched_vcpu *svc
      */
     if ( svc->pri == CSCHED_PRI_TS_BOOST )
         svc->pri = CSCHED_PRI_TS_UNDER;
-}
-
-static inline void
-__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
-{
-    struct csched_dom * const sdom = svc->sdom;
-
-    BUG_ON( list_empty(&svc->active_vcpu_elem) );
-
-    CSCHED_STAT_CRANK(acct_vcpu_idle);
-    svc->stats.state_idle++;
-
-    sdom->active_vcpu_count--;
-    list_del_init(&svc->active_vcpu_elem);
-    if ( list_empty(&sdom->active_vcpu) )
-    {
-        BUG_ON( csched_priv.weight < sdom->weight );
-        list_del_init(&sdom->active_sdom_elem);
-        csched_priv.weight -= sdom->weight;
+
+    /*
+     * Update credits
+     */
+    atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit);
+
+    /*
+     * Put this VCPU and domain back on the active list if it was
+     * idling.
+     *
+     * If it's been active a while, check if we'd be better off
+     * migrating it to run elsewhere (see multi-core and multi-thread
+     * support in csched_cpu_pick()).
+     */
+    if ( list_empty(&svc->active_vcpu_elem) )
+    {
+        __csched_vcpu_acct_start(svc);
+    }
+    else if ( csched_cpu_pick(current) != cpu )
+    {
+        CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
+        CSCHED_STAT_CRANK(migrate_running);
+        set_bit(_VCPUF_migrating, &current->vcpu_flags);
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
     }
 }
 
@@ -536,15 +587,10 @@ csched_vcpu_init(struct vcpu *vc)
     svc->sdom = sdom;
     svc->vcpu = vc;
     atomic_set(&svc->credit, 0);
+    svc->flags = 0U;
     svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
-    memset(&svc->stats, 0, sizeof(svc->stats));
+    CSCHED_VCPU_STATS_RESET(svc);
     vc->sched_priv = svc;
-
-    CSCHED_VCPU_CHECK(vc);
-
-    /* Attach fair-share VCPUs to the accounting list */
-    if ( likely(sdom != NULL) )
-        csched_vcpu_acct(svc, 0);
 
     /* Allocate per-PCPU info */
     if ( unlikely(!CSCHED_PCPU(vc->processor)) )
@@ -554,7 +600,6 @@ csched_vcpu_init(struct vcpu *vc)
     }
 
     CSCHED_VCPU_CHECK(vc);
-
     return 0;
 }
 
@@ -573,7 +618,7 @@ csched_vcpu_destroy(struct vcpu *vc)
     spin_lock_irqsave(&csched_priv.lock, flags);
 
     if ( !list_empty(&svc->active_vcpu_elem) )
-        __csched_vcpu_acct_idle_locked(svc);
+        __csched_vcpu_acct_stop_locked(svc);
 
     spin_unlock_irqrestore(&csched_priv.lock, flags);
 
@@ -634,9 +679,16 @@ csched_vcpu_wake(struct vcpu *vc)
      * This allows wake-to-run latency sensitive VCPUs to preempt
      * more CPU resource intensive VCPUs without impacting overall 
      * system fairness.
-     */
-    if ( svc->pri == CSCHED_PRI_TS_UNDER )
+     *
+     * The one exception is for VCPUs of capped domains unpausing
+     * after earning credits they had overspent. We don't boost
+     * those.
+     */
+    if ( svc->pri == CSCHED_PRI_TS_UNDER &&
+         !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+    {
         svc->pri = CSCHED_PRI_TS_BOOST;
+    }
 
     /* Put the VCPU on the runq and tickle CPUs */
     __runq_insert(cpu, svc);
@@ -710,71 +762,8 @@ static void
 static void
 csched_dom_destroy(struct domain *dom)
 {
-    struct csched_dom * const sdom = CSCHED_DOM(dom);
-
     CSCHED_STAT_CRANK(dom_destroy);
-
-    xfree(sdom);
-}
-
-static int
-csched_cpu_pick(struct vcpu *vc)
-{
-    cpumask_t cpus;
-    int cpu, nxt;
-
-    CSCHED_STAT_CRANK(cpu_pick);
-
-    /*
-     * Pick from online CPUs in VCPU's affinity mask, giving a
-     * preference to its current processor if it's in there.
-     */
-    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
-    ASSERT( !cpus_empty(cpus) );
-    cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus);
-
-    /*
-     * Try to find an idle processor within the above constraints.
-     */
-    cpus_and(cpus, cpus, csched_priv.idlers);
-    if ( !cpus_empty(cpus) )
-    {
-        cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus);
-        cpu_clear(cpu, cpus);
-
-        /*
-         * In multi-core and multi-threaded CPUs, not all idle execution
-         * vehicles are equal!
-         *
-         * We give preference to the idle execution vehicle with the most
-         * idling neighbours in its grouping. This distributes work across
-         * distinct cores first and guarantees we don't do something stupid
-         * like run two VCPUs on co-hyperthreads while there are idle cores
-         * or sockets.
-         */
-        while ( !cpus_empty(cpus) )
-        {
-            nxt = first_cpu(cpus);
-
-            if ( csched_idler_compare(cpu, nxt) < 0 )
-            {
-                cpu = nxt;
-                cpu_clear(nxt, cpus);
-            }
-            else if ( cpu_isset(cpu, cpu_core_map[nxt]) )
-            {
-                cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]);
-            }
-            else
-            {
-                cpus_andnot(cpus, cpus, cpu_core_map[nxt]);
-            }
-
-            ASSERT( !cpu_isset(nxt, cpus) );
-        }
-    }
-
-    return cpu;
+    xfree(CSCHED_DOM(dom));
 }
 
 /*
@@ -963,11 +952,19 @@ csched_acct(void)
              */
             if ( credit < 0 )
             {
-                if ( sdom->cap != 0U && credit < -credit_cap )
-                    svc->pri = CSCHED_PRI_TS_PARKED;
-                else
-                    svc->pri = CSCHED_PRI_TS_OVER;
-
+                svc->pri = CSCHED_PRI_TS_OVER;
+
+                /* Park running VCPUs of capped-out domains */
+                if ( sdom->cap != 0U &&
+                     credit < -credit_cap &&
+                     !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+                {
+                    CSCHED_STAT_CRANK(vcpu_park);
+                    vcpu_pause_nosync(svc->vcpu);
+                    svc->flags |= CSCHED_FLAG_VCPU_PARKED;
+                }
+
+                /* Lower bound on credits */
                 if ( credit < -CSCHED_CREDITS_PER_TSLICE )
                 {
                     CSCHED_STAT_CRANK(acct_min_credit);
@@ -979,16 +976,30 @@ csched_acct(void)
             {
                 svc->pri = CSCHED_PRI_TS_UNDER;
 
+                /* Unpark any capped domains whose credits go positive */
+                if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
+                {
+                    /*
+                     * It's important to unset the flag AFTER the unpause()
+                     * call to make sure the VCPU's priority is not boosted
+                     * if it is woken up here.
+                     */
+                    CSCHED_STAT_CRANK(vcpu_unpark);
+                    vcpu_unpause(svc->vcpu);
+                    svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
+                }
+
+                /* Upper bound on credits means VCPU stops earning */
                 if ( credit > CSCHED_CREDITS_PER_TSLICE )
                 {
-                    __csched_vcpu_acct_idle_locked(svc);
+                    __csched_vcpu_acct_stop_locked(svc);
                     credit = 0;
                     atomic_set(&svc->credit, credit);
                 }
             }
 
-            svc->stats.credit_last = credit;
-            svc->stats.credit_incr = credit_fair;
+            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
+            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
             credit_balance += credit;
         }
     }
@@ -1004,21 +1015,14 @@ static void
 static void
 csched_tick(unsigned int cpu)
 {
-    struct csched_vcpu * const svc = CSCHED_VCPU(current);
-    struct csched_dom * const sdom = svc->sdom;
-
     /*
      * Accounting for running VCPU
-     *
-     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
-     */
-    if ( likely(sdom != NULL) )
-    {
-        csched_vcpu_acct(svc, CSCHED_CREDITS_PER_TICK);
-    }
-
-    /*
-     * Accounting duty
+     */
+    if ( !is_idle_vcpu(current) )
+        csched_vcpu_acct(cpu);
+
+    /*
+     * Host-wide accounting duty
      *
      * Note: Currently, this is always done by the master boot CPU. Eventually,
      * we could distribute or at the very least cycle the duty.
@@ -1040,40 +1044,48 @@ csched_tick(unsigned int cpu)
 }
 
 static struct csched_vcpu *
-csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
-{
+csched_runq_steal(int peer_cpu, int cpu, int pri)
+{
+    const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
+    const struct vcpu * const peer_vcpu = per_cpu(schedule_data, 
peer_cpu).curr;
+    struct csched_vcpu *speer;
     struct list_head *iter;
-    struct csched_vcpu *speer;
     struct vcpu *vc;
 
-    list_for_each( iter, &spc->runq )
-    {
-        speer = __runq_elem(iter);
-
-        /*
-         * If next available VCPU here is not of higher priority than ours,
-         * this PCPU is useless to us.
-         */
-        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
-        {
-            CSCHED_STAT_CRANK(steal_peer_idle);
-            break;
-        }
-
-        /* Is this VCPU is runnable on our PCPU? */
-        vc = speer->vcpu;
-        BUG_ON( is_idle_vcpu(vc) );
-
-        if ( __csched_queued_vcpu_is_stealable(cpu, vc) )
-        {
-            /* We got a candidate. Grab it! */
-            __runq_remove(speer);
-            vc->processor = cpu;
-
-            return speer;
-        }
-    }
-
+    /*
+     * Don't steal from an idle CPU's runq because it's about to
+     * pick up work from it itself.
+     */
+    if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
+    {
+        list_for_each( iter, &peer_pcpu->runq )
+        {
+            speer = __runq_elem(iter);
+
+            /*
+             * If next available VCPU here is not of strictly higher
+             * priority than ours, this PCPU is useless to us.
+             */
+            if ( speer->pri <= pri )
+                break;
+
+            /* Is this VCPU runnable on our PCPU? */
+            vc = speer->vcpu;
+            BUG_ON( is_idle_vcpu(vc) );
+
+            if (__csched_vcpu_is_migrateable(vc, cpu))
+            {
+                /* We got a candidate. Grab it! */
+                CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
+                CSCHED_STAT_CRANK(migrate_queued);
+                __runq_remove(speer);
+                vc->processor = cpu;
+                return speer;
+            }
+        }
+    }
+
+    CSCHED_STAT_CRANK(steal_peer_idle);
     return NULL;
 }
 
@@ -1081,11 +1093,10 @@ csched_load_balance(int cpu, struct csch
 csched_load_balance(int cpu, struct csched_vcpu *snext)
 {
     struct csched_vcpu *speer;
-    struct csched_pcpu *spc;
-    struct vcpu *peer_vcpu;
     cpumask_t workers;
-    cpumask_t loners;
     int peer_cpu;
+
+    BUG_ON( cpu != snext->vcpu->processor );
 
     if ( snext->pri == CSCHED_PRI_IDLE )
         CSCHED_STAT_CRANK(load_balance_idle);
@@ -1095,22 +1106,16 @@ csched_load_balance(int cpu, struct csch
         CSCHED_STAT_CRANK(load_balance_other);
 
     /*
-     * Peek at non-idling CPUs in the system
-     */
-    cpus_clear(loners);
+     * Peek at non-idling CPUs in the system, starting with our
+     * immediate neighbour.
+     */
     cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
     cpu_clear(cpu, workers);
-
     peer_cpu = cpu;
-    BUG_ON( peer_cpu != snext->vcpu->processor );
 
     while ( !cpus_empty(workers) )
     {
-        /* For each CPU of interest, starting with our neighbour... */
-        peer_cpu = next_cpu(peer_cpu, workers);
-        if ( peer_cpu == NR_CPUS )
-            peer_cpu = first_cpu(workers);
-
+        peer_cpu = __cycle_cpu(peer_cpu, &workers);
         cpu_clear(peer_cpu, workers);
 
         /*
@@ -1126,83 +1131,13 @@ csched_load_balance(int cpu, struct csch
             continue;
         }
 
-        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
-        spc = CSCHED_PCPU(peer_cpu);
-
-        if ( unlikely(spc == NULL) )
-        {
-            CSCHED_STAT_CRANK(steal_peer_down);
-        }
-        else if ( unlikely(is_idle_vcpu(peer_vcpu)) )
-        {
-            /*
-             * Don't steal from an idle CPU's runq because it's about to
-             * pick up work from it itself.
-             */
-            CSCHED_STAT_CRANK(steal_peer_idle);
-        }
-        else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) )
-        {
-            if ( snext->pri == CSCHED_PRI_IDLE &&
-                 __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
-            {
-                CSCHED_STAT_CRANK(steal_loner_candidate);
-                cpu_set(peer_cpu, loners);
-            }
-        }
-        else
-        {
-            /* Try to steal work from a remote CPU's runq. */
-            speer = csched_runq_steal(spc, cpu, snext->pri);
-            if ( speer != NULL )
-            {
-                spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-                CSCHED_STAT_CRANK(vcpu_migrate);
-                speer->stats.migrate++;
-                return speer;
-            }
-        }
-
+        /*
+         * Any work over there to steal?
+         */
+        speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-    }
-
-    /*
-     * If we failed to find any remotely queued VCPUs to move here,
-     * see if it would be more efficient to move any of the running
-     * remote VCPUs over here.
-     */
-    while ( !cpus_empty(loners) )
-    {
-        /* For each CPU of interest, starting with our neighbour... */
-        peer_cpu = next_cpu(peer_cpu, loners);
-        if ( peer_cpu == NR_CPUS )
-            peer_cpu = first_cpu(loners);
-
-        cpu_clear(peer_cpu, loners);
-
-        if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) )
-        {
-            CSCHED_STAT_CRANK(steal_trylock_failed);
-            continue;
-        }
-
-        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
-        spc = CSCHED_PCPU(peer_cpu);
-
-        /* Signal the first candidate only. */
-        if ( !is_idle_vcpu(peer_vcpu) &&
-             is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) &&
-             __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
-        {
-            set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags);
-            spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-
-            CSCHED_STAT_CRANK(steal_loner_signal);
-            cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ);
-            break;
-        }
-
-        spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+        if ( speer != NULL )
+            return speer;
     }
 
     /* Failed to find more important work elsewhere... */
@@ -1270,7 +1205,6 @@ csched_schedule(s_time_t now)
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
-
     return ret;
 }
 
@@ -1279,22 +1213,25 @@ csched_dump_vcpu(struct csched_vcpu *svc
 {
     struct csched_dom * const sdom = svc->sdom;
 
-    printk("[%i.%i] pri=%i cpu=%i",
+    printk("[%i.%i] pri=%i flags=%x cpu=%i",
             svc->vcpu->domain->domain_id,
             svc->vcpu->vcpu_id,
             svc->pri,
+            svc->flags,
             svc->vcpu->processor);
 
     if ( sdom )
     {
-        printk(" credit=%i (%d+%u) {a/i=%u/%u m=%u w=%u}",
-            atomic_read(&svc->credit),
-            svc->stats.credit_last,
-            svc->stats.credit_incr,
-            svc->stats.state_active,
-            svc->stats.state_idle,
-            svc->stats.migrate,
-            sdom->weight);
+        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
+#ifdef CSCHED_STATS
+        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
+                svc->stats.credit_last,
+                svc->stats.credit_incr,
+                svc->stats.state_active,
+                svc->stats.state_idle,
+                svc->stats.migrate_q,
+                svc->stats.migrate_r);
+#endif
     }
 
     printk("\n");
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/cache.h
--- a/xen/include/asm-powerpc/cache.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/cache.h   Thu Dec 14 08:57:36 2006 -0700
@@ -70,4 +70,5 @@ struct cpu_caches {
     u32 ilines_per_page;
 };
 extern struct cpu_caches cpu_caches;
+extern void cpu_flush_icache(void);
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/config.h  Thu Dec 14 08:57:36 2006 -0700
@@ -21,7 +21,7 @@
 #ifndef __PPC_CONFIG_H__
 #define __PPC_CONFIG_H__
 
-#define CONFIG_MAMBO 1
+#define CONFIG_SYSTEMSIM 1
 #define HYPERVISOR_VIRT_START 0x0 /* XXX temp hack for common/kernel.c */
 
 
@@ -50,6 +50,8 @@ extern char __bss_start[];
 #define CONFIG_GDB 1
 #define CONFIG_SMP 1
 #define CONFIG_PCI 1
+#define CONFIG_NUMA 1
+#define CONFIG_CMDLINE_SIZE 512
 #define NR_CPUS 16
 
 #ifndef ELFSIZE
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/debugger.h
--- a/xen/include/asm-powerpc/debugger.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/debugger.h        Thu Dec 14 08:57:36 2006 -0700
@@ -13,13 +13,68 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_DEBUGGER_H_
 #define _ASM_DEBUGGER_H_
+
+#include <public/xen.h>
+
+extern void show_backtrace_regs(struct cpu_user_regs *);
+extern void show_backtrace(ulong sp, ulong lr, ulong pc);
+
+static inline void show_execution_state(struct cpu_user_regs *regs)
+{
+    show_registers(regs);
+}
+
+extern void dump_execution_state(void);
+
+static inline void dump_all_execution_state(void)
+{
+    ulong sp;
+    ulong lr;
+
+    dump_execution_state();
+    sp = (ulong)__builtin_frame_address(0);
+    lr = (ulong)__builtin_return_address(0);
+
+    show_backtrace(sp, lr, lr);
+}
+
+static inline void __force_crash(void)
+{
+    dump_all_execution_state();
+    __builtin_trap();
+}
+
+static inline void debugger_trap_immediate(void)
+{
+    dump_all_execution_state();
+#ifdef CRASH_DEBUG
+    __builtin_trap();
+#endif
+}
+
+static inline void unimplemented(void)
+{
+#ifdef VERBOSE
+    dump_all_execution_state();
+#endif
+}
+
+extern void __warn(char *file, int line);
+#define WARN() __warn(__FILE__, __LINE__)
+#define WARN_ON(_p) do { if (_p) WARN(); } while ( 0 )
+
+extern void __attn(void);
+#define ATTN() __attn();
+
+#define FORCE_CRASH() __force_crash()
 
 #ifdef CRASH_DEBUG
 
@@ -32,8 +87,6 @@ static inline int debugger_trap_fatal(
     return vector;
 }
 
-#define debugger_trap_immediate() __asm__ __volatile__ ("trap");
-
 #else /* CRASH_DEBUG */
 
 static inline int debugger_trap_fatal(
@@ -43,17 +96,6 @@ static inline int debugger_trap_fatal(
     return vector;
 }
 
-static inline void debugger_trap_immediate(void)
-{
-    ulong sp;
-    ulong lr;
-
-    sp = (ulong)__builtin_frame_address(0);
-    lr = (ulong)__builtin_return_address(0);
-
-    show_backtrace(sp, lr, lr);
-}
-
 #endif /* CRASH_DEBUG */
 
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/delay.h
--- a/xen/include/asm-powerpc/delay.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/delay.h   Thu Dec 14 08:57:36 2006 -0700
@@ -13,16 +13,28 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_DELAY_H_
 #define _ASM_DELAY_H_
 
+#include <asm/time.h>
+
 extern unsigned long ticks_per_usec; 
 #define __udelay udelay
-extern void udelay(unsigned long usecs);
+static inline void udelay(unsigned long usecs)
+{
+    unsigned long ticks = usecs * ticks_per_usec;
+    unsigned long s;
+    unsigned long e;
 
+    s = get_timebase();
+    do {
+        e = get_timebase();
+    } while ((e-s) < ticks);
+}
 #endif
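
udelay() becomes an inline busy-wait on the timebase: the requested microseconds are converted to timebase ticks via ticks_per_usec and the loop spins until that many ticks have elapsed. A trivial, hypothetical usage sketch assuming ticks_per_usec has been initialised by the platform timer setup:

    /* Hypothetical caller: give a device one millisecond to settle. */
    static void settle_hardware(void)
    {
        udelay(1000);   /* spins for 1000 * ticks_per_usec timebase ticks */
    }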
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/domain.h  Thu Dec 14 08:57:36 2006 -0700
@@ -40,6 +40,9 @@ struct arch_domain {
 
     /* list of extents beyond RMA */
     struct list_head extent_list;
+
+    uint foreign_mfn_count;
+    uint *foreign_mfns;
 
     /* I/O-port access bitmap mask. */
     u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
@@ -86,7 +89,7 @@ struct arch_vcpu {
     struct slb_entry slb_entries[NUM_SLB_ENTRIES];
 
     /* I/O-port access bitmap. */
-    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    XEN_GUEST_HANDLE(uint8_t) iobmp; /* Guest kernel virtual address of the bitmap. */
     int iobmp_limit;  /* Number of ports represented in the bitmap.  */
     int iopl;         /* Current IOPL for this VCPU. */
 
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/flushtlb.h
--- a/xen/include/asm-powerpc/flushtlb.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/flushtlb.h        Thu Dec 14 08:57:36 2006 -0700
@@ -24,7 +24,6 @@
 #include <xen/config.h>
 #include <xen/percpu.h>
 #include <xen/types.h>
-#include <asm/misc.h>
 
 /* The current time as shown by the virtual TLB clock. */
 extern u32 tlbflush_clock;
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/grant_table.h
--- a/xen/include/asm-powerpc/grant_table.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/grant_table.h     Thu Dec 14 08:57:36 2006 -0700
@@ -29,6 +29,10 @@
  * Caller must own caller's BIGLOCK, is responsible for flushing the TLB, and
  * must hold a reference to the page.
  */
+extern long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn);
+extern long pte_remove(ulong flags, ulong ptex, ulong avpn,
+                       ulong *hi, ulong *lo);
+
 int create_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags);
 int destroy_grant_host_mapping(
@@ -41,8 +45,7 @@ int destroy_grant_host_mapping(
             (d), XENSHARE_writable);                                     \
     } while ( 0 )
 
-#define gnttab_shared_mfn(d, t, i)                      \
-    ((virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i))
+#define gnttab_shared_mfn(d, t, i) (((ulong)((t)->shared) >> PAGE_SHIFT) + (i))
 
 #define gnttab_shared_gmfn(d, t, i)                     \
     (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
@@ -61,4 +64,9 @@ static inline void gnttab_clear_flag(uns
     clear_bit(lnr, laddr);
 }
 
+static inline uint cpu_foreign_map_order(void)
+{
+    /* 16 GiB */
+    return 34 - PAGE_SHIFT;
+}
 #endif  /* __ASM_PPC_GRANT_TABLE_H__ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/guest_access.h
--- a/xen/include/asm-powerpc/guest_access.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/guest_access.h    Thu Dec 14 08:57:36 2006 -0700
@@ -21,82 +21,6 @@
 #ifndef __PPC_GUEST_ACCESS_H__
 #define __PPC_GUEST_ACCESS_H__
 
-extern unsigned long xencomm_copy_to_guest(void *to, const void *from,
-        unsigned int len, unsigned int skip); 
-extern unsigned long xencomm_copy_from_guest(void *to, const void *from,
-        unsigned int len, unsigned int skip); 
-extern int xencomm_add_offset(void *handle, unsigned int bytes);
-extern int xencomm_handle_is_null(void *ptr);
-
-
-/* Is the guest handle a NULL reference? */
-#define guest_handle_is_null(hnd) \
-    ((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
-
-/* Offset the given guest handle into the array it refers to. */
-#define guest_handle_add_offset(hnd, nr) ({         \
-    const typeof((hnd).p) _ptr = (hnd).p;           \
-    xencomm_add_offset(_ptr, nr * sizeof(*_ptr));   \
-})
-
-/* Cast a guest handle to the specified type of handle. */
-#define guest_handle_cast(hnd, type) ({         \
-    type *_x = (hnd).p;                         \
-    XEN_GUEST_HANDLE(type) _y; \
-    set_xen_guest_handle(_y, _x); \
-    _y; \
-})
-
-/* Since we run in real mode, we can safely access all addresses. That also
- * means our __routines are identical to our "normal" routines. */
-#define guest_handle_okay(hnd, nr) 1
-
-/*
- * Copy an array of objects to guest context via a guest handle.
- * Optionally specify an offset into the guest array.
- */
-#define copy_to_guest_offset(hnd, idx, ptr, nr) \
-    __copy_to_guest_offset(hnd, idx, ptr, nr)
-
-/* Copy sub-field of a structure to guest context via a guest handle. */
-#define copy_field_to_guest(hnd, ptr, field) \
-    __copy_field_to_guest(hnd, ptr, field)
-
-/*
- * Copy an array of objects from guest context via a guest handle.
- * Optionally specify an offset into the guest array.
- */
-#define copy_from_guest_offset(ptr, hnd, idx, nr) \
-    __copy_from_guest_offset(ptr, hnd, idx, nr)
-
-/* Copy sub-field of a structure from guest context via a guest handle. */
-#define copy_field_from_guest(ptr, hnd, field) \
-    __copy_field_from_guest(ptr, hnd, field)
-
-#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
-})
-
-#define __copy_field_to_guest(hnd, ptr, field) ({                   \
-    const int _off = offsetof(typeof(*ptr), field);                  \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
-})
-
-#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({              \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx));  \
-})
-
-#define __copy_field_from_guest(ptr, hnd, field) ({                 \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
-})
+#include <xen/xencomm.h>
 
 #endif /* __PPC_GUEST_ACCESS_H__ */
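
[Context, not part of the changeset: the hand-rolled xencomm wrappers removed above now come from the common <xen/xencomm.h> header. A minimal hedged sketch of how the generic guest-access macros are typically used from a hypercall handler; the function name, handle type, and buffer are illustrative only.]

    /* Sketch only: assumes DEFINE_XEN_GUEST_HANDLE(uint32_t) and
     * <xen/errno.h> are in scope. */
    static long example_fill_guest_array(XEN_GUEST_HANDLE(uint32_t) hnd,
                                         unsigned int nr)
    {
        uint32_t vals[4] = { 0, 1, 2, 3 };

        if ( guest_handle_is_null(hnd) )
            return -EINVAL;
        if ( nr > 4 )
            nr = 4;
        /* Copy nr entries to the guest array, starting at index 0. */
        if ( copy_to_guest_offset(hnd, 0, vals, nr) )
            return -EFAULT;
        return 0;
    }
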
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/mach-default/irq_vectors.h
--- a/xen/include/asm-powerpc/mach-default/irq_vectors.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/mach-default/irq_vectors.h        Thu Dec 14 08:57:36 2006 -0700
@@ -37,26 +37,10 @@
 #define FAST_TRAP -1 /* 0x80 */
 #define FIRST_SYSTEM_VECTOR    -1
 
+#define CALL_FUNCTION_VECTOR   0x0
+#define EVENT_CHECK_VECTOR     0x1
+
 #if 0
-
-/*
- * Vectors 0-16 in some cases are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR   0xff
-#define ERROR_APIC_VECTOR      0xfe
-#define INVALIDATE_TLB_VECTOR  0xfd
-#define EVENT_CHECK_VECTOR     0xfc
-#define CALL_FUNCTION_VECTOR   0xfb
 
 #define THERMAL_APIC_VECTOR    0xf0
 /*
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/mm.h      Thu Dec 14 08:57:36 2006 -0700
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_MM_H_
@@ -25,10 +26,10 @@
 #include <xen/list.h>
 #include <xen/types.h>
 #include <xen/mm.h>
-#include <asm/misc.h>
 #include <asm/system.h>
 #include <asm/flushtlb.h>
-#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/debugger.h>
 
 #define memguard_guard_range(_p,_l)    ((void)0)
 #define memguard_unguard_range(_p,_l)    ((void)0)
@@ -86,39 +87,38 @@ struct page_extents {
     /* page extent */
     struct page_info *pg;
     uint order;
-    ulong pfn;
 };
 
  /* The following page types are MUTUALLY EXCLUSIVE. */
-#define PGT_none            (0<<29) /* no special uses of this page */
-#define PGT_RMA             (1<<29) /* This page is an RMA page? */
-#define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
-#define PGT_type_mask       (7<<29) /* Bits 29-31. */
+#define PGT_none            (0UL<<29) /* no special uses of this page */
+#define PGT_RMA             (1UL<<29) /* This page is an RMA page? */
+#define PGT_writable_page   (7UL<<29) /* has writable mappings of this page? */
+#define PGT_type_mask       (7UL<<29) /* Bits 29-31. */
 
  /* Owning guest has pinned this page to its current type? */
 #define _PGT_pinned         28
-#define PGT_pinned          (1U<<_PGT_pinned)
+#define PGT_pinned          (1UL<<_PGT_pinned)
  /* Has this page been validated for use as its current type? */
 #define _PGT_validated      27
-#define PGT_validated       (1U<<_PGT_validated)
+#define PGT_validated       (1UL<<_PGT_validated)
 
  /* 16-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<16)-1)
+#define PGT_count_mask      ((1UL<<16)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
-#define PGC_allocated       (1U<<_PGC_allocated)
+#define PGC_allocated       (1UL<<_PGC_allocated)
  /* Set on a *guest* page to mark it out-of-sync with its shadow */
 #define _PGC_out_of_sync     30
-#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
+#define PGC_out_of_sync     (1UL<<_PGC_out_of_sync)
  /* Set when is using a page as a page table */
 #define _PGC_page_table      29
-#define PGC_page_table      (1U<<_PGC_page_table)
+#define PGC_page_table      (1UL<<_PGC_page_table)
 /* Set when using page for RMA */
 #define _PGC_page_RMA      28
-#define PGC_page_RMA      (1U<<_PGC_page_RMA)
+#define PGC_page_RMA      (1UL<<_PGC_page_RMA)
  /* 29-bit count of references to this frame. */
-#define PGC_count_mask      ((1U<<28)-1)
+#define PGC_count_mask      ((1UL<<28)-1)
 
 #define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
 
@@ -132,6 +132,13 @@ static inline u32 pickle_domptr(struct d
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
 #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
+#define XENSHARE_writable 0
+#define XENSHARE_readonly 1
+extern void share_xen_page_with_guest(
+    struct page_info *page, struct domain *d, int readonly);
+extern void share_xen_page_with_privileged_guests(
+    struct page_info *page, int readonly);
 
 extern struct page_info *frame_table;
 extern unsigned long max_page;
@@ -218,16 +225,18 @@ typedef struct {
 } vm_assist_info_t;
 extern vm_assist_info_t vm_assist_info[];
 
-#define share_xen_page_with_guest(p, d, r) do { } while (0)
-#define share_xen_page_with_privileged_guests(p, r) do { } while (0)
 
 /* hope that accesses to this will fail spectacularly */
-#define machine_to_phys_mapping ((u32 *)-1UL)
-
-extern int update_grant_va_mapping(unsigned long va,
-                                   unsigned long val,
-                                   struct domain *,
-                                   struct vcpu *);
+#undef machine_to_phys_mapping
+#define INVALID_M2P_ENTRY        (~0UL)
+
+/* do nothing, its all calculated */
+#define set_gpfn_from_mfn(mfn, pfn) do { } while (0)
+#define get_gpfn_from_mfn(mfn) (mfn)
+
+extern unsigned long mfn_to_gmfn(struct domain *d, unsigned long mfn);
+
+extern unsigned long paddr_to_maddr(unsigned long paddr);
 
 #define INVALID_MFN (~0UL)
 #define PFN_TYPE_NONE 0
@@ -235,29 +244,48 @@ extern int update_grant_va_mapping(unsig
 #define PFN_TYPE_LOGICAL 2
 #define PFN_TYPE_IO 3
 #define PFN_TYPE_FOREIGN 4
+#define PFN_TYPE_GNTTAB 5
 
 extern ulong pfn2mfn(struct domain *d, ulong pfn, int *type);
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gmfn)
+{
+    int mtype;
+    ulong mfn;
+    
+    mfn = pfn2mfn(d, gmfn, &mtype);
+    if (mfn != INVALID_MFN) {
+        switch (mtype) {
+        case PFN_TYPE_RMA:
+        case PFN_TYPE_LOGICAL:
+            break;
+        default:
+            WARN();
+            mfn = INVALID_MFN;
+            break;
+        }
+    }
+    return mfn;
+}
+
+extern int update_grant_va_mapping(unsigned long va,
+                                   unsigned long val,
+                                   struct domain *,
+                                   struct vcpu *);
 
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
-
-/* XXX implement me? */
-#define set_gpfn_from_mfn(mfn, pfn) do { } while (0)
-/* XXX only used for debug print right now... */
-#define get_gpfn_from_mfn(mfn) (mfn)
-
-static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gmfn)
-{
-       return pfn2mfn(d, gmfn, NULL);
-}
-
-#define mfn_to_gmfn(_d, mfn) (mfn)
 
 extern int allocate_rma(struct domain *d, unsigned int order_pages);
 extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages);
 extern void free_extents(struct domain *d);
 
+extern int arch_domain_add_extent(struct domain *d, struct page_info *page,
+        int order);
+
 extern int steal_page(struct domain *d, struct page_info *page,
                         unsigned int memflags);
 
+/* XXX these just exist until we can stop #including x86 code */
+#define access_ok(addr,size) 1
+#define array_access_ok(addr,count,size) 1
 #endif
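
[Context, not part of the changeset: the switch from 1U to 1UL matters because the per-page flag words are unsigned long on powerpc64; a 32-bit constant is zero-extended when widened, so a negated mask such as ~PGT_count_mask would silently lose its upper 32 bits. A hedged illustration; the local variable stands in for a page's type word.]

    /* Sketch only: `type_info` is a plain local standing in for the
     * per-page type word. */
    static inline int example_is_writable_type(unsigned long type_info)
    {
        /* With 1UL-based masks, ~PGT_count_mask keeps bits 16-63 set on a
         * 64-bit long; with 1U it would truncate to 0xFFFF0000. */
        return (type_info & PGT_type_mask) == PGT_writable_page &&
               (type_info & PGT_count_mask) != 0;
    }
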
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/msr.h
--- a/xen/include/asm-powerpc/msr.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/msr.h     Thu Dec 14 08:57:36 2006 -0700
@@ -51,9 +51,9 @@
 #define MSR_RI      ULL(0x0000000000000002)
 #define MSR_LE      ULL(0x0000000000000001)
 
-/* MSR bits set on the Mambo simulator */
+/* MSR bits set on the systemsim simulator */
 #define MSR_SIM     ULL(0x0000000020000000)
-#define MSR_MAMBO   ULL(0x0000000010000000)
+#define MSR_SYSTEMSIM ULL(0x0000000010000000)
 
 /* On a trap, srr1's copy of msr defines some bits as follows: */
 #define MSR_TRAP_FE     ULL(0x0000000000100000) /* Floating Point Exception */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/page.h
--- a/xen/include/asm-powerpc/page.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/page.h    Thu Dec 14 08:57:36 2006 -0700
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #ifndef _ASM_PAGE_H
@@ -28,7 +29,6 @@
 #ifndef __ASSEMBLY__
 
 #include <xen/config.h>
-#include <asm/misc.h>
 #include <asm/cache.h>
 
 #define PFN_DOWN(x)   ((x) >> PAGE_SHIFT)
@@ -129,5 +129,6 @@ static inline int get_order_from_pages(u
 #define _PAGE_PAT      0x080UL
 #define _PAGE_PSE      0x080UL
 #define _PAGE_GLOBAL   0x100UL
+
 #endif  /* ! __ASSEMBLY__ */
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/powerpc64/string.h
--- a/xen/include/asm-powerpc/powerpc64/string.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/powerpc64/string.h        Thu Dec 14 08:57:36 2006 -0700
@@ -37,4 +37,7 @@ extern int memcmp(const void *,const voi
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 
+extern void *systemsim_memset(void *, int, ulong);
+extern void *systemsim_memcpy(void *, const void *, ulong);
+
 #endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/processor.h       Thu Dec 14 08:57:36 2006 -0700
@@ -31,6 +31,85 @@
 /* most assembler do not know this instruction */
 #define HRFID .long 0x4c000224
 
+/* Processor Version Register (PVR) field extraction */
+
+#define PVR_VER(pvr)   (((pvr) >>  16) & 0xFFFF)       /* Version field */
+#define PVR_REV(pvr)   (((pvr) >>   0) & 0xFFFF)       /* Revison field */
+
+#define __is_processor(pv)     (PVR_VER(mfspr(SPRN_PVR)) == (pv))
+
+/*
+ * IBM has further subdivided the standard PowerPC 16-bit version and
+ * revision subfields of the PVR for the PowerPC 403s into the following:
+ */
+
+#define PVR_FAM(pvr)   (((pvr) >> 20) & 0xFFF) /* Family field */
+#define PVR_MEM(pvr)   (((pvr) >> 16) & 0xF)   /* Member field */
+#define PVR_CORE(pvr)  (((pvr) >> 12) & 0xF)   /* Core field */
+#define PVR_CFG(pvr)   (((pvr) >>  8) & 0xF)   /* Configuration field */
+#define PVR_MAJ(pvr)   (((pvr) >>  4) & 0xF)   /* Major revision field */
+#define PVR_MIN(pvr)   (((pvr) >>  0) & 0xF)   /* Minor revision field */
+
+/* Processor Version Numbers */
+
+#define PVR_403GA      0x00200000
+#define PVR_403GB      0x00200100
+#define PVR_403GC      0x00200200
+#define PVR_403GCX     0x00201400
+#define PVR_405GP      0x40110000
+#define PVR_STB03XXX   0x40310000
+#define PVR_NP405H     0x41410000
+#define PVR_NP405L     0x41610000
+#define PVR_601                0x00010000
+#define PVR_602                0x00050000
+#define PVR_603                0x00030000
+#define PVR_603e       0x00060000
+#define PVR_603ev      0x00070000
+#define PVR_603r       0x00071000
+#define PVR_604                0x00040000
+#define PVR_604e       0x00090000
+#define PVR_604r       0x000A0000
+#define PVR_620                0x00140000
+#define PVR_740                0x00080000
+#define PVR_750                PVR_740
+#define PVR_740P       0x10080000
+#define PVR_750P       PVR_740P
+#define PVR_7400       0x000C0000
+#define PVR_7410       0x800C0000
+#define PVR_7450       0x80000000
+#define PVR_8540       0x80200000
+#define PVR_8560       0x80200000
+/*
+ * For the 8xx processors, all of them report the same PVR family for
+ * the PowerPC core. The various versions of these processors must be
+ * differentiated by the version number in the Communication Processor
+ * Module (CPM).
+ */
+#define PVR_821                0x00500000
+#define PVR_823                PVR_821
+#define PVR_850                PVR_821
+#define PVR_860                PVR_821
+#define PVR_8240       0x00810100
+#define PVR_8245       0x80811014
+#define PVR_8260       PVR_8240
+
+/* 64-bit processors */
+/* XXX the prefix should be PVR_, we'll do a global sweep to fix it one day */
+#define PV_NORTHSTAR   0x0033
+#define PV_PULSAR      0x0034
+#define PV_POWER4      0x0035
+#define PV_ICESTAR     0x0036
+#define PV_SSTAR       0x0037
+#define PV_POWER4p     0x0038
+#define PV_970         0x0039
+#define PV_POWER5      0x003A
+#define PV_POWER5p     0x003B
+#define PV_970FX       0x003C
+#define PV_630         0x0040
+#define PV_630p        0x0041
+#define PV_970MP       0x0044
+#define PV_BE          0x0070
+
 #ifndef __ASSEMBLY__ 
 #include <xen/types.h>
 
@@ -38,13 +117,10 @@ struct vcpu;
 struct vcpu;
 struct cpu_user_regs;
 extern int cpu_machinecheck(struct cpu_user_regs *);
-extern void cpu_scom_init(void);
 extern void show_registers(struct cpu_user_regs *);
-extern void show_execution_state(struct cpu_user_regs *);
-extern void show_backtrace(ulong sp, ulong lr, ulong pc);
 extern unsigned int cpu_extent_order(void);
 extern unsigned int cpu_default_rma_order_pages(void);
-extern int cpu_rma_valid(unsigned int log);
+extern int cpu_rma_valid(unsigned int order);
 extern uint cpu_large_page_orders(uint *sizes, uint max);
 extern void cpu_initialize(int cpuid);
 extern void cpu_init_vcpu(struct vcpu *);
@@ -54,13 +130,6 @@ extern void flush_segments(void);
 extern void flush_segments(void);
 extern void dump_segments(int valid);
 
-/* XXX this could also land us in GDB */
-#define dump_execution_state() BUG()
-
-extern void __warn(char *file, int line);
-#define WARN() __warn(__FILE__, __LINE__)
-#define WARN_ON(_p) do { if (_p) WARN(); } while ( 0 )
-
 #define ARCH_HAS_PREFETCH
 static inline void prefetch(const void *x) {;}
 
@@ -83,7 +152,8 @@ static inline void nop(void) {
 static inline void nop(void) {
     __asm__ __volatile__ ("nop");
 }
-#define cpu_relax() nop()
+/* will need to address thread priorities when we go SMT */
+#define cpu_relax() barrier()
 
 static inline unsigned int mfpir(void)
 {
@@ -207,13 +277,13 @@ static inline unsigned mfdsisr(void)
     return val;
 }
 
-#ifdef CONFIG_MAMBO
-static inline int on_mambo(void)
-{
-    return !!(mfmsr() & MSR_MAMBO);
-}
-#else /* CONFIG_MAMBO */
-static inline int on_mambo(void) { return 0; }
+#ifdef CONFIG_SYSTEMSIM
+static inline int on_systemsim(void)
+{
+    return !!(mfmsr() & MSR_SYSTEMSIM);
+}
+#else /* CONFIG_SYSTEMSIM */
+static inline int on_systemsim(void) { return 0; }
 #endif
 
 #endif /* __ASSEMBLY__ */
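
[Context, not part of the changeset: a short hedged sketch of using the new PVR helpers to recognise a 970-class CPU. The helper name is illustrative; mfspr/SPRN_PVR are the accessors already assumed by __is_processor above.]

    static inline int example_cpu_is_970_class(void)
    {
        unsigned int ver = PVR_VER(mfspr(SPRN_PVR));

        return ver == PV_970 || ver == PV_970FX || ver == PV_970MP;
    }
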
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/smp.h     Thu Dec 14 08:57:36 2006 -0700
@@ -25,6 +25,12 @@
 #include <xen/cpumask.h>
 #include <xen/init.h>
 #include <asm/current.h>
+
+/* crap to make x86 "common code" happy */
+#define BAD_APICID 0xFFu
+extern u8 x86_cpu_to_apicid[];
+
+
 extern int smp_num_siblings;
 
 /* revisit when we support SMP */
@@ -35,4 +41,20 @@ extern cpumask_t cpu_core_map[];
 extern cpumask_t cpu_core_map[];
 extern void __devinit smp_generic_take_timebase(void);
 extern void __devinit smp_generic_give_timebase(void);
+
+#define SA_INTERRUPT   0x20000000u
+typedef int irqreturn_t;
+extern int request_irq(unsigned int irq,
+    irqreturn_t (*handler)(int, void *, struct cpu_user_regs *),
+    unsigned long irqflags, const char * devname, void *dev_id);
+void smp_message_recv(int msg, struct cpu_user_regs *regs);
+void smp_call_function_interrupt(struct cpu_user_regs *regs);
+void smp_event_check_interrupt(void);
+void send_IPI_mask(cpumask_t mask, int vector);
+
+#undef DEBUG_IPI
+#ifdef DEBUG_IPI
+void ipi_torture_test(void);
 #endif
+
+#endif
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/spinlock.h
--- a/xen/include/asm-powerpc/spinlock.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-powerpc/spinlock.h        Thu Dec 14 08:57:36 2006 -0700
@@ -70,18 +70,15 @@ cas_u32(volatile u32 *ptr, u32 oval, u32
     return tmp;
 }
 
-typedef union {
+typedef struct {
     volatile u32 lock;
-    struct {
-        s8 recurse_cpu;
-        u8 recurse_cnt;
-        s16 lock;
-    } fields;
+    s16 recurse_cpu;
+    u16 recurse_cnt;
 } spinlock_t;
 
 #define __UNLOCKED (0U)
 #define __LOCKED (~__UNLOCKED)
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { __UNLOCKED }
+#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { __UNLOCKED, -1, 0 }
 static inline void spin_lock_init(spinlock_t *lock)
 {
     *lock = (spinlock_t) SPIN_LOCK_UNLOCKED;
@@ -181,17 +178,17 @@ static inline void _raw_spin_unlock_recu
 static inline void _raw_spin_unlock_recursive(spinlock_t *lock)
 {
     int cpu = smp_processor_id();
-    if (likely(lock->fields.recurse_cpu != cpu)) {
+    if (likely(lock->recurse_cpu != cpu)) {
         spin_lock(lock);
-        lock->fields.recurse_cpu = cpu;
-    }
-    lock->fields.recurse_cnt++;
+        lock->recurse_cpu = cpu;
+    }
+    lock->recurse_cnt++;
 }
 
 static inline void _raw_spin_unlock_recursive(spinlock_t *lock)
 {
-    if (likely(--lock->fields.recurse_cnt == 0)) {
-        lock->fields.recurse_cpu = -1;
+    if (likely(--lock->recurse_cnt == 0)) {
+        lock->recurse_cpu = -1;
         spin_unlock(lock);
     }
 }
@@ -200,19 +197,19 @@ static inline void _raw_spin_unlock_recu
 #define _raw_spin_lock_recursive(_lock)            \
     do {                                           \
         int cpu = smp_processor_id();              \
-        if ( likely((_lock)->fields.recurse_cpu != cpu) ) \
+        if ( likely((_lock)->recurse_cpu != cpu) ) \
         {                                          \
             spin_lock(_lock);                      \
-            (_lock)->fields.recurse_cpu = cpu;            \
+            (_lock)->recurse_cpu = cpu;            \
         }                                          \
-        (_lock)->fields.recurse_cnt++;                    \
+        (_lock)->recurse_cnt++;                    \
     } while ( 0 )
 
 #define _raw_spin_unlock_recursive(_lock)          \
     do {                                           \
-        if ( likely(--(_lock)->fields.recurse_cnt == 0) ) \
+        if ( likely(--(_lock)->recurse_cnt == 0) ) \
         {                                          \
-            (_lock)->fields.recurse_cpu = -1;             \
+            (_lock)->recurse_cpu = -1;             \
             spin_unlock(_lock);                    \
         }                                          \
     } while ( 0 )
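
[Context, not part of the changeset: the recursion bookkeeping moves out of the packed union into plain spinlock_t fields. A hedged usage sketch, with illustrative function names, of how the recursive variants nest on one CPU: the first acquisition takes the lock and records recurse_cpu, nested acquisitions only bump recurse_cnt, and the lock is released when the count returns to zero.]

    static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;

    static void example_inner(void)
    {
        _raw_spin_lock_recursive(&example_lock);   /* same CPU: recurse_cnt++ */
        /* ... critical section ... */
        _raw_spin_unlock_recursive(&example_lock);
    }

    static void example_outer(void)
    {
        _raw_spin_lock_recursive(&example_lock);   /* first acquisition takes the lock */
        example_inner();
        _raw_spin_unlock_recursive(&example_lock); /* final unlock releases it */
    }
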
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/numa.h        Thu Dec 14 08:57:36 2006 -0700
@@ -37,7 +37,7 @@ extern void __init init_cpu_to_node(void
 
 static inline void clear_node_cpumask(int cpu)
 {
-       clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+       cpu_clear(cpu, node_to_cpumask[cpu_to_node(cpu)]);
 }
 
 /* Simple perfect hash to map physical addresses to node numbers */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/page.h        Thu Dec 14 08:57:36 2006 -0700
@@ -26,25 +26,37 @@
 #endif
 
 /* Read a pte atomically from memory. */
-#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep))
-#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep))
-#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep))
-#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep))
+#define l1e_read_atomic(l1ep) \
+    l1e_from_intpte(pte_read_atomic(&l1e_get_intpte(*(l1ep))))
+#define l2e_read_atomic(l2ep) \
+    l2e_from_intpte(pte_read_atomic(&l2e_get_intpte(*(l2ep))))
+#define l3e_read_atomic(l3ep) \
+    l3e_from_intpte(pte_read_atomic(&l3e_get_intpte(*(l3ep))))
+#define l4e_read_atomic(l4ep) \
+    l4e_from_intpte(pte_read_atomic(&l4e_get_intpte(*(l4ep))))
 
 /* Write a pte atomically to memory. */
-#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e))
-#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e))
-#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e))
-#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e))
+#define l1e_write_atomic(l1ep, l1e) \
+    pte_write_atomic(&l1e_get_intpte(*(l1ep)), l1e_get_intpte(l1e))
+#define l2e_write_atomic(l2ep, l2e) \
+    pte_write_atomic(&l2e_get_intpte(*(l2ep)), l2e_get_intpte(l2e))
+#define l3e_write_atomic(l3ep, l3e) \
+    pte_write_atomic(&l3e_get_intpte(*(l3ep)), l3e_get_intpte(l3e))
+#define l4e_write_atomic(l4ep, l4e) \
+    pte_write_atomic(&l4e_get_intpte(*(l4ep)), l4e_get_intpte(l4e))
 
 /*
  * Write a pte safely but non-atomically to memory.
  * The PTE may become temporarily not-present during the update.
  */
-#define l1e_write(l1ep, l1e) pte_write(l1ep, l1e_get_intpte(l1e))
-#define l2e_write(l2ep, l2e) pte_write(l2ep, l2e_get_intpte(l2e))
-#define l3e_write(l3ep, l3e) pte_write(l3ep, l3e_get_intpte(l3e))
-#define l4e_write(l4ep, l4e) pte_write(l4ep, l4e_get_intpte(l4e))
+#define l1e_write(l1ep, l1e) \
+    pte_write(&l1e_get_intpte(*(l1ep)), l1e_get_intpte(l1e))
+#define l2e_write(l2ep, l2e) \
+    pte_write(&l2e_get_intpte(*(l2ep)), l2e_get_intpte(l2e))
+#define l3e_write(l3ep, l3e) \
+    pte_write(&l3e_get_intpte(*(l3ep)), l3e_get_intpte(l3e))
+#define l4e_write(l4ep, l4e) \
+    pte_write(&l4e_get_intpte(*(l4ep)), l4e_get_intpte(l4e))
 
 /* Get direct integer representation of a pte's contents (intpte_t). */
 #define l1e_get_intpte(x)          ((x).l1)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/shadow.h      Thu Dec 14 08:57:36 2006 -0700
@@ -540,6 +540,9 @@ extern int shadow_remove_write_access(st
  * Returns non-zero if we need to flush TLBs. */
 extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
 
+/* Remove all mappings from the shadows. */
+extern void shadow_blow_tables(struct domain *d);
+
 void
 shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
 /* This is a HVM page that we thing is no longer a pagetable.
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/x86_32/page-2level.h  Thu Dec 14 08:57:36 2006 -0700
@@ -28,9 +28,9 @@ typedef l2_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
-#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
-#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte))
-#define pte_write(ptep, pte)        ((*(intpte_t *)(ptep)) = (pte))
+#define pte_read_atomic(ptep)       (*(ptep))
+#define pte_write_atomic(ptep, pte) (*(ptep) = (pte))
+#define pte_write(ptep, pte)        (*(ptep) = (pte))
 
 /* root table */
 #define root_get_pfn              l2e_get_pfn
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Thu Dec 14 08:57:36 2006 -0700
@@ -38,22 +38,23 @@ typedef l3_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
-#define pte_read_atomic(ptep) ({                                            \
-    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
-    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \
-        __pte = __npte;                                                     \
+#define pte_read_atomic(ptep) ({                              \
+    intpte_t __pte = *(ptep), __npte;                         \
+    while ( (__npte = cmpxchg(ptep, __pte, __pte)) != __pte ) \
+        __pte = __npte;                                       \
     __pte; })
-#define pte_write_atomic(ptep, pte) do {                                    \
-    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
-    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \
-        __pte = __npte;                                                     \
+#define pte_write_atomic(ptep, pte) do {                      \
+    intpte_t __pte = *(ptep), __npte;                         \
+    while ( (__npte = cmpxchg(ptep, __pte, (pte))) != __pte ) \
+        __pte = __npte;                                       \
 } while ( 0 )
-#define pte_write(ptep, pte) do {               \
-    *((u32 *)(ptep)+0) = 0;                     \
-    wmb();                                      \
-    *((u32 *)(ptep)+1) = (pte) >> 32;           \
-    wmb();                                      \
-    *((u32 *)(ptep)+0) = (pte) >>  0;           \
+#define pte_write(ptep, pte) do {                             \
+    u32 *__ptep_words = (u32 *)(ptep);                        \
+    __ptep_words[0] = 0;                                      \
+    wmb();                                                    \
+    __ptep_words[1] = (pte) >> 32;                            \
+    wmb();                                                    \
+    __ptep_words[0] = (pte) >>  0;                            \
 } while ( 0 )
 
 /* root table */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/asm-x86/x86_64/page.h Thu Dec 14 08:57:36 2006 -0700
@@ -43,9 +43,9 @@ typedef l4_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
-#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
-#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte))
-#define pte_write(ptep, pte)        ((*(intpte_t *)(ptep)) = (pte))
+#define pte_read_atomic(ptep)       (*(ptep))
+#define pte_write_atomic(ptep, pte) (*(ptep) = (pte))
+#define pte_write(ptep, pte)        (*(ptep) = (pte))
 
 /* Given a virtual address, get an entry offset into a linear page table. */
 #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/arch-powerpc.h Thu Dec 14 08:57:36 2006 -0700
@@ -73,6 +73,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 
 #ifndef __ASSEMBLY__
 
+#define XENCOMM_INLINE_FLAG (1UL << 63)
+
 typedef uint64_t xen_ulong_t;
 
 /* User-accessible registers: need to be saved/restored for every nested Xen
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/domctl.h       Thu Dec 14 08:57:36 2006 -0700
@@ -385,6 +385,13 @@ typedef struct xen_domctl_settimeoffset 
 typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
 
+#define XEN_DOMCTL_real_mode_area     26
+struct xen_domctl_real_mode_area {
+    uint32_t log; /* log2 of Real Mode Area size */
+};
+typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
 struct xen_domctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
@@ -410,6 +417,7 @@ struct xen_domctl {
         struct xen_domctl_hypercall_init    hypercall_init;
         struct xen_domctl_arch_setup        arch_setup;
         struct xen_domctl_settimeoffset     settimeoffset;
+        struct xen_domctl_real_mode_area    real_mode_area;
         uint8_t                             pad[128];
     } u;
 };
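
[Context, not part of the changeset: a hedged toolstack-side sketch of issuing the new XEN_DOMCTL_real_mode_area operation. The wrapper name is illustrative, and do_domctl is assumed to be the usual libxc helper of this era.]

    static int example_set_rma(int xc_handle, uint32_t domid, uint32_t log_size)
    {
        struct xen_domctl domctl = {
            .cmd               = XEN_DOMCTL_real_mode_area,
            .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
            .domain            = domid,
        };

        domctl.u.real_mode_area.log = log_size;  /* log2 of RMA size in bytes */
        return do_domctl(xc_handle, &domctl);    /* assumed libxc helper */
    }
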
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/fbif.h
--- a/xen/include/public/io/fbif.h      Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/fbif.h      Thu Dec 14 08:57:36 2006 -0700
@@ -1,18 +1,30 @@
 /*
  * fbif.h -- Xen virtual frame buffer device
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2005 Anthony Liguori <aliguori@xxxxxxxxxx>
  * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@xxxxxxxxxx>
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License. See the file COPYING in the main directory of this archive for
- *  more details.
  */
 
 #ifndef __XEN_PUBLIC_IO_FBIF_H__
 #define __XEN_PUBLIC_IO_FBIF_H__
-
-#include <asm/types.h>
 
 /* Out events (frontend -> backend) */
 
@@ -31,20 +43,20 @@
 
 struct xenfb_update
 {
-       __u8 type;              /* XENFB_TYPE_UPDATE */
-       __s32 x;                /* source x */
-       __s32 y;                /* source y */
-       __s32 width;            /* rect width */
-       __s32 height;           /* rect height */
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
 };
 
 #define XENFB_OUT_EVENT_SIZE 40
 
 union xenfb_out_event
 {
-       __u8 type;
-       struct xenfb_update update;
-       char pad[XENFB_OUT_EVENT_SIZE];
+    uint8_t type;
+    struct xenfb_update update;
+    char pad[XENFB_OUT_EVENT_SIZE];
 };
 
 /* In events (backend -> frontend) */
@@ -58,8 +70,8 @@ union xenfb_out_event
 
 union xenfb_in_event
 {
-       __u8 type;
-       char pad[XENFB_IN_EVENT_SIZE];
+    uint8_t type;
+    char pad[XENFB_IN_EVENT_SIZE];
 };
 
 /* shared page */
@@ -82,25 +94,25 @@ union xenfb_in_event
 
 struct xenfb_page
 {
-       __u32 in_cons, in_prod;
-       __u32 out_cons, out_prod;
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
 
-       __s32 width;         /* the width of the framebuffer (in pixels) */
-       __s32 height;        /* the height of the framebuffer (in pixels) */
-       __u32 line_length;   /* the length of a row of pixels (in bytes) */
-       __u32 mem_length;    /* the length of the framebuffer (in bytes) */
-       __u8 depth;          /* the depth of a pixel (in bits) */
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
 
-       /*
-        * Framebuffer page directory
-        *
-        * Each directory page holds PAGE_SIZE / sizeof(*pd)
-        * framebuffer pages, and can thus map up to PAGE_SIZE *
-        * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
-        * sizeof(unsigned long) == 4, that's 4 Megs.  Two directory
-        * pages should be enough for a while.
-        */
-       unsigned long pd[2];
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4, that's 4 Megs.  Two directory
+     * pages should be enough for a while.
+     */
+    unsigned long pd[2];
 };
 
 /*
@@ -114,3 +126,13 @@ struct xenfb_page
 #endif
 
 #endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
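
[Context, not part of the changeset: a hedged frontend-side sketch of posting an update event through the out ring described by xenfb_page. The `ring` pointer and `ring_size` parameter are illustrative stand-ins for however the frontend maps the event ring, and wmb() is assumed from the guest kernel.]

    static int example_send_update(struct xenfb_page *page,
                                   union xenfb_out_event *ring,
                                   uint32_t ring_size,
                                   int x, int y, int w, int h)
    {
        uint32_t prod = page->out_prod;
        struct xenfb_update *upd;

        if (prod - page->out_cons == ring_size)
            return -1;                         /* ring full */

        upd = &ring[prod % ring_size].update;
        upd->type   = XENFB_TYPE_UPDATE;
        upd->x      = x;
        upd->y      = y;
        upd->width  = w;
        upd->height = h;
        wmb();                                 /* publish event before index */
        page->out_prod = prod + 1;
        return 0;
    }
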
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/kbdif.h
--- a/xen/include/public/io/kbdif.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/kbdif.h     Thu Dec 14 08:57:36 2006 -0700
@@ -1,18 +1,30 @@
 /*
  * kbdif.h -- Xen virtual keyboard/mouse
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2005 Anthony Liguori <aliguori@xxxxxxxxxx>
  * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@xxxxxxxxxx>
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License. See the file COPYING in the main directory of this archive for
- *  more details.
  */
 
 #ifndef __XEN_PUBLIC_IO_KBDIF_H__
 #define __XEN_PUBLIC_IO_KBDIF_H__
-
-#include <asm/types.h>
 
 /* In events (backend -> frontend) */
 
@@ -35,34 +47,34 @@
 
 struct xenkbd_motion
 {
-       __u8 type;         /* XENKBD_TYPE_MOTION */
-       __s32 rel_x;       /* relative X motion */
-       __s32 rel_y;       /* relative Y motion */
+    uint8_t type;        /* XENKBD_TYPE_MOTION */
+    int32_t rel_x;       /* relative X motion */
+    int32_t rel_y;       /* relative Y motion */
 };
 
 struct xenkbd_key
 {
-       __u8 type;         /* XENKBD_TYPE_KEY */
-       __u8 pressed;      /* 1 if pressed; 0 otherwise */
-       __u32 keycode;     /* KEY_* from linux/input.h */
+    uint8_t type;         /* XENKBD_TYPE_KEY */
+    uint8_t pressed;      /* 1 if pressed; 0 otherwise */
+    uint32_t keycode;     /* KEY_* from linux/input.h */
 };
 
 struct xenkbd_position
 {
-       __u8 type;         /* XENKBD_TYPE_POS */
-       __s32 abs_x;       /* absolute X position (in FB pixels) */
-       __s32 abs_y;       /* absolute Y position (in FB pixels) */
+    uint8_t type;        /* XENKBD_TYPE_POS */
+    int32_t abs_x;       /* absolute X position (in FB pixels) */
+    int32_t abs_y;       /* absolute Y position (in FB pixels) */
 };
 
 #define XENKBD_IN_EVENT_SIZE 40
 
 union xenkbd_in_event
 {
-       __u8 type;
-       struct xenkbd_motion motion;
-       struct xenkbd_key key;
-       struct xenkbd_position pos;
-       char pad[XENKBD_IN_EVENT_SIZE];
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    char pad[XENKBD_IN_EVENT_SIZE];
 };
 
 /* Out events (frontend -> backend) */
@@ -77,8 +89,8 @@ union xenkbd_in_event
 
 union xenkbd_out_event
 {
-       __u8 type;
-       char pad[XENKBD_OUT_EVENT_SIZE];
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE];
 };
 
 /* shared page */
@@ -101,8 +113,18 @@ union xenkbd_out_event
 
 struct xenkbd_page
 {
-       __u32 in_cons, in_prod;
-       __u32 out_cons, out_prod;
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
 };
 
 #endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/pciif.h
--- a/xen/include/public/io/pciif.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/pciif.h     Thu Dec 14 08:57:36 2006 -0700
@@ -25,7 +25,7 @@
 #define __XEN_PCI_COMMON_H__
 
 /* Be sure to bump this number if you change this file */
-#define XEN_PCI_MAGIC          "7"
+#define XEN_PCI_MAGIC "7"
 
 /* xen_pci_sharedinfo flags */
 #define _XEN_PCIF_active     (0)
@@ -45,29 +45,39 @@
 #define XEN_PCI_ERR_op_failed       (-5)
 
 struct xen_pci_op {
-       /* IN: what action to perform: XEN_PCI_OP_* */
-       uint32_t cmd;
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
 
-       /* OUT: will contain an error number (if any) from errno.h */
-       int32_t err;
+    /* OUT: will contain an error number (if any) from errno.h */
+    int32_t err;
 
-       /* IN: which device to touch */
-       uint32_t domain; /* PCI Domain/Segment */
-       uint32_t bus;
-       uint32_t devfn;
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment */
+    uint32_t bus;
+    uint32_t devfn;
 
-       /* IN: which configuration registers to touch */
-       int32_t offset;
-       int32_t size;
+    /* IN: which configuration registers to touch */
+    int32_t offset;
+    int32_t size;
 
-       /* IN/OUT: Contains the result after a READ or the value to WRITE */
-       uint32_t value;
+    /* IN/OUT: Contains the result after a READ or the value to WRITE */
+    uint32_t value;
 };
 
 struct xen_pci_sharedinfo {
-       /* flags - XEN_PCIF_* */
-       uint32_t flags;
-       struct xen_pci_op op;
+    /* flags - XEN_PCIF_* */
+    uint32_t flags;
+    struct xen_pci_op op;
 };
 
 #endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/io/xenbus.h    Thu Dec 14 08:57:36 2006 -0700
@@ -56,8 +56,18 @@ enum xenbus_state {
      */
     XenbusStateClosing       = 5,
 
-    XenbusStateClosed       = 6
+    XenbusStateClosed        = 6
 };
 typedef enum xenbus_state XenbusState;
 
 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/memory.h       Thu Dec 14 08:57:36 2006 -0700
@@ -248,7 +248,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t
  * XENMEM_memory_map.
  * arg == addr of xen_memory_map_t.
  */
-#define XENMEM_machine_memory_map      10
+#define XENMEM_machine_memory_map   10
 
 /*
  * Set the pseudo-physical memory map of a domain, as returned by
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/sysctl.h       Thu Dec 14 08:57:36 2006 -0700
@@ -119,7 +119,7 @@ struct xen_sysctl_perfc_op {
     uint32_t       cmd;                /*  XEN_SYSCTL_PERFCOP_??? */
     /* OUT variables. */
     uint32_t       nr_counters;       /*  number of counters description  */
-    uint32_t       nr_vals;                      /*  number of values  */
+    uint32_t       nr_vals;           /*  number of values  */
     /* counter information (or NULL) */
     XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
     /* counter values (or NULL) */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/trace.h        Thu Dec 14 08:57:36 2006 -0700
@@ -32,7 +32,7 @@
 #define TRC_SCHED   0x0002f000    /* Xen Scheduler trace      */
 #define TRC_DOM0OP  0x0004f000    /* Xen DOM0 operation trace */
 #define TRC_VMX     0x0008f000    /* Xen VMX trace            */
-#define TRC_MEM     0x000af000    /* Xen memory trace         */
+#define TRC_MEM     0x0010f000    /* Xen memory trace         */
 #define TRC_ALL     0xfffff000
 
 /* Trace subclasses */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/xenoprof.h
--- a/xen/include/public/xenoprof.h     Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/public/xenoprof.h     Thu Dec 14 08:57:36 2006 -0700
@@ -52,7 +52,7 @@
 #define XENOPROF_last_op            14
 
 #define MAX_OPROF_EVENTS    32
-#define MAX_OPROF_DOMAINS   25 
+#define MAX_OPROF_DOMAINS   25
 #define XENOPROF_CPU_TYPE_SIZE 64
 
 /* Xenoprof performance events (not Xen events) */
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/xen/elfcore.h
--- a/xen/include/xen/elfcore.h Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/xen/elfcore.h Thu Dec 14 08:57:36 2006 -0700
@@ -87,7 +87,7 @@ typedef struct
             desctype desc;                      \
             PAD32(sizeof(desctype));            \
         } desc;                                 \
-    } __attribute__ ((packed)) type
+    } type
 
 #define CORE_STR                "CORE"
 #define CORE_STR_LEN            5 /* including terminating zero */
@@ -119,7 +119,7 @@ typedef struct {
     crash_note_core_t core;
     crash_note_xen_core_t xen_regs;
     crash_note_xen_info_t xen_info;
-} __attribute__ ((packed)) crash_note_t;
+} crash_note_t;
 
 #define setup_crash_note(np, member, str, str_len, id) \
   np->member.note.note.note.namesz = str_len; \
diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Dec 14 08:54:54 2006 -0700
+++ b/xen/include/xen/sched.h   Thu Dec 14 08:57:36 2006 -0700
@@ -188,6 +188,7 @@ struct domain_setup_info
 #define PAEKERN_no           0
 #define PAEKERN_yes          1
 #define PAEKERN_extended_cr3 2
+#define PAEKERN_bimodal      3
     unsigned int  pae_kernel;
     /* Initialised by loader: Private. */
     unsigned long elf_paddr_offset;
@@ -437,6 +438,7 @@ static inline int vcpu_runnable(struct v
 }
 
 void vcpu_pause(struct vcpu *v);
+void vcpu_pause_nosync(struct vcpu *v);
 void domain_pause(struct domain *d);
 void vcpu_unpause(struct vcpu *v);
 void domain_unpause(struct domain *d);
diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/utils.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/utils.c     Thu Dec 14 08:57:36 2006 -0700
@@ -0,0 +1,211 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation 2006
+ *
+ * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/memory.h>
+#include <xc_private.h>
+#include <xg_private.h>
+#include <xenctrl.h>
+
+#include "flatdevtree_env.h"
+#include "flatdevtree.h"
+#include "utils.h"
+
+unsigned long get_rma_pages(void *devtree)
+{
+    void *rma;
+    uint64_t rma_reg[2];
+    int rc;
+
+    rma = ft_find_node(devtree, "/memory@0");
+    if (rma == NULL) {
+        DPRINTF("couldn't find /memory@0\n");
+        return 0;
+    }
+    rc = ft_get_prop(devtree, rma, "reg", rma_reg, sizeof(rma_reg));
+    if (rc < 0) {
+        DPRINTF("couldn't get /memory@0/reg\n");
+        return 0;
+    }
+    if (rma_reg[0] != 0) {
+        DPRINTF("RMA did not start at 0\n");
+        return 0;
+    }
+    return rma_reg[1] >> PAGE_SHIFT;
+}
+
+int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array,
+                      unsigned long nr_pages)
+{
+    int rc;
+    int i;
+    xen_pfn_t *p;
+
+    *page_array = malloc(nr_pages * sizeof(xen_pfn_t));
+    if (*page_array == NULL) {
+        perror("malloc");
+        return -1;
+    }
+
+    DPRINTF("xc_get_pfn_list\n");
+    /* We know that the RMA is machine contiguous so lets just get the
+     * first MFN and fill the rest in ourselves */
+    rc = xc_get_pfn_list(xc_handle, domid, *page_array, 1);
+    if (rc == -1) {
+        perror("Could not get the page frame list");
+        return -1;
+    }
+    p = *page_array;
+    for (i = 1; i < nr_pages; i++)
+        p[i] = p[i - 1] + 1;
+    return 0;
+}
+
+int install_image(
+        int xc_handle,
+        int domid,
+        xen_pfn_t *page_array,
+        void *image,
+        unsigned long paddr,
+        unsigned long size)
+{
+    uint8_t *img = image;
+    int i;
+    int rc = 0;
+
+    if (paddr & ~PAGE_MASK) {
+        printf("*** unaligned address\n");
+        return -1;
+    }
+
+    for (i = 0; i < size; i += PAGE_SIZE) {
+        void *page = img + i;
+        xen_pfn_t pfn = (paddr + i) >> PAGE_SHIFT;
+        xen_pfn_t mfn = page_array[pfn];
+
+        rc = xc_copy_to_domain_page(xc_handle, domid, mfn, page);
+        if (rc < 0) {
+            perror("xc_copy_to_domain_page");
+            break;
+        }
+    }
+    return rc;
+}
+
+void *load_file(const char *path, unsigned long *filesize)
+{
+    void *img;
+    ssize_t size;
+    int fd;
+
+    DPRINTF("load_file(%s)\n", path);
+
+    fd = open(path, O_RDONLY);
+    if (fd < 0) {
+        perror(path);
+        return NULL;
+    }
+
+    size = lseek(fd, 0, SEEK_END);
+    if (size < 0) {
+        perror(path);
+        close(fd);
+        return NULL;
+    }
+    lseek(fd, 0, SEEK_SET);
+
+    img = malloc(size);
+    if (img == NULL) {
+        perror(path);
+        close(fd);
+        return NULL;
+    }
+
+    size = read(fd, img, size);
+    if (size <= 0) {
+        perror(path);
+        close(fd);
+        free(img);
+        return NULL;
+    }
+
+    if (filesize)
+        *filesize = size;
+    close(fd);
+    return img;
+}
+
+int load_elf_kernel(
+    int xc_handle,
+    int domid,
+    const char *kernel_path,
+    struct domain_setup_info *dsi,
+    xen_pfn_t *page_array)
+{
+    struct load_funcs load_funcs;
+    char *kernel_img;
+    unsigned long kernel_size;
+    int rc;
+
+    /* load the kernel ELF file */
+    kernel_img = load_file(kernel_path, &kernel_size);
+    if (kernel_img == NULL) {
+        rc = -1;
+        goto out;
+    }
+
+    DPRINTF("probe_elf\n");
+    rc = probe_elf(kernel_img, kernel_size, &load_funcs);
+    if (rc < 0) {
+        rc = -1;
+        printf("%s is not an ELF file\n", kernel_path);
+        goto out;
+    }
+
+    DPRINTF("parseimage\n");
+    rc = (load_funcs.parseimage)(kernel_img, kernel_size, dsi);
+    if (rc < 0) {
+        rc = -1;
+        goto out;
+    }
+
+    DPRINTF("loadimage\n");
+    (load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid,
+            page_array, dsi);
+

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog