WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-ppc-devel

[XenPPC] [xenppc-unstable] [ppc] merge with upstream

To: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Subject: [XenPPC] [xenppc-unstable] [ppc] merge with upstream
From: Xen patchbot-xenppc-unstable <patchbot-xenppc-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 26 Jun 2006 21:12:06 +0000
Delivery-date: Mon, 26 Jun 2006 14:21:45 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-ppc-devel-request@lists.xensource.com?subject=help>
List-id: Xen PPC development <xen-ppc-devel.lists.xensource.com>
List-post: <mailto:xen-ppc-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-ppc-devel-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Node ID b29806fb6ba093cfe30e815e99e5abe08e6a0b7a
# Parent  049e669e6a8abb8b2f0c7dced3c8ff53a9d9b21f
# Parent  9ec0b4f10b4f588f5e7764a6a64e5fcd0f9af0f1
[ppc] merge with upstream
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>
---
 tools/blktap/Makefile                                            |   93 
 tools/blktap/README                                              |  137 -
 tools/blktap/README.sept05                                       |   33 
 tools/blktap/blkdump.c                                           |   62 
 tools/blktap/blkif.c                                             |  212 -
 tools/blktap/blktaplib.c                                         |  453 ---
 tools/blktap/blktaplib.h                                         |  171 -
 tools/blktap/list.h                                              |   55 
 tools/blktap/parallax/Makefile                                   |   62 
 tools/blktap/parallax/README                                     |  171 -
 tools/blktap/parallax/block-async.c                              |  393 --
 tools/blktap/parallax/block-async.h                              |   69 
 tools/blktap/parallax/blockstore.c                               | 1348 ----------
 tools/blktap/parallax/blockstore.h                               |  134 
 tools/blktap/parallax/blockstored.c                              |  275 --
 tools/blktap/parallax/bstest.c                                   |  191 -
 tools/blktap/parallax/parallax.c                                 |  608 ----
 tools/blktap/parallax/radix.c                                    |  631 ----
 tools/blktap/parallax/radix.h                                    |   45 
 tools/blktap/parallax/requests-async.c                           |  762 -----
 tools/blktap/parallax/requests-async.h                           |   29 
 tools/blktap/parallax/snaplog.c                                  |  238 -
 tools/blktap/parallax/snaplog.h                                  |   61 
 tools/blktap/parallax/vdi.c                                      |  367 --
 tools/blktap/parallax/vdi.h                                      |   55 
 tools/blktap/parallax/vdi_create.c                               |   52 
 tools/blktap/parallax/vdi_fill.c                                 |   81 
 tools/blktap/parallax/vdi_list.c                                 |   47 
 tools/blktap/parallax/vdi_snap.c                                 |   43 
 tools/blktap/parallax/vdi_snap_delete.c                          |   48 
 tools/blktap/parallax/vdi_snap_list.c                            |   82 
 tools/blktap/parallax/vdi_tree.c                                 |  132 
 tools/blktap/parallax/vdi_unittest.c                             |  184 -
 tools/blktap/parallax/vdi_validate.c                             |   97 
 tools/blktap/ublkback/Makefile                                   |   40 
 tools/blktap/ublkback/ublkback.c                                 |   18 
 tools/blktap/ublkback/ublkbacklib.c                              |  473 ---
 tools/blktap/ublkback/ublkbacklib.h                              |   16 
 tools/blktap/xenbus.c                                            |  568 ----
 docs/src/user.tex                                                |    3 
 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c                    |   51 
 linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c             |   19 
 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c                  |   54 
 linux-2.6-xen-sparse/drivers/xen/Kconfig                         |   10 
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c               |   10 
 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c              |    3 
 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c                   |    4 
 linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile                |    2 
 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c               |   16 
 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile                 |    2 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c           |    4 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h        |   23 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h |   17 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h      |   22 
 patches/linux-2.6.16.13/ipv6-no-autoconf.patch                   |   20 
 tools/examples/network-bridge                                    |   36 
 tools/examples/vif-bridge                                        |   12 
 tools/examples/vtpm                                              |    6 
 tools/examples/vtpm-common.sh                                    |   36 
 tools/examples/xen-network-common.sh                             |   45 
 tools/examples/xmexample.hvm                                     |   12 
 tools/firmware/acpi/Makefile                                     |    7 
 tools/firmware/acpi/acpi2_0.h                                    |    2 
 tools/firmware/acpi/acpi_dsdt.asl                                |  345 ++
 tools/firmware/acpi/acpi_dsdt.c                                  |  399 +-
 tools/firmware/acpi/acpi_fadt.h                                  |   21 
 tools/ioemu/hw/pc.c                                              |    8 
 tools/ioemu/hw/pci.c                                             |   19 
 tools/ioemu/hw/piix4acpi.c                                       |  481 +++
 tools/ioemu/target-i386-dm/Makefile                              |    2 
 tools/libxc/xc_domain.c                                          |   11 
 tools/libxc/xc_linux_restore.c                                   |   66 
 tools/libxc/xc_linux_save.c                                      |    4 
 tools/libxc/xenctrl.h                                            |    4 
 tools/python/xen/lowlevel/xc/xc.c                                |   31 
 tools/python/xen/util/SSHTransport.py                            |  102 
 tools/python/xen/util/xmlrpclib2.py                              |   55 
 tools/python/xen/xend/XendClient.py                              |   13 
 tools/python/xen/xend/XendDomainInfo.py                          |    5 
 tools/python/xen/xend/balloon.py                                 |    2 
 tools/python/xen/xm/create.py                                    |    2 
 tools/python/xen/xm/main.py                                      |   34 
 tools/security/secpol_tool.c                                     |    7 
 tools/xm-test/grouptest/default                                  |    2 
 tools/xm-test/grouptest/medium                                   |    2 
 tools/xm-test/lib/XmTestLib/Console.py                           |   70 
 tools/xm-test/tests/memset/03_memset_random_pos.py               |    6 
 xen/acm/acm_core.c                                               |    5 
 xen/acm/acm_policy.c                                             |   45 
 xen/arch/ia64/linux-xen/smp.c                                    |   36 
 xen/arch/ia64/xen/domain.c                                       |    4 
 xen/arch/ia64/xen/xensetup.c                                     |    3 
 xen/arch/x86/Makefile                                            |    2 
 xen/arch/x86/audit.c                                             |    4 
 xen/arch/x86/hvm/vmx/vmcs.c                                      |   17 
 xen/arch/x86/hvm/vmx/vmx.c                                       |   13 
 xen/arch/x86/mm.c                                                |  193 -
 xen/arch/x86/setup.c                                             |    7 
 xen/arch/x86/shadow.c                                            |  125 
 xen/arch/x86/shadow32.c                                          |   12 
 xen/arch/x86/shadow_guest32pae.c                                 |    2 
 xen/arch/x86/shadow_public.c                                     |   40 
 xen/arch/x86/time.c                                              |    2 
 xen/arch/x86/traps.c                                             |  242 +
 xen/arch/x86/x86_32/seg_fixup.c                                  |    2 
 xen/arch/x86/x86_32/traps.c                                      |   44 
 xen/arch/x86/x86_64/traps.c                                      |   37 
 xen/arch/x86/x86_emulate.c                                       |    4 
 xen/common/acm_ops.c                                             |    2 
 xen/common/dom0_ops.c                                            |   19 
 xen/common/domain.c                                              |    2 
 xen/common/kernel.c                                              |    5 
 xen/common/keyhandler.c                                          |   20 
 xen/common/memory.c                                              |    2 
 xen/common/sched_credit.c                                        |    4 
 xen/common/sched_sedf.c                                          |   26 
 xen/common/schedule.c                                            |    4 
 xen/drivers/char/console.c                                       |   45 
 xen/include/acm/acm_core.h                                       |    9 
 xen/include/acm/acm_hooks.h                                      |   18 
 xen/include/asm-ia64/debugger.h                                  |    8 
 xen/include/asm-ia64/vmx.h                                       |    1 
 xen/include/asm-ia64/xenprocessor.h                              |    2 
 xen/include/asm-x86/hvm/support.h                                |    2 
 xen/include/asm-x86/mm.h                                         |    2 
 xen/include/asm-x86/processor.h                                  |   15 
 xen/include/asm-x86/shadow.h                                     |   44 
 xen/include/asm-x86/shadow_64.h                                  |   36 
 xen/include/asm-x86/shadow_ops.h                                 |    8 
 xen/include/public/arch-x86_32.h                                 |    9 
 xen/include/public/arch-x86_64.h                                 |   14 
 xen/include/public/dom0_ops.h                                    |   23 
 xen/include/public/memory.h                                      |   14 
 xen/include/xen/console.h                                        |    2 
 xen/include/xen/lib.h                                            |    1 
 xen/include/xen/sched.h                                          |    1 
 136 files changed, 2494 insertions(+), 9326 deletions(-)

diff -r 049e669e6a8a -r b29806fb6ba0 docs/src/user.tex
--- a/docs/src/user.tex Mon Jun 26 13:09:11 2006 -0400
+++ b/docs/src/user.tex Mon Jun 26 14:53:55 2006 -0500
@@ -1972,7 +1972,8 @@ editing \path{grub.conf}.
 \item [ console=$<$specifier list$>$ ] Specify the destination for Xen
   console I/O.  This is a comma-separated list of, for example:
   \begin{description}
-  \item[ vga ] Use VGA console and allow keyboard input.
+  \item[ vga ] Use VGA console (only until domain 0 boots, unless {\bf
+  vga[keep] } is specified).
   \item[ com1 ] Use serial port com1.
   \item[ com2H ] Use serial port com2. Transmitted chars will have the
     MSB set. Received chars must have MSB set.
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c     Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c     Mon Jun 26 14:53:55 2006 -0500
@@ -273,6 +273,49 @@ static void dump_fault_path(unsigned lon
 }
 #endif
 
+static int spurious_fault(struct pt_regs *regs,
+                         unsigned long address,
+                         unsigned long error_code)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+#ifdef CONFIG_XEN
+       /* Faults in hypervisor area are never spurious. */
+       if (address >= HYPERVISOR_VIRT_START)
+               return 0;
+#endif
+
+       /* Reserved-bit violation or user access to kernel space? */
+       if (error_code & 0x0c)
+               return 0;
+
+       pgd = init_mm.pgd + pgd_index(address);
+       if (!pgd_present(*pgd))
+               return 0;
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               return 0;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               return 0;
+
+       pte = pte_offset_kernel(pmd, address);
+       if (!pte_present(*pte))
+               return 0;
+       if ((error_code & 0x02) && !pte_write(*pte))
+               return 0;
+#ifdef CONFIG_X86_PAE
+       if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX))
+               return 0;
+#endif
+
+       return 1;
+}
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -327,8 +370,16 @@ fastcall void __kprobes do_page_fault(st
         * protection error (error_code & 1) == 0.
         */
        if (unlikely(address >= TASK_SIZE)) { 
+#ifdef CONFIG_XEN
+               /* Faults in hypervisor area can never be patched up. */
+               if (address >= HYPERVISOR_VIRT_START)
+                       goto bad_area_nosemaphore;
+#endif
                if (!(error_code & 5))
                        goto vmalloc_fault;
+               /* Can take a spurious fault if mapping changes R/O -> R/W. */
+               if (spurious_fault(regs, address, error_code))
+                       return;
                /* 
                 * Don't take the mm semaphore here. If we fixup a prefetch
                 * fault we could otherwise deadlock.
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c      Mon Jun 26 14:53:55 2006 -0500
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/percpu.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -92,8 +93,16 @@ static void __init setup_boot_cpu_data(v
        boot_cpu_data.x86_mask = eax & 0xf;
 }
 
+#include <xen/interface/memory.h>
+unsigned long *machine_to_phys_mapping;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
 void __init x86_64_start_kernel(char * real_mode_data)
 {
+       struct xen_machphys_mapping mapping;
+       unsigned long machine_to_phys_nr_ents;
        char *s;
        int i;
 
@@ -104,6 +113,16 @@ void __init x86_64_start_kernel(char * r
                start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
                        xen_start_info->nr_pt_frames;
        }
+
+
+       machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+       machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+       if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+               machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+               machine_to_phys_nr_ents = mapping.max_mfn + 1;
+       }
+       while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+               machine_to_phys_order++;
 
 #if 0
        for (i = 0; i < 256; i++)
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c   Mon Jun 26 14:53:55 2006 -0500
@@ -307,6 +307,49 @@ int exception_trace = 1;
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
 
+static int spurious_fault(struct pt_regs *regs,
+                         unsigned long address,
+                         unsigned long error_code)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+#ifdef CONFIG_XEN
+       /* Faults in hypervisor area are never spurious. */
+       if ((address >= HYPERVISOR_VIRT_START) &&
+           (address < HYPERVISOR_VIRT_END))
+               return 0;
+#endif
+
+       /* Reserved-bit violation or user access to kernel space? */
+       if (error_code & (PF_RSVD|PF_USER))
+               return 0;
+
+       pgd = init_mm.pgd + pgd_index(address);
+       if (!pgd_present(*pgd))
+               return 0;
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               return 0;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               return 0;
+
+       pte = pte_offset_kernel(pmd, address);
+       if (!pte_present(*pte))
+               return 0;
+       if ((error_code & PF_WRITE) && !pte_write(*pte))
+               return 0;
+       if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
+               return 0;
+
+       return 1;
+}
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -361,16 +404,19 @@ asmlinkage void __kprobes do_page_fault(
         */
        if (unlikely(address >= TASK_SIZE64)) {
                /*
-                * Must check for the entire kernel range here: with writable
-                * page tables the hypervisor may temporarily clear PMD
-                * entries.
+                * Don't check for the module range here: its PML4
+                * is always initialized because it's shared with the main
+                * kernel text. Only vmalloc may need PML4 syncups.
                 */
                if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-                   address >= PAGE_OFFSET) {
+                     ((address >= VMALLOC_START && address < VMALLOC_END))) {
                        if (vmalloc_fault(address) < 0)
                                goto bad_area_nosemaphore;
                        return;
                }
+               /* Can take a spurious fault if mapping changes R/O -> R/W. */
+               if (spurious_fault(regs, address, error_code))
+                       return;
                /*
                 * Don't take the mm semaphore here. If we fixup a prefetch
                 * fault we could otherwise deadlock.
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Mon Jun 26 14:53:55 2006 -0500
@@ -27,6 +27,11 @@ config XEN_UNPRIVILEGED_GUEST
 config XEN_UNPRIVILEGED_GUEST
        bool
        default !XEN_PRIVILEGED_GUEST
+
+config XEN_PRIVCMD
+       bool
+       depends on PROC_FS
+       default y
 
 config XEN_BACKEND
         tristate "Backend driver support"
@@ -84,6 +89,11 @@ config XEN_BLKDEV_BACKEND
          block devices to other guests via a high-performance shared-memory
          interface.
 
+config XEN_XENBUS_DEV
+       bool
+       depends on PROC_FS
+       default y
+
 config XEN_NETDEV_BACKEND
        tristate "Network-device backend driver"
         depends on XEN_BACKEND && NET
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Jun 26 14:53:55 2006 -0500
@@ -58,7 +58,9 @@
 
 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
 
+#ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *balloon_pde;
+#endif
 
 static DECLARE_MUTEX(balloon_mutex);
 
@@ -403,6 +405,7 @@ static int balloon_init_watcher(struct n
        return NOTIFY_DONE;
 }
 
+#ifdef CONFIG_PROC_FS
 static int balloon_write(struct file *file, const char __user *buffer,
                         unsigned long count, void *data)
 {
@@ -456,6 +459,7 @@ static int balloon_read(char *page, char
        *eof = 1;
        return len;
 }
+#endif
 
 static struct notifier_block xenstore_notifier;
 
@@ -464,10 +468,10 @@ static int __init balloon_init(void)
        unsigned long pfn;
        struct page *page;
 
-       IPRINTK("Initialising balloon driver.\n");
-
        if (!is_running_on_xen())
                return -ENODEV;
+
+       IPRINTK("Initialising balloon driver.\n");
 
        current_pages = min(xen_start_info->nr_pages, max_pfn);
        totalram_pages = current_pages;
@@ -481,6 +485,7 @@ static int __init balloon_init(void)
        balloon_timer.data = 0;
        balloon_timer.function = balloon_alarm;
     
+#ifdef CONFIG_PROC_FS
        if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
                WPRINTK("Unable to create /proc/xen/balloon.\n");
                return -1;
@@ -488,6 +493,7 @@ static int __init balloon_init(void)
 
        balloon_pde->read_proc  = balloon_read;
        balloon_pde->write_proc = balloon_write;
+#endif
     
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Mon Jun 26 14:53:55 2006 -0500
@@ -109,6 +109,9 @@ static int __init setup_vcpu_hotplug_eve
        static struct notifier_block xsn_cpu = {
                .notifier_call = setup_cpu_watcher };
 
+       if (!is_running_on_xen())
+               return -ENODEV;
+
        register_cpu_notifier(&hotplug_cpu);
        register_xenstore_notifier(&xsn_cpu);
 
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Mon Jun 26 14:53:55 2006 -0500
@@ -666,6 +666,10 @@ int irq_ignore_unhandled(unsigned int ir
 int irq_ignore_unhandled(unsigned int irq)
 {
        struct physdev_irq_status_query irq_status = { .irq = irq };
+
+       if (!is_running_on_xen())
+               return 0;
+
        (void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
        return !!(irq_status.flags & XENIRQSTAT_shared);
 }
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Mon Jun 26 14:53:55 2006 -0500
@@ -1,2 +1,2 @@
 
-obj-y  := privcmd.o
+obj-$(CONFIG_XEN_PRIVCMD)      := privcmd.o
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Mon Jun 26 14:53:55 2006 -0500
@@ -71,8 +71,6 @@ static int packet_read_shmem(struct pack
                             char *buffer, int isuserbuffer, u32 left);
 static int vtpm_queue_packet(struct packet *pak);
 
-#define MIN(x,y)  (x) < (y) ? (x) : (y)
-
 /***************************************************************
  Buffer copying fo user and kernel space buffes.
 ***************************************************************/
@@ -309,7 +307,7 @@ int _packet_write(struct packet *pak,
                        return 0;
                }
 
-               tocopy = MIN(size - offset, PAGE_SIZE);
+               tocopy = min_t(size_t, size - offset, PAGE_SIZE);
 
                if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) |
                                              (tx->addr & ~PAGE_MASK)),
@@ -365,7 +363,7 @@ static int packet_read(struct packet *pa
                u32 instance_no = htonl(pak->tpm_instance);
                u32 last_read = pak->last_read;
 
-               to_copy = MIN(4 - last_read, numbytes);
+               to_copy = min_t(size_t, 4 - last_read, numbytes);
 
                if (copy_to_buffer(&buffer[0],
                                   &(((u8 *) & instance_no)[last_read]),
@@ -384,7 +382,7 @@ static int packet_read(struct packet *pa
 
        if (room_left > 0) {
                if (pak->data_buffer) {
-                       u32 to_copy = MIN(pak->data_len - offset, room_left);
+                       u32 to_copy = min_t(u32, pak->data_len - offset, room_left);
                        u32 last_read = pak->last_read - 4;
 
                        if (copy_to_buffer(&buffer[offset],
@@ -424,7 +422,7 @@ static int packet_read_shmem(struct pack
         * and within that page at offset 'offset'.
         * Copy a maximum of 'room_left' bytes.
         */
-       to_copy = MIN(PAGE_SIZE - pg_offset, room_left);
+       to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
        while (to_copy > 0) {
                void *src;
                struct gnttab_map_grant_ref map_op;
@@ -451,7 +449,7 @@ static int packet_read_shmem(struct pack
                        /*
                         * User requests more than what's available
                         */
-                       to_copy = MIN(tx->size, to_copy);
+                       to_copy = min_t(u32, tx->size, to_copy);
                }
 
                DPRINTK("Copying from mapped memory at %08lx\n",
@@ -483,7 +481,7 @@ static int packet_read_shmem(struct pack
                last_read += to_copy;
                room_left -= to_copy;
 
-               to_copy = MIN(PAGE_SIZE, room_left);
+               to_copy = min_t(u32, PAGE_SIZE, room_left);
                i++;
        }                       /* while (to_copy > 0) */
        /*
@@ -545,7 +543,7 @@ static ssize_t vtpm_op_read(struct file 
 
                DPRINTK("size given by app: %d, available: %d\n", size, left);
 
-               ret_size = MIN(size, left);
+               ret_size = min_t(size_t, size, left);
 
                ret_size = packet_read(pak, ret_size, data, size, 1);
 
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Mon Jun 26 14:53:55 2006 -0500
@@ -9,4 +9,4 @@ xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_xs.o
 xenbus-objs += xenbus_probe.o
-xenbus-objs += xenbus_dev.o
+obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Jun 26 14:53:55 2006 -0500
@@ -926,6 +926,7 @@ void xenbus_probe(void *unused)
 }
 
 
+#ifdef CONFIG_PROC_FS
 static struct file_operations xsd_kva_fops;
 static struct proc_dir_entry *xsd_kva_intf;
 static struct proc_dir_entry *xsd_port_intf;
@@ -964,6 +965,7 @@ static int xsd_port_read(char *page, cha
        *eof = 1;
        return len;
 }
+#endif
 
 
 static int __init xenbus_probe_init(void)
@@ -1008,6 +1010,7 @@ static int __init xenbus_probe_init(void
                BUG_ON(err);
                xen_start_info->store_evtchn = alloc_unbound.port;
 
+#ifdef CONFIG_PROC_FS
                /* And finally publish the above info in /proc/xen */
                xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
                if (xsd_kva_intf) {
@@ -1020,6 +1023,7 @@ static int __init xenbus_probe_init(void
                xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
                if (xsd_port_intf)
                        xsd_port_intf->read_proc = xsd_port_read;
+#endif
        } else
                xenstored_ready = 1;
 
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Mon Jun 26 14:53:55 2006 -0500
@@ -67,6 +67,10 @@
 
 extern unsigned long *phys_to_machine_mapping;
 
+#undef machine_to_phys_mapping
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int   machine_to_phys_order;
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -84,24 +88,29 @@ static inline int phys_to_machine_mappin
 
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
+       extern unsigned long max_mapnr;
        unsigned long pfn;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return mfn;
 
-       /*
-        * The array access can fail (e.g., device space beyond end of RAM).
-        * In such cases it doesn't matter what we return (we return garbage),
-        * but we must handle the fault without crashing!
-        */
+       if (unlikely((mfn >> machine_to_phys_order) != 0))
+               return max_mapnr;
+
+       /* The array access can fail (e.g., device space beyond end of RAM). */
        asm (
                "1:     movl %1,%0\n"
                "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     movl %2,%0\n"
+               "       jmp  2b\n"
+               ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
-               "       .long 1b,2b\n"
+               "       .long 1b,3b\n"
                ".previous"
-               : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+               : "=r" (pfn)
+               : "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) );
 
        return pfn;
 }
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Mon Jun 26 14:53:55 2006 -0500
@@ -7,6 +7,7 @@
  **/
 
 #include <xen/interface/callback.h>
+#include <xen/interface/memory.h>
 
 static char * __init machine_specific_memory_setup(void)
 {
@@ -44,9 +45,16 @@ extern void failsafe_callback(void);
 extern void failsafe_callback(void);
 extern void nmi(void);
 
+unsigned long *machine_to_phys_mapping;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
 static void __init machine_specific_arch_setup(void)
 {
        int ret;
+       struct xen_machphys_mapping mapping;
+       unsigned long machine_to_phys_nr_ents;
        struct xen_platform_parameters pp;
        struct callback_register event = {
                .type = CALLBACKTYPE_event,
@@ -81,4 +89,13 @@ static void __init machine_specific_arch
        if (HYPERVISOR_xen_version(XENVER_platform_parameters,
                                   &pp) == 0)
                set_fixaddr_top(pp.virt_start - PAGE_SIZE);
+
+       machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+       machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+       if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+               machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+               machine_to_phys_nr_ents = mapping.max_mfn + 1;
+       }
+       while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+               machine_to_phys_order++;
 }
diff -r 049e669e6a8a -r b29806fb6ba0 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h       Mon Jun 26 13:09:11 2006 -0400
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h       Mon Jun 26 14:53:55 2006 -0500
@@ -85,6 +85,10 @@ void copy_page(void *, void *);
 
 extern unsigned long *phys_to_machine_mapping;
 
+#undef machine_to_phys_mapping
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int   machine_to_phys_order;
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -107,19 +111,23 @@ static inline unsigned long mfn_to_pfn(u
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return mfn;
 
-       /*
-        * The array access can fail (e.g., device space beyond end of RAM).
-        * In such cases it doesn't matter what we return (we return garbage),
-        * but we must handle the fault without crashing!
-        */
+       if (unlikely((mfn >> machine_to_phys_order) != 0))
+               return end_pfn;
+
+       /* The array access can fail (e.g., device space beyond end of RAM). */
        asm (
                "1:     movq %1,%0\n"
                "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     movq %2,%0\n"
+               "       jmp  2b\n"
+               ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 8\n"
-               "       .quad 1b,2b\n"
+               "       .quad 1b,3b\n"
                ".previous"
-               : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+               : "=r" (pfn)
+               : "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) );
 
        return pfn;
 }
diff -r 049e669e6a8a -r b29806fb6ba0 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/examples/network-bridge     Mon Jun 26 14:53:55 2006 -0500
@@ -151,30 +151,6 @@ link_exists()
     fi
 }
 
-
-# Usage: create_bridge bridge
-create_bridge () {
-    local bridge=$1
-
-    # Don't create the bridge if it already exists.
-    if ! brctl show | grep -q ${bridge} ; then
-       brctl addbr ${bridge}
-       brctl stp ${bridge} off
-       brctl setfd ${bridge} 0
-    fi
-    ip link set ${bridge} up
-}
-
-# Usage: add_to_bridge bridge dev
-add_to_bridge () {
-    local bridge=$1
-    local dev=$2
-    # Don't add $dev to $bridge if it's already on a bridge.
-    if ! brctl show | grep -q ${dev} ; then
-       brctl addif ${bridge} ${dev}
-    fi
-}
-
 # Set the default forwarding policy for $dev to drop.
 # Allow forwarding to the bridge.
 antispoofing () {
@@ -238,14 +214,13 @@ using loopback.nloopbacks=<N> on the dom
        fi
        ip link set ${netdev} name ${pdev}
        ip link set ${vdev} name ${netdev}
-       ip link set ${pdev} down arp off
-       ip link set ${pdev} addr fe:ff:ff:ff:ff:ff
-       ip addr flush ${pdev}
+
+       setup_bridge_port ${pdev}
+       setup_bridge_port ${vif0}
        ip link set ${netdev} addr ${mac} arp on
-       add_to_bridge ${bridge} ${vif0}
+
        ip link set ${bridge} up
-       ip link set ${vif0} up
-       ip link set ${pdev} up
+       add_to_bridge  ${bridge} ${vif0}
        add_to_bridge2 ${bridge} ${pdev}
        do_ifup ${netdev}
     else
@@ -301,6 +276,7 @@ add_to_bridge2() {
     local maxtries=10
 
     echo -n "Waiting for ${dev} to negotiate link."
+    ip link set ${dev} up
     for i in `seq ${maxtries}` ; do
        if ifconfig ${dev} | grep -q RUNNING ; then
            break
diff -r 049e669e6a8a -r b29806fb6ba0 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/examples/vif-bridge Mon Jun 26 14:53:55 2006 -0500
@@ -48,16 +48,8 @@ fi
 
 case "$command" in
     online)
-        if brctl show | grep -q "$vif"
-        then
-          log debug "$vif already attached to a bridge"
-          exit 0
-        fi
-
-        brctl addif "$bridge" "$vif" ||
-          fatal "brctl addif $bridge $vif failed"
-
-        ifconfig "$vif" up || fatal "ifconfig $vif up failed"
+       setup_bridge_port "$vif"
+       add_to_bridge "$bridge" "$vif"
         ;;
 
     offline)
diff -r 049e669e6a8a -r b29806fb6ba0 tools/examples/vtpm
--- a/tools/examples/vtpm       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/examples/vtpm       Mon Jun 26 14:53:55 2006 -0500
@@ -9,13 +9,7 @@ case "$command" in
   add)
     vtpm_create_instance
   ;;
-  online)
-    vtpm_create_instance
-  ;;
   remove)
-    vtpm_remove_instance
-  ;;
-  offline)
     vtpm_remove_instance
   ;;
 esac
diff -r 049e669e6a8a -r b29806fb6ba0 tools/examples/vtpm-common.sh
--- a/tools/examples/vtpm-common.sh     Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/examples/vtpm-common.sh     Mon Jun 26 14:53:55 2006 -0500
@@ -23,7 +23,7 @@ VTPMDB="/etc/xen/vtpm.db"
 VTPMDB="/etc/xen/vtpm.db"
 
 #In the vtpm-impl file some commands should be defined:
-#      vtpm_create, vtpm_setup, vtpm_reset, etc. (see below)
+#      vtpm_create, vtpm_setup, vtpm_start, etc. (see below)
 #This should be indicated by setting VTPM_IMPL_DEFINED.
 if [ -r "$dir/vtpm-impl" ]; then
        . "$dir/vtpm-impl"
@@ -36,7 +36,7 @@ if [ -z "$VTPM_IMPL_DEFINED" ]; then
        function vtpm_setup() {
                true
        }
-       function vtpm_reset() {
+       function vtpm_start() {
                true
        }
        function vtpm_suspend() {
@@ -256,22 +256,22 @@ function vtpm_create_instance () {
                else
                        instance=$(vtpmdb_get_free_instancenum)
                fi
-               if [ "$reason" == "create" ]; then
-                       vtpm_create $instance
-               else
-                       vtpm_resume $instance $domname
-               fi
+
+               vtpm_create $instance
+
                if [ $vtpm_fatal_error -eq 0 ]; then
                        vtpmdb_add_instance $domname $instance
                fi
+       else
+               if [ "$reason" == "resume" ]; then
+                       vtpm_resume $instance
+               else
+                       vtpm_start $instance
+               fi
        fi
 
        release_lock vtpmdb
 
-       if [ $vtpm_fatal_error -eq 0 -a \
-            "$reason" == "create" ]; then
-               vtpm_reset $instance
-       fi
        xenstore_write $XENBUS_PATH/instance $instance
 }
 
@@ -283,19 +283,17 @@ function vtpm_remove_instance () {
        local instance reason domname
        domname=$(xenstore_read "$XENBUS_PATH"/domain)
 
-       if [ "$doname" != "" ]; then
+       if [ "$domname" != "" ]; then
                claim_lock vtpmdb
 
                instance=$(vtpmdb_find_instance $domname)
 
                if [ "$instance" != "0" ]; then
-                       if [ "$reason" == "suspend" ]; then
-                               vtpm_suspend $instance
-                       fi
-               fi
-       fi
-
-       release_lock vtpmdb
+                       vtpm_suspend $instance
+               fi
+
+               release_lock vtpmdb
+       fi
 }
 
 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh      Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/examples/xen-network-common.sh      Mon Jun 26 14:53:55 2006 -0500
@@ -104,3 +104,48 @@ find_dhcpd_init_file()
 {
   first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd}
 }
+
+# configure interfaces which act as pure bridge ports:
+#  - make quiet: no arp, no multicast (ipv6 autoconf)
+#  - set mac address to fe:ff:ff:ff:ff:ff
+setup_bridge_port() {
+    local dev="$1"
+
+    # take interface down ...
+    ip link set ${dev} down
+
+    # ... and configure it
+    ip link set ${dev} arp off
+    ip link set ${dev} multicast off
+    ip link set ${dev} addr fe:ff:ff:ff:ff:ff
+    ip addr flush ${dev}
+}
+
+# Usage: create_bridge bridge
+create_bridge () {
+    local bridge=$1
+
+    # Don't create the bridge if it already exists.
+    if [ ! -e "/sys/class/net/${bridge}/bridge" ]; then
+       brctl addbr ${bridge}
+       brctl stp ${bridge} off
+       brctl setfd ${bridge} 0
+        ip link set ${bridge} arp off
+        ip link set ${bridge} multicast off
+    fi
+    ip link set ${bridge} up
+}
+
+# Usage: add_to_bridge bridge dev
+add_to_bridge () {
+    local bridge=$1
+    local dev=$2
+
+    # Don't add $dev to $bridge if it's already on a bridge.
+    if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
+       return
+    fi
+    brctl addif ${bridge} ${dev}
+    ip link set ${dev} up
+}
+
diff -r 049e669e6a8a -r b29806fb6ba0 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/examples/xmexample.hvm      Mon Jun 26 14:53:55 2006 -0500
@@ -164,3 +164,15 @@ ne2000=0
 #-----------------------------------------------------------------------------
 #    start in full screen
 #full-screen=1   
+
+
+#-----------------------------------------------------------------------------
+#   Enable USB support (specific devices specified at runtime through the
+#                      monitor window)
+#usb=1
+
+#   Enable USB mouse support (only enable one of the following, `mouse' for
+#                            PS/2 protocol relative mouse, `tablet' for
+#                            absolute mouse)
+#usbdevice='mouse'
+#usbdevice='tablet'
diff -r 049e669e6a8a -r b29806fb6ba0 tools/firmware/acpi/Makefile
--- a/tools/firmware/acpi/Makefile      Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/firmware/acpi/Makefile      Mon Jun 26 14:53:55 2006 -0500
@@ -33,17 +33,16 @@ IASL_URL=http://developer.intel.com/tech
 IASL_URL=http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz
 
 vpath iasl $(PATH)
-.PHONY: all
 all:$(ACPI_BIN)
 
 acpi_dsdt.c:acpi_dsdt.asl
        $(MAKE) iasl
-       iasl -oa -tc acpi_dsdt.asl
+       iasl  -tc acpi_dsdt.asl
        mv acpi_dsdt.hex acpi_dsdt.c
        echo "int DsdtLen=sizeof(AmlCode);" >> acpi_dsdt.c
        rm *.aml
+#        iasl -oa -tc acpi_dsdt.asl
 
-.PHONY: iasl
 iasl:
        @echo
        @echo "ACPI ASL compiler(iasl) is needed"
@@ -62,10 +61,8 @@ iasl:
 $(ACPI_BIN):$(ACPI_GEN)
        ./$(ACPI_GEN) $(ACPI_BIN)
 
-.PHONY: clean
 clean:
        rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER) 
        rm -rf  $(IASL_VER).tar.gz
 
-.PHONY: install
 install: all
diff -r 049e669e6a8a -r b29806fb6ba0 tools/firmware/acpi/acpi2_0.h
--- a/tools/firmware/acpi/acpi2_0.h     Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/firmware/acpi/acpi2_0.h     Mon Jun 26 14:53:55 2006 -0500
@@ -323,7 +323,7 @@ typedef struct {
 // The physical that acpi table reside in the guest BIOS
 //#define ACPI_PHYSICAL_ADDRESS 0xE2000
 #define ACPI_PHYSICAL_ADDRESS 0xEA000
-#define ACPI_TABLE_SIZE (2*1024)  //Currently 2K is enough
+#define ACPI_TABLE_SIZE (4*1024)  //Currently 4K is enough
 
 void
 AcpiBuildTable(uint8_t* buf);
diff -r 049e669e6a8a -r b29806fb6ba0 tools/firmware/acpi/acpi_dsdt.asl
--- a/tools/firmware/acpi/acpi_dsdt.asl Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/firmware/acpi/acpi_dsdt.asl Mon Jun 26 14:53:55 2006 -0500
@@ -20,7 +20,7 @@
 //**
 //**
 
-DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL ", "XEN     ", 2)
+DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006)
 {
     Name (\PMBS, 0x0C00)
     Name (\PMLN, 0x08)
@@ -29,24 +29,33 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
     Name (\APCB, 0xFEC00000)
     Name (\APCL, 0x00010000)
     Name (\PUID, 0x00)
+
     Scope (\_PR)
     {
         Processor (CPU0, 0x00, 0x00000000, 0x00) {}
         Processor (CPU1, 0x01, 0x00000000, 0x00) {}
         Processor (CPU2, 0x02, 0x00000000, 0x00) {}
         Processor (CPU3, 0x03, 0x00000000, 0x00) {}
+
     }
 
 /* Poweroff support - ties in with qemu emulation */
 
     Name (\_S5, Package (0x04)
     {
-        0x07, 
-        0x07, 
-        0x00, 
+        0x07,
+        0x07,
+        0x00,
         0x00
     })
 
+
+       Name(PICD, 0)   
+
+       Method(_PIC, 1) { 
+ 
+               Store(Arg0, PICD) 
+       }
     Scope (\_SB)
     {
         Device (PCI0)
@@ -55,9 +64,20 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
             Name (_UID, 0x00)
             Name (_ADR, 0x00)
             Name (_BBN, 0x00)
+            OperationRegion (PIRP, PCI_Config, 0x3c, 0x10)
+           Field(PIRP, ByteAcc, NoLock, Preserve){        
+          IRQ3,3,
+          IRQ5,5,
+          IRQ7,7,
+          IRQ9,9,
+          IRQA,10,
+          IRQB,11
+         }
+ 
             Method (_CRS, 0, NotSerialized)
             {
-                Name (PRT0, ResourceTemplate ()
+          
+               Name (PRT0, ResourceTemplate ()
                 {
                                        /* bus number is from 0 - 255*/
                     WordBusNumber (ResourceConsumer, MinFixed, MaxFixed, SubDecode,
@@ -79,75 +99,270 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
                         0x0FFF,
                         0x0000,
                         0x0300)
+
+                 /* reserve what device model consumed for IDE and acpi pci device            */
+                     WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange,
+                        0x0000,
+                        0xc000,
+                        0xc01f,
+                        0x0000,
+                        0x0020)
+                 /* reserve what device model consumed for Ethernet controller pci device        */
+                     WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange,
+                        0x0000,
+                        0xc020,
+                        0xc03f,
+                        0x0000,
+                        0x0010)
+
                     DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadOnly,
                         0x00000000,
-                        0x000A0000,
+                        0x000c0000,
                         0x000FFFFF,
                         0x00000000,
-                        0x00060000)
+                        0x00030000)
+
+                 /* reserve what device model consumed for PCI VGA device        */
+
+                    DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite,
+                        0x00000000,
+                        0xF0000000,
+                        0xF1FFFFFF,
+                        0x00000000,
+                        0x02000000)
+                    DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite,
+                        0x00000000,
+                        0xF2000000,
+                        0xF2000FFF,
+                        0x00000000,
+                        0x00001000)
+                 /* reserve what device model consumed for Ethernet controller pci device        */
+                      DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite,
+                        0x00000000,
+                        0xF2001000,
+                        0xF200101F,
+                        0x00000000,
+                        0x00000020) 
                 })
                 Return (PRT0)
             }
-
-            Name (AIR0, Package (0x06)
-            {
-               Package (0x04)
-                {
-                    0x001FFFFF, 
-                    0x02, 
-                    0x00, 
-                    0x17
-                }, 
-
-                Package (0x04)
-                {
-                    0x001FFFFF, 
-                    0x03, 
-                    0x00, 
-                    0x13
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x01, 
-                    0x00, 
-                    0x13
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x00, 
-                    0x00, 
-                    0x10
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x02, 
-                    0x00, 
-                    0x12
-                }, 
-
-                Package (0x04)
-                {
-                    0x001DFFFF, 
-                    0x03, 
-                    0x00, 
-                    0x17
-                }
-            })
-            Method (_PRT, 0, NotSerialized)
-            {
-                Return (AIR0)
-            }
-
+       Name(BUFA, ResourceTemplate() {
+                IRQ(Level, ActiveLow, Shared) {
+                        3,4,5,6,7,10,11,12,14,15}               
+                }) 
+
+                Name(BUFB, Buffer(){
+                0x23, 0x00, 0x00, 0x18,
+                0x79, 0})
+
+                CreateWordField(BUFB, 0x01, IRQV)
+               
+                Name(BUFC, Buffer(){
+                5, 7, 10, 11
+                 })
+                
+                CreateByteField(BUFC, 0x01, PIQA)
+                CreateByteField(BUFC, 0x01, PIQB)
+                CreateByteField(BUFC, 0x01, PIQC)
+                CreateByteField(BUFC, 0x01, PIQD)
+                               
+               Device(LNKA)    {
+                Name(_HID, EISAID("PNP0C0F"))  // PCI interrupt link
+                Name(_UID, 1)
+                Method(_STA, 0) {
+                               And(PIRA, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)   
+                                }
+                        Else {
+                                Return(0x0B)   
+                                }
+                        }
+
+                Method(_PRS) {
+
+                        Return(BUFA)
+                } // Method(_PRS)
+
+                Method(_DIS) {
+                               Or(PIRA, 0x80, PIRA)
+                }
+
+                Method(_CRS) {
+                        And(PIRB, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } 
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRA)                     
+                 } // Method(_SRS)
+        }
+
+        Device(LNKB)   {
+                Name(_HID, EISAID("PNP0C0F"))   
+                Name(_UID, 2)
+                Method(_STA, 0) {
+                               And(PIRB, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)    
+                                }
+                        Else {
+                                Return(0x0B)    
+                                }
+                        }
+
+                Method(_PRS) {
+                                Return(BUFA)                    
+                } // Method(_PRS)
+
+                Method(_DIS) {
+
+                               Or(PIRB, 0x80, PIRB)
+                }
+
+                Method(_CRS) {
+                        And(PIRB, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } // Method(_CRS)
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRB)                     
+                 } // Method(_SRS)
+        }
+
+        Device(LNKC)   {
+                Name(_HID, EISAID("PNP0C0F"))  // PCI interrupt link
+                Name(_UID, 3)
+                Method(_STA, 0) {
+                               And(PIRC, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)    
+                        }
+                        Else {
+                                Return(0x0B)    
+                        }
+                }
+
+                Method(_PRS) {                          
+                        Return(BUFA)                    
+                } // Method(_PRS)                       
+
+                Method(_DIS) {
+
+                               Or(PIRC, 0x80, PIRC)
+                }
+
+                Method(_CRS) {
+                        And(PIRC, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } // Method(_CRS)
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRC)                     
+                 } // Method(_SRS)
+        }
+
+        Device(LNKD)   {
+                Name(_HID, EISAID("PNP0C0F"))   
+                Name(_UID, 4)
+                Method(_STA, 0) {
+                               And(PIRD, 0x80, Local0)
+                        If(LEqual(Local0, 0x80)) {
+                                Return(0x09)    
+                        }
+                        Else {
+                                Return(0x0B)    
+                        }
+                }
+
+                Method(_PRS) {                          
+                        Return(BUFA)                    
+                } // Method(_PRS)                       
+
+                Method(_DIS) {
+                               Or(PIRD, 0x80, PIRD)
+                }
+
+                Method(_CRS) {
+                        And(PIRD, 0x0f, Local0)                 
+                        ShiftLeft(0x1, Local0, IRQV)    
+                        Return(BUFB)                    
+                } // Method(_CRS)
+
+                Method(_SRS, 1) {
+                                CreateWordField(ARG0, 0x01, IRQ1)       
+                        FindSetRightBit(IRQ1, Local0)           
+                        Decrement(Local0)                       
+                        Store(Local0, PIRD)                     
+                 } // Method(_SRS)
+        }
+        Method(_PRT,0) {
+                       If(PICD) {Return(PRTA)}  
+                       Return (PRTP)  
+               } // end _PRT
+               
+               
+        Name(PRTP, Package(){
+                        Package(){0x0000ffff, 0, \_SB.PCI0.LNKA, 0},   // Slot 1, INTA
+                        Package(){0x0000ffff, 1, \_SB.PCI0.LNKB, 0},   // Slot 1, INTB
+                        Package(){0x0000ffff, 2, \_SB.PCI0.LNKC, 0},   // Slot 1, INTC
+                        Package(){0x0000ffff, 3, \_SB.PCI0.LNKD, 0},   // Slot 1, INTD
+
+                        Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0},   // Slot 2, INTB
+                        Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0},   // Slot 2, INTC
+                        Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0},   // Slot 2, INTD
+                        Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0},   // Slot 2, INTA
+                        
+                        Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0},   // Slot 3, INTC
+                        Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0},   // Slot 3, INTD
+                        Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0},   // Slot 3, INTA
+                        Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0},   // Slot 3, INTB
+                        
+                        Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0},   // Slot 2, INTD
+                        Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0},   // Slot 2, INTA
+                        Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0},   // Slot 2, INTB
+                        Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0},   // Slot 2, INTC
+                        
+                        }
+            )
+       Name(PRTA, Package(){
+                        Package(){0x0001ffff, 0, 0, 5},        // Device 1, INTA
+
+                        Package(){0x0002ffff, 0, 0, 7},        // Device 2, INTA
+                       
+                        Package(){0x0003ffff, 0, 0, 10},       // Device 3, INTA
+
+                        Package(){0x0003ffff, 0, 0, 11},       // Device 4, INTA
+                                   
+                        
+                        }
+            )
+            
             Device (ISA)
             {
-                Name (_ADR, 0x00010000) /*TODO, device id, PCI bus num, ...*/
-
+                Name (_ADR, 0x00000000) /* device id, PCI bus num, ... */
+ 
+               OperationRegion(PIRQ, PCI_Config, 0x60, 0x4)
+                        Scope(\) {
+                                Field (\_SB.PCI0.ISA.PIRQ, ByteAcc, NoLock, Preserve) {
+                                        PIRA, 8,
+                                        PIRB, 8,
+                                        PIRC, 8,
+                                        PIRD, 8
+                                        }
+                                }
                 Device (SYSR)
                 {
                     Name (_HID, EisaId ("PNP0C02"))
diff -r 049e669e6a8a -r b29806fb6ba0 tools/firmware/acpi/acpi_dsdt.c
--- a/tools/firmware/acpi/acpi_dsdt.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/firmware/acpi/acpi_dsdt.c   Mon Jun 26 14:53:55 2006 -0500
@@ -1,22 +1,22 @@
 /*
  * 
  * Intel ACPI Component Architecture
- * ASL Optimizing Compiler / AML Disassembler version 20050624 [Aug 24 2005]
+ * ASL Optimizing Compiler / AML Disassembler version 20050513 [Jun  8 2005]
  * Copyright (C) 2000 - 2005 Intel Corporation
  * Supports ACPI Specification Revision 3.0
  * 
- * Compilation of "acpi_dsdt.asl" - Thu May  4 17:42:00 2006
+ * Compilation of "acpi_dsdt.asl" - Mon Jun 12 22:33:41 2006
  * 
  * C source code output
  *
  */
 unsigned char AmlCode[] = 
 {
-    0x44,0x53,0x44,0x54,0x7C,0x04,0x00,0x00,  /* 00000000    "DSDT|..." */
-    0x01,0x72,0x49,0x4E,0x54,0x45,0x4C,0x20,  /* 00000008    ".rINTEL " */
-    0x58,0x45,0x4E,0x20,0x20,0x20,0x20,0x20,  /* 00000010    "XEN     " */
-    0x02,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
-    0x24,0x06,0x05,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "$.. .PMB" */
+    0x44,0x53,0x44,0x54,0xC3,0x08,0x00,0x00,  /* 00000000    "DSDT...." */
+    0x01,0x0C,0x49,0x4E,0x54,0x45,0x4C,0x00,  /* 00000008    "..INTEL." */
+    0x69,0x6E,0x74,0x2D,0x78,0x65,0x6E,0x00,  /* 00000010    "int-xen." */
+    0xD6,0x07,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
+    0x13,0x05,0x05,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "... .PMB" */
     0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C,  /* 00000028    "S....PML" */
     0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31,  /* 00000030    "N...IOB1" */
     0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08,  /* 00000038    "..IOL1.." */
@@ -32,129 +32,266 @@ unsigned char AmlCode[] =
     0x0B,0x43,0x50,0x55,0x33,0x03,0x00,0x00,  /* 00000088    ".CPU3..." */
     0x00,0x00,0x00,0x08,0x5F,0x53,0x35,0x5F,  /* 00000090    "...._S5_" */
     0x12,0x08,0x04,0x0A,0x07,0x0A,0x07,0x00,  /* 00000098    "........" */
-    0x00,0x10,0x4A,0x3D,0x5F,0x53,0x42,0x5F,  /* 000000A0    "..J=_SB_" */
-    0x5B,0x82,0x42,0x3D,0x50,0x43,0x49,0x30,  /* 000000A8    "[.B=PCI0" */
-    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000000B0    "._HID.A." */
-    0x0A,0x03,0x08,0x5F,0x55,0x49,0x44,0x00,  /* 000000B8    "..._UID." */
-    0x08,0x5F,0x41,0x44,0x52,0x00,0x08,0x5F,  /* 000000C0    "._ADR.._" */
-    0x42,0x42,0x4E,0x00,0x14,0x4A,0x06,0x5F,  /* 000000C8    "BBN..J._" */
-    0x43,0x52,0x53,0x00,0x08,0x50,0x52,0x54,  /* 000000D0    "CRS..PRT" */
-    0x30,0x11,0x48,0x05,0x0A,0x54,0x88,0x0D,  /* 000000D8    "0.H..T.." */
-    0x00,0x02,0x0F,0x00,0x00,0x00,0x00,0x00,  /* 000000E0    "........" */
-    0xFF,0x00,0x00,0x00,0x00,0x01,0x47,0x01,  /* 000000E8    "......G." */
-    0xF8,0x0C,0xF8,0x0C,0x01,0x08,0x88,0x0D,  /* 000000F0    "........" */
-    0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x00,  /* 000000F8    "........" */
-    0xF7,0x0C,0x00,0x00,0xF8,0x0C,0x88,0x0D,  /* 00000100    "........" */
-    0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x0D,  /* 00000108    "........" */
-    0xFF,0x0F,0x00,0x00,0x00,0x03,0x87,0x17,  /* 00000110    "........" */
-    0x00,0x00,0x0C,0x02,0x00,0x00,0x00,0x00,  /* 00000118    "........" */
-    0x00,0x00,0x0A,0x00,0xFF,0xFF,0x0F,0x00,  /* 00000120    "........" */
-    0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,  /* 00000128    "........" */
-    0x79,0x00,0xA4,0x50,0x52,0x54,0x30,0x08,  /* 00000130    "y..PRT0." */
-    0x41,0x49,0x52,0x30,0x12,0x4F,0x04,0x06,  /* 00000138    "AIR0.O.." */
-    0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1F,0x00,  /* 00000140    "........" */
-    0x0A,0x02,0x00,0x0A,0x17,0x12,0x0C,0x04,  /* 00000148    "........" */
-    0x0C,0xFF,0xFF,0x1F,0x00,0x0A,0x03,0x00,  /* 00000150    "........" */
-    0x0A,0x13,0x12,0x0B,0x04,0x0C,0xFF,0xFF,  /* 00000158    "........" */
-    0x1D,0x00,0x01,0x00,0x0A,0x13,0x12,0x0B,  /* 00000160    "........" */
-    0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x00,0x00,  /* 00000168    "........" */
-    0x0A,0x10,0x12,0x0C,0x04,0x0C,0xFF,0xFF,  /* 00000170    "........" */
-    0x1D,0x00,0x0A,0x02,0x00,0x0A,0x12,0x12,  /* 00000178    "........" */
-    0x0C,0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x0A,  /* 00000180    "........" */
-    0x03,0x00,0x0A,0x17,0x14,0x0B,0x5F,0x50,  /* 00000188    "......_P" */
-    0x52,0x54,0x00,0xA4,0x41,0x49,0x52,0x30,  /* 00000190    "RT..AIR0" */
-    0x5B,0x82,0x42,0x2E,0x49,0x53,0x41,0x5F,  /* 00000198    "[.B.ISA_" */
-    0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00,  /* 000001A0    "._ADR..." */
-    0x01,0x00,0x5B,0x82,0x46,0x0B,0x53,0x59,  /* 000001A8    "..[.F.SY" */
-    0x53,0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,  /* 000001B0    "SR._HID." */
-    0x41,0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49,  /* 000001B8    "A...._UI" */
-    0x44,0x01,0x08,0x43,0x52,0x53,0x5F,0x11,  /* 000001C0    "D..CRS_." */
-    0x4E,0x08,0x0A,0x8A,0x47,0x01,0x10,0x00,  /* 000001C8    "N...G..." */
-    0x10,0x00,0x00,0x10,0x47,0x01,0x22,0x00,  /* 000001D0    "....G."." */
-    0x22,0x00,0x00,0x0C,0x47,0x01,0x30,0x00,  /* 000001D8    ""...G.0." */
-    0x30,0x00,0x00,0x10,0x47,0x01,0x44,0x00,  /* 000001E0    "0...G.D." */
-    0x44,0x00,0x00,0x1C,0x47,0x01,0x62,0x00,  /* 000001E8    "D...G.b." */
-    0x62,0x00,0x00,0x02,0x47,0x01,0x65,0x00,  /* 000001F0    "b...G.e." */
-    0x65,0x00,0x00,0x0B,0x47,0x01,0x72,0x00,  /* 000001F8    "e...G.r." */
-    0x72,0x00,0x00,0x0E,0x47,0x01,0x80,0x00,  /* 00000200    "r...G..." */
-    0x80,0x00,0x00,0x01,0x47,0x01,0x84,0x00,  /* 00000208    "....G..." */
-    0x84,0x00,0x00,0x03,0x47,0x01,0x88,0x00,  /* 00000210    "....G..." */
-    0x88,0x00,0x00,0x01,0x47,0x01,0x8C,0x00,  /* 00000218    "....G..." */
-    0x8C,0x00,0x00,0x03,0x47,0x01,0x90,0x00,  /* 00000220    "....G..." */
-    0x90,0x00,0x00,0x10,0x47,0x01,0xA2,0x00,  /* 00000228    "....G..." */
-    0xA2,0x00,0x00,0x1C,0x47,0x01,0xE0,0x00,  /* 00000230    "....G..." */
-    0xE0,0x00,0x00,0x10,0x47,0x01,0xA0,0x08,  /* 00000238    "....G..." */
-    0xA0,0x08,0x00,0x04,0x47,0x01,0xC0,0x0C,  /* 00000240    "....G..." */
-    0xC0,0x0C,0x00,0x10,0x47,0x01,0xD0,0x04,  /* 00000248    "....G..." */
-    0xD0,0x04,0x00,0x02,0x79,0x00,0x14,0x0B,  /* 00000250    "....y..." */
-    0x5F,0x43,0x52,0x53,0x00,0xA4,0x43,0x52,  /* 00000258    "_CRS..CR" */
-    0x53,0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43,  /* 00000260    "S_[.+PIC" */
-    0x5F,0x08,0x5F,0x48,0x49,0x44,0x0B,0x41,  /* 00000268    "_._HID.A" */
-    0xD0,0x08,0x5F,0x43,0x52,0x53,0x11,0x18,  /* 00000270    ".._CRS.." */
-    0x0A,0x15,0x47,0x01,0x20,0x00,0x20,0x00,  /* 00000278    "..G. . ." */
-    0x01,0x02,0x47,0x01,0xA0,0x00,0xA0,0x00,  /* 00000280    "..G....." */
-    0x01,0x02,0x22,0x04,0x00,0x79,0x00,0x5B,  /* 00000288    ".."..y.[" */
-    0x82,0x47,0x05,0x44,0x4D,0x41,0x30,0x08,  /* 00000290    ".G.DMA0." */
-    0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x02,  /* 00000298    "_HID.A.." */
-    0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x41,  /* 000002A0    ".._CRS.A" */
-    0x04,0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01,  /* 000002A8    "..=*..G." */
-    0x00,0x00,0x00,0x00,0x00,0x10,0x47,0x01,  /* 000002B0    "......G." */
-    0x81,0x00,0x81,0x00,0x00,0x03,0x47,0x01,  /* 000002B8    "......G." */
-    0x87,0x00,0x87,0x00,0x00,0x01,0x47,0x01,  /* 000002C0    "......G." */
-    0x89,0x00,0x89,0x00,0x00,0x03,0x47,0x01,  /* 000002C8    "......G." */
-    0x8F,0x00,0x8F,0x00,0x00,0x01,0x47,0x01,  /* 000002D0    "......G." */
-    0xC0,0x00,0xC0,0x00,0x00,0x20,0x47,0x01,  /* 000002D8    "..... G." */
-    0x80,0x04,0x80,0x04,0x00,0x10,0x79,0x00,  /* 000002E0    "......y." */
-    0x5B,0x82,0x25,0x54,0x4D,0x52,0x5F,0x08,  /* 000002E8    "[.%TMR_." */
-    0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x01,  /* 000002F0    "_HID.A.." */
-    0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x10,  /* 000002F8    ".._CRS.." */
-    0x0A,0x0D,0x47,0x01,0x40,0x00,0x40,0x00,  /* 00000300    "..G.@.@." */
-    0x00,0x04,0x22,0x01,0x00,0x79,0x00,0x5B,  /* 00000308    ".."..y.[" */
-    0x82,0x25,0x52,0x54,0x43,0x5F,0x08,0x5F,  /* 00000310    ".%RTC_._" */
-    0x48,0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00,  /* 00000318    "HID.A..." */
-    0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,  /* 00000320    "._CRS..." */
-    0x0D,0x47,0x01,0x70,0x00,0x70,0x00,0x00,  /* 00000328    ".G.p.p.." */
-    0x02,0x22,0x00,0x01,0x79,0x00,0x5B,0x82,  /* 00000330    "."..y.[." */
-    0x22,0x53,0x50,0x4B,0x52,0x08,0x5F,0x48,  /* 00000338    ""SPKR._H" */
-    0x49,0x44,0x0C,0x41,0xD0,0x08,0x00,0x08,  /* 00000340    "ID.A...." */
-    0x5F,0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A,  /* 00000348    "_CRS...." */
-    0x47,0x01,0x61,0x00,0x61,0x00,0x00,0x01,  /* 00000350    "G.a.a..." */
-    0x79,0x00,0x5B,0x82,0x31,0x50,0x53,0x32,  /* 00000358    "y.[.1PS2" */
-    0x4D,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,  /* 00000360    "M._HID.A" */
-    0xD0,0x0F,0x13,0x08,0x5F,0x43,0x49,0x44,  /* 00000368    "...._CID" */
-    0x0C,0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F,  /* 00000370    ".A....._" */
-    0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,  /* 00000378    "STA....." */
-    0x5F,0x43,0x52,0x53,0x11,0x08,0x0A,0x05,  /* 00000380    "_CRS...." */
-    0x22,0x00,0x10,0x79,0x00,0x5B,0x82,0x42,  /* 00000388    ""..y.[.B" */
-    0x04,0x50,0x53,0x32,0x4B,0x08,0x5F,0x48,  /* 00000390    ".PS2K._H" */
-    0x49,0x44,0x0C,0x41,0xD0,0x03,0x03,0x08,  /* 00000398    "ID.A...." */
-    0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0,0x03,  /* 000003A0    "_CID.A.." */
-    0x0B,0x14,0x09,0x5F,0x53,0x54,0x41,0x00,  /* 000003A8    "..._STA." */
-    0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,  /* 000003B0    "...._CRS" */
-    0x11,0x18,0x0A,0x15,0x47,0x01,0x60,0x00,  /* 000003B8    "....G.`." */
-    0x60,0x00,0x00,0x01,0x47,0x01,0x64,0x00,  /* 000003C0    "`...G.d." */
-    0x64,0x00,0x00,0x01,0x22,0x02,0x00,0x79,  /* 000003C8    "d..."..y" */
-    0x00,0x5B,0x82,0x3A,0x46,0x44,0x43,0x30,  /* 000003D0    ".[.:FDC0" */
-    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000003D8    "._HID.A." */
-    0x07,0x00,0x14,0x09,0x5F,0x53,0x54,0x41,  /* 000003E0    "...._STA" */
-    0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,  /* 000003E8    "....._CR" */
-    0x53,0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0,  /* 000003F0    "S....G.." */
-    0x03,0xF0,0x03,0x01,0x06,0x47,0x01,0xF7,  /* 000003F8    ".....G.." */
-    0x03,0xF7,0x03,0x01,0x01,0x22,0x40,0x00,  /* 00000400    "....."@." */
-    0x2A,0x04,0x00,0x79,0x00,0x5B,0x82,0x35,  /* 00000408    "*..y.[.5" */
-    0x55,0x41,0x52,0x31,0x08,0x5F,0x48,0x49,  /* 00000410    "UAR1._HI" */
-    0x44,0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,  /* 00000418    "D.A...._" */
-    0x55,0x49,0x44,0x01,0x14,0x09,0x5F,0x53,  /* 00000420    "UID..._S" */
-    0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,  /* 00000428    "TA....._" */
-    0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,  /* 00000430    "CRS....G" */
-    0x01,0xF8,0x03,0xF8,0x03,0x01,0x08,0x22,  /* 00000438    "......."" */
-    0x10,0x00,0x79,0x00,0x5B,0x82,0x36,0x55,  /* 00000440    "..y.[.6U" */
-    0x41,0x52,0x32,0x08,0x5F,0x48,0x49,0x44,  /* 00000448    "AR2._HID" */
-    0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,  /* 00000450    ".A...._U" */
-    0x49,0x44,0x0A,0x02,0x14,0x09,0x5F,0x53,  /* 00000458    "ID...._S" */
-    0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,  /* 00000460    "TA....._" */
-    0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,  /* 00000468    "CRS....G" */
-    0x01,0xF8,0x02,0xF8,0x02,0x01,0x08,0x22,  /* 00000470    "......."" */
-    0x08,0x00,0x79,0x00,
+    0x00,0x08,0x50,0x49,0x43,0x44,0x00,0x14,  /* 000000A0    "..PICD.." */
+    0x0C,0x5F,0x50,0x49,0x43,0x01,0x70,0x68,  /* 000000A8    "._PIC.ph" */
+    0x50,0x49,0x43,0x44,0x10,0x4E,0x80,0x5F,  /* 000000B0    "PICD.N._" */
+    0x53,0x42,0x5F,0x5B,0x82,0x46,0x80,0x50,  /* 000000B8    "SB_[.F.P" */
+    0x43,0x49,0x30,0x08,0x5F,0x48,0x49,0x44,  /* 000000C0    "CI0._HID" */
+    0x0C,0x41,0xD0,0x0A,0x03,0x08,0x5F,0x55,  /* 000000C8    ".A...._U" */
+    0x49,0x44,0x00,0x08,0x5F,0x41,0x44,0x52,  /* 000000D0    "ID.._ADR" */
+    0x00,0x08,0x5F,0x42,0x42,0x4E,0x00,0x5B,  /* 000000D8    ".._BBN.[" */
+    0x80,0x50,0x49,0x52,0x50,0x02,0x0A,0x3C,  /* 000000E0    ".PIRP..<" */
+    0x0A,0x10,0x5B,0x81,0x24,0x50,0x49,0x52,  /* 000000E8    "..[.$PIR" */
+    0x50,0x01,0x49,0x52,0x51,0x33,0x03,0x49,  /* 000000F0    "P.IRQ3.I" */
+    0x52,0x51,0x35,0x05,0x49,0x52,0x51,0x37,  /* 000000F8    "RQ5.IRQ7" */
+    0x07,0x49,0x52,0x51,0x39,0x09,0x49,0x52,  /* 00000100    ".IRQ9.IR" */
+    0x51,0x41,0x0A,0x49,0x52,0x51,0x42,0x0B,  /* 00000108    "QA.IRQB." */
+    0x14,0x48,0x0D,0x5F,0x43,0x52,0x53,0x00,  /* 00000110    ".H._CRS." */
+    0x08,0x50,0x52,0x54,0x30,0x11,0x46,0x0C,  /* 00000118    ".PRT0.F." */
+    0x0A,0xC2,0x88,0x0D,0x00,0x02,0x0F,0x00,  /* 00000120    "........" */
+    0x00,0x00,0x00,0x00,0xFF,0x00,0x00,0x00,  /* 00000128    "........" */
+    0x00,0x01,0x47,0x01,0xF8,0x0C,0xF8,0x0C,  /* 00000130    "..G....." */
+    0x01,0x08,0x88,0x0D,0x00,0x01,0x0C,0x03,  /* 00000138    "........" */
+    0x00,0x00,0x00,0x00,0xF7,0x0C,0x00,0x00,  /* 00000140    "........" */
+    0xF8,0x0C,0x88,0x0D,0x00,0x01,0x0C,0x03,  /* 00000148    "........" */
+    0x00,0x00,0x00,0x0D,0xFF,0x0F,0x00,0x00,  /* 00000150    "........" */
+    0x00,0x03,0x88,0x0D,0x00,0x01,0x0D,0x03,  /* 00000158    "........" */
+    0x00,0x00,0x00,0xC0,0x1F,0xC0,0x00,0x00,  /* 00000160    "........" */
+    0x20,0x00,0x88,0x0D,0x00,0x01,0x0D,0x03,  /* 00000168    " ......." */
+    0x00,0x00,0x20,0xC0,0x3F,0xC0,0x00,0x00,  /* 00000170    ".. .?..." */
+    0x10,0x00,0x87,0x17,0x00,0x00,0x0C,0x02,  /* 00000178    "........" */
+    0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x00,  /* 00000180    "........" */
+    0xFF,0xFF,0x0F,0x00,0x00,0x00,0x00,0x00,  /* 00000188    "........" */
+    0x00,0x00,0x03,0x00,0x87,0x17,0x00,0x00,  /* 00000190    "........" */
+    0x0D,0x03,0x00,0x00,0x00,0x00,0x00,0x00,  /* 00000198    "........" */
+    0x00,0xF0,0xFF,0xFF,0xFF,0xF1,0x00,0x00,  /* 000001A0    "........" */
+    0x00,0x00,0x00,0x00,0x00,0x02,0x87,0x17,  /* 000001A8    "........" */
+    0x00,0x00,0x0D,0x03,0x00,0x00,0x00,0x00,  /* 000001B0    "........" */
+    0x00,0x00,0x00,0xF2,0xFF,0x0F,0x00,0xF2,  /* 000001B8    "........" */
+    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,  /* 000001C0    "........" */
+    0x87,0x17,0x00,0x00,0x0D,0x03,0x00,0x00,  /* 000001C8    "........" */
+    0x00,0x00,0x00,0x10,0x00,0xF2,0x1F,0x10,  /* 000001D0    "........" */
+    0x00,0xF2,0x00,0x00,0x00,0x00,0x20,0x00,  /* 000001D8    "...... ." */
+    0x00,0x00,0x79,0x00,0xA4,0x50,0x52,0x54,  /* 000001E0    "..y..PRT" */
+    0x30,0x08,0x42,0x55,0x46,0x41,0x11,0x09,  /* 000001E8    "0.BUFA.." */
+    0x0A,0x06,0x23,0xF8,0xDC,0x18,0x79,0x00,  /* 000001F0    "..#...y." */
+    0x08,0x42,0x55,0x46,0x42,0x11,0x09,0x0A,  /* 000001F8    ".BUFB..." */
+    0x06,0x23,0x00,0x00,0x18,0x79,0x00,0x8B,  /* 00000200    ".#...y.." */
+    0x42,0x55,0x46,0x42,0x01,0x49,0x52,0x51,  /* 00000208    "BUFB.IRQ" */
+    0x56,0x08,0x42,0x55,0x46,0x43,0x11,0x07,  /* 00000210    "V.BUFC.." */
+    0x0A,0x04,0x05,0x07,0x0A,0x0B,0x8C,0x42,  /* 00000218    ".......B" */
+    0x55,0x46,0x43,0x01,0x50,0x49,0x51,0x41,  /* 00000220    "UFC.PIQA" */
+    0x8C,0x42,0x55,0x46,0x43,0x01,0x50,0x49,  /* 00000228    ".BUFC.PI" */
+    0x51,0x42,0x8C,0x42,0x55,0x46,0x43,0x01,  /* 00000230    "QB.BUFC." */
+    0x50,0x49,0x51,0x43,0x8C,0x42,0x55,0x46,  /* 00000238    "PIQC.BUF" */
+    0x43,0x01,0x50,0x49,0x51,0x44,0x5B,0x82,  /* 00000240    "C.PIQD[." */
+    0x48,0x08,0x4C,0x4E,0x4B,0x41,0x08,0x5F,  /* 00000248    "H.LNKA._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F,  /* 00000250    "HID.A..." */
+    0x08,0x5F,0x55,0x49,0x44,0x01,0x14,0x1C,  /* 00000258    "._UID..." */
+    0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,0x49,  /* 00000260    "_STA.{PI" */
+    0x52,0x41,0x0A,0x80,0x60,0xA0,0x08,0x93,  /* 00000268    "RA..`..." */
+    0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,0x04,  /* 00000270    "`......." */
+    0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,0x52,  /* 00000278    "....._PR" */
+    0x53,0x00,0xA4,0x42,0x55,0x46,0x41,0x14,  /* 00000280    "S..BUFA." */
+    0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,0x50,  /* 00000288    "._DIS.}P" */
+    0x49,0x52,0x41,0x0A,0x80,0x50,0x49,0x52,  /* 00000290    "IRA..PIR" */
+    0x41,0x14,0x1A,0x5F,0x43,0x52,0x53,0x00,  /* 00000298    "A.._CRS." */
+    0x7B,0x50,0x49,0x52,0x42,0x0A,0x0F,0x60,  /* 000002A0    "{PIRB..`" */
+    0x79,0x01,0x60,0x49,0x52,0x51,0x56,0xA4,  /* 000002A8    "y.`IRQV." */
+    0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,0x53,  /* 000002B0    "BUFB.._S" */
+    0x52,0x53,0x01,0x8B,0x68,0x01,0x49,0x52,  /* 000002B8    "RS..h.IR" */
+    0x51,0x31,0x82,0x49,0x52,0x51,0x31,0x60,  /* 000002C0    "Q1.IRQ1`" */
+    0x76,0x60,0x70,0x60,0x50,0x49,0x52,0x41,  /* 000002C8    "v`p`PIRA" */
+    0x5B,0x82,0x49,0x08,0x4C,0x4E,0x4B,0x42,  /* 000002D0    "[.I.LNKB" */
+    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000002D8    "._HID.A." */
+    0x0C,0x0F,0x08,0x5F,0x55,0x49,0x44,0x0A,  /* 000002E0    "..._UID." */
+    0x02,0x14,0x1C,0x5F,0x53,0x54,0x41,0x00,  /* 000002E8    "..._STA." */
+    0x7B,0x50,0x49,0x52,0x42,0x0A,0x80,0x60,  /* 000002F0    "{PIRB..`" */
+    0xA0,0x08,0x93,0x60,0x0A,0x80,0xA4,0x0A,  /* 000002F8    "...`...." */
+    0x09,0xA1,0x04,0xA4,0x0A,0x0B,0x14,0x0B,  /* 00000300    "........" */
+    0x5F,0x50,0x52,0x53,0x00,0xA4,0x42,0x55,  /* 00000308    "_PRS..BU" */
+    0x46,0x41,0x14,0x11,0x5F,0x44,0x49,0x53,  /* 00000310    "FA.._DIS" */
+    0x00,0x7D,0x50,0x49,0x52,0x42,0x0A,0x80,  /* 00000318    ".}PIRB.." */
+    0x50,0x49,0x52,0x42,0x14,0x1A,0x5F,0x43,  /* 00000320    "PIRB.._C" */
+    0x52,0x53,0x00,0x7B,0x50,0x49,0x52,0x42,  /* 00000328    "RS.{PIRB" */
+    0x0A,0x0F,0x60,0x79,0x01,0x60,0x49,0x52,  /* 00000330    "..`y.`IR" */
+    0x51,0x56,0xA4,0x42,0x55,0x46,0x42,0x14,  /* 00000338    "QV.BUFB." */
+    0x1B,0x5F,0x53,0x52,0x53,0x01,0x8B,0x68,  /* 00000340    "._SRS..h" */
+    0x01,0x49,0x52,0x51,0x31,0x82,0x49,0x52,  /* 00000348    ".IRQ1.IR" */
+    0x51,0x31,0x60,0x76,0x60,0x70,0x60,0x50,  /* 00000350    "Q1`v`p`P" */
+    0x49,0x52,0x42,0x5B,0x82,0x49,0x08,0x4C,  /* 00000358    "IRB[.I.L" */
+    0x4E,0x4B,0x43,0x08,0x5F,0x48,0x49,0x44,  /* 00000360    "NKC._HID" */
+    0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F,0x55,  /* 00000368    ".A...._U" */
+    0x49,0x44,0x0A,0x03,0x14,0x1C,0x5F,0x53,  /* 00000370    "ID...._S" */
+    0x54,0x41,0x00,0x7B,0x50,0x49,0x52,0x43,  /* 00000378    "TA.{PIRC" */
+    0x0A,0x80,0x60,0xA0,0x08,0x93,0x60,0x0A,  /* 00000380    "..`...`." */
+    0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4,0x0A,  /* 00000388    "........" */
+    0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53,0x00,  /* 00000390    "..._PRS." */
+    0xA4,0x42,0x55,0x46,0x41,0x14,0x11,0x5F,  /* 00000398    ".BUFA.._" */
+    0x44,0x49,0x53,0x00,0x7D,0x50,0x49,0x52,  /* 000003A0    "DIS.}PIR" */
+    0x43,0x0A,0x80,0x50,0x49,0x52,0x43,0x14,  /* 000003A8    "C..PIRC." */
+    0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B,0x50,  /* 000003B0    "._CRS.{P" */
+    0x49,0x52,0x43,0x0A,0x0F,0x60,0x79,0x01,  /* 000003B8    "IRC..`y." */
+    0x60,0x49,0x52,0x51,0x56,0xA4,0x42,0x55,  /* 000003C0    "`IRQV.BU" */
+    0x46,0x42,0x14,0x1B,0x5F,0x53,0x52,0x53,  /* 000003C8    "FB.._SRS" */
+    0x01,0x8B,0x68,0x01,0x49,0x52,0x51,0x31,  /* 000003D0    "..h.IRQ1" */
+    0x82,0x49,0x52,0x51,0x31,0x60,0x76,0x60,  /* 000003D8    ".IRQ1`v`" */
+    0x70,0x60,0x50,0x49,0x52,0x43,0x5B,0x82,  /* 000003E0    "p`PIRC[." */
+    0x49,0x08,0x4C,0x4E,0x4B,0x44,0x08,0x5F,  /* 000003E8    "I.LNKD._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F,  /* 000003F0    "HID.A..." */
+    0x08,0x5F,0x55,0x49,0x44,0x0A,0x04,0x14,  /* 000003F8    "._UID..." */
+    0x1C,0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,  /* 00000400    "._STA.{P" */
+    0x49,0x52,0x44,0x0A,0x80,0x60,0xA0,0x08,  /* 00000408    "IRD..`.." */
+    0x93,0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,  /* 00000410    ".`......" */
+    0x04,0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,  /* 00000418    "......_P" */
+    0x52,0x53,0x00,0xA4,0x42,0x55,0x46,0x41,  /* 00000420    "RS..BUFA" */
+    0x14,0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,  /* 00000428    ".._DIS.}" */
+    0x50,0x49,0x52,0x44,0x0A,0x80,0x50,0x49,  /* 00000430    "PIRD..PI" */
+    0x52,0x44,0x14,0x1A,0x5F,0x43,0x52,0x53,  /* 00000438    "RD.._CRS" */
+    0x00,0x7B,0x50,0x49,0x52,0x44,0x0A,0x0F,  /* 00000440    ".{PIRD.." */
+    0x60,0x79,0x01,0x60,0x49,0x52,0x51,0x56,  /* 00000448    "`y.`IRQV" */
+    0xA4,0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,  /* 00000450    ".BUFB.._" */
+    0x53,0x52,0x53,0x01,0x8B,0x68,0x01,0x49,  /* 00000458    "SRS..h.I" */
+    0x52,0x51,0x31,0x82,0x49,0x52,0x51,0x31,  /* 00000460    "RQ1.IRQ1" */
+    0x60,0x76,0x60,0x70,0x60,0x50,0x49,0x52,  /* 00000468    "`v`p`PIR" */
+    0x44,0x14,0x16,0x5F,0x50,0x52,0x54,0x00,  /* 00000470    "D.._PRT." */
+    0xA0,0x0A,0x50,0x49,0x43,0x44,0xA4,0x50,  /* 00000478    "..PICD.P" */
+    0x52,0x54,0x41,0xA4,0x50,0x52,0x54,0x50,  /* 00000480    "RTA.PRTP" */
+    0x08,0x50,0x52,0x54,0x50,0x12,0x43,0x0E,  /* 00000488    ".PRTP.C." */
+    0x10,0x12,0x0B,0x04,0x0B,0xFF,0xFF,0x00,  /* 00000490    "........" */
+    0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0B,0x04,  /* 00000498    "LNKA...." */
+    0x0B,0xFF,0xFF,0x01,0x4C,0x4E,0x4B,0x42,  /* 000004A0    "....LNKB" */
+    0x00,0x12,0x0C,0x04,0x0B,0xFF,0xFF,0x0A,  /* 000004A8    "........" */
+    0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0C,  /* 000004B0    ".LNKC..." */
+    0x04,0x0B,0xFF,0xFF,0x0A,0x03,0x4C,0x4E,  /* 000004B8    "......LN" */
+    0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF,  /* 000004C0    "KD......" */
+    0xFF,0x01,0x00,0x00,0x4C,0x4E,0x4B,0x42,  /* 000004C8    "....LNKB" */
+    0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x01,  /* 000004D0    "........" */
+    0x00,0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12,  /* 000004D8    "..LNKC.." */
+    0x0E,0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,  /* 000004E0    "........" */
+    0x02,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,  /* 000004E8    ".LNKD..." */
+    0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x03,  /* 000004F0    "........" */
+    0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04,  /* 000004F8    "LNKA...." */
+    0x0C,0xFF,0xFF,0x02,0x00,0x00,0x4C,0x4E,  /* 00000500    "......LN" */
+    0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF,  /* 00000508    "KC......" */
+    0xFF,0x02,0x00,0x01,0x4C,0x4E,0x4B,0x44,  /* 00000510    "....LNKD" */
+    0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,  /* 00000518    "........" */
+    0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00,  /* 00000520    "...LNKA." */
+    0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,0x00,  /* 00000528    "........" */
+    0x0A,0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12,  /* 00000530    "..LNKB.." */
+    0x0D,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x00,  /* 00000538    "........" */
+    0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04,  /* 00000540    "LNKD...." */
+    0x0C,0xFF,0xFF,0x03,0x00,0x01,0x4C,0x4E,  /* 00000548    "......LN" */
+    0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,  /* 00000550    "KA......" */
+    0xFF,0x03,0x00,0x0A,0x02,0x4C,0x4E,0x4B,  /* 00000558    ".....LNK" */
+    0x42,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,  /* 00000560    "B......." */
+    0x03,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43,  /* 00000568    "....LNKC" */
+    0x00,0x08,0x50,0x52,0x54,0x41,0x12,0x32,  /* 00000570    "..PRTA.2" */
+    0x04,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01,  /* 00000578    "........" */
+    0x00,0x00,0x00,0x0A,0x05,0x12,0x0B,0x04,  /* 00000580    "........" */
+    0x0C,0xFF,0xFF,0x02,0x00,0x00,0x00,0x0A,  /* 00000588    "........" */
+    0x07,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03,  /* 00000590    "........" */
+    0x00,0x00,0x00,0x0A,0x0A,0x12,0x0B,0x04,  /* 00000598    "........" */
+    0x0C,0xFF,0xFF,0x03,0x00,0x00,0x00,0x0A,  /* 000005A0    "........" */
+    0x0B,0x5B,0x82,0x48,0x31,0x49,0x53,0x41,  /* 000005A8    ".[.H1ISA" */
+    0x5F,0x08,0x5F,0x41,0x44,0x52,0x00,0x5B,  /* 000005B0    "_._ADR.[" */
+    0x80,0x50,0x49,0x52,0x51,0x02,0x0A,0x60,  /* 000005B8    ".PIRQ..`" */
+    0x0A,0x04,0x10,0x2E,0x5C,0x00,0x5B,0x81,  /* 000005C0    "....\.[." */
+    0x29,0x5C,0x2F,0x04,0x5F,0x53,0x42,0x5F,  /* 000005C8    ")\/._SB_" */
+    0x50,0x43,0x49,0x30,0x49,0x53,0x41,0x5F,  /* 000005D0    "PCI0ISA_" */
+    0x50,0x49,0x52,0x51,0x01,0x50,0x49,0x52,  /* 000005D8    "PIRQ.PIR" */
+    0x41,0x08,0x50,0x49,0x52,0x42,0x08,0x50,  /* 000005E0    "A.PIRB.P" */
+    0x49,0x52,0x43,0x08,0x50,0x49,0x52,0x44,  /* 000005E8    "IRC.PIRD" */
+    0x08,0x5B,0x82,0x46,0x0B,0x53,0x59,0x53,  /* 000005F0    ".[.F.SYS" */
+    0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,  /* 000005F8    "R._HID.A" */
+    0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49,0x44,  /* 00000600    "...._UID" */
+    0x01,0x08,0x43,0x52,0x53,0x5F,0x11,0x4E,  /* 00000608    "..CRS_.N" */
+    0x08,0x0A,0x8A,0x47,0x01,0x10,0x00,0x10,  /* 00000610    "...G...." */
+    0x00,0x00,0x10,0x47,0x01,0x22,0x00,0x22,  /* 00000618    "...G."."" */
+    0x00,0x00,0x0C,0x47,0x01,0x30,0x00,0x30,  /* 00000620    "...G.0.0" */
+    0x00,0x00,0x10,0x47,0x01,0x44,0x00,0x44,  /* 00000628    "...G.D.D" */
+    0x00,0x00,0x1C,0x47,0x01,0x62,0x00,0x62,  /* 00000630    "...G.b.b" */
+    0x00,0x00,0x02,0x47,0x01,0x65,0x00,0x65,  /* 00000638    "...G.e.e" */
+    0x00,0x00,0x0B,0x47,0x01,0x72,0x00,0x72,  /* 00000640    "...G.r.r" */
+    0x00,0x00,0x0E,0x47,0x01,0x80,0x00,0x80,  /* 00000648    "...G...." */
+    0x00,0x00,0x01,0x47,0x01,0x84,0x00,0x84,  /* 00000650    "...G...." */
+    0x00,0x00,0x03,0x47,0x01,0x88,0x00,0x88,  /* 00000658    "...G...." */
+    0x00,0x00,0x01,0x47,0x01,0x8C,0x00,0x8C,  /* 00000660    "...G...." */
+    0x00,0x00,0x03,0x47,0x01,0x90,0x00,0x90,  /* 00000668    "...G...." */
+    0x00,0x00,0x10,0x47,0x01,0xA2,0x00,0xA2,  /* 00000670    "...G...." */
+    0x00,0x00,0x1C,0x47,0x01,0xE0,0x00,0xE0,  /* 00000678    "...G...." */
+    0x00,0x00,0x10,0x47,0x01,0xA0,0x08,0xA0,  /* 00000680    "...G...." */
+    0x08,0x00,0x04,0x47,0x01,0xC0,0x0C,0xC0,  /* 00000688    "...G...." */
+    0x0C,0x00,0x10,0x47,0x01,0xD0,0x04,0xD0,  /* 00000690    "...G...." */
+    0x04,0x00,0x02,0x79,0x00,0x14,0x0B,0x5F,  /* 00000698    "...y..._" */
+    0x43,0x52,0x53,0x00,0xA4,0x43,0x52,0x53,  /* 000006A0    "CRS..CRS" */
+    0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43,0x5F,  /* 000006A8    "_[.+PIC_" */
+    0x08,0x5F,0x48,0x49,0x44,0x0B,0x41,0xD0,  /* 000006B0    "._HID.A." */
+    0x08,0x5F,0x43,0x52,0x53,0x11,0x18,0x0A,  /* 000006B8    "._CRS..." */
+    0x15,0x47,0x01,0x20,0x00,0x20,0x00,0x01,  /* 000006C0    ".G. . .." */
+    0x02,0x47,0x01,0xA0,0x00,0xA0,0x00,0x01,  /* 000006C8    ".G......" */
+    0x02,0x22,0x04,0x00,0x79,0x00,0x5B,0x82,  /* 000006D0    "."..y.[." */
+    0x47,0x05,0x44,0x4D,0x41,0x30,0x08,0x5F,  /* 000006D8    "G.DMA0._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x02,0x00,  /* 000006E0    "HID.A..." */
+    0x08,0x5F,0x43,0x52,0x53,0x11,0x41,0x04,  /* 000006E8    "._CRS.A." */
+    0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01,0x00,  /* 000006F0    ".=*..G.." */
+    0x00,0x00,0x00,0x00,0x10,0x47,0x01,0x81,  /* 000006F8    ".....G.." */
+    0x00,0x81,0x00,0x00,0x03,0x47,0x01,0x87,  /* 00000700    ".....G.." */
+    0x00,0x87,0x00,0x00,0x01,0x47,0x01,0x89,  /* 00000708    ".....G.." */
+    0x00,0x89,0x00,0x00,0x03,0x47,0x01,0x8F,  /* 00000710    ".....G.." */
+    0x00,0x8F,0x00,0x00,0x01,0x47,0x01,0xC0,  /* 00000718    ".....G.." */
+    0x00,0xC0,0x00,0x00,0x20,0x47,0x01,0x80,  /* 00000720    ".... G.." */
+    0x04,0x80,0x04,0x00,0x10,0x79,0x00,0x5B,  /* 00000728    ".....y.[" */
+    0x82,0x25,0x54,0x4D,0x52,0x5F,0x08,0x5F,  /* 00000730    ".%TMR_._" */
+    0x48,0x49,0x44,0x0C,0x41,0xD0,0x01,0x00,  /* 00000738    "HID.A..." */
+    0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,  /* 00000740    "._CRS..." */
+    0x0D,0x47,0x01,0x40,0x00,0x40,0x00,0x00,  /* 00000748    ".G.@.@.." */
+    0x04,0x22,0x01,0x00,0x79,0x00,0x5B,0x82,  /* 00000750    "."..y.[." */
+    0x25,0x52,0x54,0x43,0x5F,0x08,0x5F,0x48,  /* 00000758    "%RTC_._H" */
+    0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00,0x08,  /* 00000760    "ID.A...." */
+    0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,  /* 00000768    "_CRS...." */
+    0x47,0x01,0x70,0x00,0x70,0x00,0x00,0x02,  /* 00000770    "G.p.p..." */
+    0x22,0x00,0x01,0x79,0x00,0x5B,0x82,0x22,  /* 00000778    ""..y.[."" */
+    0x53,0x50,0x4B,0x52,0x08,0x5F,0x48,0x49,  /* 00000780    "SPKR._HI" */
+    0x44,0x0C,0x41,0xD0,0x08,0x00,0x08,0x5F,  /* 00000788    "D.A...._" */
+    0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A,0x47,  /* 00000790    "CRS....G" */
+    0x01,0x61,0x00,0x61,0x00,0x00,0x01,0x79,  /* 00000798    ".a.a...y" */
+    0x00,0x5B,0x82,0x31,0x50,0x53,0x32,0x4D,  /* 000007A0    ".[.1PS2M" */
+    0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,  /* 000007A8    "._HID.A." */
+    0x0F,0x13,0x08,0x5F,0x43,0x49,0x44,0x0C,  /* 000007B0    "..._CID." */
+    0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F,0x53,  /* 000007B8    "A....._S" */
+    0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,  /* 000007C0    "TA....._" */
+    0x43,0x52,0x53,0x11,0x08,0x0A,0x05,0x22,  /* 000007C8    "CRS...."" */
+    0x00,0x10,0x79,0x00,0x5B,0x82,0x42,0x04,  /* 000007D0    "..y.[.B." */
+    0x50,0x53,0x32,0x4B,0x08,0x5F,0x48,0x49,  /* 000007D8    "PS2K._HI" */
+    0x44,0x0C,0x41,0xD0,0x03,0x03,0x08,0x5F,  /* 000007E0    "D.A...._" */
+    0x43,0x49,0x44,0x0C,0x41,0xD0,0x03,0x0B,  /* 000007E8    "CID.A..." */
+    0x14,0x09,0x5F,0x53,0x54,0x41,0x00,0xA4,  /* 000007F0    ".._STA.." */
+    0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,0x11,  /* 000007F8    "..._CRS." */
+    0x18,0x0A,0x15,0x47,0x01,0x60,0x00,0x60,  /* 00000800    "...G.`.`" */
+    0x00,0x00,0x01,0x47,0x01,0x64,0x00,0x64,  /* 00000808    "...G.d.d" */
+    0x00,0x00,0x01,0x22,0x02,0x00,0x79,0x00,  /* 00000810    "..."..y." */
+    0x5B,0x82,0x3A,0x46,0x44,0x43,0x30,0x08,  /* 00000818    "[.:FDC0." */
+    0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x07,  /* 00000820    "_HID.A.." */
+    0x00,0x14,0x09,0x5F,0x53,0x54,0x41,0x00,  /* 00000828    "..._STA." */
+    0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,  /* 00000830    "...._CRS" */
+    0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0,0x03,  /* 00000838    "....G..." */
+    0xF0,0x03,0x01,0x06,0x47,0x01,0xF7,0x03,  /* 00000840    "....G..." */
+    0xF7,0x03,0x01,0x01,0x22,0x40,0x00,0x2A,  /* 00000848    "...."@.*" */
+    0x04,0x00,0x79,0x00,0x5B,0x82,0x35,0x55,  /* 00000850    "..y.[.5U" */
+    0x41,0x52,0x31,0x08,0x5F,0x48,0x49,0x44,  /* 00000858    "AR1._HID" */
+    0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,  /* 00000860    ".A...._U" */
+    0x49,0x44,0x01,0x14,0x09,0x5F,0x53,0x54,  /* 00000868    "ID..._ST" */
+    0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,  /* 00000870    "A....._C" */
+    0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,  /* 00000878    "RS....G." */
+    0xF8,0x03,0xF8,0x03,0x01,0x08,0x22,0x10,  /* 00000880    "......"." */
+    0x00,0x79,0x00,0x5B,0x82,0x36,0x55,0x41,  /* 00000888    ".y.[.6UA" */
+    0x52,0x32,0x08,0x5F,0x48,0x49,0x44,0x0C,  /* 00000890    "R2._HID." */
+    0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,0x49,  /* 00000898    "A...._UI" */
+    0x44,0x0A,0x02,0x14,0x09,0x5F,0x53,0x54,  /* 000008A0    "D...._ST" */
+    0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,  /* 000008A8    "A....._C" */
+    0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,  /* 000008B0    "RS....G." */
+    0xF8,0x02,0xF8,0x02,0x01,0x08,0x22,0x08,  /* 000008B8    "......"." */
+    0x00,0x79,0x00,
 };
 int DsdtLen=sizeof(AmlCode);
diff -r 049e669e6a8a -r b29806fb6ba0 tools/firmware/acpi/acpi_fadt.h
--- a/tools/firmware/acpi/acpi_fadt.h   Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/firmware/acpi/acpi_fadt.h   Mon Jun 26 14:53:55 2006 -0500
@@ -22,19 +22,19 @@
 // FADT Definitions, see ACPI 2.0 specification for details.
 //
 
-#define ACPI_OEM_FADT_REVISION  0x00000000 // TBD
+#define ACPI_OEM_FADT_REVISION  0x00000001 // TBD
 
-#define ACPI_PREFERRED_PM_PROFILE 0x04
+#define ACPI_PREFERRED_PM_PROFILE 0x00
 #define ACPI_SCI_INT              0x0009
-#define ACPI_SMI_CMD              0x000000B2
+#define ACPI_SMI_CMD              0x00000000
 #define ACPI_ACPI_ENABLE    0x00
 #define ACPI_ACPI_DISABLE   0x00
 #define ACPI_S4_BIOS_REQ    0x00
 #define ACPI_PSTATE_CNT     0x00
-#define ACPI_GPE1_BASE      0x20
+#define ACPI_GPE1_BASE      0x00
 #define ACPI_CST_CNT        0x00
-#define ACPI_P_LVL2_LAT     0x0065
-#define ACPI_P_LVL3_LAT     0X03E9
+#define ACPI_P_LVL2_LAT     0x0064
+#define ACPI_P_LVL3_LAT     0X03E8
 #define ACPI_FLUSH_SIZE     0x00
 #define ACPI_FLUSH_STRIDE   0x00
 #define ACPI_DUTY_OFFSET    0x01
@@ -51,15 +51,16 @@
 //
 // Fixed Feature Flags
 // 
-#define ACPI_FIXED_FEATURE_FLAGS (ACPI_SLP_BUTTON| ACPI_WBINVD  )
+#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC)
 
 //
 // PM1A Event Register Block Generic Address Information
 //
 #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
-#define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x00
+#define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x20
 #define ACPI_PM1A_EVT_BLK_BIT_OFFSET        0x00
-#define ACPI_PM1A_EVT_BLK_ADDRESS           0x0000000000008000
+//#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
+#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c040
 
 //
 // PM1B Event Register Block Generic Address Information
@@ -73,7 +74,7 @@
 // PM1A Control Register Block Generic Address Information
 //
 #define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
-#define ACPI_PM1A_CNT_BLK_BIT_WIDTH         0x08
+#define ACPI_PM1A_CNT_BLK_BIT_WIDTH         0x10
 #define ACPI_PM1A_CNT_BLK_BIT_OFFSET        0x00
 #define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/ioemu/hw/pc.c       Mon Jun 26 14:53:55 2006 -0500
@@ -375,7 +375,9 @@ static int serial_io[MAX_SERIAL_PORTS] =
 static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
 static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
 
-extern int acpi_init(unsigned int base);
+//extern int acpi_init(unsigned int base);
+/*  PIIX4 acpi pci configuration space, func 3 */
+extern void pci_piix4_acpi_init(PCIBus *bus);
 
 #define NOBIOS 1
 
@@ -583,7 +585,9 @@ void pc_init(uint64_t ram_size, int vga_
     floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table);
 
     cmos_init(ram_size, boot_device, bs_table, timeoffset);
-    acpi_init(0x8000);
+// using PIIX4 acpi model
+//    acpi_init(0x8000);
+    pci_piix4_acpi_init(pci_bus);
 
     if (pci_enabled && usb_enabled) {
        usb_uhci_init(pci_bus, usb_root_ports);
diff -r 049e669e6a8a -r b29806fb6ba0 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/ioemu/hw/pci.c      Mon Jun 26 14:53:55 2006 -0500
@@ -1394,7 +1394,7 @@ static uint32_t pci_bios_io_addr;
 static uint32_t pci_bios_io_addr;
 static uint32_t pci_bios_mem_addr;
 /* host irqs corresponding to PCI irqs A-D */
-static uint8_t pci_irqs[4] = { 11, 9, 11, 9 };
+static uint8_t pci_irqs[4] = { 10, 11, 10, 11 };
 
 static void pci_set_io_region_addr(PCIDevice *d, int region_num, uint32_t addr)
 {
@@ -1447,12 +1447,22 @@ static void pci_bios_init_device(PCIDevi
             pci_set_io_region_addr(d, 3, 0x374);
         }
         break;
+       case 0x0680:
+       if (vendor_id == 0x8086 && device_id == 0x7113) {
+          // PIIX4 ACPI PM 
+        pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4
+        pci_config_writew(d, 0x22, 0x0000); 
+        goto default_map;
+       }
+         break;
+
     case 0x0300:
         if (vendor_id != 0x1234)
             goto default_map;
         /* VGA: map frame buffer to default Bochs VBE address */
         pci_set_io_region_addr(d, 0, 0xE0000000);
         break;
+
     case 0x0800:
         /* PIC */
         vendor_id = pci_config_readw(d, PCI_VENDOR_ID);
@@ -1497,6 +1507,13 @@ static void pci_bios_init_device(PCIDevi
         pic_irq = pci_irqs[pin];
         pci_config_writeb(d, PCI_INTERRUPT_LINE, pic_irq);
     }
+    if (class== 0x0680&& vendor_id == 0x8086 && device_id == 0x7113) {
+         // PIIX4 ACPI PM
+       pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4
+       pci_config_writew(d, 0x22, 0x0000);
+       pci_config_writew(d, 0x3c, 0x0009); // Hardcodeed IRQ9
+       pci_config_writew(d, 0x3d, 0x0001);
+    }
 }
 
 /*
diff -r 049e669e6a8a -r b29806fb6ba0 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/ioemu/target-i386-dm/Makefile       Mon Jun 26 14:53:55 2006 -0500
@@ -281,7 +281,7 @@ VL_OBJS+= usb.o usb-hub.o usb-uhci.o usb
 # Hardware support
 VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o
 VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259_stub.o pc.o port-e9.o
-VL_OBJS+= cirrus_vga.o pcnet.o acpi.o
+VL_OBJS+= cirrus_vga.o pcnet.o piix4acpi.o
 VL_OBJS+= $(SOUND_HW) $(AUDIODRV) mixeng.o
 
 ifeq ($(TARGET_ARCH), ppc)
diff -r 049e669e6a8a -r b29806fb6ba0 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/libxc/xc_domain.c   Mon Jun 26 14:53:55 2006 -0500
@@ -283,6 +283,17 @@ int xc_domain_setmaxmem(int xc_handle,
     op.cmd = DOM0_SETDOMAINMAXMEM;
     op.u.setdomainmaxmem.domain = (domid_t)domid;
     op.u.setdomainmaxmem.max_memkb = max_memkb;
+    return do_dom0_op(xc_handle, &op);
+}
+
+int xc_domain_set_time_offset(int xc_handle,
+                              uint32_t domid,
+                              int32_t time_offset_seconds)
+{
+    DECLARE_DOM0_OP;
+    op.cmd = DOM0_SETTIMEOFFSET;
+    op.u.settimeoffset.domain = (domid_t)domid;
+    op.u.settimeoffset.time_offset_seconds = time_offset_seconds;
     return do_dom0_op(xc_handle, &op);
 }
 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/libxc/xc_linux_restore.c    Mon Jun 26 14:53:55 2006 -0500
@@ -572,42 +572,48 @@ int xc_linux_restore(int xc_handle, int 
     nr_pins = 0;
     for (i = 0; i < max_pfn; i++) {
 
-        if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) {
+        if ( (pfn_type[i] & LPINTAB) == 0 )
+            continue;
+
+        switch (pfn_type[i]) {
+
+        case (L1TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case (L2TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+            break;
+
+        case (L3TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+            break;
+
+        case (L4TAB|LPINTAB):
+            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH) {
             if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
                 ERR("Failed to pin batch of %d page tables", nr_pins);
                 goto out;
             }
             nr_pins = 0;
         }
-
-        if ( (pfn_type[i] & LPINTAB) == 0 )
-            continue;
-
-        switch(pfn_type[i]) {
-
-        case (L1TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
-
-        case (L2TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
-
-        case (L3TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
-
-        case (L4TAB|LPINTAB):
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
-
-        default:
-            continue;
-        }
-
-        pin[nr_pins].arg1.mfn = p2m[i];
-        nr_pins++;
-
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
+        ERR("Failed to pin batch of %d page tables", nr_pins);
+        goto out;
     }
 
     DPRINTF("\b\b\b\b100%%\n");
diff -r 049e669e6a8a -r b29806fb6ba0 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/libxc/xc_linux_save.c       Mon Jun 26 14:53:55 2006 -0500
@@ -91,12 +91,12 @@ static inline int test_bit (int nr, vola
 
 static inline void clear_bit (int nr, volatile void * addr)
 {
-    BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr));
+    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
 }
 
 static inline void set_bit ( int nr, volatile void * addr)
 {
-    BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr));
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
 }
 
 /* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
diff -r 049e669e6a8a -r b29806fb6ba0 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/libxc/xenctrl.h     Mon Jun 26 14:53:55 2006 -0500
@@ -415,6 +415,10 @@ int xc_domain_setmaxmem(int xc_handle,
                         uint32_t domid,
                         unsigned int max_memkb);
 
+int xc_domain_set_time_offset(int xc_handle,
+                              uint32_t domid,
+                              int32_t time_offset_seconds);
+
 int xc_domain_memory_increase_reservation(int xc_handle,
                                           uint32_t domid,
                                           unsigned long nr_extents,
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jun 26 14:53:55 2006 -0500
@@ -869,6 +869,30 @@ static PyObject *pyxc_domain_iomem_permi
     return zero;
 }
 
+static PyObject *pyxc_domain_set_time_offset(XcObject *self, PyObject *args)
+{
+    uint32_t dom;
+    int32_t time_offset_seconds;
+    time_t calendar_time;
+    struct tm local_time;
+    struct tm utc_time;
+
+    if (!PyArg_ParseTuple(args, "i", &dom))
+        return NULL;
+
+    calendar_time = time(NULL);
+    localtime_r(&calendar_time, &local_time);
+    gmtime_r(&calendar_time, &utc_time);
+    /* set up to get calendar time based on utc_time, with local dst setting */
+    utc_time.tm_isdst = local_time.tm_isdst;
+    time_offset_seconds = (int32_t)difftime(calendar_time, mktime(&utc_time));
+
+    if (xc_domain_set_time_offset(self->xc_handle, dom, time_offset_seconds) != 0)
+        return NULL;
+
+    Py_INCREF(zero);
+    return zero;
+}
 
 static PyObject *dom_op(XcObject *self, PyObject *args,
                         int (*fn)(int, uint32_t))
@@ -1207,6 +1231,13 @@ static PyMethodDef pyxc_methods[] = {
       METH_VARARGS, "\n"
       "Returns: [int]: The size in KiB of memory spanning the given number "
       "of pages.\n" },
+
+    { "domain_set_time_offset",
+      (PyCFunction)pyxc_domain_set_time_offset,
+      METH_VARARGS, "\n"
+      "Set a domain's time offset to Dom0's localtime\n"
+      " dom        [int]: Domain whose time offset is being set.\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
 
     { NULL, NULL, 0, NULL }
 };
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/util/xmlrpclib2.py
--- a/tools/python/xen/util/xmlrpclib2.py       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/util/xmlrpclib2.py       Mon Jun 26 14:53:55 2006 -0500
@@ -13,7 +13,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
-# Copyright (C) 2006 XenSource Ltd.
+# Copyright (C) 2006 XenSource Inc.
 #============================================================================
 
 """
@@ -26,11 +26,18 @@ from httplib import HTTPConnection, HTTP
 from httplib import HTTPConnection, HTTP
 from xmlrpclib import Transport
 from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
+import SocketServer
 import xmlrpclib, socket, os, stat
-import SocketServer
 
-import xen.xend.XendClient
 from xen.xend.XendLogging import log
+
+try:
+    import SSHTransport
+    ssh_enabled = True
+except ImportError:
+    # SSHTransport is disabled on Python <2.4, because it uses the subprocess
+    # package.
+    ssh_enabled = False
 
 
 # A new ServerProxy that also supports httpu urls.  An http URL comes in the
@@ -39,6 +46,31 @@ from xen.xend.XendLogging import log
 # httpu:///absolute/path/to/socket.sock
 #
 # It assumes that the RPC handler is /RPC2.  This probably needs to be improved
+
+# We're forced to subclass the RequestHandler class so that we can work around
+# some bugs in Keep-Alive handling and also enabled it by default
+class XMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+    protocol_version = "HTTP/1.1"
+
+    # this is inspired by SimpleXMLRPCRequestHandler's do_POST but differs
+    # in a few non-trivial ways
+    # 1) we never generate internal server errors.  We let the exception
+    #    propagate so that it shows up in the Xend debug logs
+    # 2) we don't bother checking for a _dispatch function since we don't
+    #    use one
+    def do_POST(self):
+        data = self.rfile.read(int(self.headers["content-length"]))
+        rsp = self.server._marshaled_dispatch(data)
+
+        self.send_response(200)
+        self.send_header("Content-Type", "text/xml")
+        self.send_header("Content-Length", str(len(rsp)))
+        self.end_headers()
+
+        self.wfile.write(rsp)
+        self.wfile.flush()
+        if self.close_connection == 1:
+            self.connection.shutdown(1)
 
 class HTTPUnixConnection(HTTPConnection):
     def connect(self):
@@ -75,9 +107,15 @@ class ServerProxy(xmlrpclib.ServerProxy)
             if protocol == 'httpu':
                 uri = 'http:' + rest
                 transport = UnixTransport()
+            elif protocol == 'ssh':
+                global ssh_enabled
+                if ssh_enabled:
+                    (transport, uri) = SSHTransport.getHTTPURI(uri)
+                else:
+                    raise ValueError(
+                        "SSH transport not supported on Python <2.4.")
         xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding,
                                        verbose, allow_none)
-
 
     def __request(self, methodname, params):
         response = xmlrpclib.ServerProxy.__request(self, methodname, params)
@@ -93,6 +131,10 @@ class ServerProxy(xmlrpclib.ServerProxy)
 
 class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer):
     allow_reuse_address = True
+
+    def __init__(self, addr, requestHandler=XMLRPCRequestHandler,
+                 logRequests=1):
+        SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests)
 
     def _marshaled_dispatch(self, data, dispatch_method = None):
         params, method = xmlrpclib.loads(data)
@@ -121,6 +163,7 @@ class TCPXMLRPCServer(SocketServer.Threa
         except xmlrpclib.Fault, fault:
             response = xmlrpclib.dumps(fault)
         except Exception, exn:
+            import xen.xend.XendClient
             log.exception(exn)
             response = xmlrpclib.dumps(
                 xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn)))
@@ -131,10 +174,10 @@ class TCPXMLRPCServer(SocketServer.Threa
 # It implements proper support for allow_reuse_address by
 # unlink()'ing an existing socket.
 
-class UnixXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+class UnixXMLRPCRequestHandler(XMLRPCRequestHandler):
     def address_string(self):
         try:
-            return SimpleXMLRPCRequestHandler.address_string(self)
+            return XMLRPCRequestHandler.address_string(self)
         except ValueError, e:
             return self.client_address[:2]
 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/xend/XendClient.py       Mon Jun 26 14:53:55 2006 -0500
@@ -18,6 +18,8 @@
 #============================================================================
 
 from xen.util.xmlrpclib2 import ServerProxy
+import os
+import sys
 
 XML_RPC_SOCKET = "/var/run/xend/xmlrpc.sock"
 
@@ -25,4 +27,13 @@ ERROR_GENERIC = 2
 ERROR_GENERIC = 2
 ERROR_INVALID_DOMAIN = 3
 
-server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock')
+uri = 'httpu:///var/run/xend/xmlrpc.sock'
+if os.environ.has_key('XM_SERVER'):
+    uri = os.environ['XM_SERVER']
+
+try:
+    server = ServerProxy(uri)
+except ValueError, exn:
+    print >>sys.stderr, exn
+    sys.exit(1)
+
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/xend/XendDomainInfo.py   Mon Jun 26 14:53:55 2006 -0500
@@ -135,6 +135,7 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [
     ('bootloader', str),
     ('bootloader_args', str),
     ('features', str),
+    ('localtime', int),
     ]
 
 ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS
@@ -1259,6 +1260,10 @@ class XendDomainInfo:
             self.image = image.create(self,
                                       self.info['image'],
                                       self.info['device'])
+
+            localtime = self.info['localtime']
+            if localtime is not None and localtime == 1:
+                xc.domain_set_time_offset(self.domid)
 
             xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/xend/balloon.py  Mon Jun 26 14:53:55 2006 -0500
@@ -31,7 +31,7 @@ PROC_XEN_BALLOON = '/proc/xen/balloon'
 
 BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
                       # rounded.
-RETRY_LIMIT = 10
+RETRY_LIMIT = 20
 RETRY_LIMIT_INCR = 5
 ##
 # The time to sleep between retries grows linearly, using this value (in
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/xm/create.py     Mon Jun 26 14:53:55 2006 -0500
@@ -672,6 +672,8 @@ def make_config(vals):
         config.append(['backend', ['netif']])
     if vals.tpmif:
         config.append(['backend', ['tpmif']])
+    if vals.localtime:
+        config.append(['localtime', vals.localtime])
 
     config_image = configure_image(vals)
     if vals.bootloader:
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/python/xen/xm/main.py       Mon Jun 26 14:53:55 2006 -0500
@@ -41,6 +41,7 @@ import xen.xend.XendClient
 import xen.xend.XendClient
 from xen.xend.XendClient import server
 from xen.util import security
+from select import select
 
 # getopt.gnu_getopt is better, but only exists in Python 2.3+.  Use
 # getopt.getopt if gnu_getopt is not available.  This will mean that options
@@ -124,6 +125,7 @@ loadpolicy_help = "loadpolicy <policy>  
 loadpolicy_help = "loadpolicy <policy>              Load binary policy into hypervisor"
 makepolicy_help = "makepolicy <policy>              Build policy and create .bin/.map files"
 labels_help     = "labels [policy] [type=DOM|..]    List <type> labels for (active) policy."
+serve_help      = "serve                            Proxy Xend XML-RPC over stdio"
 
 short_command_list = [
     "console",
@@ -171,7 +173,8 @@ host_commands = [
 host_commands = [
     "dmesg",
     "info",
-    "log"
+    "log",
+    "serve",
     ]
 
 scheduler_commands = [
@@ -273,7 +276,7 @@ for command in all_commands:
 ####################################################################
 
 def arg_check(args, name, lo, hi = -1):
-    n = len(args)
+    n = len([i for i in args if i != '--'])
     
     if hi == -1:
         if n != lo:
@@ -833,6 +836,32 @@ def xm_log(args):
     arg_check(args, "log", 0)
     
     print server.xend.node.log()
+
+def xm_serve(args):
+    arg_check(args, "serve", 0)
+
+    from fcntl import fcntl, F_SETFL
+    
+    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    s.connect(xen.xend.XendClient.XML_RPC_SOCKET)
+    fcntl(sys.stdin, F_SETFL, os.O_NONBLOCK)
+
+    while True:
+        iwtd, owtd, ewtd = select([sys.stdin, s], [], [])
+        if s in iwtd:
+            data = s.recv(4096)
+            if len(data) > 0:
+                sys.stdout.write(data)
+                sys.stdout.flush()
+            else:
+                break
+        if sys.stdin in iwtd:
+            data = sys.stdin.read(4096)
+            if len(data) > 0:
+                s.sendall(data)
+            else:
+                break
+    s.close()
 
 def parse_dev_info(info):
     def get_info(n, t, d):
@@ -1072,6 +1101,7 @@ commands = {
     "dmesg": xm_dmesg,
     "info": xm_info,
     "log": xm_log,
+    "serve": xm_serve,
     # scheduler
     "sched-bvt": xm_sched_bvt,
     "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
diff -r 049e669e6a8a -r b29806fb6ba0 tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/security/secpol_tool.c      Mon Jun 26 14:53:55 2006 -0500
@@ -229,6 +229,7 @@ void acm_dump_policy_buffer(void *buf, i
 
 #define PULL_CACHE_SIZE                8192
 uint8_t pull_buffer[PULL_CACHE_SIZE];
+
 int acm_domain_getpolicy(int xc_handle)
 {
     struct acm_getpolicy getpolicy;
@@ -236,7 +237,7 @@ int acm_domain_getpolicy(int xc_handle)
 
     memset(pull_buffer, 0x00, sizeof(pull_buffer));
     getpolicy.interface_version = ACM_INTERFACE_VERSION;
-    getpolicy.pullcache = (void *) pull_buffer;
+    set_xen_guest_handle(getpolicy.pullcache, pull_buffer);
     getpolicy.pullcache_size = sizeof(pull_buffer);
     ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy));
 
@@ -281,7 +282,7 @@ int acm_domain_loadpolicy(int xc_handle,
         /* dump it and then push it down into xen/acm */
         acm_dump_policy_buffer(buffer, len);
         setpolicy.interface_version = ACM_INTERFACE_VERSION;
-        setpolicy.pushcache = (void *) buffer;
+        set_xen_guest_handle(setpolicy.pushcache, buffer);
         setpolicy.pushcache_size = len;
         ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, sizeof(setpolicy));
 
@@ -330,7 +331,7 @@ int acm_domain_dumpstats(int xc_handle)
 
     memset(stats_buffer, 0x00, sizeof(stats_buffer));
     dumpstats.interface_version = ACM_INTERFACE_VERSION;
-    dumpstats.pullcache = (void *) stats_buffer;
+    set_xen_guest_handle(dumpstats.pullcache, stats_buffer);
     dumpstats.pullcache_size = sizeof(stats_buffer);
     ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats));
 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/xm-test/grouptest/default
--- a/tools/xm-test/grouptest/default   Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/xm-test/grouptest/default   Mon Jun 26 14:53:55 2006 -0500
@@ -21,7 +21,7 @@ reboot
 reboot
 restore
 save
-sedf
+sched-credit
 shutdown
 sysrq
 unpause
diff -r 049e669e6a8a -r b29806fb6ba0 tools/xm-test/grouptest/medium
--- a/tools/xm-test/grouptest/medium    Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/xm-test/grouptest/medium    Mon Jun 26 14:53:55 2006 -0500
@@ -16,7 +16,7 @@ reboot
 reboot
 restore 02_restore_badparm_neg.test 03_restore_badfilename_neg.test 04_restore_withdevices_pos.test
 save
-sedf
+sched-credit
 shutdown
 sysrq 01_sysrq_basic_neg.test 02_sysrq_sync_pos.test
 unpause
diff -r 049e669e6a8a -r b29806fb6ba0 tools/xm-test/lib/XmTestLib/Console.py
--- a/tools/xm-test/lib/XmTestLib/Console.py    Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/xm-test/lib/XmTestLib/Console.py    Mon Jun 26 14:53:55 2006 -0500
@@ -82,9 +82,6 @@ class XmConsole:
 
         tty.setraw(self.consoleFd, termios.TCSANOW)
 
-        self.__chewall(self.consoleFd)
-
-
     def __addToHistory(self, line):
         self.historyBuffer.append(line)
         self.historyLines += 1
@@ -120,34 +117,47 @@ class XmConsole:
         output"""
         self.PROMPT = prompt
 
-
-    def __chewall(self, fd):
+    def __getprompt(self, fd):
         timeout = 0
-        bytes   = 0
-        
-        while timeout < 3:
-            i, o, e = select.select([fd], [], [], 1)
-            if fd in i:
-                try:
-                    foo = os.read(fd, 1)
-                    if self.debugMe:
-                        sys.stdout.write(foo)
-                    bytes += 1
-                except Exception, exn:
-                    raise ConsoleError(str(exn))
-
-            else:
-                timeout += 1
-
-            if self.limit and bytes >= self.limit:
+        bytes = 0
+        while timeout < 180:
+            # eat anything while total bytes less than limit else raise RUNAWAY
+            while (not self.limit) or (bytes < self.limit):
+                i, o, e = select.select([fd], [], [], 1)
+                if fd in i:
+                    try:
+                        foo = os.read(fd, 1)
+                        if self.debugMe:
+                            sys.stdout.write(foo)
+                        bytes += 1
+                    except Exception, exn:
+                        raise ConsoleError(str(exn))
+                else:
+                    break
+            else:
                 raise ConsoleError("Console run-away (exceeded %i bytes)"
                                    % self.limit, RUNAWAY)
-
-        if self.debugMe:
-            print "Ignored %i bytes of miscellaneous console output" % bytes
-        
-        return bytes
-
+            # press enter
+            os.write(self.consoleFd, "\n")
+            # look for prompt
+            for prompt_char in "\r\n" + self.PROMPT:
+                i, o, e = select.select([fd], [], [], 1)
+                if fd in i:
+                    try:
+                        foo = os.read(fd, 1)
+                        if self.debugMe:
+                            sys.stdout.write(foo)
+                        if foo != prompt_char:
+                            break
+                    except Exception, exn:
+                        raise ConsoleError(str(exn))
+                else:
+                    timeout += 1
+                    break
+            else:
+                break
+        else:
+            raise ConsoleError("Timed out waiting for console prompt")
 
     def __runCmd(self, command, saveHistory=True):
         output = ""
@@ -155,7 +165,7 @@ class XmConsole:
         lines  = 0
         bytes  = 0
 
-        self.__chewall(self.consoleFd)
+        self.__getprompt(self.consoleFd)
 
         if verbose:
             print "[%s] Sending `%s'" % (self.domain, command)
@@ -176,7 +186,7 @@ class XmConsole:
                         "Failed to read from console (fd=%i): %s" %
                         (self.consoleFd, exn))
             else:
-                raise ConsoleError("Timed out waiting for console")
+                raise ConsoleError("Timed out waiting for console command")
 
             if self.limit and bytes >= self.limit:
                 raise ConsoleError("Console run-away (exceeded %i bytes)"
diff -r 049e669e6a8a -r b29806fb6ba0 tools/xm-test/tests/memset/03_memset_random_pos.py
--- a/tools/xm-test/tests/memset/03_memset_random_pos.py        Mon Jun 26 13:09:11 2006 -0400
+++ b/tools/xm-test/tests/memset/03_memset_random_pos.py        Mon Jun 26 14:53:55 2006 -0500
@@ -22,12 +22,6 @@ except DomainError, e:
     FAIL(str(e))
 
 times = random.randint(10,50)
-
-try:
-    console = XmConsole(domain.getName())
-    console.sendInput("input")
-except ConsoleError, e:
-    FAIL(str(e))
 
 try:
     run = console.runCmd("cat /proc/xen/balloon | grep Current");
diff -r 049e669e6a8a -r b29806fb6ba0 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/acm/acm_core.c        Mon Jun 26 14:53:55 2006 -0500
@@ -222,9 +222,8 @@ acm_setup(unsigned int *initrdidx,
         pol = (struct acm_policy_buffer *)_policy_start;
         if (ntohl(pol->magic) == ACM_MAGIC)
         {
-            rc = acm_set_policy((void *)_policy_start,
-                                (u32)_policy_len,
-                                0);
+            rc = do_acm_set_policy((void *)_policy_start,
+                                   (u32)_policy_len);
             if (rc == ACM_OK)
             {
                 printkd("Policy len  0x%lx, start at %p.\n",_policy_len,_policy_start);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/acm/acm_policy.c
--- a/xen/acm/acm_policy.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/acm/acm_policy.c      Mon Jun 26 14:53:55 2006 -0500
@@ -26,36 +26,43 @@
 #include <xen/lib.h>
 #include <xen/delay.h>
 #include <xen/sched.h>
+#include <xen/guest_access.h>
 #include <acm/acm_core.h>
 #include <public/acm_ops.h>
 #include <acm/acm_hooks.h>
 #include <acm/acm_endian.h>
 
 int
-acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size, int isuserbuffer)
+acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size)
 {
     u8 *policy_buffer = NULL;
-    struct acm_policy_buffer *pol;
+    int ret = -EFAULT;
  
     if (buf_size < sizeof(struct acm_policy_buffer))
         return -EFAULT;
 
-    /* 1. copy buffer from domain */
+    /* copy buffer from guest domain */
     if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL)
         return -ENOMEM;
 
-    if (isuserbuffer) {
-        if (copy_from_guest(policy_buffer, buf, buf_size))
-        {
-            printk("%s: Error copying!\n",__func__);
-            goto error_free;
-        }
-    } else
-        memcpy(policy_buffer, buf, buf_size);
-
-    /* 2. some sanity checking */
-    pol = (struct acm_policy_buffer *)policy_buffer;
-
+    if (copy_from_guest(policy_buffer, buf, buf_size))
+    {
+        printk("%s: Error copying!\n",__func__);
+        goto error_free;
+    }
+    ret = do_acm_set_policy(policy_buffer, buf_size);
+
+ error_free:
+    xfree(policy_buffer);
+    return ret;
+}
+
+
+int
+do_acm_set_policy(void *buf, u32 buf_size)
+{
+    struct acm_policy_buffer *pol = (struct acm_policy_buffer *)buf;
+    /* some sanity checking */
     if ((ntohl(pol->magic) != ACM_MAGIC) ||
         (buf_size != ntohl(pol->len)) ||
         (ntohl(pol->policy_version) != ACM_POLICY_VERSION))
@@ -85,33 +92,31 @@ acm_set_policy(XEN_GUEST_HANDLE(void) bu
     /* get bin_policy lock and rewrite policy (release old one) */
     write_lock(&acm_bin_pol_rwlock);
 
-    /* 3. set label reference name */
+    /* set label reference name */
     if (acm_set_policy_reference(buf + ntohl(pol->policy_reference_offset),
                                  ntohl(pol->primary_buffer_offset) -
                                  ntohl(pol->policy_reference_offset)))
         goto error_lock_free;
 
-    /* 4. set primary policy data */
+    /* set primary policy data */
     if (acm_primary_ops->set_binary_policy(buf + ntohl(pol->primary_buffer_offset),
                                            ntohl(pol->secondary_buffer_offset) -
                                            ntohl(pol->primary_buffer_offset)))
         goto error_lock_free;
 
-    /* 5. set secondary policy data */
+    /* set secondary policy data */
     if (acm_secondary_ops->set_binary_policy(buf + ntohl(pol->secondary_buffer_offset),
                                              ntohl(pol->len) - 
                                              ntohl(pol->secondary_buffer_offset)))
         goto error_lock_free;
 
     write_unlock(&acm_bin_pol_rwlock);
-    xfree(policy_buffer);
     return ACM_OK;
 
  error_lock_free:
     write_unlock(&acm_bin_pol_rwlock);
  error_free:
     printk("%s: Error setting policy.\n", __func__);
-    xfree(policy_buffer);
     return -EFAULT;
 }
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/ia64/linux-xen/smp.c
--- a/xen/arch/ia64/linux-xen/smp.c     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/ia64/linux-xen/smp.c     Mon Jun 26 14:53:55 2006 -0500
@@ -421,6 +421,42 @@ smp_call_function (void (*func) (void *i
 }
 EXPORT_SYMBOL(smp_call_function);
 
+#ifdef XEN
+int
+on_selected_cpus(cpumask_t selected, void (*func) (void *info), void *info,
+                 int retry, int wait)
+{
+       struct call_data_struct data;
+       unsigned int cpu, nr_cpus = cpus_weight(selected);
+
+       ASSERT(local_irq_is_enabled());
+
+       if (!nr_cpus)
+               return 0;
+
+       data.func = func;
+       data.info = info;
+       data.wait = wait;
+       atomic_set(&data.started, 0);
+       atomic_set(&data.finished, 0);
+
+       spin_lock(&call_lock);
+
+       call_data = &data;
+       wmb();
+
+       for_each_cpu_mask(cpu, selected)
+               send_IPI_single(cpu, IPI_CALL_FUNC);
+
+       while (atomic_read(wait ? &data.finished : &data.started) != nr_cpus)
+               cpu_relax();
+
+       spin_unlock(&call_lock);
+
+       return 0;
+}
+#endif
+
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
  */
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/ia64/xen/domain.c        Mon Jun 26 14:53:55 2006 -0500
@@ -855,9 +855,7 @@ int construct_dom0(struct domain *d,
        sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
        si->nr_pages     = max_pages;
 
-       /* Give up the VGA console if DOM0 is configured to grab it. */
-       if (cmdline != NULL)
-           console_endboot(strstr(cmdline, "tty0") != NULL);
+       console_endboot();
 
        printk("Dom0: 0x%lx\n", (u64)dom0);
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/ia64/xen/xensetup.c      Mon Jun 26 14:53:55 2006 -0500
@@ -511,9 +511,8 @@ printk("About to call init_trace_bufs()\
 printk("About to call init_trace_bufs()\n");
     init_trace_bufs();
 
-    /* Give up the VGA console if DOM0 is configured to grab it. */
 #ifdef CONFIG_XEN_CONSOLE_INPUT        /* CONFIG_SERIAL_8250_CONSOLE=n in dom0! */
-    console_endboot(cmdline && strstr(cmdline, "tty0"));
+    console_endboot();
 #endif
 
     domain0_ready = 1;
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/Makefile     Mon Jun 26 14:53:55 2006 -0500
@@ -41,7 +41,7 @@ obj-y += x86_emulate.o
 obj-y += x86_emulate.o
 
 ifneq ($(pae),n)
-obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o
+obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o
 else
 obj-$(x86_32) += shadow32.o
 endif
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/audit.c      Mon Jun 26 14:53:55 2006 -0500
@@ -923,8 +923,8 @@ void _audit_domain(struct domain *d, int
                                d->domain_id, page_to_mfn(page),
                                page->u.inuse.type_info,
                                page->count_info);
-                        printk("a->gpfn_and_flags=%p\n",
-                               (void *)a->gpfn_and_flags);
+                        printk("a->gpfn_and_flags=%"PRIx64"\n",
+                               (u64)a->gpfn_and_flags);
                         errors++;
                     }
                     break;
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Jun 26 14:53:55 2006 -0500
@@ -74,12 +74,15 @@ static void __vmx_clear_vmcs(void *info)
 
 static void vmx_clear_vmcs(struct vcpu *v)
 {
-    unsigned int cpu = v->arch.hvm_vmx.active_cpu;
-
-    if ( (cpu == -1) || (cpu == smp_processor_id()) )
-        __vmx_clear_vmcs(v);
-    else
-        on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
+    int cpu = v->arch.hvm_vmx.active_cpu;
+
+    if ( cpu == -1 )
+        return;
+
+    if ( cpu == smp_processor_id() )
+        return __vmx_clear_vmcs(v);
+
+    on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1);
 }
 
 static void vmx_load_vmcs(struct vcpu *v)
@@ -97,6 +100,8 @@ void vmx_vmcs_enter(struct vcpu *v)
      *     context initialisation.
      *  2. VMPTRLD as soon as we context-switch to a HVM VCPU.
      *  3. VMCS destruction needs to happen later (from domain_destroy()).
+     * We can relax this a bit if a paused VCPU always commits its
+     * architectural state to a software structure.
      */
     if ( v == current )
         return;
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Jun 26 14:53:55 2006 -0500
@@ -1623,7 +1623,7 @@ static int mov_to_cr(int gp, int cr, str
             if ( vmx_pgbit_test(v) )
             {
                 /* The guest is a 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
                 unsigned long mfn, old_base_mfn;
 
                 if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
@@ -1667,7 +1667,7 @@ static int mov_to_cr(int gp, int cr, str
             else
             {
                 /*  The guest is a 64 bit or 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
                 if ( (v->domain->arch.ops != NULL) &&
                         v->domain->arch.ops->guest_paging_levels == PAGING_L2)
                 {
@@ -1680,15 +1680,6 @@ static int mov_to_cr(int gp, int cr, str
                     {
                         printk("Unsupported guest paging levels\n");
                         /* need to take a clean path */
-                        domain_crash_synchronous();
-                    }
-                }
-                else
-                {
-                    if ( !shadow_set_guest_paging_levels(v->domain,
-                                                            PAGING_L4) )
-                    {
-                        printk("Unsupported guest paging levels\n");
                         domain_crash_synchronous();
                     }
                 }
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/mm.c Mon Jun 26 14:53:55 2006 -0500
@@ -108,11 +108,20 @@
 #include <public/memory.h>
 
 #ifdef VERBOSE
-#define MEM_LOG(_f, _a...)                           \
-  printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+#define MEM_LOG(_f, _a...)                                  \
+  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",            \
          current->domain->domain_id , __LINE__ , ## _a )
 #else
 #define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/*
+ * PTE updates can be done with ordinary writes except:
+ *  1. Debug builds get extra checking by using CMPXCHG[8B].
+ *  2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
+ */
+#if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
+#define PTE_UPDATE_WITH_CMPXCHG
 #endif
 
 /*
@@ -261,17 +270,19 @@ void share_xen_page_with_privileged_gues
 
 #ifdef NDEBUG
 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
-#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
+#define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
 #else
 /*
- * In debug builds we aggressively shadow PDPTs to exercise code paths.
+ * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
  * We cannot safely shadow the idle page table, nor shadow-mode page tables
- * (detected by lack of an owning domain). Always shadow PDPTs above 4GB.
+ * (detected by lack of an owning domain). As required for correctness, we
+ * always shadow PDPTs above 4GB.
  */
 #define l3tab_needs_shadow(mfn)                         \
-    ((((mfn << PAGE_SHIFT) != __pa(idle_pg_table)) &&   \
-      (page_get_owner(mfn_to_page(mfn)) != NULL)) ||    \
-     (mfn >= 0x100000))
+    (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
+      (page_get_owner(mfn_to_page(mfn)) != NULL) &&     \
+      ((mfn) & 1)) || /* odd MFNs are shadowed */       \
+     ((mfn) >= 0x100000))
 #endif
 
 static l1_pgentry_t *fix_pae_highmem_pl1e;
@@ -296,6 +307,8 @@ static void __write_ptbase(unsigned long
     if ( !l3tab_needs_shadow(mfn) )
     {
         write_cr3(mfn << PAGE_SHIFT);
+        /* Cache is no longer in use or valid (/after/ write to %cr3). */
+        cache->high_mfn = 0;
         return;
     }
 
@@ -1173,16 +1186,27 @@ static inline int update_l1e(l1_pgentry_
     intpte_t o = l1e_get_intpte(ol1e);
     intpte_t n = l1e_get_intpte(nl1e);
 
-    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
-         unlikely(o != l1e_get_intpte(ol1e)) )
-    {
-        MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
-                ": saw %" PRIpte,
-                l1e_get_intpte(ol1e),
-                l1e_get_intpte(nl1e),
-                o);
-        return 0;
-    }
+    for ( ; ; )
+    {
+        if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+        {
+            MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+                    ": saw %" PRIpte,
+                    l1e_get_intpte(ol1e),
+                    l1e_get_intpte(nl1e),
+                    o);
+            return 0;
+        }
+
+        if ( o == l1e_get_intpte(ol1e) )
+            break;
+
+        /* Allowed to change in Accessed/Dirty flags only. */
+        BUG_ON((o ^ l1e_get_intpte(ol1e)) &
+               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
+        ol1e = l1e_from_intpte(o);
+    }
+
     return 1;
 #endif
 }
@@ -1235,17 +1259,20 @@ static int mod_l1_entry(l1_pgentry_t *pl
 #ifndef PTE_UPDATE_WITH_CMPXCHG
 #define UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
 #else
-#define UPDATE_ENTRY(_t,_p,_o,_n) ({                                    \
-    intpte_t __o = cmpxchg((intpte_t *)(_p),                            \
-                           _t ## e_get_intpte(_o),                      \
-                           _t ## e_get_intpte(_n));                     \
-    if ( __o != _t ## e_get_intpte(_o) )                                \
-        MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte              \
-                ": saw %" PRIpte "",                                    \
-                (_t ## e_get_intpte(_o)),                               \
-                (_t ## e_get_intpte(_n)),                               \
-                (__o));                                                 \
-    (__o == _t ## e_get_intpte(_o)); })
+#define UPDATE_ENTRY(_t,_p,_o,_n) ({                            \
+    for ( ; ; )                                                 \
+    {                                                           \
+        intpte_t __o = cmpxchg((intpte_t *)(_p),                \
+                               _t ## e_get_intpte(_o),          \
+                               _t ## e_get_intpte(_n));         \
+        if ( __o == _t ## e_get_intpte(_o) )                    \
+            break;                                              \
+        /* Allowed to change in Accessed/Dirty flags only. */   \
+        BUG_ON((__o ^ _t ## e_get_intpte(_o)) &                 \
+               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));             \
+        _o = _t ## e_from_intpte(__o);                          \
+    }                                                           \
+    1; })
 #endif
 
 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
@@ -2494,7 +2521,7 @@ static int destroy_grant_pte_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(__put_user(0, (intpte_t *)va)))
+    if ( unlikely(!update_l1e((l1_pgentry_t *)va, ol1e, l1e_empty())) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
@@ -2574,7 +2601,7 @@ static int destroy_grant_va_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(__put_user(0, &pl1e->l1)) )
+    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty())) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         return GNTST_general_error;
@@ -3028,6 +3055,20 @@ long arch_memory_op(int op, XEN_GUEST_HA
         return 0;
     }
 
+    case XENMEM_machphys_mapping:
+    {
+        struct xen_machphys_mapping mapping = {
+            .v_start = MACH2PHYS_VIRT_START,
+            .v_end   = MACH2PHYS_VIRT_END,
+            .max_mfn = MACH2PHYS_NR_ENTRIES - 1
+        };
+
+        if ( copy_to_guest(arg, &mapping, 1) )
+            return -EFAULT;
+
+        return 0;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
@@ -3351,7 +3392,7 @@ static int ptwr_emulated_update(
         addr &= ~(sizeof(paddr_t)-1);
         if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
         {
-            propagate_page_fault(addr, 4); /* user mode, read fault */
+            propagate_page_fault(addr, 0); /* read fault */
             return X86EMUL_PROPAGATE_FAULT;
         }
         /* Mask out bits provided by caller. */
@@ -3366,6 +3407,7 @@ static int ptwr_emulated_update(
         old  |= full;
     }
 
+#if 0 /* XXX KAF: I don't think this can happen. */
     /*
      * We must not emulate an update to a PTE that is temporarily marked
      * writable by the batched ptwr logic, else we can corrupt page refcnts! 
@@ -3376,6 +3418,12 @@ static int ptwr_emulated_update(
     if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
          (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
         ptwr_flush(d, PTWR_PT_INACTIVE);
+#else
+    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
+           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
+    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
+           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
+#endif
 
     /* Read the PTE that maps the page being updated. */
     if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
@@ -3417,8 +3465,9 @@ static int ptwr_emulated_update(
     }
     else
     {
-        ol1e  = *pl1e;
-        *pl1e = nl1e;
+        ol1e = *pl1e;
+        if ( !update_l1e(pl1e, ol1e, nl1e) )
+            BUG();
     }
     unmap_domain_page(pl1e);
 
@@ -3483,16 +3532,18 @@ int ptwr_do_page_fault(struct domain *d,
     unsigned long    l2_idx;
     struct x86_emulate_ctxt emul_ctxt;
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-        return 0;
+    ASSERT(!shadow_mode_enabled(d));
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
+     * NB. The L2 entry cannot be detached due to existing ptwr work: the
+     * caller already checked that.
      */
-    if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
-           _PAGE_PRESENT) ||
-         __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
+    pl2e = &__linear_l2_table[l2_linear_offset(addr)];
+    if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
+        !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+         __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                           sizeof(pte)) )
     {
         return 0;
@@ -3565,21 +3616,31 @@ int ptwr_do_page_fault(struct domain *d,
     }
 
     /*
-     * If this is a multi-processor guest then ensure that the page is hooked
-     * into at most one L2 table, which must be the one running on this VCPU.
+     * Multi-processor guest? Then ensure that the page table is hooked into
+     * at most one L2, and also ensure that there is only one mapping of the
+     * page table itself (or there can be conflicting writable mappings from
+     * other VCPUs).
      */
-    if ( (d->vcpu[0]->next_in_list != NULL) &&
-         ((page->u.inuse.type_info & PGT_count_mask) != 
-          (!!(page->u.inuse.type_info & PGT_pinned) +
-           (which == PTWR_PT_ACTIVE))) )
-    {
-        /* Could be conflicting writable mappings from other VCPUs. */
-        cleanup_writable_pagetable(d);
-        goto emulate;
+    if ( d->vcpu[0]->next_in_list != NULL )
+    {
+        if ( /* Hooked into at most one L2 table (which this VCPU maps)? */
+             ((page->u.inuse.type_info & PGT_count_mask) != 
+              (!!(page->u.inuse.type_info & PGT_pinned) +
+               (which == PTWR_PT_ACTIVE))) ||
+             /* PTEs are mapped read-only in only one place? */
+             ((page->count_info & PGC_count_mask) !=
+              (!!(page->count_info & PGC_allocated) +       /* alloc count */
+               (page->u.inuse.type_info & PGT_count_mask) + /* type count  */
+               1)) )                                        /* map count   */
+        {
+            /* Could be conflicting writable mappings from other VCPUs. */
+            cleanup_writable_pagetable(d);
+            goto emulate;
+        }
     }
 
     /*
-     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at 
+     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
      * time. If there is already one, we must flush it out.
      */
     if ( d->arch.ptwr[which].l1va )
@@ -3600,18 +3661,16 @@ int ptwr_do_page_fault(struct domain *d,
                 "pfn %lx\n", PTWR_PRINT_WHICH, addr,
                 l2_idx << L2_PAGETABLE_SHIFT, pfn);
 
-    d->arch.ptwr[which].l1va   = addr | 1;
-    d->arch.ptwr[which].l2_idx = l2_idx;
-    d->arch.ptwr[which].vcpu   = current;
-
-#ifdef PERF_ARRAYS
-    d->arch.ptwr[which].eip    = regs->eip;
-#endif
-
     /* For safety, disconnect the L1 p.t. page from current space. */
     if ( which == PTWR_PT_ACTIVE )
     {
-        l2e_remove_flags(*pl2e, _PAGE_PRESENT);
+        l2e_remove_flags(l2e, _PAGE_PRESENT);
+        if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) )
+        {
+            MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e);
+            domain_crash(d);
+            return 0;
+        }
         flush_tlb_mask(d->domain_dirty_cpumask);
     }
     
@@ -3625,14 +3684,24 @@ int ptwr_do_page_fault(struct domain *d,
     if ( unlikely(__put_user(pte.l1,
                              &linear_pg_table[l1_linear_offset(addr)].l1)) )
     {
-        MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
+        MEM_LOG("ptwr: Could not update pte at %p",
                 &linear_pg_table[l1_linear_offset(addr)]);
-        /* Toss the writable pagetable state and crash. */
-        d->arch.ptwr[which].l1va = 0;
         domain_crash(d);
         return 0;
     }
     
+    /*
+     * Now record the writable pagetable state *after* any accesses that can
+     * cause a recursive page fault (i.e., those via the *_user() accessors).
+     * Otherwise we can enter ptwr_flush() with half-done ptwr state.
+     */
+    d->arch.ptwr[which].l1va   = addr | 1;
+    d->arch.ptwr[which].l2_idx = l2_idx;
+    d->arch.ptwr[which].vcpu   = current;
+#ifdef PERF_ARRAYS
+    d->arch.ptwr[which].eip    = regs->eip;
+#endif
+
     return EXCRET_fault_fixed;
 
  emulate:
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/setup.c      Mon Jun 26 14:53:55 2006 -0500
@@ -396,11 +396,13 @@ void __init __start_xen(multiboot_info_t
     BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
     BUILD_BUG_ON(sizeof(vcpu_info_t) != 64);
 
-    /* __foo are defined in public headers. Check they match internal defs. */
+    /* Check definitions in public headers match internal defs. */
     BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
 #ifdef HYPERVISOR_VIRT_END
     BUILD_BUG_ON(__HYPERVISOR_VIRT_END   != HYPERVISOR_VIRT_END);
 #endif
+    BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
+    BUILD_BUG_ON(MACH2PHYS_VIRT_END   != RO_MPT_VIRT_END);
 
     init_frametable();
 
@@ -596,8 +598,7 @@ void __init __start_xen(multiboot_info_t
 
     init_trace_bufs();
 
-    /* Give up the VGA console if DOM0 is configured to grab it. */
-    console_endboot(cmdline && strstr(cmdline, "tty0"));
+    console_endboot();
 
     /* Hide UART from DOM0 if we're using it */
     serial_endboot();
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/shadow.c     Mon Jun 26 14:53:55 2006 -0500
@@ -222,6 +222,7 @@ alloc_shadow_page(struct domain *d,
     unsigned long smfn, real_gpfn;
     int pin = 0;
     void *l1, *lp;
+    u64 index = 0;
 
     // Currently, we only keep pre-zero'ed pages around for use as L1's...
     // This will change.  Soon.
@@ -354,9 +355,19 @@ alloc_shadow_page(struct domain *d,
         if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
             pin = 1;
 #endif
+
+#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE )
+        /*
+         * We use PGT_l4_shadow for 3-level (PAE) paging guests on PAE
+         */
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+            pin = 1;
+#endif
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+            index = get_cr3_idxval(current);
         break;
 
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
     case PGT_fl1_shadow:
         perfc_incr(shadow_l1_pages);
         d->arch.shadow_page_count++;
@@ -393,7 +404,7 @@ alloc_shadow_page(struct domain *d,
     //
     ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
 
-    set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+    set_shadow_status(d, gpfn, gmfn, smfn, psh_type, index);
 
     if ( pin )
         shadow_pin(smfn);
@@ -1324,7 +1335,7 @@ increase_writable_pte_prediction(struct 
     prediction = (prediction & PGT_mfn_mask) | score;
 
     //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
-    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
 
     if ( create )
         perfc_incr(writable_pte_predictions);
@@ -1345,10 +1356,10 @@ decrease_writable_pte_prediction(struct 
     //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
 
     if ( score )
-        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
     else
     {
-        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 0);
         perfc_decr(writable_pte_predictions);
     }
 }
@@ -1385,7 +1396,7 @@ static u32 remove_all_write_access_in_pt
     int is_l1_shadow =
         ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
          PGT_l1_shadow);
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
     is_l1_shadow |=
       ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
                 PGT_fl1_shadow);
@@ -1494,7 +1505,7 @@ static int remove_all_write_access(
         while ( a && a->gpfn_and_flags )
         {
             if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
               || (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
 #endif
               )
@@ -1538,8 +1549,8 @@ static void resync_pae_guest_l3(struct d
             continue;
 
         idx = get_cr3_idxval(v);
-        smfn = __shadow_status(
-            d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow);
+
+        smfn = __shadow_status(d, entry->gpfn, PGT_l4_shadow);
 
         if ( !smfn ) 
             continue;
@@ -1706,7 +1717,7 @@ static int resync_all(struct domain *d, 
                 {
                     int error;
 
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
                     unsigned long gpfn;
 
                     gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
@@ -2420,17 +2431,6 @@ static void shadow_update_pagetables(str
         v->arch.guest_vtable = map_domain_page_global(gmfn);
     }
 
-#if CONFIG_PAGING_LEVELS >= 3
-    /*
-     * Handle 32-bit PAE enabled guest
-     */
-    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
-    {
-        u32 index = get_cr3_idxval(v);
-        gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
-    }
-#endif
-
     /*
      *  arch.shadow_table
      */
@@ -2443,6 +2443,23 @@ static void shadow_update_pagetables(str
         if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
             smfn = shadow_l3_table(v, gpfn, gmfn);
     } 
+    else
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE )
+    /*
+     * We use PGT_l4_shadow for 3-level (PAE) paging guests on PAE
+     */
+    if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+    {
+        if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
+            smfn = shadow_l3_table(v, gpfn, gmfn);
+        else
+        {
+            update_top_level_shadow(v, smfn);
+            need_sync = 1;
+        }
+    }
     else
 #endif
     if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) 
@@ -3093,6 +3110,36 @@ static inline unsigned long init_bl2(
 
     return smfn;
 }
+
+static inline unsigned long init_l3(
+    struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
+{
+    unsigned long smfn;
+    l4_pgentry_t *spl4e;
+    unsigned long index;
+
+    if ( unlikely(!(smfn = alloc_shadow_page(v->domain, gpfn, gmfn, PGT_l4_shadow))) )
+    {
+        printk("Couldn't alloc an L4 shadow for pfn= %lx mfn= %lx\n", gpfn, gmfn);
+        BUG(); /* XXX Deal gracefully with failure. */
+    }
+
+    /* Map the self entry, L4&L3 share the same page */
+    spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+    /*
+     * Shadow L4's pfn_info->tlbflush_timestamp
+     * should also save its own index.
+     */
+
+    index = get_cr3_idxval(v);
+    frame_table[smfn].tlbflush_timestamp = index;
+
+    memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
+    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+    unmap_domain_page(spl4e);
+    return smfn;
+}
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -3111,6 +3158,12 @@ static unsigned long shadow_l3_table(
          d->arch.ops->guest_paging_levels == PAGING_L2 )
     {
         return init_bl2(d, gpfn, gmfn);
+    }
+
+    if ( SH_GUEST_32PAE &&
+         d->arch.ops->guest_paging_levels == PAGING_L3 )
+    {
+        return init_l3(v, gpfn, gmfn);
     }
 
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
@@ -3223,6 +3276,11 @@ static unsigned long shadow_l4_table(
         return init_bl2(d, gpfn, gmfn);
     }
 
+    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
+    {
+        return init_l3(v, gpfn, gmfn);
+    }
+
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
     {
         printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
@@ -3230,24 +3288,6 @@ static unsigned long shadow_l4_table(
     }
 
     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
-    /* For 32-bit PAE guest on 64-bit host */
-    if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
-    {
-        unsigned long index;
-        /*
-         * Shadow L4's pfn_info->tlbflush_timestamp
-         * should also save it's own index.
-         */
-        index = get_cr3_idxval(v);
-        frame_table[smfn].tlbflush_timestamp = index;
-
-        memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
-        /* Map the self entry */
-        spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
-        unmap_domain_page(spl4e);
-        return smfn;
-    }
 
     /* Install hypervisor and 4x linear p.t. mapings. */
     if ( (PGT_base_page_table == PGT_l4_page_table) &&
@@ -3378,7 +3418,7 @@ validate_bl2e_change(
  * This shadow_mark_va_out_of_sync() is for 2M page shadow
  */
 static void shadow_mark_va_out_of_sync_2mp(
-  struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long writable_pl1e)
+  struct vcpu *v, unsigned long gpfn, unsigned long mfn, paddr_t writable_pl1e)
 {
     struct out_of_sync_entry *entry =
       shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
@@ -3647,6 +3687,7 @@ static inline int l2e_rw_fault(
     }
 
     unmap_domain_page(l1_p);
+    *gl2e_p = gl2e;
     return 1;
 
 }
@@ -3720,7 +3761,7 @@ static inline int guest_page_fault(
 
     ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
 
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
     if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) )
         return 1;
 #endif
@@ -4056,7 +4097,7 @@ struct shadow_ops MODE_32_2_HANDLER = {
 };
 #endif
 
-#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) ||  \
+#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) && !defined (GUEST_32PAE) ) ||  \
     ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) ) 
 
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/shadow32.c   Mon Jun 26 14:53:55 2006 -0500
@@ -306,7 +306,7 @@ alloc_shadow_page(struct domain *d,
     //
     ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
 
-    set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+    set_shadow_status(d, gpfn, gmfn, smfn, psh_type, 0);
 
     if ( pin )
         shadow_pin(smfn);
@@ -395,7 +395,7 @@ void free_shadow_page(unsigned long smfn
 
     ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
 
-    delete_shadow_status(d, gpfn, gmfn, type);
+    delete_shadow_status(d, gpfn, gmfn, type, 0);
 
     switch ( type )
     {
@@ -2319,7 +2319,7 @@ increase_writable_pte_prediction(struct 
     prediction = (prediction & PGT_mfn_mask) | score;
 
     //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
-    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
 
     if ( create )
         perfc_incr(writable_pte_predictions);
@@ -2340,10 +2340,10 @@ decrease_writable_pte_prediction(struct 
     //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
 
     if ( score )
-        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
     else
     {
-        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 0);
         perfc_decr(writable_pte_predictions);
     }
 }
@@ -2381,7 +2381,7 @@ free_writable_pte_predictions(struct dom
              * keep an accurate count of writable_pte_predictions to keep it
              * happy.
              */
-            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
             perfc_decr(writable_pte_predictions);
         }
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/shadow_guest32pae.c
--- a/xen/arch/x86/shadow_guest32pae.c  Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/shadow_guest32pae.c  Mon Jun 26 14:53:55 2006 -0500
@@ -1,5 +1,4 @@
 #define GUEST_32PAE
-#if defined (__x86_64__)
 
 #include "shadow.c"
 struct shadow_ops MODE_64_PAE_HANDLER = {
@@ -15,4 +14,3 @@ struct shadow_ops MODE_64_PAE_HANDLER = 
     .gva_to_gpa                 = gva_to_gpa_64,
 };
 
-#endif
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/shadow_public.c      Mon Jun 26 14:53:55 2006 -0500
@@ -123,8 +123,19 @@ int shadow_set_guest_paging_levels(struc
 #endif
 #if CONFIG_PAGING_LEVELS == 3
     case 3:
-        if ( d->arch.ops != &MODE_64_3_HANDLER )
-            d->arch.ops = &MODE_64_3_HANDLER;
+        if ( d->arch.ops == NULL ||
+                    shadow_mode_log_dirty(d) )
+        {
+            if ( d->arch.ops != &MODE_64_3_HANDLER )
+                d->arch.ops = &MODE_64_3_HANDLER;
+        }
+        else
+        {
+            if ( d->arch.ops == &MODE_64_2_HANDLER )
+                free_shadow_pages(d);
+            if ( d->arch.ops != &MODE_64_PAE_HANDLER )
+                d->arch.ops = &MODE_64_PAE_HANDLER;
+        }
         shadow_unlock(d);
         return 1;
 #endif
@@ -268,10 +279,8 @@ free_shadow_tables(struct domain *d, uns
                     put_shadow_ref(entry_get_pfn(ple[i]));
                 if (d->arch.ops->guest_paging_levels == PAGING_L3)
                 {
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
                     if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 )
-#elif CONFIG_PAGING_LEVELS == 3
-                    if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L3 )
 #endif
                         break;
                 }
@@ -710,6 +719,7 @@ void free_shadow_page(unsigned long smfn
     struct domain *d = page_get_owner(mfn_to_page(gmfn));
     unsigned long gpfn = mfn_to_gmfn(d, gmfn);
     unsigned long type = page->u.inuse.type_info & PGT_type_mask;
+    u64 index = 0;
 
     SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
 
@@ -722,12 +732,16 @@ void free_shadow_page(unsigned long smfn
         if ( !mfn )
             gpfn |= (1UL << 63);
     }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
     if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
-        if ( type == PGT_l4_shadow ) 
-            gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn;
-#endif
-
-    delete_shadow_status(d, gpfn, gmfn, type);
+    {
+        if ( type == PGT_l4_shadow )
+            index = page->tlbflush_timestamp;
+    }
+#endif
+
+    delete_shadow_status(d, gpfn, gmfn, type, index);
 
     switch ( type )
     {
@@ -835,7 +849,7 @@ free_writable_pte_predictions(struct dom
         while ( count )
         {
             count--;
-            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+            delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
         }
 
         xfree(gpfn_list);
@@ -1050,8 +1064,8 @@ void __shadow_mode_disable(struct domain
     {
         if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
         {
-            printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
-                   __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
+            printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%"PRIx64"\n",
+                   __FILE__, i, (u64)d->arch.shadow_ht[i].gpfn_and_flags);
             BUG();
         }
     }
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/time.c       Mon Jun 26 14:53:55 2006 -0500
@@ -699,7 +699,7 @@ void update_domain_wallclock_time(struct
 {
     spin_lock(&wc_lock);
     version_update_begin(&d->shared_info->wc_version);
-    d->shared_info->wc_sec  = wc_sec;
+    d->shared_info->wc_sec  = wc_sec + d->time_offset_seconds;
     d->shared_info->wc_nsec = wc_nsec;
     version_update_end(&d->shared_info->wc_version);
     spin_unlock(&wc_lock);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/traps.c      Mon Jun 26 14:53:55 2006 -0500
@@ -276,6 +276,42 @@ void show_stack(struct cpu_user_regs *re
     show_trace(regs);
 }
 
+void show_stack_overflow(unsigned long esp)
+{
+#ifdef MEMORY_GUARD
+    unsigned long esp_top = get_stack_bottom() & PAGE_MASK;
+    unsigned long *stack, addr;
+
+    /* Trigger overflow trace if %esp is within 100 bytes of the guard page. */
+    if ( ((esp - esp_top) > 100) && ((esp_top - esp) > 100) )
+        return;
+
+    if ( esp < esp_top )
+        esp = esp_top;
+
+    printk("Xen stack overflow:\n   ");
+
+    stack = (unsigned long *)esp;
+    while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
+    {
+        addr = *stack++;
+        if ( is_kernel_text(addr) )
+        {
+            printk("%p: [<%p>]", stack, _p(addr));
+            print_symbol(" %s\n   ", addr);
+        }
+    }
+
+    printk("\n");
+#endif
+}
+
+void show_execution_state(struct cpu_user_regs *regs)
+{
+    show_registers(regs);
+    show_stack(regs);
+}
+
 /*
  * This is called for faults at very unexpected times (e.g., when interrupts
  * are disabled). In such situations we can't do much that is safe. We try to
@@ -297,7 +333,7 @@ asmlinkage void fatal_trap(int trapnr, s
     watchdog_disable();
     console_start_sync();
 
-    show_registers(regs);
+    show_execution_state(regs);
 
     if ( trapnr == TRAP_page_fault )
     {
@@ -360,7 +396,7 @@ static inline int do_trap(int trapnr, ch
 
     DEBUGGER_trap_fatal(trapnr, regs);
 
-    show_registers(regs);
+    show_execution_state(regs);
     panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
           "[error_code=%04x]\n",
           smp_processor_id(), trapnr, str, regs->error_code);
@@ -451,8 +487,23 @@ asmlinkage int do_invalid_op(struct cpu_
 
     if ( unlikely(!guest_mode(regs)) )
     {
+        char sig[5];
+        /* Signature (ud2; .ascii "dbg") indicates dump state and continue. */
+        if ( (__copy_from_user(sig, (char *)regs->eip, sizeof(sig)) == 0) &&
+             (memcmp(sig, "\xf\xb""dbg", sizeof(sig)) == 0) )
+        {
+            show_execution_state(regs);
+            regs->eip += sizeof(sig);
+            return EXCRET_fault_fixed;
+        }
+        printk("%02x %02x %02x %02x %02x\n",
+               (unsigned char)sig[0],
+               (unsigned char)sig[1],
+               (unsigned char)sig[2],
+               (unsigned char)sig[3],
+               (unsigned char)sig[4]);
         DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
-        show_registers(regs);
+        show_execution_state(regs);
         panic("CPU%d FATAL TRAP: vector = %d (invalid opcode)\n",
               smp_processor_id(), TRAP_invalid_op);
     }
@@ -481,7 +532,7 @@ asmlinkage int do_int3(struct cpu_user_r
     if ( !guest_mode(regs) )
     {
         DEBUGGER_trap_fatal(TRAP_int3, regs);
-        show_registers(regs);
+        show_execution_state(regs);
         panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
     } 
 
@@ -511,9 +562,9 @@ void propagate_page_fault(unsigned long 
     v->vcpu_info->arch.cr2           = addr;
 
     /* Re-set error_code.user flag appropriately for the guest. */
-    error_code &= ~4;
+    error_code &= ~PGERR_user_mode;
     if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
-        error_code |= 4;
+        error_code |= PGERR_user_mode;
 
     ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
     tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
@@ -547,6 +598,7 @@ static int handle_gdt_ldt_mapping_fault(
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
         LOCK_BIGLOCK(d);
+        cleanup_writable_pagetable(d);
         ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
         UNLOCK_BIGLOCK(d);
 
@@ -578,6 +630,98 @@ static int handle_gdt_ldt_mapping_fault(
     (((va) >= HYPERVISOR_VIRT_START))
 #endif
 
+static int __spurious_page_fault(
+    unsigned long addr, struct cpu_user_regs *regs)
+{
+    unsigned long mfn, cr3 = read_cr3();
+#if CONFIG_PAGING_LEVELS >= 4
+    l4_pgentry_t l4e, *l4t;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    l3_pgentry_t l3e, *l3t;
+#endif
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+    unsigned int required_flags, disallowed_flags;
+
+    /* Reserved bit violations are never spurious faults. */
+    if ( regs->error_code & PGERR_reserved_bit )
+        return 0;
+
+    required_flags  = _PAGE_PRESENT;
+    if ( regs->error_code & PGERR_write_access )
+        required_flags |= _PAGE_RW;
+    if ( regs->error_code & PGERR_user_mode )
+        required_flags |= _PAGE_USER;
+
+    disallowed_flags = 0;
+    if ( regs->error_code & PGERR_instr_fetch )
+        disallowed_flags |= _PAGE_NX;
+
+    mfn = cr3 >> PAGE_SHIFT;
+
+#if CONFIG_PAGING_LEVELS >= 4
+    l4t = map_domain_page(mfn);
+    l4e = l4t[l4_table_offset(addr)];
+    mfn = l4e_get_pfn(l4e);
+    unmap_domain_page(l4t);
+    if ( !(l4e_get_flags(l4e) & required_flags) ||
+         (l4e_get_flags(l4e) & disallowed_flags) )
+        return 0;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    l3t  = map_domain_page(mfn); 
+#ifdef CONFIG_X86_PAE
+    l3t += (cr3 & 0xFE0UL) >> 3;
+#endif
+    l3e = l3t[l3_table_offset(addr)];
+    mfn = l3e_get_pfn(l3e);
+    unmap_domain_page(l3t);
+#ifdef CONFIG_X86_PAE
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+        return 0;
+#else
+    if ( !(l3e_get_flags(l3e) & required_flags) ||
+         (l3e_get_flags(l3e) & disallowed_flags) )
+        return 0;
+#endif
+#endif
+
+    l2t = map_domain_page(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    mfn = l2e_get_pfn(l2e);
+    unmap_domain_page(l2t);
+    if ( !(l2e_get_flags(l2e) & required_flags) ||
+         (l2e_get_flags(l2e) & disallowed_flags) )
+        return 0;
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        return 1;
+
+    l1t = map_domain_page(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    mfn = l1e_get_pfn(l1e);
+    unmap_domain_page(l1t);
+    if ( !(l1e_get_flags(l1e) & required_flags) ||
+         (l1e_get_flags(l1e) & disallowed_flags) )
+        return 0;
+    return 1;
+}
+
+static int spurious_page_fault(
+    unsigned long addr, struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    int            is_spurious;
+
+    LOCK_BIGLOCK(d);
+    cleanup_writable_pagetable(d);
+    is_spurious = __spurious_page_fault(addr, regs);
+    UNLOCK_BIGLOCK(d);
+
+    return is_spurious;
+}
+
 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
 {
     struct vcpu   *v = current;
@@ -590,12 +734,17 @@ static int fixup_page_fault(unsigned lon
         if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
             return handle_gdt_ldt_mapping_fault(
                 addr - GDT_LDT_VIRT_START, regs);
-    }
-    else if ( unlikely(shadow_mode_enabled(d)) )
-    {
+        /*
+         * Do not propagate spurious faults in the hypervisor area to the
+         * guest. It cannot fix them up.
+         */
+        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
+    }
+
+    if ( unlikely(shadow_mode_enabled(d)) )
         return shadow_fault(addr, regs);
-    }
-    else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+
+    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
     {
         LOCK_BIGLOCK(d);
         if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
@@ -607,9 +756,14 @@ static int fixup_page_fault(unsigned lon
             return EXCRET_fault_fixed;
         }
 
+        /*
+         * Note it is *not* safe to check PGERR_page_present here. It can be
+         * clear, due to unhooked page table, when we would otherwise expect
+         * it to be set. We have an aversion to trusting that flag in Xen, and
+         * guests ought to be leery too.
+         */
         if ( guest_kernel_mode(v, regs) &&
-             /* Protection violation on write? No reserved-bit violation? */
-             ((regs->error_code & 0xb) == 0x3) &&
+             (regs->error_code & PGERR_write_access) &&
              ptwr_do_page_fault(d, addr, regs) )
         {
             UNLOCK_BIGLOCK(d);
@@ -619,46 +773,6 @@ static int fixup_page_fault(unsigned lon
     }
 
     return 0;
-}
-
-static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
-{
-    struct vcpu   *v = current;
-    struct domain *d = v->domain;
-    int            rc;
-
-    /*
-     * The only possible reason for a spurious page fault not to be picked
-     * up already is that a page directory was unhooked by writable page table
-     * logic and then reattached before the faulting VCPU could detect it.
-     */
-    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
-         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
-         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
-         ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault?    */
-        return 0;
-
-    LOCK_BIGLOCK(d);
-
-    /*
-     * The page directory could have been detached again while we weren't
-     * holding the per-domain lock. Detect that and fix up if it's the case.
-     */
-    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-         unlikely(l2_linear_offset(addr) ==
-                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
-    {
-        ptwr_flush(d, PTWR_PT_ACTIVE);
-        rc = 1;
-    }
-    else
-    {
-        /* Okay, walk the page tables. Only check for not-present faults.*/
-        rc = __spurious_page_fault(addr);
-    }
-
-    UNLOCK_BIGLOCK(d);
-    return rc;
 }
 
 /*
@@ -703,7 +817,7 @@ asmlinkage int do_page_fault(struct cpu_
 
         DEBUGGER_trap_fatal(TRAP_page_fault, regs);
 
-        show_registers(regs);
+        show_execution_state(regs);
         show_page_walk(addr);
         panic("CPU%d FATAL PAGE FAULT\n"
               "[error_code=%04x]\n"
@@ -784,8 +898,6 @@ static inline int admin_io_okay(
     (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
 
 /* Propagate a fault back to the guest kernel. */
-#define USER_READ_FAULT  4 /* user mode, read fault */
-#define USER_WRITE_FAULT 6 /* user mode, write fault */
 #define PAGE_FAULT(_faultaddr, _errcode)        \
 ({  propagate_page_fault(_faultaddr, _errcode); \
     return EXCRET_fault_fixed;                  \
@@ -795,7 +907,7 @@ static inline int admin_io_okay(
 #define insn_fetch(_type, _size, _ptr)          \
 ({  unsigned long _x;                           \
     if ( get_user(_x, (_type *)eip) )           \
-        PAGE_FAULT(eip, USER_READ_FAULT);       \
+        PAGE_FAULT(eip, 0); /* read fault */    \
     eip += _size; (_type)_x; })
 
 static int emulate_privileged_op(struct cpu_user_regs *regs)
@@ -864,17 +976,17 @@ static int emulate_privileged_op(struct 
             case 1:
                 data = (u8)inb_user((u16)regs->edx, v, regs);
                 if ( put_user((u8)data, (u8 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 2:
                 data = (u16)inw_user((u16)regs->edx, v, regs);
                 if ( put_user((u16)data, (u16 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 4:
                 data = (u32)inl_user((u16)regs->edx, v, regs);
                 if ( put_user((u32)data, (u32 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             }
             regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -889,17 +1001,17 @@ static int emulate_privileged_op(struct 
             {
             case 1:
                 if ( get_user(data, (u8 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outb_user((u8)data, (u16)regs->edx, v, regs);
                 break;
             case 2:
                 if ( get_user(data, (u16 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outw_user((u16)data, (u16)regs->edx, v, regs);
                 break;
             case 4:
                 if ( get_user(data, (u32 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
@@ -1082,7 +1194,7 @@ static int emulate_privileged_op(struct 
             v->arch.guest_context.ctrlreg[2] = *reg;
             v->vcpu_info->arch.cr2           = *reg;
             break;
-            
+
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
             cleanup_writable_pagetable(v->domain);
@@ -1270,7 +1382,7 @@ asmlinkage int do_general_protection(str
     DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
 
  hardware_gp:
-    show_registers(regs);
+    show_execution_state(regs);
     panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
           smp_processor_id(), regs->error_code);
     return 0;
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/x86_32/seg_fixup.c   Mon Jun 26 14:53:55 2006 -0500
@@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs
     return 0;
 
  page_fault:
-    propagate_page_fault((unsigned long)pb, 4);
+    propagate_page_fault((unsigned long)pb, 0); /* read fault */
     return EXCRET_fault_fixed;
 }
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/x86_32/traps.c       Mon Jun 26 14:53:55 2006 -0500
@@ -68,13 +68,11 @@ void show_registers(struct cpu_user_regs
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
            fault_regs.gs, fault_regs.ss, fault_regs.cs);
-
-    show_stack(regs);
 }
 
 void show_page_walk(unsigned long addr)
 {
-    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+    unsigned long pfn, mfn, cr3 = read_cr3();
 #ifdef CONFIG_X86_PAE
     l3_pgentry_t l3e, *l3t;
 #endif
@@ -83,8 +81,11 @@ void show_page_walk(unsigned long addr)
 
     printk("Pagetable walk from %08lx:\n", addr);
 
+    mfn = cr3 >> PAGE_SHIFT;
+
 #ifdef CONFIG_X86_PAE
-    l3t = map_domain_page(mfn);
+    l3t  = map_domain_page(mfn);
+    l3t += (cr3 & 0xFE0UL) >> 3;
     l3e = l3t[l3_table_offset(addr)];
     mfn = l3e_get_pfn(l3e);
     pfn = get_gpfn_from_mfn(mfn);
@@ -111,40 +112,6 @@ void show_page_walk(unsigned long addr)
     pfn = get_gpfn_from_mfn(mfn);
     printk("   L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
     unmap_domain_page(l1t);
-}
-
-int __spurious_page_fault(unsigned long addr)
-{
-    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-#ifdef CONFIG_X86_PAE
-    l3_pgentry_t l3e, *l3t;
-#endif
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-
-#ifdef CONFIG_X86_PAE
-    l3t = map_domain_page(mfn);
-    l3e = l3t[l3_table_offset(addr)];
-    mfn = l3e_get_pfn(l3e);
-    unmap_domain_page(l3t);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
-#endif
-
-    l2t = map_domain_page(mfn);
-    l2e = l2t[l2_table_offset(addr)];
-    mfn = l2e_get_pfn(l2e);
-    unmap_domain_page(l2t);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
-
-    l1t = map_domain_page(mfn);
-    l1e = l1t[l1_table_offset(addr)];
-    mfn = l1e_get_pfn(l1e);
-    unmap_domain_page(l1t);
-    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
 }
 
 #define DOUBLEFAULT_STACK_SIZE 1024
@@ -173,6 +140,7 @@ asmlinkage void do_double_fault(void)
            tss->esi, tss->edi, tss->ebp, tss->esp);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
            tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
+    show_stack_overflow(tss->esp);
     printk("************************************\n");
     printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
     printk("System needs manual reset.\n");
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/x86_64/traps.c       Mon Jun 26 14:53:55 2006 -0500
@@ -68,8 +68,6 @@ void show_registers(struct cpu_user_regs
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
            fault_regs.gs, fault_regs.ss, fault_regs.cs);
-
-    show_stack(regs);
 }
 
 void show_page_walk(unsigned long addr)
@@ -115,40 +113,6 @@ void show_page_walk(unsigned long addr)
     printk("    L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
 }
 
-int __spurious_page_fault(unsigned long addr)
-{
-    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
-    l4_pgentry_t l4e, *l4t;
-    l3_pgentry_t l3e, *l3t;
-    l2_pgentry_t l2e, *l2t;
-    l1_pgentry_t l1e, *l1t;
-
-    l4t = mfn_to_virt(mfn);
-    l4e = l4t[l4_table_offset(addr)];
-    mfn = l4e_get_pfn(l4e);
-    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
-        return 0;
-
-    l3t = mfn_to_virt(mfn);
-    l3e = l3t[l3_table_offset(addr)];
-    mfn = l3e_get_pfn(l3e);
-    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
-
-    l2t = mfn_to_virt(mfn);
-    l2e = l2t[l2_table_offset(addr)];
-    mfn = l2e_get_pfn(l2e);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
-
-    l1t = mfn_to_virt(mfn);
-    l1e = l1t[l1_table_offset(addr)];
-    mfn = l1e_get_pfn(l1e);
-    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
-}
-
 asmlinkage void double_fault(void);
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
@@ -159,6 +123,7 @@ asmlinkage void do_double_fault(struct c
     /* Find information saved during fault and dump it to the console. */
     printk("************************************\n");
     show_registers(regs);
+    show_stack_overflow(regs->rsp);
     printk("************************************\n");
     printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id());
     printk("System needs manual reset.\n");
diff -r 049e669e6a8a -r b29806fb6ba0 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/arch/x86/x86_emulate.c        Mon Jun 26 14:53:55 2006 -0500
@@ -1146,7 +1146,7 @@ x86_emulate_read_std(
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
     {
-        propagate_page_fault(addr, 4); /* user mode, read fault */
+        propagate_page_fault(addr, 0); /* read fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
@@ -1161,7 +1161,7 @@ x86_emulate_write_std(
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
-        propagate_page_fault(addr, 6); /* user mode, write fault */
+        propagate_page_fault(addr, PGERR_write_access); /* write fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/acm_ops.c
--- a/xen/common/acm_ops.c      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/acm_ops.c      Mon Jun 26 14:53:55 2006 -0500
@@ -69,7 +69,7 @@ long do_acm_op(int cmd, XEN_GUEST_HANDLE
             return -EACCES;
 
         rc = acm_set_policy(setpolicy.pushcache,
-                            setpolicy.pushcache_size, 1);
+                            setpolicy.pushcache_size);
         break;
     }
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/dom0_ops.c     Mon Jun 26 14:53:55 2006 -0500
@@ -693,6 +693,21 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     break;
 #endif
 
+    case DOM0_SETTIMEOFFSET:
+    {
+        struct domain *d;
+
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.settimeoffset.domain);
+        if ( d != NULL )
+        {
+            d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds;
+            put_domain(d);
+            ret = 0;
+        }
+    }
+    break;
+
     default:
         ret = arch_do_dom0_op(op, u_dom0_op);
         break;
@@ -701,9 +716,9 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
     spin_unlock(&dom0_lock);
 
     if (!ret)
-        acm_post_dom0_op(op, ssid);
+        acm_post_dom0_op(op, &ssid);
     else
-        acm_fail_dom0_op(op, ssid);
+        acm_fail_dom0_op(op, &ssid);
 
     return ret;
 }
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/domain.c
--- a/xen/common/domain.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/domain.c       Mon Jun 26 14:53:55 2006 -0500
@@ -234,7 +234,7 @@ void __domain_crash(struct domain *d)
     {
         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
                d->domain_id, current->vcpu_id, smp_processor_id());
-        show_registers(guest_cpu_user_regs());
+        show_execution_state(guest_cpu_user_regs());
     }
     else
     {
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/kernel.c
--- a/xen/common/kernel.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/kernel.c       Mon Jun 26 14:53:55 2006 -0500
@@ -96,10 +96,11 @@ char *print_tainted(char *str)
 {
     if ( tainted )
     {
-        snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c",
+        snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c%c",
                  tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
                  tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
-                 tainted & TAINT_BAD_PAGE ? 'B' : ' ');
+                 tainted & TAINT_BAD_PAGE ? 'B' : ' ',
+                 tainted & TAINT_SYNC_CONSOLE ? 'C' : ' ');
     }
     else
     {
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/keyhandler.c   Mon Jun 26 14:53:55 2006 -0500
@@ -87,10 +87,28 @@ static void show_handlers(unsigned char 
                    key_table[i].desc);
 }
 
+static void __dump_execstate(void *unused)
+{
+    dump_execution_state();
+}
+
 static void dump_registers(unsigned char key, struct cpu_user_regs *regs)
 {
+    unsigned int cpu;
+
     printk("'%c' pressed -> dumping registers\n", key); 
-    show_registers(regs); 
+
+    /* Get local execution state out immediately, in case we get stuck. */
+    printk("\n*** Dumping CPU%d state: ***\n", smp_processor_id());
+    show_execution_state(regs);
+
+    for_each_online_cpu ( cpu )
+    {
+        if ( cpu == smp_processor_id() )
+            continue;
+        printk("\n*** Dumping CPU%d state: ***\n", cpu);
+        on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1);
+    }
 }
 
 static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/memory.c
--- a/xen/common/memory.c       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/memory.c       Mon Jun 26 14:53:55 2006 -0500
@@ -282,7 +282,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
     LIST_HEAD(in_chunk_list);
     LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
-    xen_pfn_t gpfn, gmfn, mfn;
+    xen_pfn_t     gpfn, gmfn, mfn;
     unsigned long i, j, k;
     unsigned int  memflags = 0;
     long          rc = 0;
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/sched_credit.c Mon Jun 26 14:53:55 2006 -0500
@@ -967,9 +967,6 @@ csched_load_balance(int cpu, struct csch
         if ( peer_cpu == cpu )
             break;
 
-        BUG_ON( peer_cpu >= csched_priv.ncpus );
-        BUG_ON( peer_cpu == cpu );
-
         /*
          * Get ahold of the scheduler lock for this peer CPU.
          *
@@ -1072,7 +1069,6 @@ csched_schedule(s_time_t now)
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
-    BUG_ON( !vcpu_runnable(ret.task) );
 
     return ret;
 }
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/sched_sedf.c   Mon Jun 26 14:53:55 2006 -0500
@@ -360,24 +360,23 @@ static int sedf_init_vcpu(struct vcpu *v
         INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
     }
        
+    /* Every VCPU gets an equal share of extratime by default. */
+    inf->deadl_abs   = 0;
+    inf->latency     = 0;
+    inf->status      = EXTRA_AWARE | SEDF_ASLEEP;
+    inf->extraweight = 1;
+
     if ( v->domain->domain_id == 0 )
     {
-        /*set dom0 to something useful to boot the machine*/
+        /* Domain0 gets 75% guaranteed (15ms every 20ms). */
         inf->period    = MILLISECS(20);
         inf->slice     = MILLISECS(15);
-        inf->latency   = 0;
-        inf->deadl_abs = 0;
-        inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
     }
     else
     {
-        /*other domains run in best effort mode*/
+        /* Best-effort extratime only. */
         inf->period    = WEIGHT_PERIOD;
         inf->slice     = 0;
-        inf->deadl_abs = 0;
-        inf->latency   = 0;
-        inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
-        inf->extraweight = 1;
     }
 
     inf->period_orig = inf->period; inf->slice_orig = inf->slice;
@@ -609,7 +608,16 @@ static void desched_extra_dom(s_time_t n
         PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", 
               inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
               inf->short_block_lost_tot);
+#if 0
+        /*
+         * KAF: If we don't exit short-blocking state at this point
+         * domain0 can steal all CPU for up to 10 seconds before
+         * scheduling settles down (when competing against another
+         * CPU-bound domain). Doing this seems to make things behave
+         * nicely. Noone gets starved by default.
+         */
         if ( inf->short_block_lost_tot <= 0 )
+#endif
         {
             PRINT(4,"Domain %i.%i compensated short block loss!\n",
                   inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/common/schedule.c
--- a/xen/common/schedule.c     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/common/schedule.c     Mon Jun 26 14:53:55 2006 -0500
@@ -33,8 +33,8 @@
 
 extern void arch_getdomaininfo_ctxt(struct vcpu *,
                                     struct vcpu_guest_context *);
-/* opt_sched: scheduler - default to SEDF */
-static char opt_sched[10] = "sedf";
+/* opt_sched: scheduler - default to credit */
+static char opt_sched[10] = "credit";
 string_param("sched", opt_sched);
 
 #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
diff -r 049e669e6a8a -r b29806fb6ba0 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/drivers/char/console.c        Mon Jun 26 14:53:55 2006 -0500
@@ -476,7 +476,11 @@ void init_console(void)
         if ( strncmp(p, "com", 3) == 0 )
             sercon_handle = serial_parse_handle(p);
         else if ( strncmp(p, "vga", 3) == 0 )
+        {
             vgacon_enabled = 1;
+            if ( strncmp(p+3, "[keep]", 6) == 0 )
+                vgacon_enabled++;
+        }
     }
 
     init_vga();
@@ -497,14 +501,47 @@ void init_console(void)
     if ( opt_sync_console )
     {
         serial_start_sync(sercon_handle);
+        add_taint(TAINT_SYNC_CONSOLE);
         printk("Console output is synchronous.\n");
     }
 }
 
-void console_endboot(int disable_vga)
-{
-    if ( disable_vga )
-        vgacon_enabled = 0;
+void console_endboot(void)
+{
+    int i, j;
+
+    if ( opt_sync_console )
+    {
+        printk("**********************************************\n");
+        printk("******* WARNING: CONSOLE OUTPUT IS SYCHRONOUS\n");
+        printk("******* This option is intended to aid debugging "
+               "of Xen by ensuring\n");
+        printk("******* that all output is synchronously delivered "
+               "on the serial line.\n");
+        printk("******* However it can introduce SIGNIFICANT latencies "
+               "and affect\n");
+        printk("******* timekeeping. It is NOT recommended for "
+               "production use!\n");
+        printk("**********************************************\n");
+        for ( i = 0; i < 3; i++ )
+        {
+            printk("%d... ", 3-i);
+            for ( j = 0; j < 100; j++ )
+            {
+                if ( softirq_pending(smp_processor_id()) )
+                    do_softirq();
+                mdelay(10);
+            }
+        }
+        printk("\n");
+    }
+
+    if ( vgacon_enabled )
+    {
+        vgacon_enabled--;
+        printk("Xen is %s VGA console.\n",
+               vgacon_enabled ? "keeping" : "relinquishing");
+    }
 
     /*
      * If user specifies so, we fool the switch routine to redirect input
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/acm/acm_core.h
--- a/xen/include/acm/acm_core.h        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/acm/acm_core.h        Mon Jun 26 14:53:55 2006 -0500
@@ -121,10 +121,11 @@ int acm_init_domain_ssid(domid_t id, ssi
 int acm_init_domain_ssid(domid_t id, ssidref_t ssidref);
 void acm_free_domain_ssid(struct acm_ssid_domain *ssid);
 int acm_init_binary_policy(u32 policy_code);
-int acm_set_policy(void *buf, u32 buf_size, int isuserbuffer);
-int acm_get_policy(void *buf, u32 buf_size);
-int acm_dump_statistics(void *buf, u16 buf_size);
-int acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size);
+int acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size);
+int do_acm_set_policy(void *buf, u32 buf_size);
+int acm_get_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size);
+int acm_dump_statistics(XEN_GUEST_HANDLE(void) buf, u16 buf_size);
+int acm_get_ssid(ssidref_t ssidref, XEN_GUEST_HANDLE(void) buf, u16 buf_size);
 int acm_get_decision(ssidref_t ssidref1, ssidref_t ssidref2, u32 hook);
 int acm_set_policy_reference(u8 * buf, u32 buf_size);
 int acm_dump_policy_reference(u8 *buf, u32 buf_size);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/acm/acm_hooks.h       Mon Jun 26 14:53:55 2006 -0500
@@ -273,7 +273,12 @@ static inline void acm_post_dom0_op(stru
             op->u.createdomain.domain, op->u.createdomain.ssidref);
         break;
     case DOM0_DESTROYDOMAIN:
-        acm_post_domain_destroy(ssid, op->u.destroydomain.domain);
+        if (*ssid == NULL) {
+            printkd("%s: ERROR. SSID unset.\n",
+                    __func__);
+            break;
+        }
+        acm_post_domain_destroy(*ssid, op->u.destroydomain.domain);
         /* free security ssid for the destroyed domain (also if null policy */
         acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid));
         *ssid = NULL;
@@ -281,13 +286,22 @@ static inline void acm_post_dom0_op(stru
     }
 }
 
-static inline void acm_fail_dom0_op(struct dom0_op *op, void *ssid) 
+static inline void acm_fail_dom0_op(struct dom0_op *op, void **ssid)
 {
     switch(op->cmd) {
     case DOM0_CREATEDOMAIN:
         acm_fail_domain_create(
             current->domain->ssid, op->u.createdomain.ssidref);
         break;
+    case DOM0_DESTROYDOMAIN:
+        /*  we don't handle domain destroy failure but at least free the ssid 
*/
+        if (*ssid == NULL) {
+            printkd("%s: ERROR. SSID unset.\n",
+                    __func__);
+            break;
+        }
+        acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid));
+        *ssid = NULL;
     }
 }
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-ia64/debugger.h
--- a/xen/include/asm-ia64/debugger.h   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-ia64/debugger.h   Mon Jun 26 14:53:55 2006 -0500
@@ -41,6 +41,14 @@
 #include <xen/gdbstub.h>
 
 void show_registers(struct cpu_user_regs *regs);
+void dump_stack(void);
+
+static inline void
+show_execution_state(struct cpu_user_regs *regs)
+{
+    show_registers(regs);
+    dump_stack();
+}
 
 // NOTE: on xen struct pt_regs = struct cpu_user_regs
 //       see include/asm-ia64/linux-xen/asm/ptrace.h
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h        Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-ia64/vmx.h        Mon Jun 26 14:53:55 2006 -0500
@@ -42,6 +42,7 @@ extern void vmx_save_state(struct vcpu *
 extern void vmx_save_state(struct vcpu *v);
 extern void vmx_load_state(struct vcpu *v);
 extern void show_registers(struct pt_regs *regs);
+#define show_execution_state show_registers
 extern int vmx_build_physmap_table(struct domain *d);
 extern unsigned long __gpfn_to_mfn_foreign(struct domain *d, unsigned long 
gpfn);
 extern void sync_split_caches(void);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-ia64/xenprocessor.h
--- a/xen/include/asm-ia64/xenprocessor.h       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-ia64/xenprocessor.h       Mon Jun 26 14:53:55 2006 -0500
@@ -237,4 +237,6 @@ typedef union {
     u64 itir;
 } ia64_itir_t;
 
+#define dump_execution_state() printk("FIXME: implement ia64 
dump_execution_state()\n");
+
 #endif // _ASM_IA64_XENPROCESSOR_H
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-x86/hvm/support.h Mon Jun 26 14:53:55 2006 -0500
@@ -132,7 +132,7 @@ extern unsigned int opt_hvm_debug_level;
 #define  __hvm_bug(regs)                                        \
     do {                                                        \
         printk("__hvm_bug at %s:%d\n", __FILE__, __LINE__);     \
-        show_registers(regs);                                   \
+        show_execution_state(regs);                             \
         domain_crash_synchronous();                             \
     } while (0)
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-x86/mm.h  Mon Jun 26 14:53:55 2006 -0500
@@ -103,13 +103,11 @@ struct page_info
 #define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
 #define PGT_mfn_mask        (((1U<<23)-1) | PGT_high_mfn_mask)
 #define PGT_high_mfn_nx     (0x800UL << PGT_high_mfn_shift)
-#define PGT_pae_idx_shift   PGT_high_mfn_shift
 #else
  /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
 #define PGT_mfn_mask        ((1U<<23)-1)
  /* NX for PAE xen is not supported yet */
 #define PGT_high_mfn_nx     (1ULL << 63)
-#define PGT_pae_idx_shift   23
 #endif
 
 #define PGT_score_shift     23
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-x86/processor.h   Mon Jun 26 14:53:55 2006 -0500
@@ -128,6 +128,13 @@
 /* 'arch_vcpu' flags values */
 #define _TF_kernel_mode        0
 #define TF_kernel_mode         (1<<_TF_kernel_mode)
+
+/* #PF error code values. */
+#define PGERR_page_present   (1U<<0)
+#define PGERR_write_access   (1U<<1)
+#define PGERR_user_mode      (1U<<2)
+#define PGERR_reserved_bit   (1U<<3)
+#define PGERR_instr_fetch    (1U<<4)
 
 #ifndef __ASSEMBLY__
 
@@ -522,10 +529,16 @@ extern always_inline void prefetchw(cons
 #endif
 
 void show_stack(struct cpu_user_regs *regs);
+void show_stack_overflow(unsigned long esp);
 void show_registers(struct cpu_user_regs *regs);
+void show_execution_state(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
-int __spurious_page_fault(unsigned long addr);
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
+
+/* Dumps current register and stack state. */
+#define dump_execution_state()                                              \
+    /* NB. Needs interrupts enabled else we end up in fatal_trap(). */      \
+    __asm__ __volatile__ ( "pushf ; sti ; ud2 ; .ascii \"dbg\" ; popf" )
 
 extern void mtrr_ap_init(void);
 extern void mtrr_bp_init(void);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-x86/shadow.h      Mon Jun 26 14:53:55 2006 -0500
@@ -112,6 +112,30 @@ do {                                    
 } while (0)
 #endif
 
+#if CONFIG_PAGING_LEVELS >= 3
+static inline u64 get_cr3_idxval(struct vcpu *v)
+{
+    u64 pae_cr3;
+
+    if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 &&
+            !shadow_mode_log_dirty(v->domain) )
+    {
+        pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
+        return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
+    }
+    else
+        return 0;
+}
+
+#define shadow_key_t u64
+#define index_to_key(x) ((x) << 32)
+#else
+#define get_cr3_idxval(v) (0)
+#define shadow_key_t unsigned long
+#define index_to_key(x)  (0)
+#endif
+
+
 #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) 
- (_max)) << 16) | (_min))
 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
 #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) 
>> 16))
@@ -309,7 +333,7 @@ extern unsigned long get_mfn_from_gpfn_f
 
 struct shadow_status {
     struct shadow_status *next;   /* Pull-to-front list per hash bucket. */
-    unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
+    shadow_key_t  gpfn_and_flags; /* Guest pfn plus flags. */
     unsigned long smfn;           /* Shadow mfn.           */
 };
 
@@ -1180,7 +1204,13 @@ static inline unsigned long __shadow_sta
     struct domain *d, unsigned long gpfn, unsigned long stype)
 {
     struct shadow_status *p, *x, *head;
-    unsigned long key = gpfn | stype;
+    shadow_key_t key;
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == 
PGT_l4_shadow )
+        key = gpfn | stype | index_to_key(get_cr3_idxval(current));
+    else
+#endif
+        key = gpfn | stype;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(gpfn == (gpfn & PGT_mfn_mask));
@@ -1295,10 +1325,11 @@ shadow_max_pgtable_type(struct domain *d
 }
 
 static inline void delete_shadow_status(
-    struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int 
stype)
+    struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int 
stype, u64 index)
 {
     struct shadow_status *p, *x, *n, *head;
-    unsigned long key = gpfn | stype;
+
+    shadow_key_t key = gpfn | stype | index_to_key(index);
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(!(gpfn & ~PGT_mfn_mask));
@@ -1374,11 +1405,12 @@ static inline void delete_shadow_status(
 
 static inline void set_shadow_status(
     struct domain *d, unsigned long gpfn, unsigned long gmfn,
-    unsigned long smfn, unsigned long stype)
+    unsigned long smfn, unsigned long stype, u64 index)
 {
     struct shadow_status *x, *head, *extra;
     int i;
-    unsigned long key = gpfn | stype;
+
+    shadow_key_t key = gpfn | stype | index_to_key(index);
 
     SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-x86/shadow_64.h   Mon Jun 26 14:53:55 2006 -0500
@@ -36,9 +36,9 @@
  */
 extern struct shadow_ops MODE_64_2_HANDLER;
 extern struct shadow_ops MODE_64_3_HANDLER;
+extern struct shadow_ops MODE_64_PAE_HANDLER;
 #if CONFIG_PAGING_LEVELS == 4
 extern struct shadow_ops MODE_64_4_HANDLER;
-extern struct shadow_ops MODE_64_PAE_HANDLER;
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -65,10 +65,6 @@ typedef struct { intpte_t l4; } l4_pgent
 #define ESH_LOG(_f, _a...) ((void)0)
 #endif
 
-#define PAGING_L4      4UL
-#define PAGING_L3      3UL
-#define PAGING_L2      2UL
-#define PAGING_L1      1UL
 #define L_MASK  0xff
 
 #define PAE_PAGING_LEVELS   3
@@ -108,18 +104,14 @@ typedef struct { intpte_t lo; } pgentry_
 #define entry_has_changed(x,y,flags) \
         ( !!(((x).lo ^ (y).lo) & 
((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
 
+/******************************************************************************/
+/*
+ * The macro and inlines are for 32-bit PAE guest 
+ */
+#define PAE_PDPT_RESERVED   0x1e6 /* [8:5], [2,1] */
+
 #define PAE_SHADOW_SELF_ENTRY   259
 #define PAE_L3_PAGETABLE_ENTRIES   4
-
-/******************************************************************************/
-/*
- * The macro and inlines are for 32-bit PAE guest on 64-bit host
- */
-#define PAE_CR3_ALIGN       5
-#define PAE_CR3_IDX_MASK    0x7f
-#define PAE_CR3_IDX_NO      128
-
-#define PAE_PDPT_RESERVED   0x1e6 /* [8:5], [2,1] */
 
 
/******************************************************************************/
 static inline int  table_offset_64(unsigned long va, int level)
@@ -186,19 +178,10 @@ static inline int guest_table_offset_64(
     }
 }
 
-static inline unsigned long get_cr3_idxval(struct vcpu *v)
-{
-    unsigned long pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
-
-    return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
-}
-
-
 #define SH_GUEST_32PAE 1
 #else 
 #define guest_table_offset_64(va, level, index) \
             table_offset_64((va),(level))
-#define get_cr3_idxval(v) 0
 #define SH_GUEST_32PAE 0
 #endif
 
@@ -514,7 +497,10 @@ static inline void entry_general(
 
                 l1_p =(pgentry_64_t *)map_domain_page(smfn);
                 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
-                    entry_remove_flags(l1_p[i], _PAGE_RW);
+                {
+                    if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) )
+                        entry_remove_flags(l1_p[i], _PAGE_RW);
+                }
 
                 unmap_domain_page(l1_p);
             }
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/asm-x86/shadow_ops.h
--- a/xen/include/asm-x86/shadow_ops.h  Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/asm-x86/shadow_ops.h  Mon Jun 26 14:53:55 2006 -0500
@@ -21,6 +21,14 @@
 
 #ifndef _XEN_SHADOW_OPS_H
 #define _XEN_SHADOW_OPS_H
+
+#define PAGING_L4      4UL
+#define PAGING_L3      3UL
+#define PAGING_L2      2UL
+#define PAGING_L1      1UL
+
+#define PAE_CR3_ALIGN       5
+#define PAE_CR3_IDX_MASK    0x7f
 
 #if defined( GUEST_PGENTRY_32 )
 
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/public/arch-x86_32.h  Mon Jun 26 14:53:55 2006 -0500
@@ -74,16 +74,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
  */
 #ifdef CONFIG_X86_PAE
 #define __HYPERVISOR_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_START  0xF5800000
+#define __MACH2PHYS_VIRT_END    0xF6800000
 #else
 #define __HYPERVISOR_VIRT_START 0xFC000000
+#define __MACH2PHYS_VIRT_START  0xFC000000
+#define __MACH2PHYS_VIRT_END    0xFC400000
 #endif
 
 #ifndef HYPERVISOR_VIRT_START
 #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
 #endif
 
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
 #ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
 #endif
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/public/arch-x86_64.h  Mon Jun 26 14:53:55 2006 -0500
@@ -85,21 +85,25 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 
 #define __HYPERVISOR_VIRT_START 0xFFFF800000000000
 #define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
 
 #ifndef HYPERVISOR_VIRT_START
 #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
 #define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
 #endif
 
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
 /* Maximum number of virtual CPUs in multi-processor guests. */
 #define MAX_VIRT_CPUS 32
 
 #ifndef __ASSEMBLY__
-
-/* The machine->physical mapping table starts at this address, read-only. */
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
 
 /*
  * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/public/dom0_ops.h     Mon Jun 26 14:53:55 2006 -0500
@@ -513,6 +513,27 @@ struct dom0_hypercall_init {
 };
 typedef struct dom0_hypercall_init dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
+
+#define DOM0_DOMAIN_SETUP     49
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest  (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+typedef struct dom0_domain_setup {
+    domid_t  domain;          /* domain to be affected */
+    unsigned long flags;      /* XEN_DOMAINSETUP_* */
+#ifdef __ia64__
+    unsigned long bp;         /* mpaddr of boot param area */
+    unsigned long maxmem;        /* Highest memory address for MDT.  */
+#endif
+} dom0_domain_setup_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_domain_setup_t);
+
+#define DOM0_SETTIMEOFFSET    50
+struct dom0_settimeoffset {
+    domid_t  domain;
+    int32_t  time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct dom0_settimeoffset dom0_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_settimeoffset_t);
 
 struct dom0_op {
     uint32_t cmd;
@@ -555,6 +576,8 @@ struct dom0_op {
         struct dom0_irq_permission    irq_permission;
         struct dom0_iomem_permission  iomem_permission;
         struct dom0_hypercall_init    hypercall_init;
+        struct dom0_domain_setup      domain_setup;
+        struct dom0_settimeoffset     settimeoffset;
         uint8_t                       pad[128];
     } u;
 };
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/public/memory.h       Mon Jun 26 14:53:55 2006 -0500
@@ -141,6 +141,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn
 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
 
 /*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    unsigned long v_start, v_end; /* Start and end virtual addresses.   */
+    unsigned long max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/*
  * Sets the GPFN at which a particular page appears in the specified guest's
  * pseudophysical address space.
  * arg == addr of xen_add_to_physmap_t.
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/xen/console.h
--- a/xen/include/xen/console.h Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/xen/console.h Mon Jun 26 14:53:55 2006 -0500
@@ -15,7 +15,7 @@ long read_console_ring(XEN_GUEST_HANDLE(
 long read_console_ring(XEN_GUEST_HANDLE(char), u32 *, int);
 
 void init_console(void);
-void console_endboot(int disable_vga);
+void console_endboot(void);
 
 void console_force_unlock(void);
 void console_force_lock(void);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/xen/lib.h     Mon Jun 26 14:53:55 2006 -0500
@@ -82,6 +82,7 @@ unsigned long long parse_size_and_unit(c
 #define TAINT_UNSAFE_SMP                (1<<0)
 #define TAINT_MACHINE_CHECK             (1<<1)
 #define TAINT_BAD_PAGE                  (1<<2)
+#define TAINT_SYNC_CONSOLE              (1<<3)
 extern int tainted;
 #define TAINT_STRING_MAX_LEN            20
 extern char *print_tainted(char *str);
diff -r 049e669e6a8a -r b29806fb6ba0 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Jun 26 13:09:11 2006 -0400
+++ b/xen/include/xen/sched.h   Mon Jun 26 14:53:55 2006 -0500
@@ -159,6 +159,7 @@ struct domain
 
     /* OProfile support. */
     struct xenoprof *xenoprof;
+    int32_t time_offset_seconds;
 };
 
 struct domain_setup_info
diff -r 049e669e6a8a -r b29806fb6ba0 
patches/linux-2.6.16.13/ipv6-no-autoconf.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/ipv6-no-autoconf.patch    Mon Jun 26 14:53:55 
2006 -0500
@@ -0,0 +1,23 @@
+ net/ipv6/addrconf.c |    2 ++
+ 1 files changed, 2 insertions(+)
+
+Index: build/net/ipv6/addrconf.c
+===================================================================
+--- build.orig/net/ipv6/addrconf.c
++++ build/net/ipv6/addrconf.c
+@@ -2462,6 +2462,7 @@ static void addrconf_dad_start(struct in
+       spin_lock_bh(&ifp->lock);
+ 
+       if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
++          !(dev->flags&IFF_MULTICAST) ||
+           !(ifp->flags&IFA_F_TENTATIVE)) {
+               ifp->flags &= ~IFA_F_TENTATIVE;
+               spin_unlock_bh(&ifp->lock);
+@@ -2546,6 +2547,7 @@ static void addrconf_dad_completed(struc
+       if (ifp->idev->cnf.forwarding == 0 &&
+           ifp->idev->cnf.rtr_solicits > 0 &&
+           (dev->flags&IFF_LOOPBACK) == 0 &&
++          (dev->flags & IFF_MULTICAST) &&
+           (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+               struct in6_addr all_routers;
+ 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/ioemu/hw/piix4acpi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ioemu/hw/piix4acpi.c        Mon Jun 26 14:53:55 2006 -0500
@@ -0,0 +1,481 @@
+/*
+ * PIIX4 ACPI controller emulation
+ *
+ * Winston liwen Wang, winston.l.wang@xxxxxxxxx
+ * Copyright (c) 2006 , Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "vl.h"
+#define FREQUENCE_PMTIMER  3753425
+/* acpi register bit define here  */
+
+/* PM1_STS                                             */
+#define TMROF_STS        (1 << 0)
+#define BM_STS                   (1 << 4)
+#define GBL_STS          (1 << 5)
+#define PWRBTN_STS       (1 << 8)
+#define RTC_STS          (1 << 10)
+#define PRBTNOR_STS       (1 << 11)
+#define WAK_STS          (1 << 15)
+/* PM1_EN                                              */
+#define TMROF_EN          (1 << 0)
+#define GBL_EN            (1 << 5)
+#define PWRBTN_EN         (1 << 8)
+#define RTC_EN           (1 << 10)
+/* PM1_CNT                                             */
+#define SCI_EN            (1 << 0)
+#define GBL_RLS           (1 << 2)
+#define SLP_EN           (1 << 13)
+
+/* Bits of PM1a register define here  */
+#define SLP_TYP_MASK    0x1C00
+#define SLP_VAL         0x1C00
+
+typedef struct AcpiDeviceState AcpiDeviceState;
+AcpiDeviceState *acpi_device_table;
+
+/* State of the emulated ACPI PM timer */
+typedef struct PMTState {
+    uint32_t count;
+    int irq;
+    uint64_t next_pm_time;
+    QEMUTimer *pm_timer;
+}PMTState;
+
+typedef struct PM1Event_BLK {
+    uint16_t pm1_status; /* pm1a_EVT_BLK */
+    uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+}PM1Event_BLK;
+
+typedef struct PCIAcpiState {
+    PCIDevice dev;
+    uint16_t irq;
+    uint16_t pm1_status; /* pm1a_EVT_BLK */
+    uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+    uint16_t pm1_control; /* pm1a_ECNT_BLK */
+    uint32_t pm1_timer; /* pmtmr_BLK */
+} PCIAcpiState;
+
+static PMTState *pmtimer_state;
+static PCIAcpiState *acpi_state;
+
+static void pmtimer_save(QEMUFile *f, void *opaque)
+{
+    PMTState *s = opaque;
+
+    qemu_put_be32s(f, &s->count);
+    qemu_put_be32s(f, &s->irq);
+    qemu_put_be64s(f, &s->next_pm_time);
+    qemu_put_timer(f, s->pm_timer);
+}
+
+static int pmtimer_load(QEMUFile *f, void *opaque, int version_id)
+{
+    PMTState *s = opaque;
+
+    if (version_id != 1)
+        return -EINVAL;
+    qemu_get_be32s(f, &s->count);
+    qemu_get_be32s(f, &s->irq);
+    qemu_get_be64s(f, &s->next_pm_time);
+    qemu_get_timer(f, s->pm_timer);
+    return 0;
+
+}
+
+static inline void acpi_set_irq(PCIAcpiState *s)
+{
+/* no real SCI event need for now, so comment the following line out */
+/*  pic_set_irq(s->irq, 1); */
+    printf("acpi_set_irq: s->irq %x \n",s->irq);
+}
+
+/* Timer callback: re-arm, then advance the emulated PM timer by one tick. */
+static void pm_timer_update(void *opaque)
+{
+    PMTState *s = opaque;
+    s->next_pm_time += muldiv64(1, ticks_per_sec,FREQUENCE_PMTIMER);
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+    acpi_state->pm1_timer ++;
+
+    /* The counter is kept to 24 bits: on reaching 0x1000000, wrap to zero. */
+    if (acpi_state->pm1_timer >= 0x1000000) {
+
+        acpi_state->pm1_timer = 0;
+        acpi_state->pm1_status =   acpi_state->pm1_status | TMROF_STS;
+        /* If TMROF_EN is set then send the irq. */
+        if ((acpi_state->pm1_enable & TMROF_EN) == TMROF_EN) {
+            acpi_set_irq(acpi_state);
+            /* NOTE(review): this clears every enable bit, not just TMROF_EN. */
+            acpi_state->pm1_enable = 0x00; /* only need one time...*/
+        }
+    }
+    s->count = acpi_state->pm1_timer;
+}
+
+static PMTState *pmtimer_init(void)
+{
+    PMTState *s;
+
+    s = qemu_mallocz(sizeof(PMTState));
+    if (!s)
+        return NULL;
+
+    /* s->irq = irq;    */
+
+    s->pm_timer = qemu_new_timer(vm_clock, pm_timer_update, s);
+
+    s->count = 0;
+    s->next_pm_time = qemu_get_clock(vm_clock) + muldiv64(1, 
ticks_per_sec,FREQUENCE_PMTIMER) + 1;
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+
+    register_savevm("pm timer", 1, 1, pmtimer_save, pmtimer_load, s);
+    return s;
+}
+
+static void acpi_reset(PCIAcpiState *s)
+{
+    uint8_t *pci_conf;
+    pci_conf = s->dev.config;
+
+    pci_conf[0x42] = 0x00;
+    pci_conf[0x43] = 0x00;
+    s->irq = 9;
+    s->pm1_status = 0;
+    s->pm1_enable = 0x00;    /* TMROF_EN should cleared */
+    s->pm1_control = SCI_EN; /* SCI_EN */
+    s->pm1_timer = 0;
+}
+
+/*byte access  */
+/* Byte write to PM1 status: writing 1 to TMROF_STS or GBL_STS clears that
+ * bit; all other bits are left untouched. */
+static void acpiPm1Status_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Bitwise NOT: "& !BIT" is "& 0" and would wipe the whole register. */
+    if (val & TMROF_STS)
+        s->pm1_status &= ~TMROF_STS;
+
+    if (val & GBL_STS)
+        s->pm1_status &= ~GBL_STS;
+}
+
+static uint32_t acpiPm1Status_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status;
+/*         printf("acpiPm1Status_readb \n addr %x val:%x\n", addr, val); */
+
+   return val;
+}
+
+/* Byte write to PM1 status bits 15:8. PM1 status bits are write-one-to-clear,
+ * so clear what was written as 1; the former "||" left only 0 or 1 behind. */
+static void acpiPm1StatusP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+    s->pm1_status &= ~((val & 0xff) << 8);
+}
+
+static uint32_t acpiPm1StatusP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_status)>>8;
+    printf("acpiPm1StatusP1_readb \n addr %x val:%x\n", addr, val);
+
+    return val;
+}
+
+static void acpiPm1Enable_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = val;
+/*   printf("acpiPm1Enable_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+/* Byte read handler for the PM1 enable register. */
+static uint32_t acpiPm1Enable_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Bitwise OR: the former logical "||" made every read return 1.
+     * NOTE(review): bit 0 (TMROF_EN) is still forced on, as the original
+     * "0x1" operand suggests was intended -- confirm. */
+    return s->pm1_enable | 0x1;
+}
+
+/* Byte write to PM1 enable bits 15:8; the low byte is left unchanged. */
+static void acpiPm1EnableP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Bitwise merge; the former logical "||" stored only 0 or 1. */
+    s->pm1_enable = (s->pm1_enable & 0x00ff) | ((val & 0xff) << 8);
+}
+
+static uint32_t acpiPm1EnableP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_enable)>>8;
+/*  printf("acpiPm1EnableP1_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val;
+/*  printf("acpiPm1Control_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_control;
+/*    printf("acpiPm1Control_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+/* Byte write to PM1 control bits 15:8; may trigger the power-off request. */
+static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Bitwise merge into the high byte; "||" stored only 0 or 1. */
+    s->pm1_control = (s->pm1_control & 0x00ff) | ((val & 0xff) << 8);
+
+    /* Test the merged register: the raw byte can never hold bits 13:10. */
+    if ((s->pm1_control & SLP_EN) &&
+        (s->pm1_control & SLP_TYP_MASK) == SLP_VAL) {
+        s->pm1_timer = 0x0; /* clear ACPI timer */
+        qemu_system_shutdown_request();
+    }
+}
+
+static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_control)>>8;
+/*    printf("acpiPm1ControlP1_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+
+/* word access   */
+
+/* Word write to PM1 status: writing 1 to TMROF_STS or GBL_STS clears that
+ * bit; all other bits are left untouched. */
+static void acpiPm1Status_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Bitwise NOT: "& !BIT" is "& 0" and would wipe the whole register. */
+    if (val & TMROF_STS)
+        s->pm1_status &= ~TMROF_STS;
+
+    if (val & GBL_STS)
+        s->pm1_status &= ~GBL_STS;
+}
+
+static uint32_t acpiPm1Status_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status;
+/*    printf("acpiPm1Status_readw \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Enable_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = val;
+/*    printf("acpiPm1Enable_writew \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Enable_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_enable;
+/*    printf("acpiPm1Enable_readw \n addr %x val:%x\n", addr, val); */
+
+   return val;
+}
+
+static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val;
+/*    printf("acpiPm1Control_writew \n addr %x val:%x\n", addr, val); */
+
+    // Check for power off request
+
+    if (((val & SLP_EN) != 0) &&
+        ((val & SLP_TYP_MASK) == SLP_VAL)) {
+        qemu_system_shutdown_request();
+    }
+
+}
+
+static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_control;
+/*    printf("acpiPm1Control_readw \n addr %x val:%x\n", addr, val);  */
+
+    return val;
+}
+
+/* dword access */
+
+static void acpiPm1Event_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_status = val;
+    s->pm1_enable = val>>16;
+/*     printf("acpiPm1Event_writel \n addr %x val:%x \n", addr, val); */
+
+}
+
+/* Dword read of the PM1 event block: status in bits 15:0, enable in 31:16. */
+static uint32_t acpiPm1Event_readl(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+
+    /* Widen first: uint16_t promotes to int, so "<< 16" with bit 15 set
+     * would overflow a signed int. */
+    return s->pm1_status | ((uint32_t)s->pm1_enable << 16);
+
+}
+
+static void acpiPm1Timer_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_timer = val;
+/*    printf("acpiPm1Timer_writel \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1Timer_readl(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_timer;
+/*    printf("acpiPm1Timer_readl \n addr %x val:%x\n", addr, val); */
+    return val;
+}
+
+static void acpi_map(PCIDevice *pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    PCIAcpiState *d = (PCIAcpiState *)pci_dev;
+
+    printf("register acpi io\n");
+
+    /* Byte access */
+    register_ioport_write(addr, 1, 1, acpiPm1Status_writeb, d);
+    register_ioport_read(addr, 1, 1, acpiPm1Status_readb, d);
+    register_ioport_write(addr+1, 1, 1, acpiPm1StatusP1_writeb, d);
+    register_ioport_read(addr+1, 1, 1, acpiPm1StatusP1_readb, d);
+
+    register_ioport_write(addr + 2, 1, 1, acpiPm1Enable_writeb, d);
+    register_ioport_read(addr + 2, 1, 1, acpiPm1Enable_readb, d);
+    register_ioport_write(addr + 2 +1, 1, 1, acpiPm1EnableP1_writeb, d);
+    register_ioport_read(addr + 2 +1, 1, 1, acpiPm1EnableP1_readb, d);
+
+    register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
+    register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
+    register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
+    register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
+
+    /* Word access */
+    register_ioport_write(addr, 2, 2, acpiPm1Status_writew, d);
+    register_ioport_read(addr, 2, 2, acpiPm1Status_readw, d);
+
+    register_ioport_write(addr + 2, 2, 2, acpiPm1Enable_writew, d);
+    register_ioport_read(addr + 2, 2, 2, acpiPm1Enable_readw, d);
+
+    register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
+    register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
+
+    /* DWord access */
+    register_ioport_write(addr, 4, 4, acpiPm1Event_writel, d);
+    register_ioport_read(addr, 4, 4, acpiPm1Event_readl, d);
+
+    register_ioport_write(addr + 8, 4, 4, acpiPm1Timer_writel, d);
+    register_ioport_read(addr + 8, 4, 4, acpiPm1Timer_readl, d);
+}
+
+/* PIIX4 acpi pci configuration space, func 3 */
+void pci_piix4_acpi_init(PCIBus *bus)
+{
+    PCIAcpiState *d;
+    uint8_t *pci_conf;
+
+    /* register a function 3 of PIIX4 */
+    d = (PCIAcpiState *)pci_register_device(
+        bus, "PIIX4 ACPI", sizeof(PCIAcpiState),
+        ((PCIDevice *)piix3_state)->devfn + 3, NULL, NULL);
+
+    acpi_state = d;
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0x86;  /* Intel */
+    pci_conf[0x01] = 0x80;
+    pci_conf[0x02] = 0x13;
+    pci_conf[0x03] = 0x71;
+    pci_conf[0x08] = 0x01;  /* B0 stepping */
+    pci_conf[0x09] = 0x00;  /* base class */
+    pci_conf[0x0a] = 0x80;  /* Sub class */
+    pci_conf[0x0b] = 0x06;
+    pci_conf[0x0e] = 0x00;
+    pci_conf[0x3d] = 0x01;  /* Hardwired to PIRQA is used */
+
+    pci_register_io_region((PCIDevice *)d, 4, 0x10,
+                           PCI_ADDRESS_SPACE_IO, acpi_map);
+    pmtimer_state = pmtimer_init();
+    acpi_reset (d);
+}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/python/xen/util/SSHTransport.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/SSHTransport.py     Mon Jun 26 14:53:55 2006 -0500
@@ -0,0 +1,102 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
+# Copyright (C) 2006 XenSource Inc.
+#============================================================================
+
+"""
+XML-RPC SSH transport.
+"""
+
+from xmlrpclib import getparser, Fault
+from subprocess import Popen, PIPE
+from getpass import getuser
+from fcntl import ioctl
+import errno
+import os
+import termios
+
+
+def getHTTPURI(uri):
+    """Build the transport and HTTP URI for a scheme://[user@]host[/path] URI.
+
+    The user defaults to getuser() and the path to 'RPC2'; the scheme is not checked.
+    """
+    (scheme, remainder) = uri.split(':', 1)
+    if remainder[:2] != '//':
+        raise ValueError("Invalid ssh URL '%s'" % uri)
+    remainder = remainder[2:]
+    login = getuser()
+    if '@' in remainder:
+        (login, remainder) = remainder.split('@', 1)
+    pieces = remainder.split('/', 1)
+    host = pieces[0]
+    path = 'RPC2'
+    if len(pieces) == 2 and pieces[1]:
+        path = pieces[1]
+    return SSHTransport(host, login), 'http://%s/%s' % (host, path)
+
+
+class SSHTransport(object):  # XML-RPC transport: HTTP over the pipes of an ssh child
+    def __init__(self, host, user, askpass=None):
+        self.host = host
+        self.user = user
+        self.askpass = askpass
+        self.ssh = None
+
+    def getssh(self):
+        if self.ssh is None:
+            if self.askpass:
+                f = open('/dev/tty', 'w')
+                try:
+                    os.environ['SSH_ASKPASS'] = self.askpass
+                    ioctl(f.fileno(), termios.TIOCNOTTY)  # give up the controlling tty
+                finally:
+                    f.close()
+            # the ssh child is started on first use and reused by later calls
+            cmd = ['ssh', '%s@%s' % (self.user, self.host), 'xm serve']
+            try:
+                self.ssh = Popen(cmd, bufsize=0, stdin=PIPE, stdout=PIPE)
+            except OSError, (err, msg):
+                if err == errno.ENOENT:
+                    raise Fault(0, "ssh executable not found!")
+                raise
+        return self.ssh
+
+    def request(self, host, handler, request_body, verbose=0):
+        """POST request_body down the ssh pipe and return the parsed reply; Fault unless 200."""
+        p, u = getparser()
+        ssh = self.getssh()
+        ssh.stdin.write("""POST /%s HTTP/1.1
+User-Agent: Xen
+Host: %s
+Content-Type: text/xml
+Content-Length: %d
+
+%s""" % (handler, host, len(request_body), request_body))
+        ssh.stdin.flush()
+
+        content_length = 0
+        line = ssh.stdout.readline()
+        status = line.split()  # [] when ssh closed the pipe without replying
+        if len(status) < 2 or status[1] != '200':
+            raise Fault(0, 'Server returned %s' % (' '.join(status[1:])))
+        while line not in ['', '\r\n', '\n']:
+            if line.lower().startswith('content-length:'):
+                content_length = int(line[15:].strip())
+            line = ssh.stdout.readline()
+        p.feed(ssh.stdout.read(content_length))
+        p.close()
+        return u.close()
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/Makefile
--- a/tools/blktap/Makefile     Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-MAJOR    = 3.0
-MINOR    = 0
-SONAME   = libblktap.so.$(MAJOR)
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-SUBDIRS :=
-SUBDIRS += ublkback
-#SUBDIRS += parallax
-
-BLKTAP_INSTALL_DIR = /usr/sbin
-
-INSTALL            = install
-INSTALL_PROG       = $(INSTALL) -m0755
-INSTALL_DIR        = $(INSTALL) -d -m0755
-
-INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
-
-LIBS     := -lpthread -lz
-
-SRCS     :=
-SRCS     += blktaplib.c xenbus.c blkif.c
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# get asprintf():
-CFLAGS   += -D _GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-CFLAGS   += $(INCLUDES) 
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS   :=
-#IBINS   += blkdump
-
-LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-
-.PHONY: all
-all: mk-symlinks libblktap.so #blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: install
-install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include
-       $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
-       #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: clean
-clean:
-       rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: rpm
-rpm: all
-       rm -rf staging
-       mkdir staging
-       mkdir staging/i386
-       rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
-               --define "_rpmdir$$PWD/staging" -bb rpm.spec
-       mv staging/i386/*.rpm .
-       rm -rf staging
-
-libblktap.so: $(OBJS) 
-       $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
-             -L$(XEN_XENSTORE) -l xenstore                       \
-             -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
-       ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
-       ln -sf libblktap.so.$(MAJOR) $@
-
-blkdump: libblktap.so
-       $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
-             -l blktap blkdump.c
-
-.PHONY: TAGS clean install mk-symlinks rpm
-
-.PHONY: TAGS
-TAGS:
-       etags -t $(SRCS) *.h
-
--include $(DEPS)
-
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/README
--- a/tools/blktap/README       Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-Block Tap User-level Interfaces
-Andrew Warfield
-andrew.warfield@xxxxxxxxxxxx
-February 8, 2005
-
-NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
-mileage may vary.  Don't use it for anything important.  Please. ;)
-
-NOTE #2: All of the interfaces here are likely to change.  This is all
-early code, and I am checking it in because others want to play with
-it.  If you use it for anything, please let me know!
-
-Overview:
----------
-
-This directory contains a library and set of example applications for
-the block tap device.  The block tap hooks into the split block device
-interfaces above Xen allowing them to be extended.  This extension can
-be done in userspace with the help of a library.
-
-The tap can be installed either as an interposition domain in between
-a frontend and backend driver pair, or as a terminating backend, in
-which case it is responsible for serving all requests itself.
-
-There are two reasons that you might want to use the tap,
-corresponding to these configurations:
-
- 1. To examine or modify a stream of block requests while they are
-    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
-
- 2. To prototype a new backend driver, serving requests from the tap
-    rather than passing them along to the XenLinux blkback driver.
-    (e.g. to forward block requests to a remote host)
-
-
-Interface:
-----------
-
-At the moment, the tap interface is similar in spirit to that of the
-Linux netfilter.  Requests are messages from a client (frontend)
-domain to a disk (backend) domain.  Responses are messages travelling
-back, acknowledging the completion of a request.  the library allows
-chains of functions to be attached to these events.  In addition,
-hooks may be attached to handle control messages, which signify things
-like connections from new domains.
-
-At present the control messages especially expose a lot of the
-underlying driver interfaces.  This may change in the future in order
-to simplify writing hooks.
-
-Here are the public interfaces:
-
-These allow hook functions to be chained:
-
- void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
- void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
- void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-
-This allows a response to be injected, in the case where a request has
-been removed using BLKTAP_STOLEN.
-
- void blktap_inject_response(blkif_response_t *);
-
-These let you add file descriptors and handlers to the main poll loop:
-
- int  blktap_attach_poll(int fd, short events, int (*func)(int));
- void blktap_detach_poll(int fd);
-
-This starts the main poll loop:
-
- int  blktap_listen(void);
-
-Example:
---------
-
-blkimage.c uses an image on the local file system to serve requests to
-a domain.  Here's what it looks like:
-
----[blkimg.c]---
-
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
-    image_init();
-    
-    blktap_register_ctrl_hook("image_control", image_control);
-    blktap_register_request_hook("image_request", image_request);
-    blktap_listen();
-    
-    return 0;
-}
-
-----------------
-
-All of the real work is in blkimglib.c, but this illustrates the
-actual tap interface well enough.  image_control() will be called with
-all control messages.  image_request() handles requests.  As it reads
-from an on-disk image file, no requests are ever passed on to a
-backend, and so there will be no responses to process -- so there is
-nothing registered as a response hook.
-
-Other examples:
----------------
-
-Here is a list of other examples in the directory:
-
-Things that terminate a block request stream:
-
-  blkimg    - Use a image file/device to serve requests
-  blkgnbd   - Use a remote gnbd server to serve requests
-  blkaio    - Use libaio... (DOES NOT WORK)
-  
-Things that don't:
-
-  blkdump   - Print in-flight requests.
-  blkcow    - Really inefficient copy-on-write disks using libdb to store
-              writes.
-
-There are examples of plugging these things together, for instance
-blkcowgnbd is a read-only gnbd device with copy-on-write to a local
-file.
-
-TODO:
------
-
-- Make session tracking work.  At the moment these generally just handle a 
-  single front-end client at a time.
-
-- Integrate with Xend.  Need to cleanly pass a image identifier in the connect
-  message.
-
-- Make an asynchronous file-io terminator.  The libaio attempt is
-  tragically stalled because mapped foreign pages make pfn_valid fail
-  (they are VM_IO), and so cannot be passed to aio as targets.  A
-  better solution may be to tear the disk interfaces out of the real
-  backend and expose them somehow.
-
-- Make CoW suck less.
-
-- Do something more along the lines of dynamic linking for the
-  plugins, so thatthey don't all need a new main().
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/README.sept05
--- a/tools/blktap/README.sept05        Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-The blktap has been rewritten substantially based on the current
-blkback driver.  I've removed passthrough support, as this is broken
-by the move to grant tables and the lack of transitive grants.  A
-blktap VM is now only capable of terminating block requests in
-userspace.
-
-ublkback/ contains a _very_ initial cut at a user-level version of the block
-backend driver.  It gives a working example of how the current tap
-interfaces are used, in particular w.r.t. the vbd directories in
-xenstore.
-
-parallax/ contains fairly recent parallax code.  This does not run on
-the changed blktap interface, but should only be a couple of hours
-work to get going again.
-
-All of the tricky bits are done, but there is plenty of cleaning to
-do, and the top-level functionality is not here yet.  At the moment,
-the daemon ignores the pdev requested by the tools and opens the file 
-or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
-
-TODO:
-1. Fix to allow pdev in the store to specify the device to open.
-2. Add support (to tools as well) to mount arbitrary files...
-   just write the filename to mount into the store, instead of pdev.
-3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
-   - creating a blkif should take a reference.
-   - each inflight request should take a reference on dequeue in blktaplib
-   - sending responses should drop refs.
-   - blkif should be implicitly freed when refcounts fall to 0.
-4. Modify the parallax req/rsp code as per ublkback to use the new tap 
-   interfaces. 
-5. Write a front end that allows parallax and normal mounts to coexist
-6. Allow blkback and blktap to run at the same time.
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c    Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/* blkdump.c
- *
- * show a running trace of block requests as they fly by.
- * 
- * (c) 2004 Andrew Warfield.
- */
- 
-#include <stdio.h>
-#include "blktaplib.h"
- 
-int request_print(blkif_request_t *req)
-{
-    int i;
-    
-    if ( (req->operation == BLKIF_OP_READ) ||
-         (req->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
-                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
-                blkif_op_name[req->operation], 
-                req->nr_segments, req->handle, 
-                req->sector_number);
-        
-        
-        for (i=0; i < req->nr_segments; i++) {
-            printf("              (gref: 0x%8x start: %u stop: %u)\n",
-                   req->seg[i].gref,
-                   req->seg[i].first_sect,
-                   req->seg[i].last_sect);
-        }
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    
-    return BLKTAP_PASS;
-}
-
-int response_print(blkif_response_t *rsp)
-{   
-    if ( (rsp->operation == BLKIF_OP_READ) ||
-         (rsp->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u>%5s] (status: %d)\n", 
-                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
-                blkif_op_name[rsp->operation], 
-                rsp->status);
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    return BLKTAP_PASS;
-}
-
-int main(int argc, char *argv[])
-{
-    blktap_register_request_hook("request_print", request_print);
-    blktap_register_response_hook("response_print", response_print);
-    blktap_listen();
-    
-    return 0;
-}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/blkif.c
--- a/tools/blktap/blkif.c      Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-/*
- * blkif.c
- * 
- * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <err.h>
-
-#include "blktaplib.h"
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-blkif_t *alloc_blkif(domid_t domid)
-{
-    blkif_t *blkif;
-
-    blkif = (blkif_t *)malloc(sizeof(blkif_t));
-    if (!blkif)
-        return NULL;
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid = domid;
-
-    return blkif;
-}
-
-static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
-{
-    new_blkif_hook = fn;
-}
-
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly)
-{
-    domid_t domid;
-    blkif_t **pblkif;
-    
-    if (blkif == NULL)
-        return -EINVAL;
-
-    domid = blkif->domid;
-    blkif->handle   = handle;
-    blkif->pdev     = pdev;
-    blkif->readonly = readonly;
-
-    /*
-     * Call out to the new_blkif_hook. The tap application should define this,
-     * and it should return having set blkif->ops
-     * 
-     */
-    if (new_blkif_hook == NULL)
-    {
-        warn("Probe detected a new blkif, but no new_blkif_hook!");
-        return -1;
-    }
-    new_blkif_hook(blkif);
-
-    /* Now wire it in. */
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists\n");
-            return -1;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-    blkif->hash_next = NULL;
-    *pblkif = blkif;
-
-    return 0;
-}
-
-void free_blkif(blkif_t *blkif)
-{
-    blkif_t **pblkif, *curs;
-    
-    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
-    while ( (curs = *pblkif) != NULL )
-    {
-        if ( blkif == curs )
-        {
-            *pblkif = curs->hash_next;
-        }
-        pblkif = &curs->hash_next;
-    }
-    free(blkif);
-}
-
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
-{
-    request_hook_t *rh_ent, **c;
-    
-    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
-    if (!rh_ent) 
-    {
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->request_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                                  int (*rh)(blkif_t *, blkif_response_t *, int)) 
-{
-    response_hook_t *rh_ent, **c;
-    
-    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
-    if (!rh_ent) 
-    { 
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->response_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_print_hooks(blkif_t *blkif)
-{
-    request_hook_t  *req_hook;
-    response_hook_t *rsp_hook;
-    
-    DPRINTF("Request Hooks:\n");
-    req_hook = blkif->request_hook_chain;
-    while (req_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
-        req_hook = req_hook->next;
-    }
-    
-    DPRINTF("Response Hooks:\n");
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-long int vbd_size(blkif_t *blkif)
-{
-    return 1000000000;
-}
-
-long int vbd_secsize(blkif_t *blkif)
-{
-    return 512;
-}
-
-unsigned vbd_info(blkif_t *blkif)
-{
-    return 0;
-}
-
-
-void __init_blkif(void)
-{    
-    memset(blkif_hash, 0, sizeof(blkif_hash));
-}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c  Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,453 +0,0 @@
-/*
- * blktaplib.c
- * 
- * userspace interface routines for the blktap driver.
- *
- * (threadsafe(r) version) 
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <err.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <xs.h>
-                                                                     
-#define __COMPILING_BLKTAP_LIB
-#include "blktaplib.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-#define DEBUG_RING_IDXS 0
-
-#define POLLRDNORM     0x040 
-
-#define BLKTAP_IOCTL_KICK 1
-
-
-void got_sig_bus();
-void got_sig_int();
-
-/* in kernel these are opposite, but we are a consumer now. */
-blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
-blkif_front_ring_t be_ring; 
-
-unsigned long mmap_vstart = 0;
-char *blktap_mem;
-int fd = 0;
-
-#define BLKTAP_RING_PAGES       1 /* Front */
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
-    
-int bad_count = 0;
-void bad(void)
-{
-    bad_count ++;
-    if (bad_count > 50) exit(0);
-}
-/*-----[ ID Manipulation from tap driver code ]--------------------------*/
-
-#define ACTIVE_RING_IDX unsigned short
-
-inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
-    return ( (fe_dom << 16) | idx );
-}
-
-inline unsigned int ID_TO_IDX(unsigned long id) 
-{ 
-        return ( id & 0x0000ffff );
-}
-
-inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-
-static int (*request_hook)(blkif_request_t *req) = NULL;
-static int (*response_hook)(blkif_response_t *req) = NULL;
-        
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-/*
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
-    blkif_request_t *req_d;
-    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&be_prod_mutex);
-    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
-    memcpy(req_d, req, sizeof(blkif_request_t));
-    wmb();
-    be_ring.req_prod_pvt++;
-    pthread_mutex_unlock(&be_prod_mutex);
-    
-    return 0;
-}
-*/
-
-inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *rsp_d;
-    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&fe_prod_mutex);
-    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
-    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
-    wmb();
-    fe_ring.rsp_prod_pvt++;
-    pthread_mutex_unlock(&fe_prod_mutex);
-
-    return 0;
-}
-
-static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
-{
-    response_hook_t  *rsp_hook;
-    
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        switch(rsp_hook->func(blkif, rsp, 1))
-        {
-        case BLKTAP_PASS:
-            break;
-        default:
-            printf("Only PASS is supported for resp hooks!\n");
-        }
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
-{
-    
-    apply_rsp_hooks(blkif, rsp);
-  
-    write_rsp_to_fe_ring(rsp);
-}
-
-void blktap_kick_responses(void)
-{
-    pthread_mutex_lock(&push_mutex);
-    
-    RING_PUSH_RESPONSES(&fe_ring);
-    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-    
-    pthread_mutex_unlock(&push_mutex);
-}
-
-/*-----[ Polling fd listeners ]------------------------------------------*/
-
-#define MAX_POLLFDS 64
-
-typedef struct {
-    int (*func)(int fd);
-    struct pollfd *pfd;
-    int fd;
-    short events;
-    int active;
-} pollhook_t;
-
-static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
-static pollhook_t     pollhooks[MAX_POLLFDS];
-static unsigned int   ph_freelist[MAX_POLLFDS];
-static unsigned int   ph_cons, ph_prod;
-#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
-#define PH_IDX(x) (x % MAX_POLLFDS)
-
-int blktap_attach_poll(int fd, short events, int (*func)(int fd))
-{
-    pollhook_t *ph;
-    
-    if (nr_pollhooks() == MAX_POLLFDS) {
-        printf("Too many pollhooks!\n");
-        return -1;
-    }
-    
-    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
-    
-    ph->func        = func;
-    ph->fd          = fd;
-    ph->events      = events;
-    ph->active      = 1;
-    
-    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
-            nr_pollhooks());
-    
-    return 0;
-}
-
-void blktap_detach_poll(int fd)
-{
-    int i;
-    
-    for (i=0; i<MAX_POLLFDS; i++)
-        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
-            ph_freelist[PH_IDX(ph_prod++)] = i;
-            pollhooks[i].pfd->fd = -1;
-            pollhooks[i].active = 0;
-            break;
-        }
-        
-    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
-            nr_pollhooks());
-}
-
-void pollhook_init(void)
-{
-    int i;
-    
-    for (i=0; i < MAX_POLLFDS; i++) {
-        ph_freelist[i] = (i+1) % MAX_POLLFDS;
-        pollhooks[i].active = 0;
-    }
-    
-    ph_cons = 0;
-    ph_prod = MAX_POLLFDS;
-}
-
-void __attribute__ ((constructor)) blktaplib_init(void)
-{
-    pollhook_init();
-}
-
-/*-----[ The main listen loop ]------------------------------------------*/
-
-int blktap_listen(void)
-{
-    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
-    struct xs_handle *h;
-    blkif_t *blkif;
-
-    /* comms rings: */
-    blkif_request_t  *req;
-    blkif_response_t *rsp;
-    blkif_sring_t    *sring;
-    RING_IDX          rp, i, pfd_count; 
-    
-    /* pending rings */
-    blkif_request_t req_pending[BLK_RING_SIZE];
-    /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
-    
-    /* handler hooks: */
-    request_hook_t   *req_hook;
-    response_hook_t  *rsp_hook;
-    
-    signal (SIGBUS, got_sig_bus);
-    signal (SIGINT, got_sig_int);
-    
-    __init_blkif();
-
-    fd = open("/dev/blktap", O_RDWR);
-    if (fd == -1)
-        err(-1, "open failed!");
-
-    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
-             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-    if ((int)blktap_mem == -1) 
-        err(-1, "mmap failed!");
-
-    /* assign the rings to the mapped memory */
-/*
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
-    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-*/  
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
-    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
-
-    mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
-
-
-    /* Set up store connection and watch. */
-    h = xs_daemon_open();
-    if (h == NULL) 
-        err(-1, "xs_daemon_open");
-    
-    ret = add_blockdevice_probe_watch(h, "Domain-0");
-    if (ret != 0)
-        err(0, "adding device probewatch");
-    
-    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
-
-    while(1) {
-        int ret;
-        
-        /* build the poll list */
-        pfd_count = 0;
-        for ( i=0; i < MAX_POLLFDS; i++ ) {
-            pollhook_t *ph = &pollhooks[i];
-            
-            if (ph->active) {
-                pfd[pfd_count].fd     = ph->fd;
-                pfd[pfd_count].events = ph->events;
-                ph->pfd               = &pfd[pfd_count];
-                pfd_count++;
-            }
-        }
-
-        tap_pfd = pfd_count++;
-        pfd[tap_pfd].fd = fd;
-        pfd[tap_pfd].events = POLLIN;
-
-        store_pfd = pfd_count++;
-        pfd[store_pfd].fd = xs_fileno(h);
-        pfd[store_pfd].events = POLLIN;
-        
-        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
-            if (DEBUG_RING_IDXS)
-                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
-            continue;
-        }
-
-        for (i=0; i < MAX_POLLFDS; i++) {
-            if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
-                pollhooks[i].func(pollhooks[i].pfd->fd);
-        }
-        
-        if (pfd[store_pfd].revents) {
-            ret = xs_fire_next_watch(h);
-        }
-
-        if (pfd[tap_pfd].revents) 
-        {    
-            /* empty the fe_ring */
-            notify_fe = 0;
-            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
-            rp = fe_ring.sring->req_prod;
-            rmb();
-            for (i = fe_ring.req_cons; i != rp; i++)
-            {
-                int done = 0; 
-
-                req = RING_GET_REQUEST(&fe_ring, i);
-                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
-                req = &req_pending[ID_TO_IDX(req->id)];
-
-                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
-
-                if (blkif != NULL)
-                {
-                    req_hook = blkif->request_hook_chain;
-                    while (req_hook != NULL)
-                    {
-                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
-                        {
-                        case BLKTAP_RESPOND:
-                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
-                            write_rsp_to_fe_ring((blkif_response_t *)req);
-                            notify_fe = 1;
-                            done = 1;
-                            break;
-                        case BLKTAP_STOLEN:
-                            done = 1;
-                            break;
-                        case BLKTAP_PASS:
-                            break;
-                        default:
-                            printf("Unknown request hook return value!\n");
-                        }
-                        if (done) break;
-                        req_hook = req_hook->next;
-                    }
-                }
-
-                if (done == 0) 
-                {
-                    /* this was:  */
-                    /* write_req_to_be_ring(req); */
-
-                    unsigned long id = req->id;
-                    unsigned short operation = req->operation;
-                    printf("Unterminated request!\n");
-                    rsp = (blkif_response_t *)req;
-                    rsp->id = id;
-                    rsp->operation = operation;
-                    rsp->status = BLKIF_RSP_ERROR;
-                    write_rsp_to_fe_ring(rsp);
-                    notify_fe = 1;
-                    done = 1;
-                }
-
-            }
-            fe_ring.req_cons = i;
-
-            /* empty the be_ring */
-/*
-            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
-            rp = be_ring.sring->rsp_prod;
-            rmb();
-            for (i = be_ring.rsp_cons; i != rp; i++)
-            {
-
-                rsp = RING_GET_RESPONSE(&be_ring, i);
-                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
-                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
-
-                DPRINTF("copying a be request\n");
-
-                apply_rsp_hooks(rsp);
-                write_rsp_to_fe_ring(rsp);
-            }
-            be_ring.rsp_cons = i;
-*/
-            /* notify the domains */
-/*
-            if (notify_be) {
-                DPRINTF("notifying be\n");
-pthread_mutex_lock(&push_mutex);
-                RING_PUSH_REQUESTS(&be_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
-pthread_mutex_unlock(&push_mutex);
-            }
-*/
-            if (notify_fe) {
-                DPRINTF("notifying fe\n");
-                pthread_mutex_lock(&push_mutex);
-                RING_PUSH_RESPONSES(&fe_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-                pthread_mutex_unlock(&push_mutex);
-            }
-        }        
-    }
-
-
-    munmap(blktap_mem, PAGE_SIZE);
-
- mmap_failed:
-    close(fd);
-
- open_failed:
-    return 0;
-}
-
-void got_sig_bus() {
-    printf("Attempted to access a page that isn't.\n");
-    exit(-1);
-}
-
-void got_sig_int() {
-    DPRINTF("quitting -- returning to passthrough mode.\n");
-    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
-    close(fd);
-    fd = 0;
-    exit(0);
-} 
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h  Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-/* blktaplib.h
- *
- * userland accessors to the block tap.
- *
- * Sept 2/05 -- I'm scaling this back to only support block remappings
- * to user in a backend domain.  Passthrough and interposition can be readded
- * once transitive grants are available.
- */
- 
-#ifndef __BLKTAPLIB_H__
-#define __BLKTAPLIB_H__
-
-#include <xenctrl.h>
-#include <sys/user.h>
-#include <xen/xen.h>
-#include <xen/io/blkif.h>
-#include <xen/io/ring.h>
-#include <xen/io/domain_controller.h>
-#include <xs.h>
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLK_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-#define BLKTAP_IOCTL_PRINT_IDXS      100   
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) );
-/*
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
-*/
-}
-
-/* Return values for handling messages in hooks. */
-#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
-#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
-#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
-
-//#define domid_t unsigned short
-
-inline unsigned int ID_TO_IDX(unsigned long id);
-inline domid_t ID_TO_DOM(unsigned long id);
-
-int  blktap_attach_poll(int fd, short events, int (*func)(int));
-void blktap_detach_poll(int fd);
-int  blktap_listen(void);
-
-struct blkif;
-
-typedef struct request_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_request_t *, int);
-    struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_response_t *, int);
-    struct response_hook_st *next;
-} response_hook_t;
-
-struct blkif_ops {
-    long int (*get_size)(struct blkif *blkif);
-    long int (*get_secsize)(struct blkif *blkif);
-    unsigned (*get_info)(struct blkif *blkif);
-};
-
-typedef struct blkif {
-    domid_t domid;
-    long int handle;
-
-    long int pdev;
-    long int readonly;
-
-    enum { DISCONNECTED, CONNECTED } state;
-
-    struct blkif_ops *ops;
-    request_hook_t *request_hook_chain;
-    response_hook_t *response_hook_chain;
-
-    struct blkif *hash_next;
-
-    void *prv;  /* device-specific data */
-} blkif_t;
-
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-blkif_t *alloc_blkif(domid_t domid);
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly);
-void free_blkif(blkif_t *blkif);
-void __init_blkif(void);
-
-
-/* xenstore/xenbus: */
-extern int add_blockdevice_probe_watch(struct xs_handle *h, 
-                                       const char *domname);
-int xs_fire_next_watch(struct xs_handle *h);
-
-
-void blkif_print_hooks(blkif_t *blkif);
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_request_t *, int));
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_response_t *, int));
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
-void blktap_kick_responses(void);
-
-/* this must match the underlying driver... */
-#define MAX_PENDING_REQS 64
-
-/* Accessing attached data page mappings */
-#define MMAP_PAGES                                              \
-    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                                   \
-    (mmap_vstart +                                              \
-     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
-     ((_seg) * PAGE_SIZE))
-
-extern unsigned long mmap_vstart;
-
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
-    [BLKIF_OP_READ]       = "READ",
-    [BLKIF_OP_WRITE]      = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
-    
-#endif /* __BLKTAPLIB_H__ */
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/list.h
--- a/tools/blktap/list.h       Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * list.h
- * 
- * This is a subset of linux's list.h intended to be used in user-space.
- * 
- */
-
-#ifndef __LIST_H__
-#define __LIST_H__
-
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-struct list_head {
-        struct list_head *next, *prev;
-};
- 
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
- 
-#define LIST_HEAD(name) \
-        struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void __list_add(struct list_head *new,
-                              struct list_head *prev,
-                              struct list_head *next)
-{
-        next->prev = new;
-        new->next = next;
-        new->prev = prev;
-        prev->next = new;
-}
-
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-        __list_add(new, head, head->next);
-}
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-        next->prev = prev;
-        prev->next = next;
-}
-static inline void list_del(struct list_head *entry)
-{
-        __list_del(entry->prev, entry->next);
-        entry->next = LIST_POISON1;
-        entry->prev = LIST_POISON2;
-}
-#define list_entry(ptr, type, member)                                   \
-        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each_entry(pos, head, member)                          \
-        for (pos = list_entry((head)->next, typeof(*pos), member);      \
-             &pos->member != (head);                                    \
-             pos = list_entry(pos->member.next, typeof(*pos), member))
-
-#endif /* __LIST_H__ */
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/Makefile
--- a/tools/blktap/parallax/Makefile    Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-PARALLAX_INSTALL_DIR   = /usr/sbin
-
-INSTALL         = install
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
-
-INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
-
-LDFLAGS = -L.. -lpthread -lz -lblktap
-
-#PLX_SRCS := 
-PLX_SRCS := vdi.c 
-PLX_SRCS += radix.c 
-PLX_SRCS += snaplog.c
-PLX_SRCS += blockstore.c 
-PLX_SRCS += block-async.c
-PLX_SRCS += requests-async.c
-VDI_SRCS := $(PLX_SRCS)
-PLX_SRCS += parallax.c
-
-#VDI_TOOLS :=
-VDI_TOOLS := vdi_create
-VDI_TOOLS += vdi_list
-VDI_TOOLS += vdi_snap
-VDI_TOOLS += vdi_snap_list
-VDI_TOOLS += vdi_snap_delete
-VDI_TOOLS += vdi_fill
-VDI_TOOLS += vdi_tree
-VDI_TOOLS += vdi_validate
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += $(INCLUDES)
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS    = parallax $(VDI_TOOLS)
-
-.PHONY: all
-all: $(VDI_TOOLS) parallax blockstored
-
-.PHONY: install
-install: all
-       $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
-
-.PHONY: clean
-clean:
-       rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
-
-parallax: $(PLX_SRCS)
-       $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
-
-${VDI_TOOLS}: %: %.c $(VDI_SRCS)
-       $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
-
--include $(DEPS)
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/README
--- a/tools/blktap/parallax/README      Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-Parallax Quick Overview
-March 3, 2005
-
-This is intended to provide a quick set of instructions to let you
-guys play with the current parallax source.  In it's current form, the
-code will let you run an arbitrary number of VMs off of a single disk
-image, doing copy-on-write as they make updates.  Each domain is
-assigned a virtual disk image (VDI), which may be based on a snapshot
-of an existing image.  All of the VDI and snapshot management should
-currently work.
-
-The current implementation uses a single file as a blockstore for
-_everything_ this will soon be replaced by the fancier backend code
-and the local cache.  As it stands, Parallax will create
-"blockstore.dat" in the directory that you run it from, and use
-largefile support to make this grow to unfathomable girth.  So, you
-probably want to run the daemon off of a local disk, with a lot of
-free space.
-
-Here's how to get going:
-
-0. Setup:
----------
-
-Pick a local directory on a disk with lots of room.  You should be
-running from a privileged domain (e.g. dom0) with the blocktap
-configured in and block backend NOT.
-
-For convenience (for the moment) copy all of the vdi tools (vdi_*) and
-the parallax daemon from tools/blktap into this directory.
-
-1. Populate the blockstore:
----------------------------
-
-First you need to put at least one image into the blockstore.  You
-will need a disk image, either as a file or local partition.  My
-general approach has been to
-
-(a) make a really big sparse file with 
-
-        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
-
-(b) put a filesystem into it
-
-        mkfs.ext3 ./image
-
-(c) mount it using loopback
-
-        mkdir ./mnt
-        mount -o loop ./image
-
-(d) cd into it and untar one of the image files from srg-roots.
-
-        cd mnt
-        tar ...
-
-NOTE: Beware if your system is FC3.  mkfs is not compatible with old
-versions of fedora, and so you don't have much choice but to install
-further fc3 images if you have used the fc3 version of mkfs.
-
-(e) unmount the image
-
-        cd ..
-        umount mnt
-
-(f) now, create a new VDI to hold the image 
-
-        ./vdi_create "My new FC3 VDI"
-
-(g) get the id of the new VDI.
-
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-
-(0 is the VDI id... create a few more if you want.)
-
-(h) hoover your image into the new VDI.
-
-        ./vdi_fill 0 ./image
-
-This will pull the entire image into the blockstore and set up a
-mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
-should also work, but vdi_fill has NO notion of sparseness yet, so you
-are going to pump a block into the store for each block you read.
-
-vdi_fill will count up until it is done, and you should be ready to
-go.  If you want to be anal, you can use vdi_validate to test the VDI
-against the original image.
-
-2. Create some extra VDIs
--------------------------
-
-VDIs are actually a list of snapshots, and each snapshot is a full
-image of mappings.  So, to preserve an immutable copy of a current
-VDI, do this:
-
-(a) Snapshot your new VDI.
-
-        ./vdi_snap 0
-
-Snapshotting writes the current radix root to the VDI's snapshot log,
-and assigns it a new writable root.
-
-(b) look at the VDI's snapshot log.
-
-        ./vdi_snap_list 0
-
-        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
-
-The first two columns constitute a snapshot id and represent the
-(block, offset) of the snapshot record.  The Date tells you when the
-snapshot was made, and 31 is the radix root node of the snapshot.
-
-(c) Create a new VDI, based on that snapshot, and look at the list.
-
-        ./vdi_create "FC3 - Copy 1" 16 0
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-        |      1                       FC3 - Copy 1
-
-NOTE: If you have Graphviz installed on your system, you can use
-vdi_tree to generate a postscript of your current set of VDIs and
-snapshots.
-
-
-Create as many VDIs as you need for the VMs that you want to run.
-
-3. Boot some VMs:
------------------
-
-Parallax currently uses a hack in xend to pass the VDI id, you need to
-modify the disk line of the VM config that is going to mount it.
-
-(a) set up your vm config, by using the following disk line:
-
-        disk = ['parallax:1,sda1,w,0' ]
-
-This example uses VDI 1 (from vdi_list above), presents it as sda1
-(writable), and uses dom 0 as the backend.  If you were running the
-daemon (and tap driver) in some domain other than 0, you would change
-this last parameter.
-
-NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
-
-(b) Run parallax in the backend domain.
-
-        ./parallax
-
-(c) create your new domain.
-
-        xm create ...
-
----
-
-That's pretty much all there is to it at the moment.  Hope this is
-clear enough to get you going.  Now, a few serious caveats that will
-be sorted out in the almost immediate future:
-
-WARNINGS:
----------
-
-1. There is NO locking in the VDI tools at the moment, so I'd avoid
-running them in parallel, or more importantly, running them while the
-daemon is running.
-
-2. I doubt that xend will be very happy about restarting if you have
-parallax-using domains.  So if it dies while there are active parallax
-doms, you may need to reboot.
-
-3. I've turned off write-in-place.  So at the moment, EVERY block
-write is a log append on the blockstore.  I've been having some probs
-with the radix tree's marking of writable blocks after snapshots and
-will sort this out very soon.
-
-
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/block-async.c
--- a/tools/blktap/parallax/block-async.c       Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* block-async.c
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "block-async.h"
-#include "blockstore.h"
-#include "vdi.h"
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* We have a queue of outstanding I/O requests implemented as a 
- * circular producer-consumer ring with free-running buffers.
- * to allow reordering, this ring indirects to indexes in an 
- * ring of io_structs.
- * 
- * the block_* calls may either add an entry to this ring and return, 
- * or satisfy the request immediately and call the callback directly.
- * None of the io calls in parallax should be nested enough to worry 
- * about stack problems with this approach.
- */
-
-struct read_args {
-    uint64_t addr;
-};
-
-struct write_args {
-    uint64_t   addr;
-    char *block;
-};
-
-struct alloc_args {
-    char *block;
-};
- 
-struct pending_io_req {
-    enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
-    union {
-        struct read_args  r;
-        struct write_args w;
-        struct alloc_args a;
-    } u;
-    io_cb_t cb;
-    void *param;
-};
-
-void radix_lock_init(struct radix_lock *r)
-{
-    int i;
-    
-    pthread_mutex_init(&r->lock, NULL);
-    for (i=0; i < 1024; i++) {
-        r->lines[i] = 0;
-        r->waiters[i] = NULL;
-        r->state[i] = ANY;
-    }
-}
-
-/* maximum outstanding I/O requests issued asynchronously */
-/* must be a power of 2.*/
-#define MAX_PENDING_IO 1024
-
-/* how many threads to concurrently issue I/O to the disk. */
-#define IO_POOL_SIZE   10
-
-static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
-static int pending_io_list[MAX_PENDING_IO];
-static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
-#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
-#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
-#define PENDING_IO_ENT(_x) \
-       (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
-#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
-#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
-static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
-
-static void init_pending_io(void)
-{
-    int i;
-       
-    for (i=0; i<MAX_PENDING_IO; i++)
-        pending_io_list[i] = i;
-               
-} 
-
-void block_read(uint64_t addr, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_READ;
-    req->u.r.addr = addr;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_WRITE;
-    req->u.w.addr  = addr;
-    req->u.w.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_alloc(char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-       
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    req->op = IO_ALLOC;
-    req->u.a.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
-        r->lines[row]++;
-        r->state[row] = READ;
-        DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = RLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-}
-
-
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    /* the second check here is redundant -- just here for debugging now. */
-    if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
-        r->state[row] = STOP;
-        r->lines[row] = -1;
-        DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = WLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-       
-}
-
-/* called with radix_lock locked and lock count of zero. */
-static void wake_waiters(struct radix_lock *r, int row)
-{
-    struct pending_io_req *req;
-    struct radix_wait *rw;
-    
-    if (r->lines[row] != 0) return;
-    if (r->waiters[row] == NULL) return; 
-    
-    if (r->waiters[row]->type == WLOCK) {
-
-        rw = r->waiters[row];
-        pthread_mutex_lock(&pending_io_lock);
-        assert(CAN_PRODUCE_PENDING_IO);
-        
-        req = PENDING_IO_ENT(io_prod++);
-        req->op    = IO_WWAKE;
-        req->cb    = rw->cb;
-        req->param = rw->param;
-        r->lines[row] = -1; /* write lock the row. */
-        r->state[row] = STOP;
-        r->waiters[row] = rw->next;
-        free(rw);
-        pthread_mutex_unlock(&pending_io_lock);
-    
-    } else /* RLOCK */ {
-
-        while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
-            rw = r->waiters[row];
-            pthread_mutex_lock(&pending_io_lock);
-            assert(CAN_PRODUCE_PENDING_IO);
-            
-            req = PENDING_IO_ENT(io_prod++);
-            req->op    = IO_RWAKE;
-            req->cb    = rw->cb;
-            req->param = rw->param;
-            r->lines[row]++; /* read lock the row. */
-            r->state[row] = READ; 
-            r->waiters[row] = rw->next;
-            free(rw);
-            pthread_mutex_unlock(&pending_io_lock);
-        }
-
-        if (r->waiters[row] != NULL) /* There is a write queued still */
-            r->state[row] = STOP;
-    }  
-    
-    pthread_mutex_lock(&pending_io_lock);
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-       
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] > 0); /* try to catch misuse. */
-    r->lines[row]--;
-    if (r->lines[row] == 0) {
-        r->state[row] = ANY;
-        wake_waiters(r, row);
-    }
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] == -1); /* try to catch misuse. */
-    r->lines[row] = 0;
-    r->state[row] = ANY;
-    wake_waiters(r, row);
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-/* consumer calls */
-static void do_next_io_req(struct pending_io_req *req)
-{
-    struct io_ret          ret;
-    void  *param;
-    
-    switch (req->op) {
-    case IO_READ:
-        ret.type = IO_BLOCK_T;
-        ret.u.b  = readblock(req->u.r.addr);
-        break;
-    case IO_WRITE:
-        ret.type = IO_INT_T;
-        ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
-        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
-        break;
-    case IO_ALLOC:
-        ret.type = IO_ADDR_T;
-        ret.u.a  = allocblock(req->u.a.block);
-        break;
-    case IO_RWAKE:
-        DPRINTF("WAKE DEFERRED RLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    case IO_WWAKE:
-        DPRINTF("WAKE DEFERRED WLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    default:
-        DPRINTF("Unknown IO operation on pending list!\n");
-        return;
-    }
-    
-    param = req->param;
-    pthread_mutex_lock(&pending_io_lock);
-    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    assert(req->cb != NULL);
-    req->cb(ret, param);
-    
-}
-
-void *io_thread(void *param) 
-{
-    int tid;
-    struct pending_io_req *req;
-    
-    /* Set this thread's tid. */
-    tid = *(int *)param;
-    free(param);
-    
-start:
-    pthread_mutex_lock(&pending_io_lock);
-    while (io_prod == io_cons) {
-        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
-    }
-    
-    if (io_prod == io_cons) {
-        /* unnecessary wakeup. */
-        pthread_mutex_unlock(&pending_io_lock);
-        goto start;
-    }
-    
-    req = PENDING_IO_ENT(io_cons++);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    do_next_io_req(req);
-    
-    goto start;
-       
-}
-
-static pthread_t io_pool[IO_POOL_SIZE];
-void start_io_threads(void)
-
-{      
-    int i, tid=0;
-    
-    for (i=0; i < IO_POOL_SIZE; i++) {
-        int ret, *t;
-        t = (int *)malloc(sizeof(int));
-        *t = tid++;
-        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
-        if (ret != 0) printf("Error starting thread %d\n", i);
-    }
-       
-}
-
-void init_block_async(void)
-{
-    init_pending_io();
-    start_io_threads();
-}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h       Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-/* block-async.h
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
-#ifndef _BLOCKASYNC_H_
-#define _BLOCKASYNC_H_
-
-#include <assert.h>
-#include <xenctrl.h>
-#include "vdi.h"
-
-struct io_ret
-{
-    enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
-    union {
-        uint64_t   a;
-        char *b;
-        int   i;
-    } u;
-};
-
-typedef void (*io_cb_t)(struct io_ret r, void *param);
-
-/* per-vdi lock structures to make sure requests run in a safe order. */
-struct radix_wait {
-    enum {RLOCK, WLOCK} type;
-    io_cb_t  cb;
-    void    *param;
-    struct radix_wait *next;
-};
-
-struct radix_lock {
-    pthread_mutex_t lock;
-    int                    lines[1024];
-    struct radix_wait     *waiters[1024];
-    enum {ANY, READ, STOP} state[1024];
-};
-void radix_lock_init(struct radix_lock *r);
-
-void block_read(uint64_t addr, io_cb_t cb, void *param);
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
-void block_alloc(char *block, io_cb_t cb, void *param);
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void init_block_async(void);
-
-static inline uint64_t IO_ADDR(struct io_ret r)
-{
-    assert(r.type == IO_ADDR_T);
-    return r.u.a;
-}
-
-static inline char *IO_BLOCK(struct io_ret r)
-{
-    assert(r.type == IO_BLOCK_T);
-    return r.u.b;
-}
-
-static inline int IO_INT(struct io_ret r)
-{
-    assert(r.type == IO_INT_T);
-    return r.u.i;
-}
-
-
-#endif //_BLOCKASYNC_H_
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/blockstore.c
--- a/tools/blktap/parallax/blockstore.c        Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1348 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.c
- *
- * Simple block store interface
- *
- */
- 
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdarg.h>
-#include "blockstore.h"
-#include <pthread.h>
-
-//#define BLOCKSTORE_REMOTE
-//#define BSDEBUG
-
-#define RETRY_TIMEOUT 1000000 /* microseconds */
-
-/*****************************************************************************
- * Debugging
- */
-#ifdef BSDEBUG
-void DB(char *format, ...)
-{
-    va_list args;
-    fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
-    va_start(args, format);
-    vfprintf(stderr, format, args);
-    va_end(args);
-}
-#else
-#define DB(format, ...) (void)0
-#endif
-
-#ifdef BLOCKSTORE_REMOTE
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-/*****************************************************************************
- * Network state                                                             *
- *****************************************************************************/
-
-/* The individual disk servers we talks to. These will be referenced by
- * an integer index into bsservers[].
- */
-bsserver_t bsservers[MAX_SERVERS];
-
-/* The cluster map. This is indexed by an integer cluster number.
- */
-bscluster_t bsclusters[MAX_CLUSTERS];
-
-/* Local socket.
- */
-struct sockaddr_in sin_local;
-int bssock = 0;
-
-/*****************************************************************************
- * Notification                                                              *
- *****************************************************************************/
-
-typedef struct pool_thread_t_struct {
-    pthread_mutex_t ptmutex;
-    pthread_cond_t ptcv;
-    int newdata;
-} pool_thread_t;
-
-pool_thread_t pool_thread[READ_POOL_SIZE+1];
-
-#define RECV_NOTIFY(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    pool_thread[tid].newdata = 1; \
-    DB("CV Waking %u", tid); \
-    pthread_cond_signal(&(pool_thread[tid].ptcv)); \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-#define RECV_AWAIT(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    if (pool_thread[tid].newdata) { \
-        pool_thread[tid].newdata = 0; \
-        DB("CV Woken %u", tid); \
-    } \
-    else { \
-        DB("CV Waiting %u", tid); \
-        pthread_cond_wait(&(pool_thread[tid].ptcv), \
-                          &(pool_thread[tid].ptmutex)); \
-    } \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-
-/*****************************************************************************
- * Message queue management                                                  *
- *****************************************************************************/
-
-/* Protects the queue manipulation critcal regions.
- */
-pthread_mutex_t ptmutex_queue;
-#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
-#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
-
-pthread_mutex_t ptmutex_recv;
-#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
-#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
-
-/* A message queue entry. We allocate one of these for every request we send.
- * Asynchronous reply reception also used one of these.
- */
-typedef struct bsq_t_struct {
-    struct bsq_t_struct *prev;
-    struct bsq_t_struct *next;
-    int status;
-    int server;
-    int length;
-    struct msghdr msghdr;
-    struct iovec iov[2];
-    int tid;
-    struct timeval tv_sent;
-    bshdr_t message;
-    void *block;
-} bsq_t;
-
-#define BSQ_STATUS_MATCHED 1
-
-pthread_mutex_t ptmutex_luid;
-#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
-#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
-
-static uint64_t luid_cnt = 0x1000ULL;
-uint64_t new_luid(void) {
-    uint64_t luid;
-    ENTER_LUID_CR;
-    luid = luid_cnt++;
-    LEAVE_LUID_CR;
-    return luid;
-}
-
-/* Queue of outstanding requests.
- */
-bsq_t *bs_head = NULL;
-bsq_t *bs_tail = NULL;
-int bs_qlen = 0;
-
-/*
- */
-void queuedebug(char *msg) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
-    for (q = bs_head; q; q = q->next) {
-        fprintf(stderr, "  luid=%016llx server=%u\n",
-                q->message.luid, q->server);
-    }
-    LEAVE_QUEUE_CR;
-}
-
-int enqueue(bsq_t *qe) {
-    ENTER_QUEUE_CR;
-    qe->next = NULL;
-    qe->prev = bs_tail;
-    if (!bs_head)
-        bs_head = qe;
-    else
-        bs_tail->next = qe;
-    bs_tail = qe;
-    bs_qlen++;
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("enqueue");
-#endif
-    return 0;
-}
-
-int dequeue(bsq_t *qe) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if (q == qe) {
-            if (q->prev)
-                q->prev->next = q->next;
-            else 
-                bs_head = q->next;
-            if (q->next)
-                q->next->prev = q->prev;
-            else
-                bs_tail = q->prev;
-            bs_qlen--;
-            goto found;
-        }
-    }
-
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("dequeue not found");
-#endif
-    return 0;
-
-    found:
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("dequeue not found");
-#endif
-    return 1;
-}
-
-bsq_t *queuesearch(bsq_t *qe) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if ((qe->server == q->server) &&
-            (qe->message.operation == q->message.operation) &&
-            (qe->message.luid == q->message.luid)) {
-
-            if ((q->message.operation == BSOP_READBLOCK) &&
-                ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
-                q->block = qe->block;
-                qe->block = NULL;
-            }
-            q->length = qe->length;
-            q->message.flags = qe->message.flags;
-            q->message.id = qe->message.id;
-            q->status |= BSQ_STATUS_MATCHED;
-
-            if (q->prev)
-                q->prev->next = q->next;
-            else 
-                bs_head = q->next;
-            if (q->next)
-                q->next->prev = q->prev;
-            else
-                bs_tail = q->prev;
-            q->next = NULL;
-            q->prev = NULL;
-            bs_qlen--;
-            goto found;
-        }
-    }
-
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("queuesearch not found");
-#endif
-    return NULL;
-
-    found:
-    LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
-    queuedebug("queuesearch found");
-#endif
-    return q;
-}
-
-/*****************************************************************************
- * Network communication                                                     *
- *****************************************************************************/
-
-int send_message(bsq_t *qe) {
-    int rc;
-
-    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    if (qe->block)
-        qe->msghdr.msg_iovlen = 2;
-    else
-        qe->msghdr.msg_iovlen = 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    qe->message.luid = new_luid();
-
-    qe->status = 0;
-    qe->tid = (int)pthread_getspecific(tid_key);
-    if (enqueue(qe) < 0) {
-        fprintf(stderr, "Error enqueuing request.\n");
-        return -1;
-    }
-
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
-    //           (struct sockaddr *)&(bsservers[qe->server].sin),
-    //           sizeof(struct sockaddr_in));
-    if (rc < 0)
-        return rc;
-
-    return rc;
-}
-
-int recv_message(bsq_t *qe) {
-    struct sockaddr_in from;
-    //int flen = sizeof(from);
-    int rc;
-
-    qe->msghdr.msg_name = &from;
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    if (qe->block)
-        qe->msghdr.msg_iovlen = 2;
-    else
-        qe->msghdr.msg_iovlen = 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    rc = recvmsg(bssock, &(qe->msghdr), 0);
-
-    //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
-    //               (struct sockaddr *)&from, &flen);
-    return rc;
-}
-
-int get_server_number(struct sockaddr_in *sin) {
-    int i;
-
-#ifdef BSDEBUG2
-    fprintf(stderr,
-            "get_server_number(%u.%u.%u.%u/%u)\n",
-            (unsigned int)sin->sin_addr.s_addr & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
-            ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
-            (unsigned int)sin->sin_port);
-#endif
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (bsservers[i].hostname) {
-#ifdef BSDEBUG2
-            fprintf(stderr,
-                    "get_server_number check %u.%u.%u.%u/%u\n",
-                    (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
-                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
-                    (unsigned int)bsservers[i].sin.sin_port);
-#endif
-            if ((sin->sin_family == bsservers[i].sin.sin_family) &&
-                (sin->sin_port == bsservers[i].sin.sin_port) &&
-                (memcmp((void *)&(sin->sin_addr),
-                        (void *)&(bsservers[i].sin.sin_addr),
-                        sizeof(struct in_addr)) == 0)) {
-                return i;
-            }
-        }        
-    }
-
-    return -1;
-}
-
-void *rx_buffer = NULL;
-bsq_t rx_qe;
-bsq_t *recv_any(void) {
-    struct sockaddr_in from;
-    int rc;
-    
-    DB("ENTER recv_any\n");
-
-    rx_qe.msghdr.msg_name = &from;
-    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    rx_qe.msghdr.msg_iov = rx_qe.iov;
-    if (!rx_buffer) {
-        rx_buffer = malloc(BLOCK_SIZE);
-        if (!rx_buffer) {
-            perror("recv_any malloc");
-            return NULL;
-        }
-    }
-    rx_qe.block = rx_buffer;
-    rx_buffer = NULL;
-    rx_qe.msghdr.msg_iovlen = 2;
-    rx_qe.msghdr.msg_control = NULL;
-    rx_qe.msghdr.msg_controllen = 0;
-    rx_qe.msghdr.msg_flags = 0;
-    
-    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
-    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
-    rx_qe.iov[1].iov_base = rx_qe.block;
-    rx_qe.iov[1].iov_len = BLOCK_SIZE;
-
-    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
-    if (rc < 0) {
-        perror("recv_any");
-        return NULL;
-    }
-
-    rx_qe.length = rc;    
-    rx_qe.server = get_server_number(&from);
-
-    DB("recv_any from %d luid=%016llx len=%u\n",
-       rx_qe.server, rx_qe.message.luid, rx_qe.length);
-
-    return &rx_qe;
-}
-
-void recv_recycle_buffer(bsq_t *q) {
-    if (q->block) {
-        rx_buffer = q->block;
-        q->block = NULL;
-    }
-}
-
-// cycle through reading any incoming, searching for a match in the
-// queue, until we have all we need.
-int wait_recv(bsq_t **reqs, int numreqs) {
-    bsq_t *q, *m;
-    unsigned int x, i;
-    int tid = (int)pthread_getspecific(tid_key);
-
-    DB("ENTER wait_recv %u\n", numreqs);
-
-    checkmatch:
-    x = 0xffffffff;
-    for (i = 0; i < numreqs; i++) {
-        x &= reqs[i]->status;
-    }
-    if ((x & BSQ_STATUS_MATCHED)) {
-        DB("LEAVE wait_recv\n");
-        return numreqs;
-    }
-
-    RECV_AWAIT(tid);
-
-    /*
-    rxagain:
-    ENTER_RECV_CR;
-    q = recv_any();
-    LEAVE_RECV_CR;
-    if (!q)
-        return -1;
-
-    m = queuesearch(q);
-    recv_recycle_buffer(q);
-    if (!m) {
-        fprintf(stderr, "Unmatched RX\n");
-        goto rxagain;
-    }
-    */
-
-    goto checkmatch;
-
-}
-
-/* retry
- */
-static int retry_count = 0;
-int retry(bsq_t *qe)
-{
-    int rc;
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
-    retry_count++;
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    if (rc < 0)
-        return rc;
-    return 0;
-}
-
-/* queue runner
- */
-void *queue_runner(void *arg)
-{
-    for (;;) {
-        struct timeval now;
-        long long nowus, sus;
-        bsq_t *q;
-        int r;
-
-        sleep(1);
-
-        gettimeofday(&now, NULL);
-        nowus = now.tv_usec + now.tv_sec * 1000000;
-        ENTER_QUEUE_CR;
-        r = retry_count;
-        for (q = bs_head; q; q = q->next) {
-            sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
-            if ((nowus - sus) > RETRY_TIMEOUT) {
-                if (retry(q) < 0) {
-                    fprintf(stderr, "Error on sendmsg retry.\n");
-                }
-            }
-        }
-        if (r != retry_count) {
-            fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
-        }
-        LEAVE_QUEUE_CR;
-    }
-}
-
-/* receive loop
- */
-void *receive_loop(void *arg)
-{
-    bsq_t *q, *m;
-
-    for(;;) {
-        q = recv_any();
-        if (!q) {
-            fprintf(stderr, "recv_any error\n");
-        }
-        else {
-            m = queuesearch(q);
-            recv_recycle_buffer(q);
-            if (!m) {
-                fprintf(stderr, "Unmatched RX\n");
-            }
-            else {
-                DB("RX MATCH");
-                RECV_NOTIFY(m->tid);
-            }
-        }
-    }
-}
-pthread_t pthread_recv;
-
-/*****************************************************************************
- * Reading                                                                   *
- *****************************************************************************/
-
-void *readblock_indiv(int server, uint64_t id) {
-    void *block;
-    bsq_t *qe;
-    int len, rc;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("readblock qe malloc");
-        return NULL;
-    }
-    qe->block = NULL;
-    
-    /*
-    qe->block = malloc(BLOCK_SIZE);
-    if (!qe->block) {
-        perror("readblock qe malloc");
-        free((void *)qe);
-        return NULL;
-    }
-    */
-
-    qe->server = server;
-
-    qe->message.operation = BSOP_READBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    qe->length = MSGBUFSIZE_ID;
-
-    if (send_message(qe) < 0) {
-        perror("readblock sendto");
-        goto err;
-    }
-    
-    /*len = recv_message(qe);
-    if (len < 0) {
-        perror("readblock recv");
-        goto err;
-    }*/
-
-    rc = wait_recv(&qe, 1);
-    if (rc < 0) {
-        perror("readblock recv");
-        goto err;
-    }
-
-    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "readblock server error\n");
-        goto err;
-    }
-    if (qe->length < MSGBUFSIZE_BLOCK) {
-        fprintf(stderr, "readblock recv short (%u)\n", len);
-        goto err;
-    }
-    /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    memcpy(block, qe->message.block, BLOCK_SIZE);
-    */    
-    block = qe->block;
-
-    free((void *)qe);
-    return block;
-
-    err:
-    free(qe->block);
-    free((void *)qe);
-    return NULL;
-}
-
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-void *readblock(uint64_t id) {
-    int map = (int)BSID_MAP(id);
-    uint64_t xid;
-    static int i = CLUSTER_MAX_REPLICAS - 1;
-    void *block = NULL;
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        block = readblock_indiv(bsclusters[map].servers[0], id);
-        goto out;
-    }
-
-    i++;
-    if (i >= CLUSTER_MAX_REPLICAS)
-        i = 0;
-    switch (i) {
-    case 0:
-        xid = BSID_REPLICA0(id);
-        break;
-    case 1:
-        xid = BSID_REPLICA1(id);
-        break;
-    case 2:
-        xid = BSID_REPLICA2(id);
-        break;
-    }
-    
-    block = readblock_indiv(bsclusters[map].servers[i], xid);
-
-    out:
-#ifdef BSDEBUG
-    if (block)
-        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-                id,
-                (unsigned int)((unsigned char *)block)[0],
-                (unsigned int)((unsigned char *)block)[1],
-                (unsigned int)((unsigned char *)block)[2],
-                (unsigned int)((unsigned char *)block)[3],
-                (unsigned int)((unsigned char *)block)[4],
-                (unsigned int)((unsigned char *)block)[5],
-                (unsigned int)((unsigned char *)block)[6],
-                (unsigned int)((unsigned char *)block)[7]);
-    else
-        fprintf(stderr, "READ:  %016llx NULL\n", id);
-#endif
-    return block;
-}
-
-/*****************************************************************************
- * Writing                                                                   *
- *****************************************************************************/
-
-bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
-
-    bsq_t *qe;
-    int len;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("writeblock qe malloc");
-        goto err;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_WRITEBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    //memcpy(qe->message.block, block, BLOCK_SIZE);
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("writeblock sendto");
-        goto err;
-    }
-
-    return qe;
-
-    err:
-    free((void *)qe);
-    return NULL;
-}
-    
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    
-    int map = (int)BSID_MAP(id);
-    int rep0 = bsclusters[map].servers[0];
-    int rep1 = bsclusters[map].servers[1];
-    int rep2 = bsclusters[map].servers[2];
-    bsq_t *reqs[3];
-    int rc;
-
-    reqs[0] = reqs[1] = reqs[2] = NULL;
-
-#ifdef BSDEBUG
-    fprintf(stderr,
-            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            id,
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        reqs[0] = writeblock_indiv(rep0, id, block);
-        if (!reqs[0])
-            return -1;
-        rc = wait_recv(reqs, 1);
-        return rc;
-    }
-
-    reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
-    if (!reqs[0])
-        goto err;
-    reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
-    if (!reqs[1])
-        goto err;
-    reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
-    if (!reqs[2])
-        goto err;
-
-    rc = wait_recv(reqs, 3);
-    if (rc < 0) {
-        perror("writeblock recv");
-        goto err;
-    }
-    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server0 error\n");
-        goto err;
-    }
-    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server1 error\n");
-        goto err;
-    }
-    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "writeblock server2 error\n");
-        goto err;
-    }
-
-
-    free((void *)reqs[0]);
-    free((void *)reqs[1]);
-    free((void *)reqs[2]);
-    return 0;
-
-    err:
-    if (reqs[0]) {
-        dequeue(reqs[0]);
-        free((void *)reqs[0]);
-    }
-    if (reqs[1]) {
-        dequeue(reqs[1]);
-        free((void *)reqs[1]);
-    }
-    if (reqs[2]) {
-        dequeue(reqs[2]);
-        free((void *)reqs[2]);
-    }
-    return -1;
-}
-
-/*****************************************************************************
- * Allocation                                                                *
- *****************************************************************************/
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock(void *block) {
-    return allocblock_hint(block, 0);
-}
-
-bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
-    bsq_t *qe;
-    int len;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("allocblock_hint qe malloc");
-        goto err;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_ALLOCBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = hint;
-    //memcpy(qe->message.block, block, BLOCK_SIZE);
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("allocblock_hint sendto");
-        goto err;
-    }
-    
-    return qe;
-
-    err:
-    free((void *)qe);
-    return NULL;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    int map = (int)hint;
-    int rep0 = bsclusters[map].servers[0];
-    int rep1 = bsclusters[map].servers[1];
-    int rep2 = bsclusters[map].servers[2];
-    bsq_t *reqs[3];
-    int rc;
-    uint64_t id0, id1, id2;
-
-    reqs[0] = reqs[1] = reqs[2] = NULL;
-
-    DB("ENTER allocblock\n");
-
-    reqs[0] = allocblock_hint_indiv(rep0, block, hint);
-    if (!reqs[0])
-        goto err;
-    reqs[1] = allocblock_hint_indiv(rep1, block, hint);
-    if (!reqs[1])
-        goto err;
-    reqs[2] = allocblock_hint_indiv(rep2, block, hint);
-    if (!reqs[2])
-        goto err;
-
-    rc = wait_recv(reqs, 3);
-    if (rc < 0) {
-        perror("allocblock recv");
-        goto err;
-    }
-    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server0 error\n");
-        goto err;
-    }
-    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server1 error\n");
-        goto err;
-    }
-    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "allocblock server2 error\n");
-        goto err;
-    }
-
-    id0 = reqs[0]->message.id;
-    id1 = reqs[1]->message.id;
-    id2 = reqs[2]->message.id;
-
-#ifdef BSDEBUG
-    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            BSID(map, id0, id1, id2),
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-    
-    free((void *)reqs[0]);
-    free((void *)reqs[1]);
-    free((void *)reqs[2]);
-    return BSID(map, id0, id1, id2);
-
-    err:
-    if (reqs[0]) {
-        dequeue(reqs[0]);
-        free((void *)reqs[0]);
-    }
-    if (reqs[1]) {
-        dequeue(reqs[1]);
-        free((void *)reqs[1]);
-    }
-    if (reqs[2]) {
-        dequeue(reqs[2]);
-        free((void *)reqs[2]);
-    }
-    return 0;
-}
-
-#else /* /BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Local storage version                                                     *
- *****************************************************************************/
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   @return: pointer to block, NULL on error
- */
-
-void *readblock(uint64_t id) {
-    void *block;
-    int block_fp;
-   
-//printf("readblock(%llu)\n", id); 
-    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return NULL;
-    }
-    
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld ", id);
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        goto err;
-    }
-    if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        free(block);
-        goto err;
-    }
-    close(block_fp);
-    return block;
-    
-err:
-    close(block_fp);
-    return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    
-    int block_fp;
-    
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        goto err;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        goto err;
-    }
-    close(block_fp);
-    return 0;
-
-err:
-    close(block_fp);
-    return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-    int block_fp;
-    
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return 0;
-    }
-
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        goto err;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        goto err;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        goto err;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-//printf("alloc(%Ld)\n", lb);
-    close(block_fp);
-    return lb;
-    
-err:
-    close(block_fp);
-    return 0;
-    
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    return allocblock(block);
-}
-
-#endif /* BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Memory management                                                         *
- *****************************************************************************/
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-static freeblock_t *new_freeblock(void)
-{
-    freeblock_t *fb;
-    
-    fb = newblock();
-    
-    if (fb == NULL) return NULL;
-    
-    fb->magic = FREEBLOCK_MAGIC;
-    fb->next  = 0ULL;
-    fb->count = 0ULL;
-    memset(fb->list, 0, sizeof fb->list);
-    
-    return fb;
-}
-
-void releaseblock(uint64_t id)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fl_current;
-    
-    /* get superblock */
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    
-    /* get freeblock_current */
-    if (bs_super->freelist_current == 0ULL) 
-    {
-        fl_current = new_freeblock();
-        bs_super->freelist_current = allocblock(fl_current);
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    } else {
-        fl_current = readblock(bs_super->freelist_current);
-    }
-    
-    /* if full, chain to superblock and allocate new current */
-    
-    if (fl_current->count == FREEBLOCK_SIZE) {
-        fl_current->next = bs_super->freelist_full;
-        writeblock(bs_super->freelist_current, fl_current);
-        bs_super->freelist_full = bs_super->freelist_current;
-        freeblock(fl_current);
-        fl_current = new_freeblock();
-        bs_super->freelist_current = allocblock(fl_current);
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    }
-    
-    /* append id to current */
-    fl_current->list[fl_current->count++] = id;
-    writeblock(bs_super->freelist_current, fl_current);
-    
-    freeblock(fl_current);
-    freeblock(bs_super);
-    
-    
-}
-
-/* freelist debug functions: */
-void freelist_count(int print_each)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fb;
-    uint64_t total = 0, next;
-    
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    
-    if (bs_super->freelist_current == 0ULL) {
-        printf("freelist is empty!\n");
-        return;
-    }
-    
-    fb = readblock(bs_super->freelist_current);
-    printf("%Ld entires on current.\n", fb->count);
-    total += fb->count;
-    if (print_each == 1)
-    {
-        int i;
-        for (i=0; i< fb->count; i++)
-            printf("  %Ld\n", fb->list[i]);
-    }
-    
-    freeblock(fb);
-    
-    if (bs_super->freelist_full == 0ULL) {
-        printf("freelist_full is empty!\n");
-        return;
-    }
-    
-    next = bs_super->freelist_full;
-    for (;;) {
-        fb = readblock(next);
-        total += fb->count;
-        if (print_each == 1)
-        {
-            int i;
-            for (i=0; i< fb->count; i++)
-                printf("  %Ld\n", fb->list[i]);
-        }
-        next = fb->next;
-        freeblock(fb);
-        if (next == 0ULL) break;
-    }
-    printf("Total of %Ld ids on freelist.\n", total);
-}
-
-/*****************************************************************************
- * Initialisation                                                            *
- *****************************************************************************/
-
-int __init_blockstore(void)
-{
-    int i;
-    blockstore_super_t *bs_super;
-    uint64_t ret;
-    int block_fp;
-    
-#ifdef BLOCKSTORE_REMOTE
-    struct hostent *addr;
-
-    pthread_mutex_init(&ptmutex_queue, NULL);
-    pthread_mutex_init(&ptmutex_luid, NULL);
-    pthread_mutex_init(&ptmutex_recv, NULL);
-    /*pthread_mutex_init(&ptmutex_notify, NULL);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pool_thread[i].newdata = 0;
-        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
-        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
-    }
-
-    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
-    bsservers[1].hostname = "planb.cl.cam.ac.uk";
-    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
-    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
-    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
-    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
-    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
-    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
-    bsservers[8].hostname = NULL;
-    bsservers[9].hostname = NULL;
-    bsservers[10].hostname = NULL;
-    bsservers[11].hostname = NULL;
-    bsservers[12].hostname = NULL;
-    bsservers[13].hostname = NULL;
-    bsservers[14].hostname = NULL;
-    bsservers[15].hostname = NULL;
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (!bsservers[i].hostname)
-            continue;
-        addr = gethostbyname(bsservers[i].hostname);
-        if (!addr) {
-            perror("bad hostname");
-            return -1;
-        }
-        bsservers[i].sin.sin_family = addr->h_addrtype;
-        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
-        bsservers[i].sin.sin_addr.s_addr = 
-            ((struct in_addr *)(addr->h_addr))->s_addr;
-    }
-
-    /* Cluster map
-     */
-    bsclusters[0].servers[0] = 0;
-    bsclusters[0].servers[1] = 1;
-    bsclusters[0].servers[2] = 2;
-    bsclusters[1].servers[0] = 1;
-    bsclusters[1].servers[1] = 2;
-    bsclusters[1].servers[2] = 3;
-    bsclusters[2].servers[0] = 2;
-    bsclusters[2].servers[1] = 3;
-    bsclusters[2].servers[2] = 4;
-    bsclusters[3].servers[0] = 3;
-    bsclusters[3].servers[1] = 4;
-    bsclusters[3].servers[2] = 5;
-    bsclusters[4].servers[0] = 4;
-    bsclusters[4].servers[1] = 5;
-    bsclusters[4].servers[2] = 6;
-    bsclusters[5].servers[0] = 5;
-    bsclusters[5].servers[1] = 6;
-    bsclusters[5].servers[2] = 7;
-    bsclusters[6].servers[0] = 6;
-    bsclusters[6].servers[1] = 7;
-    bsclusters[6].servers[2] = 0;
-    bsclusters[7].servers[0] = 7;
-    bsclusters[7].servers[1] = 0;
-    bsclusters[7].servers[2] = 1;
-
-    /* Local socket set up
-     */
-    bssock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (bssock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sin_local, 0, sizeof(sin_local));
-    sin_local.sin_family = AF_INET;
-    sin_local.sin_port = htons(BLOCKSTORED_PORT);
-    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
-        perror("bind");
-        close(bssock);
-        return -1;
-    }
-
-    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
-    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
-
-#else /* /BLOCKSTORE_REMOTE */
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-        exit(-1);
-    }
-    
-    if (lseek(block_fp, 0, SEEK_END) == 0) {
-        bs_super = newblock();
-        bs_super->magic            = BLOCKSTORE_MAGIC;
-        bs_super->freelist_full    = 0LL;
-        bs_super->freelist_current = 0LL;
-        
-        ret = allocblock(bs_super);
-        
-        freeblock(bs_super);
-    } else {
-        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-        if (bs_super->magic != BLOCKSTORE_MAGIC)
-        {
-            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
-            exit(-1);
-        }
-        freeblock(bs_super);
-    }
-        
-    close(block_fp);
-        
-#endif /*  BLOCKSTORE_REMOTE */   
-    return 0;
-}
-
-void __exit_blockstore(void)
-{
-    int i;
-#ifdef BLOCKSTORE_REMOTE
-    pthread_mutex_destroy(&ptmutex_recv);
-    pthread_mutex_destroy(&ptmutex_luid);
-    pthread_mutex_destroy(&ptmutex_queue);
-    /*pthread_mutex_destroy(&ptmutex_notify);
-      pthread_cond_destroy(&ptcv_notify);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pthread_mutex_destroy(&(pool_thread[i].ptmutex));
-        pthread_cond_destroy(&(pool_thread[i].ptcv));
-    }
-#endif
-}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h        Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.h
- *
- * Simple block store interface
- *
- */
- 
-#ifndef __BLOCKSTORE_H__
-#define __BLOCKSTORE_H__
-
-#include <netinet/in.h>
-#include <xenctrl.h>
-
-#define BLOCK_SIZE  4096
-#define BLOCK_SHIFT   12
-#define BLOCK_MASK  0xfffffffffffff000LL
-
-/* XXX SMH: where is the below supposed to be defined???? */
-#ifndef SECTOR_SHIFT 
-#define SECTOR_SHIFT   9 
-#endif
-
-#define FREEBLOCK_SIZE  (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t))
-#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t next;
-    uint64_t count;
-    uint64_t list[FREEBLOCK_SIZE];
-} freeblock_t; 
-
-#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
-#define BLOCKSTORE_SUPER 1ULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t freelist_full;
-    uint64_t freelist_current;
-} blockstore_super_t;
-
-extern void *newblock();
-extern void *readblock(uint64_t id);
-extern uint64_t allocblock(void *block);
-extern uint64_t allocblock_hint(void *block, uint64_t hint);
-extern int writeblock(uint64_t id, void *block);
-
-/* Add this blockid to a freelist, to be recycled by the allocator. */
-extern void releaseblock(uint64_t id);
-
-/* this is a memory free() operation for block-sized allocations */
-extern void freeblock(void *block);
-extern int __init_blockstore(void);
-
-/* debug for freelist. */
-void freelist_count(int print_each);
-#define ALLOCFAIL (((uint64_t)(-1)))
-
-/* Distribution
- */
-#define BLOCKSTORED_PORT 9346
-
-struct bshdr_t_struct {
-    uint32_t            operation;
-    uint32_t            flags;
-    uint64_t            id;
-    uint64_t            luid;
-} __attribute__ ((packed));
-typedef struct bshdr_t_struct bshdr_t;
-
-struct bsmsg_t_struct {
-    bshdr_t        hdr;
-    unsigned char  block[BLOCK_SIZE];
-} __attribute__ ((packed));
-
-typedef struct bsmsg_t_struct bsmsg_t;
-
-#define MSGBUFSIZE_OP    sizeof(uint32_t)
-#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
-#define MSGBUFSIZE_ID    (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
-#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
-
-#define BSOP_READBLOCK  0x01
-#define BSOP_WRITEBLOCK 0x02
-#define BSOP_ALLOCBLOCK 0x03
-#define BSOP_FREEBLOCK  0x04
-
-#define BSOP_FLAG_ERROR 0x01
-
-#define BS_ALLOC_SKIP 10
-#define BS_ALLOC_HACK
-
-/* Remote hosts and cluster map - XXX need to generalise
- */
-
-/*
-
-  Interim ID format is
-
-  63 60 59                40 39                20 19                 0
-  +----+--------------------+--------------------+--------------------+
-  |map | replica 2          | replica 1          | replica 0          |
-  +----+--------------------+--------------------+--------------------+
-
-  The map is an index into a table detailing which machines form the
-  cluster.
-
- */
-
-#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
-#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
-#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
-#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
-
-#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
-                                         (((uint64_t)(_rep2))<<40) | \
-                                         (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
-
-typedef struct bsserver_t_struct {
-    char              *hostname;
-    struct sockaddr_in sin;
-} bsserver_t;
-
-#define MAX_SERVERS 16
-
-#define CLUSTER_MAX_REPLICAS 3
-typedef struct bscluster_t_struct {
-    int servers[CLUSTER_MAX_REPLICAS];
-} bscluster_t;
-
-#define MAX_CLUSTERS 16
-
-#endif /* __BLOCKSTORE_H__ */
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/blockstored.c
--- a/tools/blktap/parallax/blockstored.c       Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,275 +0,0 @@
-/**************************************************************************
- * 
- * blockstored.c
- *
- * Block store daemon.
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include "blockstore.h"
-
-//#define BSDEBUG
-
-int readblock_into(uint64_t id, void *block);
-
-int open_socket(uint16_t port) {
-    
-    struct sockaddr_in sn;
-    int sock;
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(port);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        return -1;
-    }
-
-    return sock;
-}
-
-static int block_fp = -1;
-static int bssock = -1;
-
-int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
-
-    int rc;
-    
-#ifdef BSDEBUG
-    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
-            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
-#endif
-    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
-    if (rc < 0) {
-        perror("send_reply");
-        return 1;
-    }
-
-
-    return 0;
-}
-
-static bsmsg_t msgbuf;
-
-void service_loop(void) {
-
-    for (;;) {
-        int rc, len;
-        struct sockaddr_in from;
-        size_t slen = sizeof(from);
-        uint64_t bid;
-
-        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                       (struct sockaddr *)&from, &slen);
-
-        if (len < 0) {
-            perror("recvfrom");
-            continue;
-        }
-
-        if (len < MSGBUFSIZE_OP) {
-            fprintf(stderr, "Short packet.\n");
-            continue;
-        }
-
-#ifdef BSDEBUG
-        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
-                len, msgbuf.hdr.operation, msgbuf.hdr.id);
-#endif
-
-        switch (msgbuf.hdr.operation) {
-        case BSOP_READBLOCK:
-            if (len < MSGBUFSIZE_ID) {
-                fprintf(stderr, "Short packet (readblock %u).\n", len);
-                continue;
-            }
-            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "readblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
-            break;
-        case BSOP_WRITEBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (writeblock %u).\n", len);
-                continue;
-            }
-            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "writeblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        case BSOP_ALLOCBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (allocblock %u).\n", len);
-                continue;
-            }
-            bid = allocblock(msgbuf.block);
-            if (bid == ALLOCFAIL) {
-                fprintf(stderr, "allocblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.id = bid;
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        }
-
-    }
-}
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *   @block: pointer to buffer to receive block
- *
- *   @return: 0 if OK, other on error
- */
-
-int readblock_into(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        return -1;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        return -1;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-static uint64_t lastblock = 0;
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-
-    retry:
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        return ALLOCFAIL;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        return ALLOCFAIL;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        return ALLOCFAIL;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-
-#ifdef BS_ALLOC_HACK
-    if (lb < BS_ALLOC_SKIP)
-        goto retry;
-#endif
-    
-    if (lb <= lastblock)
-        printf("[*** %Ld alredy allocated! ***]\n", lb);
-    
-    lastblock = lb;
-    return lb;
-}
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-
-int main(int argc, char **argv)
-{
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    bssock = open_socket(BLOCKSTORED_PORT);
-    if (bssock < 0) {
-        return -1;
-    }
-
-    service_loop();
-    
-    close(bssock);
-
-    return 0;
-}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/bstest.c
--- a/tools/blktap/parallax/bstest.c    Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-/**************************************************************************
- * 
- * bstest.c
- *
- * Block store daemon test program.
- *
- * usage: bstest <host>|X {r|w|a} ID 
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <errno.h>
-#include "blockstore.h"
-
-int direct(char *host, uint32_t op, uint64_t id, int len) {
-    struct sockaddr_in sn, peer;
-    int sock;
-    bsmsg_t msgbuf;
-    int rc, slen;
-    struct hostent *addr;
-
-    addr = gethostbyname(host);
-    if (!addr) {
-        perror("bad hostname");
-        exit(1);
-    }
-    peer.sin_family = addr->h_addrtype;
-    peer.sin_port = htons(BLOCKSTORED_PORT);
-    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
-    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
-            (unsigned int)(unsigned char)addr->h_addr[0],
-            (unsigned int)(unsigned char)addr->h_addr[1],
-            (unsigned int)(unsigned char)addr->h_addr[2],
-            (unsigned int)(unsigned char)addr->h_addr[3]);
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        exit(1);
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(BLOCKSTORED_PORT);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        exit(1);
-    }
-
-    memset((void *)&msgbuf, 0, sizeof(msgbuf));
-    msgbuf.operation = op;
-    msgbuf.id = id;
-
-    rc = sendto(sock, (void *)&msgbuf, len, 0,
-                (struct sockaddr *)&peer, sizeof(peer));
-    if (rc < 0) {
-        perror("sendto");
-        exit(1);
-    }
-
-    slen = sizeof(peer);
-    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                   (struct sockaddr *)&peer, &slen);
-    if (len < 0) {
-        perror("recvfrom");
-        exit(1);
-    }
-
-    printf("Reply %u bytes:\n", len);
-    if (len >= MSGBUFSIZE_OP)
-        printf("  operation: %u\n", msgbuf.operation);
-    if (len >= MSGBUFSIZE_FLAGS)
-        printf("  flags: 0x%x\n", msgbuf.flags);
-    if (len >= MSGBUFSIZE_ID)
-        printf("  id: %llu\n", msgbuf.id);
-    if (len >= (MSGBUFSIZE_ID + 4))
-        printf("  data: %02x %02x %02x %02x...\n",
-               (unsigned int)msgbuf.block[0],
-               (unsigned int)msgbuf.block[1],
-               (unsigned int)msgbuf.block[2],
-               (unsigned int)msgbuf.block[3]);
-    
-    if (sock > 0)
-        close(sock);
-   
-    return 0;
-}
-
-int main (int argc, char **argv) {
-
-    uint32_t op = 0;
-    uint64_t id = 0;
-    int len = 0, rc;
-    void *block;
-
-    if (argc < 3) {
-        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
-        return 1;
-    }
-
-    switch (argv[2][0]) {
-    case 'r':
-    case 'R':
-        op = BSOP_READBLOCK;
-        len = MSGBUFSIZE_ID;
-        break;
-    case 'w':
-    case 'W':
-        op = BSOP_WRITEBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    case 'a':
-    case 'A':
-        op = BSOP_ALLOCBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    default:
-        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
-        return 1;
-    }
-
-    if (argc >= 4)
-        id = atoll(argv[3]);
-
-    if (strcmp(argv[1], "X") == 0) {
-        rc = __init_blockstore();
-        if (rc < 0) {
-            fprintf(stderr, "blockstore init failed.\n");
-            return 1;
-        }
-        switch(op) {
-        case BSOP_READBLOCK:
-            block = readblock(id);
-            if (block) {
-                printf("data: %02x %02x %02x %02x...\n",
-                       (unsigned int)((unsigned char*)block)[0],
-                       (unsigned int)((unsigned char*)block)[1],
-                       (unsigned int)((unsigned char*)block)[2],
-                       (unsigned int)((unsigned char*)block)[3]);
-            }
-            break;
-        case BSOP_WRITEBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            rc = writeblock(id, block);
-            if (rc != 0) {
-                printf("error\n");
-            }
-            else {
-                printf("OK\n");
-            }
-            break;
-        case BSOP_ALLOCBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            id = allocblock_hint(block, id);
-            if (id == 0) {
-                printf("error\n");
-            }
-            else {
-                printf("ID: %llu\n", id);
-            }
-            break;
-        }
-    }
-    else {
-        direct(argv[1], op, id, len);
-    }
-
-
-    return 0;
-}
diff -r 049e669e6a8a -r b29806fb6ba0 tools/blktap/parallax/parallax.c
--- a/tools/blktap/parallax/parallax.c  Mon Jun 26 13:09:11 2006 -0400
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,608 +0,0 @@
-/**************************************************************************
- * 
- * parallax.c
- *
- * The Parallax Storage Server
- *
- */
- 
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "blktaplib.h"
-#include "blockstore.h"
-#include "vdi.h"
-#include "block-async.h"
-#include "requests-async.h"
-
-#define PARALLAX_DEV     61440
-#define SECTS_PER_NODE   8
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* ------[ session records ]----------------------------------------------- */
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-#define VDI_HASHSZ 16
-#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
-
-typedef struct blkif {
-    domid_t       domid;
-    unsigned int  handle;
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    vdi_t        *vdi_hash[VDI_HASHSZ];
-    struct blkif *hash_next;
-} blkif_t;
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    if ( handle != 0 )
-        printf("blktap/parallax don't currently support non-0 dev handles!\n");
-    
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
-{
-    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
-    
-    while ((vdi != NULL) && (vdi->vdevice != device))
-        vdi = vdi->next;
-    
-    return vdi;
-}
-
-/* ------[ control message handling ]-------------------------------------- */
-
-void blkif_create(blkif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_create): create is %p\n", create); 
-    
-    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
-    {
-        DPRINTF("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
-                domid, handle);
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            free(blkif);
-            return;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    DPRINTF("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
-    
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pblkif = blkif->hash_next;
-    free(blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_create(blkif_be_vbd_create_t *create)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = create->vdevice;
-
-    DPRINTF("parallax (vbd_create): create=%p\n", create); 
-    
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    /* VDI identifier is in grow->extent.sector_start */
-    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
-            (unsigned long)create->dev_handle);
-
-    vdi = vdi_get(create->dev_handle);
-    if (vdi == NULL)
-    {
-        printf("parallax (vbd_create): VDI %lx not found.\n",
-               (unsigned long)create->dev_handle);
-        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-        return;
-    }
-    
-    vdi->next = NULL;
-    vdi->vdevice = vdevice;
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while (*vdip != NULL)
-        vdip = &(*vdip)->next;
-    *vdip = vdi;
-    
-    DPRINTF("blkif_create succeeded\n"); 
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = destroy->vdevice;
-    
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
-        vdip = &(*vdip)->next;
-
-    if (*vdip != NULL) 
-    {
-        vdi = *vdip;
-        *vdip = vdi->next;
-        vdi_put(vdi);
-    }
-        
-}
-
-int parallax_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    int      ret;
-
-    DPRINTF("parallax_control: msg is %p\n", msg); 
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("Unexpected control message (%d)\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        blkif_create((blkif_be_create_t *)msg->msg);
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        blkif_destroy((blkif_be_destroy_t *)msg->msg);
-        break;  
-        
-    case CMSG_BLKIF_BE_VBD_CREATE:
-        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
-            goto parse_error;
-        vbd_create((blkif_be_vbd_create_t *)msg->msg);
-        break;
-        
-    case CMSG_BLKIF_BE_VBD_DESTROY:
-        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
-            goto parse_error;
-        vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
-        break;
-
-    case CMSG_BLKIF_BE_CONNECT:
-    case CMSG_BLKIF_BE_DISCONNECT:
-        /* we don't manage the device channel, the tap does. */
-        break;
-
-    default:
-        goto parse_error;
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-}    
-
-int parallax_probe(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    vdisk_t *img_info;
-    vdi_t *vdi;
-    int i, nr_vdis = 0; 
-
-    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
-
-    /* We expect one buffer only. */
-    if ( req->nr_segments != 1 )
-      goto err;
-
-    /* Make sure the buffer is page-sized. */
-    if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) )
-      goto err;
-
-    /* fill the list of devices */
-    for (i=0; i<VDI_HASHSZ; i++) {
-        vdi = blkif->vdi_hash[i];
-        while (vdi) {
-            img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-            img_info[nr_vdis].device   = vdi->vdevice;
-            img_info[nr_vdis].info     = 0;
-            /* The -1 here accounts for the LSB in the radix tree */
-            img_info[nr_vdis].capacity = 
-                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
-            nr_vdis++;
-            vdi = vdi->next;
-        }
-    }
-
-    
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = nr_vdis; /* number of disks */
-
-    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
-    return  BLKTAP_RESPOND;
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = BLKIF_RSP_ERROR;
-    
-    DPRINTF("parallax_probe: send error response\n"); 
-    return BLKTAP_RESPOND;  
-}
-
-typedef struct {
-    blkif_request_t *req;
-    int              count;
-    int              error;
-    pthread_mutex_t  mutex;
-} pending_t;
-
-#define MAX_REQUESTS 64
-pending_t pending_list[MAX_REQUESTS];
-
-struct cb_param {
-    pending_t *pent;
-    int       segment;
-    uint64_t       sector; 
-    uint64_t       vblock; /* for debug printing -- can be removed. */
-};
-
-static void read_cb(struct io_ret r, void *in_param)
-{
-    struct cb_param *param = (struct cb_param *)in_param;
-    pending_t *p = param->pent;
-    int segment = param->segment;
-    blkif_request_t *req = p->req;
-    unsigned long size, offset, start;
-    char *dpage, *spage;
-       
-    spage  = IO_BLOCK(r);
-    if (spage == NULL) { p->error++; goto finish; }
-    dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
-    
-    /* Calculate read size and offset within the read block. */
-
-    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
-    size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) <<
-        SECTOR_SHIFT;
-    start = req->seg[segment].first_sect << SECTOR_SHIFT;
-
-    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
-            "vblock %llx, "
-            "size %lx\n", 
-            param->sector,
-            p->req->seg[segment].first_sect,
-            p->req->seg[segment].last_sect,
-            param->vblock, size); 
-
-    memcpy(dpage + start, spage + offset, size);
-    freeblock(spage);
-    
-    /* Done the read.  Now update the pending record. */
- finish:
-    pthread_mutex_lock(&p->mutex);
-    p->count--;
-    
-    if (p->count == 0) {
-       blkif_response_t *rsp;
-       
-        rsp = (blkif_response_t *)req;

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [XenPPC] [xenppc-unstable] [ppc] merge with upstream, Xen patchbot-xenppc-unstable <=