WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Merge (only) xen-ia64-unstable with latest xen-unstable

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Merge (only) xen-ia64-unstable with latest xen-unstable
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 15 Sep 2005 07:48:27 +0000
Delivery-date: Thu, 15 Sep 2005 07:56:13 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID b2f4823b6ff09017e21c0d44f73c58118f50ea46
# Parent  10b1d30d3f66beac5a8275d108461da558c38d1d
# Parent  ec4a3f2d060e1dcc137cca136309ca67d46fbb85
Merge (only) xen-ia64-unstable with latest xen-unstable

diff -r 10b1d30d3f66 -r b2f4823b6ff0 .hgignore
--- a/.hgignore Thu Sep  8 15:18:40 2005
+++ b/.hgignore Fri Sep  9 16:30:54 2005
@@ -82,6 +82,7 @@
 ^tools/blktap/parallax/vdi_validate$
 ^tools/blktap/parallax/parallax$
 ^tools/blktap/parallax/blockstored$
+^tools/blktap/ublkback/ublkback$
 ^tools/blktap/xen/.*$
 ^tools/check/\..*$
 ^tools/cmdline/.*$
@@ -141,24 +142,28 @@
 ^tools/vnet/vnet-module/\.tmp_versions/.*$
 ^tools/vnet/vnet-module/vnet_module\.mod\..*$
 ^tools/vnetd/vnetd$
+^tools/vtpm/vtpm*
+^tools/vtpm/tpm_emulator-*
+^tools/vtpm_manager/manager/vtpm_managerd
 ^tools/web-shutdown\.tap$
 ^tools/x2d2/minixend$
-^tools/xcs/xcs$
-^tools/xcs/xcsdump$
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
 ^tools/xenstat/xentop/xentop$
 ^tools/xenstore/testsuite/tmp/.*$
 ^tools/xenstore/xen$
+^tools/xenstore/xenbus_dev.h$
 ^tools/xenstore/xenstored$
 ^tools/xenstore/xenstored_test$
+^tools/xenstore/xenstore-read$
+^tools/xenstore/xenstore-rm$
+^tools/xenstore/xenstore-write$
 ^tools/xenstore/xs_dom0_test$
 ^tools/xenstore/xs_random$
 ^tools/xenstore/xs_stress$
 ^tools/xenstore/xs_test$
 ^tools/xenstore/xs_watch_stress$
 ^tools/xentrace/xentrace$
-^tools/xfrd/xfrd$
 ^xen/BLOG$
 ^xen/TAGS$
 ^xen/arch/x86/asm-offsets\.s$
diff -r 10b1d30d3f66 -r b2f4823b6ff0 Config.mk
--- a/Config.mk Thu Sep  8 15:18:40 2005
+++ b/Config.mk Fri Sep  9 16:30:54 2005
@@ -48,3 +48,4 @@
 # Optional components
 XENSTAT_XENTOP ?= y
 
+VTPM_TOOLS ?= n
diff -r 10b1d30d3f66 -r b2f4823b6ff0 Makefile
--- a/Makefile  Thu Sep  8 15:18:40 2005
+++ b/Makefile  Fri Sep  9 16:30:54 2005
@@ -35,11 +35,11 @@
 export pae=y
 endif
 
-.PHONY:        all dist install xen tools kernels docs world clean mkpatches 
mrproper
+.PHONY:        all dist install xen kernels tools docs world clean mkpatches 
mrproper
 .PHONY:        kbuild kdelete kclean
 
 # build and install everything into the standard system directories
-install: install-xen install-tools install-kernels install-docs
+install: install-xen install-kernels install-tools install-docs
 
 build: kernels
        $(MAKE) -C xen build
@@ -47,7 +47,7 @@
        $(MAKE) -C docs build
 
 # build and install everything into local dist directory
-dist: xen tools kernels docs
+dist: xen kernels tools docs
        $(INSTALL_DIR) $(DISTDIR)/check
        $(INSTALL_DATA) ./COPYING $(DISTDIR)
        $(INSTALL_DATA) ./README $(DISTDIR)
@@ -178,6 +178,8 @@
        rm -rf $(D)/usr/share/man/man1/xen*
        rm -rf $(D)/usr/share/man/man8/xen*
        rm -rf $(D)/usr/lib/xen
+       rm -rf $(D)/etc/hotplug.d/xen-backend
+       rm -rf $(D)/etc/hotplug/xen-backend.agent
 
 # Legacy targets for compatibility
 linux24:
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/Makefile   Fri Sep  9 16:30:54 2005
@@ -31,16 +31,18 @@
 OBJS := $(TARGET_ARCH).o
 OBJS += $(patsubst %.c,%.o,$(wildcard *.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c))
-
+OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c))
+                                                                               
   
 HDRS := $(wildcard include/*.h)
 HDRS += $(wildcard include/xen/*.h)
 
 default: $(TARGET)
 
-xen-public:
+links:
        [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
-
-$(TARGET): xen-public $(OBJS)
+       [ -e xenbus/xenstored.h ] || ln -sf ../../../tools/xenstore/xenstored.h 
xenbus/xenstored.h
+       
+$(TARGET): links $(OBJS)
        $(LD) -N -T minios-$(TARGET_ARCH).lds $(OBJS) -o $@.elf
        gzip -f -9 -c $@.elf >$@.gz
 
@@ -55,3 +57,4 @@
 %.o: %.S $(HDRS) Makefile
        $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
 
+
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/README
--- a/extras/mini-os/README     Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/README     Fri Sep  9 16:30:54 2005
@@ -23,13 +23,8 @@
 
 - to build it just type make.
 
-- copy image.final somewhere where dom0 can access it
+- to start it do the following in domain0 (assuming xend is running)
+  # xm create domain_config
 
-- in dom0
-  # xi_create 16000 test
-    <domid>
-  # xi_build <domid> image.final 0
-  # xi_start <domid>
-
-this prints out a bunch of stuff and then every 1000 timer interrupts the
-system time.
+this starts the kernel and prints out a bunch of stuff and then every
+1000 timer interrupts the system time.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/events.c
--- a/extras/mini-os/events.c   Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/events.c   Fri Sep  9 16:30:54 2005
@@ -17,13 +17,13 @@
  */
 
 #include <os.h>
+#include <mm.h>
 #include <hypervisor.h>
 #include <events.h>
 #include <lib.h>
 
-#include <xen/event_channel.h>
 static ev_action_t ev_actions[NR_EVS];
-void default_handler(u32 port, struct pt_regs *regs);
+void default_handler(int port, struct pt_regs *regs);
 
 
 /*
@@ -32,7 +32,6 @@
 int do_event(u32 port, struct pt_regs *regs)
 {
     ev_action_t  *action;
-
     if (port >= NR_EVS) {
         printk("Port number too large: %d\n", port);
         return 0;
@@ -57,11 +56,23 @@
 
 }
 
+void bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
+{
+       if(ev_actions[port].handler)
+        printk("WARN: Handler for port %d already registered, replacing\n",
+                               port);
+
+       ev_actions[port].handler = handler;
+       ev_actions[port].status &= ~EVS_DISABLED;         
+ 
+       /* Finally unmask the port */
+       unmask_evtchn(port);
+}
+
 int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) )
 {
        evtchn_op_t op;
        int ret = 0;
-       u32 port;
 
        /* Try to bind the virq to a port */
        op.cmd = EVTCHNOP_bind_virq;
@@ -73,21 +84,12 @@
                printk("Failed to bind virtual IRQ %d\n", virq);
                goto out;
     }
-
-    port = op.u.bind_virq.port;
-       
-       if(ev_actions[port].handler)
-        printk("WARN: Handler for port %d already registered, replacing\n",
-                               port);
-
-       ev_actions[port].handler = handler;
-       ev_actions[port].status &= ~EVS_DISABLED;
-       
-       /* Finally unmask the port */
-       unmask_evtchn(port);
+    bind_evtchn(op.u.bind_virq.port, handler); 
 out:
        return ret;
 }
+
+
 
 /*
  * Initially all events are without a handler and disabled
@@ -100,10 +102,10 @@
     for ( i = 0; i < NR_EVS; i++ )
     {
         ev_actions[i].status  = EVS_DISABLED;
-        ev_actions[i].handler = NULL;
+        ev_actions[i].handler = default_handler;
     }
 }
 
-void default_handler(u32 port, struct pt_regs *regs) {
+void default_handler(int port, struct pt_regs *regs) {
     printk("[Port %d] - event received\n", port);
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/events.h
--- a/extras/mini-os/include/events.h   Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/events.h   Fri Sep  9 16:30:54 2005
@@ -20,6 +20,7 @@
 #define _EVENTS_H_
 
 #include<traps.h>
+#include <xen/event_channel.h>
 
 #define NR_EVS 1024
 
@@ -39,6 +40,16 @@
 /* prototypes */
 int do_event(u32 port, struct pt_regs *regs);
 int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) );
+void bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
 void init_events(void);
 
+static inline int notify_via_evtchn(int port)
+{
+    evtchn_op_t op;
+    op.cmd = EVTCHNOP_send;
+    op.u.send.local_port = port;
+    return HYPERVISOR_event_channel_op(&op);
+}
+
+
 #endif /* _EVENTS_H_ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h       Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/hypervisor.h       Fri Sep  9 16:30:54 2005
@@ -13,7 +13,6 @@
 #define _HYPERVISOR_H_
 
 #include <types.h>
-
 #include <xen/xen.h>
 #include <xen/io/domain_controller.h>
 
@@ -40,48 +39,26 @@
 /*
  * Assembler stubs for hyper-calls.
  */
-
-#ifdef __i386__
-#define _a1 "b"
-#define _a2 "c"
-#define _a3 "d"
-#define _a4 "S"
-#else
-#define _a1 "D"
-#define _a2 "S"
-#define _a3 "d"
-#define _a4 "b"
-#endif
-
-static __inline__ int HYPERVISOR_event_channel_op(
-    void *op)
+#if defined(__i386__)
+static inline int
+HYPERVISOR_set_trap_table(
+    trap_info_t *table)
 {
     int ret;
     unsigned long ignore;
+
     __asm__ __volatile__ (
         TRAP_INSTR
         : "=a" (ret), "=b" (ignore)
-       : "0" (__HYPERVISOR_event_channel_op), "1" (op)
-       : "memory" );
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_set_trap_table(trap_info_t *table)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
-        _a1 (table) : "memory" );
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_mmu_update(mmu_update_t *req, 
-                                            int count, 
-                                            int *success_count, 
-                                            domid_t domid)
+       : "0" (__HYPERVISOR_set_trap_table), "1" (table)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+    mmu_update_t *req, int count, int *success_count, domid_t domid)
 {
     int ret;
     unsigned long ign1, ign2, ign3, ign4;
@@ -89,18 +66,16 @@
     __asm__ __volatile__ (
         TRAP_INSTR
         : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-        : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
-          "3" (success_count), "4" (domid)
-        : "memory" );
-
-    return ret;
-}
-
-
-static __inline__ int HYPERVISOR_mmuext_op(struct mmuext_op *op, 
-                                           int count, 
-                                           int *success_count, 
-                                           domid_t domid)
+       : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+        "3" (success_count), "4" (domid)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+    struct mmuext_op *op, int count, int *success_count, domid_t domid)
 {
     int ret;
     unsigned long ign1, ign2, ign3, ign4;
@@ -108,70 +83,65 @@
     __asm__ __volatile__ (
         TRAP_INSTR
         : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-        : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
-          "3" (success_count), "4" (domid)
-        : "memory" );
-
-    return ret;
-}
-
-
-
-static __inline__ int HYPERVISOR_set_gdt(unsigned long *frame_list, int 
entries)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), 
-        _a1 (frame_list), _a2 (entries) : "memory" );
-
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_stack_switch(unsigned long ss, unsigned long 
esp)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_stack_switch),
-        _a1 (ss), _a2 (esp) : "memory" );
-
-    return ret;
-}
-
-#ifdef __i386__
-static __inline__ int HYPERVISOR_set_callbacks(
+       : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+        "3" (success_count), "4" (domid)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+    unsigned long *frame_list, int entries)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
+       : "memory" );
+
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_stack_switch(
+    unsigned long ss, unsigned long esp)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
     unsigned long event_selector, unsigned long event_address,
     unsigned long failsafe_selector, unsigned long failsafe_address)
 {
     int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
-        _a1 (event_selector), _a2 (event_address), 
-        _a3 (failsafe_selector), _a4 (failsafe_address) : "memory" );
-
-    return ret;
-}
-#else
-static __inline__ int HYPERVISOR_set_callbacks(
-    unsigned long event_address,
-    unsigned long failsafe_address,
-    unsigned long syscall_address)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
-        _a1 (event_address), _a2 (failsafe_address), 
-        _a3 (syscall_address) : "memory" );
-
-    return ret;
-}
-#endif
-
-static __inline__ int
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+       : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
+         "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
 HYPERVISOR_fpu_taskswitch(
     int set)
 {
@@ -187,67 +157,106 @@
     return ret;
 }
 
-static __inline__ int HYPERVISOR_yield(void)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
-        _a1 (SCHEDOP_yield) : "memory" );
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_block(void)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
-        _a1 (SCHEDOP_block) : "memory" );
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_shutdown(void)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
-        _a1 (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
-        : "memory" );
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_reboot(void)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
-        _a1 (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
-        : "memory" );
-
-    return ret;
-}
-
-static __inline__ int HYPERVISOR_suspend(unsigned long srec)
-{
-    int ret;
+static inline int
+HYPERVISOR_yield(
+    void)
+{
+    int ret;
+    unsigned long ign;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign)
+       : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
+       : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_block(
+    void)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
+       : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_shutdown(
+    void)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
+        : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_reboot(
+    void)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
+        : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_suspend(
+    unsigned long srec)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
     /* NB. On suspend, control software expects a suspend record in %esi. */
     __asm__ __volatile__ (
         TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
-        _a1 (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
-        "S" (srec) : "memory" );
-
-    return ret;
-}
-
-#ifdef __i386__
-static __inline__ long HYPERVISOR_set_timer_op( u64 timeout )
+        : "=a" (ret), "=b" (ign1), "=S" (ign2)
+       : "0" (__HYPERVISOR_sched_op),
+        "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
+        "S" (srec) : "memory", "ecx");
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_crash(
+    void)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
+        : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline long
+HYPERVISOR_set_timer_op(
+    u64 timeout)
 {
     int ret;
     unsigned long timeout_hi = (unsigned long)(timeout>>32);
@@ -262,8 +271,516 @@
 
     return ret;
 }
+
+#if 0
+static inline int
+HYPERVISOR_dom0_op(
+    dom0_op_t *dom0_op)
+{
+    int ret;
+    unsigned long ign1;
+
+    dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
+       : "memory");
+
+    return ret;
+}
+#endif
+
+static inline int
+HYPERVISOR_set_debugreg(
+    int reg, unsigned long value)
+{
+    int ret;
+    unsigned long ign1, ign2;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
+       : "memory" );
+
+    return ret;
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+    int reg)
+{
+    unsigned long ret;
+    unsigned long ign;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign)
+       : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+    u64 ma, u64 desc)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+       : "0" (__HYPERVISOR_update_descriptor),
+         "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)),
+         "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32))
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_dom_mem_op(
+    unsigned int op, unsigned long *extent_list,
+    unsigned long nr_extents, unsigned int extent_order)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4, ign5;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
+         "=D" (ign5)
+       : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
+         "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
+        : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_multicall(
+    void *call_list, int nr_calls)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+    unsigned long va, pte_t new_val, unsigned long flags)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+       : "0" (__HYPERVISOR_update_va_mapping), 
+          "1" (va), "2" ((new_val).pte_low),
+#ifdef CONFIG_X86_PAE
+         "3" ((new_val).pte_high),
 #else
-static __inline__ long HYPERVISOR_set_timer_op( u64 timeout )
+         "3" (0),
+#endif
+         "4" (flags)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_event_channel_op(
+    void *op)
+{
+    int ret;
+    unsigned long ignore;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ignore)
+       : "0" (__HYPERVISOR_event_channel_op), "1" (op)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_xen_version(
+    int cmd)
+{
+    int ret;
+    unsigned long ignore;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ignore)
+       : "0" (__HYPERVISOR_xen_version), "1" (cmd)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_console_io(
+    int cmd, int count, char *str)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+       : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_physdev_op(
+    void *physdev_op)
+{
+    int ret;
+    unsigned long ign;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign)
+       : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+    unsigned int cmd, void *uop, unsigned int count)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+       : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+    unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4, ign5;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3),
+         "=S" (ign4), "=D" (ign5)
+       : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
+          "1" (va), "2" ((new_val).pte_low),
+#ifdef CONFIG_X86_PAE
+         "3" ((new_val).pte_high),
+#else
+         "3" (0),
+#endif
+         "4" (flags), "5" (domid) :
+        "memory" );
+    
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+    unsigned int cmd, unsigned int type)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
+       : "memory" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_boot_vcpu(
+    unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
+       : "memory");
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_down(
+    int vcpu)
+{
+    int ret;
+    unsigned long ign1;
+    /* Yes, I really do want to clobber edx here: when we resume a
+       vcpu after unpickling a multi-processor domain, it returns
+       here, but clobbers all of the call clobbered registers. */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
+        : "memory", "ecx", "edx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+    int vcpu)
+{
+    int ret;
+    unsigned long ign1;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+        : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+    int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+    unsigned long ign1, ign2;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+         "2" (ctxt)
+        : "memory" );
+
+    return ret;
+}
+#elif defined(__x86_64__)
+
+#define __syscall_clobber "r11","rcx","memory"
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
+static inline int
+HYPERVISOR_set_trap_table(
+    trap_info_t *table)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table)
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+    mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        "movq %5, %%r10;" TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" 
((long)count),
+         "d" (success_count), "g" ((unsigned long)domid)
+       : __syscall_clobber, "r10" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+    struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        "movq %5, %%r10;" TRAP_INSTR
+        : "=a" (ret)
+        : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count), 
+          "d" (success_count), "g" ((unsigned long)domid)
+        : __syscall_clobber, "r10" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+    unsigned long *frame_list, int entries)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" 
((long)entries)
+       : __syscall_clobber );
+
+
+    return ret;
+}
+static inline int
+HYPERVISOR_stack_switch(
+    unsigned long ss, unsigned long esp)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp)
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+    unsigned long event_address, unsigned long failsafe_address, 
+    unsigned long syscall_address)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address),
+         "S" (failsafe_address), "d" (syscall_address)
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+    int set)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch),
+          "D" ((unsigned long) set) : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_yield(
+    void)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned 
long)SCHEDOP_yield)
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_block(
+    void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned 
long)SCHEDOP_block)
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_shutdown(
+    void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_sched_op),
+         "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << 
SCHEDOP_reasonshift)))
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_reboot(
+    void)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_sched_op),
+         "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << 
SCHEDOP_reasonshift)))
+       : __syscall_clobber );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_suspend(
+    unsigned long srec)
+{
+    int ret;
+
+    /* NB. On suspend, control software expects a suspend record in %esi. */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_sched_op),
+        "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << 
SCHEDOP_reasonshift))), 
+        "S" (srec)
+       : __syscall_clobber );
+
+    return ret;
+}
+
+/*
+ * We can have the timeout value in a single argument for the hypercall, but
+ * that will break the common code. 
+ */
+static inline long
+HYPERVISOR_set_timer_op(
+    u64 timeout)
 {
     int ret;
 
@@ -329,7 +846,7 @@
     int ret;
     __asm__ __volatile__ (
         TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op),
+        : "=a" (ret) : "0" (__HYPERVISOR_memory_op),
         _a1 (dom_mem_op) : "memory" );
 
     return ret;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h      Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/lib.h      Fri Sep  9 16:30:54 2005
@@ -60,10 +60,22 @@
 /* printing */
 #define printk  printf
 #define kprintf printf
-int printf(const char *fmt, ...);
-int vprintf(const char *fmt, va_list ap);
-int sprintf(char *buf, const char *cfmt, ...);
-int vsprintf(char *buf, const char *cfmt, va_list ap);
+#define _p(_x) ((void *)(unsigned long)(_x))
+void printf(const char *fmt, ...);
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+int snprintf(char * buf, size_t size, const char *fmt, ...);
+int scnprintf(char * buf, size_t size, const char *fmt, ...);
+int vsprintf(char *buf, const char *fmt, va_list args);
+int sprintf(char * buf, const char *fmt, ...);
+int vsscanf(const char * buf, const char * fmt, va_list args);
+int sscanf(const char * buf, const char * fmt, ...);
+
+long simple_strtol(const char *cp,char **endp,unsigned int base);
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base);
+long long simple_strtoll(const char *cp,char **endp,unsigned int base);
+unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int 
base);
+
 
 /* string and memory manipulation */
 int    memcmp(const void *cs, const void *ct, size_t count);
@@ -77,6 +89,16 @@
 size_t strlen(const char *s);
 char  *strchr(const char *s, int c);
 char  *strstr(const char *s1, const char *s2);
+char * strcat(char * dest, const char * src);
+
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+struct kvec {
+    void *iov_base;
+    size_t iov_len;
+};
+
 
 
 #endif /* _LIB_H_ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/mm.h       Fri Sep  9 16:30:54 2005
@@ -24,6 +24,15 @@
 
 #ifndef _MM_H_
 #define _MM_H_
+
+#ifdef __i386__
+#include <xen/arch-x86_32.h>
+#endif
+
+#ifdef __x86_64__
+#include <xen/arch-x86_64.h>
+#endif
+
 
 #ifdef __x86_64__
 
@@ -56,6 +65,8 @@
 
 #define L1_PAGETABLE_ENTRIES    1024
 #define L2_PAGETABLE_ENTRIES    1024
+
+#elif defined(__x86_64__)
 #endif
 
 /* Given a virtual address, get an entry offset into a page table. */
@@ -97,13 +108,15 @@
 
 extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
-#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
 static __inline__ unsigned long phys_to_machine(unsigned long phys)
 {
     unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
     machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
     return machine;
 }
+
+
+#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
 static __inline__ unsigned long machine_to_phys(unsigned long machine)
 {
     unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
@@ -119,16 +132,15 @@
 
 #define to_phys(x)                 ((unsigned long)(x)-VIRT_START)
 #define to_virt(x)                 ((void *)((unsigned long)(x)+VIRT_START))
-#define __va to_virt
-#define __pa to_phys
 
 #define virt_to_pfn(_virt)         (PFN_DOWN(to_phys(_virt)))
+#define mach_to_virt(_mach)        (to_virt(machine_to_phys(_mach)))
+#define mfn_to_virt(_mfn)          (mach_to_virt(_mfn << PAGE_SHIFT))
 
 void init_mm(void);
 unsigned long alloc_pages(int order);
 #define alloc_page()    alloc_pages(0);
 void free_pages(void *pointer, int order);
-//int is_mfn_mapped(unsigned long mfn);
 
 static __inline__ int get_order(unsigned long size)
 {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/os.h
--- a/extras/mini-os/include/os.h       Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/os.h       Fri Sep  9 16:30:54 2005
@@ -15,15 +15,16 @@
 #define unlikely(x)  __builtin_expect((x),0)
 
 #define smp_processor_id() 0
-#define preempt_disable() ((void)0)
-#define preempt_enable() ((void)0)
-
-#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0))
+
 
 #ifndef __ASSEMBLY__
 #include <types.h>
+#include <hypervisor.h>
 #endif
 #include <xen/xen.h>
+
+
+#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0))
 
 #define __KERNEL_CS  FLAT_KERNEL_CS
 #define __KERNEL_DS  FLAT_KERNEL_DS
@@ -54,8 +55,6 @@
 /* Everything below this point is not included by assembler (.S) files. */
 #ifndef __ASSEMBLY__
 
-#define pt_regs xen_regs
-
 void trap_init(void);
 
 /* 
@@ -69,10 +68,8 @@
 #define __cli()                                                                
\
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
-       preempt_disable();                                              \
        _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
        _vcpu->evtchn_upcall_mask = 1;                                  \
-       preempt_enable_no_resched();                                    \
        barrier();                                                      \
 } while (0)
 
@@ -80,13 +77,11 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
-       preempt_disable();                                              \
        _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
        _vcpu->evtchn_upcall_mask = 0;                                  \
        barrier(); /* unmask then check (avoid races) */                \
        if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
                force_evtchn_callback();                                \
-       preempt_enable();                                               \
 } while (0)
 
 #define __save_flags(x)                                                        
\
@@ -100,15 +95,12 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
-       preempt_disable();                                              \
        _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
        if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
                barrier(); /* unmask then check (avoid races) */        \
                if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
                        force_evtchn_callback();                        \
-               preempt_enable();                                       \
-       } else                                                          \
-               preempt_enable_no_resched();                            \
+       }\
 } while (0)
 
 #define safe_halt()            ((void)0)
@@ -116,11 +108,9 @@
 #define __save_and_cli(x)                                              \
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
-       preempt_disable();                                              \
        _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
        (x) = _vcpu->evtchn_upcall_mask;                                \
        _vcpu->evtchn_upcall_mask = 1;                                  \
-       preempt_enable_no_resched();                                    \
        barrier();                                                      \
 } while (0)
 
@@ -135,6 +125,15 @@
 
 /* This is a barrier for the compiler only, NOT the processor! */
 #define barrier() __asm__ __volatile__("": : :"memory")
+
+#if defined(__i386__)
+#define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#elif defined(__x86_64__)
+#define mb()    __asm__ __volatile__ ("mfence":::"memory")
+#define rmb()   __asm__ __volatile__ ("lfence":::"memory")
+#endif
+
 
 #define LOCK_PREFIX ""
 #define LOCK ""
@@ -147,69 +146,71 @@
 typedef struct { volatile int counter; } atomic_t;
 
 
-#define xchg(ptr,v) \
-        ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+/************************** i386 *******************************/
+#if defined (__i386__)
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned 
long)(v),(ptr),sizeof(*(ptr))))
 struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((volatile struct __xchg_dummy *)(x))
-static __inline__ unsigned long __xchg(unsigned long x, volatile void * ptr,
-                                   int size)
-{
-    switch (size) {
-    case 1:
-        __asm__ __volatile__("xchgb %b0,%1"
-                             :"=q" (x)
-                             :"m" (*__xg(ptr)), "0" (x)
-                             :"memory");
-        break;
-    case 2:
-        __asm__ __volatile__("xchgw %w0,%1"
-                             :"=r" (x)
-                             :"m" (*__xg(ptr)), "0" (x)
-                             :"memory");
-        break;
-    case 4:
-        __asm__ __volatile__("xchgl %0,%1"
-                             :"=r" (x)
-                             :"m" (*__xg(ptr)), "0" (x)
-                             :"memory");
-        break;
-    }
-    return x;
+#define __xg(x) ((struct __xchg_dummy *)(x))
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int 
size)
+{
+       switch (size) {
+               case 1:
+                       __asm__ __volatile__("xchgb %b0,%1"
+                               :"=q" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 2:
+                       __asm__ __volatile__("xchgw %w0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 4:
+                       __asm__ __volatile__("xchgl %0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+       }
+       return x;
 }
 
 /**
  * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
+ * @nr: Bit to clear
  * @addr: Address to count from
  *
- * This operation is atomic and cannot be reordered.  
+ * This operation is atomic and cannot be reordered.
+ * It can be reordered on architectures other than x86.
  * It also implies a memory barrier.
  */
-static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
-{
-        int oldbit;
-
-        __asm__ __volatile__( LOCK_PREFIX
-                "btrl %2,%1\n\tsbbl %0,%0"
-                :"=r" (oldbit),"=m" (ADDR)
-                :"Ir" (nr) : "memory");
-        return oldbit;
-}
-
-static __inline__ int constant_test_bit(int nr, const volatile void * addr)
-{
-    return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 
5])) != 0;
-}
-
-static __inline__ int variable_test_bit(int nr, volatile void * addr)
-{
-    int oldbit;
-    
-    __asm__ __volatile__(
-        "btl %2,%1\n\tsbbl %0,%0"
-        :"=r" (oldbit)
-        :"m" (ADDR),"Ir" (nr));
-    return oldbit;
+static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
+{
+       return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
+}
+
+static inline int variable_test_bit(int nr, const volatile unsigned long * 
addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit)
+               :"m" (ADDR),"Ir" (nr));
+       return oldbit;
 }
 
 #define test_bit(nr,addr) \
@@ -217,6 +218,152 @@
  constant_test_bit((nr),(addr)) : \
  variable_test_bit((nr),(addr)))
 
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non x86 architectures, so if you are writting portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile unsigned long * addr)
+{
+       __asm__ __volatile__( LOCK
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void clear_bit(int nr, volatile unsigned long * addr)
+{
+       __asm__ __volatile__( LOCK
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+       __asm__("bsfl %1,%0"
+               :"=r" (word)
+               :"rm" (word));
+       return word;
+}
+
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+#define ADDR (*(volatile long *) addr)
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+
+
+#elif defined(__x86_64__)/* ifdef __i386__ */
+/************************** x86_64 *******************************/
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned 
long)(v),(ptr),sizeof(*(ptr))))
+#define __xg(x) ((volatile long *)(x))
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int 
size)
+{
+       switch (size) {
+               case 1:
+                       __asm__ __volatile__("xchgb %b0,%1"
+                               :"=q" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 2:
+                       __asm__ __volatile__("xchgw %w0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 4:
+                       __asm__ __volatile__("xchgl %k0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 8:
+                       __asm__ __volatile__("xchgq %0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+       }
+       return x;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr) : "memory");
+       return oldbit;
+}
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+       return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr 
>> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile const void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit)
+               :"m" (ADDR),"dIr" (nr));
+       return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
 
 /**
  * set_bit - Atomically set a bit in memory
@@ -230,10 +377,10 @@
  */
 static __inline__ void set_bit(int nr, volatile void * addr)
 {
-        __asm__ __volatile__( LOCK_PREFIX
-                "btsl %1,%0"
-                :"=m" (ADDR)
-                :"Ir" (nr));
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr) : "memory");
 }
 
 /**
@@ -248,40 +395,43 @@
  */
 static __inline__ void clear_bit(int nr, volatile void * addr)
 {
-        __asm__ __volatile__( LOCK_PREFIX
-                "btrl %1,%0"
-                :"=m" (ADDR)
-                :"Ir" (nr));
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- * 
- * Atomically increments @v by 1.  Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */ 
-static __inline__ void atomic_inc(atomic_t *v)
-{
-        __asm__ __volatile__(
-                LOCK "incl %0"
-                :"=m" (v->counter)
-                :"m" (v->counter));
-}
-
-
-#define rdtscll(val) \
-     __asm__ __volatile__("rdtsc" : "=A" (val))
-
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr));
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
 static __inline__ unsigned long __ffs(unsigned long word)
 {
-        __asm__("bsfl %1,%0"
-                :"=r" (word)
-                :"rm" (word));
-        return word;
+       __asm__("bsfq %1,%0"
+               :"=r" (word)
+               :"rm" (word));
+       return word;
 }
 
 #define ADDR (*(volatile long *) addr)
+
+#define rdtscll(val) do { \
+     unsigned int __a,__d; \
+     asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
+     (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
+} while(0)
+
+
+#else /* ifdef __x86_64__ */
+#error "Unsupported architecture"
+#endif
+
+
+/********************* common i386 and x86_64  ****************************/
+
+
 
 static __inline__ void synch_set_bit(int nr, volatile void * addr)
 {
@@ -306,6 +456,14 @@
     return oldbit;
 }
 
+static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
 
 static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
 {
@@ -326,9 +484,8 @@
 (__builtin_constant_p(nr) ? \
  synch_const_test_bit((nr),(addr)) : \
  synch_var_test_bit((nr),(addr)))
-#endif /* !__ASSEMBLY__ */
-
-#define rdtsc(low,high) \
-     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
+
+
+
+#endif /* not assembly */
 #endif /* _OS_H_ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/time.h
--- a/extras/mini-os/include/time.h     Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/time.h     Fri Sep  9 16:30:54 2005
@@ -58,6 +58,6 @@
 s_time_t get_s_time(void);
 s_time_t get_v_time(void);
 void     gettimeofday(struct timeval *tv);
-void     block(u32 millisecs);
+void     block_domain(u32 millisecs);
 
 #endif /* _TIME_H_ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/traps.h
--- a/extras/mini-os/include/traps.h    Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/traps.h    Fri Sep  9 16:30:54 2005
@@ -17,6 +17,7 @@
 #ifndef _TRAPS_H_
 #define _TRAPS_H_
 
+#ifdef __i386__
 struct pt_regs {
        long ebx;
        long ecx;
@@ -34,7 +35,38 @@
        long esp;
        int  xss;
 };
+#elif __x86_64__
 
+struct pt_regs {
+       unsigned long r15;
+       unsigned long r14;
+       unsigned long r13;
+       unsigned long r12;
+       unsigned long rbp;
+       unsigned long rbx;
+/* arguments: non interrupts/non tracing syscalls only save up to here */
+       unsigned long r11;
+       unsigned long r10;      
+       unsigned long r9;
+       unsigned long r8;
+       unsigned long rax;
+       unsigned long rcx;
+       unsigned long rdx;
+       unsigned long rsi;
+       unsigned long rdi;
+       unsigned long orig_rax;
+/* end of arguments */         
+/* cpu exception frame or undefined */
+       unsigned long rip;
+       unsigned long cs;
+       unsigned long eflags; 
+       unsigned long rsp; 
+       unsigned long ss;
+/* top of stack page */ 
+};
+
+
+#endif
 
 void dump_regs(struct pt_regs *regs);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h    Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/include/types.h    Fri Sep  9 16:30:54 2005
@@ -44,11 +44,19 @@
 typedef long long           quad_t;
 typedef unsigned long long  u_quad_t;
 typedef unsigned int        uintptr_t;
+
+typedef struct { unsigned long pte_low; } pte_t;
 #elif defined(__x86_64__)
 typedef long                quad_t;
 typedef unsigned long       u_quad_t;
 typedef unsigned long       uintptr_t;
+
+typedef struct { unsigned long pte; } pte_t;
 #endif
 
+
+
+
+#define INT_MAX         ((int)(~0U>>1))
 #define UINT_MAX            (~0U)
 #endif /* _TYPES_H_ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/kernel.c   Fri Sep  9 16:30:54 2005
@@ -33,6 +33,8 @@
 #include <time.h>
 #include <types.h>
 #include <lib.h>
+#include <sched.h>
+#include <xenbus.h>
 
 /*
  * Shared page for communicating with the hypervisor.
@@ -59,10 +61,12 @@
 
 extern char shared_info[PAGE_SIZE];
 
+#define __pte(x) ((pte_t) { (0) } )
+
 static shared_info_t *map_shared_info(unsigned long pa)
 {
     if ( HYPERVISOR_update_va_mapping(
-        (unsigned long)shared_info, pa | 7, UVMF_INVLPG) )
+        (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) )
     {
         printk("Failed to map shared_info!!\n");
         *(int*)0=0;
@@ -77,7 +81,6 @@
 void start_kernel(start_info_t *si)
 {
     static char hello[] = "Bootstrapping...\n";
-    int i;
     (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(hello), hello);
 
     /* Copy the start_info struct to a globally-accessible area. */
@@ -96,7 +99,6 @@
         (unsigned long)hypervisor_callback,
         (unsigned long)failsafe_callback, 0);
 #endif
-
     trap_init();
 
     /* ENABLE EVENT DELIVERY. This is disabled at start of day. */
@@ -119,7 +121,6 @@
      * If used for porting another OS, start here to figure out your
      * guest os entry point. Otherwise continue below...
      */
-
     /* init memory management */
     init_mm();
 
@@ -127,15 +128,15 @@
     init_events();
     /* init time and timers */
     init_time();
+    
+    /* init scheduler */
+    init_sched();
 
-    /* do nothing */
-    i = 0;
-    for ( ; ; ) 
-    {      
-//        HYPERVISOR_yield();
-        block(100);
-        i++;
-    }
+    /* init xenbus */
+    xs_init();
+    
+    /* Everything initialised, start idle thread */
+    run_idle_thread();
 }
 
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c       Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/lib/printf.c       Fri Sep  9 16:30:54 2005
@@ -1,20 +1,18 @@
-/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+/* 
  ****************************************************************************
  * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
  ****************************************************************************
  *
  *        File: printf.c
  *      Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx)
- *     Changes: 
+ *     Changes: Grzegorz Milos (gm281@xxxxxxxxx) 
  *              
- *        Date: Aug 2003
+ *        Date: Aug 2003, Aug 2005
  * 
  * Environment: Xen Minimal OS
  * Description: Library functions for printing
  *              (freebsd port, mainly sys/subr_prf.c)
  *
- ****************************************************************************
- * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
  ****************************************************************************
  *
  *-
@@ -60,409 +58,748 @@
 #include <types.h>
 #include <hypervisor.h>
 #include <lib.h>
-
-/****************************************************************************
- * RN: printf family of routines
- * taken mainly from sys/subr_prf.c
- ****************************************************************************/
-char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-#define hex2ascii(hex)  (hex2ascii_data[hex])
-#define NBBY    8               /* number of bits in a byte */
-#define MAXNBUF    (sizeof(quad_t) * NBBY + 1)
-
-static int kvprintf(char const *fmt, void *arg, int radix, va_list ap);
-
-
-int
-printf(const char *fmt, ...)
-{
-       va_list ap;
-       int retval;
-    static char printk_buf[1024];
-
-       va_start(ap, fmt);
-       retval = kvprintf(fmt, printk_buf, 10, ap);
-    printk_buf[retval] = '\0';
-       va_end(ap);
-    (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(printk_buf), 
-                                printk_buf);
-       return retval;
-}
-
-int
-vprintf(const char *fmt, va_list ap)
-{
-       int retval;
-    static char printk_buf[1024];
-       retval = kvprintf(fmt, printk_buf, 10, ap);
-    printk_buf[retval] = '\0';
-    (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(printk_buf),
-                                printk_buf);
-       return retval;
-}
-
-int
-sprintf(char *buf, const char *cfmt, ...)
-{
-       int retval;
-       va_list ap;
-
-       va_start(ap, cfmt);
-       retval = kvprintf(cfmt, (void *)buf, 10, ap);
-       buf[retval] = '\0';
-       va_end(ap);
-       return retval;
-}
-
-int
-vsprintf(char *buf, const char *cfmt, va_list ap)
-{
-       int retval;
-
-       retval = kvprintf(cfmt, (void *)buf, 10, ap);
-       buf[retval] = '\0';
-       return retval;
-}
-
-
-/*
- * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse
- * order; return an optional length and a pointer to the last character
- * written in the buffer (i.e., the first character of the string).
- * The buffer pointed to by `nbuf' must have length >= MAXNBUF.
- */
-static char *
-ksprintn(char *nbuf, u_long ul, int base, int *lenp)
-{
-       char *p;
-
-       p = nbuf;
-       *p = '\0';
-       do {
-               *++p = hex2ascii(ul % base);
-       } while (ul /= base);
-       if (lenp)
-               *lenp = p - nbuf;
-       return (p);
-}
-/* ksprintn, but for a quad_t. */
-static char *
-ksprintqn(char *nbuf, u_quad_t uq, int base, int *lenp)
-{
-       char *p;
-
-       p = nbuf;
-       *p = '\0';
-       do {
-               *++p = hex2ascii(uq % base);
-       } while (uq /= base);
-       if (lenp)
-               *lenp = p - nbuf;
-       return (p);
-}
-
-/*
- * Scaled down version of printf(3).
+#include <mm.h>
+#include <ctype.h>
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+    unsigned long result = 0,value;
+
+    if (!base) {
+        base = 10;
+        if (*cp == '0') {
+            base = 8;
+            cp++;
+            if ((*cp == 'x') && isxdigit(cp[1])) {
+                cp++;
+                base = 16;
+            }
+        }
+    }
+    while (isxdigit(*cp) &&
+           (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+        result = result*base + value;
+        cp++;
+    }
+    if (endp)
+        *endp = (char *)cp;
+    return result;
+}
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp,char **endp,unsigned int base)
+{
+    if(*cp=='-')
+        return -simple_strtoul(cp+1,endp,base);
+    return simple_strtoul(cp,endp,base);
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int 
base)
+{
+    unsigned long long result = 0,value;
+
+    if (!base) {
+        base = 10;
+        if (*cp == '0') {
+            base = 8;
+            cp++;
+            if ((*cp == 'x') && isxdigit(cp[1])) {
+                cp++;
+                base = 16;
+            }
+        }
+    }
+    while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
+                                                               ? toupper(*cp) 
: *cp)-'A'+10) < base) {
+        result = result*base + value;
+        cp++;
+    }
+    if (endp)
+        *endp = (char *)cp;
+    return result;
+}
+
+/**
+ * simple_strtoll - convert a string to a signed long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long long simple_strtoll(const char *cp,char **endp,unsigned int base)
+{
+    if(*cp=='-')
+        return -simple_strtoull(cp+1,endp,base);
+    return simple_strtoull(cp,endp,base);
+}
+
+static int skip_atoi(const char **s)
+{
+    int i=0;
+
+    while (isdigit(**s))
+        i = i*10 + *((*s)++) - '0';
+    return i;
+}
+
+#define ZEROPAD 1               /* pad with zero */
+#define SIGN    2               /* unsigned/signed long */
+#define PLUS    4               /* show plus */
+#define SPACE   8               /* space if plus */
+#define LEFT    16              /* left justified */
+#define SPECIAL 32              /* 0x */
+#define LARGE   64              /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long long num, int base, int 
size, int precision, int type)
+{
+    char c,sign,tmp[66];
+    const char *digits;
+    const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+    const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    int i;
+
+    digits = (type & LARGE) ? large_digits : small_digits;
+    if (type & LEFT)
+        type &= ~ZEROPAD;
+    if (base < 2 || base > 36)
+        return buf;
+    c = (type & ZEROPAD) ? '0' : ' ';
+    sign = 0;
+    if (type & SIGN) {
+        if (num < 0) {
+            sign = '-';
+            num = -num;
+            size--;
+        } else if (type & PLUS) {
+            sign = '+';
+            size--;
+        } else if (type & SPACE) {
+            sign = ' ';
+            size--;
+        }
+    }
+    if (type & SPECIAL) {
+        if (base == 16)
+            size -= 2;
+        else if (base == 8)
+            size--;
+    }
+    i = 0;
+    if (num == 0)
+        tmp[i++]='0';
+    else 
+    {
+        /* XXX KAF: force unsigned mod and div. */
+        unsigned long long num2=(unsigned long long)num;
+        unsigned int base2=(unsigned int)base;
+        while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
+    }
+    if (i > precision)
+        precision = i;
+    size -= precision;
+    if (!(type&(ZEROPAD+LEFT))) {
+        while(size-->0) {
+            if (buf <= end)
+                *buf = ' ';
+            ++buf;
+        }
+    }
+    if (sign) {
+        if (buf <= end)
+            *buf = sign;
+        ++buf;
+    }
+    if (type & SPECIAL) {
+        if (base==8) {
+            if (buf <= end)
+                *buf = '0';
+            ++buf;
+        } else if (base==16) {
+            if (buf <= end)
+                *buf = '0';
+            ++buf;
+            if (buf <= end)
+                *buf = digits[33];
+            ++buf;
+        }
+    }
+    if (!(type & LEFT)) {
+        while (size-- > 0) {
+            if (buf <= end)
+                *buf = c;
+            ++buf;
+        }
+    }
+    while (i < precision--) {
+        if (buf <= end)
+            *buf = '0';
+        ++buf;
+    }
+    while (i-- > 0) {
+        if (buf <= end)
+            *buf = tmp[i];
+        ++buf;
+    }
+    while (size-- > 0) {
+        if (buf <= end)
+            *buf = ' ';
+        ++buf;
+    }
+    return buf;
+}
+
+/**
+* vsnprintf - Format a string and place it in a buffer
+* @buf: The buffer to place the result into
+* @size: The size of the buffer, including the trailing null space
+* @fmt: The format string to use
+* @args: Arguments for the format string
+*
+* Call this function if you are already dealing with a va_list.
+* You probably want snprintf instead.
+ */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+    int len;
+    unsigned long long num;
+    int i, base;
+    char *str, *end, c;
+    const char *s;
+
+    int flags;          /* flags to number() */
+
+    int field_width;    /* width of output field */
+    int precision;              /* min. # of digits for integers; max
+                                   number of chars for from string */
+    int qualifier;              /* 'h', 'l', or 'L' for integer fields */
+                                /* 'z' support added 23/7/1999 S.H.    */
+                                /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+    str = buf;
+    end = buf + size - 1;
+
+    if (end < buf - 1) {
+        end = ((void *) -1);
+        size = end - buf + 1;
+    }
+
+    for (; *fmt ; ++fmt) {
+        if (*fmt != '%') {
+            if (str <= end)
+                *str = *fmt;
+            ++str;
+            continue;
+        }
+
+        /* process flags */
+        flags = 0;
+    repeat:
+        ++fmt;          /* this also skips first '%' */
+        switch (*fmt) {
+        case '-': flags |= LEFT; goto repeat;
+        case '+': flags |= PLUS; goto repeat;
+        case ' ': flags |= SPACE; goto repeat;
+        case '#': flags |= SPECIAL; goto repeat;
+        case '0': flags |= ZEROPAD; goto repeat;
+        }
+
+        /* get field width */
+        field_width = -1;
+        if (isdigit(*fmt))
+            field_width = skip_atoi(&fmt);
+        else if (*fmt == '*') {
+            ++fmt;
+            /* it's the next argument */
+            field_width = va_arg(args, int);
+            if (field_width < 0) {
+                field_width = -field_width;
+                flags |= LEFT;
+            }
+        }
+
+        /* get the precision */
+        precision = -1;
+        if (*fmt == '.') {
+            ++fmt;
+            if (isdigit(*fmt))
+                precision = skip_atoi(&fmt);
+            else if (*fmt == '*') {
+                ++fmt;
+                          /* it's the next argument */
+                precision = va_arg(args, int);
+            }
+            if (precision < 0)
+                precision = 0;
+        }
+
+        /* get the conversion qualifier */
+        qualifier = -1;
+        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+            qualifier = *fmt;
+            ++fmt;
+            if (qualifier == 'l' && *fmt == 'l') {
+                qualifier = 'L';
+                ++fmt;
+            }
+        }
+        if (*fmt == 'q') {
+            qualifier = 'L';
+            ++fmt;
+        }
+
+        /* default base */
+        base = 10;
+
+        switch (*fmt) {
+        case 'c':
+            if (!(flags & LEFT)) {
+                while (--field_width > 0) {
+                    if (str <= end)
+                        *str = ' ';
+                    ++str;
+                }
+            }
+            c = (unsigned char) va_arg(args, int);
+            if (str <= end)
+                *str = c;
+            ++str;
+            while (--field_width > 0) {
+                if (str <= end)
+                    *str = ' ';
+                ++str;
+            }
+            continue;
+
+        case 's':
+            s = va_arg(args, char *);
+            if (!s)
+                s = "<NULL>";
+
+            len = strnlen(s, precision);
+
+            if (!(flags & LEFT)) {
+                while (len < field_width--) {
+                    if (str <= end)
+                        *str = ' ';
+                    ++str;
+                }
+            }
+            for (i = 0; i < len; ++i) {
+                if (str <= end)
+                    *str = *s;
+                ++str; ++s;
+            }
+            while (len < field_width--) {
+                if (str <= end)
+                    *str = ' ';
+                ++str;
+            }
+            continue;
+
+        case 'p':
+            if (field_width == -1) {
+                field_width = 2*sizeof(void *);
+                flags |= ZEROPAD;
+            }
+            str = number(str, end,
+                         (unsigned long) va_arg(args, void *),
+                         16, field_width, precision, flags);
+            continue;
+
+
+        case 'n':
+            /* FIXME:
+             * What does C99 say about the overflow case here? */
+            if (qualifier == 'l') {
+                long * ip = va_arg(args, long *);
+                *ip = (str - buf);
+            } else if (qualifier == 'Z') {
+                size_t * ip = va_arg(args, size_t *);
+                *ip = (str - buf);
+            } else {
+                int * ip = va_arg(args, int *);
+                *ip = (str - buf);
+            }
+            continue;
+
+        case '%':
+            if (str <= end)
+                *str = '%';
+            ++str;
+            continue;
+
+                        /* integer number formats - set up the flags and "break" */
+        case 'o':
+            base = 8;
+            break;
+
+        case 'X':
+            flags |= LARGE;
+        case 'x':
+            base = 16;
+            break;
+
+        case 'd':
+        case 'i':
+            flags |= SIGN;
+        case 'u':
+            break;
+
+        default:
+            if (str <= end)
+                *str = '%';
+            ++str;
+            if (*fmt) {
+                if (str <= end)
+                    *str = *fmt;
+                ++str;
+            } else {
+                --fmt;
+            }
+            continue;
+        }
+        if (qualifier == 'L')
+            num = va_arg(args, long long);
+        else if (qualifier == 'l') {
+            num = va_arg(args, unsigned long);
+            if (flags & SIGN)
+                num = (signed long) num;
+        } else if (qualifier == 'Z') {
+            num = va_arg(args, size_t);
+        } else if (qualifier == 'h') {
+            num = (unsigned short) va_arg(args, int);
+            if (flags & SIGN)
+                num = (signed short) num;
+        } else {
+            num = va_arg(args, unsigned int);
+            if (flags & SIGN)
+                num = (signed int) num;
+        }
+
+        str = number(str, end, num, base,
+                     field_width, precision, flags);
+    }
+    if (str <= end)
+        *str = '\0';
+    else if (size > 0)
+        /* don't write out a null byte if the buf size is zero */
+        *end = '\0';
+    /* the trailing null byte doesn't count towards the total
+     * ++str;
+     */
+    return str-buf;
+}
+
+/**
+ * snprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args, fmt);
+    i=vsnprintf(buf,size,fmt,args);
+    va_end(args);
+    return i;
+}
+
+/**
+ * vsprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
  *
- * Two additional formats:
- *
- * The format %b is supported to decode error registers.
- * Its usage is:
- *
- *     printf("reg=%b\n", regval, "<base><arg>*");
- *
- * where <base> is the output base expressed as a control character, e.g.
- * \10 gives octal; \20 gives hex.  Each arg is a sequence of characters,
- * the first of which gives the bit number to be inspected (origin 1), and
- * the next characters (up to a control character, i.e. a character <= 32),
- * give the name of the register.  Thus:
- *
- *     kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
- *
- * would produce output:
- *
- *     reg=3<BITTWO,BITONE>
- *
- * XXX:  %D  -- Hexdump, takes pointer and separator string:
- *             ("%6D", ptr, ":")   -> XX:XX:XX:XX:XX:XX
- *             ("%*D", len, ptr, " " -> XX XX XX XX ...
- */
-
-/* RN: This normally takes a function for output. 
- * we always print to a string and the use HYPERCALL for write to console */
-static int
-kvprintf(char const *fmt, void *arg, int radix, va_list ap)
-{
-
-#define PCHAR(c) {int cc=(c); *d++ = cc; retval++; }
-
-       char nbuf[MAXNBUF];
-       char *p, *q, *d;
-       u_char *up;
-       int ch, n;
-       u_long ul;
-       u_quad_t uq;
-       int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot;
-       int dwidth;
-       char padc;
-       int retval = 0;
-
-       ul = 0;
-       uq = 0;
-    d = (char *) arg;
-
-       if (fmt == NULL)
-               fmt = "(fmt null)\n";
-
-       if (radix < 2 || radix > 36)
-               radix = 10;
-
-       for (;;) {
-               padc = ' ';
-               width = 0;
-               while ((ch = (u_char)*fmt++) != '%') {
-                       if (ch == '\0') 
-                               return retval;
-                       PCHAR(ch);
-               }
-               qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0;
-               sign = 0; dot = 0; dwidth = 0;
-reswitch:      switch (ch = (u_char)*fmt++) {
-               case '.':
-                       dot = 1;
-                       goto reswitch;
-               case '#':
-                       sharpflag = 1;
-                       goto reswitch;
-               case '+':
-                       sign = 1;
-                       goto reswitch;
-               case '-':
-                       ladjust = 1;
-                       goto reswitch;
-               case '%':
-                       PCHAR(ch);
-                       break;
-               case '*':
-                       if (!dot) {
-                               width = va_arg(ap, int);
-                               if (width < 0) {
-                                       ladjust = !ladjust;
-                                       width = -width;
+ * Call this function if you are already dealing with a va_list.
+ * You probably want sprintf instead.
+ */
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+    return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
+}
+
+
+/**
+ * sprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int sprintf(char * buf, const char *fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args, fmt);
+    i=vsprintf(buf,fmt,args);
+    va_end(args);
+    return i;
+}
+
+
+void printf(const char *fmt, ...)
+{
+    static char   buf[1024];
+    va_list       args;
+    
+    va_start(args, fmt);
+    (void)vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);        
+   
+    (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(buf), buf);
+}
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf:       input buffer
+ * @fmt:       format of buffer
+ * @args:      arguments
+ */
+int vsscanf(const char * buf, const char * fmt, va_list args)
+{
+       const char *str = buf;
+       char *next;
+       char digit;
+       int num = 0;
+       int qualifier;
+       int base;
+       int field_width;
+       int is_sign = 0;
+
+       while(*fmt && *str) {
+               /* skip any white space in format */
+               /* white space in format matchs any amount of
+                * white space, including none, in the input.
+                */
+               if (isspace(*fmt)) {
+                       while (isspace(*fmt))
+                               ++fmt;
+                       while (isspace(*str))
+                               ++str;
+               }
+
+               /* anything that is not a conversion must match exactly */
+               if (*fmt != '%' && *fmt) {
+                       if (*fmt++ != *str++)
+                               break;
+                       continue;
+               }
+
+               if (!*fmt)
+                       break;
+               ++fmt;
+               
+               /* skip this conversion.
+                * advance both strings to next white space
+                */
+               if (*fmt == '*') {
+                       while (!isspace(*fmt) && *fmt)
+                               fmt++;
+                       while (!isspace(*str) && *str)
+                               str++;
+                       continue;
+               }
+
+               /* get field width */
+               field_width = -1;
+               if (isdigit(*fmt))
+                       field_width = skip_atoi(&fmt);
+
+               /* get conversion qualifier */
+               qualifier = -1;
+               if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+                   *fmt == 'Z' || *fmt == 'z') {
+                       qualifier = *fmt++;
+                       if (unlikely(qualifier == *fmt)) {
+                               if (qualifier == 'h') {
+                                       qualifier = 'H';
+                                       fmt++;
+                               } else if (qualifier == 'l') {
+                                       qualifier = 'L';
+                                       fmt++;
                                }
-                       } else {
-                               dwidth = va_arg(ap, int);
                        }
-                       goto reswitch;
-               case '0':
-                       if (!dot) {
-                               padc = '0';
-                               goto reswitch;
+               }
+               base = 10;
+               is_sign = 0;
+
+               if (!*fmt || !*str)
+                       break;
+
+               switch(*fmt++) {
+               case 'c':
+               {
+                       char *s = (char *) va_arg(args,char*);
+                       if (field_width == -1)
+                               field_width = 1;
+                       do {
+                               *s++ = *str++;
+                       } while (--field_width > 0 && *str);
+                       num++;
+               }
+               continue;
+               case 's':
+               {
+                       char *s = (char *) va_arg(args, char *);
+                       if(field_width == -1)
+                               field_width = INT_MAX;
+                       /* first, skip leading white space in buffer */
+                       while (isspace(*str))
+                               str++;
+
+                       /* now copy until next white space */
+                       while (*str && !isspace(*str) && field_width--) {
+                               *s++ = *str++;
                        }
-               case '1': case '2': case '3': case '4':
-               case '5': case '6': case '7': case '8': case '9':
-                               for (n = 0;; ++fmt) {
-                                       n = n * 10 + ch - '0';
-                                       ch = *fmt;
-                                       if (ch < '0' || ch > '9')
-                                               break;
-                               }
-                       if (dot)
-                               dwidth = n;
-                       else
-                               width = n;
-                       goto reswitch;
-               case 'b':
-                       ul = va_arg(ap, int);
-                       p = va_arg(ap, char *);
-                       for (q = ksprintn(nbuf, ul, *p++, NULL); *q;)
-                               PCHAR(*q--);
-
-                       if (!ul)
-                               break;
-
-                       for (tmp = 0; *p;) {
-                               n = *p++;
-                               if (ul & (1 << (n - 1))) {
-                                       PCHAR(tmp ? ',' : '<');
-                                       for (; (n = *p) > ' '; ++p)
-                                               PCHAR(n);
-                                       tmp = 1;
-                               } else
-                                       for (; *p > ' '; ++p)
-                                               continue;
-                       }
-                       if (tmp)
-                               PCHAR('>');
-                       break;
-               case 'c':
-                       PCHAR(va_arg(ap, int));
-                       break;
-               case 'D':
-                       up = va_arg(ap, u_char *);
-                       p = va_arg(ap, char *);
-                       if (!width)
-                               width = 16;
-                       while(width--) {
-                               PCHAR(hex2ascii(*up >> 4));
-                               PCHAR(hex2ascii(*up & 0x0f));
-                               up++;
-                               if (width)
-                                       for (q=p;*q;q++)
-                                               PCHAR(*q);
-                       }
-                       break;
-               case 'd':
-                       if (qflag)
-                               uq = va_arg(ap, quad_t);
-                       else if (lflag)
-                               ul = va_arg(ap, long);
-                       else
-                               ul = va_arg(ap, int);
-                       sign = 1;
-                       base = 10;
-                       goto number;
-               case 'l':
-                       if (lflag) {
-                               lflag = 0;
-                               qflag = 1;
-                       } else
-                               lflag = 1;
-                       goto reswitch;
+                       *s = '\0';
+                       num++;
+               }
+               continue;
+               case 'n':
+                       /* return number of characters read so far */
+               {
+                       int *i = (int *)va_arg(args,int*);
+                       *i = str - buf;
+               }
+               continue;
                case 'o':
-                       if (qflag)
-                               uq = va_arg(ap, u_quad_t);
-                       else if (lflag)
-                               ul = va_arg(ap, u_long);
-                       else
-                               ul = va_arg(ap, u_int);
                        base = 8;
-                       goto nosign;
-               case 'p':
-                       ul = (uintptr_t)va_arg(ap, void *);
-                       base = 16;
-                       sharpflag = 0;
-            padc  = '0';
-            width = sizeof(uintptr_t)*2;
-                       goto nosign;
-               case 'q':
-                       qflag = 1;
-                       goto reswitch;
-               case 'n':
-               case 'r':
-                       if (qflag)
-                               uq = va_arg(ap, u_quad_t);
-                       else if (lflag)
-                               ul = va_arg(ap, u_long);
-                       else
-                               ul = sign ?
-                                   (u_long)va_arg(ap, int) : va_arg(ap, u_int);
-                       base = radix;
-                       goto number;
-               case 's':
-                       p = va_arg(ap, char *);
-                       if (p == NULL)
-                               p = "(null)";
-                       if (!dot)
-                               n = strlen (p);
-                       else
-                               for (n = 0; n < dwidth && p[n]; n++)
-                                       continue;
-
-                       width -= n;
-
-                       if (!ladjust && width > 0)
-                               while (width--)
-                                       PCHAR(padc);
-                       while (n--)
-                               PCHAR(*p++);
-                       if (ladjust && width > 0)
-                               while (width--)
-                                       PCHAR(padc);
-                       break;
-               case 'u':
-                       if (qflag)
-                               uq = va_arg(ap, u_quad_t);
-                       else if (lflag)
-                               ul = va_arg(ap, u_long);
-                       else
-                               ul = va_arg(ap, u_int);
-                       base = 10;
-                       goto nosign;
+                       break;
                case 'x':
                case 'X':
-                       if (qflag)
-                               uq = va_arg(ap, u_quad_t);
-                       else if (lflag)
-                               ul = va_arg(ap, u_long);
-                       else
-                               ul = va_arg(ap, u_int);
                        base = 16;
-                       goto nosign;
+                       break;
+               case 'i':
+                        base = 0;
+               case 'd':
+                       is_sign = 1;
+               case 'u':
+                       break;
+               case '%':
+                       /* looking for '%' in str */
+                       if (*str++ != '%') 
+                               return num;
+                       continue;
+               default:
+                       /* invalid format; stop here */
+                       return num;
+               }
+
+               /* have some sort of integer conversion.
+                * first, skip white space in buffer.
+                */
+               while (isspace(*str))
+                       str++;
+
+               digit = *str;
+               if (is_sign && digit == '-')
+                       digit = *(str + 1);
+
+               if (!digit
+                    || (base == 16 && !isxdigit(digit))
+                    || (base == 10 && !isdigit(digit))
+                    || (base == 8 && (!isdigit(digit) || digit > '7'))
+                    || (base == 0 && !isdigit(digit)))
+                               break;
+
+               switch(qualifier) {
+               case 'H':       /* that's 'hh' in format */
+                       if (is_sign) {
+                               signed char *s = (signed char *) va_arg(args,signed char *);
+                               *s = (signed char) simple_strtol(str,&next,base);
+                       } else {
+                               unsigned char *s = (unsigned char *) va_arg(args, unsigned char *);
+                               *s = (unsigned char) simple_strtoul(str, &next, base);
+                       }
+                       break;
+               case 'h':
+                       if (is_sign) {
+                               short *s = (short *) va_arg(args,short *);
+                               *s = (short) simple_strtol(str,&next,base);
+                       } else {
+                               unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
+                               *s = (unsigned short) simple_strtoul(str, &next, base);
+                       }
+                       break;
+               case 'l':
+                       if (is_sign) {
+                               long *l = (long *) va_arg(args,long *);
+                               *l = simple_strtol(str,&next,base);
+                       } else {
+                               unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
+                               *l = simple_strtoul(str,&next,base);
+                       }
+                       break;
+               case 'L':
+                       if (is_sign) {
+                               long long *l = (long long*) va_arg(args,long long *);
+                               *l = simple_strtoll(str,&next,base);
+                       } else {
+                               unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
+                               *l = simple_strtoull(str,&next,base);
+                       }
+                       break;
+               case 'Z':
                case 'z':
-                       if (qflag)
-                               uq = va_arg(ap, u_quad_t);
-                       else if (lflag)
-                               ul = va_arg(ap, u_long);
-                       else
-                               ul = sign ?
-                                   (u_long)va_arg(ap, int) : va_arg(ap, u_int);
-                       base = 16;
-                       goto number;
-nosign:                        sign = 0;
-number:                        
-                       if (qflag) {
-                               if (sign && (quad_t)uq < 0) {
-                                       neg = 1;
-                                       uq = -(quad_t)uq;
-                               }
-                               p = ksprintqn(nbuf, uq, base, &tmp);
+               {
+                       size_t *s = (size_t*) va_arg(args,size_t*);
+                       *s = (size_t) simple_strtoul(str,&next,base);
+               }
+               break;
+               default:
+                       if (is_sign) {
+                               int *i = (int *) va_arg(args, int*);
+                               *i = (int) simple_strtol(str,&next,base);
                        } else {
-                               if (sign && (long)ul < 0) {
-                                       neg = 1;
-                                       ul = -(long)ul;
-                               }
-                               p = ksprintn(nbuf, ul, base, &tmp);
+                               unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
+                               *i = (unsigned int) simple_strtoul(str,&next,base);
                        }
-                       if (sharpflag && (qflag ? uq != 0 : ul != 0)) {
-                               if (base == 8)
-                                       tmp++;
-                               else if (base == 16)
-                                       tmp += 2;
-                       }
-                       if (neg)
-                               tmp++;
-
-                       if (!ladjust && width && (width -= tmp) > 0)
-                               while (width--)
-                                       PCHAR(padc);
-                       if (neg)
-                               PCHAR('-');
-                       if (sharpflag && (qflag ? uq != 0 : ul != 0)) {
-                               if (base == 8) {
-                                       PCHAR('0');
-                               } else if (base == 16) {
-                                       PCHAR('0');
-                                       PCHAR('x');
-                               }
-                       }
-
-                       while (*p)
-                               PCHAR(*p--);
-
-                       if (ladjust && width && (width -= tmp) > 0)
-                               while (width--)
-                                       PCHAR(padc);
-
-                       break;
-               default:
-                       PCHAR('%');
-                       if (lflag)
-                               PCHAR('l');
-                       PCHAR(ch);
-                       break;
-               }
+                       break;
+               }
+               num++;
+
+               if (!next)
+                       break;
+               str = next;
        }
-#undef PCHAR
-}
-
+       return num;
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf:       input buffer
+ * @fmt:       formatting of buffer
+ * @...:       resulting arguments
+ */
+int sscanf(const char * buf, const char * fmt, ...)
+{
+       va_list args;
+       int i;
+
+       va_start(args,fmt);
+       i = vsscanf(buf,fmt,args);
+       va_end(args);
+       return i;
+}
+
+
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c       Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/lib/string.c       Fri Sep  9 16:30:54 2005
@@ -107,6 +107,19 @@
         return sc - s;
 }
 
+
+char * strcat(char * dest, const char * src)
+{
+    char *tmp = dest;
+    
+    while (*dest)
+        dest++;
+    
+    while ((*dest++ = *src++) != '\0');
+    
+    return tmp;
+}
+
 size_t strlen(const char * s)
 {
        const char *sc;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/mm.c       Fri Sep  9 16:30:54 2005
@@ -198,7 +198,6 @@
 #endif
 
 
-
 /*
  * Initialise allocator, placing addresses [@min,@max] in free pool.
  * @min and @max are PHYSICAL addresses.
@@ -486,16 +485,17 @@
     phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
    
     /* First page follows page table pages and 3 more pages (store page etc) */
-    start_pfn = PFN_UP(__pa(start_info.pt_base)) + start_info.nr_pt_frames + 3;
+    start_pfn = PFN_UP(to_phys(start_info.pt_base)) + start_info.nr_pt_frames + 3;
     max_pfn = start_info.nr_pages;
 
     printk("  start_pfn:    %lx\n", start_pfn);
     printk("  max_pfn:      %lx\n", max_pfn);
 
 
+#ifdef __i386__
     build_pagetable(&start_pfn, &max_pfn);
-    
-#ifdef __i386__
+#endif
+
     /*
      * now we can initialise the page allocator
      */
@@ -503,7 +503,5 @@
            (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), 
            (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
     init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));   
-#endif
-    
     printk("MM: done\n");
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/time.c
--- a/extras/mini-os/time.c     Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/time.c     Fri Sep  9 16:30:54 2005
@@ -203,7 +203,7 @@
 }
 
 
-void block(u32 millisecs)
+void block_domain(u32 millisecs)
 {
     struct timeval tv;
     gettimeofday(&tv);
@@ -232,5 +232,6 @@
 
 void init_time(void)
 {
+    printk("Initialising timer interface\n");
     bind_virq(VIRQ_TIMER, &timer_handler);
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/traps.c
--- a/extras/mini-os/traps.c    Thu Sep  8 15:18:40 2005
+++ b/extras/mini-os/traps.c    Fri Sep  9 16:30:54 2005
@@ -33,36 +33,7 @@
 
 void dump_regs(struct pt_regs *regs)
 {
-    unsigned long esp;
-    unsigned short ss;
-
-#ifdef __x86_64__
-    esp = regs->rsp;
-    ss  = regs->xss;
-#else
-    esp = (unsigned long) (&regs->esp);
-    ss = __KERNEL_DS;
-    if (regs->xcs & 2) {
-printk("CS is true, esp is %x\n", regs->esp);
-        esp = regs->esp;
-        ss = regs->xss & 0xffff;
-    }
-#endif
-    printf("EIP:    %04x:[<%p>]\n",
-           0xffff & regs->xcs , regs->eip);
-    printf("EFLAGS: %p\n",regs->eflags);
-    printf("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
-           regs->eax, regs->ebx, regs->ecx, regs->edx);
-    printf("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
-           regs->esi, regs->edi, regs->ebp, esp);
-#ifdef __x86_64__
-    printf("r8 : %p   r9 : %p   r10: %p   r11: %p\n",
-           regs->r8,  regs->r9,  regs->r10, regs->r11);
-    printf("r12: %p   r13: %p   r14: %p   r15: %p\n",
-           regs->r12, regs->r13, regs->r14, regs->r15);
-#endif
-    printf("ds: %04x   es: %04x   ss: %04x\n",
-           regs->xds & 0xffff, regs->xes & 0xffff, ss);
+    printk("FIXME: proper register dump (with the stack dump)\n");
 }      
 
 
@@ -105,6 +76,7 @@
     printk("Page fault at linear address %p\n", addr);
     dump_regs(regs);
 #ifdef __x86_64__
+    /* FIXME: _PAGE_PSE */
     {
         unsigned long *tab = (unsigned long *)start_info.pt_base;
         unsigned long page;
@@ -112,23 +84,16 @@
         printk("Pagetable walk from %p:\n", tab);
         
         page = tab[l4_table_offset(addr)];
-        tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
+        tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
         printk(" L4 = %p (%p)\n", page, tab);
-        if ( !(page & _PAGE_PRESENT) )
-            goto out;
 
         page = tab[l3_table_offset(addr)];
-        tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
+        tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
         printk("  L3 = %p (%p)\n", page, tab);
-        if ( !(page & _PAGE_PRESENT) )
-            goto out;
         
         page = tab[l2_table_offset(addr)];
-        tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
-        printk("   L2 = %p (%p) %s\n", page, tab,
-               (page & _PAGE_PSE) ? "(2MB)" : "");
-        if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
-            goto out;
+        tab =  to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
+        printk("   L2 = %p (%p)\n", page, tab);
         
         page = tab[l1_table_offset(addr)];
         printk("    L1 = %p\n", page);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.4-xen-sparse/mkbuildtree
--- a/linux-2.4-xen-sparse/mkbuildtree  Thu Sep  8 15:18:40 2005
+++ b/linux-2.4-xen-sparse/mkbuildtree  Fri Sep  9 16:30:54 2005
@@ -102,9 +102,9 @@
 relative_lndir ${RS}
 rm -f mkbuildtree
 
-set ${RS}/../linux-2.6-xen-sparse
-[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse 
tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
-LINUX_26="$1"
+LINUX_26=${RS}/../linux-2.6-xen-sparse
+[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at 
${RS}/../linux-2.6-xen-sparse"; exit 1; }
+
 
 # Create links to the shared definitions of the Xen interfaces.
 rm -rf ${AD}/include/asm-xen/xen-public
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig     Fri Sep  9 16:30:54 2005
@@ -70,6 +70,27 @@
          network devices to other guests via a high-performance shared-memory
          interface.
 
+config XEN_TPMDEV_FRONTEND
+        bool "TPM-device frontend driver"
+        default n
+        help
+          The TPM-device frontend driver.
+
+config XEN_TPMDEV_BACKEND
+        bool "TPM-device backend driver"
+        default n
+        help
+          The TPM-device backend driver
+
+config XEN_TPMDEV_CLOSE_IF_VTPM_FAILS
+        bool "TPM backend closes upon vTPM failure"
+        depends on XEN_TPMDEV_BACKEND
+        default n
+        help
+          The TPM backend closes the channel if the vTPM in userspace indicates
+          a failure. The corresponding domain's channel will be closed.
+          Say Y if you want this feature.
+
 config XEN_BLKDEV_FRONTEND
        bool "Block-device frontend driver"
        default y
@@ -88,15 +109,8 @@
          dedicated device-driver domain, or your master control domain
          (domain 0), then you almost certainly want to say Y here.
 
-config XEN_NETDEV_GRANT_TX
-        bool "Grant table substrate for net drivers tx path (DANGEROUS)"
-        default n
-        help
-          This introduces the use of grant tables as a data exhange mechanism
-          between the frontend and backend network drivers.
-
-config XEN_NETDEV_GRANT_RX
-        bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+config XEN_NETDEV_GRANT
+        bool "Grant table substrate for network drivers (DANGEROUS)"
         default n
         help
           This introduces the use of grant tables as a data exhange mechanism
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/Kconfig.drivers
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig.drivers     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig.drivers     Fri Sep  9 16:30:54 2005
@@ -49,6 +49,10 @@
 endif
 
 if !XEN_PHYSDEV_ACCESS
+source "drivers/char/tpm/Kconfig.domU"
+endif
+
+if !XEN_PHYSDEV_ACCESS
 
 menu "Character devices"
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/Makefile    Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Makefile    Fri Sep  9 16:30:54 2005
@@ -65,6 +65,7 @@
 
 XINSTALL_NAME ?= $(KERNELRELEASE)
 install: vmlinuz
+install kernel_install:
        mkdir -p $(INSTALL_PATH)/boot
        ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) 
$(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
        rm -f $(INSTALL_PATH)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Fri Sep 
 9 16:30:54 2005
@@ -15,10 +15,11 @@
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
 CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_TPMDEV_FRONTEND is not set
+# CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT_TX=y
-CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -1122,7 +1123,7 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
+CONFIG_CRAMFS=y
 # CONFIG_VXFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Fri Sep 
 9 16:30:54 2005
@@ -15,10 +15,11 @@
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
 CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_TPMDEV_FRONTEND is not set
+# CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT_TX=y
-CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -195,6 +196,7 @@
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
 # CONFIG_BLK_DEV_NBD is not set
 # CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
@@ -1030,7 +1032,7 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
+CONFIG_CRAMFS=y
 # CONFIG_VXFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Fri Sep 
 9 16:30:54 2005
@@ -12,10 +12,11 @@
 #
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
+# CONFIG_XEN_TPMDEV_FRONTEND is not set
+# CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT_TX=y
-CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -336,6 +337,7 @@
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_TCG_TPM is not set
 
 #
 # Character devices
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64       Fri Sep 
 9 16:30:54 2005
@@ -12,10 +12,11 @@
 #
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
+# CONFIG_XEN_TPMDEV_FRONTEND is not set
+# CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT_TX=y
-CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -662,6 +663,7 @@
 CONFIG_INPUT=m
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
+# CONFIG_TCG_TPM is not set
 
 #
 # Character devices
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32        Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32        Fri Sep 
 9 16:30:54 2005
@@ -15,10 +15,11 @@
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
 CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_TPMDEV_FRONTEND is not set
+# CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT_TX=y
-CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -1855,9 +1856,7 @@
 #
 # TPM devices
 #
-CONFIG_TCG_TPM=m
-CONFIG_TCG_NSC=m
-CONFIG_TCG_ATMEL=m
+# CONFIG_TCG_TPM is not set
 
 #
 # I2C support
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64        Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64        Fri Sep 
 9 16:30:54 2005
@@ -15,10 +15,11 @@
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
 CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_TPMDEV_FRONTEND is not set
+# CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT_TX=y
-CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -2201,7 +2202,7 @@
 CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
+CONFIG_DEVFS_FS=y
 CONFIG_DEVPTS_FS_XATTR=y
 CONFIG_DEVPTS_FS_SECURITY=y
 CONFIG_TMPFS=y
@@ -2231,7 +2232,7 @@
 CONFIG_JFFS2_ZLIB=y
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
-CONFIG_CRAMFS=m
+CONFIG_CRAMFS=y
 CONFIG_VXFS_FS=m
 # CONFIG_HPFS_FS is not set
 CONFIG_QNX4FS_FS=m
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Fri Sep  9 16:30:54 2005
@@ -379,18 +379,18 @@
          If you don't know what to do here, say N.
 
 config SMP_ALTERNATIVES
-        bool "SMP alternatives support (EXPERIMENTAL)"
-        depends on SMP && EXPERIMENTAL
-        help
-          Try to reduce the overhead of running an SMP kernel on a uniprocessor
-          host slightly by replacing certain key instruction sequences
-          according to whether we currently have more than one CPU available.
-          This should provide a noticeable boost to performance when
-          running SMP kernels on UP machines, and have negligible impact
-          when running on an true SMP host.
+       bool "SMP alternatives support (EXPERIMENTAL)"
+       depends on SMP && EXPERIMENTAL
+       help
+         Try to reduce the overhead of running an SMP kernel on a uniprocessor
+         host slightly by replacing certain key instruction sequences
+         according to whether we currently have more than one CPU available.
+         This should provide a noticeable boost to performance when
+         running SMP kernels on UP machines, and have negligible impact
+         when running on an true SMP host.
 
           If unsure, say N.
-
+         
 config NR_CPUS
        int "Maximum number of CPUs (2-255)"
        range 2 255
@@ -807,8 +807,8 @@
          direct access method and falls back to the BIOS if that doesn't
          work. If unsure, go with the default, which is "Any".
 
-config PCI_GOBIOS
-       bool "BIOS"
+#config PCI_GOBIOS
+#      bool "BIOS"
 
 config PCI_GOMMCONFIG
        bool "MMConfig"
@@ -821,10 +821,10 @@
 
 endchoice
 
-config PCI_BIOS
-       bool
-       depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
-       default y
+#config PCI_BIOS
+#      bool
+#      depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+#      default y
 
 config PCI_DIRECT
        bool
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Fri Sep  9 
16:30:54 2005
@@ -5,6 +5,7 @@
 XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
 CFLAGS += -Iarch/$(XENARCH)/kernel
+AFLAGS += -Iarch/$(XENARCH)/kernel
 
 extra-y := head.o init_task.o
 
@@ -32,7 +33,7 @@
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
-c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
+c-obj-$(CONFIG_X86_REBOOTFIXUPS)       += reboot_fixups.o
 c-obj-$(CONFIG_X86_NUMAQ)      += numaq.o
 c-obj-$(CONFIG_X86_SUMMIT_NUMA)        += summit.o
 c-obj-$(CONFIG_MODULES)                += module.o
@@ -69,7 +70,7 @@
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
-                     $(obj)/vsyscall-%.o FORCE
+                     $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -81,20 +82,17 @@
 
 SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
-                       $(obj)/vsyscall-sysenter.o FORCE
+                       $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
        $(call if_changed,syscall)
 
 c-link :=
-s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o 
vsyscall.lds.o syscall_table.o
+s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o 
vsyscall.lds.o vsyscall-note.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst 
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
        @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
 
 $(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
 
-EXTRA_AFLAGS   += -I$(obj)
-$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
-
 obj-y  += $(c-obj-y) $(s-obj-y)
 obj-m  += $(c-obj-m)
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c     Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c     Fri Sep  9 
16:30:54 2005
@@ -469,6 +469,18 @@
        unsigned int irq;
        unsigned int plat_gsi = gsi;
 
+#ifdef CONFIG_PCI
+       /*
+        * Make sure all (legacy) PCI IRQs are set as level-triggered.
+        */
+       if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+               extern void eisa_set_level_irq(unsigned int irq);
+
+               if (edge_level == ACPI_LEVEL_SENSITIVE)
+                               eisa_set_level_irq(gsi);
+       }
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
        if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
                plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
@@ -610,7 +622,7 @@
        acpi_fadt.force_apic_physical_destination_mode = 
fadt->force_apic_physical_destination_mode;
 #endif
 
-#ifdef CONFIG_X86_PM_TIMER
+#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
        /* detect the location of the ACPI PM Timer */
        if (fadt->revision >= FADT2_REVISION_ID) {
                /* FADT rev. 2 */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Fri Sep  9 
16:30:54 2005
@@ -147,7 +147,7 @@
 {
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
-       if (!(xen_start_info.flags & SIF_PRIVILEGED))
+       if (!(xen_start_info->flags & SIF_PRIVILEGED))
                return -ENODEV;
 
        if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Fri Sep  9 16:30:54 2005
@@ -47,6 +47,7 @@
 #include <asm/segment.h>
 #include <asm/smp.h>
 #include <asm/page.h>
+#include <asm/desc.h>
 #include "irq_vectors.h"
 #include <asm-xen/xen-public/xen.h>
 
@@ -112,7 +113,7 @@
                                XEN_BLOCK_EVENTS(%esi)
 #else
 #define preempt_stop
-#define resume_kernel          restore_all
+#define resume_kernel          restore_nocheck
 #endif
 
 #define SAVE_ALL \
@@ -161,11 +162,9 @@
        addl $4, %esp;  \
 1:     iret;           \
 .section .fixup,"ax";   \
-2:     movl $(__USER_DS), %edx; \
-       movl %edx, %ds; \
-       movl %edx, %es; \
-       movl $11,%eax;  \
-       call do_exit;   \
+2:     pushl $0;       \
+       pushl $do_iret_error;   \
+       jmp error_code; \
 .previous;             \
 .section __ex_table,"a";\
        .align 4;       \
@@ -196,7 +195,7 @@
        movl EFLAGS(%esp), %eax         # mix EFLAGS and CS
        movb CS(%esp), %al
        testl $(VM_MASK | 2), %eax
-       jz resume_kernel                # returning to kernel or vm86-space
+       jz resume_kernel
 ENTRY(resume_userspace)
        XEN_BLOCK_EVENTS(%esi)          # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
@@ -211,7 +210,7 @@
 ENTRY(resume_kernel)
        XEN_BLOCK_EVENTS(%esi)
        cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
-       jnz restore_all
+       jnz restore_nocheck
 need_resched:
        movl TI_flags(%ebp), %ecx       # need_resched set ?
        testb $_TIF_NEED_RESCHED, %cl
@@ -252,7 +251,8 @@
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
 
-       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not 
testb */
+       testw 
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
        jnz syscall_trace_entry
        cmpl $(nr_syscalls), %eax
        jae syscall_badsys
@@ -276,7 +276,8 @@
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
                                        # system call tracing in operation
-       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not 
testb */
+       testw 
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
        jnz syscall_trace_entry
        cmpl $(nr_syscalls), %eax
        jae syscall_badsys
@@ -290,7 +291,20 @@
        movl TI_flags(%ebp), %ecx
        testw $_TIF_ALLWORK_MASK, %cx   # current->work
        jne syscall_exit_work
+
 restore_all:
+#if 0 /* XEN */
+       movl EFLAGS(%esp), %eax         # mix EFLAGS, SS and CS
+       # Warning: OLDSS(%esp) contains the wrong/random values if we
+       # are returning to the kernel.
+       # See comments in process.c:copy_thread() for details.
+       movb OLDSS(%esp), %ah
+       movb CS(%esp), %al
+       andl $(VM_MASK | (4 << 8) | 3), %eax
+       cmpl $((4 << 8) | 3), %eax
+       je ldt_ss                       # returning to user-space with LDT SS
+#endif /* XEN */
+restore_nocheck:
        testl $VM_MASK, EFLAGS(%esp)
        jnz resume_vm86
        movb EVENT_MASK(%esp), %al
@@ -300,7 +314,19 @@
        andb $1,%al                     # %al == mask & ~saved_mask
        jnz restore_all_enable_events   #     != 0 => reenable event delivery
        XEN_PUT_VCPU_INFO(%esi)
-       RESTORE_ALL
+       RESTORE_REGS
+       addl $4, %esp
+1:     iret
+.section .fixup,"ax"
+iret_exc:
+       pushl $0                        # no error code
+       pushl $do_iret_error
+       jmp error_code
+.previous
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
 
 resume_vm86:
        XEN_UNBLOCK_EVENTS(%esi)
@@ -309,6 +335,33 @@
        movl $__HYPERVISOR_switch_vm86,%eax
        int $0x82
        ud2
+
+#if 0 /* XEN */
+ldt_ss:
+       larl OLDSS(%esp), %eax
+       jnz restore_nocheck
+       testl $0x00400000, %eax         # returning to 32bit stack?
+       jnz restore_nocheck             # allright, normal return
+       /* If returning to userspace with 16bit stack,
+        * try to fix the higher word of ESP, as the CPU
+        * won't restore it.
+        * This is an "official" bug of all the x86-compatible
+        * CPUs, which we can try to work around to make
+        * dosemu and wine happy. */
+       subl $8, %esp           # reserve space for switch16 pointer
+       cli
+       movl %esp, %eax
+       /* Set up the 16bit stack frame with switch32 pointer on top,
+        * and a switch16 pointer on top of the current frame. */
+       call setup_x86_bogus_stack
+       RESTORE_REGS
+       lss 20+4(%esp), %esp    # switch to 16bit stack
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+#endif /* XEN */
 
        # perform work that needs to be done immediately before resumption
        ALIGN
@@ -385,6 +438,27 @@
        jmp resume_userspace
 
 #if 0 /* XEN */
+#define FIXUP_ESPFIX_STACK \
+       movl %esp, %eax; \
+       /* switch to 32bit stack using the pointer on top of 16bit stack */ \
+       lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+       /* copy data from 16bit stack to 32bit stack */ \
+       call fixup_x86_bogus_stack; \
+       /* put ESP to the proper location */ \
+       movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+       pushl %eax; \
+       movl %ss, %eax; \
+       /* see if on 16bit stack */ \
+       cmpw $__ESPFIX_SS, %ax; \
+       jne 28f; \
+       movl $__KERNEL_DS, %edx; \
+       movl %edx, %ds; \
+       movl %edx, %es; \
+       /* switch to 32bit stack */ \
+       FIXUP_ESPFIX_STACK \
+28:    popl %eax;
+
 /*
  * Build the entry stubs and pointer table with
  * some assembler magic.
@@ -440,7 +514,9 @@
        pushl %ecx
        pushl %ebx
        cld
-       movl %es, %ecx
+       pushl %es
+#      UNWIND_ESPFIX_STACK
+       popl %ecx
        movl ES(%esp), %edi             # get the function address
        movl ORIG_EAX(%esp), %edx       # get the error code
        movl %eax, ORIG_EAX(%esp)
@@ -625,6 +701,11 @@
  * fault happened on the sysenter path.
  */
 ENTRY(nmi)
+       pushl %eax
+       movl %ss, %eax
+       cmpw $__ESPFIX_SS, %ax
+       popl %eax
+       je nmi_16bit_stack
        cmpl $sysenter_entry,(%esp)
        je nmi_stack_fixup
        pushl %eax
@@ -644,7 +725,7 @@
        xorl %edx,%edx          # zero error code
        movl %esp,%eax          # pt_regs pointer
        call do_nmi
-       RESTORE_ALL
+       jmp restore_all
 
 nmi_stack_fixup:
        FIX_STACK(12,nmi_stack_correct, 1)
@@ -659,6 +740,29 @@
 nmi_debug_stack_fixup:
        FIX_STACK(24,nmi_stack_correct, 1)
        jmp nmi_stack_correct
+
+nmi_16bit_stack:
+       /* create the pointer to lss back */
+       pushl %ss
+       pushl %esp
+       movzwl %sp, %esp
+       addw $4, (%esp)
+       /* copy the iret frame of 12 bytes */
+       .rept 3
+       pushl 16(%esp)
+       .endr
+       pushl %eax
+       SAVE_ALL
+       FIXUP_ESPFIX_STACK              # %eax == %esp
+       xorl %edx,%edx                  # zero error code
+       call do_nmi
+       RESTORE_REGS
+       lss 12+4(%esp), %esp            # back to 16bit stack
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
 #endif /* XEN */
 
 ENTRY(int3)
@@ -725,7 +829,9 @@
        pushl %ecx
        pushl %ebx
        cld
-       movl %es,%edi
+       pushl %es
+#      UNWIND_ESPFIX_STACK
+       popl %edi
        movl ES(%esp), %ecx             /* get the faulting address */
        movl ORIG_EAX(%esp), %edx       /* get the error code */
        movl %eax, ORIG_EAX(%esp)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Fri Sep  9 16:30:54 2005
@@ -38,17 +38,13 @@
 #define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
 
 ENTRY(startup_32)
-       cld
-
-       /* Copy the necessary stuff from xen_start_info structure. */
-       mov  $xen_start_info_union,%edi
-       mov  $512,%ecx
-       rep movsl
+       movl %esi,xen_start_info
 
 #ifdef CONFIG_SMP
 ENTRY(startup_32_smp)
+#endif /* CONFIG_SMP */
+
        cld
-#endif /* CONFIG_SMP */
 
        /* Set up the stack pointer */
        lss stack_start,%esp
@@ -179,7 +175,7 @@
        .quad 0x0000000000000000        /* 0xc0 APM CS 16 code (16 bit) */
        .quad 0x0000000000000000        /* 0xc8 APM DS    data */
 
-       .quad 0x0000000000000000        /* 0xd0 - unused */
+       .quad 0x0000000000000000        /* 0xd0 - ESPFIX 16-bit SS */
        .quad 0x0000000000000000        /* 0xd8 - unused */
        .quad 0x0000000000000000        /* 0xe0 - unused */
        .quad 0x0000000000000000        /* 0xe8 - unused */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c   Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c   Fri Sep  9 16:30:54 2005
@@ -242,12 +242,12 @@
        } else if (i == NR_IRQS) {
                seq_printf(p, "NMI: ");
                for_each_cpu(j)
-                       seq_printf(p, "%10u ", nmi_count(j));
+                       seq_printf(p, "%10u ", nmi_count(j));
                seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
                seq_printf(p, "LOC: ");
                for_each_cpu(j)
-                       seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).apic_timer_irqs);
+                       seq_printf(p, "%10u ", 
per_cpu(irq_stat,j).apic_timer_irqs);
                seq_putc(p, '\n');
 #endif
                seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -263,6 +263,7 @@
 void fixup_irqs(cpumask_t map)
 {
        unsigned int irq;
+       static int warned;
 
        for (irq = 0; irq < NR_IRQS; irq++) {
                cpumask_t mask;
@@ -276,7 +277,7 @@
                }
                if (irq_desc[irq].handler->set_affinity)
                        irq_desc[irq].handler->set_affinity(irq, mask);
-               else if (irq_desc[irq].action)
+               else if (irq_desc[irq].action && !(warned++))
                        printk("Cannot set affinity for irq %i\n", irq);
        }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Fri Sep  9 
16:30:54 2005
@@ -115,9 +115,9 @@
        if (swiotlb)
                return swiotlb_dma_supported(dev, mask);
        /*
-         * By default we'll BUG when an infeasible DMA is requested, and
-         * request swiotlb=force (see IOMMU_BUG_ON).
-         */
+        * By default we'll BUG when an infeasible DMA is requested, and
+        * request swiotlb=force (see IOMMU_BUG_ON).
+        */
        return 1;
 }
 EXPORT_SYMBOL(dma_supported);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Fri Sep  9 
16:30:54 2005
@@ -135,6 +135,10 @@
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
+#ifdef CONFIG_SMP
+extern void smp_suspend(void);
+extern void smp_resume(void);
+#endif
 void cpu_idle (void)
 {
        int cpu = _smp_processor_id();
@@ -149,6 +153,9 @@
 
                        if (cpu_is_offline(cpu)) {
                                local_irq_disable();
+#ifdef CONFIG_SMP
+                               smp_suspend();
+#endif
 #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
                                /* Ack it.  From this point on until
                                   we get woken up, we're not allowed
@@ -159,6 +166,9 @@
                                HYPERVISOR_vcpu_down(cpu);
 #endif
                                play_dead();
+#ifdef CONFIG_SMP
+                               smp_resume();
+#endif
                                local_irq_enable();
                        }
 
@@ -456,7 +466,6 @@
        boot_option_idle_override = 1;
        return 1;
 }
-
 
 /*
  *     switch_to(x,yn) should switch tasks from x to y.
@@ -789,10 +798,3 @@
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
 }
-
-
-#ifndef CONFIG_X86_SMP
-void _restore_vcpu(void)
-{
-}
-#endif
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Sep  9 16:30:54 2005
@@ -55,6 +55,7 @@
 #include <asm/io.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/xen-public/physdev.h>
+#include <asm-xen/xen-public/memory.h>
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
@@ -288,7 +289,7 @@
        int           i;
 
        /* Nothing to do if not running in dom0. */
-       if (!(xen_start_info.flags & SIF_INITDOMAIN))
+       if (!(xen_start_info->flags & SIF_INITDOMAIN))
                return;
 
        /* video rom */
@@ -358,11 +359,12 @@
 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
 EXPORT_SYMBOL(HYPERVISOR_shared_info);
 
-unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+unsigned long *phys_to_machine_mapping;
+unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
 EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /* Raw start-of-day parameters from the hypervisor. */
-union xen_start_info_union xen_start_info_union;
+start_info_t *xen_start_info;
 
 static void __init limit_regions(unsigned long long size)
 {
@@ -702,7 +704,7 @@
 
        if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
                max_cmdline = COMMAND_LINE_SIZE;
-       memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
+       memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
        /* Save unparsed command line copy for /proc/cmdline */
        saved_command_line[max_cmdline-1] = '\0';
 
@@ -933,8 +935,8 @@
 /* We don't use the fake e820 because we need to respond to user override. */
 void __init find_max_pfn(void)
 {
-       if ( xen_override_max_pfn < xen_start_info.nr_pages )
-               xen_override_max_pfn = xen_start_info.nr_pages;
+       if ( xen_override_max_pfn < xen_start_info->nr_pages )
+               xen_override_max_pfn = xen_start_info->nr_pages;
        max_pfn = xen_override_max_pfn;
 }
 #endif /* XEN */
@@ -1077,12 +1079,12 @@
 void __init setup_bootmem_allocator(void);
 static unsigned long __init setup_memory(void)
 {
-
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + 
xen_start_info.nr_pt_frames;
+       min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
+               xen_start_info->nr_pt_frames;
 
        find_max_pfn();
 
@@ -1188,7 +1190,7 @@
 #endif /* !CONFIG_XEN */
 
 #ifdef CONFIG_BLK_DEV_INITRD
-       if (xen_start_info.mod_start) {
+       if (xen_start_info->mod_start) {
                if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
                        /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
                        initrd_start = INITRD_START + PAGE_OFFSET;
@@ -1205,7 +1207,7 @@
        }
 #endif
 
-       phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
+       phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
 }
 
 /*
@@ -1234,10 +1236,64 @@
 legacy_init_iomem_resources(struct resource *code_resource, struct resource 
*data_resource)
 {
        int i;
+#ifdef CONFIG_XEN
+       dom0_op_t op;
+       struct dom0_memory_map_entry *map;
+       unsigned long gapstart, gapsize;
+       unsigned long long last;
+#endif
 
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
        probe_roms();
 #endif
+
+#ifdef CONFIG_XEN
+       map = alloc_bootmem_low_pages(PAGE_SIZE);
+       op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
+       op.u.physical_memory_map.memory_map = map;
+       op.u.physical_memory_map.max_map_entries =
+               PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
+       BUG_ON(HYPERVISOR_dom0_op(&op));
+
+       last = 0x100000000ULL;
+       gapstart = 0x10000000;
+       gapsize = 0x400000;
+
+       for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
+               struct resource *res;
+
+               if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
+                       gapsize = last - map[i].end;
+                       gapstart = map[i].end;
+               }
+               if (map[i].start < last)
+                       last = map[i].start;
+
+               if (map[i].end > 0x100000000ULL)
+                       continue;
+               res = alloc_bootmem_low(sizeof(struct resource));
+               res->name = map[i].is_ram ? "System RAM" : "reserved";
+               res->start = map[i].start;
+               res->end = map[i].end - 1;
+               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               request_resource(&iomem_resource, res);
+       }
+
+       free_bootmem(__pa(map), PAGE_SIZE);
+
+       /*
+        * Start allocating dynamic PCI memory a bit into the gap,
+        * aligned up to the nearest megabyte.
+        *
+        * Question: should we try to pad it up a bit (do something
+        * like " + (gapsize >> 3)" in there too?). We now have the
+        * technology.
+        */
+       pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+
+       printk("Allocating PCI resources starting at %08lx (gap: 
%08lx:%08lx)\n",
+               pci_mem_start, gapstart, gapsize);
+#else
        for (i = 0; i < e820.nr_map; i++) {
                struct resource *res;
                if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
@@ -1263,6 +1319,7 @@
                        request_resource(res, data_resource);
                }
        }
+#endif
 }
 
 /*
@@ -1270,23 +1327,29 @@
  */
 static void __init register_memory(void)
 {
+#ifndef CONFIG_XEN
        unsigned long gapstart, gapsize;
        unsigned long long last;
+#endif
        int           i;
+
+       /* Nothing to do if not running in dom0. */
+       if (!(xen_start_info->flags & SIF_INITDOMAIN))
+               return;
 
        if (efi_enabled)
                efi_initialize_iomem_resources(&code_resource, &data_resource);
        else
                legacy_init_iomem_resources(&code_resource, &data_resource);
 
-       if (xen_start_info.flags & SIF_INITDOMAIN)
-               /* EFI systems may still have VGA */
-               request_resource(&iomem_resource, &video_ram_resource);
+       /* EFI systems may still have VGA */
+       request_resource(&iomem_resource, &video_ram_resource);
 
        /* request I/O space for devices used on all i[345]86 PCs */
        for (i = 0; i < STANDARD_IO_RESOURCES; i++)
                request_resource(&ioport_resource, &standard_io_resources[i]);
 
+#ifndef CONFIG_XEN
        /*
         * Search for the bigest gap in the low 32 bits of the e820
         * memory space.
@@ -1327,6 +1390,7 @@
 
        printk("Allocating PCI resources starting at %08lx (gap: 
%08lx:%08lx)\n",
                pci_mem_start, gapstart, gapsize);
+#endif
 }
 
 /* Use inline assembly to define this because the nops are defined 
@@ -1456,7 +1520,7 @@
  */
 void __init setup_arch(char **cmdline_p)
 {
-       int i, j;
+       int i, j, k, fpp;
        physdev_op_t op;
        unsigned long max_low_pfn;
 
@@ -1535,8 +1599,8 @@
        init_mm.start_code = (unsigned long) _text;
        init_mm.end_code = (unsigned long) _etext;
        init_mm.end_data = (unsigned long) _edata;
-       init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) +
-                      xen_start_info.nr_pt_frames) << PAGE_SHIFT;
+       init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
+                      xen_start_info->nr_pt_frames) << PAGE_SHIFT;
 
        /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
        /*code_resource.start = virt_to_phys(_text);*/
@@ -1573,42 +1637,64 @@
 #endif
 
        /* Make sure we have a correctly sized P->M table. */
-       if (max_pfn != xen_start_info.nr_pages) {
+       if (max_pfn != xen_start_info->nr_pages) {
                phys_to_machine_mapping = alloc_bootmem_low_pages(
-                       max_pfn * sizeof(unsigned int));
-
-               if (max_pfn > xen_start_info.nr_pages) {
+                       max_pfn * sizeof(unsigned long));
+
+               if (max_pfn > xen_start_info->nr_pages) {
                        /* set to INVALID_P2M_ENTRY */
                        memset(phys_to_machine_mapping, ~0,
-                               max_pfn * sizeof(unsigned int));
+                               max_pfn * sizeof(unsigned long));
                        memcpy(phys_to_machine_mapping,
-                               (unsigned int *)xen_start_info.mfn_list,
-                               xen_start_info.nr_pages * sizeof(unsigned int));
+                               (unsigned long *)xen_start_info->mfn_list,
+                               xen_start_info->nr_pages * sizeof(unsigned 
long));
                } else {
+                       struct xen_memory_reservation reservation = {
+                               .extent_start = (unsigned long 
*)xen_start_info->mfn_list + max_pfn,
+                               .nr_extents   = xen_start_info->nr_pages - 
max_pfn,
+                               .extent_order = 0,
+                               .domid        = DOMID_SELF
+                       };
+
                        memcpy(phys_to_machine_mapping,
-                               (unsigned int *)xen_start_info.mfn_list,
-                               max_pfn * sizeof(unsigned int));
-                       /* N.B. below relies on sizeof(int) == sizeof(long). */
-                       if (HYPERVISOR_dom_mem_op(
-                               MEMOP_decrease_reservation,
-                               (unsigned long *)xen_start_info.mfn_list + 
max_pfn,
-                               xen_start_info.nr_pages - max_pfn, 0) !=
-                           (xen_start_info.nr_pages - max_pfn)) BUG();
+                               (unsigned long *)xen_start_info->mfn_list,
+                               max_pfn * sizeof(unsigned long));
+                       BUG_ON(HYPERVISOR_memory_op(
+                               XENMEM_decrease_reservation,
+                               &reservation) !=
+                           (xen_start_info->nr_pages - max_pfn));
                }
                free_bootmem(
-                       __pa(xen_start_info.mfn_list), 
-                       PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
-                       sizeof(unsigned int))));
-       }
-
-       pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
-       for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ )
-       {       
-            pfn_to_mfn_frame_list[j] = 
-                 virt_to_mfn(&phys_to_machine_mapping[i]);
-       }
-       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
-            virt_to_mfn(pfn_to_mfn_frame_list);
+                       __pa(xen_start_info->mfn_list), 
+                       PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
+                       sizeof(unsigned long))));
+       }
+
+
+       /* 
+        * Initialise the list of the frames that specify the list of 
+        * frames that make up the p2m table. Used by save/restore
+        */
+       pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
+       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+         virt_to_mfn(pfn_to_mfn_frame_list_list);
+              
+       fpp = PAGE_SIZE/sizeof(unsigned long);
+       for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ )
+       {
+           if ( (j % fpp) == 0 )
+           {
+               k++;
+               BUG_ON(k>=16);
+               pfn_to_mfn_frame_list[k] = alloc_bootmem_low_pages(PAGE_SIZE);
+               pfn_to_mfn_frame_list_list[k] = 
+                   virt_to_mfn(pfn_to_mfn_frame_list[k]);
+               j=0;
+           }
+           pfn_to_mfn_frame_list[k][j] = 
+               virt_to_mfn(&phys_to_machine_mapping[i]);
+       }
+       HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
 
        /*
         * NOTE: at this point the bootmem allocator is fully available.
@@ -1626,8 +1712,8 @@
        }
 #endif
 
-
-       dmi_scan_machine();
+       if (xen_start_info->flags & SIF_INITDOMAIN)
+               dmi_scan_machine();
 
 #ifdef CONFIG_X86_GENERICARCH
        generic_apic_probe(*cmdline_p);
@@ -1640,7 +1726,7 @@
        HYPERVISOR_physdev_op(&op);
 
 #ifdef CONFIG_ACPI_BOOT
-       if (!(xen_start_info.flags & SIF_INITDOMAIN)) {
+       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
                acpi_disabled = 1;
                acpi_ht = 0;
@@ -1666,8 +1752,8 @@
 
        register_memory();
 
-       if (xen_start_info.flags & SIF_INITDOMAIN) {
-               if (!(xen_start_info.flags & SIF_PRIVILEGED))
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               if (!(xen_start_info->flags & SIF_PRIVILEGED))
                        panic("Xen granted us console access "
                              "but not privileged status");
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Fri Sep  9 
16:30:54 2005
@@ -856,9 +856,6 @@
        cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
        BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
        cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
-       printk("GDT: copying %d bytes from %lx to %lx\n",
-               cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
-               cpu_gdt_descr[cpu].address); 
        memcpy((void *)cpu_gdt_descr[cpu].address,
               (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
 
@@ -1274,6 +1271,7 @@
                        printk(KERN_WARNING "WARNING: %d siblings found for 
CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
                        smp_num_siblings = siblings;
                }
+
                if (c->x86_num_cores > 1) {
                        for (i = 0; i < NR_CPUS; i++) {
                                if (!cpu_isset(i, cpu_callout_map))
@@ -1601,32 +1599,71 @@
 
 void smp_suspend(void)
 {
-       /* XXX todo: take down time and ipi's on all cpus */
        local_teardown_timer_irq();
        smp_intr_exit();
 }
 
 void smp_resume(void)
 {
-       /* XXX todo: restore time and ipi's on all cpus */
        smp_intr_init();
        local_setup_timer_irq();
 }
 
-DECLARE_PER_CPU(int, timer_irq);
-
-void _restore_vcpu(void)
-{
-       int cpu = smp_processor_id();
-       extern atomic_t vcpus_rebooting;
-
-       /* We are the first thing the vcpu runs when it comes back,
-          and we are supposed to restore the IPIs and timer
-          interrupts etc.  When we return, the vcpu's idle loop will
-          start up again. */
-       _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
-       _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
-       _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
-       _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) 
);
+static atomic_t vcpus_rebooting;
+
+static void restore_vcpu_ready(void)
+{
+
        atomic_dec(&vcpus_rebooting);
 }
+
+void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+       int r;
+       int gdt_pages;
+       r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+       if (r != 0)
+               panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+       /* Translate from machine to physical addresses where necessary,
+          so that they can be translated to our new machine address space
+          after resume.  libxc is responsible for doing this to vcpu0,
+          but we do it to the others. */
+       gdt_pages = (ctxt->gdt_ents + 511) / 512;
+       ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+       for (r = 0; r < gdt_pages; r++)
+               ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+       int r;
+       int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+       /* This is kind of a hack, and implicitly relies on the fact that
+          the vcpu stops in a place where all of the call clobbered
+          registers are already dead. */
+       ctxt->user_regs.esp -= 4;
+       ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+       ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready;
+
+       /* De-canonicalise.  libxc handles this for vcpu 0, but we need
+          to do it for the other vcpus. */
+       ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+       for (r = 0; r < gdt_pages; r++)
+               ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+       atomic_set(&vcpus_rebooting, 1);
+       r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+       if (r != 0) {
+               printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+               return -1;
+       }
+
+       /* Make sure we wait for the new vcpu to come up before trying to do
+          anything with it or starting the next one. */
+       while (atomic_read(&vcpus_rebooting))
+               barrier();
+
+       return 0;
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c       Fri Sep  9 
16:30:54 2005
@@ -51,7 +51,7 @@
  * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
-static dma_addr_t iotlb_bus_start, iotlb_bus_mask;
+static dma_addr_t iotlb_bus_start, iotlb_bus_end, iotlb_bus_mask;
 
 /* Does the given dma address reside within the swiotlb aperture? */
 #define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask))
@@ -157,6 +157,7 @@
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
 
        iotlb_bus_start = virt_to_bus(iotlb_virt_start);
+       iotlb_bus_end   = iotlb_bus_start + bytes;
        iotlb_bus_mask  = ~(dma_addr_t)(bytes - 1);
 
        printk(KERN_INFO "Software IO TLB enabled: \n"
@@ -165,7 +166,7 @@
               " Kernel range: 0x%016lx - 0x%016lx\n",
               bytes >> 20,
               (unsigned long)iotlb_bus_start,
-              (unsigned long)iotlb_bus_start + bytes,
+              (unsigned long)iotlb_bus_end,
               (unsigned long)iotlb_virt_start,
               (unsigned long)iotlb_virt_start + bytes);
 }
@@ -181,7 +182,7 @@
          * Otherwise, enable for domain 0 if the machine has 'lots of memory',
          * which we take to mean more than 2GB.
          */
-       if (xen_start_info.flags & SIF_INITDOMAIN) {
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
                dom0_op_t op;
                op.cmd = DOM0_PHYSINFO;
                if ((HYPERVISOR_dom0_op(&op) == 0) &&
@@ -191,6 +192,8 @@
 
        if (swiotlb)
                swiotlb_init_with_default_size(64 * (1<<20));
+       else
+               printk(KERN_INFO "Software IO TLB disabled\n");
 }
 
 static void
@@ -424,13 +427,6 @@
        }
 
        dev_addr = virt_to_bus(map);
-
-       /*
-        * Ensure that the address returned is DMA'ble
-        */
-       if (address_needs_mapping(hwdev, dev_addr))
-               panic("map_single: bounce buffer is not DMA'ble");
-
        return dev_addr;
 }
 
@@ -632,7 +628,7 @@
 int
 swiotlb_dma_supported (struct device *hwdev, u64 mask)
 {
-       return (mask >= 0xffffffffUL);
+       return (mask >= (iotlb_bus_end - 1));
 }
 
 EXPORT_SYMBOL(swiotlb_init);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Fri Sep  9 16:30:54 2005
@@ -445,7 +445,7 @@
        sec = tv->tv_sec;
        __normalize_time(&sec, &nsec);
 
-       if ((xen_start_info.flags & SIF_INITDOMAIN) &&
+       if ((xen_start_info->flags & SIF_INITDOMAIN) &&
            !independent_wallclock) {
                op.cmd = DOM0_SETTIME;
                op.u.settime.secs        = sec;
@@ -476,7 +476,7 @@
 
        WARN_ON(irqs_disabled());
 
-       if (!(xen_start_info.flags & SIF_INITDOMAIN))
+       if (!(xen_start_info->flags & SIF_INITDOMAIN))
                return 0;
 
        /* gets recalled with irq locally disabled */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Fri Sep  9 16:30:54 2005
@@ -449,10 +449,10 @@
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
 #ifdef CONFIG_X86_MCE
 DO_ERROR(18, SIGBUS, "machine check", machine_check)
 #endif
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
 
 fastcall void do_general_protection(struct pt_regs * regs, long error_code)
 {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Fri Sep  9 16:30:54 2005
@@ -588,7 +588,15 @@
                pmd_k = pmd_offset(pud_k, address);
                if (!pmd_present(*pmd_k))
                        goto no_context;
+#ifndef CONFIG_XEN
                set_pmd(pmd, *pmd_k);
+#else
+               /*
+                * When running on Xen we must launder *pmd_k through
+                * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
+                */
+               set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
+#endif
 
                pte_k = pte_offset_kernel(pmd_k, address);
                if (!pte_present(*pte_k))
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Fri Sep  9 
16:30:54 2005
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/balloon.h>
+#include <asm-xen/xen-public/memory.h>
 #include <linux/module.h>
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include <linux/percpu.h>
@@ -105,7 +106,7 @@
 void xen_machphys_update(unsigned long mfn, unsigned long pfn)
 {
        mmu_update_t u;
-       u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+       u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        u.val = pfn;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
@@ -320,6 +321,12 @@
        pmd_t         *pmd;
        pte_t         *pte;
        unsigned long  mfn, i, flags;
+       struct xen_memory_reservation reservation = {
+               .extent_start = &mfn,
+               .nr_extents   = 1,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
 
        scrub_pages(vstart, 1 << order);
 
@@ -336,13 +343,15 @@
                        vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
                phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
                        INVALID_P2M_ENTRY;
-               BUG_ON(HYPERVISOR_dom_mem_op(
-                       MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+               BUG_ON(HYPERVISOR_memory_op(
+                       XENMEM_decrease_reservation, &reservation) != 1);
        }
 
        /* 2. Get a new contiguous memory extent. */
-       BUG_ON(HYPERVISOR_dom_mem_op(
-               MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1);
+       reservation.extent_order = order;
+       reservation.address_bits = 31; /* aacraid limitation */
+       BUG_ON(HYPERVISOR_memory_op(
+               XENMEM_increase_reservation, &reservation) != 1);
 
        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1<<order); i++) {
@@ -367,6 +376,12 @@
        pmd_t         *pmd;
        pte_t         *pte;
        unsigned long  mfn, i, flags;
+       struct xen_memory_reservation reservation = {
+               .extent_start = &mfn,
+               .nr_extents   = 1,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
 
        scrub_pages(vstart, 1 << order);
 
@@ -385,14 +400,14 @@
                        vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
                phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
                        INVALID_P2M_ENTRY;
-               BUG_ON(HYPERVISOR_dom_mem_op(
-                       MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+               BUG_ON(HYPERVISOR_memory_op(
+                       XENMEM_decrease_reservation, &reservation) != 1);
        }
 
        /* 2. Map new pages in place of old pages. */
        for (i = 0; i < (1<<order); i++) {
-               BUG_ON(HYPERVISOR_dom_mem_op(
-                       MEMOP_increase_reservation, &mfn, 1, 0) != 1);
+               BUG_ON(HYPERVISOR_memory_op(
+                       XENMEM_increase_reservation, &reservation) != 1);
                BUG_ON(HYPERVISOR_update_va_mapping(
                        vstart + (i*PAGE_SIZE),
                        pfn_pte_ma(mfn, PAGE_KERNEL), 0));
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Fri Sep  9 16:30:54 2005
@@ -159,7 +159,7 @@
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;
 
-       unsigned long max_ram_pfn = xen_start_info.nr_pages;
+       unsigned long max_ram_pfn = xen_start_info->nr_pages;
        if (max_ram_pfn > max_low_pfn)
                max_ram_pfn = max_low_pfn;
 
@@ -219,6 +219,8 @@
        }
 }
 
+#ifndef CONFIG_XEN
+
 static inline int page_kills_ppro(unsigned long pagenr)
 {
        if (pagenr >= 0x70000 && pagenr <= 0x7003F)
@@ -266,6 +268,13 @@
        return 0;
 }
 
+#else /* CONFIG_XEN */
+
+#define page_kills_ppro(p)     0
+#define page_is_ram(p)         1
+
+#endif
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
@@ -308,7 +317,7 @@
                ClearPageReserved(page);
                set_bit(PG_highmem, &page->flags);
                set_page_count(page, 1);
-               if (pfn < xen_start_info.nr_pages)
+               if (pfn < xen_start_info->nr_pages)
                        __free_page(page);
                totalhigh_pages++;
        } else
@@ -347,7 +356,7 @@
 static void __init pagetable_init (void)
 {
        unsigned long vaddr;
-       pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
+       pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
        int i;
 
        swapper_pg_dir = pgd_base;
@@ -526,14 +535,14 @@
        kmap_init();
 
        /* Switch to the real shared_info page, and clear the dummy page. */
-       set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
        memset(empty_zero_page, 0, sizeof(empty_zero_page));
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
        /* Setup mapping of lower 1st MB */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
-               if (xen_start_info.flags & SIF_PRIVILEGED)
+               if (xen_start_info->flags & SIF_PRIVILEGED)
                        set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                else
                        __set_fixmap(FIX_ISAMAP_BEGIN - i,
@@ -630,7 +639,7 @@
        /* this will put all low memory onto the freelists */
        totalram_pages += free_all_bootmem();
        /* XEN: init and count low-mem pages outside initial allocation. */
-       for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
+       for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
                ClearPageReserved(&mem_map[pfn]);
                set_page_count(&mem_map[pfn], 1);
                totalram_pages++;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Fri Sep  9 16:30:54 2005
@@ -19,295 +19,17 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 
-#ifndef CONFIG_XEN_PHYSDEV_ACCESS
-
-void * __ioremap(unsigned long phys_addr, unsigned long size,
-                unsigned long flags)
-{
-       return NULL;
-}
-
-void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
-       return NULL;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-}
-
-#ifdef __i386__
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
-       return NULL;
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-}
-
-#endif /* __i386__ */
-
-#else
-
-/*
- * Does @address reside within a non-highmem page that is local to this virtual
- * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
- * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
- * why this works.
- */
-static inline int is_local_lowmem(unsigned long address)
-{
-       extern unsigned long max_low_pfn;
-       unsigned long mfn = address >> PAGE_SHIFT;
-       unsigned long pfn = mfn_to_pfn(mfn);
-       return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn));
-}
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned 
long flags)
-{
-       void __iomem * addr;
-       struct vm_struct * area;
-       unsigned long offset, last_addr;
-       domid_t domid = DOMID_IO;
-
-       /* Don't allow wraparound or zero size */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-       if (phys_addr >= 0x0 && last_addr < 0x100000)
-               return isa_bus_to_virt(phys_addr);
-#endif
-
-       /*
-        * Don't allow anybody to remap normal RAM that we're using..
-        */
-       if (is_local_lowmem(phys_addr)) {
-               char *t_addr, *t_end;
-               struct page *page;
-
-               t_addr = bus_to_virt(phys_addr);
-               t_end = t_addr + (size - 1);
-          
-               for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); 
page++)
-                       if(!PageReserved(page))
-                               return NULL;
-
-               domid = DOMID_SELF;
-       }
-
-       /*
-        * Mappings have to be page-aligned
-        */
-       offset = phys_addr & ~PAGE_MASK;
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
-       /*
-        * Ok, go for it..
-        */
-       area = get_vm_area(size, VM_IOREMAP | (flags << 20));
-       if (!area)
-               return NULL;
-       area->phys_addr = phys_addr;
-       addr = (void __iomem *) area->addr;
-       flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
-#ifdef __x86_64__
-       flags |= _PAGE_USER;
-#endif
-       if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
-                                   size, __pgprot(flags), domid)) {
-               vunmap((void __force *) addr);
-               return NULL;
-       }
-       return (void __iomem *) (offset + (char __iomem *)addr);
-}
-
-
-/**
- * ioremap_nocache     -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address. 
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many 
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- * 
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
-       unsigned long last_addr;
-       void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
-       if (!p) 
-               return p; 
-
-       /* Guaranteed to be > phys_addr, as per __ioremap() */
-       last_addr = phys_addr + size - 1;
-
-       if (is_local_lowmem(last_addr)) { 
-               struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
-               unsigned long npages;
-
-               phys_addr &= PAGE_MASK;
-
-               /* This might overflow and become zero.. */
-               last_addr = PAGE_ALIGN(last_addr);
-
-               /* .. but that's ok, because modulo-2**n arithmetic will make
-               * the page-aligned "last - first" come out right.
-               */
-               npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
-               if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
-                       iounmap(p); 
-                       p = NULL;
-               }
-               global_flush_tlb();
-       }
-
-       return p;                                       
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-       struct vm_struct *p;
-       if ((void __force *) addr <= high_memory) 
-               return; 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
-               return;
-#endif
-       p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
-       if (!p) { 
-               printk("__iounmap: bad address %p\n", addr);
-               return;
-       }
-
-       if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
-               /* p->size includes the guard page, but cpa doesn't like that */
-               change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
-                                (p->size - PAGE_SIZE) >> PAGE_SHIFT,
-                                PAGE_KERNEL);                           
-               global_flush_tlb();
-       } 
-       kfree(p); 
-}
-
-#ifdef __i386__
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
-       unsigned long offset, last_addr;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
-
-       /* Don't allow wraparound or zero size */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-       if (phys_addr >= 0x0 && last_addr < 0x100000)
-               return isa_bus_to_virt(phys_addr);
-#endif
-
-       /*
-        * Mappings have to be page-aligned
-        */
-       offset = phys_addr & ~PAGE_MASK;
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(last_addr) - phys_addr;
-
-       /*
-        * Mappings have to fit in the FIX_BTMAP area.
-        */
-       nrpages = size >> PAGE_SHIFT;
-       if (nrpages > NR_FIX_BTMAPS)
-               return NULL;
-
-       /*
-        * Ok, go for it..
-        */
-       idx = FIX_BTMAP_BEGIN;
-       while (nrpages > 0) {
-               set_fixmap(idx, phys_addr);
-               phys_addr += PAGE_SIZE;
-               --idx;
-               --nrpages;
-       }
-       return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-       unsigned long virt_addr;
-       unsigned long offset;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
-
-       virt_addr = (unsigned long)addr;
-       if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
-               return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
-               return;
-#endif
-       offset = virt_addr & ~PAGE_MASK;
-       nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
-       idx = FIX_BTMAP_BEGIN;
-       while (nrpages > 0) {
-               clear_fixmap(idx);
-               --idx;
-               --nrpages;
-       }
-}
-
-#endif /* __i386__ */
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
-
+#define ISA_START_ADDRESS      0x0
+#define ISA_END_ADDRESS                0x100000
+
+#if 0 /* not PAE safe */
 /* These hacky macros avoid phys->machine translations. */
 #define __direct_pte(x) ((pte_t) { (x) } )
 #define __direct_mk_pte(page_nr,pgprot) \
   __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
 #define direct_mk_pte_phys(physpage, pgprot) \
   __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-
+#endif
 
 static int direct_remap_area_pte_fn(pte_t *pte, 
                                    struct page *pte_page,
@@ -316,16 +38,16 @@
 {
        mmu_update_t **v = (mmu_update_t **)data;
 
-       (*v)->ptr = ((maddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+       (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pte_page)) <<
                     PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
        (*v)++;
 
        return 0;
 }
 
-int direct_remap_area_pages(struct mm_struct *mm,
+int direct_remap_pfn_range(struct mm_struct *mm,
                            unsigned long address, 
-                           unsigned long machine_addr,
+                           unsigned long mfn,
                            unsigned long size, 
                            pgprot_t prot,
                            domid_t  domid)
@@ -356,9 +78,9 @@
                 * Fill in the machine address: PTE ptr is done later by
                 * __direct_remap_area_pages(). 
                 */
-               v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, 
prot));
-
-               machine_addr += PAGE_SIZE;
+               v->val = pte_val_ma(pfn_pte_ma(mfn, prot));
+
+               mfn++;
                address += PAGE_SIZE; 
                v++;
        }
@@ -376,8 +98,10 @@
        return 0;
 }
 
-EXPORT_SYMBOL(direct_remap_area_pages);
-
+EXPORT_SYMBOL(direct_remap_pfn_range);
+
+
+/* FIXME: This is horribly broken on PAE */ 
 static int lookup_pte_fn(
        pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
 {
@@ -412,6 +136,292 @@
 } 
 
 EXPORT_SYMBOL(touch_pte_range);
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
+/*
+ * Does @address reside within a non-highmem page that is local to this virtual
+ * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
+ * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
+ * why this works.
+ */
+static inline int is_local_lowmem(unsigned long address)
+{
+       extern unsigned long max_low_pfn;
+       unsigned long mfn = address >> PAGE_SHIFT;
+       unsigned long pfn = mfn_to_pfn(mfn);
+       return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn));
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned 
long flags)
+{
+       void __iomem * addr;
+       struct vm_struct * area;
+       unsigned long offset, last_addr;
+       domid_t domid = DOMID_IO;
+
+       /* Don't allow wraparound or zero size */
+       last_addr = phys_addr + size - 1;
+       if (!size || last_addr < phys_addr)
+               return NULL;
+
+       /*
+        * Don't remap the low PCI/ISA area, it's always mapped..
+        */
+       if (xen_start_info->flags & SIF_PRIVILEGED &&
+           phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+               return (void __iomem *) isa_bus_to_virt(phys_addr);
+
+       /*
+        * Don't allow anybody to remap normal RAM that we're using..
+        */
+       if (is_local_lowmem(phys_addr)) {
+               char *t_addr, *t_end;
+               struct page *page;
+
+               t_addr = bus_to_virt(phys_addr);
+               t_end = t_addr + (size - 1);
+          
+               for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); 
page++)
+                       if(!PageReserved(page))
+                               return NULL;
+
+               domid = DOMID_SELF;
+       }
+
+       /*
+        * Mappings have to be page-aligned
+        */
+       offset = phys_addr & ~PAGE_MASK;
+       phys_addr &= PAGE_MASK;
+       size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+       /*
+        * Ok, go for it..
+        */
+       area = get_vm_area(size, VM_IOREMAP | (flags << 20));
+       if (!area)
+               return NULL;
+       area->phys_addr = phys_addr;
+       addr = (void __iomem *) area->addr;
+       flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+#ifdef __x86_64__
+       flags |= _PAGE_USER;
+#endif
+       if (direct_remap_pfn_range(&init_mm, (unsigned long) addr, 
phys_addr>>PAGE_SHIFT,
+                                   size, __pgprot(flags), domid)) {
+               vunmap((void __force *) addr);
+               return NULL;
+       }
+       return (void __iomem *) (offset + (char __iomem *)addr);
+}
+
+
+/**
+ * ioremap_nocache     -   map bus memory into CPU space
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address. 
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many 
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ * 
+ * Must be freed with iounmap.
+ */
+
+void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+       unsigned long last_addr;
+       void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
+       if (!p) 
+               return p; 
+
+       /* Guaranteed to be > phys_addr, as per __ioremap() */
+       last_addr = phys_addr + size - 1;
+
+       if (is_local_lowmem(last_addr)) { 
+               struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
+               unsigned long npages;
+
+               phys_addr &= PAGE_MASK;
+
+               /* This might overflow and become zero.. */
+               last_addr = PAGE_ALIGN(last_addr);
+
+               /* .. but that's ok, because modulo-2**n arithmetic will make
+               * the page-aligned "last - first" come out right.
+               */
+               npages = (last_addr - phys_addr) >> PAGE_SHIFT;
+
+               if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
+                       iounmap(p); 
+                       p = NULL;
+               }
+               global_flush_tlb();
+       }
+
+       return p;                                       
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+       struct vm_struct *p;
+       if ((void __force *) addr <= high_memory) 
+               return;
+
+       /*
+        * __ioremap special-cases the PCI/ISA range by not instantiating a
+        * vm_area and by simply returning an address into the kernel mapping
+        * of ISA space.   So handle that here.
+        */
+       if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
+               return;
+
+       write_lock(&vmlist_lock);
+       p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) 
addr));
+       if (!p) { 
+               printk("iounmap: bad address %p\n", addr);
+               goto out_unlock;
+       }
+
+       if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
+               /* p->size includes the guard page, but cpa doesn't like that */
+               change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
+                                (p->size - PAGE_SIZE) >> PAGE_SHIFT,
+                                PAGE_KERNEL);
+               global_flush_tlb();
+       } 
+out_unlock:
+       write_unlock(&vmlist_lock);
+       kfree(p); 
+}
+
+#ifdef __i386__
+
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+       unsigned long offset, last_addr;
+       unsigned int nrpages;
+       enum fixed_addresses idx;
+
+       /* Don't allow wraparound or zero size */
+       last_addr = phys_addr + size - 1;
+       if (!size || last_addr < phys_addr)
+               return NULL;
+
+       /*
+        * Don't remap the low PCI/ISA area, it's always mapped..
+        */
+       if (xen_start_info->flags & SIF_PRIVILEGED &&
+           phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+               return isa_bus_to_virt(phys_addr);
+
+       /*
+        * Mappings have to be page-aligned
+        */
+       offset = phys_addr & ~PAGE_MASK;
+       phys_addr &= PAGE_MASK;
+       size = PAGE_ALIGN(last_addr) - phys_addr;
+
+       /*
+        * Mappings have to fit in the FIX_BTMAP area.
+        */
+       nrpages = size >> PAGE_SHIFT;
+       if (nrpages > NR_FIX_BTMAPS)
+               return NULL;
+
+       /*
+        * Ok, go for it..
+        */
+       idx = FIX_BTMAP_BEGIN;
+       while (nrpages > 0) {
+               set_fixmap(idx, phys_addr);
+               phys_addr += PAGE_SIZE;
+               --idx;
+               --nrpages;
+       }
+       return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+       unsigned long virt_addr;
+       unsigned long offset;
+       unsigned int nrpages;
+       enum fixed_addresses idx;
+
+       virt_addr = (unsigned long)addr;
+       if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
+               return;
+       if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
+               return;
+       offset = virt_addr & ~PAGE_MASK;
+       nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+
+       idx = FIX_BTMAP_BEGIN;
+       while (nrpages > 0) {
+               clear_fixmap(idx);
+               --idx;
+               --nrpages;
+       }
+}
+
+#endif /* __i386__ */
+
+#else /* CONFIG_XEN_PHYSDEV_ACCESS */
+
+void __iomem * __ioremap(unsigned long phys_addr, unsigned long size,
+                        unsigned long flags)
+{
+       return NULL;
+}
+
+void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+       return NULL;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+}
+
+#ifdef __i386__
+
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+       return NULL;
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+}
+
+#endif /* __i386__ */
+
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
 
 /*
  * Local variables:
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile   Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile   Fri Sep  9 16:30:54 2005
@@ -4,7 +4,7 @@
 
 c-obj-y                                := i386.o
 
-c-obj-$(CONFIG_PCI_BIOS)               += pcbios.o
+#c-obj-$(CONFIG_PCI_BIOS)              += pcbios.o
 c-obj-$(CONFIG_PCI_MMCONFIG)   += mmconfig.o
 c-obj-$(CONFIG_PCI_DIRECT)     += direct.o
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile     Fri Sep  9 16:30:54 2005
@@ -11,7 +11,7 @@
 
 extra-y += vmlinux.lds
 
-obj-y   := ctrl_if.o evtchn.o fixup.o reboot.o gnttab.o devmem.o
+obj-y   := evtchn.o fixup.o reboot.o gnttab.o devmem.o
 
 obj-$(CONFIG_PROC_FS) += xen_proc.o
 obj-$(CONFIG_NET)     += skbuff.o
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Fri Sep  9 16:30:54 2005
@@ -40,16 +40,8 @@
 #include <asm-xen/synch_bitops.h>
 #include <asm-xen/xen-public/event_channel.h>
 #include <asm-xen/xen-public/physdev.h>
-#include <asm-xen/ctrl_if.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/evtchn.h>
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-EXPORT_SYMBOL(force_evtchn_callback);
-EXPORT_SYMBOL(evtchn_do_upcall);
-EXPORT_SYMBOL(bind_evtchn_to_irq);
-EXPORT_SYMBOL(unbind_evtchn_from_irq);
-#endif
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -133,6 +125,7 @@
 {
     (void)HYPERVISOR_xen_version(0);
 }
+EXPORT_SYMBOL(force_evtchn_callback);
 
 /* NB. Interrupts are disabled on entry. */
 asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
@@ -165,6 +158,7 @@
         }
     }
 }
+EXPORT_SYMBOL(evtchn_do_upcall);
 
 static int find_unbound_irq(void)
 {
@@ -211,6 +205,7 @@
     
     return irq;
 }
+EXPORT_SYMBOL(bind_virq_to_irq);
 
 void unbind_virq_from_irq(int virq)
 {
@@ -244,74 +239,7 @@
 
     spin_unlock(&irq_mapping_update_lock);
 }
-
-/* This is only used when a vcpu from an xm save.  The ipi is expected
-   to have been bound before we suspended, and so all of the xenolinux
-   state is set up; we only need to restore the Xen side of things.
-   The irq number has to be the same, but the evtchn number can
-   change. */
-void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
-{
-    evtchn_op_t op;
-    int evtchn;
-
-    spin_lock(&irq_mapping_update_lock);
-
-    op.cmd = EVTCHNOP_bind_ipi;
-    if ( HYPERVISOR_event_channel_op(&op) != 0 )
-       panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
-    evtchn = op.u.bind_ipi.port;
-
-    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
-          ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
-          evtchn);
-
-    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
-    irq_to_evtchn[irq] = -1;
-
-    evtchn_to_irq[evtchn] = irq;
-    irq_to_evtchn[irq]    = evtchn;
-
-    printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
-          evtchn_to_irq[evtchn]);
-    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
-
-    bind_evtchn_to_cpu(evtchn, vcpu);
-
-    spin_unlock(&irq_mapping_update_lock);
-
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
-}
-
-void _bind_virq_to_irq(int virq, int cpu, int irq)
-{
-    evtchn_op_t op;
-    int evtchn;
-
-    spin_lock(&irq_mapping_update_lock);
-
-    op.cmd              = EVTCHNOP_bind_virq;
-    op.u.bind_virq.virq = virq;
-    if ( HYPERVISOR_event_channel_op(&op) != 0 )
-            panic("Failed to bind virtual IRQ %d\n", virq);
-    evtchn = op.u.bind_virq.port;
-
-    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
-    irq_to_evtchn[irq] = -1;
-
-    evtchn_to_irq[evtchn] = irq;
-    irq_to_evtchn[irq]    = evtchn;
-
-    per_cpu(virq_to_irq, cpu)[virq] = irq;
-
-    bind_evtchn_to_cpu(evtchn, cpu);
-
-    spin_unlock(&irq_mapping_update_lock);
-
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
-}
+EXPORT_SYMBOL(unbind_virq_from_irq);
 
 int bind_ipi_to_irq(int ipi)
 {
@@ -347,6 +275,7 @@
 
     return irq;
 }
+EXPORT_SYMBOL(bind_ipi_to_irq);
 
 void unbind_ipi_from_irq(int ipi)
 {
@@ -374,6 +303,7 @@
 
     spin_unlock(&irq_mapping_update_lock);
 }
+EXPORT_SYMBOL(unbind_ipi_from_irq);
 
 int bind_evtchn_to_irq(unsigned int evtchn)
 {
@@ -394,6 +324,7 @@
     
     return irq;
 }
+EXPORT_SYMBOL(bind_evtchn_to_irq);
 
 void unbind_evtchn_from_irq(unsigned int evtchn)
 {
@@ -409,6 +340,7 @@
 
     spin_unlock(&irq_mapping_update_lock);
 }
+EXPORT_SYMBOL(unbind_evtchn_from_irq);
 
 int bind_evtchn_to_irqhandler(
     unsigned int evtchn,
@@ -427,6 +359,7 @@
 
     return retval;
 }
+EXPORT_SYMBOL(bind_evtchn_to_irqhandler);
 
 void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id)
 {
@@ -434,6 +367,7 @@
     free_irq(irq, dev_id);
     unbind_evtchn_from_irq(evtchn);
 }
+EXPORT_SYMBOL(unbind_evtchn_from_irqhandler);
 
 #ifdef CONFIG_SMP
 static void do_nothing_function(void *ign)
@@ -797,7 +731,4 @@
         irq_desc[pirq_to_irq(i)].depth   = 1;
         irq_desc[pirq_to_irq(i)].handler = &pirq_type;
     }
-
-    /* This needs to be done early, but after the IRQ subsystem is alive. */
-    ctrl_if_init();
-}
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Fri Sep  9 16:30:54 2005
@@ -12,10 +12,8 @@
 #include <asm-xen/evtchn.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/xen-public/dom0_ops.h>
-#include <asm-xen/linux-public/suspend.h>
 #include <asm-xen/queues.h>
 #include <asm-xen/xenbus.h>
-#include <asm-xen/ctrl_if.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 
@@ -65,69 +63,10 @@
 #define cpu_up(x) (-EOPNOTSUPP)
 #endif
 
-static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
-{
-    int r;
-    int gdt_pages;
-    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
-    if (r != 0)
-       panic("pickling vcpu %d -> %d!\n", vcpu, r);
-
-    /* Translate from machine to physical addresses where necessary,
-       so that they can be translated to our new machine address space
-       after resume.  libxc is responsible for doing this to vcpu0,
-       but we do it to the others. */
-    gdt_pages = (ctxt->gdt_ents + 511) / 512;
-    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
-    for (r = 0; r < gdt_pages; r++)
-       ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
-}
-
-void _restore_vcpu(int cpu);
-
-atomic_t vcpus_rebooting;
-
-static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
-{
-    int r;
-    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
-
-    /* This is kind of a hack, and implicitly relies on the fact that
-       the vcpu stops in a place where all of the call clobbered
-       registers are already dead. */
-    ctxt->user_regs.esp -= 4;
-    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
-    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
-
-    /* De-canonicalise.  libxc handles this for vcpu 0, but we need
-       to do it for the other vcpus. */
-    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
-    for (r = 0; r < gdt_pages; r++)
-       ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
-
-    atomic_set(&vcpus_rebooting, 1);
-    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
-    if (r != 0) {
-       printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
-       return -1;
-    }
-
-    /* Make sure we wait for the new vcpu to come up before trying to do
-       anything with it or starting the next one. */
-    while (atomic_read(&vcpus_rebooting))
-       barrier();
-
-    return 0;
-}
 
 static int __do_suspend(void *ignore)
 {
-    int i, j;
-    suspend_record_t *suspend_record;
-    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
-
-    /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
-       /* XXX SMH: yes it would :-( */ 
+    int i, j, k, fpp;
 
 #ifdef CONFIG_XEN_USB_FRONTEND
     extern void usbif_resume();
@@ -138,16 +77,25 @@
     extern int gnttab_suspend(void);
     extern int gnttab_resume(void);
 
-#ifdef CONFIG_SMP
-    extern void smp_suspend(void);
-    extern void smp_resume(void);
-#endif
     extern void time_suspend(void);
     extern void time_resume(void);
     extern unsigned long max_pfn;
-    extern unsigned int *pfn_to_mfn_frame_list;
-
+    extern unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[];
+
+#ifdef CONFIG_SMP
+    extern void smp_suspend(void);
+    extern void smp_resume(void);
+
+    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
     cpumask_t prev_online_cpus, prev_present_cpus;
+
+    void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt);
+    int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt);
+#endif
+
+    extern void xencons_suspend(void);
+    extern void xencons_resume(void);
+
     int err = 0;
 
     BUG_ON(smp_processor_id() != 0);
@@ -155,15 +103,14 @@
 
 #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
     if (num_online_cpus() > 1) {
-       printk(KERN_WARNING "Can't suspend SMP guests without 
CONFIG_HOTPLUG_CPU\n");
+       printk(KERN_WARNING 
+               "Can't suspend SMP guests without CONFIG_HOTPLUG_CPU\n");
        return -EOPNOTSUPP;
     }
 #endif
 
-    suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
-    if ( suspend_record == NULL )
-        goto out;
-
+    preempt_disable();
+#ifdef CONFIG_SMP
     /* Take all of the other cpus offline.  We need to be careful not
        to get preempted between the final test for num_online_cpus()
        == 1 and disabling interrupts, since otherwise userspace could
@@ -175,7 +122,6 @@
        since by the time num_online_cpus() == 1, there aren't any
        other cpus) */
     cpus_clear(prev_online_cpus);
-    preempt_disable();
     while (num_online_cpus() > 1) {
        preempt_enable();
        for_each_online_cpu(i) {
@@ -190,13 +136,13 @@
        }
        preempt_disable();
     }
-
-    suspend_record->nr_pfns = max_pfn; /* final number of pfns */
+#endif
 
     __cli();
 
     preempt_enable();
 
+#ifdef CONFIG_SMP
     cpus_clear(prev_present_cpus);
     for_each_present_cpu(i) {
        if (i == 0)
@@ -204,6 +150,7 @@
        save_vcpu_context(i, &suspended_cpu_records[i]);
        cpu_set(i, prev_present_cpus);
     }
+#endif
 
 #ifdef __i386__
     mm_pin_all();
@@ -218,7 +165,7 @@
 
     xenbus_suspend();
 
-    ctrl_if_suspend();
+    xencons_suspend();
 
     irq_suspend();
 
@@ -227,37 +174,44 @@
     HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
     clear_fixmap(FIX_SHARED_INFO);
 
-    memcpy(&suspend_record->resume_info, &xen_start_info,
-           sizeof(xen_start_info));
+    xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+    xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn);
 
     /* We'll stop somewhere inside this hypercall.  When it returns,
        we'll start resuming after the restore. */
-    HYPERVISOR_suspend(virt_to_mfn(suspend_record));
+    HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
 
     shutting_down = SHUTDOWN_INVALID; 
 
-    memcpy(&xen_start_info, &suspend_record->resume_info,
-           sizeof(xen_start_info));
-
-    set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+    set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
 
     HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
 
     memset(empty_zero_page, 0, PAGE_SIZE);
-
-    for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+            
+    HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+               virt_to_mfn(pfn_to_mfn_frame_list_list);
+  
+    fpp = PAGE_SIZE/sizeof(unsigned long);
+    for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ )
     {
-        pfn_to_mfn_frame_list[j] = 
-            virt_to_mfn(&phys_to_machine_mapping[i]);
-    }
-    HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
-        virt_to_mfn(pfn_to_mfn_frame_list);
+       if ( (j % fpp) == 0 )
+       {
+           k++;
+           pfn_to_mfn_frame_list_list[k] = 
+                   virt_to_mfn(pfn_to_mfn_frame_list[k]);
+           j=0;
+       }
+       pfn_to_mfn_frame_list[k][j] = 
+               virt_to_mfn(&phys_to_machine_mapping[i]);
+    }
+    HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
 
     gnttab_resume();
 
     irq_resume();
 
-    ctrl_if_resume();
+    xencons_resume();
 
     xenbus_resume();
 
@@ -269,12 +223,14 @@
 
     usbif_resume();
 
-    for_each_cpu_mask(i, prev_present_cpus) {
+#ifdef CONFIG_SMP
+    for_each_cpu_mask(i, prev_present_cpus)
        restore_vcpu_context(i, &suspended_cpu_records[i]);
-    }
+#endif
 
     __sti();
 
+#ifdef CONFIG_SMP
  out_reenable_cpus:
     for_each_cpu_mask(i, prev_online_cpus) {
        j = cpu_up(i);
@@ -284,10 +240,8 @@
            err = j;
        }
     }
-
- out:
-    if ( suspend_record != NULL )
-        free_page((unsigned long)suspend_record);
+#endif
+
     return err;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig      Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig      Fri Sep  9 16:30:54 2005
@@ -21,12 +21,12 @@
          classical 32-bit x86 architecture. For details see
          <http://www.x86-64.org/>.
 
-config X86
-       bool
-       default y
-
 config 64BIT
        def_bool y
+
+config X86
+       bool
+       default y
 
 config MMU
        bool
@@ -89,10 +89,11 @@
 #        Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
 
 config MPSC
-       bool "Intel x86-64"
+       bool "Intel EM64T"
        help
-         Optimize for Intel IA32 with 64bit extension CPUs
-         (Prescott/Nocona/Potomac)
+         Optimize for Intel Pentium 4 and Xeon CPUs with Intel
+         Extended Memory 64 Technology(EM64T). For details see
+         <http://www.intel.com/technology/64bitextensions/>.
 
 config GENERIC_CPU
        bool "Generic-x86-64"
@@ -367,7 +368,6 @@
 
          If unsure, say Y. Only embedded should say N here.
 
-
 endmenu
 
 #
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Fri Sep  9 
16:30:54 2005
@@ -40,7 +40,7 @@
 i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o
 i386-obj-$(CONFIG_SWIOTLB)     += swiotlb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
-obj-$(CONFIG_X86_PM_TIMER)     += pmtimer.o
+#obj-$(CONFIG_X86_PM_TIMER)    += pmtimer.o
 
 c-obj-$(CONFIG_MODULES)                += module.o
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c        Fri Sep  9 
16:30:54 2005
@@ -20,6 +20,9 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
+#include <asm-xen/xen-public/memory.h>
+
+unsigned long pci_mem_start = 0xaeedbabe;
 
 /* 
  * PFN of last memory page.
@@ -517,14 +520,13 @@
 }
 
 #else  /* CONFIX_XEN */
+
 extern unsigned long xen_override_max_pfn;
 extern union xen_start_info_union xen_start_info_union;
-/*
- * Guest physical starts from 0.
- */
+
 unsigned long __init e820_end_of_ram(void)
 {
-        unsigned long max_end_pfn = xen_start_info.nr_pages;
+        unsigned long max_end_pfn = xen_start_info->nr_pages;
 
        if ( xen_override_max_pfn <  max_end_pfn)
                xen_override_max_pfn = max_end_pfn;
@@ -532,64 +534,53 @@
         return xen_override_max_pfn;
 }
 
-
-
 void __init e820_reserve_resources(void) 
 {
-       return;                 /* Xen won't have reserved entries */
-}
-
-#endif
-
-void __init parse_memopt(char *p, char **from) 
-{ 
-       end_user_pfn = memparse(p, from);
-       end_user_pfn >>= PAGE_SHIFT;    
-        xen_override_max_pfn = (unsigned long) end_user_pfn;
-} 
-
-unsigned long pci_mem_start = 0xaeedbabe;
-
-/*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space.  We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
- */
-__init void e820_setup_gap(void)
-{
-       unsigned long gapstart, gapsize;
-       unsigned long last;
-       int i;
-       int found = 0;
-
-       last = 0x100000000ull;
+       dom0_op_t op;
+       struct dom0_memory_map_entry *map;
+       unsigned long gapstart, gapsize, last;
+       int i, found = 0;
+
+       if (!(xen_start_info->flags & SIF_INITDOMAIN))
+               return;
+
+       map = alloc_bootmem_low_pages(PAGE_SIZE);
+       op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
+       op.u.physical_memory_map.memory_map = map;
+       op.u.physical_memory_map.max_map_entries =
+               PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
+       BUG_ON(HYPERVISOR_dom0_op(&op));
+
+       last = 0x100000000ULL;
        gapstart = 0x10000000;
        gapsize = 0x400000;
-       i = e820.nr_map;
-       while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
-
-               /*
-                * Since "last" is at most 4GB, we know we'll
-                * fit in 32 bits if this condition is true
-                */
-               if (last > end) {
-                       unsigned long gap = last - end;
-
-                       if (gap > gapsize) {
-                               gapsize = gap;
-                               gapstart = end;
-                               found = 1;
-                       }
-               }
-               if (start < last)
-                       last = start;
-       }
+
+       for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
+               struct resource *res;
+
+               if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
+                       gapsize = last - map[i].end;
+                       gapstart = map[i].end;
+                       found = 1;
+               }
+               if (map[i].start < last)
+                       last = map[i].start;
+
+               if (map[i].end > 0x100000000ULL)
+                       continue;
+               res = alloc_bootmem_low(sizeof(struct resource));
+               res->name = map[i].is_ram ? "System RAM" : "reserved";
+               res->start = map[i].start;
+               res->end = map[i].end - 1;
+               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               request_resource(&iomem_resource, res);
+       }
+
+       free_bootmem(__pa(map), PAGE_SIZE);
 
        if (!found) {
-               gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+               HYPERVISOR_memory_op(XENMEM_maximum_ram_page, &gapstart);
+               gapstart = (gapstart << PAGE_SHIFT) + 1024*1024;
                printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit 
address range\n"
                       KERN_ERR "PCI: Unassigned devices with 32bit resource 
registers may break!\n");
        }
@@ -607,3 +598,72 @@
        printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: 
%lx:%lx)\n",
                pci_mem_start, gapstart, gapsize);
 }
+
+#endif
+
+void __init parse_memopt(char *p, char **from) 
+{ 
+       end_user_pfn = memparse(p, from);
+       end_user_pfn >>= PAGE_SHIFT;    
+        xen_override_max_pfn = (unsigned long) end_user_pfn;
+} 
+
+/*
+ * Search for the biggest gap in the low 32 bits of the e820
+ * memory space.  We pass this space to PCI to assign MMIO resources
+ * for hotplug or unconfigured devices in.
+ * Hopefully the BIOS let enough space left.
+ */
+__init void e820_setup_gap(void)
+{
+#ifndef CONFIG_XEN
+       unsigned long gapstart, gapsize;
+       unsigned long last;
+       int i;
+       int found = 0;
+
+       last = 0x100000000ull;
+       gapstart = 0x10000000;
+       gapsize = 0x400000;
+       i = e820.nr_map;
+       while (--i >= 0) {
+               unsigned long long start = e820.map[i].addr;
+               unsigned long long end = start + e820.map[i].size;
+
+               /*
+                * Since "last" is at most 4GB, we know we'll
+                * fit in 32 bits if this condition is true
+                */
+               if (last > end) {
+                       unsigned long gap = last - end;
+
+                       if (gap > gapsize) {
+                               gapsize = gap;
+                               gapstart = end;
+                               found = 1;
+                       }
+               }
+               if (start < last)
+                       last = start;
+       }
+
+       if (!found) {
+               gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+               printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit 
address range\n"
+                      KERN_ERR "PCI: Unassigned devices with 32bit resource 
registers may break!\n");
+       }
+
+       /*
+        * Start allocating dynamic PCI memory a bit into the gap,
+        * aligned up to the nearest megabyte.
+        *
+        * Question: should we try to pad it up a bit (do something
+        * like " + (gapsize >> 3)" in there too?). We now have the
+        * technology.
+        */
+       pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+
+       printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: 
%lx:%lx)\n",
+               pci_mem_start, gapstart, gapsize);
+#endif
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Fri Sep  9 
16:30:54 2005
@@ -40,16 +40,13 @@
        .globl startup_64
 startup_64:
 ENTRY(_start)
-        cld                
-       /* Copy the necessary stuff from xen_start_info structure. */
-       movq  $xen_start_info_union,%rdi
-       movq  $256,%rcx
-       rep movsq
+       movq %rsi,xen_start_info(%rip)
 
 #ifdef CONFIG_SMP
-        ENTRY(startup_64_smp)
+ENTRY(startup_64_smp)
+#endif /* CONFIG_SMP */
+
        cld
-#endif /* CONFIG_SMP */
 
        movq init_rsp(%rip),%rsp
        /* zero EFLAGS after setting rsp */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c      Fri Sep  9 
16:30:54 2005
@@ -90,8 +90,9 @@
 {
        int i;
 
-        phys_to_machine_mapping = (u32 *)xen_start_info.mfn_list;
-        start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) +  
xen_start_info.nr_pt_frames;
+        phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+        start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + 
+               xen_start_info->nr_pt_frames;
 
        for (i = 0; i < 256; i++)
                set_intr_gate(i, early_idt_handler);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Fri Sep  9 
16:30:54 2005
@@ -76,7 +76,8 @@
 /* Allows setting of maximum possible memory size  */
 unsigned long xen_override_max_pfn;
 
-u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+unsigned long *phys_to_machine_mapping;
+unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512];
 
 EXPORT_SYMBOL(phys_to_machine_mapping);
 
@@ -84,7 +85,7 @@
 DEFINE_PER_CPU(int, nr_multicall_ents);
 
 /* Raw start-of-day parameters from the hypervisor. */
-union xen_start_info_union xen_start_info_union;
+start_info_t *xen_start_info;
 #endif
 
 /*
@@ -314,7 +315,7 @@
        
        if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
                max_cmdline = COMMAND_LINE_SIZE;
-       memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
+       memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
        saved_command_line[max_cmdline-1] = '\0';
 #else
        memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
@@ -687,7 +688,7 @@
 #endif
 #ifdef CONFIG_XEN
 #ifdef CONFIG_BLK_DEV_INITRD
-       if (xen_start_info.mod_start) {
+       if (xen_start_info->mod_start) {
                if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
                        /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/
                        initrd_start = INITRD_START + PAGE_OFFSET;
@@ -730,29 +731,50 @@
 #endif
 #ifdef CONFIG_XEN
        {
-               int i, j;
+               int i, j, k, fpp;
                /* Make sure we have a large enough P->M table. */
-               if (end_pfn > xen_start_info.nr_pages) {
+               if (end_pfn > xen_start_info->nr_pages) {
                        phys_to_machine_mapping = alloc_bootmem(
-                               max_pfn * sizeof(u32));
+                               end_pfn * sizeof(unsigned long));
                        memset(phys_to_machine_mapping, ~0,
-                              max_pfn * sizeof(u32));
+                              end_pfn * sizeof(unsigned long));
                        memcpy(phys_to_machine_mapping,
-                              (u32 *)xen_start_info.mfn_list,
-                              xen_start_info.nr_pages * sizeof(u32));
+                              (unsigned long *)xen_start_info->mfn_list,
+                              xen_start_info->nr_pages * sizeof(unsigned 
long));
                        free_bootmem(
-                               __pa(xen_start_info.mfn_list), 
-                               PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
-                                               sizeof(u32))));
-               }
-
-               pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
-
-               for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ )
-               {       
-                       pfn_to_mfn_frame_list[j] = 
+                               __pa(xen_start_info->mfn_list), 
+                               PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
+                                               sizeof(unsigned long))));
+               }
+
+               /* 
+                * Initialise the list of the frames that specify the list of 
+                * frames that make up the p2m table. Used by save/restore
+                */
+               pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
+               HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+                 virt_to_mfn(pfn_to_mfn_frame_list_list);
+              
+               fpp = PAGE_SIZE/sizeof(unsigned long);
+               for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ )
+               {
+                       if ( (j % fpp) == 0 )
+                       {
+                               k++;
+                               BUG_ON(k>=fpp);
+                               pfn_to_mfn_frame_list[k] = 
alloc_bootmem(PAGE_SIZE);
+                               pfn_to_mfn_frame_list_list[k] = 
+                                       virt_to_mfn(pfn_to_mfn_frame_list[k]);
+                               j=0;
+                       }
+                       pfn_to_mfn_frame_list[k][j] = 
                                virt_to_mfn(&phys_to_machine_mapping[i]);
                }
+               HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+               
+               
+
+
 
        }
 #endif
@@ -817,8 +839,8 @@
               op.u.set_iopl.iopl = 1;
               HYPERVISOR_physdev_op(&op);
 
-              if (xen_start_info.flags & SIF_INITDOMAIN) {
-                      if (!(xen_start_info.flags & SIF_PRIVILEGED))
+              if (xen_start_info->flags & SIF_INITDOMAIN) {
+                      if (!(xen_start_info->flags & SIF_PRIVILEGED))
                               panic("Xen granted us console access "
                                     "but not privileged status");
                       
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Fri Sep  9 
16:30:54 2005
@@ -1277,21 +1277,23 @@
 
 void smp_suspend(void)
 {
-       /* XXX todo: take down time and ipi's on all cpus */
        local_teardown_timer_irq();
        smp_intr_exit();
 }
 
 void smp_resume(void)
 {
-       /* XXX todo: restore time and ipi's on all cpus */
        smp_intr_init();
        local_setup_timer_irq();
 }
 
-void _restore_vcpu(void)
-{
-       /* XXX need to write this */
-}
-
-#endif
+void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+}
+
+int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+       return 0;
+}
+
+#endif
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Fri Sep  9 16:30:54 2005
@@ -149,7 +149,7 @@
        pmd_t *pmd;
        pte_t *pte;
 
-        pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
        pgd += pgd_index(address);
 
        printk("PGD %lx ", pgd_val(*pgd));
@@ -296,9 +296,9 @@
 #define MEM_VERBOSE 1
 
 #ifdef MEM_VERBOSE
-#define MEM_LOG(_f, _a...)                           \
-  printk("fault.c:[%d]-> " _f "\n", \
-          __LINE__ , ## _a )
+#define MEM_LOG(_f, _a...)                     \
+       printk("fault.c:[%d]-> " _f "\n",       \
+       __LINE__ , ## _a )
 #else
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
@@ -325,7 +325,7 @@
        siginfo_t info;
 
        if (!user_mode(regs))
-                error_code &= ~4; /* means kernel */
+               error_code &= ~4; /* means kernel */
 
 #ifdef CONFIG_CHECKING
        { 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Fri Sep  9 16:30:54 2005
@@ -62,14 +62,16 @@
  * avaialble in init_memory_mapping().
  */
 
-#define addr_to_page(addr, page)                                             \
-        (addr) &= PHYSICAL_PAGE_MASK;                                   \
-        (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> 
PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))
+#define addr_to_page(addr, page)                               \
+       (addr) &= PHYSICAL_PAGE_MASK;                           \
+       (page) = ((unsigned long *) ((unsigned long)            \
+       (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) +   \
+       __START_KERNEL_map)))
 
 static void __make_page_readonly(unsigned long va)
 {
-        unsigned long addr;
-        pte_t pte, *ptep;
+       unsigned long addr;
+       pte_t pte, *ptep;
        unsigned long *page = (unsigned long *) init_level4_pgt;
 
        addr = (unsigned long) page[pgd_index(va)];
@@ -89,22 +91,22 @@
 
 static void __make_page_writable(unsigned long va)
 {
-        unsigned long addr;
-        pte_t pte, *ptep;
-        unsigned long *page = (unsigned long *) init_level4_pgt;
-
-        addr = (unsigned long) page[pgd_index(va)];
-        addr_to_page(addr, page);
-
-        addr = page[pud_index(va)];
-        addr_to_page(addr, page);
-        
-        addr = page[pmd_index(va)];
-        addr_to_page(addr, page);
-
-        ptep = (pte_t *) &page[pte_index(va)];
+       unsigned long addr;
+       pte_t pte, *ptep;
+       unsigned long *page = (unsigned long *) init_level4_pgt;
+
+       addr = (unsigned long) page[pgd_index(va)];
+       addr_to_page(addr, page);
+
+       addr = page[pud_index(va)];
+       addr_to_page(addr, page);
+ 
+       addr = page[pmd_index(va)];
+       addr_to_page(addr, page);
+
+       ptep = (pte_t *) &page[pte_index(va)];
        pte.pte = (ptep->pte | _PAGE_RW);
-        xen_l1_entry_update(ptep, pte);
+       xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
 }
 
@@ -115,55 +117,55 @@
 void make_page_readonly(void *va)
 {
        pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
-        unsigned long addr = (unsigned long) va;
-
-        if (!init_mapping_done) {
-                __make_page_readonly(addr);
-                return;
-        }
-                
-        pgd = pgd_offset_k(addr);
-        pud = pud_offset(pgd, addr);
-        pmd = pmd_offset(pud, addr);
-        ptep = pte_offset_kernel(pmd, addr);
+       unsigned long addr = (unsigned long) va;
+
+       if (!init_mapping_done) {
+               __make_page_readonly(addr);
+               return;
+       }
+  
+       pgd = pgd_offset_k(addr);
+       pud = pud_offset(pgd, addr);
+       pmd = pmd_offset(pud, addr);
+       ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte & ~_PAGE_RW);
-        xen_l1_entry_update(ptep, pte);
+       xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
 }
 
 void make_page_writable(void *va)
 {
-        pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
-        unsigned long addr = (unsigned long) va;
-
-        if (!init_mapping_done) {
-                __make_page_writable(addr);
-                return;
-        }
-
-        pgd = pgd_offset_k(addr);
-        pud = pud_offset(pgd, addr);
-        pmd = pmd_offset(pud, addr);
-        ptep = pte_offset_kernel(pmd, addr);
+       pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
+       unsigned long addr = (unsigned long) va;
+
+       if (!init_mapping_done) {
+               __make_page_writable(addr);
+               return;
+       }
+
+       pgd = pgd_offset_k(addr);
+       pud = pud_offset(pgd, addr);
+       pmd = pmd_offset(pud, addr);
+       ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte | _PAGE_RW);
-        xen_l1_entry_update(ptep, pte);
+       xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
 }
 
 void make_pages_readonly(void* va, unsigned nr)
 {
-        while ( nr-- != 0 ) {
-                make_page_readonly(va);
-                va = (void*)((unsigned long)va + PAGE_SIZE);
-        }
+       while (nr-- != 0) {
+               make_page_readonly(va);
+               va = (void*)((unsigned long)va + PAGE_SIZE);
+       }
 }
 
 void make_pages_writable(void* va, unsigned nr)
 {
-        while ( nr-- != 0 ) {
-                make_page_writable(va);
-                va = (void*)((unsigned long)va + PAGE_SIZE);
-        }
+       while (nr-- != 0) {
+               make_page_writable(va);
+               va = (void*)((unsigned long)va + PAGE_SIZE);
+       }
 }
 
 /*
@@ -389,7 +391,7 @@
         set_pte_phys(address, phys, prot, SET_FIXMAP_USER); 
 }
 
-unsigned long __initdata table_start, table_end, tables_space; 
+unsigned long __initdata table_start, tables_space; 
 
 unsigned long get_machine_pfn(unsigned long addr)
 {
@@ -400,40 +402,15 @@
         return pte_mfn(*pte);
 } 
 
-#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
-#define MAX_LOW_PAGES  0x20
-static unsigned long __init_pgt[MAX_LOW_PAGES][512]  ALIGN_TO_4K;
-static int __init_pgt_index;
-
-/*
- * We start using from start_pfn
- */
 static __init void *alloc_static_page(unsigned long *phys)
 {
-       int i = __init_pgt_index++;
-
-       if (__init_pgt_index >= MAX_LOW_PAGES) {
-               printk("Need to increase MAX_LOW_PAGES");
-               BUG();
-       }
-               
-       *phys = __pa(__init_pgt[i]);
-
-       return (void *) __init_pgt[i];
+       unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map;
+       *phys = start_pfn << PAGE_SHIFT;
+       start_pfn++;
+       memset((void *)va, 0, PAGE_SIZE);
+       return (void *)va;
 } 
 
-/*
- * Get RO page
- */
-static void __init *alloc_low_page(unsigned long *phys)
-{ 
-        unsigned long pfn = table_end++;
-    
-        *phys = (pfn << PAGE_SHIFT);
-        memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, 
PAGE_SIZE);
-        return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
-} 
-
 #define PTE_SIZE PAGE_SIZE
 
 static inline void __set_pte(pte_t *dst, pte_t val)
@@ -443,30 +420,24 @@
 
 static inline int make_readonly(unsigned long paddr)
 {
-    int readonly = 0;
-
-    /* Make new page tables read-only. */
-    if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
-        (paddr >= (table_start << PAGE_SHIFT)))
-        readonly = 1;
-
-    /* Make old page tables read-only. */
-    if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
-                  (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
-        (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
-        readonly = 1;
-
-    /*
-     * No need for writable mapping of kernel image. This also ensures that
-     * page and descriptor tables embedded inside don't have writable mappings.
-     */
-    if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
-        readonly = 1;
-
-    return readonly;
-}
-
-void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+       int readonly = 0;
+
+       /* Make old and new page tables read-only. */
+       if ((paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
+           && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+               readonly = 1;
+       /*
+        * No need for writable mapping of kernel image. This also ensures that
+        * page and descriptor tables embedded inside don't have writable
+        * mappings. 
+        */
+       if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+               readonly = 1;
+
+       return readonly;
+}
+
+static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned 
long end)
 { 
         long i, j, k; 
         unsigned long paddr;
@@ -485,7 +456,7 @@
                        break;
                } 
 
-               pmd = alloc_low_page(&pmd_phys);
+               pmd = alloc_static_page(&pmd_phys);
                 make_page_readonly(pmd);
                 xen_pmd_pin(pmd_phys);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
@@ -499,18 +470,19 @@
                                        set_pmd(pmd,  __pmd(0)); 
                                break;
                        }
-                        pte = alloc_low_page(&pte_phys);
+                        pte = alloc_static_page(&pte_phys);
                         pte_save = pte;
                         for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += 
PTE_SIZE) {
+                                if ((paddr >= end) ||
+                                    ((paddr >> PAGE_SHIFT) >=
+                                     xen_start_info->nr_pages)) { 
+                                        __set_pte(pte, __pte(0)); 
+                                        continue;
+                                }
                                 if (make_readonly(paddr)) {
                                         __set_pte(pte, 
                                                 __pte(paddr | (_KERNPG_TABLE & 
~_PAGE_RW)));
                                         continue;
-                                }
-                                if (paddr >= end) { 
-                                        for (; k < PTRS_PER_PTE; k++, pte++)
-                                                __set_pte(pte, __pte(0)); 
-                                        break;
                                 }
                                 __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
                         }
@@ -525,15 +497,16 @@
 
 static void __init find_early_table_space(unsigned long end)
 {
-        unsigned long puds, pmds, ptes; 
+       unsigned long puds, pmds, ptes; 
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-        ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
-
-        tables_space = round_up(puds * 8, PAGE_SIZE) + 
-                         round_up(pmds * 8, PAGE_SIZE) + 
-                         round_up(ptes * 8, PAGE_SIZE); 
+       ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
+
+       tables_space =
+               round_up(puds * 8, PAGE_SIZE) + 
+               round_up(pmds * 8, PAGE_SIZE) + 
+               round_up(ptes * 8, PAGE_SIZE); 
 }
 
 void __init xen_init_pt(void)
@@ -549,7 +522,7 @@
        memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);
 
        /* Find the initial pte page that was built for us. */
-       page = (unsigned long *)xen_start_info.pt_base;
+       page = (unsigned long *)xen_start_info->pt_base;
        addr = page[pgd_index(__START_KERNEL_map)];
        addr_to_page(addr, page);
        addr = page[pud_index(__START_KERNEL_map)];
@@ -579,65 +552,58 @@
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
 }
 
-/*
- * Extend kernel mapping to access pages for page tables.  The initial
- * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
- * mapping for early initialization.
- */
-static unsigned long current_size, extended_size;
-
 void __init extend_init_mapping(void) 
 {
        unsigned long va = __START_KERNEL_map;
        unsigned long phys, addr, *pte_page;
-        pmd_t *pmd;
+       pmd_t *pmd;
        pte_t *pte, new_pte;
-       unsigned long *page = (unsigned long *) init_level4_pgt;
-       int i;
+       unsigned long *page = (unsigned long *)init_level4_pgt;
 
        addr = page[pgd_index(va)];
        addr_to_page(addr, page);
        addr = page[pud_index(va)];
        addr_to_page(addr, page);
 
-       for (;;) {
+       /* Kill mapping of low 1MB. */
+       while (va < (unsigned long)&_text) {
+               HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
+               va += PAGE_SIZE;
+       }
+
+       /* Ensure init mappings cover kernel text/data and initial tables. */
+       while (va < (__START_KERNEL_map
+                    + (start_pfn << PAGE_SHIFT)
+                    + tables_space)) {
                pmd = (pmd_t *)&page[pmd_index(va)];
-               if (!pmd_present(*pmd))
-                       break;
-               addr = page[pmd_index(va)];
-               addr_to_page(addr, pte_page);
-               for (i = 0; i < PTRS_PER_PTE; i++) {
-                       pte = (pte_t *) &pte_page[pte_index(va)];
-                       if (!pte_present(*pte))
-                               break;
-                       va += PAGE_SIZE;
-                       current_size += PAGE_SIZE;
+               if (pmd_none(*pmd)) {
+                       pte_page = alloc_static_page(&phys);
+                       make_page_readonly(pte_page);
+                       xen_pte_pin(phys);
+                       set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+               } else {
+                       addr = page[pmd_index(va)];
+                       addr_to_page(addr, pte_page);
                }
-       }
-
-       while (va < __START_KERNEL_map + current_size + tables_space) {
-               pmd = (pmd_t *) &page[pmd_index(va)];
-               if (!pmd_none(*pmd))
-                       continue;
-               pte_page = (unsigned long *) alloc_static_page(&phys);
-               make_page_readonly(pte_page);
-               xen_pte_pin(phys);
-               set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
-               for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+               pte = (pte_t *)&pte_page[pte_index(va)];
+               if (pte_none(*pte)) {
                        new_pte = pfn_pte(
                                (va - __START_KERNEL_map) >> PAGE_SHIFT, 
                                __pgprot(_KERNPG_TABLE | _PAGE_USER));
-                       pte = (pte_t *)&pte_page[pte_index(va)];
                        xen_l1_entry_update(pte, new_pte);
-                       extended_size += PAGE_SIZE;
                }
-       }
-
-       /* Kill mapping of low 1MB. */
-       for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += 
PAGE_SIZE)
+               va += PAGE_SIZE;
+       }
+
+       /* Finally, blow away any spurious initial mappings. */
+       while (1) {
+               pmd = (pmd_t *)&page[pmd_index(va)];
+               if (pmd_none(*pmd))
+                       break;
                HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
-}
-
+               va += PAGE_SIZE;
+       }
+}
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
@@ -650,34 +616,31 @@
 
        find_early_table_space(end);
        extend_init_mapping();
-       start_pfn = current_size >> PAGE_SHIFT;
 
        table_start = start_pfn;
-       table_end = table_start;
 
        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);
 
        for (; start < end; start = next) {
                unsigned long pud_phys; 
-                pud_t *pud = alloc_low_page(&pud_phys);
-                make_page_readonly(pud);
-                xen_pud_pin(pud_phys);
+               pud_t *pud = alloc_static_page(&pud_phys);
+               make_page_readonly(pud);
+               xen_pud_pin(pud_phys);
                next = start + PGDIR_SIZE;
                if (next > end) 
                        next = end; 
                phys_pud_init(pud, __pa(start), __pa(next));
                set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
-       } 
-
-       printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, 
-              table_start<<PAGE_SHIFT, 
-              table_end<<PAGE_SHIFT);
-
-        start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
+       }
+
+       printk("kernel direct mapping tables upto %lx @ %lx-%lx\n",
+              __pa(end), table_start<<PAGE_SHIFT, start_pfn<<PAGE_SHIFT);
+
+       BUG_ON(start_pfn != (table_start + (tables_space >> PAGE_SHIFT)));
 
        __flush_tlb_all();
-        init_mapping_done = 1;
+       init_mapping_done = 1;
 }
 
 extern struct x8664_pda cpu_pda[NR_CPUS];
@@ -708,7 +671,7 @@
                free_area_init(zones_size);
        }
 
-        set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+        set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
         HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
 
         memset(empty_zero_page, 0, sizeof(empty_zero_page));
@@ -719,7 +682,7 @@
                int i;
         /* Setup mapping of lower 1st MB */
                for (i = 0; i < NR_FIX_ISAMAPS; i++)
-                       if (xen_start_info.flags & SIF_PRIVILEGED)
+                       if (xen_start_info->flags & SIF_PRIVILEGED)
                                set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                        else
                                __set_fixmap(FIX_ISAMAP_BEGIN - i,
@@ -767,9 +730,6 @@
 
 static inline int page_is_ram (unsigned long pagenr)
 {
-        if (pagenr < start_pfn || pagenr >= end_pfn)
-                return 0;
-
         return 1;
 }
 
@@ -1005,3 +965,13 @@
 {
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c   Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c   Fri Sep  9 16:30:54 2005
@@ -231,7 +231,7 @@
 }
 #endif
 
-static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 {
 #if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
@@ -258,7 +258,6 @@
        return 0;
 }
 
-#if 0
 static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
 {
         unsigned long long val;
@@ -275,7 +274,6 @@
        vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
        return mmap_mem(file, vma);
 }
-#endif
 
 extern long vread(char *buf, char *addr, unsigned long count);
 extern long vwrite(char *buf, char *addr, unsigned long count);
@@ -731,7 +729,7 @@
        .llseek         = memory_lseek,
        .read           = read_mem,
        .write          = write_mem,
-       .mmap           = mmap_kmem,
+       .mmap           = mmap_mem,
        .open           = open_mem,
 };
 #else
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri Sep  9 16:30:54 2005
@@ -8,7 +8,9 @@
 
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
 obj-$(CONFIG_XEN_NETDEV_FRONTEND)      += netfront/
 obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
+obj-$(CONFIG_XEN_TPMDEV_FRONTEND)      += tpmfront/
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Fri Sep  9 
16:30:54 2005
@@ -44,6 +44,7 @@
 #include <asm-xen/xen_proc.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/balloon.h>
+#include <asm-xen/xen-public/memory.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -57,6 +58,12 @@
 static struct proc_dir_entry *balloon_pde;
 
 static DECLARE_MUTEX(balloon_mutex);
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
 spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
 
 /* We aim for 'current allocation' == 'target allocation'. */
@@ -156,6 +163,146 @@
        return target;
 }
 
+static int increase_reservation(unsigned long nr_pages)
+{
+       unsigned long *mfn_list, pfn, i, flags;
+       struct page   *page;
+       long           rc;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
+               nr_pages = PAGE_SIZE / sizeof(unsigned long);
+
+       mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (mfn_list == NULL)
+               return -ENOMEM;
+
+       balloon_lock(flags);
+
+       reservation.extent_start = mfn_list;
+       reservation.nr_extents   = nr_pages;
+       rc = HYPERVISOR_memory_op(
+               XENMEM_increase_reservation, &reservation);
+       if (rc < nr_pages) {
+               /* We hit the Xen hard limit: reprobe. */
+               reservation.extent_start = mfn_list;
+               reservation.nr_extents   = rc;
+               BUG_ON(HYPERVISOR_memory_op(
+                       XENMEM_decrease_reservation,
+                       &reservation) != rc);
+               hard_limit = current_pages + rc - driver_pages;
+               goto out;
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               page = balloon_retrieve();
+               BUG_ON(page == NULL);
+
+               pfn = page - mem_map;
+               BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+
+               /* Update P->M and M->P tables. */
+               phys_to_machine_mapping[pfn] = mfn_list[i];
+               xen_machphys_update(mfn_list[i], pfn);
+            
+               /* Link back into the page tables if not highmem. */
+               if (pfn < max_low_pfn)
+                       BUG_ON(HYPERVISOR_update_va_mapping(
+                               (unsigned long)__va(pfn << PAGE_SHIFT),
+                               pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
+                               0));
+
+               /* Relinquish the page back to the allocator. */
+               ClearPageReserved(page);
+               set_page_count(page, 1);
+               __free_page(page);
+       }
+
+       current_pages += nr_pages;
+
+ out:
+       balloon_unlock(flags);
+
+       free_page((unsigned long)mfn_list);
+
+       return 0;
+}
+
+static int decrease_reservation(unsigned long nr_pages)
+{
+       unsigned long *mfn_list, pfn, i, flags;
+       struct page   *page;
+       void          *v;
+       int            need_sleep = 0;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
+               nr_pages = PAGE_SIZE / sizeof(unsigned long);
+
+       mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (mfn_list == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < nr_pages; i++) {
+               if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
+                       nr_pages = i;
+                       need_sleep = 1;
+                       break;
+               }
+
+               pfn = page - mem_map;
+               mfn_list[i] = phys_to_machine_mapping[pfn];
+
+               if (!PageHighMem(page)) {
+                       v = phys_to_virt(pfn << PAGE_SHIFT);
+                       scrub_pages(v, 1);
+                       BUG_ON(HYPERVISOR_update_va_mapping(
+                               (unsigned long)v, __pte_ma(0), 0));
+               }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+               else {
+                       v = kmap(page);
+                       scrub_pages(v, 1);
+                       kunmap(page);
+               }
+#endif
+       }
+
+       /* Ensure that ballooned highmem pages don't have kmaps. */
+       kmap_flush_unused();
+       flush_tlb_all();
+
+       balloon_lock(flags);
+
+       /* No more mappings: invalidate P2M and add to balloon. */
+       for (i = 0; i < nr_pages; i++) {
+               pfn = mfn_to_pfn(mfn_list[i]);
+               phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+               balloon_append(pfn_to_page(pfn));
+       }
+
+       reservation.extent_start = mfn_list;
+       reservation.nr_extents   = nr_pages;
+       BUG_ON(HYPERVISOR_memory_op(
+               XENMEM_decrease_reservation, &reservation) != nr_pages);
+
+       current_pages -= nr_pages;
+
+       balloon_unlock(flags);
+
+       free_page((unsigned long)mfn_list);
+
+       return need_sleep;
+}
+
 /*
  * We avoid multiple worker processes conflicting via the balloon mutex.
  * We may of course race updates of the target counts (which are protected
@@ -164,112 +311,23 @@
  */
 static void balloon_process(void *unused)
 {
-       unsigned long *mfn_list, pfn, i, flags;
-       struct page   *page;
-       long           credit, debt, rc;
-       void          *v;
+       int need_sleep = 0;
+       long credit;
 
        down(&balloon_mutex);
 
- retry:
-       mfn_list = NULL;
-
-       if ((credit = current_target() - current_pages) > 0) {
-               mfn_list = vmalloc(credit * sizeof(*mfn_list));
-               if (mfn_list == NULL)
-                       goto out;
-
-               balloon_lock(flags);
-               rc = HYPERVISOR_dom_mem_op(
-                       MEMOP_increase_reservation, mfn_list, credit, 0);
-               balloon_unlock(flags);
-               if (rc < credit) {
-                       /* We hit the Xen hard limit: reprobe. */
-                       BUG_ON(HYPERVISOR_dom_mem_op(
-                               MEMOP_decrease_reservation,
-                               mfn_list, rc, 0) != rc);
-                       hard_limit = current_pages + rc - driver_pages;
-                       vfree(mfn_list);
-                       goto retry;
-               }
-
-               for (i = 0; i < credit; i++) {
-                       page = balloon_retrieve();
-                       BUG_ON(page == NULL);
-
-                       pfn = page - mem_map;
-                       if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
-                               BUG();
-
-                       /* Update P->M and M->P tables. */
-                       phys_to_machine_mapping[pfn] = mfn_list[i];
-                       xen_machphys_update(mfn_list[i], pfn);
-            
-                       /* Link back into the page tables if not highmem. */
-                       if (pfn < max_low_pfn)
-                               BUG_ON(HYPERVISOR_update_va_mapping(
-                                       (unsigned long)__va(pfn << PAGE_SHIFT),
-                                       pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
-                                       0));
-
-                       /* Relinquish the page back to the allocator. */
-                       ClearPageReserved(page);
-                       set_page_count(page, 1);
-                       __free_page(page);
-               }
-
-               current_pages += credit;
-       } else if (credit < 0) {
-               debt = -credit;
-
-               mfn_list = vmalloc(debt * sizeof(*mfn_list));
-               if (mfn_list == NULL)
-                       goto out;
-
-               for (i = 0; i < debt; i++) {
-                       if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
-                               debt = i;
-                               break;
-                       }
-
-                       pfn = page - mem_map;
-                       mfn_list[i] = phys_to_machine_mapping[pfn];
-
-                       if (!PageHighMem(page)) {
-                               v = phys_to_virt(pfn << PAGE_SHIFT);
-                               scrub_pages(v, 1);
-                               BUG_ON(HYPERVISOR_update_va_mapping(
-                                       (unsigned long)v, __pte_ma(0), 0));
-                       }
-#ifdef CONFIG_XEN_SCRUB_PAGES
-                       else {
-                               v = kmap(page);
-                               scrub_pages(v, 1);
-                               kunmap(page);
-                       }
+       do {
+               credit = current_target() - current_pages;
+               if (credit > 0)
+                       need_sleep = (increase_reservation(credit) != 0);
+               if (credit < 0)
+                       need_sleep = (decrease_reservation(-credit) != 0);
+
+#ifndef CONFIG_PREEMPT
+               if (need_resched())
+                       schedule();
 #endif
-               }
-
-               /* Ensure that ballooned highmem pages don't have kmaps. */
-               kmap_flush_unused();
-               flush_tlb_all();
-
-               /* No more mappings: invalidate P2M and add to balloon. */
-               for (i = 0; i < debt; i++) {
-                       pfn = mfn_to_pfn(mfn_list[i]);
-                       phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
-                       balloon_append(pfn_to_page(pfn));
-               }
-
-               BUG_ON(HYPERVISOR_dom_mem_op(
-                       MEMOP_decrease_reservation,mfn_list, debt, 0) != debt);
-
-               current_pages -= debt;
-       }
-
- out:
-       if (mfn_list != NULL)
-               vfree(mfn_list);
+       } while ((credit != 0) && !need_sleep);
 
        /* Schedule more work if there is some still to be done. */
        if (current_target() != current_pages)
@@ -295,10 +353,10 @@
 /* React to a change in the target key */
 static void watch_target(struct xenbus_watch *watch, const char *node)
 {
-       unsigned long new_target;
+       unsigned long long new_target;
        int err;
 
-       err = xenbus_scanf("memory", "target", "%lu", &new_target);
+       err = xenbus_scanf("memory", "target", "%llu", &new_target);
        if (err != 1) {
                printk(KERN_ERR "Unable to read memory/target\n");
                return;
@@ -390,7 +448,7 @@
 
        IPRINTK("Initialising balloon driver.\n");
 
-       current_pages = min(xen_start_info.nr_pages, max_pfn);
+       current_pages = min(xen_start_info->nr_pages, max_pfn);
        target_pages  = current_pages;
        balloon_low   = 0;
        balloon_high  = 0;
@@ -410,7 +468,7 @@
        balloon_pde->write_proc = balloon_write;
     
        /* Initialise the balloon with excess memory space. */
-       for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) {
+       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
                page = &mem_map[pfn];
                if (!PageReserved(page))
                        balloon_append(page);
@@ -429,8 +487,9 @@
 void balloon_update_driver_allowance(long delta)
 {
        unsigned long flags;
+
        balloon_lock(flags);
-       driver_pages += delta; /* non-atomic update */
+       driver_pages += delta;
        balloon_unlock(flags);
 }
 
@@ -438,11 +497,17 @@
        pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
 {
        unsigned long mfn = pte_mfn(*pte);
+       struct xen_memory_reservation reservation = {
+               .extent_start = &mfn,
+               .nr_extents   = 1,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
        set_pte(pte, __pte_ma(0));
        phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
                INVALID_P2M_ENTRY;
-       BUG_ON(HYPERVISOR_dom_mem_op(
-               MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+       BUG_ON(HYPERVISOR_memory_op(
+               XENMEM_decrease_reservation, &reservation) != 1);
        return 0;
 }
 
@@ -457,9 +522,10 @@
 
        scrub_pages(vstart, 1 << order);
 
-       balloon_lock(flags);
        BUG_ON(generic_page_range(
                &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL));
+
+       balloon_lock(flags);
        current_pages -= 1UL << order;
        balloon_unlock(flags);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Fri Sep  9 
16:30:54 2005
@@ -504,8 +504,8 @@
     int i;
     struct page *page;
 
-    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
-         !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
+    if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
+         !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
         return 0;
 
     blkif_interface_init();
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri Sep  9 
16:30:54 2005
@@ -32,23 +32,15 @@
  */
 
 #if 1
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
+#define ASSERT(p)                                                         \
+       if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \
+       __LINE__, __FILE__); *(int*)0=0; }
 #else
 #define ASSERT(_p)
 #endif
 
 #include <linux/version.h>
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include "block.h"
-#else
-#include "common.h"
-#include <linux/blk.h>
-#include <linux/tqueue.h>
-#endif
-
 #include <linux/cdrom.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
@@ -58,90 +50,57 @@
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
 
-typedef unsigned char byte; /* from linux/ide.h */
-
-/* Control whether runtime update of vbds is enabled. */
-#define ENABLE_VBD_UPDATE 1
-
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED    1
 
 static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
 
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
 #define GRANTREF_INVALID (1<<15)
-
-static struct blk_shadow {
-    blkif_request_t req;
-    unsigned long request;
-    unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-} blk_shadow[BLK_RING_SIZE];
-unsigned long blk_shadow_free;
+#define GRANT_INVALID_REF      (0xFFFF)
 
 static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
 
 static void kick_pending_request_queues(struct blkfront_info *info);
 
-static int __init xlblk_init(void);
-
 static void blkif_completion(struct blk_shadow *s);
 
-static inline int GET_ID_FROM_FREELIST(void)
-{
-    unsigned long free = blk_shadow_free;
-    BUG_ON(free > BLK_RING_SIZE);
-    blk_shadow_free = blk_shadow[free].req.id;
-    blk_shadow[free].req.id = 0x0fffffee; /* debug */
-    return free;
-}
-
-static inline void ADD_ID_TO_FREELIST(unsigned long id)
-{
-    blk_shadow[id].req.id  = blk_shadow_free;
-    blk_shadow[id].request = 0;
-    blk_shadow_free = id;
-}
-
-
-/************************  COMMON CODE  (inlined)  ************************/
-
-/* Kernel-specific definitions used in the common code */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define DISABLE_SCATTERGATHER()
-#else
-static int sg_operation = -1;
-#define DISABLE_SCATTERGATHER() (sg_operation = -1)
-#endif
+static inline int GET_ID_FROM_FREELIST(
+       struct blkfront_info *info)
+{
+       unsigned long free = info->shadow_free;
+       BUG_ON(free > BLK_RING_SIZE);
+       info->shadow_free = info->shadow[free].req.id;
+       info->shadow[free].req.id = 0x0fffffee; /* debug */
+       return free;
+}
+
+static inline void ADD_ID_TO_FREELIST(
+       struct blkfront_info *info, unsigned long id)
+{
+       info->shadow[id].req.id  = info->shadow_free;
+       info->shadow[id].request = 0;
+       info->shadow_free = id;
+}
 
 static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
 {
 
-    s->req = *r;
+       s->req = *r;
 }
 
 static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
 {
 
-    *r = s->req;
-}
-
+       *r = s->req;
+}
 
 static inline void flush_requests(struct blkfront_info *info)
 {
-    DISABLE_SCATTERGATHER();
-    RING_PUSH_REQUESTS(&info->ring);
-    notify_via_evtchn(info->evtchn);
-}
-
-
-/**************************  KERNEL VERSION 2.6  **************************/
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-
-module_init(xlblk_init);
+       RING_PUSH_REQUESTS(&info->ring);
+       notify_via_evtchn(info->evtchn);
+}
 
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
@@ -169,50 +128,44 @@
 
 int blkif_open(struct inode *inode, struct file *filep)
 {
-       // struct gendisk *gd = inode->i_bdev->bd_disk;
-       // struct xlbd_disk_info *di = (struct xlbd_disk_info 
*)gd->private_data;
-
-       /* Update of usage count is protected by per-device semaphore. */
-       // di->mi->usage++;
-
        return 0;
 }
 
 
 int blkif_release(struct inode *inode, struct file *filep)
 {
-    /* FIXME: This is where we can actually free up majors, etc. --RR */
-    return 0;
+       return 0;
 }
 
 
 int blkif_ioctl(struct inode *inode, struct file *filep,
                 unsigned command, unsigned long argument)
 {
-    int i;
-
-    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long)argument, inode->i_rdev);
-
-    switch ( command )
-    {
-    case HDIO_GETGEO:
-        /* return ENOSYS to use defaults */
-        return -ENOSYS;
-
-    case CDROMMULTISESSION:
-        DPRINTK("FIXME: support multisession CDs later\n");
-        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
-            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
-        return 0;
-
-    default:
-        /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
-          command);*/
-        return -EINVAL; /* same return as native Linux */
-    }
-
-    return 0;
+       int i;
+
+       DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+                     command, (long)argument, inode->i_rdev);
+
+       switch ( command )
+       {
+       case HDIO_GETGEO:
+               /* return ENOSYS to use defaults */
+               return -ENOSYS;
+
+       case CDROMMULTISESSION:
+               DPRINTK("FIXME: support multisession CDs later\n");
+               for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+                       if (put_user(0, (char *)(argument + i)))
+                               return -EFAULT;
+               return 0;
+
+       default:
+               /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+                 command);*/
+               return -EINVAL; /* same return as native Linux */
+       }
+
+       return 0;
 }
 
 
@@ -228,76 +181,77 @@
  */
 static int blkif_queue_request(struct request *req)
 {
-    struct blkfront_info *info = req->rq_disk->private_data;
-    unsigned long buffer_ma;
-    blkif_request_t *ring_req;
-    struct bio *bio;
-    struct bio_vec *bvec;
-    int idx;
-    unsigned long id;
-    unsigned int fsect, lsect;
-    int ref;
-    grant_ref_t gref_head;
-
-    if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
-        return 1;
-
-    if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
-                                     &gref_head) < 0) {
-           gnttab_request_free_callback(&info->callback,
-                                        blkif_restart_queue_callback, info,
-                                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
-           return 1;
-    }
-
-    /* Fill out a communications ring structure. */
-    ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-    id = GET_ID_FROM_FREELIST();
-    blk_shadow[id].request = (unsigned long)req;
-
-    ring_req->id = id;
-    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ;
-    ring_req->sector_number = (blkif_sector_t)req->sector;
-    ring_req->handle = info->handle;
-
-    ring_req->nr_segments = 0;
-    rq_for_each_bio(bio, req)
-    {
-        bio_for_each_segment(bvec, bio, idx)
-        {
-            if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST )
-                BUG();
-            buffer_ma = page_to_phys(bvec->bv_page);
-            fsect = bvec->bv_offset >> 9;
-            lsect = fsect + (bvec->bv_len >> 9) - 1;
-            /* install a grant reference. */
-            ref = gnttab_claim_grant_reference(&gref_head);
-            ASSERT( ref != -ENOSPC );
-
-            gnttab_grant_foreign_access_ref(
-                        ref,
-                        info->backend_id,
-                        buffer_ma >> PAGE_SHIFT,
-                        rq_data_dir(req) );
-
-            blk_shadow[id].frame[ring_req->nr_segments] =
-                buffer_ma >> PAGE_SHIFT;
-
-            ring_req->frame_and_sects[ring_req->nr_segments] =
-                blkif_fas_from_gref(ref, fsect, lsect);
-
-           ring_req->nr_segments++;
-        }
-    }
-
-    info->ring.req_prod_pvt++;
-
-    /* Keep a private copy so we can reissue requests when recovering. */
-    pickle_request(&blk_shadow[id], ring_req);
-
-    gnttab_free_grant_references(gref_head);
-
-    return 0;
+       struct blkfront_info *info = req->rq_disk->private_data;
+       unsigned long buffer_mfn;
+       blkif_request_t *ring_req;
+       struct bio *bio;
+       struct bio_vec *bvec;
+       int idx;
+       unsigned long id;
+       unsigned int fsect, lsect;
+       int ref;
+       grant_ref_t gref_head;
+
+       if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+               return 1;
+
+       if (gnttab_alloc_grant_references(
+               BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+               gnttab_request_free_callback(
+                       &info->callback,
+                       blkif_restart_queue_callback,
+                       info,
+                       BLKIF_MAX_SEGMENTS_PER_REQUEST);
+               return 1;
+       }
+
+       /* Fill out a communications ring structure. */
+       ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+       id = GET_ID_FROM_FREELIST(info);
+       info->shadow[id].request = (unsigned long)req;
+
+       ring_req->id = id;
+       ring_req->operation = rq_data_dir(req) ?
+               BLKIF_OP_WRITE : BLKIF_OP_READ;
+       ring_req->sector_number = (blkif_sector_t)req->sector;
+       ring_req->handle = info->handle;
+
+       ring_req->nr_segments = 0;
+       rq_for_each_bio (bio, req) {
+               bio_for_each_segment (bvec, bio, idx) {
+                       BUG_ON(ring_req->nr_segments
+                              == BLKIF_MAX_SEGMENTS_PER_REQUEST);
+                       buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
+                       fsect = bvec->bv_offset >> 9;
+                       lsect = fsect + (bvec->bv_len >> 9) - 1;
+                       /* install a grant reference. */
+                       ref = gnttab_claim_grant_reference(&gref_head);
+                       ASSERT(ref != -ENOSPC);
+
+                       gnttab_grant_foreign_access_ref(
+                               ref,
+                               info->backend_id,
+                               buffer_mfn,
+                               rq_data_dir(req) );
+
+                       info->shadow[id].frame[ring_req->nr_segments] =
+                               buffer_mfn;
+
+                       ring_req->frame_and_sects[ring_req->nr_segments] =
+                               blkif_fas_from_gref(ref, fsect, lsect);
+
+                       ring_req->nr_segments++;
+               }
+       }
+
+       info->ring.req_prod_pvt++;
+
+       /* Keep a private copy so we can reissue requests when recovering. */
+       pickle_request(&info->shadow[id], ring_req);
+
+       gnttab_free_grant_references(gref_head);
+
+       return 0;
 }
 
 /*
@@ -306,756 +260,200 @@
  */
 void do_blkif_request(request_queue_t *rq)
 {
-    struct blkfront_info *info = NULL;
-    struct request *req;
-    int queued;
-
-    DPRINTK("Entered do_blkif_request\n");
-
-    queued = 0;
-
-    while ( (req = elv_next_request(rq)) != NULL )
-    {
-       info = req->rq_disk->private_data;
-
-        if ( !blk_fs_request(req) )
-        {
-            end_request(req, 0);
-            continue;
-        }
-
-       if (RING_FULL(&info->ring))
-               goto wait;
-
-        DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
-                req, req->cmd, req->sector, req->current_nr_sectors,
-                req->nr_sectors, req->buffer,
-                rq_data_dir(req) ? "write" : "read");
-
-        blkdev_dequeue_request(req);
-        if (blkif_queue_request(req)) {
-               blk_requeue_request(rq, req);
-        wait:
-               /* Avoid pointless unplugs. */
-               blk_stop_queue(rq);
-               break;
-        }
-
-        queued++;
-    }
-
-    if ( queued != 0 )
-        flush_requests(info);
+       struct blkfront_info *info = NULL;
+       struct request *req;
+       int queued;
+
+       DPRINTK("Entered do_blkif_request\n");
+
+       queued = 0;
+
+       while ((req = elv_next_request(rq)) != NULL) {
+               info = req->rq_disk->private_data;
+
+               if (!blk_fs_request(req)) {
+                       end_request(req, 0);
+                       continue;
+               }
+
+               if (RING_FULL(&info->ring))
+                       goto wait;
+
+               DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
+                       "(%u/%li) buffer:%p [%s]\n",
+                       req, req->cmd, req->sector, req->current_nr_sectors,
+                       req->nr_sectors, req->buffer,
+                       rq_data_dir(req) ? "write" : "read");
+
+               blkdev_dequeue_request(req);
+               if (blkif_queue_request(req)) {
+                       blk_requeue_request(rq, req);
+               wait:
+                       /* Avoid pointless unplugs. */
+                       blk_stop_queue(rq);
+                       break;
+               }
+
+               queued++;
+       }
+
+       if (queued != 0)
+               flush_requests(info);
 }
 
 
 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 {
-    struct request *req;
-    blkif_response_t *bret;
-    RING_IDX i, rp;
-    unsigned long flags;
-    struct blkfront_info *info = (struct blkfront_info *)dev_id;
-
-    spin_lock_irqsave(&blkif_io_lock, flags);
-
-    if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
-        spin_unlock_irqrestore(&blkif_io_lock, flags);
-        return IRQ_HANDLED;
-    }
-
-    rp = info->ring.sring->rsp_prod;
-    rmb(); /* Ensure we see queued responses up to 'rp'. */
-
-    for ( i = info->ring.rsp_cons; i != rp; i++ )
-    {
-        unsigned long id;
-
-        bret = RING_GET_RESPONSE(&info->ring, i);
-        id   = bret->id;
-        req  = (struct request *)blk_shadow[id].request;
-
-        blkif_completion(&blk_shadow[id]);
-
-        ADD_ID_TO_FREELIST(id);
-
-        switch ( bret->operation )
-        {
-        case BLKIF_OP_READ:
-        case BLKIF_OP_WRITE:
-            if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
-                DPRINTK("Bad return from blkdev data request: %x\n",
-                        bret->status);
-
-            if ( unlikely(end_that_request_first
-                          (req,
-                           (bret->status == BLKIF_RSP_OKAY),
-                           req->hard_nr_sectors)) )
-                BUG();
-            end_that_request_last(req);
-
-            break;
-        default:
-            BUG();
-        }
-    }
-
-    info->ring.rsp_cons = i;
-
-    kick_pending_request_queues(info);
-
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-    return IRQ_HANDLED;
-}
-
-#else
-/**************************  KERNEL VERSION 2.4  **************************/
-
-static kdev_t        sg_dev;
-static unsigned long sg_next_sect;
-
-/*
- * Request queues with outstanding work, but ring is currently full.
- * We need no special lock here, as we always access this with the
- * blkif_io_lock held. We only need a small maximum list.
- */
-#define MAX_PENDING 8
-static request_queue_t *pending_queues[MAX_PENDING];
-static int nr_pending;
-
-
-#define blkif_io_lock io_request_lock
-
-/*============================================================================*/
-static void kick_pending_request_queues(void)
-{
-    /* We kick pending request queues if the ring is reasonably empty. */
-    if ( (nr_pending != 0) &&
-         (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) )
-    {
-        /* Attempt to drain the queue, but bail if the ring becomes full. */
-        while ( (nr_pending != 0) && !RING_FULL(&info->ring) )
-            do_blkif_request(pending_queues[--nr_pending]);
-    }
-}
-
-int blkif_open(struct inode *inode, struct file *filep)
-{
-    short xldev = inode->i_rdev;
-    struct gendisk *gd = get_gendisk(xldev);
-    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-    short minor = MINOR(xldev);
-
-    if ( gd->part[minor].nr_sects == 0 )
-    {
-        /*
-         * Device either doesn't exist, or has zero capacity; we use a few
-         * cheesy heuristics to return the relevant error code
-         */
-        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
-             ((minor & (gd->max_p - 1)) != 0) )
-        {
-            /*
-             * We have a real device, but no such partition, or we just have a
-             * partition number so guess this is the problem.
-             */
-            return -ENXIO;     /* no such device or address */
-        }
-        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
-        {
-            /* This is a removable device => assume that media is missing. */
-            return -ENOMEDIUM; /* media not present (this is a guess) */
-        }
-        else
-        {
-            /* Just go for the general 'no such device' error. */
-            return -ENODEV;    /* no such device */
-        }
-    }
-
-    /* Update of usage count is protected by per-device semaphore. */
-    disk->usage++;
-
-    return 0;
-}
-
-
-int blkif_release(struct inode *inode, struct file *filep)
-{
-    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-
-    /*
-     * When usage drops to zero it may allow more VBD updates to occur.
-     * Update of usage count is protected by a per-device semaphore.
-     */
-    if ( --disk->usage == 0 ) {
-        vbd_update();
-    }
-
-    return 0;
-}
-
-
-int blkif_ioctl(struct inode *inode, struct file *filep,
-                unsigned command, unsigned long argument)
-{
-    kdev_t dev = inode->i_rdev;
-    struct hd_geometry *geo = (struct hd_geometry *)argument;
-    struct gendisk *gd;
-    struct hd_struct *part;
-    int i;
-    unsigned short cylinders;
-    byte heads, sectors;
-
-    /* NB. No need to check permissions. That is done for us. */
-
-    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long) argument, dev);
-
-    gd = get_gendisk(dev);
-    part = &gd->part[MINOR(dev)];
-
-    switch ( command )
-    {
-    case BLKGETSIZE:
-        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
-        return put_user(part->nr_sects, (unsigned long *) argument);
-
-    case BLKGETSIZE64:
-        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
-                      (u64)part->nr_sects * 512);
-        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
-
-    case BLKRRPART:                               /* re-read partition table */
-        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
-        return blkif_revalidate(dev);
-
-    case BLKSSZGET:
-        return hardsect_size[MAJOR(dev)][MINOR(dev)];
-
-    case BLKBSZGET:                                        /* get block size */
-        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
-        break;
-
-    case BLKBSZSET:                                        /* set block size */
-        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
-        break;
-
-    case BLKRASET:                                         /* set read-ahead */
-        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
-        break;
-
-    case BLKRAGET:                                         /* get read-ahead */
-        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
-        break;
-
-    case HDIO_GETGEO:
-        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
-        if (!argument) return -EINVAL;
-
-        /* We don't have real geometry info, but let's at least return
-           values consistent with the size of the device */
-
-        heads = 0xff;
-        sectors = 0x3f;
-        cylinders = part->nr_sects / (heads * sectors);
-
-        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
-        if (put_user(heads,  (byte *)&geo->heads)) return -EFAULT;
-        if (put_user(sectors,  (byte *)&geo->sectors)) return -EFAULT;
-        if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return 
-EFAULT;
-
-        return 0;
-
-    case HDIO_GETGEO_BIG:
-        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
-        if (!argument) return -EINVAL;
-
-        /* We don't have real geometry info, but let's at least return
-           values consistent with the size of the device */
-
-        heads = 0xff;
-        sectors = 0x3f;
-        cylinders = part->nr_sects / (heads * sectors);
-
-        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
-        if (put_user(heads,  (byte *)&geo->heads))   return -EFAULT;
-        if (put_user(sectors,  (byte *)&geo->sectors)) return -EFAULT;
-        if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return 
-EFAULT;
-
-        return 0;
-
-    case CDROMMULTISESSION:
-        DPRINTK("FIXME: support multisession CDs later\n");
-        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
-            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
-        return 0;
-
-    case SCSI_IOCTL_GET_BUS_NUMBER:
-        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
-        return -ENOSYS;
-
-    default:
-        WPRINTK("ioctl %08x not supported by XL blkif\n", command);
-        return -ENOSYS;
-    }
-
-    return 0;
-}
-
-
-
-/* check media change: should probably do something here in some cases :-) */
-int blkif_check(kdev_t dev)
-{
-    DPRINTK("blkif_check\n");
-    return 0;
-}
-
-int blkif_revalidate(kdev_t dev)
-{
-    struct block_device *bd;
-    struct gendisk *gd;
-    xl_disk_t *disk;
-    unsigned long capacity;
-    int i, rc = 0;
-
-    if ( (bd = bdget(dev)) == NULL )
-        return -EINVAL;
-
-    /*
-     * Update of partition info, and check of usage count, is protected
-     * by the per-block-device semaphore.
-     */
-    down(&bd->bd_sem);
-
-    if ( ((gd = get_gendisk(dev)) == NULL) ||
-         ((disk = xldev_to_xldisk(dev)) == NULL) ||
-         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
-    {
-        rc = -EINVAL;
-        goto out;
-    }
-
-    if ( disk->usage > 1 )
-    {
-        rc = -EBUSY;
-        goto out;
-    }
-
-    /* Only reread partition table if VBDs aren't mapped to partitions. */
-    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
-    {
-        for ( i = gd->max_p - 1; i >= 0; i-- )
-        {
-            invalidate_device(dev+i, 1);
-            gd->part[MINOR(dev+i)].start_sect = 0;
-            gd->part[MINOR(dev+i)].nr_sects   = 0;
-            gd->sizes[MINOR(dev+i)]           = 0;
-        }
-
-        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
-    }
-
- out:
-    up(&bd->bd_sem);
-    bdput(bd);
-    return rc;
-}
-
-
-/*
- * blkif_queue_request
- *
- * request block io
- * 
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- *   virtual address in the guest os.
- */
-static int blkif_queue_request(unsigned long   id,
-                               int             operation,
-                               char *          buffer,
-                               unsigned long   sector_number,
-                               unsigned short  nr_sectors,
-                               kdev_t          device,
-                              blkif_vdev_t    handle)
-{
-    unsigned long       buffer_ma = virt_to_bus(buffer);
-    unsigned long       xid;
-    struct gendisk     *gd;
-    blkif_request_t    *req;
-    struct buffer_head *bh;
-    unsigned int        fsect, lsect;
-    int ref;
-
-    fsect = (buffer_ma & ~PAGE_MASK) >> 9;
-    lsect = fsect + nr_sectors - 1;
-
-    /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
-    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
-        BUG();
-    if ( lsect > ((PAGE_SIZE/512)-1) )
-        BUG();
-
-    buffer_ma &= PAGE_MASK;
-
-    if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
-        return 1;
-
-    switch ( operation )
-    {
-
-    case BLKIF_OP_READ:
-    case BLKIF_OP_WRITE:
-        gd = get_gendisk(device);
-
-        /*
-         * Update the sector_number we'll pass down as appropriate; note that
-         * we could sanity check that resulting sector will be in this
-         * partition, but this will happen in driver backend anyhow.
-         */
-        sector_number += gd->part[MINOR(device)].start_sect;
-
-        /*
-         * If this unit doesn't consist of virtual partitions then we clear
-         * the partn bits from the device number.
-         */
-        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
-               GENHD_FL_VIRT_PARTNS) )
-            device &= ~(gd->max_p - 1);
-
-        if ( (sg_operation == operation) &&
-             (sg_dev == device) &&
-             (sg_next_sect == sector_number) )
-        {
-            req = RING_GET_REQUEST(&info->ring,
-                                   info->ring.req_prod_pvt - 1);
-            bh = (struct buffer_head *)id;
-
-            bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
-            blk_shadow[req->id].request = (unsigned long)id;
-
-            /* install a grant reference. */
-            ref = gnttab_claim_grant_reference(&gref_head);
-            ASSERT( ref != -ENOSPC );
-
-            gnttab_grant_foreign_access_ref(
-                        ref,
-                        info->backend_id,
-                        buffer_ma >> PAGE_SHIFT,
-                        ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
-
-            blk_shadow[req->id].frame[req->nr_segments] =
-                buffer_ma >> PAGE_SHIFT;
-
-            req->frame_and_sects[req->nr_segments] =
-                blkif_fas_from_gref(ref, fsect, lsect);
-            if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
-                sg_next_sect += nr_sectors;
-            else
-                DISABLE_SCATTERGATHER();
-
-            /* Update the copy of the request in the recovery ring. */
-            pickle_request(&blk_shadow[req->id], req );
-
-            return 0;
-        }
-        else if ( RING_FULL(&info->ring) )
-        {
-            return 1;
-        }
-        else
-        {
-            sg_operation = operation;
-            sg_dev       = device;
-            sg_next_sect = sector_number + nr_sectors;
-        }
-        break;
-
-    default:
-        panic("unknown op %d\n", operation);
-    }
-
-    /* Fill out a communications ring structure. */
-    req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-
-    xid = GET_ID_FROM_FREELIST();
-    blk_shadow[xid].request = (unsigned long)id;
-
-    req->id            = xid;
-    req->operation     = operation;
-    req->sector_number = (blkif_sector_t)sector_number;
-    req->handle        = handle;
-    req->nr_segments   = 1;
-    /* install a grant reference. */
-    ref = gnttab_claim_grant_reference(&gref_head);
-    ASSERT( ref != -ENOSPC );
-
-    gnttab_grant_foreign_access_ref(
-                ref,
-                info->backend_id,
-                buffer_ma >> PAGE_SHIFT,
-                ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
-
-    blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
-
-    req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect);
-
-    /* Keep a private copy so we can reissue requests when recovering. */
-    pickle_request(&blk_shadow[xid], req);
-
-    info->ring.req_prod_pvt++;
-
-    return 0;
-}
-
-
-/*
- * do_blkif_request
- *  read a block; request is in a request queue
- */
-void do_blkif_request(request_queue_t *rq)
-{
-    struct request *req;
-    struct buffer_head *bh, *next_bh;
-    int rw, nsect, full, queued = 0;
-
-    DPRINTK("Entered do_blkif_request\n");
-
-    while ( !rq->plugged && !list_empty(&rq->queue_head))
-    {
-        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
-            goto out;
-
-        DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
-                req, req->cmd, req->sector,
-                req->current_nr_sectors, req->nr_sectors, req->bh);
-
-        rw = req->cmd;
-        if ( rw == READA )
-            rw = READ;
-        if ( unlikely((rw != READ) && (rw != WRITE)) )
-            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
-
-        req->errors = 0;
-
-        bh = req->bh;
-        while ( bh != NULL )
-        {
-            next_bh = bh->b_reqnext;
-            bh->b_reqnext = NULL;
-
-            full = blkif_queue_request(
-                (unsigned long)bh,
-                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
-                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
-
-            if ( full )
-            {
-                bh->b_reqnext = next_bh;
-                pending_queues[nr_pending++] = rq;
-                if ( unlikely(nr_pending >= MAX_PENDING) )
-                    BUG();
-                goto out;
-            }
-
-            queued++;
-
-            /* Dequeue the buffer head from the request. */
-            nsect = bh->b_size >> 9;
-            bh = req->bh = next_bh;
-
-            if ( bh != NULL )
-            {
-                /* There's another buffer head to do. Update the request. */
-                req->hard_sector += nsect;
-                req->hard_nr_sectors -= nsect;
-                req->sector = req->hard_sector;
-                req->nr_sectors = req->hard_nr_sectors;
-                req->current_nr_sectors = bh->b_size >> 9;
-                req->buffer = bh->b_data;
-            }
-            else
-            {
-                /* That was the last buffer head. Finalise the request. */
-                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
-                    BUG();
-                blkdev_dequeue_request(req);
-                end_that_request_last(req);
-            }
-        }
-    }
-
- out:
-    if ( queued != 0 )
-        flush_requests();
-}
-
-
-static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
-    RING_IDX i, rp;
-    unsigned long flags;
-    struct buffer_head *bh, *next_bh;
-
-    spin_lock_irqsave(&io_request_lock, flags);
-
-    if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) )
-    {
-        spin_unlock_irqrestore(&io_request_lock, flags);
-        return;
-    }
-
-    rp = info->ring.sring->rsp_prod;
-    rmb(); /* Ensure we see queued responses up to 'rp'. */
-
-    for ( i = info->ring.rsp_cons; i != rp; i++ )
-    {
-        unsigned long id;
-        blkif_response_t *bret;
-
-        bret = RING_GET_RESPONSE(&info->ring, i);
-        id = bret->id;
-        bh = (struct buffer_head *)blk_shadow[id].request;
-
-        blkif_completion(&blk_shadow[id]);
-
-        ADD_ID_TO_FREELIST(id);
-
-        switch ( bret->operation )
-        {
-        case BLKIF_OP_READ:
-        case BLKIF_OP_WRITE:
-            if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
-                DPRINTK("Bad return from blkdev data request: %lx\n",
-                        bret->status);
-            for ( ; bh != NULL; bh = next_bh )
-            {
-                next_bh = bh->b_reqnext;
-                bh->b_reqnext = NULL;
-                bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
-            }
-
-            break;
-        case BLKIF_OP_PROBE:
-            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
-            blkif_control_rsp_valid = 1;
-            break;
-        default:
-            BUG();
-        }
-
-    }
-    info->ring.rsp_cons = i;
-
-    kick_pending_request_queues();
-
-    spin_unlock_irqrestore(&io_request_lock, flags);
-}
-
-#endif
-
-/*****************************  COMMON CODE  *******************************/
+       struct request *req;
+       blkif_response_t *bret;
+       RING_IDX i, rp;
+       unsigned long flags;
+       struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+       spin_lock_irqsave(&blkif_io_lock, flags);
+
+       if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
+               spin_unlock_irqrestore(&blkif_io_lock, flags);
+               return IRQ_HANDLED;
+       }
+
+       rp = info->ring.sring->rsp_prod;
+       rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+       for (i = info->ring.rsp_cons; i != rp; i++) {
+               unsigned long id;
+
+               bret = RING_GET_RESPONSE(&info->ring, i);
+               id   = bret->id;
+               req  = (struct request *)info->shadow[id].request;
+
+               blkif_completion(&info->shadow[id]);
+
+               ADD_ID_TO_FREELIST(info, id);
+
+               switch (bret->operation) {
+               case BLKIF_OP_READ:
+               case BLKIF_OP_WRITE:
+                       if (unlikely(bret->status != BLKIF_RSP_OKAY))
+                               DPRINTK("Bad return from blkdev data "
+                                       "request: %x\n", bret->status);
+
+                       BUG_ON(end_that_request_first(
+                               req, (bret->status == BLKIF_RSP_OKAY),
+                               req->hard_nr_sectors));
+                       end_that_request_last(req);
+                       break;
+               default:
+                       BUG();
+               }
+       }
+
+       info->ring.rsp_cons = i;
+
+       kick_pending_request_queues(info);
+
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       return IRQ_HANDLED;
+}
 
 static void blkif_free(struct blkfront_info *info)
 {
-    /* Prevent new requests being issued until we fix things up. */
-    spin_lock_irq(&blkif_io_lock);
-    info->connected = BLKIF_STATE_DISCONNECTED;
-    spin_unlock_irq(&blkif_io_lock);
-
-    /* Free resources associated with old device channel. */
-    if ( info->ring.sring != NULL )
-    {
-        free_page((unsigned long)info->ring.sring);
-        info->ring.sring = NULL;
-    }
-    unbind_evtchn_from_irqhandler(info->evtchn, NULL);
-    info->evtchn = 0;
+       /* Prevent new requests being issued until we fix things up. */
+       spin_lock_irq(&blkif_io_lock);
+       info->connected = BLKIF_STATE_DISCONNECTED;
+       spin_unlock_irq(&blkif_io_lock);
+
+       /* Free resources associated with old device channel. */
+       if (info->ring.sring != NULL) {
+               free_page((unsigned long)info->ring.sring);
+               info->ring.sring = NULL;
+       }
+       if (info->ring_ref != GRANT_INVALID_REF)
+               gnttab_end_foreign_access(info->ring_ref, 0);
+       info->ring_ref = GRANT_INVALID_REF;
+       unbind_evtchn_from_irqhandler(info->evtchn, info); 
+       info->evtchn = 0;
 }
 
 static void blkif_recover(struct blkfront_info *info)
 {
-    int i;
-    blkif_request_t *req;
-    struct blk_shadow *copy;
-    int j;
-
-    /* Stage 1: Make a safe copy of the shadow state. */
-    copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
-    BUG_ON(copy == NULL);
-    memcpy(copy, blk_shadow, sizeof(blk_shadow));
-
-    /* Stage 2: Set up free list. */
-    memset(&blk_shadow, 0, sizeof(blk_shadow));
-    for ( i = 0; i < BLK_RING_SIZE; i++ )
-        blk_shadow[i].req.id = i+1;
-    blk_shadow_free = info->ring.req_prod_pvt;
-    blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
-
-    /* Stage 3: Find pending requests and requeue them. */
-    for ( i = 0; i < BLK_RING_SIZE; i++ )
-    {
-        /* Not in use? */
-        if ( copy[i].request == 0 )
-            continue;
-
-        /* Grab a request slot and unpickle shadow state into it. */
-        req = RING_GET_REQUEST(
-            &info->ring, info->ring.req_prod_pvt);
-        unpickle_request(req, &copy[i]);
-
-        /* We get a new request id, and must reset the shadow state. */
-        req->id = GET_ID_FROM_FREELIST();
-        memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
-
-        /* Rewrite any grant references invalidated by suspend/resume. */
-        for ( j = 0; j < req->nr_segments; j++ )
-        {
-            if ( req->frame_and_sects[j] & GRANTREF_INVALID )
-                gnttab_grant_foreign_access_ref(
-                    blkif_gref_from_fas(req->frame_and_sects[j]),
-                    info->backend_id,
-                    blk_shadow[req->id].frame[j],
-                    rq_data_dir((struct request *)
-                                blk_shadow[req->id].request));
-            req->frame_and_sects[j] &= ~GRANTREF_INVALID;
-        }
-        blk_shadow[req->id].req = *req;
-
-        info->ring.req_prod_pvt++;
-    }
-
-    kfree(copy);
-
-    recovery = 0;
-
-    /* info->ring->req_prod will be set when we flush_requests().*/
-    wmb();
-
-    /* Kicks things back into life. */
-    flush_requests(info);
-
-    /* Now safe to left other people use the interface. */
-    info->connected = BLKIF_STATE_CONNECTED;
+       int i;
+       blkif_request_t *req;
+       struct blk_shadow *copy;
+       int j;
+
+       /* Stage 1: Make a safe copy of the shadow state. */
+       copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL);
+       BUG_ON(copy == NULL);
+       memcpy(copy, info->shadow, sizeof(info->shadow));
+
+       /* Stage 2: Set up free list. */
+       memset(&info->shadow, 0, sizeof(info->shadow));
+       for (i = 0; i < BLK_RING_SIZE; i++)
+               info->shadow[i].req.id = i+1;
+       info->shadow_free = info->ring.req_prod_pvt;
+       info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+       /* Stage 3: Find pending requests and requeue them. */
+       for (i = 0; i < BLK_RING_SIZE; i++) {
+               /* Not in use? */
+               if (copy[i].request == 0)
+                       continue;
+
+               /* Grab a request slot and unpickle shadow state into it. */
+               req = RING_GET_REQUEST(
+                       &info->ring, info->ring.req_prod_pvt);
+               unpickle_request(req, &copy[i]);
+
+               /* We get a new request id, and must reset the shadow state. */
+               req->id = GET_ID_FROM_FREELIST(info);
+               memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+               /* Rewrite any grant references invalidated by susp/resume. */
+               for (j = 0; j < req->nr_segments; j++) {
+                       if ( req->frame_and_sects[j] & GRANTREF_INVALID )
+                               gnttab_grant_foreign_access_ref(
+                                       blkif_gref_from_fas(
+                                               req->frame_and_sects[j]),
+                                       info->backend_id,
+                                       info->shadow[req->id].frame[j],
+                                       rq_data_dir(
+                                               (struct request *)
+                                               info->shadow[req->id].request));
+                       req->frame_and_sects[j] &= ~GRANTREF_INVALID;
+               }
+               info->shadow[req->id].req = *req;
+
+               info->ring.req_prod_pvt++;
+       }
+
+       kfree(copy);
+
+       recovery = 0;
+
+       /* info->ring->req_prod will be set when we flush_requests().*/
+       wmb();
+
+       /* Kicks things back into life. */
+       flush_requests(info);
+
+       /* Now safe to left other people use the interface. */
+       info->connected = BLKIF_STATE_CONNECTED;
 }
 
 static void blkif_connect(struct blkfront_info *info, u16 evtchn)
 {
-    int err = 0;
-
-    info->evtchn = evtchn;
-
-    err = bind_evtchn_to_irqhandler(
-        info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
-    if ( err != 0 )
-    {
-        WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
-        return;
-    }
+       int err = 0;
+
+       info->evtchn = evtchn;
+
+       err = bind_evtchn_to_irqhandler(
+               info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
+       if (err != 0) {
+               WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
+               return;
+       }
 }
 
 
@@ -1107,6 +505,8 @@
        blkif_sring_t *sring;
        evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
        int err;
+
+       info->ring_ref = GRANT_INVALID_REF;
 
        sring = (void *)__get_free_page(GFP_KERNEL);
        if (!sring) {
@@ -1130,6 +530,7 @@
        err = HYPERVISOR_event_channel_op(&op);
        if (err) {
                gnttab_end_foreign_access(info->ring_ref, 0);
+               info->ring_ref = GRANT_INVALID_REF;
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = 0;
                xenbus_dev_error(dev, err, "allocating event channel");
@@ -1227,9 +628,8 @@
 static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
 {
-       int err;
+       int err, vdevice, i;
        struct blkfront_info *info;
-       int vdevice;
 
        /* FIXME: Use dynamic device id if this is not set. */
        err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
@@ -1250,6 +650,12 @@
        info->connected = BLKIF_STATE_DISCONNECTED;
        info->mi = NULL;
        INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
+
+       info->shadow_free = 0;
+       memset(info->shadow, 0, sizeof(info->shadow));
+       for (i = 0; i < BLK_RING_SIZE; i++)
+               info->shadow[i].req.id = i+1;
+       info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
@@ -1329,55 +735,57 @@
 
 static int wait_for_blkif(void)
 {
-    int err = 0;
-    int i;
-
-    /*
-     * We should figure out how many and which devices we need to
-     * proceed and only wait for those.  For now, continue once the
-     * first device is around.
-     */
-    for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-    }
-
-    if ( blkif_state != BLKIF_STATE_CONNECTED )
-    {
-        WPRINTK("Timeout connecting to device!\n");
-        err = -ENOSYS;
-    }
-    return err;
+       int err = 0;
+       int i;
+
+       /*
+        * We should figure out how many and which devices we need to
+        * proceed and only wait for those.  For now, continue once the
+        * first device is around.
+        */
+       for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(1);
+       }
+
+       if (blkif_state != BLKIF_STATE_CONNECTED) {
+               WPRINTK("Timeout connecting to device!\n");
+               err = -ENOSYS;
+       }
+       return err;
 }
 
 static int __init xlblk_init(void)
 {
-    int i;
-
-    if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
-         (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
-        return 0;
-
-    IPRINTK("Initialising virtual block device driver\n");
-
-    blk_shadow_free = 0;
-    memset(blk_shadow, 0, sizeof(blk_shadow));
-    for ( i = 0; i < BLK_RING_SIZE; i++ )
-        blk_shadow[i].req.id = i+1;
-    blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
-
-    init_blk_xenbus();
-
-    wait_for_blkif();
-
-    return 0;
-}
+       if ((xen_start_info->flags & SIF_INITDOMAIN) ||
+           (xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
+               return 0;
+
+       IPRINTK("Initialising virtual block device driver\n");
+
+       init_blk_xenbus();
+
+       wait_for_blkif();
+
+       return 0;
+}
+
+module_init(xlblk_init);
 
 static void blkif_completion(struct blk_shadow *s)
 {
-    int i;
-    for ( i = 0; i < s->req.nr_segments; i++ )
-        gnttab_free_grant_reference(
-               blkif_gref_from_fas(s->req.frame_and_sects[i]));
-}
+       int i;
+       for (i = 0; i < s->req.nr_segments; i++)
+               gnttab_end_foreign_access(
+                       blkif_gref_from_fas(s->req.frame_and_sects[i]), 0);
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Sep  9 16:30:54 2005
@@ -96,6 +96,14 @@
        struct xlbd_type_info *type;
 };
 
+struct blk_shadow {
+       blkif_request_t req;
+       unsigned long request;
+       unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
  * hang in private_data off the gendisk structure. We may end up
@@ -116,11 +124,11 @@
        blkif_front_ring_t ring;
        unsigned int evtchn;
        struct xlbd_major_info *mi;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
        request_queue_t *rq;
-#endif
        struct work_struct work;
        struct gnttab_free_callback callback;
+       struct blk_shadow shadow[BLK_RING_SIZE];
+       unsigned long shadow_free;
 };
 
 extern spinlock_t blkif_io_lock;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/blktap/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile  Fri Sep  9 16:30:54 2005
@@ -1,3 +1,3 @@
 
-obj-y  := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
+obj-y  := xenbus.o interface.o blktap.o 
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Sep  9 16:30:54 2005
@@ -1,90 +1,916 @@
 /******************************************************************************
- * blktap.c
+ * arch/xen/drivers/blkif/blktap/blktap.c
  * 
- * XenLinux virtual block-device tap.
+ * This is a modified version of the block backend driver that remaps requests
+ * to a user-space memory region.  It is intended to be used to write 
+ * application-level servers that provide block interfaces to client VMs.
  * 
- * Copyright (c) 2004, Andrew Warfield
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <asm-xen/balloon.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/tlbflush.h>
+#include "common.h"
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* current switching mode */
+static unsigned long blktap_mode;
+
+/* local prototypes */
+static int blktap_read_ufe_ring(void);
+
+
+/* /dev/xen/blktap resides at device number major=10, minor=200        */ 
+#define BLKTAP_MINOR 202
+
+/* blktap IOCTLs:                                                      */
+#define BLKTAP_IOCTL_KICK_FE         1
+#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
+#define BLKTAP_IOCTL_SETMODE         3
+#define BLKTAP_IOCTL_PRINT_IDXS      100  
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
+#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
+#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp. */
+#define BLKTAP_MODE_COPY_FE          0x00000004  /* unimp. */
+#define BLKTAP_MODE_COPY_BE          0x00000008  /* unimp. */
+#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010  /* unimp. */
+#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020  /* unimp. */
+
+#define BLKTAP_MODE_INTERPOSE \
+           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+    return (
+        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
+        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+        ( arg == BLKTAP_MODE_INTERPOSE    ) );
+/*
+    return (
+        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
+        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
+        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+        );
+*/
+}
+
+
+/******************************************************************
+ * MMAP REGION
+ */
+
+/*
+ * We use a big chunk of address space to map in-flight requests into,
+ * and export this region up to user-space.  See the comments in blkback
+ * about this -- the two must be kept in sync if the tap is used as a 
+ * passthrough.
+ */
+
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+#define RING_PAGES 1 /* Front */ 
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma = NULL;
+unsigned long mmap_vstart;  /* Kernel pages for mapping in data. */
+unsigned long rings_vstart; /* start of mmaped vma               */
+unsigned long user_vstart;  /* start of user mappings            */
+
+#define MMAP_PAGES                                              \
+    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_start, _req,_seg)                           \
+    (_start +                                                   \
+     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
+     ((_seg) * PAGE_SIZE))
+
+
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a 
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
+ * the pendcnt towards zero. When it hits zero, the specified domain has a 
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+    blkif_t       *blkif;
+    unsigned long  id;
+    int            nr_pages;
+    atomic_t       pendcnt;
+    unsigned short operation;
+    int            status;
+} pending_req_t;
+
+/*
+ * We can't allocate pending_req's in order, since they may complete out of 
+ * order. We therefore maintain an allocation ring. This ring also indicates 
+ * when enough work has been passed down -- at that point the allocation ring 
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+/* NB. We use a different index type to differentiate from shared blk rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message.  In it we put the AR index _AND_ the fe domid.
+ * the domid is used by the backend to map the pages properly.
+ */
+
+static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
+{
+    return ( (fe_dom << 16) | MASK_PEND_IDX(idx) );
+}
+
+extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) 
+{ 
+    return (PEND_RING_IDX)( id & 0x0000ffff );
+}
+
+extern inline domid_t ID_TO_DOM(unsigned long id) 
+{ 
+    return (domid_t)(id >> 16); 
+}
+
+
+
+/******************************************************************
+ * GRANT HANDLES
+ */
+
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+struct grant_handle_pair
+{
+    u16  kernel;
+    u16  user;
+};
+static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
+#define pending_handle(_idx, _i) \
+    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+#define BLKTAP_INVALID_HANDLE(_g) \
+    (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
+#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
+    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+    } while(0)
+
+
+/******************************************************************
+ * BLKTAP VM OPS
+ */
+
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+                                             unsigned long address,
+                                             int *type)
+{
+    /*
+     * if the page has not been mapped in by the driver then generate
+     * a SIGBUS to the domain.
+     */
+
+    force_sig(SIGBUS, current);
+
+    return 0;
+}
+
+struct vm_operations_struct blktap_vm_ops = {
+    nopage:   blktap_nopage,
+};
+
+/******************************************************************
+ * BLKTAP FILE OPS
+ */
+
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+    blkif_sring_t *sring;
+    
+    if ( test_and_set_bit(0, &blktap_dev_inuse) )
+        return -EBUSY;
+    
+    /* Allocate the fe ring. */
+    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (sring == NULL)
+        goto fail_nomem;
+
+    SetPageReserved(virt_to_page(sring));
+    
+    SHARED_RING_INIT(sring);
+    FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
+
+    return 0;
+
+ fail_nomem:
+    return -ENOMEM;
+}
+
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+    blktap_dev_inuse = 0;
+    blktap_ring_ok = 0;
+
+    /* Free the ring page. */
+    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+    free_page((unsigned long) blktap_ufe_ring.sring);
+
+    /* Clear any active mappings and free foreign map table */
+    if (blktap_vma != NULL) {
+        zap_page_range(blktap_vma, blktap_vma->vm_start, 
+                       blktap_vma->vm_end - blktap_vma->vm_start, NULL);
+        blktap_vma = NULL;
+    }
+
+    return 0;
+}
+
+
+/* Note on mmap:
+ * We need to map pages to user space in a way that will allow the block
+ * subsystem set up direct IO to them.  This couldn't be done before, because
+ * there isn't really a sane way to translate a user virtual address down to a 
+ * physical address when the page belongs to another domain.
  *
- * Based on the original split block driver:
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- * Copyright (c) 2004, Christian Limpach
- * 
- * Note that unlike the split block driver code, this driver has been developed
- * strictly for Linux 2.6
- */
-
-#include "blktap.h"
-
-int __init xlblktap_init(void)
-{
-    ctrl_msg_t               cmsg;
-    blkif_fe_driver_status_t fe_st;
-    blkif_be_driver_status_t be_st;
-
-    printk(KERN_INFO "Initialising Xen block tap device\n");
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    printk(KERN_INFO "Block tap is using grant tables.\n");
-#endif
-
-    DPRINTK("   tap - Backend connection init:\n");
-
-
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_fe_driver_status_t);
-    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
-    DPRINTK("   tap - Frontend connection init:\n");
+ * My first approach was to map the page in to kernel memory, add an entry
+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
+ * and then attempt to map that page up to user space.  This is disallowed
+ * by xen though, which realizes that we don't really own the machine frame
+ * underlying the physical page.
+ *
+ * The new approach is to provide explicit support for this in xen linux.
+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
+ * mapped from other vms.  vma->vm_private_data is set up as a mapping 
+ * from pages to actual page structs.  There is a new clause in get_user_pages
+ * that does the right thing for this sort of mapping.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+    int size;
+    struct page **map;
+    int i;
+
+    DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+           vma->vm_start, vma->vm_end);
+
+    vma->vm_flags |= VM_RESERVED;
+    vma->vm_ops = &blktap_vm_ops;
+
+    size = vma->vm_end - vma->vm_start;
+    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+        printk(KERN_INFO 
+               "blktap: you _must_ map exactly %d pages!\n",
+               MMAP_PAGES + RING_PAGES);
+        return -EAGAIN;
+    }
+
+    size >>= PAGE_SHIFT;
+    DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
     
-    active_reqs_init();
+    rings_vstart = vma->vm_start;
+    user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+    
+    /* Map the ring pages to the start of the region and reserve it. */
+
+    /* not sure if I really need to do this... */
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+    if (remap_pfn_range(vma, vma->vm_start, 
+                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
+                         PAGE_SIZE, vma->vm_page_prot)) 
+    {
+        WPRINTK("Mapping user ring failed!\n");
+        goto fail;
+    }
+
+    /* Mark this VM as containing foreign pages, and set up mappings. */
+    map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+                  * sizeof(struct page_struct*),
+                  GFP_KERNEL);
+    if (map == NULL) 
+    {
+        WPRINTK("Couldn't alloc VM_FOREIGH map.\n");
+        goto fail;
+    }
+
+    for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
+        map[i] = NULL;
+    
+    vma->vm_private_data = map;
+    vma->vm_flags |= VM_FOREIGN;
+
+    blktap_vma = vma;
+    blktap_ring_ok = 1;
+
+    return 0;
+ fail:
+    /* Clear any active mappings. */
+    zap_page_range(vma, vma->vm_start, 
+                   vma->vm_end - vma->vm_start, NULL);
+
+    return -ENOMEM;
+}
+
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+    switch(cmd) {
+    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+        return blktap_read_ufe_ring();
+
+    case BLKTAP_IOCTL_SETMODE:
+        if (BLKTAP_MODE_VALID(arg)) {
+            blktap_mode = arg;
+            /* XXX: may need to flush rings here. */
+            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+            return 0;
+        }
+    case BLKTAP_IOCTL_PRINT_IDXS:
+        {
+            //print_fe_ring_idxs();
+            WPRINTK("User Rings: \n-----------\n");
+            WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+                            "| req_prod: %2d, rsp_prod: %2d\n",
+                            blktap_ufe_ring.rsp_cons,
+                            blktap_ufe_ring.req_prod_pvt,
+                            blktap_ufe_ring.sring->req_prod,
+                            blktap_ufe_ring.sring->rsp_prod);
+            
+        }
+    }
+    return -ENOIOCTLCMD;
+}
+
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+        poll_wait(file, &blktap_wait, wait);
+        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ) 
+        {
+            flush_tlb_all();
+
+            RING_PUSH_REQUESTS(&blktap_ufe_ring);
+            return POLLIN | POLLRDNORM;
+        }
+
+        return 0;
+}
+
+void blktap_kick_user(void)
+{
+    /* blktap_ring->req_prod = blktap_req_prod; */
+    wake_up_interruptible(&blktap_wait);
+}
+
+static struct file_operations blktap_fops = {
+    owner:    THIS_MODULE,
+    poll:     blktap_poll,
+    ioctl:    blktap_ioctl,
+    open:     blktap_open,
+    release:  blktap_release,
+    mmap:     blktap_mmap,
+};
+
+
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, int st);
+
+
+static void fast_flush_area(int idx, int nr_pages)
+{
+    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+    unsigned int i, op = 0;
+    struct grant_handle_pair *handle;
+    unsigned long ptep;
+
+    for (i=0; i<nr_pages; i++)
+    {
+        handle = &pending_handle(idx, i);
+        if (!BLKTAP_INVALID_HANDLE(handle))
+        {
+
+            unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
+            unmap[op].dev_bus_addr = 0;
+            unmap[op].handle = handle->kernel;
+            op++;
+
+            if (create_lookup_pte_addr(blktap_vma->vm_mm,
+                                       MMAP_VADDR(user_vstart, idx, i), 
+                                       &ptep) !=0) {
+                DPRINTK("Couldn't get a pte addr!\n");
+                return;
+            }
+            unmap[op].host_addr    = ptep;
+            unmap[op].dev_bus_addr = 0;
+            unmap[op].handle       = handle->user;
+            op++;
+            
+           BLKTAP_INVALIDATE_HANDLE(handle);
+        }
+    }
+    if ( unlikely(HYPERVISOR_grant_table_op(
+        GNTTABOP_unmap_grant_ref, unmap, op)))
+        BUG();
+
+    if (blktap_vma != NULL)
+        zap_page_range(blktap_vma, 
+                       MMAP_VADDR(user_vstart, idx, 0), 
+                       nr_pages << PAGE_SHIFT, NULL);
+}
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+    {
+        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int blkio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    blkif_t          *blkif;
+    struct list_head *ent;
+
+    daemonize("xenblkd");
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&blkio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
+             list_empty(&blkio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&blkio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+                !list_empty(&blkio_schedule_list) )
+        {
+            ent = blkio_schedule_list.next;
+            blkif = list_entry(ent, blkif_t, blkdev_list);
+            blkif_get(blkif);
+            remove_from_blkdev_list(blkif);
+            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(blkif);
+            blkif_put(blkif);
+        }
+    }
+}
+
+static void maybe_trigger_blkio_schedule(void)
+{
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+         !list_empty(&blkio_schedule_list) )
+        wake_up(&blkio_schedule_wait);
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+
+static int blktap_read_ufe_ring(void)
+{
+    /* This is called to read responses from the UFE ring. */
+
+    RING_IDX i, j, rp;
+    blkif_response_t *resp;
+    blkif_t *blkif;
+    int pending_idx;
+    pending_req_t *pending_req;
+    unsigned long     flags;
+
+    /* if we are forwarding from UFERring to FERing */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+        /* for each outstanding message on the UFEring  */
+        rp = blktap_ufe_ring.sring->rsp_prod;
+        rmb();
+        
+        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+        {
+            resp = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+            pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id));
+            pending_req = &pending_reqs[pending_idx];
+            
+            blkif = pending_req->blkif;
+            for (j = 0; j < pending_req->nr_pages; j++) {
+                unsigned long vaddr;
+                struct page **map = blktap_vma->vm_private_data;
+                int offset; 
+
+                vaddr  = MMAP_VADDR(user_vstart, pending_idx, j);
+                offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+
+                //ClearPageReserved(virt_to_page(vaddr));
+                ClearPageReserved((struct page *)map[offset]);
+                map[offset] = NULL;
+            }
+
+            fast_flush_area(pending_idx, pending_req->nr_pages);
+            make_response(blkif, pending_req->id, resp->operation, 
+                          resp->status);
+            blkif_put(pending_req->blkif);
+            spin_lock_irqsave(&pend_prod_lock, flags);
+            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+            spin_unlock_irqrestore(&pend_prod_lock, flags);
+        }
+        blktap_ufe_ring.rsp_cons = i;
+        maybe_trigger_blkio_schedule();
+    }
+    return 0;
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_blkio_schedule();
+    return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+    blkif_request_t *req;
+    RING_IDX i, rp;
+    int more_to_do = 0;
+    
+    rp = blk_ring->sring->req_prod;
+    rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    for ( i = blk_ring->req_cons; 
+         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
+          i++ )
+    {
+        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+        {
+            more_to_do = 1;
+            break;
+        }
+        
+        req = RING_GET_REQUEST(blk_ring, i);
+        switch ( req->operation )
+        {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+            dispatch_rw_block_io(blkif, req);
+            break;
+
+        default:
+            DPRINTK("error: unknown block io operation [%d]\n",
+                    req->operation);
+            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+            break;
+        }
+    }
+
+    blk_ring->req_cons = i;
+    blktap_kick_user();
+
+    return more_to_do;
+}
+
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+{
+    blkif_request_t *target;
+    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+    pending_req_t *pending_req;
+    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+    int op, ret;
+    unsigned int nseg;
+
+    /* Check that number of segments is sane. */
+    nseg = req->nr_segments;
+    if ( unlikely(nseg == 0) || 
+         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
+    {
+        DPRINTK("Bad number of segments in request (%d)\n", nseg);
+        goto bad_descriptor;
+    }
+
+    /* Make sure userspace is ready. */
+    if (!blktap_ring_ok) {
+        DPRINTK("blktap: ring not ready for requests!\n");
+        goto bad_descriptor;
+    }
+    
+
+    if ( RING_FULL(&blktap_ufe_ring) ) {
+        WPRINTK("blktap: fe_ring is full, can't add (very broken!).\n");
+        goto bad_descriptor;
+    }
+
+    flush_cache_all(); /* a noop on intel... */
+
+    /* Map the foreign pages directly in to the application */    
+    op = 0;
+    for (i=0; i<req->nr_segments; i++) {
+
+        unsigned long uvaddr;
+        unsigned long kvaddr;
+        unsigned long ptep;
+
+        uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
+        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
+
+        /* Map the remote page to kernel. */
+        map[op].host_addr = kvaddr;
+        map[op].dom   = blkif->domid;
+        map[op].ref   = blkif_gref_from_fas(req->frame_and_sects[i]);
+        map[op].flags = GNTMAP_host_map;
+        /* This needs a bit more thought in terms of interposition: 
+         * If we want to be able to modify pages during write using 
+         * grant table mappings, the guest will either need to allow 
+         * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */
+        if (req->operation == BLKIF_OP_WRITE)
+            map[op].flags |= GNTMAP_readonly;
+        op++;
+
+        /* Now map it to user. */
+        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+        if (ret)
+        {
+            DPRINTK("Couldn't get a pte addr!\n");
+            fast_flush_area(pending_idx, req->nr_segments);
+            goto bad_descriptor;
+        }
+
+        map[op].host_addr = ptep;
+        map[op].dom       = blkif->domid;
+        map[op].ref       = blkif_gref_from_fas(req->frame_and_sects[i]);
+        map[op].flags     = GNTMAP_host_map | GNTMAP_application_map
+                            | GNTMAP_contains_pte;
+        /* Above interposition comment applies here as well. */
+        if (req->operation == BLKIF_OP_WRITE)
+            map[op].flags |= GNTMAP_readonly;
+        op++;
+    }
+
+    if ( unlikely(HYPERVISOR_grant_table_op(
+            GNTTABOP_map_grant_ref, map, op)))
+        BUG();
+
+    op = 0;
+    for (i=0; i<(req->nr_segments*2); i+=2) {
+        unsigned long uvaddr;
+        unsigned long kvaddr;
+        unsigned long offset;
+        int cancel = 0;
+
+        uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2);
+        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2);
+
+        if ( unlikely(map[i].handle < 0) ) 
+        {
+            DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
+            ret = map[i].handle;
+            cancel = 1;
+        }
+
+        if ( unlikely(map[i+1].handle < 0) ) 
+        {
+            DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
+            ret = map[i+1].handle;
+            cancel = 1;
+        }
+
+        if (cancel) 
+        {
+            fast_flush_area(pending_idx, req->nr_segments);
+            goto bad_descriptor;
+        }
+
+        /* Set the necessary mappings in p2m and in the VM_FOREIGN 
+         * vm_area_struct to allow user vaddr -> struct page lookups
+         * to work.  This is needed for direct IO to foreign pages. */
+        phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
+            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+        ((struct page **)blktap_vma->vm_private_data)[offset] =
+            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+
+        /* Save handles for unmapping later. */
+        pending_handle(pending_idx, i/2).kernel = map[i].handle;
+        pending_handle(pending_idx, i/2).user   = map[i+1].handle;
+    }
+
+    /* Mark mapped pages as reserved: */
+    for ( i = 0; i < req->nr_segments; i++ )
+    {
+        unsigned long kvaddr;
+
+        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
+        SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+    }
+
+    pending_req = &pending_reqs[pending_idx];
+    pending_req->blkif     = blkif;
+    pending_req->id        = req->id;
+    pending_req->operation = req->operation;
+    pending_req->status    = BLKIF_RSP_OKAY;
+    pending_req->nr_pages  = nseg;
+    req->id = MAKE_ID(blkif->domid, pending_idx);
+    //atomic_set(&pending_req->pendcnt, nbio);
+    pending_cons++;
+    blkif_get(blkif);
+
+    /* Finally, write the request message to the user ring. */
+    target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
+    memcpy(target, req, sizeof(*req));
+    blktap_ufe_ring.req_prod_pvt++;
+    return;
+
+ bad_descriptor:
+    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, int st)
+{
+    blkif_response_t *resp;
+    unsigned long     flags;
+    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+
+    /* Place on the response ring for the relevant domain. */ 
+    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+    resp->id        = id;
+    resp->operation = op;
+    resp->status    = st;
+    wmb(); /* Ensure other side can see the response fields. */
+    blk_ring->rsp_prod_pvt++;
+    RING_PUSH_RESPONSES(blk_ring);
+    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+    /* Kick the relevant domain. */
+    notify_via_evtchn(blkif->evtchn);
+}
+
+static struct miscdevice blktap_miscdev = {
+    .minor        = BLKTAP_MINOR,
+    .name         = "blktap",
+    .fops         = &blktap_fops,
+    .devfs_name   = "misc/blktap",
+};
+
+void blkif_deschedule(blkif_t *blkif)
+{
+    remove_from_blkdev_list(blkif);
+}
+
+static int __init blkif_init(void)
+{
+    int i, j, err;
+    struct page *page;
+/*
+    if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
+         !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
+        return 0;
+*/
     blkif_interface_init();
-    blkdev_schedule_init();
+
+    page = balloon_alloc_empty_page_range(MMAP_PAGES);
+    BUG_ON(page == NULL);
+    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    memset(pending_reqs, 0, sizeof(pending_reqs));
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
     
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_BE;
-    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_be_driver_status_t);
-    be_st.status   = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &be_st, sizeof(be_st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
-    DPRINTK("   tap - Userland channel init:\n");
-
-    blktap_init();
-
-    DPRINTK("Blkif tap device initialized.\n");
+    spin_lock_init(&blkio_schedule_list_lock);
+    INIT_LIST_HEAD(&blkio_schedule_list);
+
+    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+
+    blkif_xenbus_init();
+
+    for (i=0; i<MAX_PENDING_REQS ; i++)
+        for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+            BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
+
+    err = misc_register(&blktap_miscdev);
+    if ( err != 0 )
+    {
+        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+        return err;
+    }
+
+    init_waitqueue_head(&blktap_wait);
 
     return 0;
 }
 
-#if 0 /* tap doesn't handle suspend/resume */
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
-    ctrl_msg_t               cmsg;
-    blkif_fe_driver_status_t st;    
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_fe_driver_status_t);
-    st.status      = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &st, sizeof(st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-#endif
-
-__initcall(xlblktap_init);
+__initcall(blkif_init);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Fri Sep  9 16:30:54 2005
@@ -15,7 +15,6 @@
 #include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
-#include <asm-xen/ctrl_if.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <asm/io.h>
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/console/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/console/Makefile Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/Makefile Fri Sep  9 16:30:54 2005
@@ -1,2 +1,2 @@
 
-obj-y  := console.o
+obj-y  := console.o xencons_ring.o
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Fri Sep  9 
16:30:54 2005
@@ -45,14 +45,15 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/bootmem.h>
+#include <linux/sysrq.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm-xen/xen-public/event_channel.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/evtchn.h>
-#include <asm-xen/ctrl_if.h>
-
+
+#include "xencons_ring.h"
 /*
  * Modes:
  *  'xencons=off'  [XC_OFF]:     Console is disabled.
@@ -66,6 +67,11 @@
 static enum { XC_OFF, XC_DEFAULT, XC_TTY, XC_SERIAL } xc_mode = XC_DEFAULT;
 static int xc_num = -1;
 
+#ifdef CONFIG_MAGIC_SYSRQ
+static unsigned long sysrq_requested;
+extern int sysrq_enabled;
+#endif
+
 static int __init xencons_setup(char *str)
 {
     char *q;
@@ -118,13 +124,6 @@
 /* Common transmit-kick routine. */
 static void __xencons_tx_flush(void);
 
-/* This task is used to defer sending console data until there is space. */
-static void xencons_tx_flush_task_routine(void *data);
-
-static DECLARE_TQUEUE(xencons_tx_flush_task, 
-                      xencons_tx_flush_task_routine,
-                      NULL);
-
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 static struct tty_driver *xencons_driver;
 #else
@@ -196,7 +195,7 @@
 void xen_console_init(void)
 #endif
 {
-    if ( xen_start_info.flags & SIF_INITDOMAIN )
+    if ( xen_start_info->flags & SIF_INITDOMAIN )
     {
         if ( xc_mode == XC_DEFAULT )
             xc_mode = XC_SERIAL;
@@ -264,39 +263,22 @@
 /*** Forcibly flush console data before dying. ***/
 void xencons_force_flush(void)
 {
-    ctrl_msg_t msg;
     int        sz;
 
     /* Emergency console is synchronous, so there's nothing to flush. */
-    if ( xen_start_info.flags & SIF_INITDOMAIN )
-        return;
-
-    /*
-     * We use dangerous control-interface functions that require a quiescent
-     * system and no interrupts. Try to ensure this with a global cli().
-     */
-    local_irq_disable(); /* XXXsmp */
+    if ( xen_start_info->flags & SIF_INITDOMAIN )
+        return;
+
 
     /* Spin until console data is flushed through to the domain controller. */
-    while ( (wc != wp) && !ctrl_if_transmitter_empty() )
-    {
-        /* Interrupts are disabled -- we must manually reap responses. */
-        ctrl_if_discard_responses();
-
+    while ( (wc != wp) )
+    {
+       int sent = 0;
         if ( (sz = wp - wc) == 0 )
             continue;
-        if ( sz > sizeof(msg.msg) )
-            sz = sizeof(msg.msg);
-        if ( sz > (wbuf_size - WBUF_MASK(wc)) )
-            sz = wbuf_size - WBUF_MASK(wc);
-
-        msg.type    = CMSG_CONSOLE;
-        msg.subtype = CMSG_CONSOLE_DATA;
-        msg.length  = sz;
-        memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
-            
-        if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
-            wc += sz;
+       sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+       if (sent > 0)
+           wc += sent;
     }
 }
 
@@ -320,7 +302,7 @@
 static char x_char;
 
 /* Non-privileged receive callback. */
-static void xencons_rx(ctrl_msg_t *msg, unsigned long id)
+static void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
 {
     int           i;
     unsigned long flags;
@@ -328,23 +310,39 @@
     spin_lock_irqsave(&xencons_lock, flags);
     if ( xencons_tty != NULL )
     {
-        for ( i = 0; i < msg->length; i++ )
-            tty_insert_flip_char(xencons_tty, msg->msg[i], 0);
+        for ( i = 0; i < len; i++ ) {
+#ifdef CONFIG_MAGIC_SYSRQ
+            if (sysrq_enabled) {
+                if (buf[i] == '\x0f') { /* ^O */
+                    sysrq_requested = jiffies;
+                    continue; /* don't print the sysrq key */
+                } else if (sysrq_requested) {
+                    unsigned long sysrq_timeout = sysrq_requested + HZ*2;
+                    sysrq_requested = 0;
+                    /* if it's been less than a timeout, do the sysrq */
+                    if (time_before(jiffies, sysrq_timeout)) {
+                        spin_unlock_irqrestore(&xencons_lock, flags);
+                        handle_sysrq(buf[i], regs, xencons_tty);
+                        spin_lock_irqsave(&xencons_lock, flags);
+                        continue;
+                    }
+                }
+            }
+#endif
+            tty_insert_flip_char(xencons_tty, buf[i], 0);
+        }
         tty_flip_buffer_push(xencons_tty);
     }
     spin_unlock_irqrestore(&xencons_lock, flags);
 
-    msg->length = 0;
-    ctrl_if_send_response(msg);
 }
 
 /* Privileged and non-privileged transmit worker. */
 static void __xencons_tx_flush(void)
 {
     int        sz, work_done = 0;
-    ctrl_msg_t msg;
-
-    if ( xen_start_info.flags & SIF_INITDOMAIN )
+
+    if ( xen_start_info->flags & SIF_INITDOMAIN )
     {
         if ( x_char )
         {
@@ -367,38 +365,23 @@
     {
         while ( x_char )
         {
-            msg.type    = CMSG_CONSOLE;
-            msg.subtype = CMSG_CONSOLE_DATA;
-            msg.length  = 1;
-            msg.msg[0]  = x_char;
-
-            if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
-                x_char = 0;
-            else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) )
-                break;
-
-            work_done = 1;
+           if (xencons_ring_send(&x_char, 1) == 1) {
+               x_char = 0;
+               work_done = 1;
+           }
         }
 
         while ( wc != wp )
         {
+           int sent;
             sz = wp - wc;
-            if ( sz > sizeof(msg.msg) )
-                sz = sizeof(msg.msg);
-            if ( sz > (wbuf_size - WBUF_MASK(wc)) )
-                sz = wbuf_size - WBUF_MASK(wc);
-
-            msg.type    = CMSG_CONSOLE;
-            msg.subtype = CMSG_CONSOLE_DATA;
-            msg.length  = sz;
-            memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
-            
-            if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
-                wc += sz;
-            else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) )
-                break;
-
-            work_done = 1;
+           if ( sz > (wbuf_size - WBUF_MASK(wc)) )
+               sz = wbuf_size - WBUF_MASK(wc);
+           sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+           if ( sent > 0 ) {
+               wc += sent;
+               work_done = 1;
+           }
         }
     }
 
@@ -409,15 +392,6 @@
              (xencons_tty->ldisc.write_wakeup != NULL) )
             (xencons_tty->ldisc.write_wakeup)(xencons_tty);
     }
-}
-
-/* Non-privileged transmit kicker. */
-static void xencons_tx_flush_task_routine(void *data)
-{
-    unsigned long flags;
-    spin_lock_irqsave(&xencons_lock, flags);
-    __xencons_tx_flush();
-    spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
 /* Privileged receive callback and transmit kicker. */
@@ -726,6 +700,8 @@
     if ( xc_mode == XC_OFF )
         return 0;
 
+    xencons_ring_init();
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
     xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 
                                       1 : MAX_NR_CONSOLES);
@@ -794,7 +770,7 @@
     tty_register_device(xencons_driver, 0, NULL);
 #endif
 
-    if ( xen_start_info.flags & SIF_INITDOMAIN )
+    if ( xen_start_info->flags & SIF_INITDOMAIN )
     {
         xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE);
         (void)request_irq(xencons_priv_irq,
@@ -802,7 +778,8 @@
     }
     else
     {
-        (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
+       
+       xencons_ring_register_receiver(xencons_rx);
     }
 
     printk("Xen virtual console successfully installed as %s%d\n",
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Fri Sep  9 16:30:54 2005
@@ -350,6 +350,8 @@
 
     spin_unlock_irq(&port_user_lock);
 
+    kfree(u);
+
     return 0;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Sep  9 16:30:54 2005
@@ -14,15 +14,17 @@
 #include <linux/in.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <asm-xen/ctrl_if.h>
 #include <asm-xen/evtchn.h>
 #include <asm-xen/xen-public/io/netif.h>
 #include <asm/io.h>
 #include <asm/pgalloc.h>
 
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#ifdef CONFIG_XEN_NETDEV_GRANT
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
+
+#define GRANT_INVALID_REF (0xFFFF)
+
 #endif
 
 
@@ -37,6 +39,11 @@
 #define ASSERT(_p) ((void)0)
 #define DPRINTK(_f, _a...) ((void)0)
 #endif
+#define IPRINTK(fmt, args...) \
+    printk(KERN_INFO "xen_net: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_net: " fmt, ##args)
+
 
 typedef struct netif_st {
     /* Unique identifier for this interface. */
@@ -47,13 +54,13 @@
 
     /* Physical parameters of the comms window. */
     unsigned long    tx_shmem_frame;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     u16              tx_shmem_handle;
     unsigned long    tx_shmem_vaddr; 
     grant_ref_t      tx_shmem_ref; 
 #endif
     unsigned long    rx_shmem_frame;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     u16              rx_shmem_handle;
     unsigned long    rx_shmem_vaddr; 
     grant_ref_t      rx_shmem_ref; 
@@ -68,7 +75,7 @@
     /* Private indexes into shared ring. */
     NETIF_RING_IDX rx_req_cons;
     NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
 #endif
     NETIF_RING_IDX tx_req_cons;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Fri Sep  9 
16:30:54 2005
@@ -111,91 +111,81 @@
     return netif;
 }
 
-static int map_frontend_page(netif_t *netif, unsigned long localaddr,
-                            unsigned long tx_ring_ref, unsigned long 
rx_ring_ref)
-{
-#if !defined(CONFIG_XEN_NETDEV_GRANT_TX)||!defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static int map_frontend_pages(netif_t *netif, unsigned long localaddr,
+                              unsigned long tx_ring_ref, 
+                              unsigned long rx_ring_ref)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT
+    struct gnttab_map_grant_ref op;
+
+    /* Map: Use the Grant table reference */
+    op.host_addr = localaddr;
+    op.flags     = GNTMAP_host_map;
+    op.ref       = tx_ring_ref;
+    op.dom       = netif->domid;
+    
+    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+    if (op.handle < 0) { 
+        DPRINTK(" Grant table operation failure mapping tx_ring_ref!\n");
+        return op.handle;
+    }
+
+    netif->tx_shmem_ref    = tx_ring_ref;
+    netif->tx_shmem_handle = op.handle;
+    netif->tx_shmem_vaddr  = localaddr;
+
+    /* Map: Use the Grant table reference */
+    op.host_addr = localaddr + PAGE_SIZE;
+    op.flags     = GNTMAP_host_map;
+    op.ref       = rx_ring_ref;
+    op.dom       = netif->domid;
+
+    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+    if (op.handle < 0) { 
+        DPRINTK(" Grant table operation failure mapping rx_ring_ref!\n");
+        return op.handle;
+    }
+
+    netif->rx_shmem_ref    = rx_ring_ref;
+    netif->rx_shmem_handle = op.handle;
+    netif->rx_shmem_vaddr  = localaddr + PAGE_SIZE;
+
+#else
     pgprot_t      prot = __pgprot(_KERNPG_TABLE);
     int           err;
-#endif
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX)
-    {
-        struct gnttab_map_grant_ref op;
-
-        /* Map: Use the Grant table reference */
-        op.host_addr = localaddr;
-        op.flags     = GNTMAP_host_map;
-        op.ref       = tx_ring_ref;
-        op.dom       = netif->domid;
-       
-       BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-        if (op.handle < 0) { 
-            DPRINTK(" Grant table operation failure !\n");
-            return op.handle;
-        }
-
-        netif->tx_shmem_ref    = tx_ring_ref;
-        netif->tx_shmem_handle = op.handle;
-        netif->tx_shmem_vaddr  = localaddr;
-    }
-#else 
-    err = direct_remap_area_pages(&init_mm, localaddr,
-                                 tx_ring_ref<<PAGE_SHIFT, PAGE_SIZE,
+
+    err = direct_remap_pfn_range(&init_mm, localaddr,
+                                 tx_ring_ref, PAGE_SIZE,
                                  prot, netif->domid); 
+    
+    err |= direct_remap_pfn_range(&init_mm, localaddr + PAGE_SIZE,
+                                 rx_ring_ref, PAGE_SIZE,
+                                 prot, netif->domid);
+
     if (err)
        return err;
 #endif
 
-#if defined(CONFIG_XEN_NETDEV_GRANT_RX)
-    {
-        struct gnttab_map_grant_ref op;
-
-        /* Map: Use the Grant table reference */
-        op.host_addr = localaddr + PAGE_SIZE;
-        op.flags     = GNTMAP_host_map;
-        op.ref       = rx_ring_ref;
-        op.dom       = netif->domid;
-
-       BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-        if (op.handle < 0) { 
-            DPRINTK(" Grant table operation failure !\n");
-            return op.handle;
-        }
-
-        netif->rx_shmem_ref    = rx_ring_ref;
-        netif->rx_shmem_handle = op.handle;
-        netif->rx_shmem_vaddr  = localaddr + PAGE_SIZE;
-    }
-#else 
-    err = direct_remap_area_pages(&init_mm, localaddr + PAGE_SIZE,
-                                 rx_ring_ref<<PAGE_SHIFT, PAGE_SIZE,
-                                 prot, netif->domid);
-    if (err)
-       return err;
-#endif
-
     return 0;
 }
 
-static void unmap_frontend_page(netif_t *netif)
-{
-#if defined(CONFIG_XEN_NETDEV_GRANT_RX) || defined(CONFIG_XEN_NETDEV_GRANT_TX)
+static void unmap_frontend_pages(netif_t *netif)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT
     struct gnttab_unmap_grant_ref op;
-#endif
-
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+
     op.host_addr    = netif->tx_shmem_vaddr;
     op.handle       = netif->tx_shmem_handle;
     op.dev_bus_addr = 0;
     BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-#endif
-
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+
     op.host_addr    = netif->rx_shmem_vaddr;
     op.handle       = netif->rx_shmem_handle;
     op.dev_bus_addr = 0;
     BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
 #endif
+
+    return; 
 }
 
 int netif_map(netif_t *netif, unsigned long tx_ring_ref,
@@ -209,8 +199,8 @@
     if (vma == NULL)
         return -ENOMEM;
 
-    err = map_frontend_page(netif, (unsigned long)vma->addr, tx_ring_ref,
-                           rx_ring_ref);
+    err = map_frontend_pages(netif, (unsigned long)vma->addr, tx_ring_ref,
+                             rx_ring_ref);
     if (err) {
         vfree(vma->addr);
        return err;
@@ -222,7 +212,7 @@
     op.u.bind_interdomain.port2 = evtchn;
     err = HYPERVISOR_event_channel_op(&op);
     if (err) {
-       unmap_frontend_page(netif);
+       unmap_frontend_pages(netif);
        vfree(vma->addr);
        return err;
     }
@@ -267,7 +257,7 @@
     unregister_netdev(netif->dev);
 
     if (netif->tx) {
-       unmap_frontend_page(netif);
+       unmap_frontend_pages(netif);
        vfree(netif->tx); /* Frees netif->rx as well. */
     }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c       Fri Sep  9 
16:30:54 2005
@@ -29,136 +29,163 @@
 #include <linux/skbuff.h>
 #include <net/dst.h>
 
+static int nloopbacks = 1;
+module_param(nloopbacks, int, 0);
+MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
+
 struct net_private {
-    struct net_device *loopback_dev;
-    struct net_device_stats stats;
+       struct net_device *loopback_dev;
+       struct net_device_stats stats;
 };
 
 static int loopback_open(struct net_device *dev)
 {
-    struct net_private *np = netdev_priv(dev);
-    memset(&np->stats, 0, sizeof(np->stats));
-    netif_start_queue(dev);
-    return 0;
+       struct net_private *np = netdev_priv(dev);
+       memset(&np->stats, 0, sizeof(np->stats));
+       netif_start_queue(dev);
+       return 0;
 }
 
 static int loopback_close(struct net_device *dev)
 {
-    netif_stop_queue(dev);
-    return 0;
+       netif_stop_queue(dev);
+       return 0;
 }
 
 static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-    struct net_private *np = netdev_priv(dev);
+       struct net_private *np = netdev_priv(dev);
 
-    dst_release(skb->dst);
-    skb->dst = NULL;
+       dst_release(skb->dst);
+       skb->dst = NULL;
 
-    skb_orphan(skb);
+       skb_orphan(skb);
 
-    np->stats.tx_bytes += skb->len;
-    np->stats.tx_packets++;
+       np->stats.tx_bytes += skb->len;
+       np->stats.tx_packets++;
 
-    /* Switch to loopback context. */
-    dev = np->loopback_dev;
-    np  = netdev_priv(dev);
+       /* Switch to loopback context. */
+       dev = np->loopback_dev;
+       np  = netdev_priv(dev);
 
-    np->stats.rx_bytes += skb->len;
-    np->stats.rx_packets++;
+       np->stats.rx_bytes += skb->len;
+       np->stats.rx_packets++;
 
-    if ( skb->ip_summed == CHECKSUM_HW )
-    {
-        /* Defer checksum calculation. */
-        skb->proto_csum_blank = 1;
-        /* Must be a local packet: assert its integrity. */
-        skb->proto_csum_valid = 1;
-    }
+       if (skb->ip_summed == CHECKSUM_HW) {
+               /* Defer checksum calculation. */
+               skb->proto_csum_blank = 1;
+               /* Must be a local packet: assert its integrity. */
+               skb->proto_csum_valid = 1;
+       }
 
-    skb->ip_summed = skb->proto_csum_valid ?
-        CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+       skb->ip_summed = skb->proto_csum_valid ?
+               CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 
-    skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
-    skb->protocol = eth_type_trans(skb, dev);
-    skb->dev      = dev;
-    dev->last_rx  = jiffies;
-    netif_rx(skb);
+       skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
+       skb->protocol = eth_type_trans(skb, dev);
+       skb->dev      = dev;
+       dev->last_rx  = jiffies;
+       netif_rx(skb);
 
-    return 0;
+       return 0;
 }
 
 static struct net_device_stats *loopback_get_stats(struct net_device *dev)
 {
-    struct net_private *np = netdev_priv(dev);
-    return &np->stats;
+       struct net_private *np = netdev_priv(dev);
+       return &np->stats;
 }
 
 static void loopback_construct(struct net_device *dev, struct net_device *lo)
 {
-    struct net_private *np = netdev_priv(dev);
+       struct net_private *np = netdev_priv(dev);
 
-    np->loopback_dev     = lo;
+       np->loopback_dev     = lo;
 
-    dev->open            = loopback_open;
-    dev->stop            = loopback_close;
-    dev->hard_start_xmit = loopback_start_xmit;
-    dev->get_stats       = loopback_get_stats;
+       dev->open            = loopback_open;
+       dev->stop            = loopback_close;
+       dev->hard_start_xmit = loopback_start_xmit;
+       dev->get_stats       = loopback_get_stats;
 
-    dev->tx_queue_len    = 0;
+       dev->tx_queue_len    = 0;
 
-    dev->features        = NETIF_F_HIGHDMA | NETIF_F_LLTX;
+       dev->features        = NETIF_F_HIGHDMA | NETIF_F_LLTX;
 
-    /*
-     * We do not set a jumbo MTU on the interface. Otherwise the network
-     * stack will try to send large packets that will get dropped by the
-     * Ethernet bridge (unless the physical Ethernet interface is configured
-     * to transfer jumbo packets). If a larger MTU is desired then the system
-     * administrator can specify it using the 'ifconfig' command.
-     */
-    /*dev->mtu             = 16*1024;*/
+       /*
+        * We do not set a jumbo MTU on the interface. Otherwise the network
+        * stack will try to send large packets that will get dropped by the
+        * Ethernet bridge (unless the physical Ethernet interface is
+        * configured to transfer jumbo packets). If a larger MTU is desired
+        * then the system administrator can specify it using the 'ifconfig'
+        * command.
+        */
+       /*dev->mtu             = 16*1024;*/
+}
+
+static int __init make_loopback(int i)
+{
+       struct net_device *dev1, *dev2;
+       char dev_name[IFNAMSIZ];
+       int err = -ENOMEM;
+
+       sprintf(dev_name, "vif0.%d", i);
+       dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
+       sprintf(dev_name, "veth%d", i);
+       dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
+       if ((dev1 == NULL) || (dev2 == NULL))
+               goto fail;
+
+       loopback_construct(dev1, dev2);
+       loopback_construct(dev2, dev1);
+
+       dev1->features |= NETIF_F_NO_CSUM;
+       dev2->features |= NETIF_F_IP_CSUM;
+
+       /*
+        * Initialise a dummy MAC address for the 'dummy backend' interface. We
+        * choose the numerically largest non-broadcast address to prevent the
+        * address getting stolen by an Ethernet bridge for STP purposes.
+        */
+       memset(dev1->dev_addr, 0xFF, ETH_ALEN);
+       dev1->dev_addr[0] &= ~0x01;
+
+       if ((err = register_netdev(dev1)) != 0)
+               goto fail;
+
+       if ((err = register_netdev(dev2)) != 0) {
+               unregister_netdev(dev1);
+               goto fail;
+       }
+
+       return 0;
+
+ fail:
+       if (dev1 != NULL)
+               kfree(dev1);
+       if (dev2 != NULL)
+               kfree(dev2);
+       return err;
 }
 
 static int __init loopback_init(void)
 {
-    struct net_device *dev1, *dev2;
-    int err = -ENOMEM;
+       int i, err = 0;
 
-    dev1 = alloc_netdev(sizeof(struct net_private), "vif0.0", ether_setup);
-    dev2 = alloc_netdev(sizeof(struct net_private), "veth0", ether_setup);
-    if ( (dev1 == NULL) || (dev2 == NULL) )
-        goto fail;
+       for (i = 0; i < nloopbacks; i++)
+               if ((err = make_loopback(i)) != 0)
+                       break;
 
-    loopback_construct(dev1, dev2);
-    loopback_construct(dev2, dev1);
-
-    dev1->features |= NETIF_F_NO_CSUM;
-    dev2->features |= NETIF_F_IP_CSUM;
-
-    /*
-     * Initialise a dummy MAC address for the 'dummy backend' interface. We
-     * choose the numerically largest non-broadcast address to prevent the
-     * address getting stolen by an Ethernet bridge for STP purposes.
-     */
-    memset(dev1->dev_addr, 0xFF, ETH_ALEN);
-    dev1->dev_addr[0] &= ~0x01;
-
-    if ( (err = register_netdev(dev1)) != 0 )
-        goto fail;
-
-    if ( (err = register_netdev(dev2)) != 0 )
-    {
-        unregister_netdev(dev1);
-        goto fail;
-    }
-
-    return 0;
-
- fail:
-    if ( dev1 != NULL )
-        kfree(dev1);
-    if ( dev2 != NULL )
-        kfree(dev2);
-    return err;
+       return err;
 }
 
 module_init(loopback_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Fri Sep  9 
16:30:54 2005
@@ -12,24 +12,8 @@
 
 #include "common.h"
 #include <asm-xen/balloon.h>
-
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
-#include <asm-xen/xen-public/grant_table.h>
-#include <asm-xen/gnttab.h>
-#ifdef GRANT_DEBUG
-static void
-dump_packet(int tag, u32 addr, unsigned char *p)
-{
-       int i;
-
-       printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
-       for (i = 0; i < 20; i++) {
-               printk("%02x", p[i]);
-       }
-       printk("\n");
-}
-#endif
-#endif
+#include <asm-xen/xen-public/memory.h>
+
 
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
@@ -56,7 +40,8 @@
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+
+#ifdef CONFIG_XEN_NETDEV_GRANT
 static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
 #else
 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
@@ -87,16 +72,13 @@
 
 static struct sk_buff_head tx_queue;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
 static u16 grant_tx_ref[MAX_PENDING_REQS];
 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+
 #else
 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
-#endif
-
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
-#define GRANT_INVALID_REF (0xFFFF)
 #endif
 
 static struct list_head net_schedule_list;
@@ -110,25 +92,37 @@
 static unsigned long alloc_mfn(void)
 {
     unsigned long mfn = 0, flags;
+    struct xen_memory_reservation reservation = {
+        .extent_start = mfn_list,
+        .nr_extents   = MAX_MFN_ALLOC,
+        .extent_order = 0,
+        .domid        = DOMID_SELF
+    };
     spin_lock_irqsave(&mfn_lock, flags);
     if ( unlikely(alloc_index == 0) )
-        alloc_index = HYPERVISOR_dom_mem_op(
-            MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
+        alloc_index = HYPERVISOR_memory_op(
+            XENMEM_increase_reservation, &reservation);
     if ( alloc_index != 0 )
         mfn = mfn_list[--alloc_index];
     spin_unlock_irqrestore(&mfn_lock, flags);
     return mfn;
 }
 
-#ifndef CONFIG_XEN_NETDEV_GRANT_RX
+#ifndef CONFIG_XEN_NETDEV_GRANT
 static void free_mfn(unsigned long mfn)
 {
     unsigned long flags;
+    struct xen_memory_reservation reservation = {
+        .extent_start = &mfn,
+        .nr_extents   = 1,
+        .extent_order = 0,
+        .domid        = DOMID_SELF
+    };
     spin_lock_irqsave(&mfn_lock, flags);
     if ( alloc_index != MAX_MFN_ALLOC )
         mfn_list[alloc_index++] = mfn;
-    else if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
-                                    &mfn, 1, 0) != 1 )
+    else if ( HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation)
+              != 1 )
         BUG();
     spin_unlock_irqrestore(&mfn_lock, flags);
 }
@@ -187,7 +181,7 @@
         dev_kfree_skb(skb);
         skb = nskb;
     }
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
 #ifdef DEBUG_GRANT
     printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x 
gr=%04x\n",
            netif->rx->req_prod,
@@ -233,12 +227,12 @@
 
 static void net_rx_action(unsigned long unused)
 {
-    netif_t *netif;
+    netif_t *netif = NULL; 
     s8 status;
     u16 size, id, evtchn;
     multicall_entry_t *mcl;
     mmu_update_t *mmu;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     gnttab_donate_t *gop;
 #else
     struct mmuext_op *mmuext;
@@ -253,7 +247,7 @@
 
     mcl = rx_mcl;
     mmu = rx_mmu;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     gop = grant_rx_op;
 #else
     mmuext = rx_mmuext;
@@ -269,7 +263,7 @@
         if ( (new_mfn = alloc_mfn()) == 0 )
         {
             if ( net_ratelimit() )
-                printk(KERN_WARNING "Memory squeeze in netback driver.\n");
+                WPRINTK("Memory squeeze in netback driver.\n");
             mod_timer(&net_timer, jiffies + HZ);
             skb_queue_head(&rx_queue, skb);
             break;
@@ -284,7 +278,7 @@
                                pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
         mcl++;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         gop->mfn = old_mfn;
         gop->domid = netif->domid;
         gop->handle = netif->rx->ring[
@@ -303,7 +297,7 @@
         mmuext->mfn = old_mfn;
         mmuext++;
 #endif
-        mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+        mmu->ptr = ((unsigned long long)new_mfn << PAGE_SHIFT) | 
MMU_MACHPHYS_UPDATE;
         mmu->val = __pa(vdata) >> PAGE_SHIFT;  
         mmu++;
 
@@ -327,7 +321,7 @@
     mcl->args[3] = DOMID_SELF;
     mcl++;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
 #else
     mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
@@ -336,9 +330,17 @@
         BUG();
 
     mcl = rx_mcl;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    BUG_ON(HYPERVISOR_grant_table_op(
-        GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
+#ifdef CONFIG_XEN_NETDEV_GRANT
+    if(HYPERVISOR_grant_table_op(GNTTABOP_donate, grant_rx_op, 
+                                 gop - grant_rx_op)) { 
+        /* 
+        ** The other side has given us a bad grant ref, or has no headroom, 
+        ** or has gone away. Unfortunately the current grant table code 
+        ** doesn't inform us which is the case, so not much we can do. 
+        */
+        DPRINTK("net_rx: donate to DOM%u failed; dropping (up to) %d "
+                "packets.\n", grant_rx_op[0].domid, gop - grant_rx_op); 
+    }
     gop = grant_rx_op;
 #else
     mmuext = rx_mmuext;
@@ -350,7 +352,7 @@
 
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
 #else
         old_mfn = mmuext[0].mfn;
@@ -367,8 +369,13 @@
 
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        BUG_ON(gop->status != 0); /* XXX */
+#ifdef CONFIG_XEN_NETDEV_GRANT
+        if(gop->status != 0) { 
+            DPRINTK("Bad status %d from grant donate to DOM%u\n", 
+                    gop->status, netif->domid);
+            /* XXX SMH: should free 'old_mfn' here */
+            status = NETIF_RSP_ERROR; 
+        } 
 #else
         if ( unlikely(mcl[1].result != 0) )
         {
@@ -391,7 +398,7 @@
 
         netif_put(netif);
         dev_kfree_skb(skb);
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         mcl++;
         gop++;
 #else
@@ -407,6 +414,7 @@
         notify_via_evtchn(evtchn);
     }
 
+  out: 
     /* More work to do? */
     if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
         tasklet_schedule(&net_rx_tasklet);
@@ -483,7 +491,7 @@
 
 inline static void net_tx_action_dealloc(void)
 {
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     gnttab_unmap_grant_ref_t *gop;
 #else
     multicall_entry_t *mcl;
@@ -495,7 +503,7 @@
     dc = dealloc_cons;
     dp = dealloc_prod;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     /*
      * Free up any grants we have finished using
      */
@@ -529,7 +537,7 @@
 #endif
     while ( dealloc_cons != dp )
     {
-#ifndef CONFIG_XEN_NETDEV_GRANT_TX
+#ifndef CONFIG_XEN_NETDEV_GRANT
         /* The update_va_mapping() must not fail. */
         BUG_ON(mcl[0].result != 0);
 #endif
@@ -556,7 +564,7 @@
         
         netif_put(netif);
 
-#ifndef CONFIG_XEN_NETDEV_GRANT_TX
+#ifndef CONFIG_XEN_NETDEV_GRANT
         mcl++;
 #endif
     }
@@ -572,7 +580,7 @@
     netif_tx_request_t txreq;
     u16 pending_idx;
     NETIF_RING_IDX i;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     gnttab_map_grant_ref_t *mop;
 #else
     multicall_entry_t *mcl;
@@ -582,7 +590,7 @@
     if ( dealloc_cons != dealloc_prod )
         net_tx_action_dealloc();
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     mop = tx_map_ops;
 #else
     mcl = tx_mcl;
@@ -683,7 +691,7 @@
 
         /* Packets passed to netif_rx() must have some headroom. */
         skb_reserve(skb, 16);
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         mop->host_addr = MMAP_VADDR(pending_idx);
         mop->dom       = netif->domid;
         mop->ref       = txreq.addr >> PAGE_SHIFT;
@@ -706,7 +714,7 @@
 
         pending_cons++;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
             break;
 #else
@@ -716,7 +724,7 @@
 #endif
     }
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     if ( mop == tx_map_ops )
         return;
 
@@ -739,7 +747,7 @@
         memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
 
         /* Check the remap error code. */
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         /* 
            XXX SMH: error returns from grant operations are pretty poorly
            specified/thought out, but the below at least conforms with 
@@ -813,7 +821,7 @@
         netif_rx(skb);
         netif->dev->last_rx = jiffies;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         mop++;
 #else
         mcl++;
@@ -932,16 +940,13 @@
     int i;
     struct page *page;
 
-    if ( !(xen_start_info.flags & SIF_NET_BE_DOMAIN) &&
-         !(xen_start_info.flags & SIF_INITDOMAIN) )
+    if ( !(xen_start_info->flags & SIF_NET_BE_DOMAIN) &&
+         !(xen_start_info->flags & SIF_INITDOMAIN) )
         return 0;
 
-    printk("Initialising Xen netif backend\n");
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    printk("#### netback tx using grant tables\n");
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    printk("#### netback rx using grant tables\n");
+    IPRINTK("Initialising Xen netif backend.\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT
+    IPRINTK("Using grant tables.\n");
 #endif
 
     /* We can increase reservation by this much in net_rx_action(). */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep  9 16:30:54 2005
@@ -160,9 +160,47 @@
                }
 #endif
 
+               kobject_hotplug(&dev->dev.kobj, KOBJ_ONLINE);
+
                /* Pass in NULL node to skip exist test. */
                frontend_changed(&be->watch, NULL);
        }
+}
+
+static int netback_hotplug(struct xenbus_device *xdev, char **envp,
+                          int num_envp, char *buffer, int buffer_size)
+{
+       struct backend_info *be;
+       netif_t *netif;
+       char **key, *val;
+       int i = 0, length = 0;
+       static char *env_vars[] = { "script", "domain", "mac", "bridge", "ip",
+                                   NULL };
+
+       be = xdev->data;
+       netif = be->netif;
+
+       add_hotplug_env_var(envp, num_envp, &i,
+                           buffer, buffer_size, &length,
+                           "vif=%s", netif->dev->name);
+
+       key = env_vars;
+       while (*key != NULL) {
+               val = xenbus_read(xdev->nodename, *key, NULL);
+               if (!IS_ERR(val)) {
+                       char buf[strlen(*key) + 4];
+                       sprintf(buf, "%s=%%s", *key);
+                       add_hotplug_env_var(envp, num_envp, &i,
+                                           buffer, buffer_size, &length,
+                                           buf, val);
+                       kfree(val);
+               }
+               key++;
+       }
+
+       envp[i] = NULL;
+
+       return 0;
 }
 
 static int netback_probe(struct xenbus_device *dev,
@@ -249,6 +287,7 @@
        .ids = netback_ids,
        .probe = netback_probe,
        .remove = netback_remove,
+       .hotplug = netback_hotplug,
 };
 
 void netif_xenbus_init(void)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri Sep  9 
16:30:54 2005
@@ -50,13 +50,23 @@
 #include <asm-xen/evtchn.h>
 #include <asm-xen/xenbus.h>
 #include <asm-xen/xen-public/io/netif.h>
+#include <asm-xen/xen-public/memory.h>
 #include <asm-xen/balloon.h>
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#ifdef CONFIG_XEN_NETDEV_GRANT
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
+
+static grant_ref_t gref_tx_head;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; 
+
+static grant_ref_t gref_rx_head;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+
+#define GRANT_INVALID_REF      (0xFFFF)
+
 #ifdef GRANT_DEBUG
 static void
 dump_packet(int tag, void *addr, u32 ap)
@@ -70,8 +80,17 @@
     }
     printk("\n");
 }
-#endif
-#endif
+
+#define GDPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+                           __FILE__ , __LINE__ , ## _a )
+#else 
+#define dump_packet(x,y,z)  ((void)0)  
+#define GDPRINTK(_f, _a...) ((void)0)
+#endif
+
+#endif
+
+
 
 #ifndef __GFP_NOWARN
 #define __GFP_NOWARN 0
@@ -101,22 +120,10 @@
 #define TX_TEST_IDX req_cons  /* conservative: not seen all our requests? */
 #endif
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-static grant_ref_t gref_tx_head;
-static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
-#endif
-
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static grant_ref_t gref_rx_head;
-static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
-#endif
-
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
-#define GRANT_INVALID_REF      (0xFFFF)
-#endif
 
 #define NETIF_STATE_DISCONNECTED 0
 #define NETIF_STATE_CONNECTED    1
+
 
 static unsigned int netif_state = NETIF_STATE_DISCONNECTED;
 
@@ -278,7 +285,7 @@
         for (i = np->tx_resp_cons; i != prod; i++) {
             id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
             if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
                 /* other domain is still using this grant - shouldn't happen
                    but if it does, we'll try to reclaim the grant later */
@@ -309,7 +316,7 @@
         mb();
     } while (prod != np->tx->resp_prod);
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
   out: 
 #endif
 
@@ -328,8 +335,9 @@
     struct sk_buff *skb;
     int i, batch_target;
     NETIF_RING_IDX req_prod = np->rx->req_prod;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    int ref;
+    struct xen_memory_reservation reservation;
+#ifdef CONFIG_XEN_NETDEV_GRANT
+    grant_ref_t ref;
 #endif
 
     if (unlikely(np->backend_state != BEST_CONNECTED))
@@ -363,9 +371,9 @@
         np->rx_skbs[id] = skb;
         
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
        ref = gnttab_claim_grant_reference(&gref_rx_head);
-        if (unlikely(ref < 0)) {
+        if (unlikely((signed short)ref < 0)) {
             printk(KERN_ALERT "#### netfront can't claim rx reference\n");
             BUG();
         }
@@ -388,12 +396,15 @@
     rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
 
     /* Give away a batch of pages. */
-    rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
-    rx_mcl[i].args[0] = MEMOP_decrease_reservation;
-    rx_mcl[i].args[1] = (unsigned long)rx_pfn_array;
-    rx_mcl[i].args[2] = (unsigned long)i;
-    rx_mcl[i].args[3] = 0;
-    rx_mcl[i].args[4] = DOMID_SELF;
+    rx_mcl[i].op = __HYPERVISOR_memory_op;
+    rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+    rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+    reservation.extent_start = rx_pfn_array;
+    reservation.nr_extents   = i;
+    reservation.extent_order = 0;
+    reservation.address_bits = 0;
+    reservation.domid        = DOMID_SELF;
 
     /* Tell the ballon driver what is going on. */
     balloon_update_driver_allowance(i);
@@ -401,7 +412,7 @@
     /* Zap PTEs and give away pages in one big multicall. */
     (void)HYPERVISOR_multicall(rx_mcl, i+1);
 
-    /* Check return status of HYPERVISOR_dom_mem_op(). */
+    /* Check return status of HYPERVISOR_memory_op(). */
     if (unlikely(rx_mcl[i].result != i))
         panic("Unable to reduce memory reservation\n");
 
@@ -421,8 +432,8 @@
     struct net_private *np = netdev_priv(dev);
     netif_tx_request_t *tx;
     NETIF_RING_IDX i;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    unsigned int ref;
+#ifdef CONFIG_XEN_NETDEV_GRANT
+    grant_ref_t ref;
     unsigned long mfn;
 #endif
 
@@ -459,9 +470,9 @@
     tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
 
     tx->id   = id;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     ref = gnttab_claim_grant_reference(&gref_tx_head);
-    if (unlikely(ref < 0)) {
+    if (unlikely((signed short)ref < 0)) {
         printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
         BUG();
     }
@@ -514,7 +525,7 @@
     network_tx_buf_gc(dev);
     spin_unlock_irqrestore(&np->tx_lock, flags);
 
-    if ((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == 
UST_OPEN))
+    if((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN))
         netif_rx_schedule(dev);
 
     return IRQ_HANDLED;
@@ -532,7 +543,7 @@
     int work_done, budget, more_to_do = 1;
     struct sk_buff_head rxq;
     unsigned long flags;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     unsigned long mfn;
     grant_ref_t ref;
 #endif
@@ -569,8 +580,19 @@
             continue;
         }
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        ref = grant_rx_ref[rx->id];
+#ifdef CONFIG_XEN_NETDEV_GRANT
+        ref = grant_rx_ref[rx->id]; 
+
+        if(ref == GRANT_INVALID_REF) { 
+            printk(KERN_WARNING "Bad rx grant reference %d from dom %d.\n",
+                   ref, np->backend_id);
+            np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
+            wmb();
+            np->rx->req_prod++;
+            work_done--;
+            continue;
+        }
+
         grant_rx_ref[rx->id] = GRANT_INVALID_REF;
         mfn = gnttab_end_foreign_transfer_ref(ref);
         gnttab_release_grant_reference(&gref_rx_head, ref);
@@ -580,7 +602,7 @@
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
         /* NB. We handle skb overflow later. */
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         skb->data = skb->head + rx->addr;
 #else
         skb->data = skb->head + (rx->addr & ~PAGE_MASK);
@@ -595,14 +617,14 @@
         np->stats.rx_bytes += rx->status;
 
         /* Remap the page. */
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#ifdef CONFIG_XEN_NETDEV_GRANT
+        mmu->ptr = ((unsigned long long)mfn << PAGE_SHIFT) | 
MMU_MACHPHYS_UPDATE;
 #else
         mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
 #endif
         mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
         mmu++;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
        MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
                                pfn_pte_ma(mfn, PAGE_KERNEL), 0);
 #else
@@ -612,19 +634,19 @@
 #endif
         mcl++;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
+        GDPRINTK("#### rx_poll     enqueue vdata=%p mfn=%lu ref=%x\n",
+                skb->data, mfn, ref);
 #else
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
             rx->addr >> PAGE_SHIFT;
-#endif
-
-#ifdef GRANT_DEBUG
-        printk(KERN_ALERT "#### rx_poll     enqueue vdata=%p mfn=%lu ref=%x\n",
-               skb->data, mfn, ref);
-#endif
+#endif 
+
+
         __skb_queue_tail(&rxq, skb);
     }
+
 
     /* Some pages are no longer absent... */
     balloon_update_driver_allowance(-work_done);
@@ -641,9 +663,9 @@
     }
 
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
-#ifdef GRANT_DEBUG
-        printk(KERN_ALERT "#### rx_poll     dequeue vdata=%p mfn=%lu\n",
-               skb->data, virt_to_mfn(skb->data));
+#ifdef CONFIG_XEN_NETDEV_GRANT
+        GDPRINTK("#### rx_poll     dequeue vdata=%p mfn=%lu\n",
+                skb->data, virt_to_mfn(skb->data));
         dump_packet('d', skb->data, (unsigned long)skb->data);
 #endif
         /*
@@ -742,7 +764,6 @@
     return &np->stats;
 }
 
-
 static void network_connect(struct net_device *dev)
 {
     struct net_private *np;
@@ -782,8 +803,11 @@
             tx = &np->tx->ring[requeue_idx++].req;
 
             tx->id   = i;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-            tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
+#ifdef CONFIG_XEN_NETDEV_GRANT
+            gnttab_grant_foreign_access_ref(grant_tx_ref[i], np->backend_id, 
+                                            virt_to_mfn(np->tx_skbs[i]->data),
+                                            GNTMAP_readonly); 
+            tx->addr = grant_tx_ref[i] << PAGE_SHIFT; 
 #else
             tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
 #endif
@@ -798,9 +822,20 @@
     np->tx->req_prod = requeue_idx;
 
     /* Rebuild the RX buffer freelist and the RX ring itself. */
-    for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++)
-        if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET)
-            np->rx->ring[requeue_idx++].req.id = i;
+    for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) { 
+        if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) {
+#ifdef CONFIG_XEN_NETDEV_GRANT 
+            /* Reinstate the grant ref so backend can 'donate' mfn to us. */
+            gnttab_grant_foreign_transfer_ref(grant_rx_ref[i], np->backend_id,
+                                              virt_to_mfn(np->rx_skbs[i]->head)
+                );
+            np->rx->ring[requeue_idx].req.gref = grant_rx_ref[i];
+#endif
+            np->rx->ring[requeue_idx].req.id   = i;
+            requeue_idx++; 
+        }
+    }
+
     wmb();                
     np->rx->req_prod = requeue_idx;
 
@@ -896,13 +931,14 @@
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
     for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
         np->tx_skbs[i] = (void *)((unsigned long) i+1);
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         grant_tx_ref[i] = GRANT_INVALID_REF;
 #endif
     }
+
     for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
         np->rx_skbs[i] = (void *)((unsigned long) i+1);
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT
         grant_rx_ref[i] = GRANT_INVALID_REF;
 #endif
     }
@@ -986,10 +1022,8 @@
        evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
        int err;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
        info->tx_ring_ref = GRANT_INVALID_REF;
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        info->rx_ring_ref = GRANT_INVALID_REF;
 #endif
 
@@ -1009,7 +1043,7 @@
        memset(info->rx, 0, PAGE_SIZE);
        info->backend_state = BEST_DISCONNECTED;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
        err = gnttab_grant_foreign_access(info->backend_id,
                                          virt_to_mfn(info->tx), 0);
        if (err < 0) {
@@ -1017,11 +1051,7 @@
                goto out;
        }
        info->tx_ring_ref = err;
-#else
-       info->tx_ring_ref = virt_to_mfn(info->tx);
-#endif
-
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+
        err = gnttab_grant_foreign_access(info->backend_id,
                                          virt_to_mfn(info->rx), 0);
        if (err < 0) {
@@ -1029,7 +1059,9 @@
                goto out;
        }
        info->rx_ring_ref = err;
+
 #else
+       info->tx_ring_ref = virt_to_mfn(info->tx);
        info->rx_ring_ref = virt_to_mfn(info->rx);
 #endif
 
@@ -1049,16 +1081,17 @@
        if (info->rx)
                free_page((unsigned long)info->rx);
        info->rx = 0;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+
+#ifdef CONFIG_XEN_NETDEV_GRANT
        if (info->tx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->tx_ring_ref, 0);
        info->tx_ring_ref = GRANT_INVALID_REF;
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+
        if (info->rx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->rx_ring_ref, 0);
        info->rx_ring_ref = GRANT_INVALID_REF;
 #endif
+
        return err;
 }
 
@@ -1070,16 +1103,17 @@
        if (info->rx)
                free_page((unsigned long)info->rx);
        info->rx = 0;
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+
+#ifdef CONFIG_XEN_NETDEV_GRANT
        if (info->tx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->tx_ring_ref, 0);
        info->tx_ring_ref = GRANT_INVALID_REF;
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+
        if (info->rx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->rx_ring_ref, 0);
        info->rx_ring_ref = GRANT_INVALID_REF;
 #endif
+
        unbind_evtchn_from_irqhandler(info->evtchn, info->netdev);
        info->evtchn = 0;
 }
@@ -1272,25 +1306,25 @@
 
 static int netfront_suspend(struct xenbus_device *dev)
 {
-    struct net_private *np = dev->data;
-    /* Avoid having tx/rx stuff happen until we're ready. */
-    unbind_evtchn_from_irqhandler(np->evtchn, np->netdev);
-    return 0;
+       struct netfront_info *info = dev->data;
+
+       unregister_xenbus_watch(&info->watch);
+       kfree(info->backend);
+       info->backend = NULL;
+
+       netif_free(info);
+
+       return 0;
 }
 
 static int netfront_resume(struct xenbus_device *dev)
 {
-    struct net_private *np = dev->data;
-    /*
-     * Connect regardless of whether IFF_UP flag set.
-     * Stop bad things from happening until we're back up.
-     */
-    np->backend_state = BEST_DISCONNECTED;
-    memset(np->tx, 0, PAGE_SIZE);
-    memset(np->rx, 0, PAGE_SIZE);
-    
-    // send_interface_connect(np);
-    return 0;
+       struct net_private *np = dev->data;
+       int err;
+
+       err = talk_to_backend(dev, np);
+
+       return err;
 }
 
 static struct xenbus_driver netfront = {
@@ -1335,32 +1369,31 @@
 {
     int err = 0;
 
-    if (xen_start_info.flags & SIF_INITDOMAIN)
+    if (xen_start_info->flags & SIF_INITDOMAIN)
         return 0;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    /* A grant for every ring slot */
+    if ((err = xennet_proc_init()) != 0)
+        return err;
+
+    IPRINTK("Initialising virtual ethernet driver.\n");
+
+#ifdef CONFIG_XEN_NETDEV_GRANT
+    IPRINTK("Using grant tables.\n"); 
+
+    /* A grant for every tx ring slot */
     if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
                                       &gref_tx_head) < 0) {
         printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
         return 1;
     }
-    printk(KERN_ALERT "Netdev frontend (TX) is using grant tables.\n"); 
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    /* A grant for every ring slot */
+    /* A grant for every rx ring slot */
     if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
                                       &gref_rx_head) < 0) {
         printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
         return 1;
     }
-    printk(KERN_ALERT "Netdev frontend (RX) is using grant tables.\n"); 
-#endif
-
-    if ((err = xennet_proc_init()) != 0)
-        return err;
-
-    IPRINTK("Initialising virtual ethernet driver.\n");
+#endif
+
 
     (void)register_inetaddr_notifier(&notifier_inetdev);
 
@@ -1373,10 +1406,8 @@
 
 static void netif_exit(void)
 {
-#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+#ifdef CONFIG_XEN_NETDEV_GRANT
     gnttab_free_grant_references(gref_tx_head);
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
     gnttab_free_grant_references(gref_rx_head);
 #endif
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Fri Sep  9 
16:30:54 2005
@@ -63,27 +63,23 @@
             "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
             : "=a" (ret) : "0" (&hypercall) : "memory" );
 #elif defined (__x86_64__)
-       __asm__ __volatile__ (
-           "movq   %5,%%r10; movq %6,%%r8;" TRAP_INSTR
-           : "=a" (ret)
-           : "a" ((unsigned long)hypercall.op), 
-             "D" ((unsigned long)hypercall.arg[0]), 
-             "S" ((unsigned long)hypercall.arg[1]),
-             "d" ((unsigned long)hypercall.arg[2]), 
-             "g" ((unsigned long)hypercall.arg[3]),
-             "g" ((unsigned long)hypercall.arg[4])
-           : "r11","rcx","r8","r10","memory");
+        {
+            long ign1, ign2, ign3;
+            __asm__ __volatile__ (
+                "movq %8,%%r10; movq %9,%%r8;" TRAP_INSTR
+                : "=a" (ret), "=D" (ign1), "=S" (ign2), "=d" (ign3)
+                : "0" ((unsigned long)hypercall.op), 
+                "1" ((unsigned long)hypercall.arg[0]), 
+                "2" ((unsigned long)hypercall.arg[1]),
+                "3" ((unsigned long)hypercall.arg[2]), 
+                "g" ((unsigned long)hypercall.arg[3]),
+                "g" ((unsigned long)hypercall.arg[4])
+                : "r11","rcx","r8","r10","memory");
+        }
 #endif
     }
     break;
 
-    case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN:
-    {
-        extern int initdom_ctrlif_domcontroller_port;
-        ret = initdom_ctrlif_domcontroller_port;
-    }
-    break;
-    
 #if defined(CONFIG_XEN_PRIVILEGED_GUEST)
     case IOCTL_PRIVCMD_MMAP:
     {
@@ -120,9 +116,9 @@
                 if ( (msg[j].va + (msg[j].npages<<PAGE_SHIFT)) > vma->vm_end )
                     return -EINVAL;
 
-                if ( (rc = direct_remap_area_pages(vma->vm_mm, 
+                if ( (rc = direct_remap_pfn_range(vma->vm_mm, 
                                                    msg[j].va&PAGE_MASK, 
-                                                   msg[j].mfn<<PAGE_SHIFT, 
+                                                   msg[j].mfn, 
                                                    msg[j].npages<<PAGE_SHIFT, 
                                                    vma->vm_page_prot,
                                                    mmapcmd.dom)) < 0 )
@@ -202,8 +198,8 @@
         extern int do_xenbus_probe(void*);
         unsigned long page;
 
-        if (xen_start_info.store_evtchn != 0) {
-            ret = xen_start_info.store_mfn;
+        if (xen_start_info->store_evtchn != 0) {
+            ret = xen_start_info->store_mfn;
             break;
         }
 
@@ -219,10 +215,10 @@
         SetPageReserved(virt_to_page(page));
 
         /* Initial connect. Setup channel and page. */
-        xen_start_info.store_evtchn = data;
-        xen_start_info.store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >>
+        xen_start_info->store_evtchn = data;
+        xen_start_info->store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >>
                                               PAGE_SHIFT);
-        ret = xen_start_info.store_mfn;
+        ret = xen_start_info->store_mfn;
 
         /* We'll return then this will wait for daemon to answer */
         kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/usbback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Fri Sep  9 16:30:54 2005
@@ -12,7 +12,6 @@
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
 #include <asm-xen/hypervisor.h>
 
 #include <asm-xen/xen-public/io/usbif.h>
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/usbback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c      Fri Sep  9 
16:30:54 2005
@@ -161,8 +161,8 @@
     }
 
     prot = __pgprot(_KERNPG_TABLE);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+    error = direct_remap_pfn_range(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame, PAGE_SIZE,
                                     prot, domid);
     if ( error != 0 )
     {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Fri Sep  9 
16:30:54 2005
@@ -1029,8 +1029,8 @@
     int i;
     struct page *page;
 
-    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
-         !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+    if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
+         !(xen_start_info->flags & SIF_USB_BE_DOMAIN) )
         return 0;
 
     page = balloon_alloc_empty_page_range(MMAP_PAGES);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Fri Sep  9 
16:30:54 2005
@@ -70,7 +70,6 @@
 #include "../../../../../drivers/usb/hcd.h"
 
 #include <asm-xen/xen-public/io/usbif.h>
-#include <asm/ctrl_if.h>
 #include <asm/xen-public/io/domain_controller.h>
 
 /*
@@ -1675,8 +1674,8 @@
 {
        int retval = -ENOMEM, i;
 
-       if ( (xen_start_info.flags & SIF_INITDOMAIN)
-            || (xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+       if ( (xen_start_info->flags & SIF_INITDOMAIN) ||
+            (xen_start_info->flags & SIF_USB_BE_DOMAIN) )
                 return 0;
 
        info(DRIVER_DESC " " DRIVER_VERSION);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Fri Sep  9 16:30:54 2005
@@ -4,3 +4,4 @@
 xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_xs.o
 xenbus-objs += xenbus_probe.o 
+xenbus-objs += xenbus_dev.o 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Fri Sep  9 
16:30:54 2005
@@ -48,12 +48,12 @@
 
 static inline struct ringbuf_head *outbuf(void)
 {
-       return mfn_to_virt(xen_start_info.store_mfn);
+       return mfn_to_virt(xen_start_info->store_mfn);
 }
 
 static inline struct ringbuf_head *inbuf(void)
 {
-       return mfn_to_virt(xen_start_info.store_mfn) + PAGE_SIZE/2;
+       return mfn_to_virt(xen_start_info->store_mfn) + PAGE_SIZE/2;
 }
 
 static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
@@ -145,7 +145,7 @@
                data += avail;
                len -= avail;
                update_output_chunk(out, avail);
-               notify_via_evtchn(xen_start_info.store_evtchn);
+               notify_via_evtchn(xen_start_info->store_evtchn);
        } while (len != 0);
 
        return 0;
@@ -190,7 +190,7 @@
                pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
                /* If it was full, tell them we've taken some. */
                if (was_full)
-                       notify_via_evtchn(xen_start_info.store_evtchn);
+                       notify_via_evtchn(xen_start_info->store_evtchn);
        }
 
        /* If we left something, wake watch thread to deal with it. */
@@ -205,20 +205,20 @@
 {
        int err;
 
-       if (!xen_start_info.store_evtchn)
+       if (!xen_start_info->store_evtchn)
                return 0;
 
        err = bind_evtchn_to_irqhandler(
-               xen_start_info.store_evtchn, wake_waiting,
+               xen_start_info->store_evtchn, wake_waiting,
                0, "xenbus", &xb_waitq);
        if (err) {
                printk(KERN_ERR "XENBUS request irq failed %i\n", err);
-               unbind_evtchn_from_irq(xen_start_info.store_evtchn);
+               unbind_evtchn_from_irq(xen_start_info->store_evtchn);
                return err;
        }
 
        /* FIXME zero out page -- domain builder should probably do this*/
-       memset(mfn_to_virt(xen_start_info.store_mfn), 0, PAGE_SIZE);
+       memset(mfn_to_virt(xen_start_info->store_mfn), 0, PAGE_SIZE);
 
        return 0;
 }
@@ -226,8 +226,8 @@
 void xb_suspend_comms(void)
 {
 
-       if (!xen_start_info.store_evtchn)
+       if (!xen_start_info->store_evtchn)
                return;
 
-       unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq);
-}
+       unbind_evtchn_from_irqhandler(xen_start_info->store_evtchn, &xb_waitq);
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Sep  9 
16:30:54 2005
@@ -147,6 +147,39 @@
        return 0;
 }
 
+static int xenbus_hotplug_backend(struct device *dev, char **envp,
+                                 int num_envp, char *buffer, int buffer_size)
+{
+       struct xenbus_device *xdev;
+       int i = 0;
+       int length = 0;
+
+       if (dev == NULL)
+               return -ENODEV;
+
+       xdev = to_xenbus_device(dev);
+       if (xdev == NULL)
+               return -ENODEV;
+
+       /* stuff we want to pass to /sbin/hotplug */
+       add_hotplug_env_var(envp, num_envp, &i,
+                           buffer, buffer_size, &length,
+                           "XENBUS_TYPE=%s", xdev->devicetype);
+
+       /* terminate, set to next free slot, shrink available space */
+       envp[i] = NULL;
+       envp = &envp[i];
+       num_envp -= i;
+       buffer = &buffer[length];
+       buffer_size -= length;
+
+       if (dev->driver && to_xenbus_driver(dev->driver)->hotplug)
+               return to_xenbus_driver(dev->driver)->hotplug
+                       (xdev, envp, num_envp, buffer, buffer_size);
+
+       return 0;
+}
+
 static int xenbus_probe_backend(const char *type, const char *uuid);
 static struct xen_bus_type xenbus_backend = {
        .root = "backend",
@@ -156,6 +189,7 @@
        .bus = {
                .name  = "xen-backend",
                .match = xenbus_match,
+               .hotplug = xenbus_hotplug_backend,
        },
        .dev = {
                .bus_id = "xen-backend",
@@ -209,6 +243,7 @@
 {
        return xenbus_register_driver(drv, &xenbus_frontend);
 }
+EXPORT_SYMBOL(xenbus_register_device);
 
 int xenbus_register_backend(struct xenbus_driver *drv)
 {
@@ -586,7 +621,7 @@
 
        down(&xenbus_lock);
 
-       if (xen_start_info.store_evtchn) {
+       if (xen_start_info->store_evtchn) {
                ret = nb->notifier_call(nb, 0, NULL);
        } else {
                notifier_chain_register(&xenstore_chain, nb);
@@ -612,7 +647,7 @@
        int err = 0;
 
        /* Initialize xenstore comms unless already done. */
-       printk("store_evtchn = %i\n", xen_start_info.store_evtchn);
+       printk("store_evtchn = %i\n", xen_start_info->store_evtchn);
        err = xs_init();
        if (err) {
                printk("XENBUS: Error initializing xenstore comms:"
@@ -640,7 +675,7 @@
        device_register(&xenbus_frontend.dev);
        device_register(&xenbus_backend.dev);
 
-       if (!xen_start_info.store_evtchn)
+       if (!xen_start_info->store_evtchn)
                return 0;
 
        do_xenbus_probe(NULL);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Fri Sep  9 
16:30:54 2005
@@ -45,7 +45,9 @@
 
 static char printf_buffer[4096];
 static LIST_HEAD(watches);
+
 DECLARE_MUTEX(xenbus_lock);
+EXPORT_SYMBOL(xenbus_lock);
 
 static int get_error(const char *errorstring)
 {
@@ -104,10 +106,10 @@
 }
 
 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
-static void *xs_talkv(enum xsd_sockmsg_type type,
-                     const struct kvec *iovec,
-                     unsigned int num_vecs,
-                     unsigned int *len)
+void *xs_talkv(enum xsd_sockmsg_type type,
+              const struct kvec *iovec,
+              unsigned int num_vecs,
+              unsigned int *len)
 {
        struct xsd_sockmsg msg;
        void *ret = NULL;
@@ -224,6 +226,7 @@
                ret[(*num)++] = p;
        return ret;
 }
+EXPORT_SYMBOL(xenbus_directory);
 
 /* Check if a path exists. Return 1 if it does. */
 int xenbus_exists(const char *dir, const char *node)
@@ -237,6 +240,7 @@
        kfree(d);
        return 1;
 }
+EXPORT_SYMBOL(xenbus_exists);
 
 /* Get the value of a single file.
  * Returns a kmalloced value: call free() on it after use.
@@ -246,6 +250,7 @@
 {
        return xs_single(XS_READ, join(dir, node), len);
 }
+EXPORT_SYMBOL(xenbus_read);
 
 /* Write the value of a single file.
  * Returns -err on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
@@ -276,18 +281,21 @@
 
        return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
 }
+EXPORT_SYMBOL(xenbus_write);
 
 /* Create a new directory. */
 int xenbus_mkdir(const char *dir, const char *node)
 {
        return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
 }
+EXPORT_SYMBOL(xenbus_mkdir);
 
 /* Destroy a file or directory (directories must be empty). */
 int xenbus_rm(const char *dir, const char *node)
 {
        return xs_error(xs_single(XS_RM, join(dir, node), NULL));
 }
+EXPORT_SYMBOL(xenbus_rm);
 
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
@@ -298,6 +306,7 @@
 {
        return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL));
 }
+EXPORT_SYMBOL(xenbus_transaction_start);
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
@@ -312,6 +321,7 @@
                strcpy(abortstr, "T");
        return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
 }
+EXPORT_SYMBOL(xenbus_transaction_end);
 
 /* Single read and scanf: returns -errno or num scanned. */
 int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
@@ -333,6 +343,7 @@
                return -ERANGE;
        return ret;
 }
+EXPORT_SYMBOL(xenbus_scanf);
 
 /* Single printf and write: returns -errno or 0. */
 int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
@@ -348,6 +359,7 @@
        BUG_ON(ret > sizeof(printf_buffer)-1);
        return xenbus_write(dir, node, printf_buffer, O_CREAT);
 }
+EXPORT_SYMBOL(xenbus_printf);
 
 /* Report a (negative) errno into the store, with explanation. */
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
@@ -369,6 +381,7 @@
                printk("xenbus: failed to write error node for %s (%s)\n",
                       dev->nodename, printf_buffer);
 }
+EXPORT_SYMBOL(xenbus_dev_error);
 
 /* Clear any error. */
 void xenbus_dev_ok(struct xenbus_device *dev)
@@ -381,6 +394,7 @@
                        dev->has_error = 0;
        }
 }
+EXPORT_SYMBOL(xenbus_dev_ok);
        
 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
 int xenbus_gather(const char *dir, ...)
@@ -410,6 +424,7 @@
        va_end(ap);
        return ret;
 }
+EXPORT_SYMBOL(xenbus_gather);
 
 static int xs_watch(const char *path, const char *token)
 {
@@ -482,6 +497,7 @@
                list_add(&watch->list, &watches);
        return err;
 }
+EXPORT_SYMBOL(register_xenbus_watch);
 
 void unregister_xenbus_watch(struct xenbus_watch *watch)
 {
@@ -499,6 +515,7 @@
                       "XENBUS Failed to release watch %s: %i\n",
                       watch->node, err);
 }
+EXPORT_SYMBOL(unregister_xenbus_watch);
 
 /* Re-register callbacks to all watches. */
 void reregister_xenbus_watches(void)
@@ -540,7 +557,7 @@
                        BUG_ON(!w);
                        w->callback(w, node);
                        kfree(node);
-               } else
+               } else if (node)
                        printk(KERN_WARNING "XENBUS xs_read_watch: %li\n",
                               PTR_ERR(node));
                up(&xenbus_lock);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Fri Sep  9 
16:30:54 2005
@@ -29,551 +29,360 @@
 
 #ifndef __HYPERCALL_H__
 #define __HYPERCALL_H__
+
 #include <asm-xen/xen-public/xen.h>
 
-/*
- * Assembler stubs for hyper-calls.
- */
+#define _hypercall0(type, name)                        \
+({                                             \
+       long __res;                             \
+       asm volatile (                          \
+               TRAP_INSTR                      \
+               : "=a" (__res)                  \
+               : "0" (__HYPERVISOR_##name)     \
+               : "memory" );                   \
+       (type)__res;                            \
+})
+
+#define _hypercall1(type, name, a1)                            \
+({                                                             \
+       long __res, __ign1;                                     \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=b" (__ign1)                   \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1))   \
+               : "memory" );                                   \
+       (type)__res;                                            \
+})
+
+#define _hypercall2(type, name, a1, a2)                                \
+({                                                             \
+       long __res, __ign1, __ign2;                             \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=b" (__ign1), "=c" (__ign2)    \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2))                                \
+               : "memory" );                                   \
+       (type)__res;                                            \
+})
+
+#define _hypercall3(type, name, a1, a2, a3)                    \
+({                                                             \
+       long __res, __ign1, __ign2, __ign3;                     \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
+               "=d" (__ign3)                                   \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2)), "3" ((long)(a3))              \
+               : "memory" );                                   \
+       (type)__res;                                            \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4)                        \
+({                                                             \
+       long __res, __ign1, __ign2, __ign3, __ign4;             \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
+               "=d" (__ign3), "=S" (__ign4)                    \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2)), "3" ((long)(a3)),             \
+               "4" ((long)(a4))                                \
+               : "memory" );                                   \
+       (type)__res;                                            \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5)            \
+({                                                             \
+       long __res, __ign1, __ign2, __ign3, __ign4, __ign5;     \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
+               "=d" (__ign3), "=S" (__ign4), "=D" (__ign5)     \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2)), "3" ((long)(a3)),             \
+               "4" ((long)(a4)), "5" ((long)(a5))              \
+               : "memory" );                                   \
+       (type)__res;                                            \
+})
 
 static inline int
 HYPERVISOR_set_trap_table(
-    trap_info_t *table)
-{
-    int ret;
-    unsigned long ignore;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ignore)
-       : "0" (__HYPERVISOR_set_trap_table), "1" (table)
-       : "memory" );
-
-    return ret;
+       trap_info_t *table)
+{
+       return _hypercall1(int, set_trap_table, table);
 }
 
 static inline int
 HYPERVISOR_mmu_update(
-    mmu_update_t *req, int count, int *success_count, domid_t domid)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-       : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
-        "3" (success_count), "4" (domid)
-       : "memory" );
-
-    return ret;
+       mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+       return _hypercall4(int, mmu_update, req, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_mmuext_op(
-    struct mmuext_op *op, int count, int *success_count, domid_t domid)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-       : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
-        "3" (success_count), "4" (domid)
-       : "memory" );
-
-    return ret;
+       struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+       return _hypercall4(int, mmuext_op, op, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_set_gdt(
-    unsigned long *frame_list, int entries)
-{
-    int ret;
-    unsigned long ign1, ign2;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
-       : "memory" );
-
-
-    return ret;
+       unsigned long *frame_list, int entries)
+{
+       return _hypercall2(int, set_gdt, frame_list, entries);
 }
 
 static inline int
 HYPERVISOR_stack_switch(
-    unsigned long ss, unsigned long esp)
-{
-    int ret;
-    unsigned long ign1, ign2;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
-       : "memory" );
-
-    return ret;
+       unsigned long ss, unsigned long esp)
+{
+       return _hypercall2(int, stack_switch, ss, esp);
 }
 
 static inline int
 HYPERVISOR_set_callbacks(
-    unsigned long event_selector, unsigned long event_address,
-    unsigned long failsafe_selector, unsigned long failsafe_address)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-       : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
-         "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
-       : "memory" );
-
-    return ret;
+       unsigned long event_selector, unsigned long event_address,
+       unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+       return _hypercall4(int, set_callbacks,
+                          event_selector, event_address,
+                          failsafe_selector, failsafe_address);
 }
 
 static inline int
 HYPERVISOR_fpu_taskswitch(
-    int set)
-{
-    int ret;
-    unsigned long ign;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign)
-        : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set)
-        : "memory" );
-
-    return ret;
+       int set)
+{
+       return _hypercall1(int, fpu_taskswitch, set);
 }
 
 static inline int
 HYPERVISOR_yield(
-    void)
-{
-    int ret;
-    unsigned long ign;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign)
-       : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
-       : "memory", "ecx" );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_yield, 0);
 }
 
 static inline int
 HYPERVISOR_block(
-    void)
-{
-    int ret;
-    unsigned long ign1;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
-       : "memory", "ecx" );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_block, 0);
 }
 
 static inline int
 HYPERVISOR_shutdown(
-    void)
-{
-    int ret;
-    unsigned long ign1;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_sched_op),
-         "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
-        : "memory", "ecx" );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+                          (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0);
 }
 
 static inline int
 HYPERVISOR_reboot(
-    void)
-{
-    int ret;
-    unsigned long ign1;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_sched_op),
-         "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
-        : "memory", "ecx" );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(
-    unsigned long srec)
-{
-    int ret;
-    unsigned long ign1, ign2;
-
-    /* NB. On suspend, control software expects a suspend record in %esi. */
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=S" (ign2)
-       : "0" (__HYPERVISOR_sched_op),
-        "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
-        "S" (srec) : "memory", "ecx");
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+                          (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0);
 }
 
 static inline int
 HYPERVISOR_crash(
-    void)
-{
-    int ret;
-    unsigned long ign1;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_sched_op),
-         "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
-        : "memory", "ecx" );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+                          (SHUTDOWN_crash << SCHEDOP_reasonshift), 0);
 }
 
 static inline long
 HYPERVISOR_set_timer_op(
-    u64 timeout)
-{
-    int ret;
-    unsigned long timeout_hi = (unsigned long)(timeout>>32);
-    unsigned long timeout_lo = (unsigned long)timeout;
-    unsigned long ign1, ign2;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi)
-       : "memory");
-
-    return ret;
+       u64 timeout)
+{
+       unsigned long timeout_hi = (unsigned long)(timeout>>32);
+       unsigned long timeout_lo = (unsigned long)timeout;
+       return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
 }
 
 static inline int
 HYPERVISOR_dom0_op(
-    dom0_op_t *dom0_op)
-{
-    int ret;
-    unsigned long ign1;
-
-    dom0_op->interface_version = DOM0_INTERFACE_VERSION;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
-       : "memory");
-
-    return ret;
+       dom0_op_t *dom0_op)
+{
+       dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+       return _hypercall1(int, dom0_op, dom0_op);
 }
 
 static inline int
 HYPERVISOR_set_debugreg(
-    int reg, unsigned long value)
-{
-    int ret;
-    unsigned long ign1, ign2;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
-       : "memory" );
-
-    return ret;
+       int reg, unsigned long value)
+{
+       return _hypercall2(int, set_debugreg, reg, value);
 }
 
 static inline unsigned long
 HYPERVISOR_get_debugreg(
-    int reg)
-{
-    unsigned long ret;
-    unsigned long ign;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign)
-       : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
-       : "memory" );
-
-    return ret;
+       int reg)
+{
+       return _hypercall1(unsigned long, get_debugreg, reg);
 }
 
 static inline int
 HYPERVISOR_update_descriptor(
-    u64 ma, u64 desc)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-       : "0" (__HYPERVISOR_update_descriptor),
-         "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)),
-         "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32))
-       : "memory" );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_dom_mem_op(
-    unsigned int op, unsigned long *extent_list,
-    unsigned long nr_extents, unsigned int extent_order)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4, ign5;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
-         "=D" (ign5)
-       : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
-         "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
-        : "memory" );
-
-    return ret;
+       u64 ma, u64 desc)
+{
+       return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
+}
+
+static inline int
+HYPERVISOR_memory_op(
+       unsigned int cmd, void *arg)
+{
+       return _hypercall2(int, memory_op, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_multicall(
-    void *call_list, int nr_calls)
-{
-    int ret;
-    unsigned long ign1, ign2;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
-       : "memory" );
-
-    return ret;
+       void *call_list, int nr_calls)
+{
+       return _hypercall2(int, multicall, call_list, nr_calls);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping(
-    unsigned long va, pte_t new_val, unsigned long flags)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
-       : "0" (__HYPERVISOR_update_va_mapping), 
-          "1" (va), "2" ((new_val).pte_low),
+       unsigned long va, pte_t new_val, unsigned long flags)
+{
+       unsigned long pte_hi = 0;
 #ifdef CONFIG_X86_PAE
-         "3" ((new_val).pte_high),
-#else
-         "3" (0),
+       pte_hi = new_val.pte_high;
 #endif
-         "4" (flags)
-       : "memory" );
-
-    return ret;
+       return _hypercall4(int, update_va_mapping, va,
+                          new_val.pte_low, pte_hi, flags);
 }
 
 static inline int
 HYPERVISOR_event_channel_op(
-    void *op)
-{
-    int ret;
-    unsigned long ignore;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ignore)
-       : "0" (__HYPERVISOR_event_channel_op), "1" (op)
-       : "memory" );
-
-    return ret;
+       void *op)
+{
+       return _hypercall1(int, event_channel_op, op);
 }
 
 static inline int
 HYPERVISOR_xen_version(
-    int cmd)
-{
-    int ret;
-    unsigned long ignore;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ignore)
-       : "0" (__HYPERVISOR_xen_version), "1" (cmd)
-       : "memory" );
-
-    return ret;
+       int cmd)
+{
+       return _hypercall1(int, xen_version, cmd);
 }
 
 static inline int
 HYPERVISOR_console_io(
-    int cmd, int count, char *str)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
-       : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
-       : "memory" );
-
-    return ret;
+       int cmd, int count, char *str)
+{
+       return _hypercall3(int, console_io, cmd, count, str);
 }
 
 static inline int
 HYPERVISOR_physdev_op(
-    void *physdev_op)
-{
-    int ret;
-    unsigned long ign;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign)
-       : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
-       : "memory" );
-
-    return ret;
+       void *physdev_op)
+{
+       return _hypercall1(int, physdev_op, physdev_op);
 }
 
 static inline int
 HYPERVISOR_grant_table_op(
-    unsigned int cmd, void *uop, unsigned int count)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
-       : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
-       : "memory" );
-
-    return ret;
+       unsigned int cmd, void *uop, unsigned int count)
+{
+       return _hypercall3(int, grant_table_op, cmd, uop, count);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping_otherdomain(
-    unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3, ign4, ign5;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3),
-         "=S" (ign4), "=D" (ign5)
-       : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
-          "1" (va), "2" ((new_val).pte_low),
+       unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+       unsigned long pte_hi = 0;
 #ifdef CONFIG_X86_PAE
-         "3" ((new_val).pte_high),
-#else
-         "3" (0),
+       pte_hi = new_val.pte_high;
 #endif
-         "4" (flags), "5" (domid) :
-        "memory" );
-    
-    return ret;
+       return _hypercall5(int, update_va_mapping_otherdomain, va,
+                          new_val.pte_low, pte_hi, flags, domid);
 }
 
 static inline int
 HYPERVISOR_vm_assist(
-    unsigned int cmd, unsigned int type)
-{
-    int ret;
-    unsigned long ign1, ign2;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
-       : "memory" );
-
-    return ret;
+       unsigned int cmd, unsigned int type)
+{
+       return _hypercall2(int, vm_assist, cmd, type);
 }
 
 static inline int
 HYPERVISOR_boot_vcpu(
-    unsigned long vcpu, vcpu_guest_context_t *ctxt)
-{
-    int ret;
-    unsigned long ign1, ign2;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
-       : "memory");
-
-    return ret;
+       unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+       return _hypercall2(int, boot_vcpu, vcpu, ctxt);
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+       int vcpu)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_vcpu_up |
+                          (vcpu << SCHEDOP_vcpushift), 0);
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+       int vcpu, vcpu_guest_context_t *ctxt)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle |
+                          (vcpu << SCHEDOP_vcpushift), ctxt);
+}
+
+static inline int
+HYPERVISOR_suspend(
+       unsigned long srec)
+{
+       int ret;
+       unsigned long ign1, ign2;
+
+       /* On suspend, control software expects a suspend record in %esi. */
+       __asm__ __volatile__ (
+               TRAP_INSTR
+               : "=a" (ret), "=b" (ign1), "=S" (ign2)
+               : "0" (__HYPERVISOR_sched_op),
+               "1" (SCHEDOP_shutdown | (SHUTDOWN_suspend <<
+                                        SCHEDOP_reasonshift)), 
+               "2" (srec) : "memory", "ecx");
+
+       return ret;
 }
 
 static inline int
 HYPERVISOR_vcpu_down(
-    int vcpu)
-{
-    int ret;
-    unsigned long ign1;
-    /* Yes, I really do want to clobber edx here: when we resume a
-       vcpu after unpickling a multi-processor domain, it returns
-       here, but clobbers all of the call clobbered registers. */
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_sched_op),
-         "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
-        : "memory", "ecx", "edx" );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_vcpu_up(
-    int vcpu)
-{
-    int ret;
-    unsigned long ign1;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1)
-       : "0" (__HYPERVISOR_sched_op),
-         "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
-        : "memory", "ecx" );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_vcpu_pickle(
-    int vcpu, vcpu_guest_context_t *ctxt)
-{
-    int ret;
-    unsigned long ign1, ign2;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2)
-       : "0" (__HYPERVISOR_sched_op),
-         "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
-         "2" (ctxt)
-        : "memory" );
-
-    return ret;
+       int vcpu)
+{
+       int ret;
+       unsigned long ign1;
+       /* Yes, I really do want to clobber edx here: when we resume a
+          vcpu after unpickling a multi-processor domain, it returns
+          here, but clobbers all of the call clobbered registers. */
+       __asm__ __volatile__ (
+               TRAP_INSTR
+               : "=a" (ret), "=b" (ign1)
+               : "0" (__HYPERVISOR_sched_op),
+               "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
+               : "memory", "ecx", "edx" );
+       return ret;
 }
 
 #endif /* __HYPERCALL_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h  
Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h  
Fri Sep  9 16:30:54 2005
@@ -8,7 +8,7 @@
 
 static char * __init machine_specific_memory_setup(void)
 {
-       unsigned long max_pfn = xen_start_info.nr_pages;
+       unsigned long max_pfn = xen_start_info->nr_pages;
 
        e820.nr_map = 0;
        add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
@@ -23,7 +23,7 @@
        clear_bit(X86_FEATURE_PSE, c->x86_capability);
        clear_bit(X86_FEATURE_PGE, c->x86_capability);
        clear_bit(X86_FEATURE_SEP, c->x86_capability);
-       if (!(xen_start_info.flags & SIF_PRIVILEGED))
+       if (!(xen_start_info->flags & SIF_PRIVILEGED))
                clear_bit(X86_FEATURE_MTRR, c->x86_capability);
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Fri Sep 
 9 16:30:54 2005
@@ -35,9 +35,9 @@
         * happen before reload of cr3/ldt (i.e., not in __switch_to).
         */
        asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
-               : "=m" (*(int *)&current->thread.fs),
-                 "=m" (*(int *)&current->thread.gs));
-       asm volatile ( "mov %0,%%fs ; mov %0,%%gs"
+               : "=m" (current->thread.fs),
+                 "=m" (current->thread.gs));
+       asm volatile ( "movl %0,%%fs ; movl %0,%%gs"
                : : "r" (0) );
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Fri Sep  9 
16:30:54 2005
@@ -60,14 +60,14 @@
 #define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
-#define INVALID_P2M_ENTRY      (~0U)
-#define FOREIGN_FRAME(m)       ((m) | 0x80000000U)
-extern unsigned int *phys_to_machine_mapping;
+#define INVALID_P2M_ENTRY      (~0UL)
+#define FOREIGN_FRAME(m)       ((m) | (1UL<<31))
+extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(pfn)        \
-((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+(phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL<<31))
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
-       unsigned int pfn;
+       unsigned long pfn;
 
        /*
         * The array access can fail (e.g., device space beyond end of RAM).
@@ -83,7 +83,7 @@
                ".previous"
                : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
 
-       return (unsigned long)pfn;
+       return pfn;
 }
 
 /* Definitions for machine and pseudophysical addresses. */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h       Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h       Fri Sep  9 
16:30:54 2005
@@ -43,8 +43,32 @@
 
 struct pci_dev;
 
+#ifdef CONFIG_SWIOTLB
+
+
 /* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
 #define PCI_DMA_BUS_IS_PHYS    (0)
+
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)      \
+       dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)                \
+       __u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME)                 \
+       ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)                \
+       (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME)                   \
+       ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)          \
+       (((PTR)->LEN_NAME) = (VAL))
+
+#else
+
+/* The PCI address space does equal the physical memory
+ * address space.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS    (1)
 
 /* pci_unmap_{page,single} is a nop so... */
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
@@ -53,6 +77,8 @@
 #define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)        do { } while (0)
 #define pci_unmap_len(PTR, LEN_NAME)           (0)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)  do { } while (0)
+
+#endif
 
 /* This is always fine. */
 #define pci_dac_dma_supported(pci_dev, mask)   (1)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Fri Sep  9 
16:30:54 2005
@@ -460,9 +460,9 @@
 #define kern_addr_valid(addr)  (1)
 #endif /* !CONFIG_DISCONTIGMEM */
 
-int direct_remap_area_pages(struct mm_struct *mm,
+int direct_remap_pfn_range(struct mm_struct *mm,
                             unsigned long address, 
-                            unsigned long machine_addr,
+                            unsigned long mfn,
                             unsigned long size, 
                             pgprot_t prot,
                             domid_t  domid);
@@ -474,10 +474,10 @@
                     unsigned long size);
 
 #define io_remap_page_range(vma,from,phys,size,prot) \
-direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
+direct_remap_pfn_range(vma->vm_mm,from,phys>>PAGE_SHIFT,size,prot,DOMID_IO)
 
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
-direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
+direct_remap_pfn_range(vma->vm_mm,from,pfn,size,prot,DOMID_IO)
 
 #define MK_IOSPACE_PFN(space, pfn)     (pfn)
 #define GET_IOSPACE(pfn)               0
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Fri Sep  9 
16:30:54 2005
@@ -517,8 +517,8 @@
  * This special macro can be used to load a debugging register
  */
 #define loaddebug(thread,register) \
-       HYPERVISOR_set_debugreg((register),     \
-                       ((thread)->debugreg[register]))
+               HYPERVISOR_set_debugreg((register), \
+                                       ((thread)->debugreg[register]))
 
 /* Forward declaration, a strange C thing */
 struct task_struct;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h     Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h     Fri Sep  9 
16:30:54 2005
@@ -53,8 +53,8 @@
 #define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF))
 #define LOADER_TYPE (*(unsigned char *) (PARAM+0x210))
 #define KERNEL_START (*(unsigned long *) (PARAM+0x214))
-#define INITRD_START (__pa(xen_start_info.mod_start))
-#define INITRD_SIZE (xen_start_info.mod_len)
+#define INITRD_START (__pa(xen_start_info->mod_start))
+#define INITRD_SIZE (xen_start_info->mod_len)
 #define EDID_INFO   (*(struct edid_info *) (PARAM+0x440))
 #define EDD_NR     (*(unsigned char *) (PARAM+EDDNR))
 #define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF))
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Fri Sep  9 
16:30:54 2005
@@ -561,8 +561,14 @@
 #define local_irq_disable()    __cli()
 #define local_irq_enable()     __sti()
 
+/* Don't use smp_processor_id: this is called in debug versions of that fn. */
+#ifdef CONFIG_SMP
 #define irqs_disabled()                        \
-    HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask
+    HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask
+#else
+#define irqs_disabled()                        \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
+#endif
 
 /*
  * disable hlt during certain critical i/o operations
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h       Fri Sep 
 9 16:30:54 2005
@@ -25,8 +25,8 @@
 #define LOADER_TYPE (*(unsigned char *) (PARAM+0x210))
 #define KERNEL_START (*(unsigned int *) (PARAM+0x214))
 
-#define INITRD_START (__pa(xen_start_info.mod_start))
-#define INITRD_SIZE (xen_start_info.mod_len)
+#define INITRD_START (__pa(xen_start_info->mod_start))
+#define INITRD_SIZE (xen_start_info->mod_len)
 #define EDID_INFO   (*(struct edid_info *) (PARAM+0x440))
 
 #define EDD_NR     (*(unsigned char *) (PARAM+EDDNR))
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h       Fri Sep 
 9 16:30:54 2005
@@ -4,6 +4,10 @@
  * Linux-specific hypervisor handling.
  * 
  * Copyright (c) 2002-2004, K A Fraser
+ * 
+ * 64-bit updates:
+ *   Benjamin Liu <benjamin.liu@xxxxxxxxx>
+ *   Jun Nakajima <jun.nakajima@xxxxxxxxx>
  * 
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
@@ -26,497 +30,329 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
-/*
- * Benjamin Liu <benjamin.liu@xxxxxxxxx>
- * Jun Nakajima <jun.nakajima@xxxxxxxxx>
- *   Ported to x86-64.
- * 
- */
 
 #ifndef __HYPERCALL_H__
 #define __HYPERCALL_H__
+
 #include <asm-xen/xen-public/xen.h>
 
 #define __syscall_clobber "r11","rcx","memory"
 
-/*
- * Assembler stubs for hyper-calls.
- */
+#define _hypercall0(type, name)                        \
+({                                             \
+       long __res;                             \
+       asm volatile (                          \
+               TRAP_INSTR                      \
+               : "=a" (__res)                  \
+               : "0" (__HYPERVISOR_##name)     \
+               : __syscall_clobber );          \
+       (type)__res;                            \
+})
+
+#define _hypercall1(type, name, a1)                            \
+({                                                             \
+       long __res, __ign1;                                     \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=D" (__ign1)                   \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1))   \
+               : __syscall_clobber );                          \
+       (type)__res;                                            \
+})
+
+#define _hypercall2(type, name, a1, a2)                                \
+({                                                             \
+       long __res, __ign1, __ign2;                             \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=D" (__ign1), "=S" (__ign2)    \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2))                                \
+               : __syscall_clobber );                          \
+       (type)__res;                                            \
+})
+
+#define _hypercall3(type, name, a1, a2, a3)                    \
+({                                                             \
+       long __res, __ign1, __ign2, __ign3;                     \
+       asm volatile (                                          \
+               TRAP_INSTR                                      \
+               : "=a" (__res), "=D" (__ign1), "=S" (__ign2),   \
+               "=d" (__ign3)                                   \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2)), "3" ((long)(a3))              \
+               : __syscall_clobber );                          \
+       (type)__res;                                            \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4)                        \
+({                                                             \
+       long __res, __ign1, __ign2, __ign3;                     \
+       asm volatile (                                          \
+               "movq %8,%%r10; " TRAP_INSTR                    \
+               : "=a" (__res), "=D" (__ign1), "=S" (__ign2),   \
+               "=d" (__ign3)                                   \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2)), "3" ((long)(a3)),             \
+               "g" ((long)(a4))                                \
+               : __syscall_clobber, "r10" );                   \
+       (type)__res;                                            \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5)            \
+({                                                             \
+       long __res, __ign1, __ign2, __ign3;                     \
+       asm volatile (                                          \
+               "movq %8,%%r10; movq %9,%%r8; " TRAP_INSTR      \
+               : "=a" (__res), "=D" (__ign1), "=S" (__ign2),   \
+               "=d" (__ign3)                                   \
+               : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),  \
+               "2" ((long)(a2)), "3" ((long)(a3)),             \
+               "g" ((long)(a4)), "g" ((long)(a5))              \
+               : __syscall_clobber, "r10", "r8" );             \
+       (type)__res;                                            \
+})
+
 static inline int
 HYPERVISOR_set_trap_table(
-    trap_info_t *table)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table)
-       : __syscall_clobber );
-
-    return ret;
+       trap_info_t *table)
+{
+       return _hypercall1(int, set_trap_table, table);
 }
 
 static inline int
 HYPERVISOR_mmu_update(
-    mmu_update_t *req, int count, int *success_count, domid_t domid)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        "movq %5, %%r10;" TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" 
((long)count),
-         "d" (success_count), "g" ((unsigned long)domid)
-       : __syscall_clobber, "r10" );
-
-    return ret;
+       mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+       return _hypercall4(int, mmu_update, req, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_mmuext_op(
-    struct mmuext_op *op, int count, int *success_count, domid_t domid)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        "movq %5, %%r10;" TRAP_INSTR
-        : "=a" (ret)
-        : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count), 
-          "d" (success_count), "g" ((unsigned long)domid)
-        : __syscall_clobber, "r10" );
-
-    return ret;
+       struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+       return _hypercall4(int, mmuext_op, op, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_set_gdt(
-    unsigned long *frame_list, int entries)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" 
((long)entries)
-       : __syscall_clobber );
-
-
-    return ret;
-}
+       unsigned long *frame_list, int entries)
+{
+       return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
 static inline int
 HYPERVISOR_stack_switch(
-    unsigned long ss, unsigned long esp)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp)
-       : __syscall_clobber );
-
-    return ret;
+       unsigned long ss, unsigned long esp)
+{
+       return _hypercall2(int, stack_switch, ss, esp);
 }
 
 static inline int
 HYPERVISOR_set_callbacks(
-    unsigned long event_address, unsigned long failsafe_address, 
-    unsigned long syscall_address)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address),
-         "S" (failsafe_address), "d" (syscall_address)
-       : __syscall_clobber );
-
-    return ret;
+       unsigned long event_address, unsigned long failsafe_address, 
+       unsigned long syscall_address)
+{
+       return _hypercall3(int, set_callbacks,
+                          event_address, failsafe_address, syscall_address);
 }
 
 static inline int
 HYPERVISOR_fpu_taskswitch(
-    int set)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch),
-          "D" ((unsigned long) set) : __syscall_clobber );
-
-    return ret;
+       int set)
+{
+       return _hypercall1(int, fpu_taskswitch, set);
 }
 
 static inline int
 HYPERVISOR_yield(
-    void)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned 
long)SCHEDOP_yield)
-       : __syscall_clobber );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_yield, 0);
 }
 
 static inline int
 HYPERVISOR_block(
-    void)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned 
long)SCHEDOP_block)
-       : __syscall_clobber );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_block, 0);
 }
 
 static inline int
 HYPERVISOR_shutdown(
-    void)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_sched_op),
-         "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << 
SCHEDOP_reasonshift)))
-       : __syscall_clobber );
-
-    return ret;
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+                          (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0);
 }
 
 static inline int
 HYPERVISOR_reboot(
-    void)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_sched_op),
-         "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << 
SCHEDOP_reasonshift)))
-       : __syscall_clobber );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(
-    unsigned long srec)
-{
-    int ret;
-
-    /* NB. On suspend, control software expects a suspend record in %esi. */
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_sched_op),
-        "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << 
SCHEDOP_reasonshift))), 
-        "S" (srec)
-       : __syscall_clobber );
-
-    return ret;
-}
-
-/*
- * We can have the timeout value in a single argument for the hypercall, but
- * that will break the common code. 
- */
+       void)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+                          (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0);
+}
+
 static inline long
 HYPERVISOR_set_timer_op(
-    u64 timeout)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_set_timer_op),
-         "D" (timeout)
-       : __syscall_clobber );
-
-    return ret;
+       u64 timeout)
+{
+       return _hypercall1(long, set_timer_op, timeout);
 }
 
 static inline int
 HYPERVISOR_dom0_op(
-    dom0_op_t *dom0_op)
-{
-    int ret;
-
-    dom0_op->interface_version = DOM0_INTERFACE_VERSION;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_dom0_op), "D" (dom0_op)
-       : __syscall_clobber );
-
-    return ret;
+       dom0_op_t *dom0_op)
+{
+       dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+       return _hypercall1(int, dom0_op, dom0_op);
 }
 
 static inline int
 HYPERVISOR_set_debugreg(
-    int reg, unsigned long value)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_set_debugreg), "D" ((unsigned 
long)reg), "S" (value)
-       : __syscall_clobber );
-
-    return ret;
+       int reg, unsigned long value)
+{
+       return _hypercall2(int, set_debugreg, reg, value);
 }
 
 static inline unsigned long
 HYPERVISOR_get_debugreg(
-    int reg)
-{
-    unsigned long ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_get_debugreg), "D" ((unsigned 
long)reg)
-       : __syscall_clobber );
-
-    return ret;
+       int reg)
+{
+       return _hypercall1(unsigned long, get_debugreg, reg);
 }
 
 static inline int
 HYPERVISOR_update_descriptor(
-    unsigned long ma, unsigned long word)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_update_descriptor), "D" (ma),
-         "S" (word)
-       : __syscall_clobber );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_dom_mem_op(
-    unsigned int op, unsigned long *extent_list,
-    unsigned long nr_extents, unsigned int extent_order)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_dom_mem_op), "D" ((unsigned 
long)op), "S" (extent_list),
-         "d" (nr_extents), "g" ((unsigned long) extent_order), "g" ((unsigned 
long) DOMID_SELF)
-       : __syscall_clobber,"r8","r10");
-
-    return ret;
+       unsigned long ma, unsigned long word)
+{
+       return _hypercall2(int, update_descriptor, ma, word);
+}
+
+static inline int
+HYPERVISOR_memory_op(
+       unsigned int cmd, void *arg)
+{
+       return _hypercall2(int, memory_op, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_multicall(
-    void *call_list, int nr_calls)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_multicall), "D" (call_list), "S" 
((unsigned long)nr_calls)
-       : __syscall_clobber);
-
-    return ret;
+       void *call_list, int nr_calls)
+{
+       return _hypercall2(int, multicall, call_list, nr_calls);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping(
-    unsigned long page_nr, pte_t new_val, unsigned long flags)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_update_va_mapping), 
-          "D" (page_nr), "S" (new_val.pte), "d" (flags)
-       : __syscall_clobber);
-
-    return ret;
+       unsigned long va, pte_t new_val, unsigned long flags)
+{
+       return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
 }
 
 static inline int
 HYPERVISOR_event_channel_op(
-    void *op)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_event_channel_op), "D" (op)
-       : __syscall_clobber);
-
-    return ret;
+       void *op)
+{
+       return _hypercall1(int, event_channel_op, op);
 }
 
 static inline int
 HYPERVISOR_xen_version(
-    int cmd)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_xen_version), "D" ((unsigned 
long)cmd)
-       : __syscall_clobber);
-
-    return ret;
+       int cmd)
+{
+       return _hypercall1(int, xen_version, cmd);
 }
 
 static inline int
 HYPERVISOR_console_io(
-    int cmd, int count, char *str)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_console_io), "D" ((unsigned 
long)cmd), "S" ((unsigned long)count), "d" (str)
-       : __syscall_clobber);
-
-    return ret;
+       int cmd, int count, char *str)
+{
+       return _hypercall3(int, console_io, cmd, count, str);
 }
 
 static inline int
 HYPERVISOR_physdev_op(
-    void *physdev_op)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_physdev_op), "D" (physdev_op)
-       : __syscall_clobber);
-
-    return ret;
+       void *physdev_op)
+{
+       return _hypercall1(int, physdev_op, physdev_op);
 }
 
 static inline int
 HYPERVISOR_grant_table_op(
-    unsigned int cmd, void *uop, unsigned int count)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_grant_table_op), "D" ((unsigned 
long)cmd), "S" ((unsigned long)uop), "d" (count)
-       : __syscall_clobber);
-
-    return ret;
+       unsigned int cmd, void *uop, unsigned int count)
+{
+       return _hypercall3(int, grant_table_op, cmd, uop, count);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping_otherdomain(
-    unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        "movq %5, %%r10;" TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_update_va_mapping_otherdomain),
-          "D" (page_nr), "S" (new_val.pte), "d" (flags), "g" ((unsigned 
long)domid)
-       : __syscall_clobber,"r10");
-    
-    return ret;
+       unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+       return _hypercall4(int, update_va_mapping_otherdomain, va,
+                          new_val.pte, flags, domid);
 }
 
 static inline int
 HYPERVISOR_vm_assist(
-    unsigned int cmd, unsigned int type)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_vm_assist), "D" ((unsigned 
long)cmd), "S" ((unsigned long)type)
-       : __syscall_clobber);
-
-    return ret;
+       unsigned int cmd, unsigned int type)
+{
+       return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int
+HYPERVISOR_boot_vcpu(
+       unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+       return _hypercall2(int, boot_vcpu, vcpu, ctxt);
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+       int vcpu)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_vcpu_up |
+                          (vcpu << SCHEDOP_vcpushift), 0);
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+       int vcpu, vcpu_guest_context_t *ctxt)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle |
+                          (vcpu << SCHEDOP_vcpushift), ctxt);
 }
 
 static inline int
 HYPERVISOR_switch_to_user(void)
 {
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_switch_to_user) : 
__syscall_clobber );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_boot_vcpu(
-    unsigned long vcpu, vcpu_guest_context_t *ctxt)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" (__HYPERVISOR_boot_vcpu), "D" (vcpu), "S" (ctxt)
-       : __syscall_clobber);
-
-    return ret;
+       return _hypercall0(int, switch_to_user);
 }
 
 static inline int
 HYPERVISOR_set_segment_base(
-    int reg, unsigned long value)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_set_segment_base), "D" ((unsigned 
long)reg), "S" (value)
-       : __syscall_clobber );
-
-    return ret;
-}
-
-static inline int
-HYPERVISOR_vcpu_pickle(
-    int vcpu, vcpu_guest_context_t *ctxt)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret)
-       : "0" ((unsigned long)__HYPERVISOR_sched_op),
-       "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
-       "S" ((unsigned long)ctxt)
-       : __syscall_clobber );
-
-    return ret;
+       int reg, unsigned long value)
+{
+       return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline int
+HYPERVISOR_suspend(
+       unsigned long srec)
+{
+       return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+                          (SHUTDOWN_suspend << SCHEDOP_reasonshift), srec);
 }
 
 #endif /* __HYPERCALL_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
--- 
a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h    
    Thu Sep  8 15:18:40 2005
+++ 
b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h    
    Fri Sep  9 16:30:54 2005
@@ -14,7 +14,7 @@
        who = "Xen";
 
        start_pfn = 0;
-       max_pfn = xen_start_info.nr_pages;
+       max_pfn = xen_start_info->nr_pages;
 
        e820.nr_map = 0;
        add_memory_region(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn) - 
PFN_PHYS(start_pfn), E820_RAM);
@@ -29,7 +29,7 @@
        clear_bit(X86_FEATURE_PSE, c->x86_capability);
        clear_bit(X86_FEATURE_PGE, c->x86_capability);
        clear_bit(X86_FEATURE_SEP, c->x86_capability);
-       if (!(xen_start_info.flags & SIF_PRIVILEGED))
+       if (!(xen_start_info->flags & SIF_PRIVILEGED))
                clear_bit(X86_FEATURE_MTRR, c->x86_capability);
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Fri Sep  9 
16:30:54 2005
@@ -62,14 +62,14 @@
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
-#define INVALID_P2M_ENTRY      (~0U)
-#define FOREIGN_FRAME(m)       ((m) | 0x80000000U)
-extern u32 *phys_to_machine_mapping;
+#define INVALID_P2M_ENTRY      (~0UL)
+#define FOREIGN_FRAME(m)       ((m) | (1UL<<63))
+extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(pfn)        \
-((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+(phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL << 63))
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
-       unsigned int pfn;
+       unsigned long pfn;
 
        /*
         * The array access can fail (e.g., device space beyond end of RAM).
@@ -77,7 +77,7 @@
         * but we must handle the fault without crashing!
         */
        asm (
-               "1:     movl %1,%k0\n"
+               "1:     movq %1,%0\n"
                "2:\n"
                ".section __ex_table,\"a\"\n"
                "       .align 8\n"
@@ -85,7 +85,7 @@
                ".previous"
                : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
 
-       return (unsigned long)pfn;
+       return pfn;
 }
 
 /* Definitions for machine and pseudophysical addresses. */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h     Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h     Fri Sep  9 
16:30:54 2005
@@ -76,12 +76,29 @@
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)          \
        (((PTR)->LEN_NAME) = (VAL))
 
+#elif defined(CONFIG_SWIOTLB)
+
+#define PCI_DMA_BUS_IS_PHYS    0
+
+#define pci_dac_dma_supported(pci_dev, mask)    1
+
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)      \
+       dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)                \
+       __u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME)                 \
+       ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)                \
+       (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME)                   \
+       ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)          \
+       (((PTR)->LEN_NAME) = (VAL))
+
 #else
 /* No IOMMU */
 
-/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
-#define PCI_DMA_BUS_IS_PHYS    (0)
-
+#define PCI_DMA_BUS_IS_PHYS    1
 #define pci_dac_dma_supported(pci_dev, mask)    1
 
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Fri Sep  9 
16:30:54 2005
@@ -307,7 +307,7 @@
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
-       unsigned pfn = mfn_to_pfn(mfn);                                 \
+       unsigned long pfn = mfn_to_pfn(mfn);                            \
        if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
@@ -526,28 +526,26 @@
 
 #define DOMID_LOCAL (0xFFFFU)
 
-int direct_remap_area_pages(struct mm_struct *mm,
+int direct_remap_pfn_range(struct mm_struct *mm,
                             unsigned long address,
-                            unsigned long machine_addr,
+                            unsigned long mfn,
                             unsigned long size,
                             pgprot_t prot,
                             domid_t  domid);
-int __direct_remap_area_pages(struct mm_struct *mm,
-                              unsigned long address,
-                              unsigned long size,
-                              mmu_update_t *v);
+
 int create_lookup_pte_addr(struct mm_struct *mm,
                            unsigned long address,
                            unsigned long *ptep);
+
 int touch_pte_range(struct mm_struct *mm,
                     unsigned long address,
                     unsigned long size);
 
 #define io_remap_page_range(vma, vaddr, paddr, size, prot)             \
-               
direct_remap_area_pages((vma)->vm_mm,vaddr,paddr,size,prot,DOMID_IO)
+               
direct_remap_pfn_range((vma)->vm_mm,vaddr,paddr>>PAGE_SHIFT,size,prot,DOMID_IO)
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)                \
-               
direct_remap_area_pages((vma)->vm_mm,vaddr,(pfn)<<PAGE_SHIFT,size,prot,DOMID_IO)
+               
direct_remap_pfn_range((vma)->vm_mm,vaddr,pfn,size,prot,DOMID_IO)
 
 #define MK_IOSPACE_PFN(space, pfn)     (pfn)
 #define GET_IOSPACE(pfn)               0
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h  Thu Sep  8 
15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h  Fri Sep  9 
16:30:54 2005
@@ -387,8 +387,14 @@
 #define local_irq_disable()    __cli()
 #define local_irq_enable()     __sti()
 
+/* Don't use smp_processor_id: this is called in debug versions of that fn. */
+#ifdef CONFIG_SMP
 #define irqs_disabled()                        \
-    HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask
+    HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask
+#else
+#define irqs_disabled()                        \
+    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
+#endif
 
 /*
  * disable hlt during certain critical i/o operations
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Sep  9 16:30:54 2005
@@ -52,13 +52,7 @@
 #endif
 
 /* arch/xen/i386/kernel/setup.c */
-union xen_start_info_union
-{
-    start_info_t xen_start_info;
-    char padding[2048];
-};
-extern union xen_start_info_union xen_start_info_union;
-#define xen_start_info (xen_start_info_union.xen_start_info)
+extern start_info_t *xen_start_info;
 
 /* arch/xen/kernel/evtchn.c */
 /* Force a proper event-channel callback from Xen. */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h
--- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h       Thu Sep 
 8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h       Fri Sep 
 9 16:30:54 2005
@@ -70,14 +70,6 @@
 #define IOCTL_PRIVCMD_HYPERCALL         \
     _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
 
-/*
- * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN
- * @arg: n/a
- * Return: Port associated with domain-controller end of control event channel
- *         for the initial domain.
- */
-#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \
-    _IOC(_IOC_NONE, 'P', 1, 0)
 #define IOCTL_PRIVCMD_MMAP             \
     _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
 #define IOCTL_PRIVCMD_MMAPBATCH             \
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Fri Sep  9 16:30:54 2005
@@ -64,6 +64,7 @@
        int (*remove)(struct xenbus_device *dev);
        int (*suspend)(struct xenbus_device *dev);
        int (*resume)(struct xenbus_device *dev);
+       int (*hotplug)(struct xenbus_device *, char **, int, char *, int);
        struct device_driver driver;
 };
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Thu Sep  8 15:18:40 2005
+++ b/linux-2.6-xen-sparse/mm/memory.c  Fri Sep  9 16:30:54 2005
@@ -954,10 +954,8 @@
                         i++;
                         start += PAGE_SIZE;
                         len--;
-printk(KERN_ALERT "HIT  0x%lx\n", start);
                         continue;
                     } 
-else printk(KERN_ALERT "MISS 0x%lx\n", start);
                 }
 
                if (!vma || (vma->vm_flags & VM_IO)
@@ -1367,20 +1365,15 @@
        struct page *old_page, *new_page;
        unsigned long pfn = pte_pfn(pte);
        pte_t entry;
+       struct page invalid_page;
 
        if (unlikely(!pfn_valid(pfn))) {
-               /*
-                * This should really halt the system so it can be debugged or
-                * at least the kernel stops what it's doing before it corrupts
-                * data, but for the moment just pretend this is OOM.
-                */
-               pte_unmap(page_table);
-               printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
-                               address);
-               spin_unlock(&mm->page_table_lock);
-               return VM_FAULT_OOM;
-       }
-       old_page = pfn_to_page(pfn);
+               /* This can happen with /dev/mem (PROT_WRITE, MAP_PRIVATE). */
+               invalid_page.flags = (1<<PG_reserved) | (1<<PG_locked);
+               old_page = &invalid_page;
+       } else {
+               old_page = pfn_to_page(pfn);
+       }
 
        if (!TestSetPageLocked(old_page)) {
                int reuse = can_share_swap_page(old_page);
@@ -1416,7 +1409,13 @@
                new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
                if (!new_page)
                        goto no_new_page;
-               copy_user_highpage(new_page, old_page, address);
+               if (old_page == &invalid_page) {
+                       char *vto = kmap_atomic(new_page, KM_USER1);
+                       copy_page(vto, (void *)(address & PAGE_MASK));
+                       kunmap_atomic(vto, KM_USER1);
+               } else {
+                       copy_user_highpage(new_page, old_page, address);
+               }
        }
        /*
         * Re-check the pte - we dropped the lock
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/Makefile
--- a/tools/Makefile    Thu Sep  8 15:18:40 2005
+++ b/tools/Makefile    Fri Sep  9 16:30:54 2005
@@ -7,14 +7,23 @@
 SUBDIRS += misc
 SUBDIRS += examples
 SUBDIRS += xentrace
-SUBDIRS += python
-SUBDIRS += xcs
 SUBDIRS += xcutils
-#SUBDIRS += pygrub
 SUBDIRS += firmware
 SUBDIRS += security
 SUBDIRS += console
+ifeq ($(VTPM_TOOLS),y)
+SUBDIRS += vtpm_manager
+SUBDIRS += vtpm
+endif
 SUBDIRS += xenstat
+
+.PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean
+
+# These don't cross-compile
+ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
+SUBDIRS += python
+#SUBDIRS += pygrub
+endif
 
 .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/Rules.mk
--- a/tools/Rules.mk    Thu Sep  8 15:18:40 2005
+++ b/tools/Rules.mk    Fri Sep  9 16:30:54 2005
@@ -4,7 +4,6 @@
 
 XEN_XC             = $(XEN_ROOT)/tools/python/xen/lowlevel/xc
 XEN_LIBXC          = $(XEN_ROOT)/tools/libxc
-XEN_XCS            = $(XEN_ROOT)/tools/xcs
 XEN_XENSTORE       = $(XEN_ROOT)/tools/xenstore
 XEN_LIBXENSTAT     = $(XEN_ROOT)/tools/xenstat/libxenstat/src
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/Makefile
--- a/tools/blktap/Makefile     Thu Sep  8 15:18:40 2005
+++ b/tools/blktap/Makefile     Fri Sep  9 16:30:54 2005
@@ -6,7 +6,8 @@
 include $(XEN_ROOT)/tools/Rules.mk
 
 SUBDIRS :=
-SUBDIRS += parallax
+SUBDIRS += ublkback
+#SUBDIRS += parallax
 
 BLKTAP_INSTALL_DIR = /usr/sbin
 
@@ -14,12 +15,12 @@
 INSTALL_PROG       = $(INSTALL) -m0755
 INSTALL_DIR        = $(INSTALL) -d -m0755
 
-INCLUDES += -I. -I $(XEN_LIBXC)
+INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
 
 LIBS     := -lpthread -lz
 
 SRCS     :=
-SRCS     += blktaplib.c
+SRCS     += blktaplib.c xenbus.c blkif.c
 
 CFLAGS   += -Wall
 CFLAGS   += -Werror
@@ -28,17 +29,20 @@
 CFLAGS   += -g3
 CFLAGS   += -fno-strict-aliasing
 CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# get asprintf():
+CFLAGS   += -D _GNU_SOURCE
 # Get gcc to generate the dependencies for us.
 CFLAGS   += -Wp,-MD,.$(@F).d
 CFLAGS   += $(INCLUDES) 
 DEPS     = .*.d
 
 OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS    = blkdump
+IBINS   :=
+#IBINS   += blkdump
 
 LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
 
-all: mk-symlinks libblktap.so blkdump
+all: mk-symlinks libblktap.so #blkdump
        @set -e; for subdir in $(SUBDIRS); do \
                $(MAKE) -C $$subdir $@;       \
        done
@@ -59,7 +63,7 @@
        $(INSTALL_DIR) -p $(DESTDIR)/usr/include
        $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
-       $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
+       #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
        @set -e; for subdir in $(SUBDIRS); do \
                $(MAKE) -C $$subdir $@;       \
        done
@@ -79,14 +83,16 @@
        mv staging/i386/*.rpm .
        rm -rf staging
 
-libblktap.so: $(OBJS)
-       $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared -o      \
-             libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
+libblktap.so: $(OBJS) 
+       $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
+             -L$(XEN_XENSTORE) -l xenstore                       \
+             -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
        ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
        ln -sf libblktap.so.$(MAJOR) $@
 
 blkdump: libblktap.so
-       $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. -l blktap blkdump.c
+       $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
+             -l blktap blkdump.c
 
 .PHONY: TAGS clean install mk-symlinks rpm
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c    Thu Sep  8 15:18:40 2005
+++ b/tools/blktap/blkdump.c    Fri Sep  9 16:30:54 2005
@@ -8,85 +8,18 @@
 #include <stdio.h>
 #include "blktaplib.h"
  
-int control_print(control_msg_t *msg)
-{
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
-                ((blkif_be_create_t *)msg->msg)->domid,
-                ((blkif_be_create_t *)msg->msg)->blkif_handle);
-        break; 
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
-                ((blkif_be_destroy_t *)msg->msg)->domid,
-                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-        break;   
-    case CMSG_BLKIF_BE_CONNECT:
-        if ( msg->length != sizeof(blkif_be_connect_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n",
-                ((blkif_be_connect_t *)msg->msg)->domid,
-                ((blkif_be_connect_t *)msg->msg)->blkif_handle);
-        break;        
-    case CMSG_BLKIF_BE_DISCONNECT:
-        if ( msg->length != sizeof(blkif_be_disconnect_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n",
-                ((blkif_be_disconnect_t *)msg->msg)->domid,
-                ((blkif_be_disconnect_t *)msg->msg)->blkif_handle);
-        break;     
-    case CMSG_BLKIF_BE_VBD_CREATE:
-        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_create_t *)msg->msg)->domid,
-                ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_create_t *)msg->msg)->vdevice);
-        break;
-    case CMSG_BLKIF_BE_VBD_DESTROY:
-        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
-            goto parse_error;
-        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n",
-                ((blkif_be_vbd_destroy_t *)msg->msg)->domid,
-                ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
-                ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
-        break;
-    default:
-        goto parse_error;
-    }
-   
-    return 0; 
-      
-parse_error:
-    printf("[CONTROL_MSG] Bad message type or length!\n");
-    return 0;
-}
- 
 int request_print(blkif_request_t *req)
 {
     int i;
     unsigned long fas;
     
-    if ( req->operation == BLKIF_OP_PROBE ) {
-        printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id),
-                blkif_op_name[req->operation]);
-        return BLKTAP_PASS;
-    } else {
+    if ( (req->operation == BLKIF_OP_READ) ||
+         (req->operation == BLKIF_OP_WRITE) )
+    {
         printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
                 ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
                 blkif_op_name[req->operation], 
-                req->nr_segments, req->device, 
+                req->nr_segments, req->handle, 
                 req->sector_number);
         
         
@@ -99,6 +32,8 @@
                     );
         }
             
+    } else {
+        printf("Unknown request message type.\n");
     }
     
     return BLKTAP_PASS;
@@ -106,23 +41,22 @@
 
 int response_print(blkif_response_t *rsp)
 {   
-    if ( rsp->operation == BLKIF_OP_PROBE ) {
-        printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
-                blkif_op_name[rsp->operation]);
-        return BLKTAP_PASS;
-    } else {
+    if ( (rsp->operation == BLKIF_OP_READ) ||
+         (rsp->operation == BLKIF_OP_WRITE) )
+    {
         printf("[%2u:%2u>%5s] (status: %d)\n", 
                 ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
                 blkif_op_name[rsp->operation], 
                 rsp->status);
             
+    } else {
+        printf("Unknown request message type.\n");
     }
     return BLKTAP_PASS;
 }
 
 int main(int argc, char *argv[])
 {
-    blktap_register_ctrl_hook("control_print", control_print);
     blktap_register_request_hook("request_print", request_print);
     blktap_register_response_hook("response_print", response_print);
     blktap_listen();
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c  Thu Sep  8 15:18:40 2005
+++ b/tools/blktap/blktaplib.c  Fri Sep  9 16:30:54 2005
@@ -24,7 +24,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <pthread.h>
-
+#include <xs.h>
                                                                      
 #define __COMPILING_BLKTAP_LIB
 #include "blktaplib.h"
@@ -34,11 +34,12 @@
 #else
 #define DPRINTF(_f, _a...) ((void)0)
 #endif
-#define DEBUG_RING_IDXS 1
+#define DEBUG_RING_IDXS 0
 
 #define POLLRDNORM     0x040 
 
 #define BLKTAP_IOCTL_KICK 1
+
 
 void got_sig_bus();
 void got_sig_int();
@@ -46,17 +47,13 @@
 /* in kernel these are opposite, but we are a consumer now. */
 blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
 blkif_front_ring_t be_ring; 
-ctrl_back_ring_t   ctrl_ring;
 
 unsigned long mmap_vstart = 0;
 char *blktap_mem;
 int fd = 0;
 
-#define BLKTAP_RING_PAGES       3 /* Ctrl, Back, Front */
-/*#define BLKTAP_MMAP_PAGES       ((11 + 1) * 64)*/
-#define BLKTAP_MMAP_PAGES \
-    ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
+#define BLKTAP_RING_PAGES       1 /* Front */
+#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
     
 int bad_count = 0;
 void bad(void)
@@ -79,126 +76,13 @@
 }
 
 inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-/*
+
 static int (*request_hook)(blkif_request_t *req) = NULL;
 static int (*response_hook)(blkif_response_t *req) = NULL;
-*/
-
-/*-----[ Request/Response hook chains.]----------------------------------*/
-
-#define HOOK_NAME_MAX 50
-        
-typedef struct ctrl_hook_st {
-    char name[HOOK_NAME_MAX];
-    int (*func)(control_msg_t *);
-    struct ctrl_hook_st *next;
-} ctrl_hook_t;
-        
-typedef struct request_hook_st {
-    char name[HOOK_NAME_MAX];
-    int (*func)(blkif_request_t *);
-    struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
-    char name[HOOK_NAME_MAX];
-    int (*func)(blkif_response_t *);
-    struct response_hook_st *next;
-} response_hook_t;
-
-static ctrl_hook_t *ctrl_hook_chain = NULL;
-static request_hook_t *request_hook_chain = NULL;
-static response_hook_t *response_hook_chain = NULL;
-
-void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)) 
-{
-    ctrl_hook_t *ch_ent, **c;
-    
-    ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t));
-    if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
-    
-    ch_ent->func  = ch;
-    ch_ent->next = NULL;
-    strncpy(ch_ent->name, name, HOOK_NAME_MAX);
-    ch_ent->name[HOOK_NAME_MAX-1] = '\0';
-    
-    c = &ctrl_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = ch_ent;
-}
-
-void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)) 
-{
-    request_hook_t *rh_ent, **c;
-    
-    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
-    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
-    
-    c = &request_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)) 
-{
-    response_hook_t *rh_ent, **c;
-    
-    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
-    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
-    
-    c = &response_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void print_hooks(void)
-{
-    request_hook_t  *req_hook;
-    response_hook_t *rsp_hook;
-    ctrl_hook_t     *ctrl_hook;
-    
-    DPRINTF("Control Hooks:\n");
-    ctrl_hook = ctrl_hook_chain;
-    while (ctrl_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name);
-        ctrl_hook = ctrl_hook->next;
-    }
-    
-    DPRINTF("Request Hooks:\n");
-    req_hook = request_hook_chain;
-    while (req_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
-        req_hook = req_hook->next;
-    }
-    
-    DPRINTF("Response Hooks:\n");
-    rsp_hook = response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
-        rsp_hook = rsp_hook->next;
-    }
-}
         
 /*-----[ Data to/from Backend (server) VM ]------------------------------*/
 
-
+/*
 
 inline int write_req_to_be_ring(blkif_request_t *req)
 {
@@ -214,6 +98,7 @@
     
     return 0;
 }
+*/
 
 inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
 {
@@ -230,14 +115,14 @@
     return 0;
 }
 
-static void apply_rsp_hooks(blkif_response_t *rsp)
+static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
 {
     response_hook_t  *rsp_hook;
     
-    rsp_hook = response_hook_chain;
+    rsp_hook = blkif->response_hook_chain;
     while (rsp_hook != NULL)
     {
-        switch(rsp_hook->func(rsp))
+        switch(rsp_hook->func(blkif, rsp, 1))
         {
         case BLKTAP_PASS:
             break;
@@ -248,15 +133,19 @@
     }
 }
 
+
 static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-void blktap_inject_response(blkif_response_t *rsp)
-{
-    
-    apply_rsp_hooks(rsp);
-    
+void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
+{
+    
+    apply_rsp_hooks(blkif, rsp);
+  
     write_rsp_to_fe_ring(rsp);
-    
+}
+
+void blktap_kick_responses(void)
+{
     pthread_mutex_lock(&push_mutex);
     
     RING_PUSH_RESPONSES(&fe_ring);
@@ -277,7 +166,7 @@
     int active;
 } pollhook_t;
 
-static struct pollfd  pfd[MAX_POLLFDS+1];
+static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
 static pollhook_t     pollhooks[MAX_POLLFDS];
 static unsigned int   ph_freelist[MAX_POLLFDS];
 static unsigned int   ph_cons, ph_prod;
@@ -344,65 +233,65 @@
 
 int blktap_listen(void)
 {
-    int               notify_be, notify_fe, tap_pfd;
-    
+    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
+    struct xs_handle *h;
+    blkif_t *blkif;
+
     /* comms rings: */
     blkif_request_t  *req;
     blkif_response_t *rsp;
-    control_msg_t    *msg;
     blkif_sring_t    *sring;
-    ctrl_sring_t     *csring;
     RING_IDX          rp, i, pfd_count; 
     
     /* pending rings */
     blkif_request_t req_pending[BLKIF_RING_SIZE];
-    blkif_response_t rsp_pending[BLKIF_RING_SIZE];
+    /* blkif_response_t rsp_pending[BLKIF_RING_SIZE] */;
     
     /* handler hooks: */
     request_hook_t   *req_hook;
     response_hook_t  *rsp_hook;
-    ctrl_hook_t      *ctrl_hook;
     
     signal (SIGBUS, got_sig_bus);
     signal (SIGINT, got_sig_int);
     
-    print_hooks();
-    
+    __init_blkif();
+
     fd = open("/dev/blktap", O_RDWR);
-    if (fd == -1) {
-        printf("open failed! (%d)\n", errno);
-        goto open_failed;
-    }
+    if (fd == -1)
+        err(-1, "open failed!");
 
     blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
              PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 
-    if ((int)blktap_mem == -1) {
-        printf("mmap failed! (%d)\n", errno);
-        goto mmap_failed;
-    }
+    if ((int)blktap_mem == -1) 
+        err(-1, "mmap failed!");
 
     /* assign the rings to the mapped memory */
-    csring = (ctrl_sring_t *)blktap_mem;
-    BACK_RING_INIT(&ctrl_ring, csring, PAGE_SIZE);
-    
+/*
     sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
     FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-    
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE));
+*/  
+    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
     BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
 
     mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
 
+
+    /* Set up store connection and watch. */
+    h = xs_daemon_open();
+    if (h == NULL) 
+        err(-1, "xs_daemon_open");
+    
+    ret = add_blockdevice_probe_watch(h, "Domain-0");
+    if (ret != 0)
+        err(0, "adding device probewatch");
+    
     ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
 
     while(1) {
         int ret;
         
         /* build the poll list */
-        
-        DPRINTF("Building poll list.\n");
-        
         pfd_count = 0;
         for ( i=0; i < MAX_POLLFDS; i++ ) {
             pollhook_t *ph = &pollhooks[i];
@@ -415,49 +304,31 @@
             }
         }
 
-        tap_pfd = pfd_count;
+        tap_pfd = pfd_count++;
         pfd[tap_pfd].fd = fd;
         pfd[tap_pfd].events = POLLIN;
 
-        DPRINTF("poll() %d fds.\n", pfd_count);
+        store_pfd = pfd_count++;
+        pfd[store_pfd].fd = xs_fileno(h);
+        pfd[store_pfd].events = POLLIN;
         
-        if ( (ret = (poll(pfd, pfd_count+1, 10000)) == 0) ) {
+        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
             if (DEBUG_RING_IDXS)
                 ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
             continue;
         }
 
-        DPRINTF("poll returned %d\n", ret);
-
         for (i=0; i < MAX_POLLFDS; i++) {
             if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
                 pollhooks[i].func(pollhooks[i].pfd->fd);
         }
         
-        if (pfd[tap_pfd].revents) {
-            
-            /* empty the control ring */
-            rp = ctrl_ring.sring->req_prod;
-            rmb();
-            for (i = ctrl_ring.req_cons; i < rp; i++)
-            {
-                msg = RING_GET_REQUEST(&ctrl_ring, i);
-
-                ctrl_hook = ctrl_hook_chain;
-                while (ctrl_hook != NULL)
-                {
-                    DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name);
-                    /* We currently don't respond to ctrl messages. */
-                    ctrl_hook->func(msg);
-                    ctrl_hook = ctrl_hook->next;
-                }
-            }
-            /* Using this as a unidirectional ring. */
-            ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
-pthread_mutex_lock(&push_mutex);
-            RING_PUSH_RESPONSES(&ctrl_ring);
-pthread_mutex_unlock(&push_mutex);
-            
+        if (pfd[store_pfd].revents) {
+            ret = xs_fire_next_watch(h);
+        }
+
+        if (pfd[tap_pfd].revents) 
+        {    
             /* empty the fe_ring */
             notify_fe = 0;
             notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
@@ -465,44 +336,62 @@
             rmb();
             for (i = fe_ring.req_cons; i != rp; i++)
             {
-                int done = 0; /* stop forwarding this request */
+                int done = 0; 
 
                 req = RING_GET_REQUEST(&fe_ring, i);
                 memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
                 req = &req_pending[ID_TO_IDX(req->id)];
 
-                DPRINTF("copying an fe request\n");
-
-                req_hook = request_hook_chain;
-                while (req_hook != NULL)
+                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
+
+                if (blkif != NULL)
                 {
-                    DPRINTF("REQ_HOOK: %s\n", req_hook->name);
-                    switch(req_hook->func(req))
+                    req_hook = blkif->request_hook_chain;
+                    while (req_hook != NULL)
                     {
-                    case BLKTAP_RESPOND:
-                        apply_rsp_hooks((blkif_response_t *)req);
-                        write_rsp_to_fe_ring((blkif_response_t *)req);
-                        notify_fe = 1;
-                        done = 1;
-                        break;
-                    case BLKTAP_STOLEN:
-                        done = 1;
-                        break;
-                    case BLKTAP_PASS:
-                        break;
-                    default:
-                        printf("Unknown request hook return value!\n");
+                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
+                        {
+                        case BLKTAP_RESPOND:
+                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
+                            write_rsp_to_fe_ring((blkif_response_t *)req);
+                            notify_fe = 1;
+                            done = 1;
+                            break;
+                        case BLKTAP_STOLEN:
+                            done = 1;
+                            break;
+                        case BLKTAP_PASS:
+                            break;
+                        default:
+                            printf("Unknown request hook return value!\n");
+                        }
+                        if (done) break;
+                        req_hook = req_hook->next;
                     }
-                    if (done) break;
-                    req_hook = req_hook->next;
                 }
 
-                if (done == 0) write_req_to_be_ring(req);
+                if (done == 0) 
+                {
+                    /* this was:  */
+                    /* write_req_to_be_ring(req); */
+
+                    unsigned long id = req->id;
+                    unsigned short operation = req->operation;
+                    printf("Unterminated request!\n");
+                    rsp = (blkif_response_t *)req;
+                    rsp->id = id;
+                    rsp->operation = operation;
+                    rsp->status = BLKIF_RSP_ERROR;
+                    write_rsp_to_fe_ring(rsp);
+                    notify_fe = 1;
+                    done = 1;
+                }
 
             }
             fe_ring.req_cons = i;
 
             /* empty the be_ring */
+/*
             notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
             rp = be_ring.sring->rsp_prod;
             rmb();
@@ -519,9 +408,9 @@
                 write_rsp_to_fe_ring(rsp);
             }
             be_ring.rsp_cons = i;
-
+*/
             /* notify the domains */
-
+/*
             if (notify_be) {
                 DPRINTF("notifying be\n");
 pthread_mutex_lock(&push_mutex);
@@ -529,13 +418,13 @@
                 ioctl(fd, BLKTAP_IOCTL_KICK_BE);
 pthread_mutex_unlock(&push_mutex);
             }
-
+*/
             if (notify_fe) {
                 DPRINTF("notifying fe\n");
-pthread_mutex_lock(&push_mutex);
+                pthread_mutex_lock(&push_mutex);
                 RING_PUSH_RESPONSES(&fe_ring);
                 ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-pthread_mutex_unlock(&push_mutex);
+                pthread_mutex_unlock(&push_mutex);
             }
         }        
     }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h  Thu Sep  8 15:18:40 2005
+++ b/tools/blktap/blktaplib.h  Fri Sep  9 16:30:54 2005
@@ -2,6 +2,9 @@
  *
  * userland accessors to the block tap.
  *
+ * Sept 2/05 -- I'm scaling this back to only support block remappings
+ * to user in a backend domain.  Passthrough and interposition can be readded
+ * once transitive grants are available.
  */
  
 #ifndef __BLKTAPLIB_H__
@@ -13,6 +16,7 @@
 #include <xen/io/blkif.h>
 #include <xen/io/ring.h>
 #include <xen/io/domain_controller.h>
+#include <xs.h>
 
 /* /dev/xen/blktap resides at device number major=10, minor=202        */ 
 #define BLKTAP_MINOR 202
@@ -49,12 +53,18 @@
     return (
         ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
         ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+        ( arg == BLKTAP_MODE_INTERPOSE    ) );
+/*
+    return (
+        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
+        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
         ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
         ( arg == BLKTAP_MODE_INTERPOSE    ) ||
         ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
         ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
         ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
         );
+*/
 }
 
 /* Return values for handling messages in hooks. */
@@ -62,29 +72,88 @@
 #define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
 #define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
 
-#define domid_t unsigned short
+//#define domid_t unsigned short
 
 inline unsigned int ID_TO_IDX(unsigned long id);
 inline domid_t ID_TO_DOM(unsigned long id);
 
-void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
-void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
-void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-void blktap_inject_response(blkif_response_t *);
 int  blktap_attach_poll(int fd, short events, int (*func)(int));
 void blktap_detach_poll(int fd);
 int  blktap_listen(void);
 
+struct blkif;
+
+typedef struct request_hook_st {
+    char *name;
+    int (*func)(struct blkif *, blkif_request_t *, int);
+    struct request_hook_st *next;
+} request_hook_t;
+
+typedef struct response_hook_st {
+    char *name;
+    int (*func)(struct blkif *, blkif_response_t *, int);
+    struct response_hook_st *next;
+} response_hook_t;
+
+struct blkif_ops {
+    long int (*get_size)(struct blkif *blkif);
+    long int (*get_secsize)(struct blkif *blkif);
+    unsigned (*get_info)(struct blkif *blkif);
+};
+
+typedef struct blkif {
+    domid_t domid;
+    long int handle;
+
+    long int pdev;
+    long int readonly;
+
+    enum { DISCONNECTED, CONNECTED } state;
+
+    struct blkif_ops *ops;
+    request_hook_t *request_hook_chain;
+    response_hook_t *response_hook_chain;
+
+    struct blkif *hash_next;
+
+    void *prv;  /* device-specific data */
+} blkif_t;
+
+void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+blkif_t *alloc_blkif(domid_t domid);
+int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
+               long int readonly);
+void free_blkif(blkif_t *blkif);
+void __init_blkif(void);
+
+
+/* xenstore/xenbus: */
+extern int add_blockdevice_probe_watch(struct xs_handle *h, 
+                                       const char *domname);
+int xs_fire_next_watch(struct xs_handle *h);
+
+
+void blkif_print_hooks(blkif_t *blkif);
+void blkif_register_request_hook(blkif_t *blkif, char *name, 
+                             int (*rh)(blkif_t *, blkif_request_t *, int));
+void blkif_register_response_hook(blkif_t *blkif, char *name, 
+                             int (*rh)(blkif_t *, blkif_response_t *, int));
+void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
+void blktap_kick_responses(void);
+
+/* this must match the underlying driver... */
+#define MAX_PENDING_REQS 64
+
 /* Accessing attached data page mappings */
-#define MMAP_PAGES_PER_REQUEST \
-    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_VADDR(_req,_seg)                        \
-    (mmap_vstart +                                   \
-     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+#define MMAP_PAGES                                              \
+    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg)                                   \
+    (mmap_vstart +                                              \
+     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
      ((_seg) * PAGE_SIZE))
 
 extern unsigned long mmap_vstart;
-
 
 /* Defines that are only used by library clients */
 
@@ -93,7 +162,6 @@
 static char *blkif_op_name[] = {
     [BLKIF_OP_READ]       = "READ",
     [BLKIF_OP_WRITE]      = "WRITE",
-    [BLKIF_OP_PROBE]      = "PROBE",
 };
 
 #endif /* __COMPILING_BLKTAP_LIB */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_brctl
--- a/tools/check/check_brctl   Thu Sep  8 15:18:40 2005
+++ b/tools/check/check_brctl   Fri Sep  9 16:30:54 2005
@@ -2,8 +2,9 @@
 # CHECK-INSTALL
 
 function error {
-   echo 'Check for the bridge control utils (brctl) failed.'
+   echo
+   echo '  *** Check for the bridge control utils (brctl) FAILED'
    exit 1
 }
 
-brctl show || error
\ No newline at end of file
+which brctl 1>/dev/null 2>&1 || error
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_iproute
--- a/tools/check/check_iproute Thu Sep  8 15:18:40 2005
+++ b/tools/check/check_iproute Fri Sep  9 16:30:54 2005
@@ -2,9 +2,10 @@
 # CHECK-INSTALL
 
 function error {
-   echo 'Check for iproute (ip addr) failed.'
+   echo
+   echo '  *** Check for iproute (ip addr) FAILED'
    exit 1
 }
 
-ip addr list || error
+ip addr list 1>/dev/null 2>&1 || error
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_logging
--- a/tools/check/check_logging Thu Sep  8 15:18:40 2005
+++ b/tools/check/check_logging Fri Sep  9 16:30:54 2005
@@ -18,11 +18,12 @@
         import logging
     except ImportError:
         hline()
-        msg("Python logging is not installed.")
-        msg("Use 'make install-logging' at the xen root to install.")
         msg("")
-        msg("Alternatively download and install from")
-        msg("http://www.red-dove.com/python_logging.html";)
+        msg("  *** Python logging is not installed.")
+        msg("  *** Use 'make install-logging' at the xen root to install.")
+        msg("  *** ")
+        msg("  *** Alternatively download and install from")
+        msg("  *** http://www.red-dove.com/python_logging.html";)
         hline()
         sys.exit(1)
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_python
--- a/tools/check/check_python  Thu Sep  8 15:18:40 2005
+++ b/tools/check/check_python  Fri Sep  9 16:30:54 2005
@@ -2,9 +2,9 @@
 # CHECK-BUILD CHECK-INSTALL
 
 function error {
-    echo "Check for Python version 2.2 or higher failed."
+    echo
+    echo "  *** Check for Python version >= 2.2 FAILED"
     exit 1
 }
 
-python -V
 python -V 2>&1 | cut -d ' ' -f 2 | grep -q -E '^2.2|^2.3|^2.4' || error
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_zlib_devel
--- a/tools/check/check_zlib_devel      Thu Sep  8 15:18:40 2005
+++ b/tools/check/check_zlib_devel      Fri Sep  9 16:30:54 2005
@@ -2,9 +2,10 @@
 # CHECK-BUILD
 
 function error {
-    echo 'Check for zlib includes failed.'
+    echo
+    echo "  *** Check for zlib headers FAILED"
     exit 1
 }
 
 set -e
-[ -e /usr/include/zlib.h ] || error
\ No newline at end of file
+[ -e /usr/include/zlib.h ] || error
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_zlib_lib
--- a/tools/check/check_zlib_lib        Thu Sep  8 15:18:40 2005
+++ b/tools/check/check_zlib_lib        Fri Sep  9 16:30:54 2005
@@ -2,9 +2,10 @@
 # CHECK-BUILD CHECK-INSTALL
 
 function error {
-    echo 'Check for zlib library failed.'
+    echo
+    echo "  *** Check for zlib library FAILED"
     exit 1
 }
 
 set -e
-ldconfig -p | grep libz.so || error
\ No newline at end of file
+ldconfig -p | grep -q libz.so || error
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/chk
--- a/tools/check/chk   Thu Sep  8 15:18:40 2005
+++ b/tools/check/chk   Fri Sep  9 16:30:54 2005
@@ -17,14 +17,11 @@
 case $1 in
     build)
         check="CHECK-BUILD"
-        info=".chkbuild"
         ;;
     install)
         check="CHECK-INSTALL"
-        info=".chkinstall"
         ;;
     clean)
-        rm -f .chkbuild .chkinstall
         exit 0
         ;;
     *)
@@ -34,7 +31,7 @@
 
 failed=0
 
-echo "Xen ${check} " $(date) > ${info}
+echo "Xen ${check} " $(date)
 for f in check_* ; do
     case $f in
         *~)
@@ -49,24 +46,12 @@
     if ! grep -q ${check} $f ; then
         continue
     fi
-    echo ' ' >> ${info}
-    echo "Checking $f" >> ${info}
-    if ./$f 1>>${info} 2>&1 ; then
-        echo OK >> ${info}
+    echo -n "Checking $f: "
+    if ./$f 2>&1 ; then
+        echo OK
     else
         failed=1
-        echo "FAILED $f"
-        echo FAILED >> ${info}
     fi
 done
 
-echo >> ${info}
-
-if [ "$failed" == "1" ] ; then
-    echo "Checks failed. See `pwd`/${info} for details."
-    echo "FAILED" >> ${info}
-    exit 1
-else
-    echo "OK" >> ${info}
-    exit 0
-fi
+exit $failed
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/Makefile
--- a/tools/console/Makefile    Thu Sep  8 15:18:40 2005
+++ b/tools/console/Makefile    Fri Sep  9 16:30:54 2005
@@ -9,10 +9,8 @@
 INSTALL_PROG    = $(INSTALL) -m0755
 INSTALL_DIR     = $(INSTALL) -d -m0755
 
-CC       = gcc
-CFLAGS   = -Wall -Werror -g3
+CFLAGS  += -Wall -Werror -g3
 
-CFLAGS  += -I $(XEN_XCS)
 CFLAGS  += -I $(XEN_LIBXC)
 CFLAGS  += -I $(XEN_XENSTORE)
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/client/main.c
--- a/tools/console/client/main.c       Thu Sep  8 15:18:40 2005
+++ b/tools/console/client/main.c       Fri Sep  9 16:30:54 2005
@@ -170,12 +170,12 @@
                { 0 },
 
        };
-       char *str_pty;
-       char path[1024];
+       char *str_pty, *path;
        int spty;
        unsigned int len = 0;
        struct xs_handle *xs;
        char *end;
+       time_t now;
 
        while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
                switch(ch) {
@@ -213,20 +213,51 @@
        
        signal(SIGTERM, sighandler);
 
-       snprintf(path, sizeof(path), "/console/%d/tty", domid);
+       path = xs_get_domain_path(xs, domid);
+       if (path == NULL)
+               err(errno, "xs_get_domain_path()");
+       path = realloc(path, strlen(path) + strlen("/console/tty") + 1);
+       if (path == NULL)
+               err(ENOMEM, "realloc");
+       strcat(path, "/console/tty");
        str_pty = xs_read(xs, path, &len);
+
        /* FIXME consoled currently does not assume domain-0 doesn't have a
           console which is good when we break domain-0 up.  To keep us
           user friendly, we'll bail out here since no data will ever show
           up on domain-0. */
-       if (domid == 0 || str_pty == NULL) {
+       if (domid == 0) {
                err(errno, "Could not read tty from store");
        }
+
+       /* Wait a little bit for tty to appear.  There is a race
+          condition that occurs after xend creates a domain.  This
+          code might be running before consoled has noticed the new
+          domain and setup a pty for it.
+
+          A xenstore watch would slightly improve responsiveness but
+          a timeout would still be needed since we don't want to
+          block forever if given an invalid domain or worse yet, a
+          domain that someone else has connected to. */
+
+       now = time(0);
+       while (str_pty == NULL && (now + 5) > time(0)) {
+               struct timeval tv = { 0, 500 };
+               select(0, NULL, NULL, NULL, &tv); /* pause briefly */
+
+               str_pty = xs_read(xs, path, &len);
+       }
+
+       if (str_pty == NULL) {
+               err(errno, "Could not read tty from store");
+       }
+
        spty = open(str_pty, O_RDWR | O_NOCTTY);
        if (spty == -1) {
                err(errno, "Could not open tty `%s'", str_pty);
        }
        free(str_pty);
+       free(path);
 
        init_term(STDIN_FILENO, &attr);
        console_loop(xc_handle, domid, spty);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Sep  8 15:18:40 2005
+++ b/tools/console/daemon/io.c Fri Sep  9 16:30:54 2005
@@ -26,7 +26,6 @@
 #include "xenctrl.h"
 #include "xs.h"
 #include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
 
 #include <malloc.h>
 #include <stdlib.h>
@@ -36,9 +35,15 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <termios.h>
+#include <stdarg.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
 
 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
+#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
 
 struct buffer
 {
@@ -48,41 +53,6 @@
        size_t max_capacity;
 };
 
-static void buffer_append(struct buffer *buffer, const void *data, size_t size)
-{
-       if ((buffer->capacity - buffer->size) < size) {
-               buffer->capacity += (size + 1024);
-               buffer->data = realloc(buffer->data, buffer->capacity);
-               if (buffer->data == NULL) {
-                       dolog(LOG_ERR, "Memory allocation failed");
-                       exit(ENOMEM);
-               }
-       }
-
-       memcpy(buffer->data + buffer->size, data, size);
-       buffer->size += size;
-
-       if (buffer->max_capacity &&
-           buffer->size > buffer->max_capacity) {
-               memmove(buffer->data + (buffer->size - buffer->max_capacity),
-                       buffer->data, buffer->max_capacity);
-               buffer->data = realloc(buffer->data, buffer->max_capacity);
-               buffer->capacity = buffer->max_capacity;
-       }
-}
-
-static bool buffer_empty(struct buffer *buffer)
-{
-       return buffer->size == 0;
-}
-
-static void buffer_advance(struct buffer *buffer, size_t size)
-{
-       size = MIN(size, buffer->size);
-       memmove(buffer->data, buffer + size, buffer->size - size);
-       buffer->size -= size;
-}
-
 struct domain
 {
        int domid;
@@ -90,9 +60,74 @@
        bool is_dead;
        struct buffer buffer;
        struct domain *next;
+       char *conspath;
+       int ring_ref;
+       int local_port;
+       char *page;
+       int evtchn_fd;
 };
 
 static struct domain *dom_head;
+
+struct ring_head
+{
+       u32 cons;
+       u32 prod;
+       char buf[0];
+} __attribute__((packed));
+
+#define PAGE_SIZE (getpagesize())
+#define XENCONS_RING_SIZE (PAGE_SIZE/2 - sizeof (struct ring_head))
+#define XENCONS_IDX(cnt) ((cnt) % XENCONS_RING_SIZE)
+#define XENCONS_FULL(ring) (((ring)->prod - (ring)->cons) == XENCONS_RING_SIZE)
+#define XENCONS_SPACE(ring) (XENCONS_RING_SIZE - ((ring)->prod - (ring)->cons))
+
+static void buffer_append(struct domain *dom)
+{
+       struct buffer *buffer = &dom->buffer;
+       struct ring_head *ring = (struct ring_head *)dom->page;
+       size_t size;
+
+       while ((size = ring->prod - ring->cons) != 0) {
+               if ((buffer->capacity - buffer->size) < size) {
+                       buffer->capacity += (size + 1024);
+                       buffer->data = realloc(buffer->data, buffer->capacity);
+                       if (buffer->data == NULL) {
+                               dolog(LOG_ERR, "Memory allocation failed");
+                               exit(ENOMEM);
+                       }
+               }
+
+               while (ring->cons < ring->prod) {
+                       buffer->data[buffer->size] =
+                               ring->buf[XENCONS_IDX(ring->cons)];
+                       buffer->size++;
+                       ring->cons++;
+               }
+
+               if (buffer->max_capacity &&
+                   buffer->size > buffer->max_capacity) {
+                       memmove(buffer->data + (buffer->size -
+                                               buffer->max_capacity),
+                               buffer->data, buffer->max_capacity);
+                       buffer->data = realloc(buffer->data,
+                                              buffer->max_capacity);
+                       buffer->capacity = buffer->max_capacity;
+               }
+       }
+}
+
+static bool buffer_empty(struct buffer *buffer)
+{
+       return buffer->size == 0;
+}
+
+static void buffer_advance(struct buffer *buffer, size_t size)
+{
+       size = MIN(size, buffer->size);
+       memmove(buffer->data, buffer + size, buffer->size - size);
+       buffer->size -= size;
+}
 
 static bool domain_is_valid(int domid)
 {
@@ -107,8 +142,9 @@
 
 static int domain_create_tty(struct domain *dom)
 {
-       char path[1024];
+       char *path;
        int master;
+       bool success;
 
        if ((master = getpt()) == -1 ||
            grantpt(master) == -1 || unlockpt(master) == -1) {
@@ -126,27 +162,134 @@
                        tcsetattr(master, TCSAFLUSH, &term);
                }
 
-               xs_mkdir(xs, "/console");
-               snprintf(path, sizeof(path), "/console/%d", dom->domid);
-               xs_mkdir(xs, path);
-               strcat(path, "/tty");
-
-               xs_write(xs, path, slave, strlen(slave), O_CREAT);
-
-               snprintf(path, sizeof(path), "/console/%d/limit", dom->domid);
+               success = asprintf(&path, "%s/tty", dom->conspath) != -1;
+               if (!success)
+                       goto out;
+               success = xs_write(xs, path, slave, strlen(slave), O_CREAT);
+               free(path);
+               if (!success)
+                       goto out;
+
+               success = asprintf(&path, "%s/limit", dom->conspath) != -1;
+               if (!success)
+                       goto out;
                data = xs_read(xs, path, &len);
                if (data) {
                        dom->buffer.max_capacity = strtoul(data, 0, 0);
                        free(data);
                }
+               free(path);
        }
 
        return master;
+ out:
+       close(master);
+       return -1;
+}
+
+/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
+int xs_gather(struct xs_handle *xs, const char *dir, ...)
+{
+       va_list ap;
+       const char *name;
+       char *path;
+       int ret = 0;
+
+       va_start(ap, dir);
+       while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+               const char *fmt = va_arg(ap, char *);
+               void *result = va_arg(ap, void *);
+               char *p;
+
+               asprintf(&path, "%s/%s", dir, name);
+               p = xs_read(xs, path, NULL);
+               free(path);
+               if (p == NULL) {
+                       ret = ENOENT;
+                       break;
+               }
+               if (fmt) {
+                       if (sscanf(p, fmt, result) == 0)
+                               ret = EINVAL;
+                       free(p);
+               } else
+                       *(char **)result = p;
+       }
+       va_end(ap);
+       return ret;
+}
+
+#define EVENTCHN_BIND          _IO('E', 2)
+#define EVENTCHN_UNBIND        _IO('E', 3)
+
+static int domain_create_ring(struct domain *dom)
+{
+       int err, local_port, ring_ref;
+
+       err = xs_gather(xs, dom->conspath,
+                       "ring-ref", "%u", &ring_ref,
+                       "port", "%i", &local_port,
+                       NULL);
+       if (err)
+               goto out;
+
+       if (ring_ref != dom->ring_ref) {
+               if (dom->page)
+                       munmap(dom->page, getpagesize());
+               dom->page = xc_map_foreign_range(xc, dom->domid, getpagesize(),
+                                                PROT_READ|PROT_WRITE,
+                                                (unsigned long)ring_ref);
+               if (dom->page == NULL) {
+                       err = EINVAL;
+                       goto out;
+               }
+               dom->ring_ref = ring_ref;
+       }
+
+       if (local_port != dom->local_port) {
+               dom->local_port = -1;
+               if (dom->evtchn_fd != -1)
+                       close(dom->evtchn_fd);
+               /* Opening evtchn independently for each console is a bit
+                * wasteful, but that's how the code is structured... */
+               dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
+               if (dom->evtchn_fd == -1) {
+                       err = errno;
+                       goto out;
+               }
+ 
+               if (ioctl(dom->evtchn_fd, EVENTCHN_BIND, local_port) == -1) {
+                       err = errno;
+                       close(dom->evtchn_fd);
+                       dom->evtchn_fd = -1;
+                       goto out;
+               }
+               dom->local_port = local_port;
+       }
+
+ out:
+       return err;
+}
+
+static bool watch_domain(struct domain *dom, bool watch)
+{
+       char domid_str[3 + MAX_STRLEN(dom->domid)];
+       bool success;
+
+       sprintf(domid_str, "dom%u", dom->domid);
+       if (watch)
+               success = xs_watch(xs, dom->conspath, domid_str);
+       else
+               success = xs_unwatch(xs, dom->conspath, domid_str);
+       if (success)
+               domain_create_ring(dom);
+       return success;
 }
 
 static struct domain *create_domain(int domid)
 {
        struct domain *dom;
+       char *s;
 
        dom = (struct domain *)malloc(sizeof(struct domain));
        if (dom == NULL) {
@@ -156,99 +299,145 @@
        }
 
        dom->domid = domid;
+
+       dom->conspath = xs_get_domain_path(xs, dom->domid);
+       if (dom->conspath == NULL)
+               goto out;
+       s = realloc(dom->conspath, strlen(dom->conspath) +
+                   strlen("/console") + 1);
+       if (s == NULL)
+               goto out;
+       dom->conspath = s;
+       strcat(dom->conspath, "/console");
+
        dom->tty_fd = domain_create_tty(dom);
        dom->is_dead = false;
        dom->buffer.data = 0;
        dom->buffer.size = 0;
        dom->buffer.capacity = 0;
        dom->buffer.max_capacity = 0;
-       dom->next = 0;
+       dom->next = NULL;
+
+       dom->ring_ref = -1;
+       dom->local_port = -1;
+       dom->page = NULL;
+       dom->evtchn_fd = -1;
+
+       if (!watch_domain(dom, true))
+               goto out;
+
+       dom->next = dom_head;
+       dom_head = dom;
 
        dolog(LOG_DEBUG, "New domain %d", domid);
 
        return dom;
+ out:
+       if (dom->conspath)
+               free(dom->conspath);
+       free(dom);
+       return NULL;
 }
 
 static struct domain *lookup_domain(int domid)
 {
+       struct domain *dom;
+
+       for (dom = dom_head; dom; dom = dom->next)
+               if (dom->domid == domid)
+                       return dom;
+       return NULL;
+}
+
+static void remove_domain(struct domain *dom)
+{
        struct domain **pp;
 
+       dolog(LOG_DEBUG, "Removing domain-%d", dom->domid);
+
        for (pp = &dom_head; *pp; pp = &(*pp)->next) {
-               struct domain *dom = *pp;
-
-               if (dom->domid == domid) {
-                       return dom;
-               } else if (dom->domid > domid) {
-                       *pp = create_domain(domid);
-                       (*pp)->next = dom;
-                       return *pp;
-               }
-       }
-
-       *pp = create_domain(domid);
-       return *pp;
-}
-
-static void remove_domain(struct domain *dom)
-{
-       struct domain **pp;
-
-       dolog(LOG_DEBUG, "Removing domain-%d", dom->domid);
-
-       for (pp = &dom_head; *pp; pp = &(*pp)->next) {
-               struct domain *d = *pp;
-
-               if (dom->domid == d->domid) {
-                       *pp = d->next;
-                       if (d->buffer.data) {
-                               free(d->buffer.data);
-                       }
-                       free(d);
+               if (dom == *pp) {
+                       *pp = dom->next;
+                       free(dom);
                        break;
                }
        }
 }
 
-static void remove_dead_domains(struct domain *dom)
-{
-       if (dom == NULL) return;
-       remove_dead_domains(dom->next);
-
-       if (dom->is_dead) {
-               remove_domain(dom);
+static void cleanup_domain(struct domain *d)
+{
+       if (!buffer_empty(&d->buffer))
+               return;
+
+       if (d->buffer.data)
+               free(d->buffer.data);
+       d->buffer.data = NULL;
+       if (d->tty_fd != -1)
+               close(d->tty_fd);
+       d->tty_fd = -1;
+       remove_domain(d);
+}
+
+static void shutdown_domain(struct domain *d)
+{
+       d->is_dead = true;
+       watch_domain(d, false);
+       if (d->page)
+               munmap(d->page, getpagesize());
+       d->page = NULL;
+       if (d->evtchn_fd != -1)
+               close(d->evtchn_fd);
+       d->evtchn_fd = -1;
+       cleanup_domain(d);
+}
+
+void enum_domains(void)
+{
+       int domid = 1;
+       xc_dominfo_t dominfo;
+       struct domain *dom;
+
+       while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) {
+               dom = lookup_domain(dominfo.domid);
+               if (dominfo.dying || dominfo.crashed || dominfo.shutdown) {
+                       if (dom)
+                               shutdown_domain(dom);
+               } else {
+                       if (dom == NULL)
+                               create_domain(dominfo.domid);
+               }
+               domid = dominfo.domid + 1;
        }
 }
 
 static void handle_tty_read(struct domain *dom)
 {
        ssize_t len;
-       xcs_msg_t msg;
-
-       msg.type = XCS_REQUEST;
-       msg.u.control.remote_dom = dom->domid;
-       msg.u.control.msg.type = CMSG_CONSOLE;
-       msg.u.control.msg.subtype = CMSG_CONSOLE_DATA;
-       msg.u.control.msg.id = 1;
-
-       len = read(dom->tty_fd, msg.u.control.msg.msg, 60);
+       char msg[80];
+       struct ring_head *inring =
+               (struct ring_head *)(dom->page + PAGE_SIZE/2);
+       int i;
+
+       len = read(dom->tty_fd, msg, MIN(XENCONS_SPACE(inring), sizeof(msg)));
        if (len < 1) {
                close(dom->tty_fd);
+               dom->tty_fd = -1;
 
                if (domain_is_valid(dom->domid)) {
                        dom->tty_fd = domain_create_tty(dom);
                } else {
-                       dom->is_dead = true;
+                       shutdown_domain(dom);
                }
        } else if (domain_is_valid(dom->domid)) {
-               msg.u.control.msg.length = len;
-
-               if (!write_sync(xcs_data_fd, &msg, sizeof(msg))) {
-                       dolog(LOG_ERR, "Write to xcs failed: %m");
-                       exit(1);
-               }
+               for (i = 0; i < len; i++) {
+                       inring->buf[XENCONS_IDX(inring->prod)] = msg[i];
+                       inring->prod++;
+               }
+               xc_evtchn_send(xc, dom->local_port);
        } else {
                close(dom->tty_fd);
-               dom->is_dead = true;
+               dom->tty_fd = -1;
+               shutdown_domain(dom);
        }
 }
 
@@ -259,104 +448,105 @@
        len = write(dom->tty_fd, dom->buffer.data, dom->buffer.size);
        if (len < 1) {
                close(dom->tty_fd);
+               dom->tty_fd = -1;
 
                if (domain_is_valid(dom->domid)) {
                        dom->tty_fd = domain_create_tty(dom);
                } else {
-                       dom->is_dead = true;
+                       shutdown_domain(dom);
                }
        } else {
                buffer_advance(&dom->buffer, len);
        }
 }
 
-static void handle_xcs_msg(int fd)
-{
-       xcs_msg_t msg;
-
-       if (!read_sync(fd, &msg, sizeof(msg))) {
-               dolog(LOG_ERR, "read from xcs failed! %m");
-               exit(1);
-       } else if (msg.type == XCS_REQUEST) {
-               struct domain *dom;
-
-               dom = lookup_domain(msg.u.control.remote_dom);
-               buffer_append(&dom->buffer,
-                             msg.u.control.msg.msg,
-                             msg.u.control.msg.length);
-       }
-}
-
-static void enum_domains(void)
-{
-       int domid = 0;
-       xc_dominfo_t dominfo;
-
-       while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) {
-               lookup_domain(dominfo.domid);
-               domid = dominfo.domid + 1;
-       }
+static void handle_ring_read(struct domain *dom)
+{
+       u16 v;
+
+       if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
+               return;
+
+       buffer_append(dom);
+
+       (void)write_sync(dom->evtchn_fd, &v, sizeof(v));
+}
+
+static void handle_xs(int fd)
+{
+       char **vec;
+       int domid;
+       struct domain *dom;
+
+       vec = xs_read_watch(xs);
+       if (!vec)
+               return;
+
+       if (!strcmp(vec[1], "domlist"))
+               enum_domains();
+       else if (sscanf(vec[1], "dom%u", &domid) == 1) {
+               dom = lookup_domain(domid);
+               if (dom->is_dead == false)
+                       domain_create_ring(dom);
+       }
+
+       xs_acknowledge_watch(xs, vec[1]);
+       free(vec);
 }
 
 void handle_io(void)
 {
        fd_set readfds, writefds;
        int ret;
-       int max_fd = -1;
-       int num_of_writes = 0;
 
        do {
-               struct domain *d;
-               struct timeval tv = { 1, 0 };
+               struct domain *d, *n;
+               struct timeval tv = { 100, 0 };
+               int max_fd = -1;
 
                FD_ZERO(&readfds);
                FD_ZERO(&writefds);
 
-               FD_SET(xcs_data_fd, &readfds);
-               max_fd = MAX(xcs_data_fd, max_fd);
+               FD_SET(xs_fileno(xs), &readfds);
+               max_fd = MAX(xs_fileno(xs), max_fd);
 
                for (d = dom_head; d; d = d->next) {
+                       if (d->evtchn_fd != -1) {
+                               FD_SET(d->evtchn_fd, &readfds);
+                               max_fd = MAX(d->evtchn_fd, max_fd);
+                       }
+
                        if (d->tty_fd != -1) {
-                               FD_SET(d->tty_fd, &readfds);
+                               if (!d->is_dead)
+                                       FD_SET(d->tty_fd, &readfds);
+
+                               if (!buffer_empty(&d->buffer))
+                                       FD_SET(d->tty_fd, &writefds);
+                               max_fd = MAX(d->tty_fd, max_fd);
                        }
-
-                       if (d->tty_fd != -1 && !buffer_empty(&d->buffer)) {
-                               FD_SET(d->tty_fd, &writefds);
+               }
+
+               ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
+
+               if (FD_ISSET(xs_fileno(xs), &readfds))
+                       handle_xs(xs_fileno(xs));
+
+               for (d = dom_head; d; d = n) {
+                       n = d->next;
+                       if (d->evtchn_fd != -1 &&
+                           FD_ISSET(d->evtchn_fd, &readfds))
+                               handle_ring_read(d);
+
+                       if (d->tty_fd != -1) {
+                               if (FD_ISSET(d->tty_fd, &readfds))
+                                       handle_tty_read(d);
+
+                               if (FD_ISSET(d->tty_fd, &writefds))
+                                       handle_tty_write(d);
+
+                               if (d->is_dead)
+                                       cleanup_domain(d);
                        }
-
-                       max_fd = MAX(d->tty_fd, max_fd);
-               }
-
-               ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
-               if (tv.tv_sec == 1 && (++num_of_writes % 100) == 0) {
-#if 0
-                       /* FIXME */
-                       /* This is a nasty hack.  xcs does not handle the
-                          control channels filling up well at all.  We'll
-                          throttle ourselves here since we do proper
-                          queueing to give the domains a shot at pulling out
-                          the data.  Fixing xcs is not worth it as it's
-                          going away */
-                       tv.tv_usec = 1000;
-                       select(0, 0, 0, 0, &tv);
-#endif
-               }
-               enum_domains();
-
-               if (FD_ISSET(xcs_data_fd, &readfds)) {
-                       handle_xcs_msg(xcs_data_fd);
-               }
-
-               for (d = dom_head; d; d = d->next) {
-                       if (!d->is_dead && FD_ISSET(d->tty_fd, &readfds)) {
-                               handle_tty_read(d);
-                       }
-
-                       if (!d->is_dead && FD_ISSET(d->tty_fd, &writefds)) {
-                               handle_tty_write(d);
-                       }
-               }
-
-               remove_dead_domains(dom_head);
+               }
        } while (ret > -1);
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/io.h
--- a/tools/console/daemon/io.h Thu Sep  8 15:18:40 2005
+++ b/tools/console/daemon/io.h Fri Sep  9 16:30:54 2005
@@ -21,6 +21,7 @@
 #ifndef CONSOLED_IO_H
 #define CONSOLED_IO_H
 
+void enum_domains(void);
 void handle_io(void);
 
 #endif
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/main.c
--- a/tools/console/daemon/main.c       Thu Sep  8 15:18:40 2005
+++ b/tools/console/daemon/main.c       Fri Sep  9 16:30:54 2005
@@ -26,8 +26,6 @@
 #include <sys/types.h>
 
 #include "xenctrl.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
 
 #include "utils.h"
 #include "io.h"
@@ -83,7 +81,10 @@
                daemonize("/var/run/xenconsoled.pid");
        }
 
-       xen_setup();
+       if (!xen_setup())
+               exit(1);
+
+       enum_domains();
 
        handle_io();
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/utils.c
--- a/tools/console/daemon/utils.c      Thu Sep  8 15:18:40 2005
+++ b/tools/console/daemon/utils.c      Fri Sep  9 16:30:54 2005
@@ -35,15 +35,11 @@
 
 #include "xenctrl.h"
 #include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
 
 #include "utils.h"
 
 struct xs_handle *xs;
 int xc;
-
-int xcs_ctrl_fd = -1;
-int xcs_data_fd = -1;
 
 bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
 {
@@ -69,32 +65,6 @@
        }
 
        return true;
-}
-
-static int open_domain_socket(const char *path)
-{
-       struct sockaddr_un addr;
-       int sock;
-       size_t addr_len;
-
-       if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
-               goto out;
-       }
-
-       addr.sun_family = AF_UNIX;
-       strcpy(addr.sun_path, path);
-       addr_len = sizeof(addr.sun_family) + strlen(XCS_SUN_PATH) + 1;
-
-       if (connect(sock, (struct sockaddr *)&addr, addr_len) == -1) {
-               goto out_close_sock;
-       }
-
-       return sock;
-
- out_close_sock:
-       close(sock);
- out:
-       return -1;
 }
 
 static void child_exit(int sig)
@@ -155,34 +125,8 @@
        signal(SIGTTIN, SIG_IGN);
 }
 
-/* synchronized send/recv strictly for setting up xcs */
-/* always use asychronize callbacks any other time */
-static bool xcs_send_recv(int fd, xcs_msg_t *msg)
-{
-       bool ret = false;
-
-       if (!write_sync(fd, msg, sizeof(*msg))) {
-               dolog(LOG_ERR, "Write failed at %s:%s():L%d?  Possible bug.",
-                      __FILE__, __FUNCTION__, __LINE__);
-               goto out;
-       }
-
-       if (!read_sync(fd, msg, sizeof(*msg))) {
-               dolog(LOG_ERR, "Read failed at %s:%s():L%d?  Possible bug.",
-                      __FILE__, __FUNCTION__, __LINE__);
-               goto out;
-       }
-
-       ret = true;
-
- out:
-       return ret;
-}
-
 bool xen_setup(void)
 {
-       int sock;
-       xcs_msg_t msg;
        
        xs = xs_daemon_open();
        if (xs == NULL) {
@@ -197,57 +141,23 @@
                goto out;
        }
 
-       sock = open_domain_socket(XCS_SUN_PATH);
-       if (sock == -1) {
-               dolog(LOG_ERR, "Failed to contact xcs (%m).  Is it running?");
-               goto out_close_store;
+       if (!xs_watch(xs, "@introduceDomain", "domlist")) {
+               dolog(LOG_ERR, "xenstore watch on @introduceDomain fails.");
+               goto out;
        }
 
-       xcs_ctrl_fd = sock;
-
-       sock = open_domain_socket(XCS_SUN_PATH);
-       if (sock == -1) {
-               dolog(LOG_ERR, "Failed to contact xcs (%m).  Is it running?");
-               goto out_close_ctrl;
-       }
-       
-       xcs_data_fd = sock;
-
-       memset(&msg, 0, sizeof(msg));
-       msg.type = XCS_CONNECT_CTRL;
-       if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
-               dolog(LOG_ERR, "xcs control connect failed.  Possible bug.");
-               goto out_close_data;
+       if (!xs_watch(xs, "@releaseDomain", "domlist")) {
+               dolog(LOG_ERR, "xenstore watch on @releaseDomain fails.");
+               goto out;
        }
 
-       msg.type = XCS_CONNECT_DATA;
-       if (!xcs_send_recv(xcs_data_fd, &msg) || msg.result != XCS_RSLT_OK) {
-               dolog(LOG_ERR, "xcs data connect failed.  Possible bug.");
-               goto out_close_data;
-       }
-
-       /* Since the vast majority of control messages are console messages
-          it's just easier to ignore other messages that try to bind to 
-          a specific type. */
-       msg.type = XCS_MSG_BIND;
-       msg.u.bind.port = PORT_WILDCARD;
-       msg.u.bind.type = TYPE_WILDCARD;
-       if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
-               dolog(LOG_ERR, "xcs vind failed.  Possible bug.");
-               goto out_close_data;
-       }
-       
        return true;
 
- out_close_data:
-       close(xcs_ctrl_fd);
-       xcs_data_fd = -1;
- out_close_ctrl:
-       close(xcs_ctrl_fd);
-       xcs_ctrl_fd = -1;
- out_close_store:
-       xs_daemon_close(xs);
  out:
+       if (xs)
+               xs_daemon_close(xs);
+       if (xc != -1)
+               xc_interface_close(xc);
        return false;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/utils.h
--- a/tools/console/daemon/utils.h      Thu Sep  8 15:18:40 2005
+++ b/tools/console/daemon/utils.h      Fri Sep  9 16:30:54 2005
@@ -33,13 +33,15 @@
 #define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
 bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
 
-extern int xcs_ctrl_fd;
-extern int xcs_data_fd;
 extern struct xs_handle *xs;
 extern int xc;
 
 #if 1
-#define dolog(val, fmt, ...) syslog(val, fmt, ## __VA_ARGS__)
+#define dolog(val, fmt, ...) do {                              \
+       if ((val) == LOG_ERR)                                   \
+               fprintf(stderr, fmt "\n", ## __VA_ARGS__);      \
+       syslog(val, fmt, ## __VA_ARGS__);                       \
+} while (/* CONSTCOND */0)
 #else
 #define dolog(val, fmt, ...) fprintf(stderr, fmt "\n", ## __VA_ARGS__)
 #endif
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/Makefile
--- a/tools/examples/Makefile   Thu Sep  8 15:18:40 2005
+++ b/tools/examples/Makefile   Fri Sep  9 16:30:54 2005
@@ -1,3 +1,6 @@
+XEN_ROOT = ../../
+include $(XEN_ROOT)/tools/Rules.mk
+
 INSTALL                = install
 INSTALL_DIR    = $(INSTALL) -d -m0755
 INSTALL_PROG   = $(INSTALL) -m0755
@@ -24,8 +27,8 @@
 XEN_BOOT_DIR = /usr/lib/xen/boot
 XEN_BOOT = mem-map.sxp
 
-XEN_HOTPLUG_DIR = /etc/hotplug.d/xen-backend
-XEN_HOTPLUG_SCRIPTS = backend.hotplug
+XEN_HOTPLUG_DIR = /etc/hotplug
+XEN_HOTPLUG_SCRIPTS = xen-backend.agent
 
 all: 
 build:
@@ -68,7 +71,7 @@
        [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
                $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR)
        for i in $(XEN_HOTPLUG_SCRIPTS); \
-           do [ -a $(DESTDIR)$(XEN_HOTPLUG_DIR)/$$i ] || \
+           do \
            $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \
        done
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/init.d/xend
--- a/tools/examples/init.d/xend        Thu Sep  8 15:18:40 2005
+++ b/tools/examples/init.d/xend        Fri Sep  9 16:30:54 2005
@@ -11,7 +11,7 @@
        exit 0
 fi
 
-# Wait for Xend and xcs to be up
+# Wait for Xend to be up
 function await_daemons_up
 {
        i=1
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Thu Sep  8 15:18:40 2005
+++ b/tools/examples/vif-bridge Fri Sep  9 16:30:54 2005
@@ -80,7 +80,7 @@
 fi
 ifconfig ${vif} $OP
 
-if [ ${ip} ] ; then
+if [ "${ip}" ] ; then
 
     # If we've been given a list of IP networks, allow pkts with these src 
addrs.
     for addr in ${ip} ; do
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/vif-route
--- a/tools/examples/vif-route  Thu Sep  8 15:18:40 2005
+++ b/tools/examples/vif-route  Fri Sep  9 16:30:54 2005
@@ -63,7 +63,7 @@
         ;;
 esac
 
-if [ ${ip} ] ; then
+if [ "${ip}" ] ; then
 
     # If we've been given a list of IP networks, allow pkts with these src 
addrs.
     for addr in ${ip} ; do
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Thu Sep  8 15:18:40 2005
+++ b/tools/examples/xmexample.vmx      Fri Sep  9 16:30:54 2005
@@ -73,6 +73,10 @@
 vnc=1
 
 #----------------------------------------------------------------------------
+# enable spawning vncviewer (only valid when vnc=1), default = 1
+vncviewer=1
+
+#----------------------------------------------------------------------------
 # no graphics, use serial port
 #nographic=0
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample1
--- a/tools/examples/xmexample1 Thu Sep  8 15:18:40 2005
+++ b/tools/examples/xmexample1 Fri Sep  9 16:30:54 2005
@@ -48,6 +48,20 @@
 disk = [ 'phy:hda1,hda1,w' ]
 
 #----------------------------------------------------------------------------
+# Define to which TPM instance the user domain should communicate.
+# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM'
+# where INSTANCE indicates the instance number of the TPM the VM
+# should be talking to and DOM provides the domain where the backend
+# is located.
+# Note that no two virtual machines should try to connect to the same
+# TPM instance. The handling of all TPM instances does require
+# some management effort in so far that VM configuration files (and thus
+# a VM) should be associated with a TPM instance throughout the lifetime
+# of the VM / VM configuration file. The instance number must be
+# greater or equal to 1.
+#vtpm = [ 'instance=1,backend=0' ]
+
+#----------------------------------------------------------------------------
 # Set the kernel command line for the new domain.
 # You only need to define the IP parameters and hostname if the domain's
 # IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample2
--- a/tools/examples/xmexample2 Thu Sep  8 15:18:40 2005
+++ b/tools/examples/xmexample2 Fri Sep  9 16:30:54 2005
@@ -84,6 +84,20 @@
          'phy:sda6,sda6,r' ]
 
 #----------------------------------------------------------------------------
+# Define to which TPM instance the user domain should communicate.
+# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM'
+# where INSTANCE indicates the instance number of the TPM the VM
+# should be talking to and DOM provides the domain where the backend
+# is located.
+# Note that no two virtual machines should try to connect to the same
+# TPM instance. The handling of all TPM instances does require
+# some management effort in so far that VM configuration files (and thus
+# a VM) should be associated with a TPM instance throughout the lifetime
+# of the VM / VM configuration file. The instance number must be
+# greater or equal to 1.
+#vtpm = ['instance=%d,backend=0' % (vmid) ]
+
+#----------------------------------------------------------------------------
 # Set the kernel command line for the new domain.
 # You only need to define the IP parameters and hostname if the domain's
 # IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample3
--- a/tools/examples/xmexample3 Thu Sep  8 15:18:40 2005
+++ b/tools/examples/xmexample3 Fri Sep  9 16:30:54 2005
@@ -80,6 +80,20 @@
 disk = [ 'phy:hda%d,hda1,w' % (vmid)]
 
 #----------------------------------------------------------------------------
+# Define to which TPM instance the user domain should communicate.
+# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM'
+# where INSTANCE indicates the instance number of the TPM the VM
+# should be talking to and DOM provides the domain where the backend
+# is located.
+# Note that no two virtual machines should try to connect to the same
+# TPM instance. The handling of all TPM instances does require
+# some management effort in so far that VM configuration files (and thus
+# a VM) should be associated with a TPM instance throughout the lifetime
+# of the VM / VM configuration file. The instance number must be
+# greater or equal to 1.
+#vtpm = ['instance=%d,backend=0' % (vmid) ]
+
+#----------------------------------------------------------------------------
 # Set the kernel command line for the new domain.
 # You only need to define the IP parameters and hostname if the domain's
 # IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Thu Sep  8 15:18:40 2005
+++ b/tools/firmware/rombios/rombios.c  Fri Sep  9 16:30:54 2005
@@ -31,7 +31,7 @@
 
 // Xen full virtualization does not handle unaligned IO with page crossing.
 // Disable 32-bit PIO as a workaround.
-#define NO_PIO32
+#undef NO_PIO32
 
 
 // ROM BIOS compatability entry points:
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Thu Sep  8 15:18:40 2005
+++ b/tools/firmware/vmxassist/Makefile Fri Sep  9 16:30:54 2005
@@ -24,7 +24,7 @@
 # The emulator code lives in ROM space
 TEXTADDR=0x000D0000
 
-DEFINES=-DDEBUG -DENABLE_VME -DTEXTADDR=${TEXTADDR}
+DEFINES=-DDEBUG -DTEXTADDR=${TEXTADDR}
 XENINC=-I$(XEN_ROOT)/xen/include -I$(XEN_ROOT)/tools/libxc
 #DEFINES=-DDEBUG -DTEST -DTEXTADDR=${TEXTADDR}
 #XENINC=-I/home/leendert/xen/xeno-unstable.bk/xen/include
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/setup.c
--- a/tools/firmware/vmxassist/setup.c  Thu Sep  8 15:18:40 2005
+++ b/tools/firmware/vmxassist/setup.c  Fri Sep  9 16:30:54 2005
@@ -353,7 +353,7 @@
 #endif
        setup_gdt();
        setup_idt();
-#ifdef ENABLE_VME
+#ifndef        TEST
        set_cr4(get_cr4() | CR4_VME); 
 #endif
        setup_ctx();
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Thu Sep  8 15:18:40 2005
+++ b/tools/firmware/vmxassist/vm86.c   Fri Sep  9 16:30:54 2005
@@ -465,8 +465,7 @@
  * Emulate a segment load in protected mode
  */
 int
-load_seg(unsigned long sel, unsigned long *base, unsigned long *limit,
-                                               union vmcs_arbytes *arbytes)
+load_seg(unsigned long sel, u32 *base, u32 *limit, union vmcs_arbytes *arbytes)
 {
        unsigned long long entry;
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c      Thu Sep  8 15:18:40 2005
+++ b/tools/firmware/vmxassist/vmxloader.c      Fri Sep  9 16:30:54 2005
@@ -110,8 +110,8 @@
        }
 #ifdef _ACPI_
        puts("Loading ACPI ...\n");
-    if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
-        /* make sure acpi table does not overlap rombios
+       if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
+               /* make sure acpi table does not overlap rombios
                 * currently acpi less than 8K will be OK.
                 */
                 memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
@@ -122,5 +122,6 @@
        memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
        puts("Go ...\n");
        ((void (*)())TEXTADDR)();
+       return 0;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/cpu-all.h
--- a/tools/ioemu/cpu-all.h     Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/cpu-all.h     Fri Sep  9 16:30:54 2005
@@ -672,6 +672,8 @@
 int cpu_memory_rw_debug(CPUState *env, target_ulong addr, 
                         uint8_t *buf, int len, int is_write);
 
+#define VGA_DIRTY_FLAG 0x01
+
 /* read dirty bit (return 0 or 1) */
 static inline int cpu_physical_memory_is_dirty(target_ulong addr)
 {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/exec.c
--- a/tools/ioemu/exec.c        Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/exec.c        Fri Sep  9 16:30:54 2005
@@ -142,6 +142,10 @@
 #else
         setvbuf(logfile, NULL, _IOLBF, 0);
 #endif
+/*
+       stdout = logfile;
+       stderr = logfile;
+*/
     }
 }
 
@@ -386,9 +390,6 @@
                     io_mem_write[io_index][1](io_mem_opaque[io_index], addr, 
val);
                     l = 2;
                 } else {
-                    if (l!=1){
-                        fprintf(logfile, "ERROR 8 bit mmio\n");
-                    }
                     /* 8 bit access */
                     val = ldub_raw(buf);
                     io_mem_write[io_index][0](io_mem_opaque[io_index], addr, 
val);
@@ -461,4 +462,14 @@
 
 void cpu_physical_memory_reset_dirty(target_ulong start, target_ulong end)
 {
-}
+       uint8_t *p;
+       int len;
+
+       if ((len = (end - start)) <= 0)
+               return;
+       p = phys_ram_dirty + (start >> TARGET_PAGE_BITS);
+       len = len >> TARGET_PAGE_BITS;
+       while (len > 0)
+               p[--len] &= ~VGA_DIRTY_FLAG;
+       return;
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/hw/pc.c       Fri Sep  9 16:30:54 2005
@@ -540,7 +540,10 @@
 
     if (pci_enabled) {
         for(i = 0; i < nb_nics; i++) {
-            pci_ne2000_init(pci_bus, &nd_table[i]);
+            if (nic_pcnet)
+                pci_pcnet_init(pci_bus, &nd_table[i]);
+            else
+                pci_ne2000_init(pci_bus, &nd_table[i]); 
         }
         pci_piix3_ide_init(pci_bus, bs_table);
 #ifdef APIC_SUPPORT
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/hw/vga.c      Fri Sep  9 16:30:54 2005
@@ -1620,7 +1620,6 @@
 static void vga_save(QEMUFile *f, void *opaque)
 {
     VGAState *s = opaque;
-    int i;
 
     qemu_put_be32s(f, &s->latch);
     qemu_put_8s(f, &s->sr_index);
@@ -1661,7 +1660,7 @@
 static int vga_load(QEMUFile *f, void *opaque, int version_id)
 {
     VGAState *s = opaque;
-    int is_vbe, i;
+    int is_vbe;
 
     if (version_id != 1)
         return -EINVAL;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile       Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/target-i386-dm/Makefile       Fri Sep  9 16:30:54 2005
@@ -272,7 +272,7 @@
 # Hardware support
 VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o
 VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pc.o port-e9.o
-VL_OBJS+= cirrus_vga.o
+VL_OBJS+= cirrus_vga.o pcnet.o
 
 ifeq ($(TARGET_ARCH), ppc)
 VL_OBJS+= ppc.o ide.o ne2000.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/target-i386-dm/helper2.c      Fri Sep  9 16:30:54 2005
@@ -169,133 +169,217 @@
 unsigned long
 do_inp(CPUState *env, unsigned long addr, unsigned long size)
 {
-  switch(size) {
-      case 1:
-        return cpu_inb(env, addr);
-      case 2:
-        return cpu_inw(env, addr);
-      case 4:
-        return cpu_inl(env, addr);
-      default:
-       fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size);
-        exit(-1);
-  }
+       switch(size) {
+       case 1:
+               return cpu_inb(env, addr);
+       case 2:
+               return cpu_inw(env, addr);
+       case 4:
+               return cpu_inl(env, addr);
+       default:
+               fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size);
+               exit(-1);
+       }
 }
 
 void
 do_outp(CPUState *env, unsigned long addr, unsigned long size, 
         unsigned long val)
 {
-  switch(size) {
-      case 1:
-        return cpu_outb(env, addr, val);
-      case 2:
-        return cpu_outw(env, addr, val);
-      case 4:
-        return cpu_outl(env, addr, val);
-      default:
-       fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size);
-        exit(-1);
-  }
+       switch(size) {
+       case 1:
+               return cpu_outb(env, addr, val);
+       case 2:
+               return cpu_outw(env, addr, val);
+       case 4:
+               return cpu_outl(env, addr, val);
+       default:
+               fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size);
+               exit(-1);
+       }
 }
 
 extern void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
                                    int len, int is_write);
 
 static inline void
-read_physical(target_phys_addr_t addr, unsigned long size, void *val)
-{
-        return cpu_physical_memory_rw(addr, val, size, 0);
+read_physical(u64 addr, unsigned long size, void *val)
+{
+        return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 0);
 }
 
 static inline void
-write_physical(target_phys_addr_t addr, unsigned long size, void *val)
-{
-        return cpu_physical_memory_rw(addr, val, size, 1);
-}
-
-//send the ioreq to device model
-void cpu_dispatch_ioreq(CPUState *env, ioreq_t *req)
-{
-       int i;
-       int sign;
-
-       sign = (req->df) ? -1 : 1;
-
-       if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
-               if (req->size != 4) {
-                       // Bochs expects higher bits to be 0
-                       req->u.data &= (1UL << (8 * req->size))-1;
-               }
-       }
-
-       if (req->port_mm == 0){//port io
-               if(req->dir == IOREQ_READ){//read
-                       if (!req->pdata_valid) {
-                               req->u.data = do_inp(env, req->addr, req->size);
-                       } else {
-                               unsigned long tmp; 
-
-                               for (i = 0; i < req->count; i++) {
-                                       tmp = do_inp(env, req->addr, req->size);
-                                       
write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), 
-                                                      req->size, &tmp);
-                               }
-                       }
-               } else if(req->dir == IOREQ_WRITE) {
-                       if (!req->pdata_valid) {
-                               do_outp(env, req->addr, req->size, req->u.data);
-                       } else {
-                               for (i = 0; i < req->count; i++) {
-                                       unsigned long tmp;
-
-                                       
read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), 
req->size, 
-                                                     &tmp);
-                                       do_outp(env, req->addr, req->size, tmp);
-                               }
-                       }
-                       
-               }
-       } else if (req->port_mm == 1){//memory map io
+write_physical(u64 addr, unsigned long size, void *val)
+{
+        return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 1);
+}
+
+void
+cpu_ioreq_pio(CPUState *env, ioreq_t *req)
+{
+       int i, sign;
+
+       sign = req->df ? -1 : 1;
+
+       if (req->dir == IOREQ_READ) {
                if (!req->pdata_valid) {
-                       //handle stos
-                       if(req->dir == IOREQ_READ) { //read
-                               for (i = 0; i < req->count; i++) {
-                                       
read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), 
req->size, &req->u.data);
-                               }
-                       } else if(req->dir == IOREQ_WRITE) { //write
-                               for (i = 0; i < req->count; i++) {
-                                       
write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), 
req->size, &req->u.data);
-                               }
-                       }
+                       req->u.data = do_inp(env, req->addr, req->size);
                } else {
-                       //handle movs
-                       unsigned long tmp;
-                       if (req->dir == IOREQ_READ) {
-                               for (i = 0; i < req->count; i++) {
-                                       
read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), 
req->size, &tmp);
-                                       
write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), 
req->size, &tmp);
-                               }
-                       } else if (req->dir == IOREQ_WRITE) {
-                               for (i = 0; i < req->count; i++) {
-                                       
read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), 
req->size, &tmp);
-                                       
write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), 
req->size, &tmp);
-                               }
-                       }
-               }
-       }
-        /* No state change if state = STATE_IORESP_HOOK */
-        if (req->state == STATE_IOREQ_INPROCESS)
-                req->state = STATE_IORESP_READY;
-       env->send_event = 1;
+                       unsigned long tmp; 
+
+                       for (i = 0; i < req->count; i++) {
+                               tmp = do_inp(env, req->addr, req->size);
+                               write_physical((target_phys_addr_t) req->u.pdata
+                                               + (sign * i * req->size), 
+                                       req->size, &tmp);
+                       }
+               }
+       } else if (req->dir == IOREQ_WRITE) {
+               if (!req->pdata_valid) {
+                       do_outp(env, req->addr, req->size, req->u.data);
+               } else {
+                       for (i = 0; i < req->count; i++) {
+                               unsigned long tmp;
+
+                               read_physical((target_phys_addr_t) req->u.pdata
+                                               + (sign * i * req->size),
+                                       req->size, &tmp);
+                               do_outp(env, req->addr, req->size, tmp);
+                       }
+               }
+       }
+}
+
+void
+cpu_ioreq_move(CPUState *env, ioreq_t *req)
+{
+       int i, sign;
+
+       sign = req->df ? -1 : 1;
+
+       if (!req->pdata_valid) {
+               if (req->dir == IOREQ_READ) {
+                       for (i = 0; i < req->count; i++) {
+                               read_physical(req->addr
+                                               + (sign * i * req->size),
+                                       req->size, &req->u.data);
+                       }
+               } else if (req->dir == IOREQ_WRITE) {
+                       for (i = 0; i < req->count; i++) {
+                               write_physical(req->addr
+                                               + (sign * i * req->size),
+                                       req->size, &req->u.data);
+                       }
+               }
+       } else {
+               unsigned long tmp;
+
+               if (req->dir == IOREQ_READ) {
+                       for (i = 0; i < req->count; i++) {
+                               read_physical(req->addr
+                                               + (sign * i * req->size),
+                                       req->size, &tmp);
+                               write_physical((target_phys_addr_t )req->u.pdata
+                                               + (sign * i * req->size),
+                                       req->size, &tmp);
+                       }
+               } else if (req->dir == IOREQ_WRITE) {
+                       for (i = 0; i < req->count; i++) {
+                               read_physical((target_phys_addr_t) req->u.pdata
+                                               + (sign * i * req->size),
+                                       req->size, &tmp);
+                               write_physical(req->addr
+                                               + (sign * i * req->size),
+                                       req->size, &tmp);
+                       }
+               }
+       }
+}
+
+void
+cpu_ioreq_and(CPUState *env, ioreq_t *req)
+{
+       unsigned long tmp1, tmp2;
+
+       if (req->pdata_valid != 0)
+               hw_error("expected scalar value");
+
+       read_physical(req->addr, req->size, &tmp1);
+       if (req->dir == IOREQ_WRITE) {
+               tmp2 = tmp1 & (unsigned long) req->u.data;
+               write_physical(req->addr, req->size, &tmp2);
+       }
+       req->u.data = tmp1;
+}
+
+void
+cpu_ioreq_or(CPUState *env, ioreq_t *req)
+{
+       unsigned long tmp1, tmp2;
+
+       if (req->pdata_valid != 0)
+               hw_error("expected scalar value");
+
+       read_physical(req->addr, req->size, &tmp1);
+       if (req->dir == IOREQ_WRITE) {
+               tmp2 = tmp1 | (unsigned long) req->u.data;
+               write_physical(req->addr, req->size, &tmp2);
+       }
+       req->u.data = tmp1;
+}
+
+void
+cpu_ioreq_xor(CPUState *env, ioreq_t *req)
+{
+       unsigned long tmp1, tmp2;
+
+       if (req->pdata_valid != 0)
+               hw_error("expected scalar value");
+
+       read_physical(req->addr, req->size, &tmp1);
+       if (req->dir == IOREQ_WRITE) {
+               tmp2 = tmp1 ^ (unsigned long) req->u.data;
+               write_physical(req->addr, req->size, &tmp2);
+       }
+       req->u.data = tmp1;
 }
 
 void
 cpu_handle_ioreq(CPUState *env)
 {
        ioreq_t *req = cpu_get_ioreq();
-       if (req)
-               cpu_dispatch_ioreq(env, req);
+
+       if (req) {
+               if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
+                       if (req->size != 4)
+                               req->u.data &= (1UL << (8 * req->size))-1;
+               }
+
+               switch (req->type) {
+               case IOREQ_TYPE_PIO:
+                       cpu_ioreq_pio(env, req);
+                       break;
+               case IOREQ_TYPE_COPY:
+                       cpu_ioreq_move(env, req);
+                       break;
+               case IOREQ_TYPE_AND:
+                       cpu_ioreq_and(env, req);
+                       break;
+               case IOREQ_TYPE_OR:
+                       cpu_ioreq_or(env, req);
+                       break;
+               case IOREQ_TYPE_XOR:
+                       cpu_ioreq_xor(env, req);
+                       break;
+               default:
+                       hw_error("Invalid ioreq type 0x%x", req->type);
+               }
+
+               /* No state change if state = STATE_IORESP_HOOK */
+               if (req->state == STATE_IOREQ_INPROCESS)
+                       req->state = STATE_IORESP_READY;
+               env->send_event = 1;
+       }
 }
 
 void
@@ -321,7 +405,7 @@
 
        // Send a message on the event channel. Add the vector to the shared mem
        // page.
-       intr = &(shared_page->sp_global.pic_intr[0]);
+       intr = (unsigned long *) &(shared_page->sp_global.pic_intr[0]);
        atomic_set_bit(vector, intr);
         if (loglevel & CPU_LOG_INT)
                 fprintf(logfile, "injecting vector: %x\n", vector);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/vl.c  Fri Sep  9 16:30:54 2005
@@ -125,6 +125,7 @@
 QEMUTimer *polling_timer;
 int vm_running;
 int audio_enabled = 0;
+int nic_pcnet = 1;
 int sb16_enabled = 1;
 int adlib_enabled = 1;
 int gus_enabled = 1;
@@ -412,6 +413,11 @@
     fprintf(stderr, "qemu: hardware error: ");
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
+    if (logfile) {
+       fprintf(logfile, "qemu: hardware error: ");
+       vfprintf(logfile, fmt, ap);
+       fprintf(logfile, "\n");
+    }
     va_end(ap);
     abort();
 }
@@ -2115,6 +2121,7 @@
            "-prep           Simulate a PREP system (default is PowerMAC)\n"
            "-g WxH[xDEPTH]  Set the initial VGA graphic mode\n"
 #endif
+           "-nic-pcnet     simulate an AMD PC-Net PCI ethernet adaptor\n"
            "\n"
            "Network options:\n"
            "-nics n         simulate 'n' network cards [default=1]\n"
@@ -2229,6 +2236,7 @@
     QEMU_OPTION_L,
     QEMU_OPTION_no_code_copy,
     QEMU_OPTION_pci,
+    QEMU_OPTION_nic_pcnet,
     QEMU_OPTION_isa,
     QEMU_OPTION_prep,
     QEMU_OPTION_k,
@@ -2313,6 +2321,7 @@
     
     /* temporary options */
     { "pci", 0, QEMU_OPTION_pci },
+    { "nic-pcnet", 0, QEMU_OPTION_nic_pcnet },
     { "cirrusvga", 0, QEMU_OPTION_cirrusvga },
     { NULL },
 };
@@ -2639,6 +2648,9 @@
                 break;
             case QEMU_OPTION_pci:
                 pci_enabled = 1;
+                break;
+            case QEMU_OPTION_nic_pcnet:
+                nic_pcnet = 1;
                 break;
             case QEMU_OPTION_isa:
                 pci_enabled = 0;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Thu Sep  8 15:18:40 2005
+++ b/tools/ioemu/vl.h  Fri Sep  9 16:30:54 2005
@@ -600,6 +600,12 @@
 void isa_ne2000_init(int base, int irq, NetDriverState *nd);
 void pci_ne2000_init(PCIBus *bus, NetDriverState *nd);
 
+/* pcnet.c */
+
+extern int nic_pcnet;
+
+void pci_pcnet_init(PCIBus *bus, NetDriverState *nd);
+
 /* pckbd.c */
 
 void kbd_init(void);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/Makefile      Fri Sep  9 16:30:54 2005
@@ -139,7 +139,7 @@
 libxenguest.so.$(MAJOR): libxenguest.so.$(MAJOR).$(MINOR)
        ln -sf $< $@
 
-libxenguest.so.$(MAJOR).$(MINOR): $(PIC_BUILD_OBJS)
+libxenguest.so.$(MAJOR).$(MINOR): $(PIC_BUILD_OBJS) libxenctrl.so
        $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenguest.so.$(MAJOR) 
-shared -o $@ $^ -lz -lxenctrl
 
 -include $(DEPS)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_core.c     Fri Sep  9 16:30:54 2005
@@ -2,6 +2,7 @@
 #define ELFSIZE 32
 #include "xc_elf.h"
 #include <stdlib.h>
+#include <unistd.h>
 #include <zlib.h>
 
 /* number of pages to write at a time */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_domain.c   Fri Sep  9 16:30:54 2005
@@ -7,6 +7,7 @@
  */
 
 #include "xc_private.h"
+#include <xen/memory.h>
 
 int xc_domain_create(int xc_handle,
                      u32 ssidref,
@@ -261,19 +262,66 @@
 
 int xc_domain_memory_increase_reservation(int xc_handle,
                                           u32 domid, 
-                                          unsigned int mem_kb)
+                                          unsigned long nr_extents,
+                                          unsigned int extent_order,
+                                          unsigned int address_bits,
+                                         unsigned long *extent_start)
 {
     int err;
-    unsigned int npages = mem_kb / (PAGE_SIZE/1024);
-
-    err = xc_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
-                        npages, 0, domid);
-    if (err == npages)
+    struct xen_memory_reservation reservation = {
+        .extent_start = extent_start, /* may be NULL */
+        .nr_extents   = nr_extents,
+        .extent_order = extent_order,  
+        .address_bits = address_bits,
+        .domid        = domid
+    };
+
+    err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation);
+    if (err == nr_extents)
         return 0;
 
     if (err > 0) {
+        fprintf(stderr,"Failed allocation for dom %d : %ld pages order %d 
addr_bits %d\n",
+                                 domid, nr_extents, extent_order, 
address_bits);
         errno = ENOMEM;
         err = -1;
     }
     return err;
 }
+
+int xc_domain_memory_decrease_reservation(int xc_handle,
+                                          u32 domid, 
+                                          unsigned long nr_extents,
+                                          unsigned int extent_order,
+                                         unsigned long *extent_start)
+{
+    int err;
+    struct xen_memory_reservation reservation = {
+        .extent_start = extent_start, 
+        .nr_extents   = nr_extents,
+        .extent_order = extent_order,  
+        .address_bits = 0,
+        .domid        = domid
+    };
+
+    if (extent_start == NULL)
+    {
+        fprintf(stderr,"decrease_reservation extent_start is NULL!\n");
+        errno = EINVAL;
+        err = -1;
+       goto out;
+    }
+
+    err = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation);
+    if (err == nr_extents)
+        return 0;
+
+    if (err > 0) {
+        fprintf(stderr,"Failed de-allocation for dom %d : %ld pages order %d\n",
+                                 domid, nr_extents, extent_order);
+        errno = EBUSY;
+        err = -1;
+    }
+out:
+    return err;
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_linux_build.c      Fri Sep  9 16:30:54 2005
@@ -17,6 +17,7 @@
 #include "xc_elf.h"
 #include "xc_aout9.h"
 #include <stdlib.h>
+#include <unistd.h>
 #include <zlib.h>
 
 #if defined(__i386__)
@@ -56,7 +57,7 @@
 }
 
 #define alloc_pt(ltab, vltab) \
-        ltab = page_array[ppt_alloc++] << PAGE_SHIFT; \
+        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
         if (vltab != NULL) { \
             munmap(vltab, PAGE_SIZE); \
         } \
@@ -127,18 +128,37 @@
     l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
     l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
-    unsigned long l1tab = 0;
-    unsigned long l2tab = 0;
-    unsigned long l3tab = 0;
+    unsigned long long l1tab = 0;
+    unsigned long long l2tab = 0;
+    unsigned long long l3tab = 0;
     unsigned long ppt_alloc;
     unsigned long count;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
+
+    if ( page_array[ppt_alloc] > 0xfffff )
+    {
+       unsigned long nmfn;
+       nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
+       if ( nmfn == 0 )
+       {
+           fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+           goto error_out;
+       }
+       page_array[ppt_alloc] = nmfn;
+    }
+
     alloc_pt(l3tab, vl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
     ctxt->ctrlreg[3] = l3tab;
-    
+
+    if(l3tab>0xfffff000ULL)
+    {
+        fprintf(stderr,"L3TAB = %llx above 4GB!\n",l3tab);
+        goto error_out;
+    }
+ 
     for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
     {
         if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
@@ -274,7 +294,6 @@
                          unsigned long *pvss, vcpu_guest_context_t *ctxt,
                          const char *cmdline,
                          unsigned long shared_info_frame,
-                         unsigned int control_evtchn,
                          unsigned long flags,
                          unsigned int vcpus,
                          unsigned int store_evtchn, unsigned long *store_mfn)
@@ -332,10 +351,10 @@
                        unsigned long *pvss, vcpu_guest_context_t *ctxt,
                        const char *cmdline,
                        unsigned long shared_info_frame,
-                       unsigned int control_evtchn,
                        unsigned long flags,
                        unsigned int vcpus,
-                      unsigned int store_evtchn, unsigned long *store_mfn)
+                      unsigned int store_evtchn, unsigned long *store_mfn,
+                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
     unsigned long *page_array = NULL;
     unsigned long count, i;
@@ -346,7 +365,7 @@
 
     unsigned long nr_pt_pages;
     unsigned long physmap_pfn;
-    u32 *physmap, *physmap_e;
+    unsigned long *physmap, *physmap_e;
 
     struct load_funcs load_funcs;
     struct domain_setup_info dsi;
@@ -358,6 +377,8 @@
     unsigned long vstartinfo_end;
     unsigned long vstoreinfo_start;
     unsigned long vstoreinfo_end;
+    unsigned long vconsole_start;
+    unsigned long vconsole_end;
     unsigned long vstack_start;
     unsigned long vstack_end;
     unsigned long vpt_start;
@@ -391,16 +412,18 @@
     vinitrd_end      = vinitrd_start + initrd_len;
     vphysmap_start   = round_pgup(vinitrd_end);
     vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
-    vstoreinfo_start = round_pgup(vphysmap_end);
+    vstartinfo_start = round_pgup(vphysmap_end);
+    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
+    vstoreinfo_start = vstartinfo_end;
     vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
-    vpt_start        = vstoreinfo_end; 
+    vconsole_start   = vstoreinfo_end;
+    vconsole_end     = vconsole_start + PAGE_SIZE;
+    vpt_start        = vconsole_end; 
 
     for ( nr_pt_pages = 2; ; nr_pt_pages++ )
     {
         vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
-        vstartinfo_start = vpt_end;
-        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
-        vstack_start     = vstartinfo_end;
+        vstack_start     = vpt_end;
         vstack_end       = vstack_start + PAGE_SIZE;
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
@@ -436,17 +459,19 @@
            " Loaded kernel: %p->%p\n"
            " Init. ramdisk: %p->%p\n"
            " Phys-Mach map: %p->%p\n"
+           " Start info:    %p->%p\n"
            " Store page:    %p->%p\n"
+           " Console page:  %p->%p\n"
            " Page tables:   %p->%p\n"
-           " Start info:    %p->%p\n"
            " Boot stack:    %p->%p\n"
            " TOTAL:         %p->%p\n",
            _p(dsi.v_kernstart), _p(dsi.v_kernend), 
            _p(vinitrd_start), _p(vinitrd_end),
            _p(vphysmap_start), _p(vphysmap_end),
+           _p(vstartinfo_start), _p(vstartinfo_end),
            _p(vstoreinfo_start), _p(vstoreinfo_end),
+           _p(vconsole_start), _p(vconsole_end),
            _p(vpt_start), _p(vpt_end),
-           _p(vstartinfo_start), _p(vstartinfo_end),
            _p(vstack_start), _p(vstack_end),
            _p(dsi.v_start), _p(v_end));
     printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
@@ -519,12 +544,14 @@
     physmap = physmap_e = xc_map_foreign_range(
         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
         page_array[physmap_pfn++]);
+
     for ( count = 0; count < nr_pages; count++ )
     {
         if ( xc_add_mmu_update(xc_handle, mmu,
-                              (page_array[count] << PAGE_SHIFT) | 
+                              ((unsigned long long)page_array[count] << PAGE_SHIFT) | 
                               MMU_MACHPHYS_UPDATE, count) )
         {
+            fprintf(stderr,"m2p update failure p=%lx m=%lx\n",count,page_array[count] ); 
             munmap(physmap, PAGE_SIZE);
             goto error_out;
         }
@@ -566,6 +593,8 @@
 #endif
 
     *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
+    *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
+
 
     start_info = xc_map_foreign_range(
         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
@@ -577,9 +606,10 @@
     start_info->pt_base      = vpt_start;
     start_info->nr_pt_frames = nr_pt_pages;
     start_info->mfn_list     = vphysmap_start;
-    start_info->domain_controller_evtchn = control_evtchn;
     start_info->store_mfn    = *store_mfn;
     start_info->store_evtchn = store_evtchn;
+    start_info->console_mfn   = *console_mfn;
+    start_info->console_evtchn = console_evtchn;
     if ( initrd_len != 0 )
     {
         start_info->mod_start    = vinitrd_start;
@@ -627,11 +657,12 @@
                    const char *image_name,
                    const char *ramdisk_name,
                    const char *cmdline,
-                   unsigned int control_evtchn,
                    unsigned long flags,
                    unsigned int vcpus,
                    unsigned int store_evtchn,
-                   unsigned long *store_mfn)
+                   unsigned long *store_mfn,
+                   unsigned int console_evtchn,
+                   unsigned long *console_mfn)
 {
     dom0_op_t launch_op, op;
     int initrd_fd = -1;
@@ -706,8 +737,9 @@
                      &vstartinfo_start, &vkern_entry,
                      &vstack_start, ctxt, cmdline,
                      op.u.getdomaininfo.shared_info_frame,
-                     control_evtchn, flags, vcpus,
-                     store_evtchn, store_mfn) < 0 )
+                     flags, vcpus,
+                     store_evtchn, store_mfn,
+                    console_evtchn, console_mfn) < 0 )
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -727,7 +759,6 @@
     ctxt->regs.ar_fpsr = FPSR_DEFAULT;
+    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
     ctxt->vcpu.privregs = 0;
-    ctxt->shared.domain_controller_evtchn = control_evtchn;
     ctxt->shared.flags = flags;
     i = 0; /* silence unused variable warning */
 #else /* x86 */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_linux_restore.c    Fri Sep  9 16:30:54 2005
@@ -8,24 +8,22 @@
 
 #include <stdlib.h>
 #include <unistd.h>
-
 #include "xg_private.h"
 #include <xenctrl.h>
-
-#include <xen/linux/suspend.h>
+#include <xen/memory.h>
 
 #define MAX_BATCH_SIZE 1024
 
 #define DEBUG 0
 
 #if 1
-#define ERR(_f, _a...) fprintf ( stderr, _f , ## _a ); fflush(stderr)
+#define ERR(_f, _a...) do { fprintf ( stderr, _f , ## _a ); fflush(stderr); } while(0)
 #else
 #define ERR(_f, _a...) ((void)0)
 #endif
 
 #if DEBUG
-#define DPRINTF(_f, _a...) fprintf ( stdout, _f , ## _a ); fflush(stdout)
+#define DPRINTF(_f, _a...) do { fprintf ( stdout, _f , ## _a ); fflush(stdout); } while (0)
 #else
 #define DPRINTF(_f, _a...) ((void)0)
 #endif
@@ -54,7 +52,8 @@
 }
 
 int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns,
-                    unsigned int store_evtchn, unsigned long *store_mfn)
+                    unsigned int store_evtchn, unsigned long *store_mfn,
+                    unsigned int console_evtchn, unsigned long *console_mfn)
 {
     dom0_op_t op;
     int rc = 1, i, n, k;
@@ -89,8 +88,8 @@
     /* used by mapper for updating the domain's copy of the table */
     unsigned long *live_pfn_to_mfn_table = NULL;
 
-    /* A temporary mapping of the guest's suspend record. */
-    suspend_record_t *p_srec;
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info;
 
     char *region_base;
 
@@ -103,7 +102,7 @@
     struct mmuext_op pin[MAX_PIN_BATCH];
     unsigned int nr_pins = 0;
 
-    DPRINTF("xc_linux_restore start\n");
+    DPRINTF("xc_linux_restore start: nr_pfns = %lx\n", nr_pfns);
 
     if (mlock(&ctxt, sizeof(ctxt))) {
         /* needed for when we do the build dom0 op, 
@@ -150,8 +149,10 @@
     }
 
     err = xc_domain_memory_increase_reservation(xc_handle, dom,
-                                                nr_pfns * PAGE_SIZE / 1024);
+                                                nr_pfns, 0, 0, NULL);
     if (err != 0) {
+        ERR("Failed to increase reservation by %lx\n", 
+            nr_pfns * PAGE_SIZE / 1024); 
         errno = ENOMEM;
         goto out;
     }
@@ -409,7 +410,8 @@
 
     /* Get the list of PFNs that are not in the psuedo-phys map */
     {
-       unsigned int count, *pfntab;
+       unsigned int count;
+        unsigned long *pfntab;
        int rc;
 
        if ( read_exact(io_fd, &count, sizeof(count)) != sizeof(count) )
@@ -441,9 +443,15 @@
 
        if ( count > 0 )
        {
-           if ( (rc = xc_dom_mem_op( xc_handle,
-                                      MEMOP_decrease_reservation,
-                                      pfntab, count, 0, dom )) <0 )
+            struct xen_memory_reservation reservation = {
+                .extent_start = pfntab,
+                .nr_extents   = count,
+                .extent_order = 0,
+                .domid        = dom
+            };
+           if ( (rc = xc_memory_op(xc_handle,
+                                    XENMEM_decrease_reservation,
+                                    &reservation)) != count )
            {
                ERR("Could not decrease reservation : %d",rc);
                goto out;
@@ -470,15 +478,18 @@
         goto out;
     }
     ctxt.user_regs.esi = mfn = pfn_to_mfn_table[pfn];
-    p_srec = xc_map_foreign_range(
+    start_info = xc_map_foreign_range(
         xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-    p_srec->resume_info.nr_pages    = nr_pfns;
-    p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
-    p_srec->resume_info.flags       = 0;
-    *store_mfn = p_srec->resume_info.store_mfn   =
-       pfn_to_mfn_table[p_srec->resume_info.store_mfn];
-    p_srec->resume_info.store_evtchn = store_evtchn;
-    munmap(p_srec, PAGE_SIZE);
+    start_info->nr_pages    = nr_pfns;
+    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
+    start_info->flags       = 0;
+    *store_mfn = start_info->store_mfn   =
+       pfn_to_mfn_table[start_info->store_mfn];
+    start_info->store_evtchn = store_evtchn;
+    *console_mfn = start_info->console_mfn   =
+       pfn_to_mfn_table[start_info->console_mfn];
+    start_info->console_evtchn = console_evtchn;
+    munmap(start_info, PAGE_SIZE);
 
     /* Uncanonicalise each GDT frame number. */
     if ( ctxt.gdt_ents > 8192 )
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_linux_save.c       Fri Sep  9 16:30:54 2005
@@ -14,12 +14,29 @@
 
 #include "xg_private.h"
 
-#include <xen/linux/suspend.h>
 #include <xen/io/domain_controller.h>
 
 #define BATCH_SIZE 1024   /* 1024 pages (4MB) at a time */
 
 #define MAX_MBIT_RATE 500
+
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().  
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. 
+** 
+*/
+#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop */ 
+#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns */
+
+
+
+/* Flags to control behaviour of xc_linux_save */
+#define XCFLAGS_LIVE      1
+#define XCFLAGS_DEBUG     2
+
 
 #define DEBUG 0
 
@@ -320,18 +337,18 @@
                              xc_dominfo_t *info,
                              vcpu_guest_context_t *ctxt)
 {
-    int i=0;
+    int i = 0;
     char ans[30];
 
     printf("suspend\n");
     fflush(stdout);
     if (fgets(ans, sizeof(ans), stdin) == NULL) {
-       ERR("failed reading suspend reply");
-       return -1;
+        ERR("failed reading suspend reply");
+        return -1;
     }
     if (strncmp(ans, "done\n", 5)) {
-       ERR("suspend reply incorrect: %s", ans);
-       return -1;
+        ERR("suspend reply incorrect: %s", ans);
+        return -1;
     }
 
 retry:
@@ -377,19 +394,16 @@
     return -1;
 }
 
-int xc_linux_save(int xc_handle, int io_fd, u32 dom)
+int xc_linux_save(int xc_handle, int io_fd, u32 dom, u32 max_iters, 
+                  u32 max_factor, u32 flags)
 {
     xc_dominfo_t info;
 
     int rc = 1, i, j, k, last_iter, iter = 0;
     unsigned long mfn;
-    int live =  0; // (ioctxt->flags & XCFLAGS_LIVE);
-    int debug = 0; // (ioctxt->flags & XCFLAGS_DEBUG);
+    int live  = (flags & XCFLAGS_LIVE); 
+    int debug = (flags & XCFLAGS_DEBUG); 
     int sent_last_iter, skip_this_iter;
-
-    /* Important tuning parameters */
-    int max_iters  = 29; /* limit us to 30 times round loop */
-    int max_factor = 3;  /* never send more than 3x nr_pfns */
 
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
@@ -405,6 +419,7 @@
     unsigned long page[1024];
 
     /* A copy of the pfn-to-mfn table frame list. */
+    unsigned long *live_pfn_to_mfn_frame_list_list = NULL;
     unsigned long *live_pfn_to_mfn_frame_list = NULL;
     unsigned long pfn_to_mfn_frame_list[1024];
 
@@ -420,9 +435,6 @@
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
-    /* A temporary mapping, and a copy, of the guest's suspend record. */
-    suspend_record_t *p_srec = NULL;
-
     /* number of pages we're dealing with */
     unsigned long nr_pfns;
 
@@ -442,8 +454,16 @@
 
     MBIT_RATE = START_MBIT_RATE;
 
-    DPRINTF("xc_linux_save start %d\n", dom);
-    
+
+    /* If no explicit control parameters given, use defaults */
+    if(!max_iters) 
+        max_iters = DEF_MAX_ITERS; 
+    if(!max_factor) 
+        max_factor = DEF_MAX_FACTOR; 
+
+
+    DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live?"true":"false"); 
+
     if (mlock(&ctxt, sizeof(ctxt))) {
         ERR("Unable to mlock ctxt");
         return 1;
@@ -487,11 +507,20 @@
         goto out;
     }
 
-    /* the pfn_to_mfn_frame_list fits in a single page */
+    live_pfn_to_mfn_frame_list_list = xc_map_foreign_range(xc_handle, dom,
+                                        PAGE_SIZE, PROT_READ,
+                                        live_shinfo->arch.pfn_to_mfn_frame_list_list);
+
+    if (!live_pfn_to_mfn_frame_list_list){
+        ERR("Couldn't map pfn_to_mfn_frame_list_list");
+        goto out;
+    }
+
     live_pfn_to_mfn_frame_list = 
-        xc_map_foreign_range(xc_handle, dom, 
-                              PAGE_SIZE, PROT_READ, 
-                              live_shinfo->arch.pfn_to_mfn_frame_list );
+       xc_map_foreign_batch(xc_handle, dom, 
+                            PROT_READ,
+                            live_pfn_to_mfn_frame_list_list,
+                            (nr_pfns+(1024*1024)-1)/(1024*1024) );
 
     if (!live_pfn_to_mfn_frame_list){
         ERR("Couldn't map pfn_to_mfn_frame_list");
@@ -647,22 +676,6 @@
         goto out;
     }
 
-    /* Map the suspend-record MFN to pin it. The page must be owned by 
-       dom for this to succeed. */
-    p_srec = xc_map_foreign_range(xc_handle, dom,
-                                   sizeof(*p_srec), PROT_READ | PROT_WRITE, 
-                                   ctxt.user_regs.esi);
-    if (!p_srec){
-        ERR("Couldn't map suspend record");
-        goto out;
-    }
-
-    /* Canonicalize store mfn. */
-    if ( !translate_mfn_to_pfn(&p_srec->resume_info.store_mfn) ) {
-       ERR("Store frame is not in range of pseudophys map");
-       goto out;
-    }
-
     print_stats( xc_handle, dom, 0, &stats, 0 );
 
     /* Now write out each data page, canonicalising page tables as we go... */
@@ -763,8 +776,6 @@
                 batch++;
             }
      
-//            DPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
-
             if ( batch == 0 )
                 goto skip; /* vanishingly unlikely... */
       
@@ -915,7 +926,7 @@
             continue;
         }
 
-        if ( last_iter ) break;
+        if ( last_iter ) break; 
 
         if ( live )
         {
@@ -1003,13 +1014,6 @@
        }
     }
 
-    if (nr_pfns != p_srec->nr_pfns )
-    {
-       ERR("Suspend record nr_pfns unexpected (%ld != %ld)",
-                  p_srec->nr_pfns, nr_pfns);
-        goto out;
-    }
-
     /* Canonicalise the suspend-record frame number. */
     if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) ){
         ERR("Suspend record is not in range of pseudophys map");
@@ -1043,9 +1047,6 @@
     if(live_shinfo)
         munmap(live_shinfo, PAGE_SIZE);
 
-    if(p_srec) 
-        munmap(p_srec, sizeof(*p_srec));
-
     if(live_pfn_to_mfn_frame_list) 
         munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c       Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_load_aout9.c       Fri Sep  9 16:30:54 2005
@@ -14,7 +14,8 @@
 
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+#define KZERO             0x80000000
+#define KOFFSET(_p)       ((_p)&~KZERO)
 
 static int parseaout9image(char *, unsigned long, struct domain_setup_info *);
 static int loadaout9image(char *, unsigned long, int, u32, unsigned long *, struct domain_setup_info *);
@@ -47,7 +48,7 @@
     struct domain_setup_info *dsi)
 {
     struct Exec ehdr;
-    unsigned long start, txtsz, end;
+    unsigned long start, dstart, end;
 
     if (!get_header(image, image_size, &ehdr)) {
         ERROR("Kernel image does not have a a.out9 header.");
@@ -59,11 +60,11 @@
         return -EINVAL;
     }
 
-    start = round_pgdown(ehdr.entry);
-    txtsz = round_pgup(ehdr.text);
-    end = start + txtsz + ehdr.data + ehdr.bss;
+    start = ehdr.entry;
+    dstart = round_pgup(start + ehdr.text);
+    end = dstart + ehdr.data + ehdr.bss;
 
-    dsi->v_start       = start;
+    dsi->v_start       = KZERO;
     dsi->v_kernstart   = start;
     dsi->v_kernend     = end;
     dsi->v_kernentry   = ehdr.entry;
@@ -83,19 +84,18 @@
     struct domain_setup_info *dsi)
 {
     struct Exec ehdr;
-    unsigned long txtsz;
+    unsigned long start, dstart;
 
     if (!get_header(image, image_size, &ehdr)) {
         ERROR("Kernel image does not have a a.out9 header.");
         return -EINVAL;
     }
 
-    txtsz = round_pgup(ehdr.text);
-    copyout(xch, dom, parray, 
-            0, image, sizeof ehdr + ehdr.text);
-    copyout(xch, dom, parray, 
-            txtsz, image + sizeof ehdr + ehdr.text, ehdr.data);
-    /* XXX zeroing of BSS needed? */
+    start = ehdr.entry;
+    dstart = round_pgup(start + ehdr.text);
+    copyout(xch, dom, parray, start, image + sizeof ehdr, ehdr.text);
+    copyout(xch, dom, parray, dstart,
+            image + sizeof ehdr + ehdr.text, ehdr.data);
 
     /* XXX load symbols */
 
@@ -110,13 +110,14 @@
 copyout(
     int xch, u32 dom,
     unsigned long *parray,
-    unsigned long off,
+    unsigned long addr,
     void *buf,
     int sz)
 {
-    unsigned long pgoff, chunksz;
+    unsigned long pgoff, chunksz, off;
     void *pg;
 
+    off = KOFFSET(addr);
     while (sz > 0) {
         pgoff = off & (PAGE_SIZE-1);
         chunksz = sz;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_private.c  Fri Sep  9 16:30:54 2005
@@ -6,6 +6,7 @@
 
 #include <zlib.h>
 #include "xc_private.h"
+#include <xen/memory.h>
 
 void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
                            unsigned long *arr, int num )
@@ -115,7 +116,7 @@
 
     if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
     {
-       fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
+       fprintf(stderr, "Dom_mmuext operation failed (rc=%ld errno=%d)-- need to"
                     " rebuild the user-space tool set?\n",ret,errno);
     }
 
@@ -171,7 +172,7 @@
 }
 
 int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, 
-                     unsigned long ptr, unsigned long val)
+                     unsigned long long ptr, unsigned long long val)
 {
     mmu->updates[mmu->idx].ptr = ptr;
     mmu->updates[mmu->idx].val = val;
@@ -187,38 +188,64 @@
     return flush_mmu_updates(xc_handle, mmu);
 }
 
-int xc_dom_mem_op(int xc_handle,
-                 unsigned int memop, 
-                 unsigned int *extent_list, 
-                 unsigned int nr_extents,
-                 unsigned int extent_order,
-                 domid_t domid)
+int xc_memory_op(int xc_handle,
+                 int cmd,
+                 void *arg)
 {
     privcmd_hypercall_t hypercall;
+    struct xen_memory_reservation *reservation = arg;
     long ret = -EINVAL;
 
-    hypercall.op     = __HYPERVISOR_dom_mem_op;
-    hypercall.arg[0] = (unsigned long)memop;
-    hypercall.arg[1] = (unsigned long)extent_list;
-    hypercall.arg[2] = (unsigned long)nr_extents;
-    hypercall.arg[3] = (unsigned long)extent_order;
-    hypercall.arg[4] = (unsigned long)domid;
-
-    if ( (extent_list != NULL) && 
-         (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) )
-    {
-        PERROR("Could not lock memory for Xen hypercall");
-        goto out1;
+    hypercall.op     = __HYPERVISOR_memory_op;
+    hypercall.arg[0] = (unsigned long)cmd;
+    hypercall.arg[1] = (unsigned long)arg;
+
+    switch ( cmd )
+    {
+    case XENMEM_increase_reservation:
+    case XENMEM_decrease_reservation:
+        if ( mlock(reservation, sizeof(*reservation)) != 0 )
+        {
+            PERROR("Could not mlock");
+            goto out1;
+        }
+        if ( (reservation->extent_start != NULL) &&
+             (mlock(reservation->extent_start,
+                    reservation->nr_extents * sizeof(unsigned long)) != 0) )
+        {
+            PERROR("Could not mlock");
+            safe_munlock(reservation, sizeof(*reservation));
+            goto out1;
+        }
+        break;
+    case XENMEM_maximum_ram_page:
+        if ( mlock(arg, sizeof(unsigned long)) != 0 )
+        {
+            PERROR("Could not mlock");
+            goto out1;
+        }
+        break;
     }
 
     if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
     {
-       fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
+       fprintf(stderr, "hypercall failed (rc=%ld errno=%d)-- need to"
                 " rebuild the user-space tool set?\n",ret,errno);
     }
 
-    if ( extent_list != NULL )
-        safe_munlock(extent_list, nr_extents*sizeof(unsigned long));
+    switch ( cmd )
+    {
+    case XENMEM_increase_reservation:
+    case XENMEM_decrease_reservation:
+        safe_munlock(reservation, sizeof(*reservation));
+        if ( reservation->extent_start != NULL )
+            safe_munlock(reservation->extent_start,
+                         reservation->nr_extents * sizeof(unsigned long));
+        break;
+    case XENMEM_maximum_ram_page:
+        safe_munlock(arg, sizeof(unsigned long));
+        break;
+    }
 
  out1:
     return ret;
@@ -395,3 +422,26 @@
 {
     return do_dom0_op(xc_handle, op);
 }
+
+int xc_version(int xc_handle, int cmd, void *arg)
+{
+    return do_xen_version(xc_handle, cmd, arg);
+}
+
+unsigned long xc_make_page_below_4G(int xc_handle, u32 domid, 
+                                   unsigned long mfn)
+{
+    unsigned long new_mfn;
+    if ( xc_domain_memory_decrease_reservation( 
+       xc_handle, domid, 1, 0, &mfn ) != 1 )
+    {
+       fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
+       return 0;
+    }
+    if ( xc_domain_memory_increase_reservation( xc_handle, domid, 1, 0, 32, &new_mfn ) != 1 )
+    {
+       fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
+       return 0;
+    }
+    return new_mfn;
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h  Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_private.h  Fri Sep  9 16:30:54 2005
@@ -59,6 +59,17 @@
                       (unsigned long)hypercall);
 }
 
+static inline int do_xen_version(int xc_handle, int cmd, void *dest)
+{
+    privcmd_hypercall_t hypercall;
+
+    hypercall.op     = __HYPERVISOR_xen_version;
+    hypercall.arg[0] = (unsigned long) cmd;
+    hypercall.arg[1] = (unsigned long) dest;
+    
+    return do_xen_hypercall(xc_handle, &hypercall);
+}
+
 static inline int do_dom0_op(int xc_handle, dom0_op_t *op)
 {
     int ret = -1;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xc_vmx_build.c        Fri Sep  9 16:30:54 2005
@@ -7,6 +7,7 @@
 #define ELFSIZE 32
 #include "xc_elf.h"
 #include <stdlib.h>
+#include <unistd.h>
 #include <zlib.h>
 #include <xen/io/ioreq.h>
 #include "linux_boot_params.h"
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xenctrl.h     Fri Sep  9 16:30:54 2005
@@ -23,6 +23,7 @@
 #include <sys/ptrace.h>
 #include <xen/xen.h>
 #include <xen/dom0_ops.h>
+#include <xen/version.h>
 #include <xen/event_channel.h>
 #include <xen/sched_ctl.h>
 #include <xen/acm.h>
@@ -386,7 +387,19 @@
 
 int xc_domain_memory_increase_reservation(int xc_handle,
                                           u32 domid, 
-                                          unsigned int mem_kb);
+                                          unsigned long nr_extents,
+                                          unsigned int extent_order,
+                                          unsigned int address_bits,
+                                         unsigned long *extent_start);
+
+int xc_domain_memory_decrease_reservation(int xc_handle,
+                                          u32 domid, 
+                                          unsigned long nr_extents,
+                                          unsigned int extent_order,
+                                         unsigned long *extent_start);
+
+unsigned long xc_make_page_below_4G(int xc_handle, u32 domid, 
+                                   unsigned long mfn);
 
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
@@ -430,9 +443,7 @@
 int xc_mmuext_op(int xc_handle, struct mmuext_op *op, unsigned int nr_ops,
                 domid_t dom);
 
-int xc_dom_mem_op(int xc_handle, unsigned int memop, unsigned int *extent_list,
-                 unsigned int nr_extents, unsigned int extent_order,
-                 domid_t domid);
+int xc_memory_op(int xc_handle, int cmd, void *arg);
 
 int xc_get_pfn_type_batch(int xc_handle, u32 dom, int num, unsigned long *arr);
 
@@ -498,6 +509,8 @@
 
 /* Execute a privileged dom0 operation. */
 int xc_dom0_op(int xc_handle, dom0_op_t *op);
+
+int xc_version(int xc_handle, int cmd, void *arg);
 
 /* Initializes the store (for dom0)
    remote_port should be the remote end of a bound interdomain channel between
@@ -520,7 +533,7 @@
 typedef struct xc_mmu xc_mmu_t;
 xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom);
 int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, 
-                   unsigned long ptr, unsigned long val);
+                   unsigned long long ptr, unsigned long long val);
 int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu);
 
 #endif
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xenguest.h    Fri Sep  9 16:30:54 2005
@@ -6,13 +6,12 @@
  * Copyright (c) 2003-2004, K A Fraser.
  */
 
-#ifndef XENBUILD_H
-#define XENBUILD_H
+#ifndef XENGUEST_H
+#define XENGUEST_H
 
-#define XCFLAGS_VERBOSE   1
-#define XCFLAGS_LIVE      2
-#define XCFLAGS_DEBUG     4
-#define XCFLAGS_CONFIGURE 8
+#define XCFLAGS_LIVE      1
+#define XCFLAGS_DEBUG     2
+
 
 /**
  * This function will save a domain running Linux.
@@ -22,7 +21,8 @@
  * @parm dom the id of the domain
  * @return 0 on success, -1 on failure
  */
-int xc_linux_save(int xc_handle, int fd, uint32_t dom);
+int xc_linux_save(int xc_handle, int fd, uint32_t dom, uint32_t max_iters, 
+                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */);
 
 /**
  * This function will restore a saved domain running Linux.
@@ -35,19 +35,22 @@
  * @parm store_mfn returned with the mfn of the store page
  * @return 0 on success, -1 on failure
  */
-int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns,
-                    unsigned int store_evtchn, unsigned long *store_mfn);
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, 
+                     unsigned long nr_pfns, unsigned int store_evtchn, 
+                     unsigned long *store_mfn, unsigned int console_evtchn,
+                    unsigned long *console_mfn);
 
 int xc_linux_build(int xc_handle,
                    uint32_t domid,
                    const char *image_name,
                    const char *ramdisk_name,
                    const char *cmdline,
-                   unsigned int control_evtchn,
                    unsigned long flags,
                    unsigned int vcpus,
                    unsigned int store_evtchn,
-                   unsigned long *store_mfn);
+                   unsigned long *store_mfn,
+                   unsigned int console_evtchn,
+                   unsigned long *console_mfn);
 
 struct mem_map;
 int xc_vmx_build(int xc_handle,
@@ -63,4 +66,4 @@
                  unsigned int store_evtchn,
                  unsigned long *store_mfn);
 
-#endif
+#endif // XENGUEST_H
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Thu Sep  8 15:18:40 2005
+++ b/tools/libxc/xg_private.c  Fri Sep  9 16:30:54 2005
@@ -5,6 +5,7 @@
  */
 
 #include <stdlib.h>
+#include <unistd.h>
 #include <zlib.h>
 
 #include "xg_private.h"
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/cpuperf/cpuperf.c
--- a/tools/misc/cpuperf/cpuperf.c      Thu Sep  8 15:18:40 2005
+++ b/tools/misc/cpuperf/cpuperf.c      Fri Sep  9 16:30:54 2005
@@ -243,16 +243,12 @@
     }
 
     if (read) {
-        while((cpu_mask&1)) {
-            int i;
-            for (i=0x300;i<0x312;i++) {
-                printf("%010llu ",cpus_rdmsr( cpu_mask, i ) );
-            }
-            printf("\n");
-            cpu_mask>>=1;
-        }
+        int i;
+        for (i=0x300;i<0x312;i++)
+            printf("%010llu ",cpus_rdmsr( cpu_mask, i ) );
+        printf("\n");
         exit(1);
-    } 
+    }
     
     if (!escr) {
         fprintf(stderr, "Need an ESCR.\n");
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/Makefile
--- a/tools/misc/mbootpack/Makefile     Thu Sep  8 15:18:40 2005
+++ b/tools/misc/mbootpack/Makefile     Fri Sep  9 16:30:54 2005
@@ -20,8 +20,7 @@
 INCS   := -I. -I-
 DEFS   := 
 LDFLAGS        := 
-CC     := gcc
-CFLAGS         := -Wall -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format
+CFLAGS := -Wall -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format
 CFLAGS += -Wmissing-prototypes
 #CFLAGS        += -pipe -g -O0 -Wcast-align
 CFLAGS += -pipe -O3 
@@ -34,7 +33,7 @@
 DEPS     = .*.d
 
 mbootpack: $(OBJS)
-       $(CC) -o $@ $(filter-out %.a, $^) $(LDFLAGS)
+       $(HOSTCC) -o $@ $(filter-out %.a, $^) $(LDFLAGS)
 
 clean:
        $(RM) mbootpack *.o $(DEPS) bootsect setup bzimage_header.c bin2c
@@ -48,7 +47,7 @@
        $(LD) -m elf_i386 -Ttext 0x0 -s --oformat binary setup.o -o $@
 
 bin2c: bin2c.o 
-       $(CC) -o $@ $^ 
+       $(HOSTCC) -o $@ $^ 
 
 bzimage_header.c: bootsect setup bin2c
        ./bin2c -n 8 -b1 -a bzimage_bootsect bootsect > bzimage_header.c
@@ -58,10 +57,10 @@
        @
 
 %.o: %.S
-       $(CC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@
+       $(HOSTCC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@
 
 %.o: %.c
-       $(CC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@
+       $(HOSTCC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@
 
 .PHONY: all clean gdb
 .PRECIOUS: $(OBJS) $(OBJS:.o=.c) $(DEPS)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/buildimage.c
--- a/tools/misc/mbootpack/buildimage.c Thu Sep  8 15:18:40 2005
+++ b/tools/misc/mbootpack/buildimage.c Fri Sep  9 16:30:54 2005
@@ -42,6 +42,7 @@
 
 #include "mbootpack.h"
 #include "mb_header.h"
+
 
 /*  We will build an image that a bzImage-capable bootloader will load like 
  *  this:
@@ -105,8 +106,8 @@
     section_t *s;
 
     /* Patch the kernel and mbi addresses into the setup code */
-    *(address_t *)(bzimage_setup + BZ_ENTRY_OFFSET) = entry;
-    *(address_t *)(bzimage_setup + BZ_MBI_OFFSET) = mbi;
+    *(address_t *)(bzimage_setup + BZ_ENTRY_OFFSET) = eswap(entry);
+    *(address_t *)(bzimage_setup + BZ_MBI_OFFSET) = eswap(mbi);
     if (!quiet) printf("Kernel entry is %p, MBI is %p.\n", entry, mbi);
 
     /* Write out header and trampoline */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/mbootpack.c
--- a/tools/misc/mbootpack/mbootpack.c  Thu Sep  8 15:18:40 2005
+++ b/tools/misc/mbootpack/mbootpack.c  Fri Sep  9 16:30:54 2005
@@ -252,20 +252,21 @@
     for (i = 0; i <= MIN(len - 12, MULTIBOOT_SEARCH - 12); i += 4)
     {
         mbh = (struct multiboot_header *)(headerbuf + i);
-        if (mbh->magic != MULTIBOOT_MAGIC 
-            || ((mbh->magic+mbh->flags+mbh->checksum) & 0xffffffff))
+        if (eswap(mbh->magic) != MULTIBOOT_MAGIC 
+            || ((eswap(mbh->magic)+eswap(mbh->flags)+eswap(mbh->checksum)) 
+                               & 0xffffffff))
         {
             /* Not a multiboot header */
             continue;
         }
-        if (mbh->flags & MULTIBOOT_UNSUPPORTED) {
+        if (eswap(mbh->flags) & MULTIBOOT_UNSUPPORTED) {
             /* Requires options we don't support */
             printf("Fatal: found a multiboot header, but it "
                     "requires multiboot options that I\n"
                     "don't understand.  Sorry.\n");
             exit(1);
         } 
-        if (mbh->flags & MULTIBOOT_VIDEO_MODE) { 
+        if (eswap(mbh->flags) & MULTIBOOT_VIDEO_MODE) { 
             /* Asked for screen mode information */
             /* XXX carry on regardless */
             printf("Warning: found a multiboot header which asks "
@@ -275,22 +276,22 @@
         }
         /* This kernel will do: place and load it */
 
-        if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) {
+        if (eswap(mbh->flags) & MULTIBOOT_AOUT_KLUDGE) {
 
             /* Load using the offsets in the multiboot header */
             if(!quiet) 
                 printf("Loading %s using multiboot header.\n", filename);
 
             /* How much is there? */
-            start = mbh->load_addr;            
-            if (mbh->load_end_addr != 0) 
-                loadsize = mbh->load_end_addr - mbh->load_addr;
+            start = eswap(mbh->load_addr);            
+            if (eswap(mbh->load_end_addr) != 0) 
+                loadsize = eswap(mbh->load_end_addr) - eswap(mbh->load_addr);
             else 
                 loadsize = sb.st_size;
             
             /* How much memory will it take up? */ 
-            if (mbh->bss_end_addr != 0)
-                size = mbh->bss_end_addr - mbh->load_addr;
+            if (eswap(mbh->bss_end_addr) != 0)
+                size = eswap(mbh->bss_end_addr) - eswap(mbh->load_addr);
             else
                 size = loadsize;
             
@@ -335,32 +336,34 @@
             
             /* Done. */
             if (!quiet) printf("Loaded kernel from %s\n", filename);
-            return mbh->entry_addr;
+            return eswap(mbh->entry_addr);
             
         } else {
 
             /* Now look for an ELF32 header */    
             ehdr = (Elf32_Ehdr *)headerbuf;
-            if (*(unsigned long *)ehdr != 0x464c457f 
+            if (*(unsigned long *)ehdr != eswap(0x464c457f)
                 || ehdr->e_ident[EI_DATA] != ELFDATA2LSB
                 || ehdr->e_ident[EI_CLASS] != ELFCLASS32
-                || ehdr->e_machine != EM_386)
+                || eswap(ehdr->e_machine) != EM_386)
             {
                 printf("Fatal: kernel has neither ELF32/x86 nor multiboot load"
                        " headers.\n");
                 exit(1);
             }
-            if (ehdr->e_phoff + ehdr->e_phnum*sizeof(*phdr) > HEADERBUF_SIZE) {
+            if (eswap(ehdr->e_phoff) + eswap(ehdr->e_phnum)*sizeof(*phdr) 
+                               > HEADERBUF_SIZE) {
                 /* Don't expect this will happen with sane kernels */
                 printf("Fatal: too much ELF for me.  Try increasing "
                        "HEADERBUF_SIZE in mbootpack.\n");
                 exit(1);
             }
-            if (ehdr->e_phoff + ehdr->e_phnum*sizeof (*phdr) > len) {
+            if (eswap(ehdr->e_phoff) + eswap(ehdr->e_phnum)*sizeof (*phdr) 
+                               > len) {
                 printf("Fatal: malformed ELF header overruns EOF.\n");
                 exit(1);
             }
-            if (ehdr->e_phnum <= 0) {
+            if (eswap(ehdr->e_phnum) <= 0) {
                 printf("Fatal: ELF kernel has no program headers.\n");
                 exit(1);
             }
@@ -368,22 +371,22 @@
             if(!quiet) 
                 printf("Loading %s using ELF header.\n", filename);
 
-            if (ehdr->e_type != ET_EXEC 
-                || ehdr->e_version != EV_CURRENT
-                || ehdr->e_phentsize != sizeof (Elf32_Phdr)) {
+            if (eswap(ehdr->e_type) != ET_EXEC 
+                || eswap(ehdr->e_version) != EV_CURRENT
+                || eswap(ehdr->e_phentsize) != sizeof (Elf32_Phdr)) {
                 printf("Warning: funny-looking ELF header.\n");
             }
-            phdr = (Elf32_Phdr *)(headerbuf + ehdr->e_phoff);
+            phdr = (Elf32_Phdr *)(headerbuf + eswap(ehdr->e_phoff));
 
             /* Obey the program headers to load the kernel */
-            for(i = 0; i < ehdr->e_phnum; i++) {
-
-                start = phdr[i].p_paddr;
-                size = phdr[i].p_memsz;
-                if (phdr[i].p_type != PT_LOAD) 
+            for(i = 0; i < eswap(ehdr->e_phnum); i++) {
+
+                start = eswap(phdr[i].p_paddr);
+                size = eswap(phdr[i].p_memsz);
+                if (eswap(phdr[i].p_type) != PT_LOAD) 
                     loadsize = 0;
                 else 
-                    loadsize = MIN((long int)phdr[i].p_filesz, size);
+                    loadsize = MIN((long int)eswap(phdr[i].p_filesz), size);
 
                 if ((buffer = malloc(size)) == NULL) {
                     printf("Fatal: malloc() for kernel load failed: %s\n",
@@ -396,7 +399,7 @@
 
                 /* Load section from file */ 
                 if (loadsize > 0) {
-                    if (fseek(fp, phdr[i].p_offset, SEEK_SET) != 0) {
+                    if (fseek(fp, eswap(phdr[i].p_offset), SEEK_SET) != 0) {
                         printf("Fatal: seek failed in %s\n",
                                 strerror(errno));
                         exit(1);
@@ -452,7 +455,7 @@
          
             /* Done! */
             if (!quiet) printf("Loaded kernel from %s\n", filename);
-            return ehdr->e_entry;
+            return eswap(ehdr->e_entry);
         }
 
     }
@@ -568,12 +571,12 @@
     /* Command line */
     p = (char *)(mbi + 1);
     sprintf(p, "%s %s", imagename, command_line);
-    mbi->cmdline = ((address_t)p) + mbi_reloc_offset;
+    mbi->cmdline = eswap(((address_t)p) + mbi_reloc_offset);
     p += command_line_len;
 
     /* Bootloader ID */
     sprintf(p, version_string);
-    mbi->boot_loader_name = ((address_t)p) + mbi_reloc_offset;
+    mbi->boot_loader_name = eswap(((address_t)p) + mbi_reloc_offset);
     p += strlen(version_string) + 1;
 
     /* Next is space for the module command lines */
@@ -582,17 +585,17 @@
     /* Last come the module info structs */
     modp = (struct mod_list *)
         ((((address_t)p + mod_command_line_space) + 3) & ~3);
-    mbi->mods_count = modules;
-    mbi->mods_addr = ((address_t)modp) + mbi_reloc_offset;
+    mbi->mods_count = eswap(modules);
+    mbi->mods_addr = eswap(((address_t)modp) + mbi_reloc_offset);
 
     /* Memory information will be added at boot time, by setup.S 
      * or trampoline.S. */
-    mbi->flags = MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME;
+    mbi->flags = eswap(MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME);
 
 
     /* Load the modules */
     if (modules) {
-        mbi->flags |= MB_INFO_MODS;
+        mbi->flags = eswap(eswap(mbi->flags) | MB_INFO_MODS);
                 
         /* Go back and parse the module command lines */
         optind = opterr = 1;
@@ -652,10 +655,10 @@
             if (p != NULL) *p = ' ';
 
             /* Fill in the module info struct */
-            modp->mod_start = start;
-            modp->mod_end = start + size;
-            modp->cmdline = (address_t)mod_clp + mbi_reloc_offset;
-            modp->pad = 0;
+            modp->mod_start = eswap(start);
+            modp->mod_end = eswap(start + size);
+            modp->cmdline = eswap((address_t)mod_clp + mbi_reloc_offset);
+            modp->pad = eswap(0);
             modp++;
 
             /* Store the module command line */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/mbootpack.h
--- a/tools/misc/mbootpack/mbootpack.h  Thu Sep  8 15:18:40 2005
+++ b/tools/misc/mbootpack/mbootpack.h  Fri Sep  9 16:30:54 2005
@@ -31,6 +31,24 @@
 
 #undef NDEBUG
 #include <stdio.h>
+
+#include <endian.h>
+#include <byteswap.h>
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define eswap(x) (x)
+#else 
+#define eswap(x)                                               \
+        ({                                                      \
+               typeof(x) y = (x);                              \
+               switch(sizeof(y))                               \
+               {                                               \
+               case 2: y = __bswap_16(y); break;               \
+               case 4: y = __bswap_32(y); break;               \
+               case 8: y = __bswap_64(y); break;               \
+               }                                               \
+               y;                                              \
+        })     
+#endif                 
 
 /* Flags */
 extern int quiet;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/xend
--- a/tools/misc/xend   Thu Sep  8 15:18:40 2005
+++ b/tools/misc/xend   Fri Sep  9 16:30:54 2005
@@ -25,11 +25,6 @@
 import signal
 import time
 import commands
-
-XCS_PATH    = "/var/lib/xen/xcs_socket"
-XCS_EXEC    = "/usr/sbin/xcs"
-XCS_PIDFILE = "/var/run/xcs.pid"
-XCS_ARGS    = (XCS_EXEC, "-p", XCS_PIDFILE)
 
 # add fallback path for non-native python path installs if needed
 sys.path.append('/usr/lib/python')
@@ -70,52 +65,6 @@
         hline()
         raise CheckError("invalid user")
 
-def xcs_running():
-    """ See if the control switch is running.
-    """        
-    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
-    try:
-        s.connect( (XCS_PATH) )
-        s.close()
-    except:
-        try:
-            os.remove(XCS_PIDFILE)
-        except:
-            pass
-       return 0
-    return 1
-    
-def start_xcs():
-    if (not xcs_running()):
-        if os.fork() == 0 :
-            if not os.path.isdir(os.path.dirname(XCS_PATH)):
-                os.makedirs(os.path.dirname(XCS_PATH))
-            try:
-                os.execvp(XCS_EXEC, XCS_ARGS)
-            except:
-                hline()
-                msg("Tried to start xcs, but failed. Is it installed?")
-                hline()
-                raise CheckError("couldn't start xcs")
-        for n in range(10) :
-            if (xcs_running()):
-                break
-            time.sleep(0.1)
-        else :
-                hline()
-                msg("Failed to start the control interface switch.")
-                hline()
-                raise CheckError("xcs not running")
-            
-def stop_xcs():
-    try:
-       xcs_pidfile = open(XCS_PIDFILE)
-        xcs_pid = int(xcs_pidfile.read().strip())
-        os.kill(xcs_pid, signal.SIGTERM)
-        xcs_pidfile.close()
-    except:
-       return    
-
 def start_xenstored():
     XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
     cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
@@ -141,21 +90,16 @@
         pid, status = os.wait()
         return status >> 8
     elif sys.argv[1] == 'start':
-        start_xcs()
         start_xenstored()
         start_consoled()
         return daemon.start()
     elif sys.argv[1] == 'trace_start':
-        start_xcs()
         start_xenstored()
         start_consoled()
         return daemon.start(trace=1)
     elif sys.argv[1] == 'stop':
-        stop_xcs()
         return daemon.stop()
     elif sys.argv[1] == 'restart':
-        stop_xcs()
-        start_xcs()
         start_xenstored()
         start_consoled()
         return daemon.stop() or daemon.start()
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/setup.py
--- a/tools/python/setup.py     Thu Sep  8 15:18:40 2005
+++ b/tools/python/setup.py     Fri Sep  9 16:30:54 2005
@@ -7,10 +7,8 @@
 extra_compile_args  = [ "-fno-strict-aliasing", "-Wall", "-Werror" ]
 
 
-include_dirs = [ XEN_ROOT + "/tools/python/xen/lowlevel/xu",
-                 XEN_ROOT + "/tools/libxc",
+include_dirs = [ XEN_ROOT + "/tools/libxc",
                  XEN_ROOT + "/tools/xenstore",
-                 XEN_ROOT + "/tools/xcs",
                  ]
 
 library_dirs = [ XEN_ROOT + "/tools/libxc",
@@ -25,13 +23,6 @@
                library_dirs       = library_dirs,
                libraries          = libraries,
                sources            = [ "xen/lowlevel/xc/xc.c" ])
-
-xu = Extension("xu",
-               extra_compile_args = extra_compile_args,
-               include_dirs       = include_dirs + [ "xen/lowlevel/xu" ],
-               library_dirs       = library_dirs,
-               libraries          = libraries,
-               sources            = [ "xen/lowlevel/xu/xu.c" ])
 
 xs = Extension("xs",
                extra_compile_args = extra_compile_args,
@@ -51,10 +42,10 @@
                          'xen.xend.xenstore',
                          'xen.xm',
                          'xen.web',
-                                                'xen.sv'
+                         'xen.sv'
                          ],
       ext_package = "xen.lowlevel",
-      ext_modules = [ xc, xu, xs ]
+      ext_modules = [ xc, xs ]
       )
 
 os.chdir('logging')
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Sep  9 16:30:54 2005
@@ -268,25 +268,33 @@
     u32 dom;
     char *image, *ramdisk = NULL, *cmdline = "";
     int flags = 0, vcpus = 1;
-    int control_evtchn, store_evtchn;
+    int store_evtchn, console_evtchn;
     unsigned long store_mfn = 0;
-
-    static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn", 
-                                "image", "ramdisk", "cmdline", "flags",
+    unsigned long console_mfn = 0;
+
+    static char *kwd_list[] = { "dom", "store_evtchn", 
+                                "console_evtchn", "image", 
+                               /* optional */
+                               "ramdisk", "cmdline", "flags",
                                "vcpus", NULL };
 
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssii", kwd_list,
-                                      &dom, &control_evtchn, &store_evtchn,
-                                      &image, &ramdisk, &cmdline, &flags,
+                                      &dom, &store_evtchn,
+                                     &console_evtchn, &image, 
+                                     /* optional */
+                                     &ramdisk, &cmdline, &flags,
                                       &vcpus) )
         return NULL;
 
     if ( xc_linux_build(xc->xc_handle, dom, image,
-                        ramdisk, cmdline, control_evtchn, flags, vcpus,
-                        store_evtchn, &store_mfn) != 0 )
-        return PyErr_SetFromErrno(xc_error);
-    
-    return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
+                        ramdisk, cmdline, flags, vcpus,
+                        store_evtchn, &store_mfn, 
+                       console_evtchn, &console_mfn) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+    
+    return Py_BuildValue("{s:i,s:i}", 
+                        "store_mfn", store_mfn,
+                        "console_mfn", console_mfn);
 }
 
 static PyObject *pyxc_vmx_build(PyObject *self,
@@ -682,6 +690,8 @@
 {
     XcObject *xc = (XcObject *)self;
     xc_physinfo_t info;
+    char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
+    int i;
     
     if ( !PyArg_ParseTuple(args, "") )
         return NULL;
@@ -689,15 +699,72 @@
     if ( xc_physinfo(xc->xc_handle, &info) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
-    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i}",
+    *q=0;
+    for(i=0;i<sizeof(info.hw_cap)/4;i++)
+    {
+        p+=sprintf(p,"%08x:",info.hw_cap[i]);
+        if(info.hw_cap[i])
+           q=p;
+    }
+    if(q>cpu_cap)
+        *(q-1)=0;
+
+    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}",
                          "threads_per_core", info.threads_per_core,
                          "cores_per_socket", info.cores_per_socket,
                          "sockets_per_node", info.sockets_per_node,
                          "nr_nodes",         info.nr_nodes,
                          "total_pages",      info.total_pages,
                          "free_pages",       info.free_pages,
-                         "cpu_khz",          info.cpu_khz);
-}
+                         "cpu_khz",          info.cpu_khz,
+                         "hw_caps",          cpu_cap);
+}
+
+static PyObject *pyxc_xeninfo(PyObject *self,
+                              PyObject *args,
+                              PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    xen_extraversion_t xen_extra;
+    xen_compile_info_t xen_cc;
+    xen_changeset_info_t xen_chgset;
+    xen_capabilities_info_t xen_caps;
+    xen_parameters_info_t xen_parms;
+    long xen_version;
+    char str[128];
+
+    xen_version = xc_version(xc->xc_handle, XENVER_version, NULL);
+
+    if ( xc_version(xc->xc_handle, XENVER_extraversion, &xen_extra) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    if ( xc_version(xc->xc_handle, XENVER_compile_info, &xen_cc) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    if ( xc_version(xc->xc_handle, XENVER_changeset, &xen_chgset) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    if ( xc_version(xc->xc_handle, XENVER_capabilities, &xen_caps) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    if ( xc_version(xc->xc_handle, XENVER_parameters, &xen_parms) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    sprintf(str,"virt_start=0x%lx",xen_parms.virt_start);
+
+    return Py_BuildValue("{s:i,s:i,s:s,s:s,s:s,s:s,s:s,s:s,s:s,s:s}",
+                         "xen_major", xen_version >> 16,
+                         "xen_minor", (xen_version & 0xffff),
+                         "xen_extra", xen_extra,
+                         "xen_caps",  xen_caps.caps,
+                         "xen_params", str,
+                         "xen_changeset", xen_chgset,
+                         "cc_compiler", xen_cc.compiler,
+                         "cc_compile_by", xen_cc.compile_by,
+                         "cc_compile_domain", xen_cc.compile_domain,
+                         "cc_compile_date", xen_cc.compile_date);
+}
+
 
 static PyObject *pyxc_sedf_domain_set(PyObject *self,
                                          PyObject *args,
@@ -800,14 +867,21 @@
 
     u32 dom;
     unsigned long mem_kb;
-
-    static char *kwd_list[] = { "dom", "mem_kb", NULL };
-
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list, 
-                                      &dom, &mem_kb) )
-        return NULL;
-
-    if ( xc_domain_memory_increase_reservation(xc->xc_handle, dom, mem_kb) )
+    unsigned int extent_order = 0 , address_bits = 0;
+    unsigned long nr_extents;
+
+    static char *kwd_list[] = { "dom", "mem_kb", "extent_order", 
"address_bits", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "il|ii", kwd_list, 
+                                      &dom, &mem_kb, &extent_order, 
&address_bits) )
+        return NULL;
+
+    /* round down to nearest power of 2. Assume callers using extent_order>0
+       know what they are doing */
+    nr_extents = (mem_kb / (XC_PAGE_SIZE/1024)) >> extent_order;
+    if ( xc_domain_memory_increase_reservation(xc->xc_handle, dom, 
+                                              nr_extents, extent_order, 
+                                              address_bits, NULL) )
         return PyErr_SetFromErrno(xc_error);
     
     Py_INCREF(zero);
@@ -1081,6 +1155,13 @@
       "Returns [dict]: information about the hardware"
       "        [None]: on failure.\n" },
 
+    { "xeninfo",
+      (PyCFunction)pyxc_xeninfo,
+      METH_VARARGS, "\n"
+      "Get information about the Xen host\n"
+      "Returns [dict]: information about Xen"
+      "        [None]: on failure.\n" },
+
     { "shadow_control", 
       (PyCFunction)pyxc_shadow_control, 
       METH_VARARGS | METH_KEYWORDS, "\n"
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Fri Sep  9 16:30:54 2005
@@ -15,6 +15,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  * Copyright (C) 2005 Mike Wray Hewlett-Packard
+ * Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
  *
  */
 
@@ -45,6 +46,7 @@
 typedef struct XsHandle {
     PyObject_HEAD;
     struct xs_handle *xh;
+    PyObject *watches;
 } XsHandle;
 
 static inline struct xs_handle *xshandle(PyObject *self)
@@ -87,19 +89,21 @@
     PyObject *val = NULL;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
                                      &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_read(xh, path, &xsval_n);
-    if (!xsval) {
-        val = pyvalue_int(0);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
         goto exit;
     }
     val = PyString_FromStringAndSize(xsval, xsval_n);
  exit:
     if (xsval)
-       free(xsval);
+        free(xsval);
     return val;
 }
 
@@ -110,7 +114,7 @@
        " create [int]    : create flag, default 0.\n"          \
        " excl   [int]    : exclusive flag, default 0.\n"       \
        "\n"                                                    \
-       "Returns: [int] 0 on success.\n"                        \
+       "Returns None on success.\n"                            \
        "Raises RuntimeError on error.\n"                       \
        "\n"
 
@@ -130,16 +134,23 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
                                      &path, &data, &data_n, &create, &excl))
         goto exit;
     if (create)
-       flags |= O_CREAT;
+        flags |= O_CREAT;
     if (excl)
-       flags |= O_EXCL;
+        flags |= O_EXCL;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_write(xh, path, data, data_n, flags);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -165,12 +176,14 @@
     int i;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_directory(xh, path, &xsval_n);
-    if (!xsval) {
-        val = pyvalue_int(0);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
         goto exit;
     }
     val = PyList_New(xsval_n);
@@ -184,7 +197,7 @@
        "Make a directory.\n"                                   \
        " path [string]: path to directory to create.\n"        \
        "\n"                                                    \
-       "Returns: [int] 0 on success.\n"                        \
+       "Returns None on success.\n"                            \
        "Raises RuntimeError on error.\n"                       \
        "\n"
 
@@ -199,11 +212,18 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_mkdir(xh, path);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -212,7 +232,7 @@
        "Remove a path.\n"                      \
        " path [string] : path to remove\n"     \
        "\n"                                    \
-       "Returns: [int] 0 on success.\n"        \
+       "Returns None on success.\n"            \
        "Raises RuntimeError on error.\n"       \
        "\n"
 
@@ -227,11 +247,18 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_rm(xh, path);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -245,7 +272,7 @@
        "\n"
 
 static PyObject *xspy_get_permissions(PyObject *self, PyObject *args,
-                                     PyObject *kwds)
+                                      PyObject *kwds)
 {
     static char *kwd_spec[] = { "path", NULL };
     static char *arg_spec = "s|";
@@ -258,10 +285,12 @@
     int i;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     perms = xs_get_permissions(xh, path, &perms_n);
+    Py_END_ALLOW_THREADS
     if (!perms) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
         goto exit;
@@ -283,12 +312,12 @@
        " path  [string] : xenstore path.\n"    \
        " perms          : permissions.\n"      \
        "\n"                                    \
-       "Returns: [int] 0 on success.\n"        \
+       "Returns None on success.\n"            \
        "Raises RuntimeError on error.\n"       \
        "\n"
 
 static PyObject *xspy_set_permissions(PyObject *self, PyObject *args,
-                                     PyObject *kwds)
+                                      PyObject *kwds)
 {
     static char *kwd_spec[] = { "path", "perms", NULL };
     static char *arg_spec = "sO";
@@ -321,7 +350,7 @@
     }
     tuple0 = PyTuple_New(0);
     if (!tuple0)
-       goto exit;
+        goto exit;
     for (i = 0; i < xsperms_n; i++) {
         /* Domain the permissions apply to. */
         int dom = 0;
@@ -329,20 +358,27 @@
         int p_read = 0, p_write = 0;
         PyObject *p = PyList_GetItem(perms, i);
         if (!PyArg_ParseTupleAndKeywords(tuple0, p, perm_spec, perm_names,
-                                        &dom, &p_read, &p_write))
+                                         &dom, &p_read, &p_write))
             goto exit;
         xsperms[i].id = dom;
         if (p_read)
-           xsperms[i].perms |= XS_PERM_READ;
+            xsperms[i].perms |= XS_PERM_READ;
         if (p_write)
-           xsperms[i].perms |= XS_PERM_WRITE;
-    }
+            xsperms[i].perms |= XS_PERM_WRITE;
+    }
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_set_permissions(xh, path, xsperms, xsperms_n);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     Py_XDECREF(tuple0);
     if (xsperms)
-       free(xsperms);
+        free(xsperms);
     return val;
 }
 
@@ -351,28 +387,53 @@
        " path     [string] : xenstore path.\n"                         \
        " token    [string] : returned in watch notification.\n"        \
        "\n"                                                            \
-       "Returns: [int] 0 on success.\n"                                \
+       "Returns None on success.\n"                                    \
        "Raises RuntimeError on error.\n"                               \
        "\n"
 
+/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
+#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
+
 static PyObject *xspy_watch(PyObject *self, PyObject *args, PyObject *kwds)
 {
     static char *kwd_spec[] = { "path", "token", NULL };
-    static char *arg_spec = "s|is";
-    char *path = NULL;
-    char *token = "";
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int xsval = 0;
-
-    if (!xh)
-       goto exit;
+    static char *arg_spec = "sO";
+    char *path = NULL;
+    PyObject *token;
+    char token_str[MAX_STRLEN(unsigned long) + 1];
+    int i;
+
+    XsHandle *xsh = (XsHandle *)self;
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    int xsval = 0;
+
+    if (!xh)
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, 
                                      &path, &token))
         goto exit;
-    xsval = xs_watch(xh, path, token);
-    val = pyvalue_int(xsval);
+    Py_INCREF(token);
+    sprintf(token_str, "%li", (unsigned long)token);
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_watch(xh, path, token_str);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        Py_DECREF(token);
+        goto exit;
+    }
+
+    for (i = 0; i < PyList_Size(xsh->watches); i++) {
+        if (PyList_GetItem(xsh->watches, i) == Py_None) {
+            PyList_SetItem(xsh->watches, i, token);
+            break;
+        }
+    }
+    if (i == PyList_Size(xsh->watches))
+        PyList_Append(xsh->watches, token);
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -388,29 +449,46 @@
        "\n"
 
 static PyObject *xspy_read_watch(PyObject *self, PyObject *args,
-                                PyObject *kwds)
+                                 PyObject *kwds)
 {
     static char *kwd_spec[] = { NULL };
     static char *arg_spec = "";
 
+    XsHandle *xsh = (XsHandle *)self;
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     char **xsval = NULL;
-
-    if (!xh)
-       goto exit;
+    PyObject *token;
+    int i;
+
+    if (!xh)
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_read_watch(xh);
-    if (!xsval) {
-            val = PyErr_SetFromErrno(PyExc_RuntimeError);
-            goto exit;
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    if (sscanf(xsval[1], "%li", (unsigned long *)&token) != 1) {
+        PyErr_SetString(PyExc_RuntimeError, "invalid token");
+        goto exit;
+    }
+    for (i = 0; i < PyList_Size(xsh->watches); i++) {
+        if (token == PyList_GetItem(xsh->watches, i))
+            break;
+    }
+    if (i == PyList_Size(xsh->watches)) {
+        PyErr_SetString(PyExc_RuntimeError, "invalid token");
+        goto exit;
     }
     /* Create tuple (path, token). */
-    val = Py_BuildValue("(ss)", xsval[0], xsval[1]);
+    val = Py_BuildValue("(sO)", xsval[0], token);
  exit:
     if (xsval)
-       free(xsval);
+        free(xsval);
     return val;
 }
 
@@ -418,27 +496,36 @@
        "Acknowledge a watch notification that has been read.\n"        \
        " token [string] : from the watch notification\n"               \
        "\n"                                                            \
-       "Returns: [int] 0 on success.\n"                                \
+       "Returns None on success.\n"                                    \
        "Raises RuntimeError on error.\n"                               \
        "\n"
 
 static PyObject *xspy_acknowledge_watch(PyObject *self, PyObject *args,
-                                       PyObject *kwds)
+                                        PyObject *kwds)
 {
     static char *kwd_spec[] = { "token", NULL };
-    static char *arg_spec = "s";
-    char *token;
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int xsval = 0;
-
-    if (!xh)
-       goto exit;
+    static char *arg_spec = "O";
+    PyObject *token;
+    char token_str[MAX_STRLEN(unsigned long) + 1];
+
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    int xsval = 0;
+
+    if (!xh)
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &token))
         goto exit;
-    xsval = xs_acknowledge_watch(xh, token);
-    val = pyvalue_int(xsval);
+    sprintf(token_str, "%li", (unsigned long)token);
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_acknowledge_watch(xh, token_str);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -448,28 +535,46 @@
        " path  [string] : xenstore path.\n"            \
        " token [string] : token from the watch.\n"     \
        "\n"                                            \
-       "Returns: [int] 0 on success.\n"                \
+       "Returns None on success.\n"                    \
        "Raises RuntimeError on error.\n"               \
        "\n"
 
 static PyObject *xspy_unwatch(PyObject *self, PyObject *args, PyObject *kwds)
 {
     static char *kwd_spec[] = { "path", "token", NULL };
-    static char *arg_spec = "s|s";
-    char *path = NULL;
-    char *token = "";
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int xsval = 0;
-
-    if (!xh)
-       goto exit;
+    static char *arg_spec = "sO";
+    char *path = NULL;
+    PyObject *token;
+    char token_str[MAX_STRLEN(unsigned long) + 1];
+    int i;
+
+    XsHandle *xsh = (XsHandle *)self;
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+    int xsval = 0;
+
+    if (!xh)
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path,
-                                    &token))
-        goto exit;
-    xsval = xs_unwatch(xh, path, token);
-    val = pyvalue_int(xsval);
+                                     &token))
+        goto exit;
+    sprintf(token_str, "%li", (unsigned long)token);
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_unwatch(xh, path, token_str);
+    Py_END_ALLOW_THREADS
+    if (!xsval)
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+    else {
+        Py_INCREF(Py_None);
+        val = Py_None;
+    }
+    for (i = 0; i < PyList_Size(xsh->watches); i++) {
+        if (token == PyList_GetItem(xsh->watches, i)) {
+            Py_INCREF(Py_None);
+            PyList_SetItem(xsh->watches, i, Py_None);
+            break;
+        }
+    }
  exit:
     return val;
 }
@@ -479,12 +584,12 @@
        "Only one transaction can be active at a time.\n"       \
        " path [string]: xenstore path.\n"                      \
        "\n"                                                    \
-       "Returns: [int] 0 on success.\n"                        \
+       "Returns None on success.\n"                            \
        "Raises RuntimeError on error.\n"                       \
        "\n"
 
 static PyObject *xspy_transaction_start(PyObject *self, PyObject *args,
-                                       PyObject *kwds)
+                                        PyObject *kwds)
 {
     static char *kwd_spec[] = { "path", NULL };
     static char *arg_spec = "s|";
@@ -495,11 +600,18 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_transaction_start(xh, path);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -509,12 +621,12 @@
        "Attempts to commit the transaction unless abort is true.\n"    \
        " abort [int]: abort flag (default 0).\n"                       \
        "\n"                                                            \
-       "Returns: [int] 0 on success.\n"                                \
+       "Returns None on success.\n"                                    \
        "Raises RuntimeError on error.\n"                               \
        "\n"
 
 static PyObject *xspy_transaction_end(PyObject *self, PyObject *args,
-                                     PyObject *kwds)
+                                      PyObject *kwds)
 {
     static char *kwd_spec[] = { "abort", NULL };
     static char *arg_spec = "|i";
@@ -525,11 +637,18 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &abort))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_transaction_end(xh, abort);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -541,12 +660,12 @@
        " port [int]   : port the domain is using for xenstore\n"       \
        " path [string]: path to the domain's data in xenstore\n"       \
        "\n"                                                            \
-       "Returns: [int] 0 on success.\n"                                \
+       "Returns None on success.\n"                                    \
        "Raises RuntimeError on error.\n"                               \
        "\n"
 
 static PyObject *xspy_introduce_domain(PyObject *self, PyObject *args,
-                                      PyObject *kwds)
+                                       PyObject *kwds)
 {
     static char *kwd_spec[] = { "dom", "page", "port", "path", NULL };
     static char *arg_spec = "iiis|";
@@ -560,12 +679,19 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
                                      &dom, &page, &port, &path))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_introduce_domain(xh, dom, page, port, path);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -575,12 +701,12 @@
        "Unless this is done the domain will not be released.\n"        \
        " dom [int]: domain id\n"                                       \
        "\n"                                                            \
-       "Returns: [int] 0 on success.\n"                                \
+       "Returns None on success.\n"                                    \
        "Raises RuntimeError on error.\n"                               \
        "\n"
 
 static PyObject *xspy_release_domain(PyObject *self, PyObject *args,
-                                    PyObject *kwds)
+                                     PyObject *kwds)
 {
     static char *kwd_spec[] = { "dom", NULL };
     static char *arg_spec = "i|";
@@ -591,12 +717,19 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
                                      &dom))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_release_domain(xh, dom);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -604,7 +737,7 @@
 #define xspy_close_doc "\n"                    \
        "Close the connection to xenstore.\n"   \
        "\n"                                    \
-       "Returns: [int] 0 on success.\n"        \
+       "Returns None on success.\n"            \
        "Raises RuntimeError on error.\n"       \
        "\n"
 
@@ -612,18 +745,25 @@
 {
     static char *kwd_spec[] = { NULL };
     static char *arg_spec = "";
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int xsval = 1;
-
-    if (!xh)
-       goto exit;
+    int i;
+
+    XsHandle *xsh = (XsHandle *)self;
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
+
+    if (!xh)
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
         goto exit;
+    for (i = 0; i < PyList_Size(xsh->watches); i++) {
+        /* TODO: xs_unwatch watches */
+        Py_INCREF(Py_None);
+        PyList_SetItem(xsh->watches, i, Py_None);
+    }
     xs_daemon_close(xh);
-    ((XsHandle*)self)->xh = NULL;
-    val = pyvalue_int(xsval);
+    xsh->xh = NULL;
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -631,7 +771,7 @@
 #define xspy_shutdown_doc "\n"                 \
        "Shutdown the xenstore daemon.\n"       \
        "\n"                                    \
-       "Returns: [int] 0 on success.\n"        \
+       "Returns None on success.\n"            \
        "Raises RuntimeError on error.\n"       \
        "\n"
 
@@ -645,11 +785,18 @@
     int xsval = 0;
 
     if (!xh)
-       goto exit;
+        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
         goto exit;
+    Py_BEGIN_ALLOW_THREADS
     xsval = xs_shutdown(xh);
-    val = pyvalue_int(xsval);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+    Py_INCREF(Py_None);
+    val = Py_None;
  exit:
     return val;
 }
@@ -750,20 +897,25 @@
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
                                      &readonly))
-        goto exit;
+        return NULL;
 
     xsh = PyObject_New(XsHandle, &xshandle_type);
     if (!xsh)
-       goto exit;
+        return NULL;
+    xsh->watches = PyList_New(0);
+    if (!xsh->watches)
+        goto exit;
     xsh->xh = (readonly ? xs_daemon_open_readonly() : xs_daemon_open());
     if (!xsh->xh) {
-        PyObject_Del(xsh);
-        val = pyvalue_int(0);
+        Py_DECREF(xsh->watches);
+        PyErr_SetFromErrno(PyExc_RuntimeError);
         goto exit;
     }
     val = (PyObject *)xsh;
- exit:
-    return val;
+    return val;
+ exit:
+    PyObject_Del(xsh);
+    return NULL;
 }
 
 static PyMethodDef xs_methods[] = {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py   Fri Sep  9 16:30:54 2005
@@ -34,7 +34,7 @@
         raise XendError(errmsg)
     return buf
 
-def save(xd, fd, dominfo):
+def save(xd, fd, dominfo, live):
     write_exact(fd, SIGNATURE, "could not write guest state file: signature")
 
     config = sxp.to_string(dominfo.sxpr())
@@ -42,8 +42,13 @@
                 "could not write guest state file: config len")
     write_exact(fd, config, "could not write guest state file: config")
 
+    # xc_save takes three customization parameters: maxit, max_f, and flags
+    # the last controls whether or not save is 'live', while the first two
+    # further customize behaviour when 'live' save is enabled. Passing "0"
+    # simply uses the defaults compiled into libxenguest; see the comments 
+    # and/or code in xc_linux_save() for more information. 
     cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
-           str(dominfo.id)]
+           str(dominfo.id), "0", "0", str(int(live)) ]
     log.info("[xc_save] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
     
@@ -51,7 +56,7 @@
     p = select.poll()
     p.register(child.fromchild.fileno())
     p.register(child.childerr.fileno())
-    while True:
+    while True: 
         r = p.poll()
         for (fd, event) in r:
             if not event & select.POLLIN:
@@ -65,15 +70,16 @@
                 if l.rstrip() == "suspend":
                     log.info("suspending %d" % dominfo.id)
                     xd.domain_shutdown(dominfo.id, reason='suspend')
+                    dominfo.state_wait("suspended")
+                    log.info("suspend %d done" % dominfo.id)
                     if dominfo.store_channel:
                         try:
                             dominfo.db.releaseDomain(dominfo.id)
                         except Exception, ex:
-                            log.warning("error in domain release on xenstore: 
%s",
-                                        ex)
+                            log.warning(
+                                "error in domain release on xenstore: %s",
+                                ex)
                             pass
-                    dominfo.state_wait("suspended")
-                    log.info("suspend %d done" % dominfo.id)
                     child.tochild.write("done\n")
                     child.tochild.flush()
         if filter(lambda (fd, event): event & select.POLLHUP, r):
@@ -121,12 +127,18 @@
             "not a valid guest state file: pfn count out of range")
 
     if dominfo.store_channel:
-        evtchn = dominfo.store_channel.port2
+        store_evtchn = dominfo.store_channel.port2
     else:
-        evtchn = 0
+        store_evtchn = 0
+
+    if dominfo.console_channel:
+        console_evtchn = dominfo.console_channel.port2
+    else:
+        console_evtchn = 0
 
     cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
-           str(dominfo.id), str(nr_pfns), str(evtchn)]
+           str(dominfo.id), str(nr_pfns),
+           str(store_evtchn), str(console_evtchn)]
     log.info("[xc_restore] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
     child.tochild.close()
@@ -147,6 +159,7 @@
             if fd == child.fromchild.fileno():
                 l = child.fromchild.readline()
                 while l:
+                    log.info(l.rstrip())
                     m = re.match(r"^(store-mfn) (\d+)\n$", l)
                     if m:
                         if dominfo.store_channel:
@@ -156,7 +169,10 @@
                                                            dominfo.store_mfn,
                                                            
dominfo.store_channel)
                             dominfo.exportToDB(save=True, sync=True)
-                    log.info(l.rstrip())
+                    m = re.match(r"^(console-mfn) (\d+)\n$", l)
+                    if m:
+                        dominfo.console_mfn = int(m.group(2))
+                        dominfo.exportToDB(save=True, sync=True)
                     try:
                         l = child.fromchild.readline()
                     except:
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/XendDomain.py       Fri Sep  9 16:30:54 2005
@@ -36,7 +36,6 @@
 from xen.xend.XendError import XendError
 from xen.xend.XendLogging import log
 from xen.xend import scheduler
-from xen.xend.server import channel
 from xen.xend.server import relocate
 from xen.xend.uuid import getUuid
 from xen.xend.xenstore import XenNode, DBMap
@@ -67,7 +66,7 @@
         xroot.add_component("xen.xend.XendDomain", self)
         self.domains = XendDomainDict()
         self.dbmap = DBMap(db=XenNode("/domain"))
-        eserver.subscribe('xend.virq', self.onVirq)
+        self.watchReleaseDomain()
         self.initial_refresh()
 
     def list(self):
@@ -75,12 +74,32 @@
 
         @return: domain objects
         """
+        self.refresh()
         return self.domains.values()
-    
-    def onVirq(self, event, val):
-        """Event handler for virq.
-        """
+
+    def list_sorted(self):
+        """Get list of domain objects, sorted by name.
+
+        @return: domain objects
+        """
+        doms = self.list()
+        doms.sort(lambda x, y: cmp(x.name, y.name))
+        return doms
+
+    def list_names(self):
+        """Get list of domain names.
+
+        @return: domain names
+        """
+        doms = self.list_sorted()
+        return map(lambda x: x.name, doms)
+
+    def onReleaseDomain(self):
         self.refresh(cleanup=True)
+
+    def watchReleaseDomain(self):
+        from xen.xend.xenstore.xswatch import xswatch
+        self.releaseDomain = xswatch("@releaseDomain", self.onReleaseDomain)
 
     def xen_domains(self):
         """Get table of domains indexed by id from xc.
@@ -265,24 +284,6 @@
         else:
             self._delete_domain(id)
 
-    def domain_ls(self):
-        """Get list of domain names.
-
-        @return: domain names
-        """
-        self.refresh()
-        doms = self.domains.values()
-        doms.sort(lambda x, y: cmp(x.name, y.name))
-        return map(lambda x: x.name, doms)
-
-    def domain_ls_ids(self):
-        """Get list of domain ids.
-
-        @return: domain names
-        """
-        self.refresh()
-        return self.domains.keys()
-
     def domain_create(self, config):
         """Create a domain from a configuration.
 
@@ -542,7 +543,7 @@
             dominfo.name = "tmp-" + dominfo.name
 
         try:
-            XendCheckpoint.save(self, sock.fileno(), dominfo)
+            XendCheckpoint.save(self, sock.fileno(), dominfo, live)
         except:
             if dst == "localhost":
                 dominfo.name = string.replace(dominfo.name, "tmp-", "", 1)
@@ -563,7 +564,8 @@
 
             fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
 
-            return XendCheckpoint.save(self, fd, dominfo)
+            # For now we don't support 'live checkpoint' 
+            return XendCheckpoint.save(self, fd, dominfo, False)
 
         except OSError, ex:
             raise XendError("can't write guest state file %s: %s" %
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Sep  9 16:30:54 2005
@@ -34,8 +34,7 @@
 
 from xen.xend.server import controller
 from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
-from xen.xend.server import messages
-from xen.xend.server.channel import EventChannel, channelFactory
+from xen.xend.server.channel import EventChannel
 from xen.util.blkif import blkdev_name_to_number, expand_dev_name
 
 from xen.xend import sxp
@@ -47,7 +46,7 @@
 from xen.xend.XendRoot import get_component
 
 from xen.xend.uuid import getUuid
-from xen.xend.xenstore import DBVar
+from xen.xend.xenstore import DBVar, XenNode, DBMap
 
 """Shutdown code for poweroff."""
 DOMAIN_POWEROFF = 0
@@ -231,6 +230,7 @@
         DBVar('start_time',    ty='float'),
         DBVar('state',         ty='str'),
         DBVar('store_mfn',     ty='long'),
+        DBVar('console_mfn',   ty='long', path="console/ring-ref"),
         DBVar('restart_mode',  ty='str'),
         DBVar('restart_state', ty='str'),
         DBVar('restart_time',  ty='float'),
@@ -257,15 +257,17 @@
 
         self.target = None
 
-        self.channel = None
         self.store_channel = None
         self.store_mfn = None
+        self.console_channel = None
+        self.console_mfn = None
         self.controllers = {}
         
         self.info = None
         self.blkif_backend = False
         self.netif_backend = False
         self.netif_idx = 0
+        self.tpmif_backend = False
         
         #todo: state: running, suspended
         self.state = STATE_VM_OK
@@ -292,18 +294,18 @@
         self.db.saveDB(save=save, sync=sync)
 
     def exportToDB(self, save=False, sync=False):
-        if self.channel:
-            self.channel.saveToDB(self.db.addChild("channel"), save=save)
         if self.store_channel:
             self.store_channel.saveToDB(self.db.addChild("store_channel"),
                                         save=save)
+        if self.console_channel:
+            self.db['console/port'] = "%i" % self.console_channel.port1
         if self.image:
             self.image.exportToDB(save=save, sync=sync)
         self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
 
     def importFromDB(self):
         self.db.importFromDB(self, fields=self.__exports__)
-        self.store_channel = self.eventChannel("store_channel")
+        self.store_channel = self.eventChannelOld("store_channel")
 
     def setdom(self, dom):
         """Set the domain id.
@@ -323,16 +325,16 @@
     def getName(self):
         return self.name
 
-    def getChannel(self):
-        return self.channel
-
     def getStoreChannel(self):
         return self.store_channel
 
-    def update(self, info):
+    def getConsoleChannel(self):
+        return self.console_channel
+
+    def update(self, info=None):
         """Update with  info from xc.domain_getinfo().
         """
-        self.info = info
+        self.info = info or dom_get(self.id)
         self.memory = self.info['mem_kb'] / 1024
         self.ssidref = self.info['ssidref']
         self.target = self.info['mem_kb'] * 1024
@@ -384,6 +386,8 @@
         return ctrl
 
     def createDevice(self, type, devconfig, change=False):
+        if self.recreate:
+            return
         if type == 'vbd':
             typedev = sxp.child_value(devconfig, 'dev')
             if re.match('^ioemu:', typedev):
@@ -420,6 +424,15 @@
             return
 
         if type == 'vif':
+            from xen.xend import XendRoot
+            xroot = XendRoot.instance()
+
+            def _get_config_ipaddr(config):
+                val = []
+                for ipaddr in sxp.children(config, elt='ip'):
+                    val.append(sxp.child0(ipaddr))
+                return val
+
             backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
 
             log.error(devconfig)
@@ -427,6 +440,14 @@
             devnum = self.netif_idx
             self.netif_idx += 1
 
+            script = sxp.child_value(devconfig, 'script',
+                                     xroot.get_vif_script())
+            script = os.path.join(xroot.network_script_dir, script)
+            bridge = sxp.child_value(devconfig, 'bridge',
+                                     xroot.get_vif_bridge())
+            mac = sxp.child_value(devconfig, 'mac')
+            ipaddr = _get_config_ipaddr(devconfig)
+
             # create backend db
             backdb = backdom.db.addChild("/backend/%s/%s/%d" %
                                          (type, self.uuid, devnum))
@@ -434,6 +455,12 @@
             # create frontend db
             db = self.db.addChild("/device/%s/%d" % (type, devnum))
             
+            backdb['script'] = script
+            backdb['domain'] = self.name
+            backdb['mac'] = mac
+            backdb['bridge'] = bridge
+            if ipaddr:
+                backdb['ip'] = ' '.join(ipaddr)
             backdb['frontend'] = db.getPath()
             backdb['frontend-id'] = "%i" % self.id
             backdb['handle'] = "%i" % devnum
@@ -442,13 +469,37 @@
             db['backend'] = backdb.getPath()
             db['backend-id'] = "%i" % backdom.id
             db['handle'] = "%i" % devnum
-            log.error(sxp.child_value(devconfig, 'mac'))
-            db['mac'] = sxp.child_value(devconfig, 'mac')
+            db['mac'] = mac
 
             db.saveDB(save=True)
 
             return
         
+        if type == 'vtpm':
+            backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
+
+            devnum = int(sxp.child_value(devconfig, 'instance', '0'))
+            log.error("The domain has a TPM with instance %d." % devnum)
+
+            # create backend db
+            backdb = backdom.db.addChild("/backend/%s/%s/%d" %
+                                         (type, self.uuid, devnum))
+            # create frontend db
+            db = self.db.addChild("/device/%s/%d" % (type, devnum))
+
+            backdb['frontend'] = db.getPath()
+            backdb['frontend-id'] = "%i" % self.id
+            backdb['instance'] = sxp.child_value(devconfig, 'instance', '0')
+            backdb.saveDB(save=True)
+
+            db['handle'] = "%i" % devnum
+            db['backend'] = backdb.getPath()
+            db['backend-id'] = "%i" % int(sxp.child_value(devconfig,
+                                                          'backend', '0'))
+            db.saveDB(save=True)
+
+            return
+
         ctrl = self.findDeviceController(type)
         return ctrl.createDevice(devconfig, recreate=self.recreate,
                                  change=change)
@@ -512,12 +563,18 @@
             sxpr.append(['up_time', str(up_time) ])
             sxpr.append(['start_time', str(self.start_time) ])
 
-        if self.channel:
-            sxpr.append(self.channel.sxpr())
         if self.store_channel:
             sxpr.append(self.store_channel.sxpr())
         if self.store_mfn:
             sxpr.append(['store_mfn', self.store_mfn])
+        if self.console_channel:
+            sxpr.append(['console_channel', self.console_channel.sxpr()])
+        if self.console_mfn:
+            sxpr.append(['console_mfn', self.console_mfn])
+# already in (devices)
+#        console = self.getConsole()
+#        if console:
+#            sxpr.append(console.sxpr())
 
         if self.restart_count:
             sxpr.append(['restart_count', self.restart_count])
@@ -695,12 +752,6 @@
         """
         self.state = STATE_VM_TERMINATED
         self.release_devices()
-        if self.channel:
-            try:
-                self.channel.close()
-                self.channel = None
-            except:
-                pass
         if self.store_channel:
             try:
                 self.store_channel.close()
@@ -712,6 +763,13 @@
             except Exception, ex:
                 log.warning("error in domain release on xenstore: %s", ex)
                 pass
+        if self.console_channel:
+            # notify processes using this cosole?
+            try:
+                self.console_channel.close()
+                self.console_channel = None
+            except:
+                pass
         if self.image:
             try:
                 self.device_model_pid = 0
@@ -723,8 +781,8 @@
     def destroy(self):
         """Clenup vm and destroy domain.
         """
+        self.destroy_domain()
         self.cleanup()
-        self.destroy_domain()
         self.saveToDB()
         return 0
 
@@ -755,6 +813,11 @@
                 for dev in typedb.keys():
                     typedb[dev].delete()
                 typedb.saveDB(save=True)
+            if type == 'vtpm':
+                typedb = ddb.addChild(type)
+                for dev in typedb.keys():
+                    typedb[dev].delete()
+                typedb.saveDB(save=True)
 
     def show(self):
         """Print virtual machine info.
@@ -780,21 +843,7 @@
                   id, self.name, self.memory)
         self.setdom(id)
 
-    def openChannel(self, key, local, remote):
-        """Create a control channel to the domain.
-        If saved info is available recreate the channel.
-        
-        @param key db key for the saved data (if any)
-        @param local default local port
-        @param remote default remote port
-        """
-        db = self.db.addChild(key)
-        chan = channelFactory().restoreFromDB(db, self.id, local, remote)
-        #todo: save here?
-        #chan.saveToDB(db)
-        return chan
-
-    def eventChannel(self, key):
+    def eventChannelOld(self, key):
         """Create an event channel to the domain.
         If saved info is available recreate the channel.
         
@@ -803,11 +852,27 @@
         db = self.db.addChild(key)
         return EventChannel.restoreFromDB(db, 0, self.id)
         
+    def eventChannel(self, path=None, key=None):
+        """Create an event channel to the domain.
+        
+        @param path under which port is stored in db
+        """
+        port = 0
+        try:
+            if path and key:
+                if path:
+                    db = self.db.addChild(path)
+                else:
+                    db = self.db
+                port = int(db[key].getData())
+        except: pass
+        return EventChannel.interdomain(0, self.id, port1=port, port2=0)
+        
     def create_channel(self):
         """Create the channels to the domain.
         """
-        self.channel = self.openChannel("channel", 0, 1)
-        self.store_channel = self.eventChannel("store_channel")
+        self.store_channel = self.eventChannelOld("store_channel")
+        self.console_channel = self.eventChannel("console", "port")
 
     def create_configured_devices(self):
         devices = sxp.children(self.config, 'device')
@@ -950,6 +1015,7 @@
 
         """
         try:
+            self.clear_shutdown()
             self.state = STATE_VM_OK
             self.shutdown_pending = None
             self.restart_check()
@@ -993,6 +1059,8 @@
                 self.netif_backend = True
             elif name == 'usbif':
                 self.usbif_backend = True
+            elif name == 'tpmif':
+                self.tpmif_backend = True
             else:
                 raise VmError('invalid backend type:' + str(name))
 
@@ -1084,7 +1152,7 @@
 
     def dom0_init_store(self):
         if not self.store_channel:
-            self.store_channel = self.eventChannel("store_channel")
+            self.store_channel = self.eventChannelOld("store_channel")
         self.store_mfn = xc.init_store(self.store_channel.port2)
         if self.store_mfn >= 0:
             self.db.introduceDomain(self.id, self.store_mfn,
@@ -1158,6 +1226,10 @@
 controller.addDevControllerClass("vif", netif.NetifController)
 add_device_handler("vif", "vif")
 
+from server import tpmif
+controller.addDevControllerClass("vtpm", tpmif.TPMifController)
+add_device_handler("vtpm", "vtpm")
+
 from server import pciif
 controller.addDevControllerClass("pci", pciif.PciController)
 add_device_handler("pci", "pci")
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/XendNode.py Fri Sep  9 16:30:54 2005
@@ -46,7 +46,7 @@
         return self.xc.bvtsched_global_get()
     
     def info(self):
-        return self.nodeinfo() + self.physinfo()
+        return self.nodeinfo() + self.physinfo() + self.xeninfo()
 
     def nodeinfo(self):
         (sys, host, rel, ver, mch) = os.uname()
@@ -58,14 +58,29 @@
 
     def physinfo(self):
         pinfo = self.xc.physinfo()
-        info = [['cores_per_socket', pinfo['cores_per_socket']],
+        info = [['nr_cpus',          
pinfo['nr_nodes']*pinfo['sockets_per_node']*pinfo['cores_per_socket']*pinfo['threads_per_core']],
+                ['nr_nodes',         pinfo['nr_nodes']],
+                ['sockets_per_node', pinfo['sockets_per_node']],
+                ['cores_per_socket', pinfo['cores_per_socket']],
                 ['threads_per_core', pinfo['threads_per_core']],
-                ['cpu_mhz', pinfo['cpu_khz']/1000],
-                ['memory', pinfo['total_pages']/256],
-                ['free_memory', pinfo['free_pages']/256]]
+                ['cpu_mhz',          pinfo['cpu_khz']/1000],
+                ['hw_caps',          pinfo['hw_caps']],
+                ['memory',           pinfo['total_pages']/256],
+                ['free_memory',      pinfo['free_pages']/256]]
         return info
         
-        
+    def xeninfo(self):
+        xinfo = self.xc.xeninfo()
+        return [['xen_major', xinfo['xen_major']],
+                ['xen_minor', xinfo['xen_minor']],
+                ['xen_extra', xinfo['xen_extra']],
+                ['xen_caps',  xinfo['xen_caps']],
+                ['xen_params',xinfo['xen_params']],
+                ['xen_changeset', xinfo['xen_changeset']],
+                ['cc_compiler', xinfo['cc_compiler']],
+                ['cc_compile_by', xinfo['cc_compile_by']],
+                ['cc_compile_domain', xinfo['cc_compile_domain']],
+                ['cc_compile_date', xinfo['cc_compile_date']]]
 
 def instance():
     global inst
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/image.py    Fri Sep  9 16:30:54 2005
@@ -32,6 +32,9 @@
 """Flag for a net device backend domain."""
 SIF_NET_BE_DOMAIN = (1<<5)
 
+"""Flag for a TPM device backend domain."""
+SIF_TPM_BE_DOMAIN = (1<<7)
+
 class ImageHandler:
     """Abstract base class for image handlers.
 
@@ -156,7 +159,12 @@
         xc.domain_setmaxmem(dom, mem_kb)
 
         try:
-            xc.domain_memory_increase_reservation(dom, mem_kb)
+            # Give the domain some memory below 4GB
+            lmem_kb = 0
+            if lmem_kb > 0:
+                xc.domain_memory_increase_reservation(dom, 
min(lmem_kb,mem_kb), 0, 32)
+            if mem_kb > lmem_kb:
+                xc.domain_memory_increase_reservation(dom, mem_kb-lmem_kb, 0, 
0)
         except:
             xc.domain_destroy(dom)
             raise
@@ -194,6 +202,7 @@
         self.flags = 0
         if self.vm.netif_backend: self.flags |= SIF_NET_BE_DOMAIN
         if self.vm.blkif_backend: self.flags |= SIF_BLK_BE_DOMAIN
+        if self.vm.tpmif_backend: self.flags |= SIF_TPM_BE_DOMAIN
 
         if self.vm.recreate or self.vm.restore:
             return
@@ -238,16 +247,31 @@
             store_evtchn = self.vm.store_channel.port2
         else:
             store_evtchn = 0
+        if self.vm.console_channel:
+            console_evtchn = self.vm.console_channel.port2
+        else:
+            console_evtchn = 0
+
+        log.debug("dom            = %d", self.vm.getDomain())
+        log.debug("image          = %s", self.kernel)
+        log.debug("store_evtchn   = %d", store_evtchn)
+        log.debug("console_evtchn = %d", console_evtchn)
+        log.debug("cmdline        = %s", self.cmdline)
+        log.debug("ramdisk        = %s", self.ramdisk)
+        log.debug("flags          = %d", self.flags)
+        log.debug("vcpus          = %d", self.vm.vcpus)
+
         ret = xc.linux_build(dom            = self.vm.getDomain(),
                              image          = self.kernel,
-                             control_evtchn = self.vm.channel.getRemotePort(),
                              store_evtchn   = store_evtchn,
+                             console_evtchn = console_evtchn,
                              cmdline        = self.cmdline,
                              ramdisk        = self.ramdisk,
                              flags          = self.flags,
                              vcpus          = self.vm.vcpus)
         if isinstance(ret, dict):
             self.vm.store_mfn = ret.get('store_mfn')
+            self.vm.console_mfn = ret.get('console_mfn')
             return 0
         return ret
 
@@ -349,6 +373,11 @@
                mac = sxp.child_value(vifinfo, 'mac')
                ret.append("-macaddr")
                ret.append("%s" % mac)
+            if name == 'vtpm':
+               vtpminfo = sxp.child(device, 'vtpm')
+               instance = sxp.child_value(vtpminfo, 'instance')
+               ret.append("-instance")
+               ret.append("%s" % instance)
 
        # Handle graphics library related options
        vnc = sxp.child_value(self.vm.config, 'vnc')
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Fri Sep  9 16:30:54 2005
@@ -17,8 +17,6 @@
 import time
 import glob
 
-from xen.lowlevel import xu
-
 from xen.xend import sxp
 from xen.xend import PrettyPrint
 from xen.xend import EventServer; eserver = EventServer.instance()
@@ -27,7 +25,6 @@
 from xen.xend.XendLogging import log
 from xen.xend import XendRoot; xroot = XendRoot.instance()
 
-import channel
 import controller
 import event
 import relocate
@@ -37,12 +34,12 @@
     """The xend daemon.
     """
     def __init__(self):
-        self.channelF = None
         self.shutdown = 0
         self.traceon = 0
         self.tracefile = None
         self.traceindent = 0
-
+        self.child = 0 
+        
     def daemon_pids(self):
         pids = []
         pidex = '(?P<pid>\d+)'
@@ -140,15 +137,12 @@
         else:
             return 0
 
-    def install_child_reaper(self):
-        #signal.signal(signal.SIGCHLD, self.onSIGCHLD)
-        # Ensure that zombie children are automatically reaped.
-        xu.autoreap()
-
     def onSIGCHLD(self, signum, frame):
-        code = 1
-        while code > 0:
-            code = os.waitpid(-1, os.WNOHANG)
+        if self.child > 0: 
+            try: 
+                pid, sts = os.waitpid(self.child, os.WNOHANG)
+            except os.error, ex:
+                pass
 
     def fork_pid(self, pidfile):
         """Fork and write the pid of the child to 'pidfile'.
@@ -156,13 +150,16 @@
         @param pidfile: pid file
         @return: pid of child in parent, 0 in child
         """
-        pid = os.fork()
-        if pid:
+
+        self.child = os.fork()
+
+        if self.child:
             # Parent
             pidfile = open(pidfile, 'w')
-            pidfile.write(str(pid))
+            pidfile.write(str(self.child))
             pidfile.close()
-        return pid
+
+        return self.child
 
     def daemonize(self):
         if not XEND_DAEMONIZE: return
@@ -203,8 +200,7 @@
             # Trying to run an already-running service is a success.
             return 0
 
-        self.install_child_reaper()
-
+        signal.signal(signal.SIGCHLD, self.onSIGCHLD)
         if self.fork_pid(XEND_PID_FILE):
             #Parent. Sleep to give child time to start.
             time.sleep(1)
@@ -298,10 +294,8 @@
         _enforce_dom0_cpus()
         try:
             log.info("Xend Daemon started")
-            self.createFactories()
             event.listenEvent(self)
             relocate.listenRelocation()
-            self.listenChannels()
             servers = SrvServer.create()
             self.daemonize()
             servers.start()
@@ -309,22 +303,10 @@
             print >>sys.stderr, 'Exception starting xend:', ex
             if XEND_DEBUG:
                 traceback.print_exc()
-            log.exception("Exception starting xend")
+            log.exception("Exception starting xend (%s)" % ex)
             self.exit(1)
             
-    def createFactories(self):
-        self.channelF = channel.channelFactory()
-
-    def listenChannels(self):
-        def virqReceived(virq):
-            eserver.inject('xend.virq', virq)
-
-        self.channelF.setVirqHandler(virqReceived)
-        self.channelF.start()
-
     def exit(self, rc=0):
-        if self.channelF:
-            self.channelF.stop()
         # Calling sys.exit() raises a SystemExit exception, which only
         # kills the current thread. Calling os._exit() makes the whole
         # Python process exit immediately. There doesn't seem to be another
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/SrvDomain.py Fri Sep  9 16:30:54 2005
@@ -221,6 +221,7 @@
         #
         # if op and op[0] in ['vifs', 'vif', 'vbds', 'vbd', 'mem_target_set']:
         #    return self.perform(req)
+        self.dom.update()
         if self.use_sxp(req):
             req.setHeader("Content-Type", sxp.mime_type)
             sxp.show(self.dom.sxpr(), out=req)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 
tools/python/xen/xend/server/SrvDomainDir.py
--- a/tools/python/xen/xend/server/SrvDomainDir.py      Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/SrvDomainDir.py      Fri Sep  9 16:30:54 2005
@@ -146,11 +146,10 @@
         if not url.endswith('/'):
             url += '/'
         if use_sxp:
-            domains = self.xd.domain_ls()
+            domains = self.xd.list_names()
             sxp.show(domains, out=req)
         else:
-            domains = self.xd.list()
-            domains.sort(lambda x, y: cmp(x.name, y.name))
+            domains = self.xd.list_sorted()
             req.write('<ul>')
             for d in domains:
                req.write('<li><a href="%s%s"> Domain %s</a>'
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/SrvServer.py Fri Sep  9 16:30:54 2005
@@ -61,9 +61,14 @@
 
     def start(self):
         Vifctl.network('start')
+        threads = []
         for server in self.servers:
             thread = Thread(target=server.run)
             thread.start()
+            threads.append(thread)
+
+        for t in threads:
+            t.join()
 
 def create():
     root = SrvDir()
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/blkif.py     Fri Sep  9 16:30:54 2005
@@ -27,9 +27,7 @@
 from xen.xend import Blkctl
 from xen.xend.xenstore import DBVar
 
-from xen.xend.server import channel
-from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
-from xen.xend.server.messages import *
+from xen.xend.server.controller import Dev, DevController
 
 class BlkifBackend:
     """ Handler for the 'back-end' channel to a block device driver domain
@@ -41,21 +39,15 @@
         self.controller = controller
         self.id = id
         self.frontendDomain = self.controller.getDomain()
-        self.frontendChannel = None
         self.backendDomain = dom
-        self.backendChannel = None
         self.destroyed = False
         self.connected = False
-        self.evtchn = None
         self.status = None
 
     def init(self, recreate=False, reboot=False):
         self.destroyed = False
         self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
         self.frontendDomain = self.controller.getDomain()
-        self.frontendChannel = self.controller.getChannel()
-        cf = channel.channelFactory()
-        self.backendChannel = cf.openChannel(self.backendDomain)
 
     def __str__(self):
         return ('<BlkifBackend frontend=%d backend=%d id=%d>'
@@ -66,29 +58,6 @@
     def getId(self):
         return self.id
 
-    def getEvtchn(self):
-        return self.evtchn
-
-    def closeEvtchn(self):
-        if self.evtchn:
-            channel.eventChannelClose(self.evtchn)
-            self.evtchn = None
-
-    def openEvtchn(self):
-        self.evtchn = channel.eventChannel(self.backendDomain, 
self.frontendDomain)
-
-    def getEventChannelBackend(self):
-        val = 0
-        if self.evtchn:
-            val = self.evtchn['port1']
-        return val
-
-    def getEventChannelFrontend(self):
-        val = 0
-        if self.evtchn:
-            val = self.evtchn['port2']
-        return val
-
     def connect(self, recreate=False):
         """Connect to the blkif control interface.
 
@@ -98,83 +67,19 @@
         if recreate or self.connected:
             self.connected = True
             pass
-        else:
-            self.send_be_create()
-        
-    def send_be_create(self):
-        log.debug("send_be_create %s", str(self))
-        msg = packMsg('blkif_be_create_t',
-                      { 'domid'        : self.frontendDomain,
-                        'blkif_handle' : self.id })
-        msg = self.backendChannel.requestResponse(msg)
-        #todo: check return status
-        self.connected = True
-
+        
     def destroy(self, change=False, reboot=False):
         """Disconnect from the blkif control interface and destroy it.
         """
-        self.send_be_disconnect()
-        self.send_be_destroy()
-        self.closeEvtchn()
         self.destroyed = True
         # For change true need to notify front-end, or back-end will do it?
 
-    def send_be_disconnect(self):
-        msg = packMsg('blkif_be_disconnect_t',
-                      { 'domid'        : self.frontendDomain,
-                        'blkif_handle' : self.id })
-        self.backendChannel.requestResponse(msg)
-        #todo: check return status
-        self.connected = False
-
-    def send_be_destroy(self):
-        msg = packMsg('blkif_be_destroy_t',
-                      { 'domid'        : self.frontendDomain,
-                        'blkif_handle' : self.id })
-        self.backendChannel.requestResponse(msg)
-        #todo: check return status
-
     def connectInterface(self, val):
-        self.openEvtchn()
-        log.debug("Connecting blkif to event channel %s ports=%d:%d",
-                  str(self), self.evtchn['port1'], self.evtchn['port2'])
-        msg = packMsg('blkif_be_connect_t',
-                      { 'domid'        : self.frontendDomain,
-                        'blkif_handle' : self.id,
-                        'evtchn'       : self.getEventChannelBackend(),
-                        'shmem_frame'  : val['shmem_frame'],
-                        'shmem_ref'    : val['shmem_ref'] })
-        msg = self.backendChannel.requestResponse(msg)
-        #todo: check return status
-        val = unpackMsg('blkif_be_connect_t', msg)
         self.status = BLKIF_INTERFACE_STATUS_CONNECTED
-        self.send_fe_interface_status()
             
-    def send_fe_interface_status(self):
-        msg = packMsg('blkif_fe_interface_status_t',
-                      { 'handle' : self.id,
-                        'status' : self.status,
-                        'domid'  : self.backendDomain,
-                        'evtchn' : self.getEventChannelFrontend() })
-        self.frontendChannel.writeRequest(msg)
-
     def interfaceDisconnected(self):
         self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
-        #todo?: Close evtchn:
-        #self.closeEvtchn()
-        self.send_fe_interface_status()
-        
-    def interfaceChanged(self):
-        """Notify the front-end that devices have been added or removed.
-        The front-end should then probe for devices.
-        """
-        msg = packMsg('blkif_fe_interface_status_t',
-                      { 'handle' : self.id,
-                        'status' : BLKIF_INTERFACE_STATUS_CHANGED,
-                        'domid'  : self.backendDomain,
-                        'evtchn' : 0 })
-        self.frontendChannel.writeRequest(msg)
-
+        
 class BlkDev(Dev):
     """Info record for a block device.
     """
@@ -207,24 +112,17 @@
         self.nr_sectors = None
         
         self.frontendDomain = self.getDomain()
-        self.frontendChannel = None
         self.backendDomain = None
-        self.backendChannel = None
         self.backendId = 0
         self.configure(self.config, recreate=recreate)
 
     def exportToDB(self, save=False):
         Dev.exportToDB(self, save=save)
         backend = self.getBackend()
-        if backend and backend.evtchn:
-            db = self.db.addChild("evtchn")
-            backend.evtchn.saveToDB(db, save=save)
 
     def init(self, recreate=False, reboot=False):
         self.frontendDomain = self.getDomain()
-        self.frontendChannel = self.getChannel()
         backend = self.getBackend()
-        self.backendChannel = backend.backendChannel
         self.backendId = backend.id
 
     def configure(self, config, change=False, recreate=False):
@@ -351,7 +249,6 @@
         self.destroyed = True
         log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain,
                   self.id)
-        self.send_be_vbd_destroy()
         if change:
             self.interfaceChanged()
         self.unbind()
@@ -367,30 +264,6 @@
 
         """
         self.getBackend().connect()
-        self.send_be_vbd_create()
-        
-    def send_be_vbd_create(self):
-        msg = packMsg('blkif_be_vbd_create_t',
-                      { 'domid'        : self.frontendDomain,
-                        'blkif_handle' : self.backendId,
-                        'pdevice'      : self.device,
-                        'dev_handle'   : self.dev_handle,
-                        'vdevice'      : self.vdev,
-                        'readonly'     : self.readonly() })
-        msg = self.backendChannel.requestResponse(msg)
-        
-        val = unpackMsg('blkif_be_vbd_create_t', msg)
-        status = val['status']
-        if status != BLKIF_BE_STATUS_OKAY:
-            raise XendError("Creating vbd failed: device %s, error %d"
-                            % (sxp.to_string(self.config), status))
-
-    def send_be_vbd_destroy(self):
-        msg = packMsg('blkif_be_vbd_destroy_t',
-                      { 'domid'                : self.frontendDomain,
-                        'blkif_handle'         : self.backendId,
-                        'vdevice'              : self.vdev })
-        return self.backendChannel.writeRequest(msg)
         
 class BlkifController(DevController):
     """Block device interface controller. Handles all block devices
@@ -403,19 +276,9 @@
         DevController.__init__(self, vm, recreate=recreate)
         self.backends = {}
         self.backendId = 0
-        self.rcvr = None
 
     def initController(self, recreate=False, reboot=False):
         self.destroyed = False
-        # Add our handlers for incoming requests.
-        self.rcvr = CtrlMsgRcvr(self.getChannel())
-        self.rcvr.addHandler(CMSG_BLKIF_FE,
-                             CMSG_BLKIF_FE_DRIVER_STATUS,
-                             self.recv_fe_driver_status)
-        self.rcvr.addHandler(CMSG_BLKIF_FE,
-                             CMSG_BLKIF_FE_INTERFACE_CONNECT,
-                             self.recv_fe_interface_connect)
-        self.rcvr.registerChannel()
         if reboot:
             self.rebootBackends()
             self.rebootDevices()
@@ -465,26 +328,7 @@
         log.debug("Destroying blkif domain=%d", self.getDomain())
         self.destroyDevices(reboot=reboot)
         self.destroyBackends(reboot=reboot)
-        self.rcvr.deregisterChannel()
 
     def destroyBackends(self, reboot=False):
         for backend in self.backends.values():
             backend.destroy(reboot=reboot)
-
-    def recv_fe_driver_status(self, msg):
-        val = unpackMsg('blkif_fe_driver_status_t', msg)
-        for backend in self.backends.values():
-            backend.interfaceDisconnected()
-
-    def recv_fe_interface_connect(self, msg):
-        val = unpackMsg('blkif_fe_interface_connect_t', msg)
-        id = val['handle']
-        backend = self.getBackendById(id)
-        if backend:
-            try:
-                backend.connectInterface(val)
-            except IOError, ex:
-                log.error("Exception connecting backend: %s", ex)
-        else:
-            log.error('interface connect on unknown interface: id=%d', id)
-
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/channel.py
--- a/tools/python/xen/xend/server/channel.py   Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/channel.py   Fri Sep  9 16:30:54 2005
@@ -19,11 +19,8 @@
 import select
 
 import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-from xen.lowlevel import xu
 
 from xen.xend.XendLogging import log
-
-from messages import *
 
 DEBUG = 0
 
@@ -132,501 +129,3 @@
     """
     if not evtchn: return
     evtchn.close()
-
-class ChannelFactory:
-    """Factory for creating control channels.
-    Maintains a table of channels.
-    """
-
-    """ Channels indexed by index. """
-    channels = None
-
-    thread = None
-
-    notifier = None
-
-    """Map of ports to the virq they signal."""
-    virqPorts = None
-
-    def __init__(self):
-        """Constructor - do not use. Use the channelFactory function."""
-        self.channels = {}
-        self.virqPorts = {}
-        self.notifier = xu.notifier()
-        # Register interest in virqs.
-        self.bind_virq(xen.lowlevel.xc.VIRQ_DOM_EXC)
-        self.virqHandler = None
-
-    def bind_virq(self, virq):
-        port = self.notifier.bind_virq(virq)
-        self.virqPorts[port] = virq
-        log.info("Virq %s on port %s", virq, port)
-
-    def start(self):
-        """Fork a thread to read messages.
-        """
-        if self.thread: return
-        self.thread = threading.Thread(name="ChannelFactory",
-                                       target=self.main)
-        self.thread.setDaemon(True)
-        self.thread.start()
-
-    def stop(self):
-        """Signal the thread to stop.
-        """
-        self.thread = None
-
-    def main(self):
-        """Main routine for the thread.
-        Reads the notifier and dispatches to channels.
-        """
-        while True:
-            if self.thread == None: return
-            port = self.notifier.read()
-            if port:
-                virq = self.virqPorts.get(port)
-                if virq is not None:
-                    self.virqReceived(virq)
-                else:
-                    self.msgReceived(port)
-            else:
-                select.select([self.notifier], [], [], 1.0)
-
-    def msgReceived(self, port):
-        # We run the message handlers in their own threads.
-        # Note we use keyword args to lambda to save the values -
-        # otherwise lambda will use the variables, which will get
-        # assigned by the loop and the lambda will get the changed values.
-        received = 0
-        for chan in self.channels.values():
-            if self.thread == None: return
-            msg = chan.readResponse()
-            if msg:
-                received += 1
-                chan.responseReceived(msg)
-        for chan in self.channels.values():
-            if self.thread == None: return
-            msg = chan.readRequest()
-            if msg:
-                received += 1
-                self.runInThread(lambda chan=chan, msg=msg: 
chan.requestReceived(msg))
-        if port and received == 0:
-            log.warning("Port %s notified, but no messages found", port)
-
-    def runInThread(self, thunk):
-        thread = threading.Thread(target = thunk)
-        thread.setDaemon(True)
-        thread.start()
-
-    def setVirqHandler(self, virqHandler):
-        self.virqHandler = virqHandler
-
-    def virqReceived(self, virq):
-        if DEBUG:
-            print 'virqReceived>', virq
-        if not self.virqHandler: return
-        self.runInThread(lambda virq=virq: self.virqHandler(virq))
-
-    def newChannel(self, dom, local_port, remote_port):
-        """Create a new channel.
-        """
-        return self.addChannel(Channel(self, dom, local_port, remote_port))
-    
-    def addChannel(self, channel):
-        """Add a channel.
-        """
-        self.channels[channel.getKey()] = channel
-        return channel
-
-    def delChannel(self, channel):
-        """Remove the channel.
-        """
-        key = channel.getKey()
-        if key in self.channels:
-            del self.channels[key]
-
-    def getChannel(self, dom, local_port, remote_port):
-        """Get the channel with the given domain and ports (if any).
-        """
-        key = (dom, local_port, remote_port)
-        return self.channels.get(key)
-
-    def findChannel(self, dom, local_port=0, remote_port=0):
-        """Find a channel. Ports given as zero are wildcards.
-
-        dom domain
-
-        returns channel
-        """
-        chan = self.getChannel(dom, local_port, remote_port)
-        if chan: return chan
-        if local_port and remote_port:
-            return None
-        for c in self.channels.values():
-            if c.dom != dom: continue
-            if local_port and local_port != c.getLocalPort(): continue
-            if remote_port and remote_port != c.getRemotePort(): continue
-            return c
-        return None
-
-    def openChannel(self, dom, local_port=0, remote_port=0):
-        chan = self.findChannel(dom, local_port=local_port,
-                                remote_port=remote_port)
-        if chan:
-            return chan
-        chan = self.newChannel(dom, local_port, remote_port)
-        return chan
-        
-
-    def createPort(self, dom, local_port=0, remote_port=0):
-        """Create a port for a channel to the given domain.
-        If only the domain is specified, a new channel with new port ids is
-        created.  If one port id is specified and the given port id is in use,
-        the other port id is filled.  If one port id is specified and the
-        given port id is not in use, a new channel is created with one port
-        id equal to the given id and a new id for the other end.  If both
-        port ids are specified, a port is reconnected using the given port
-        ids.
-
-        @param dom: domain
-        @param local: local port id to use
-        @type  local: int
-        @param remote: remote port id to use
-        @type  remote: int
-        @return: port object
-        """
-        return xu.port(dom, local_port=local_port, remote_port=remote_port)
-
-    def restoreFromDB(self, db, dom, local, remote):
-        """Create a channel using ports restored from the db (if available).
-        Otherwise use the given ports. This is the inverse operation to
-        saveToDB() on a channel.
-
-        @param db db
-        @param dom  domain the channel connects to
-        @param local default local port
-        @param remote default remote port
-        """
-        try:
-            local_port  = int(db['local_port'])
-        except:
-            local_port = local
-        try:
-            remote_port = int(db['remote_port'])
-        except:
-            remote_port = remote
-        try:
-            chan = self.openChannel(dom, local_port, remote_port)
-        except:
-            return None
-        return chan
-
-def channelFactory():
-    """Singleton constructor for the channel factory.
-    Use this instead of the class constructor.
-    """
-    global inst
-    try:
-        inst
-    except:
-        inst = ChannelFactory()
-    return inst
-
-class Channel:
-    """Control channel to a domain.
-    Maintains a list of device handlers to dispatch requests to, based
-    on the request type.
-    """
-
-    def __init__(self, factory, dom, local_port, remote_port):
-        self.factory = factory
-        self.dom = int(dom)
-        # Registered device handlers.
-        self.devs = []
-        # Handlers indexed by the message types they handle.
-        self.devs_by_type = {}
-        self.port = self.factory.createPort(self.dom,
-                                            local_port=local_port,
-                                            remote_port=remote_port)
-        self.closed = False
-        # Queue of waiters for responses to requests.
-        self.queue = ResponseQueue(self)
-        # Make sure the port will deliver all the messages.
-        self.port.register(TYPE_WILDCARD)
-
-    def saveToDB(self, db, save=False):
-        """Save the channel ports to the db so the channel can be restored 
later,
-        using restoreFromDB() on the factory.
-
-        @param db db
-        """
-        if self.closed: return
-        db['local_port'] = str(self.getLocalPort())
-        db['remote_port'] = str(self.getRemotePort())
-        db.saveDB(save=save)
-
-    def getKey(self):
-        """Get the channel key.
-        """
-        return (self.dom, self.getLocalPort(), self.getRemotePort())
-
-    def sxpr(self):
-        val = ['channel']
-        val.append(['domain', self.dom])
-        if self.port:
-            val.append(['local_port', self.port.local_port])
-            val.append(['remote_port', self.port.remote_port])
-        return val
-
-    def close(self):
-        """Close the channel.
-        """
-        if DEBUG:
-            print 'Channel>close>', self
-        if self.closed: return
-        self.closed = True
-        self.factory.delChannel(self)
-        for d in self.devs[:]:
-            d.lostChannel(self)
-        self.devs = []
-        self.devs_by_type = {}
-        if self.port:
-            self.port.close()
-            #self.port = None
-
-    def getDomain(self):
-        return self.dom
-
-    def getLocalPort(self):
-        """Get the local port.
-
-        @return: local port
-        @rtype:  int
-        """
-        if self.closed: return -1
-        return self.port.local_port
-
-    def getRemotePort(self):
-        """Get the remote port.
-
-        @return: remote port
-        @rtype:  int
-        """
-        if self.closed: return -1
-        return self.port.remote_port
-
-    def __repr__(self):
-        return ('<Channel dom=%d ports=%d:%d>'
-                % (self.dom,
-                   self.getLocalPort(),
-                   self.getRemotePort()))
-
-
-    def registerDevice(self, types, dev):
-        """Register a device message handler.
-
-        @param types: message types handled
-        @type  types: array of ints
-        @param dev:   device handler
-        """
-        if self.closed: return
-        self.devs.append(dev)
-        for ty in types:
-            self.devs_by_type[ty] = dev
-
-    def deregisterDevice(self, dev):
-        """Remove the registration for a device handler.
-
-        @param dev: device handler
-        """
-        if dev in self.devs:
-            self.devs.remove(dev)
-        types = [ ty for (ty, d) in self.devs_by_type.items() if d == dev ]
-        for ty in types:
-            del self.devs_by_type[ty]
-
-    def getDevice(self, type):
-        """Get the handler for a message type.
-
-        @param type: message type
-        @type  type: int
-        @return: controller or None
-        @rtype:  device handler
-        """
-        return self.devs_by_type.get(type)
-
-    def requestReceived(self, msg):
-        """A request has been received on the channel.
-        Disptach it to the device handlers.
-        Called from the channel factory thread.
-        """
-        if DEBUG:
-            print 'Channel>requestReceived>', self,
-            printMsg(msg)
-        (ty, subty) = getMessageType(msg)
-        responded = False
-        dev = self.getDevice(ty)
-        if dev:
-            responded = dev.requestReceived(msg, ty, subty)
-        elif DEBUG:
-            print "Channel>requestReceived> No device handler", self,
-            printMsg(msg)
-        else:
-            pass
-        if not responded:
-            self.writeResponse(msg)
-
-    def writeRequest(self, msg):
-        """Write a request to the channel.
-        """
-        if DEBUG:
-            print 'Channel>writeRequest>', self,
-            printMsg(msg, all=True)
-        if self.closed: return -1
-        self.port.write_request(msg)
-        return 1
-
-    def writeResponse(self, msg):
-        """Write a response to the channel.
-        """
-        if DEBUG:
-            print 'Channel>writeResponse>', self,
-            printMsg(msg, all=True)
-        if self.port:
-            self.port.write_response(msg)
-        return 1
-
-    def readRequest(self):
-        """Read a request from the channel.
-        Called internally.
-        """
-        if self.closed:
-            val =  None
-        else:
-            val = self.port.read_request()
-        return val
-        
-    def readResponse(self):
-        """Read a response from the channel.
-        Called internally.
-        """
-        if self.closed:
-            val = None
-        else:
-            val = self.port.read_response()
-        if DEBUG and val:
-            print 'Channel>readResponse>', self,
-            printMsg(val, all=True)
-        return val
-
-    def requestResponse(self, msg, timeout=None):
-        """Write a request and wait for a response.
-        Raises IOError on timeout.
-
-        @param msg request message
-        @param timeout timeout (0 is forever)
-        @return response message
-        """
-        if self.closed:
-            raise IOError("closed")
-        if self.closed:
-            return None
-        if timeout is None:
-            timeout = RESPONSE_TIMEOUT
-        elif timeout <= 0:
-            timeout = None
-        return self.queue.call(msg, timeout)
-
-    def responseReceived(self, msg):
-        """A response has been received, look for a waiter to
-        give it to.
-        Called internally.
-        """
-        if DEBUG:
-            print 'Channel>responseReceived>', self,
-            printMsg(msg)
-        self.queue.response(getMessageId(msg), msg)
-
-    def virq(self):
-        self.factory.virq()
-
-class Response:
-    """Entry in the response queue.
-    Used to signal a response to a message.
-    """
-
-    def __init__(self, mid):
-        self.mid = mid
-        self.msg = None
-        self.ready = threading.Event()
-
-    def response(self, msg):
-        """Signal arrival of a response to a waiting thread.
-        Passing msg None cancels the wait with an IOError.
-        """
-        if msg:
-            self.msg = msg
-        else:
-            self.mid = -1
-        self.ready.set()
-
-    def wait(self, timeout):
-        """Wait up to 'timeout' seconds for a response.
-        Returns the response or raises an IOError.
-        """
-        self.ready.wait(timeout)
-        if self.mid < 0:
-            raise IOError("wait canceled")
-        if self.msg is None:
-            raise IOError("response timeout")
-        return self.msg
-
-class ResponseQueue:
-    """Response queue. Manages waiters for responses to messages.
-    """
-
-    def __init__(self, channel):
-        self.channel = channel
-        self.lock = threading.Lock()
-        self.responses = {}
-
-    def add(self, mid):
-        r = Response(mid)
-        self.responses[mid] = r
-        return r
-
-    def get(self, mid):
-        return self.responses.get(mid)
-
-    def remove(self, mid):
-        r = self.responses.get(mid)
-        if r:
-            del self.responses[mid]
-        return r
-
-    def response(self, mid, msg):
-        """Process a response - signals any waiter that a response
-        has arrived.
-        """
-        try:
-            self.lock.acquire()
-            r = self.remove(mid)
-        finally:
-            self.lock.release()
-        if r:
-            r.response(msg)
-
-    def call(self, msg, timeout):
-        """Send the message and wait for 'timeout' seconds for a response.
-        Returns the response.
-        Raises IOError on timeout.
-        """
-        mid = getMessageId(msg)
-        try:
-            self.lock.acquire()
-            r = self.add(mid)
-        finally:
-            self.lock.release()
-        self.channel.writeRequest(msg)
-        return r.wait(timeout)
-                
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/controller.py
--- a/tools/python/xen/xend/server/controller.py        Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/controller.py        Fri Sep  9 16:30:54 2005
@@ -21,106 +21,8 @@
 
 from xen.xend.XendError import XendError
 from xen.xend.xenstore import DBVar
-from xen.xend.server.messages import msgTypeName, printMsg, getMessageType
 
 DEBUG = 0
-
-class CtrlMsgRcvr:
-    """Utility class to dispatch messages on a control channel.
-    Once I{registerChannel} has been called, our message types are registered
-    with the channel. The channel will call I{requestReceived}
-    when a request arrives if it has one of our message types.
-
-    @ivar channel: channel to a domain
-    @type channel: Channel
-    @ivar majorTypes: major message types we are interested in
-    @type majorTypes: {int:{int:method}}
-    
-    """
-
-    def __init__(self, channel):
-        self.majorTypes = {}
-        self.channel = channel
-
-    def getHandler(self, type, subtype):
-        """Get the method for a type and subtype.
-
-        @param type: major message type
-        @param subtype: minor message type
-        @return: method or None
-        """
-        method = None
-        subtypes = self.majorTypes.get(type)
-        if subtypes:
-            method = subtypes.get(subtype)
-        return method
-
-    def addHandler(self, type, subtype, method):
-        """Add a method to handle a message type and subtype.
-        
-        @param type: major message type
-        @param subtype: minor message type
-        @param method: method
-        """
-        subtypes = self.majorTypes.get(type)
-        if not subtypes:
-            subtypes = {}
-            self.majorTypes[type] = subtypes
-        subtypes[subtype] = method
-
-    def getMajorTypes(self):
-        """Get the list of major message types handled.
-        """
-        return self.majorTypes.keys()
-
-    def requestReceived(self, msg, type, subtype):
-        """Dispatch a request message to handlers.
-        Called by the channel for requests with one of our types.
-
-        @param msg:     message
-        @type  msg:     xu message
-        @param type:    major message type
-        @type  type:    int
-        @param subtype: minor message type
-        @type  subtype: int
-        """
-        if DEBUG:
-            print 'requestReceived>',
-            printMsg(msg, all=True)
-        responded = 0
-        method = self.getHandler(type, subtype)
-        if method:
-            responded = method(msg)
-        elif DEBUG:
-            print ('requestReceived> No handler: Message type %s %d:%d'
-                   % (msgTypeName(type, subtype), type, subtype)), self
-        return responded
-        
-
-    def lostChannel(self):
-        """Called when the channel to the domain is lost.
-        """
-        if DEBUG:
-            print 'CtrlMsgRcvr>lostChannel>',
-        self.channel = None
-    
-    def registerChannel(self):
-        """Register interest in our major message types with the
-        channel to our domain. Once we have registered, the channel
-        will call requestReceived for our messages.
-        """
-        if DEBUG:
-            print 'CtrlMsgRcvr>registerChannel>', self.channel, 
self.getMajorTypes()
-        if self.channel:
-            self.channel.registerDevice(self.getMajorTypes(), self)
-        
-    def deregisterChannel(self):
-        """Deregister interest in our major message types with the
-        channel to our domain. After this the channel won't call
-        us any more.
-        """
-        if self.channel:
-            self.channel.deregisterDevice(self)
 
 class DevControllerTable:
     """Table of device controller classes, indexed by type name.
@@ -232,10 +134,6 @@
     def getDomainName(self):
         return self.vm.getName()
 
-    def getChannel(self):
-        chan = self.vm.getChannel()
-        return chan
-    
     def getDomainInfo(self):
         return self.vm
 
@@ -433,9 +331,6 @@
     def getDomainName(self):
         return self.controller.getDomainName()
 
-    def getChannel(self):
-        return self.controller.getChannel()
-    
     def getDomainInfo(self):
         return self.controller.getDomainInfo()
     
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/event.py
--- a/tools/python/xen/xend/server/event.py     Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/event.py     Fri Sep  9 16:30:54 2005
@@ -174,11 +174,6 @@
         else:
             logging.removeLogStderr()
 
-    def op_debug_msg(self, name, v):
-        mode = v[1]
-        import messages
-        messages.DEBUG = (mode == 'on')
-
     def op_debug_controller(self, name, v):
         mode = v[1]
         import controller
@@ -186,7 +181,7 @@
 
     def op_domain_ls(self, name, v):
         xd = xroot.get_component("xen.xend.XendDomain")
-        return xd.domain_ls()
+        return xd.list_names()
 
     def op_domain_configure(self, name, v):
         domid = sxp.child_value(v, "dom")
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py     Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/netif.py     Fri Sep  9 16:30:54 2005
@@ -30,9 +30,7 @@
 from xen.xend.XendRoot import get_component
 from xen.xend.xenstore import DBVar
 
-from xen.xend.server import channel
-from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
-from xen.xend.server.messages import *
+from xen.xend.server.controller import Dev, DevController
 
 class NetDev(Dev):
     """A network device.
@@ -90,12 +88,9 @@
     def __init__(self, controller, id, config, recreate=False):
         Dev.__init__(self, controller, id, config, recreate=recreate)
         self.vif = int(self.id)
-        self.evtchn = None
         self.status = None
         self.frontendDomain = self.getDomain()
-        self.frontendChannel = None
         self.backendDomain = None
-        self.backendChannel = None
         self.credit = None
         self.period = None
         self.mac = None
@@ -109,17 +104,11 @@
 
     def exportToDB(self, save=False):
         Dev.exportToDB(self, save=save)
-        if self.evtchn:
-            db = self.db.addChild("evtchn")
-            self.evtchn.saveToDB(db, save=save)
 
     def init(self, recreate=False, reboot=False):
         self.destroyed = False
         self.status = NETIF_INTERFACE_STATUS_DISCONNECTED
         self.frontendDomain = self.getDomain()
-        self.frontendChannel = self.getChannel()
-        cf = channel.channelFactory()
-        self.backendChannel = cf.openChannel(self.backendDomain)
 
     def _get_config_mac(self, config):
         vmac = sxp.child_value(config, 'mac')
@@ -287,10 +276,6 @@
             val.append(['credit', self.credit])
         if self.period:
             val.append(['period', self.period])
-        if self.evtchn:
-            val.append(['evtchn',
-                        self.evtchn['port1'],
-                        self.evtchn['port2']])
         return val
 
     def get_vifname(self):
@@ -348,42 +333,11 @@
         if recreate:
             pass
         else:
-            self.send_be_create()
             if self.credit and self.period:
-                self.send_be_creditlimit(self.credit, self.period)
+                #self.send_be_creditlimit(self.credit, self.period)
+                pass
             self.vifctl('up', vmname=self.getDomainName())
         
-    def closeEvtchn(self):
-        if self.evtchn:
-            channel.eventChannelClose(self.evtchn)
-            self.evtchn = None
-
-    def openEvtchn(self):
-        self.evtchn = channel.eventChannel(self.backendDomain, 
self.frontendDomain)
-        
-    def getEventChannelBackend(self):
-        val = 0
-        if self.evtchn:
-            val = self.evtchn['port1']
-        return val
-
-    def getEventChannelFrontend(self):
-        val = 0
-        if self.evtchn:
-            val = self.evtchn['port2']
-        return val
-
-    def send_be_create(self):
-        msg = packMsg('netif_be_create_t',
-                      { 'domid'        : self.frontendDomain,
-                        'netif_handle' : self.vif,
-                        'be_mac'       : self.be_mac or [0, 0, 0, 0, 0, 0],
-                        'mac'          : self.mac,
-                        #'vifname'      : self.vifname
-                        })
-        msg = self.backendChannel.requestResponse(msg)
-        # todo: check return status
-
     def destroy(self, change=False, reboot=False):
         """Destroy the device's resources and disconnect from the back-end
         device controller. If 'change' is true notify the front-end interface.
@@ -393,47 +347,14 @@
         self.destroyed = True
         self.status = NETIF_INTERFACE_STATUS_CLOSED
         log.debug("Destroying vif domain=%d vif=%d", self.frontendDomain, 
self.vif)
-        self.closeEvtchn()
         self.vifctl('down')
-        self.send_be_disconnect()
-        self.send_be_destroy()
         if change:
             self.reportStatus()
-
-    def send_be_disconnect(self):
-        msg = packMsg('netif_be_disconnect_t',
-                      { 'domid'        : self.frontendDomain,
-                        'netif_handle' : self.vif })
-        self.backendChannel.requestResponse(msg)
-        #todo: check return status
-
-    def send_be_destroy(self, response=None):
-        msg = packMsg('netif_be_destroy_t',
-                      { 'domid'        : self.frontendDomain,
-                        'netif_handle' : self.vif })
-        self.backendChannel.requestResponse(msg)
-        #todo: check return status
-    
-    def recv_fe_interface_connect(self, val):
-        self.openEvtchn()
-        msg = packMsg('netif_be_connect_t',
-                      { 'domid'          : self.frontendDomain,
-                        'netif_handle'   : self.vif,
-                        'evtchn'         : self.getEventChannelBackend(),
-                        'tx_shmem_frame' : val['tx_shmem_frame'],
-                        'tx_shmem_ref'   : val['tx_shmem_ref'],
-                        'rx_shmem_frame' : val['rx_shmem_frame'],
-                        'rx_shmem_ref'   : val['rx_shmem_ref'] })
-        msg = self.backendChannel.requestResponse(msg)
-        #todo: check return status
-        self.status = NETIF_INTERFACE_STATUS_CONNECTED
-        self.reportStatus()
 
     def setCreditLimit(self, credit, period):
         #todo: these params should be in sxpr and vif config.
         self.credit = credit
         self.period = period
-        self.send_be_creditlimit(credit, period)
 
     def getCredit(self):
         return self.credit
@@ -441,31 +362,10 @@
     def getPeriod(self):
         return self.period
         
-    def send_be_creditlimit(self, credit, period):
-        msg = packMsg('netif_be_creditlimit_t',
-                      { 'domid'          : self.frontendDomain,
-                        'netif_handle'   : self.vif,
-                        'credit_bytes'   : credit,
-                        'period_usec'    : period })
-        msg = self.backendChannel.requestResponse(msg)
-        # todo: check return status
-        
-    def reportStatus(self, resp=False):
-        msg = packMsg('netif_fe_interface_status_t',
-                      { 'handle' : self.vif,
-                        'status' : self.status,
-                        'evtchn' : self.getEventChannelFrontend(),
-                        'domid'  : self.backendDomain,
-                        'mac'    : self.mac })
-        if resp:
-            self.frontendChannel.writeResponse(msg)
-        else:
-            self.frontendChannel.writeRequest(msg)
-
     def interfaceChanged(self):
         """Notify the front-end that a device has been added or removed.
         """
-        self.reportStatus()
+        pass
         
 class NetifController(DevController):
     """Network interface controller. Handles all network devices for a domain.
@@ -473,25 +373,9 @@
     
     def __init__(self, vm, recreate=False):
         DevController.__init__(self, vm, recreate=recreate)
-        self.channel = None
-        self.rcvr = None
-        self.channel = None
 
     def initController(self, recreate=False, reboot=False):
         self.destroyed = False
-        self.channel = self.getChannel()
-        # Register our handlers for incoming requests.
-        self.rcvr = CtrlMsgRcvr(self.channel)
-        self.rcvr.addHandler(CMSG_NETIF_FE,
-                             CMSG_NETIF_FE_DRIVER_STATUS,
-                             self.recv_fe_driver_status)
-        self.rcvr.addHandler(CMSG_NETIF_FE,
-                             CMSG_NETIF_FE_INTERFACE_STATUS,
-                             self.recv_fe_interface_status)
-        self.rcvr.addHandler(CMSG_NETIF_FE,
-                             CMSG_NETIF_FE_INTERFACE_CONNECT,
-                             self.recv_fe_interface_connect)
-        self.rcvr.registerChannel()
         if reboot:
             self.rebootDevices()
 
@@ -501,8 +385,6 @@
         self.destroyed = True
         log.debug("Destroying netif domain=%d", self.getDomain())
         self.destroyDevices(reboot=reboot)
-        if self.rcvr:
-            self.rcvr.deregisterChannel()
 
     def sxpr(self):
         val = ['netif', ['dom', self.getDomain()]]
@@ -524,57 +406,3 @@
         
         dev = self.devices[vif]
         return dev.setCreditLimit(credit, period)
-    
-    def recv_fe_driver_status(self, msg):
-        msg = packMsg('netif_fe_driver_status_t',
-                      { 'status'     : NETIF_DRIVER_STATUS_UP,
-                        ## FIXME: max_handle should be max active interface id
-                        'max_handle' : self.getDeviceCount()
-                        #'max_handle' : self.getMaxDeviceId()
-                        })
-        # Two ways of doing it:
-        # 1) front-end requests driver status, we reply with the interface 
count,
-        #    front-end polls the interfaces,
-        #    front-end checks they are all up
-        # 2) front-end requests driver status, we reply (with anything),
-        #    we notify the interfaces,
-        #    we notify driver status up with the count
-        #    front-end checks they are all up
-        #
-        # We really want to use 1), but at the moment the xenU kernel panics
-        # in that mode, so we're sticking to 2) for now.
-        resp = False
-        if resp:
-            self.channel.writeResponse(msg)
-        else:
-            for dev in self.devices.values():
-                dev.reportStatus()
-            self.channel.writeRequest(msg)
-        return resp
-
-    def recv_fe_interface_status(self, msg):
-        val = unpackMsg('netif_fe_interface_status_t', msg)
-        vif = val['handle']
-        dev = self.findDevice(vif)
-        if dev:
-            dev.reportStatus(resp=True)
-        else:
-            log.error('Received netif_fe_interface_status for unknown vif: 
dom=%d vif=%d',
-                      self.getDomain(), vif)
-            msg = packMsg('netif_fe_interface_status_t',
-                          { 'handle' : -1,
-                            'status' : NETIF_INTERFACE_STATUS_CLOSED,
-                            });
-            self.channel.writeResponse(msg)
-        return True
-            
-    def recv_fe_interface_connect(self, msg):
-        val = unpackMsg('netif_fe_interface_connect_t', msg)
-        vif = val['handle']
-        dev = self.getDevice(vif)
-        if dev:
-            dev.recv_fe_interface_connect(val)
-        else:
-            log.error('Received netif_fe_interface_connect for unknown vif: 
dom=%d vif=%d',
-                      self.getDomain(), vif)
-
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/usbif.py
--- a/tools/python/xen/xend/server/usbif.py     Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/server/usbif.py     Fri Sep  9 16:30:54 2005
@@ -9,9 +9,7 @@
 from xen.xend.XendError import XendError
 from xen.xend.xenstore import DBVar
 
-from xen.xend.server import channel
 from xen.xend.server.controller import Dev, DevController
-from xen.xend.server.messages import *
 
 class UsbBackend:
     """Handler for the 'back-end' channel to a USB device driver domain
@@ -25,39 +23,15 @@
         self.connecting = False
         self.frontendDomain = self.controller.getDomain()
         self.backendDomain = dom
-        self.frontendChannel = None
-        self.backendChannel = None
 
     def init(self, recreate=False, reboot=False):
-        self.frontendChannel = self.controller.getChannel()
-        cf = channel.channelFactory()
-        self.backendChannel = cf.openChannel(self.backendDomain)
-
+        pass
+    
     def __str__(self):
         return ('<UsbifBackend frontend=%d backend=%d id=%d>'
                 % (self.frontendDomain,
                    self.backendDomain,
                    self.id))
-
-    def closeEvtchn(self):
-        if self.evtchn:
-            channel.eventChannelClose(self.evtchn)
-            self.evtchn = None
-
-    def openEvtchn(self):
-        self.evtchn = channel.eventChannel(self.backendDomain, 
self.frontendDomain)
-        
-    def getEventChannelBackend(self):
-        val = 0
-        if self.evtchn:
-            val = self.evtchn['port1']
-        return val
-
-    def getEventChannelFrontend(self):
-        val = 0
-        if self.evtchn:
-            val = self.evtchn['port2']
-        return val
 
     def connect(self, recreate=False):
         """Connect the controller to the usbif control interface.
@@ -67,78 +41,14 @@
         log.debug("Connecting usbif %s", str(self))
         if recreate or self.connected or self.connecting:
             pass
-        else:
-            self.send_be_create()
         
-    def send_be_create(self):
-        msg = packMsg('usbif_be_create_t',
-                      { 'domid'        : self.frontendDomain })
-        msg = self.backendChannel.requestResponse(msg)
-        val = unpackMsg('usbif_be_create_t', msg)
-        log.debug('>UsbifBackendController>respond_be_create> %s', str(val))
-        self.connected = True
-    
     def destroy(self, reboot=False):
         """Disconnect from the usbif control interface and destroy it.
         """
         self.destroyed = True
-        self.send_be_disconnect()
-        self.send_be_destroy()
-        self.closeEvtchn()
         
-    def send_be_disconnect(self):
-        log.debug('>UsbifBackendController>send_be_disconnect> %s', str(self))
-        msg = packMsg('usbif_be_disconnect_t',
-                      { 'domid'        : self.frontendDomain })
-        self.backendChannel.requestResponse(msg)
-
-    def send_be_destroy(self, response=None):
-        log.debug('>UsbifBackendController>send_be_destroy> %s', str(self))
-        msg = packMsg('usbif_be_destroy_t',
-                      { 'domid'        : self.frontendDomain })
-        self.backendChannel.requestResponse(msg)
-        #todo: check return status
-
-    
-    def connectInterface(self, val):
-        self.openEvtchn()
-        log.debug(">UsbifBackendController>connectInterface> connecting usbif 
to event channel %s ports=%d:%d",
-                  str(self),
-                  self.getEventChannelBackend(),
-                  self.getEventChannelFrontend())
-        msg = packMsg('usbif_be_connect_t',
-                      { 'domid'        : self.frontendDomain,
-                        'evtchn'       : self.getEventChannelBackend(),
-                        'shmem_frame'  : val['shmem_frame'],
-                        'bandwidth'    : 500 # XXX fix bandwidth!
-                        })
-        msg = self.backendChannel.requestResponse(msg)
-        self.respond_be_connect(msg)
-
-    def respond_be_connect(self, msg):
-        """Response handler for a be_connect message.
-
-        @param msg: message
-        @type  msg: xu message
-        """
-        val = unpackMsg('usbif_be_connect_t', msg)
-        log.debug('>UsbifBackendController>respond_be_connect> %s, %s', 
str(self), str(val))
-        self.send_fe_interface_status_changed()
-        log.debug(">UsbifBackendController> Successfully connected USB 
interface for domain %d" % self.frontendDomain)
-        self.controller.claim_ports()
-            
-    def send_fe_interface_status_changed(self):
-        msg = packMsg('usbif_fe_interface_status_changed_t',
-                      { 'status'    : USBIF_INTERFACE_STATUS_CONNECTED,
-                        'domid'     : self.backendDomain,
-                        'evtchn'    : self.getEventChannelFrontend(),
-                        'bandwidth' : 500,
-                        'num_ports' : len(self.controller.devices)
-                        })
-        self.frontendChannel.writeRequest(msg)
-
     def interfaceChanged(self):
-        self.send_fe_interface_status_changed()
+        pass
 
 
 class UsbDev(Dev):
@@ -153,17 +63,12 @@
         self.port = id
         self.path = None
         self.frontendDomain = self.getDomain()
-        self.frontendChannel = None
         self.backendDomain = 0
-        self.backendChannel = None
         self.configure(self.config, recreate=recreate)
 
     def init(self, recreate=False, reboot=False):
         self.destroyed = False
         self.frontendDomain = self.getDomain()
-        self.frontendChannel = self.getChannel()
-        backend = self.getBackend()
-        self.backendChannel = backend.backendChannel
         
     def configure(self, config, change=False, recreate=False):
         if change:
@@ -204,7 +109,6 @@
         """
         self.destroyed = True
         log.debug("Destroying usb domain=%d id=%s", self.frontendDomain, 
self.id)
-        self.send_be_release_port()
         if change:
             self.interfaceChanged()
 
@@ -220,27 +124,6 @@
         """
         self.getBackend().connect()
 
-    def send_be_claim_port(self):
-        log.debug(">UsbifBackendController>send_be_claim_port> about to claim 
port %s" % self.path)
-        msg = packMsg('usbif_be_claim_port_t',
-                      { 'domid'        : self.frontendDomain,
-                        'path'         : self.path,
-                        'usbif_port'   : self.port,
-                        'status'       : 0})
-        self.backendChannel.writeRequest(msg)
-        log.debug(">UsbifBackendController> Claim port completed")
-        # No need to add any callbacks, since the guest polls its virtual ports
-        # anyhow, somewhat like a UHCI controller ;-)
-
-    def send_be_release_port(self):
-        msg = packMsg('usbif_be_release_port_t',
-                      { 'domid'        : self.frontendDomain,
-                        'path'         : self.path })
-        self.backendChannel.writeRequest(msg)        
-        log.debug(">UsbifBackendController> Release port completed")
-        # No need to add any callbacks, since the guest polls its virtual ports
-        # anyhow, somewhat like a UHCI controller ;-)
-
 class UsbifController(DevController):
     """USB device interface controller. Handles all USB devices
     for a domain.
@@ -252,18 +135,9 @@
         DevController.__init__(self, vm, recreate=recreate)
         self.backends = {}
         self.backendId = 0
-        self.rcvr = None
 
     def init(self, recreate=False, reboot=False):
         self.destroyed = False
-        self.rcvr = CtrlMsgRcvr(self.getChannel())
-        self.rcvr.addHandler(CMSG_USBIF_FE,
-                             CMSG_USBIF_FE_DRIVER_STATUS_CHANGED,
-                             self.recv_fe_driver_status_changed)
-        self.rcvr.addHandler(CMSG_USBIF_FE,
-                             CMSG_USBIF_FE_INTERFACE_CONNECT,
-                             self.recv_fe_interface_connect)
-        self.rcvr.registerChannel()
         if reboot:
             self.rebootBackends()
             self.rebootDevices()
@@ -283,8 +157,6 @@
         log.debug("Destroying blkif domain=%d", self.getDomain())
         self.destroyDevices(reboot=reboot)
         self.destroyBackends(reboot=reboot)
-        if self.rcvr:
-            self.rcvr.deregisterChannel()
 
     def rebootBackends(self):
         for backend in self.backends.values():
@@ -311,40 +183,3 @@
     def destroyBackends(self, reboot=False):
         for backend in self.backends.values():
             backend.destroy(reboot=reboot)
-
-    def recv_fe_driver_status_changed(self, msg):
-        val = unpackMsg('usbif_fe_driver_status_changed_t', msg)
-        log.debug('>UsbifController>recv_fe_driver_status_changed> %s', 
str(val))
-        #todo: FIXME: For each backend?
-        msg = packMsg('usbif_fe_interface_status_changed_t',
-                      { 'status' : USBIF_INTERFACE_STATUS_DISCONNECTED,
-                        'domid'  : 0, #todo: FIXME: should be domid of backend
-                        'evtchn' : 0 })
-        msg = self.getChannel().requestResponse(msg)
-        self.disconnected_resp(msg)
-
-    def disconnected_resp(self, msg):
-        val = unpackMsg('usbif_fe_interface_status_changed_t', msg)
-        if val['status'] != USBIF_INTERFACE_STATUS_DISCONNECTED:
-            log.error(">UsbifController>disconnected_resp> unexpected status 
change")
-        else:
-            log.debug(">UsbifController>disconnected_resp> interface 
disconnected OK")
-
-    def recv_fe_interface_connect(self, msg):
-        val = unpackMsg('usbif_fe_interface_status_changed_t', msg)
-        log.debug(">UsbifController>recv_fe_interface_connect> notifying 
backend")
-        #todo: FIXME: generalise to more than one backend.
-        id = 0
-        backend = self.getBackendById(id)
-        if backend:
-            try:
-                backend.connectInterface(val)
-            except IOError, ex:
-                log.error("Exception connecting backend: %s", ex)
-        else:
-            log.error('interface connect on unknown interface: id=%d', id)
-
-    def claim_ports(self):
-        for dev in self.devices.values():
-            dev.send_be_claim_port()
-
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/xenstore/xsobj.py
--- a/tools/python/xen/xend/xenstore/xsobj.py   Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xend/xenstore/xsobj.py   Fri Sep  9 16:30:54 2005
@@ -543,10 +543,10 @@
             if not isinstance(node, DBMap): continue
             node.saveDB(sync=sync, save=save)
         # Remove db nodes not in children.
-        if sync:
-            for name in self.__db__.ls():
-                if name not in self:
-                    self.__db__.delete(name)
+        ###if sync:
+        ###    for name in self.__db__.ls():
+        ###        if name not in self:
+        ###            self.__db__.delete(name)
 
     def importFromDB(self, obj, fields):
         """Set fields in obj from db fields.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xm/create.py     Fri Sep  9 16:30:54 2005
@@ -103,12 +103,13 @@
           fn=set_true, default=0,
           use="Connect to the console after the domain is created.")
 
-gopts.var('vnc', val='no|yes',
+gopts.var('vncviewer', val='no|yes',
           fn=set_bool, default=None,
           use="""Spawn a vncviewer listening for a vnc server in the domain.
           The address of the vncviewer is passed to the domain on the kernel 
command
           line using 'VNC_SERVER=<host>:<port>'. The port used by vnc is 5500 
+ DISPLAY.
           A display value with a free port is chosen if possible.
+         Only valid when vnc=1.
           """)
 
 gopts.var('name', val='NAME',
@@ -174,6 +175,12 @@
 gopts.var('netif', val='no|yes',
           fn=set_bool, default=0,
           use="Make the domain a network interface backend.")
+
+gopts.var('tpmif', val='frontend=DOM',
+          fn=append_value, default=[],
+          use="""Make the domain a TPM interface backend. If frontend is given,
+          the frontend in that domain is connected to this backend (not
+          completely implemented, yet)""")
 
 gopts.var('disk', val='phy:DEV,VDEV,MODE[,DOM]',
           fn=append_value, default=[],
@@ -213,6 +220,12 @@
           This option may be repeated to add more than one vif.
           Specifying vifs will increase the number of interfaces as needed.""")
 
+gopts.var('vtpm', val="instance=INSTANCE,backend=DOM",
+          fn=append_value, default=[],
+          use="""Add a tpm interface. On the backend side use the given
+          instance as virtual TPM instance. Use the backend in the given
+          domain.""")
+
 gopts.var('nics', val="NUM",
           fn=set_int, default=1,
           use="""Set the number of network interfaces.
@@ -308,6 +321,10 @@
 gopts.var('nographic', val='no|yes',
           fn=set_bool, default=0,
           use="Should device models use graphics?")
+
+gopts.var('vnc', val='',
+          fn=set_value, default=None,
+          use="""Should the device model use VNC?""")
 
 gopts.var('sdl', val='',
           fn=set_value, default=None,
@@ -368,6 +385,46 @@
     for path in vals.usb:
         config_usb = ['usb', ['path', path]]
         config_devs.append(['device', config_usb])
+
+def configure_vtpm(opts, config_devs, vals):
+    """Create the config for virtual TPM interfaces.
+    """
+    vtpm = vals.vtpm
+    vtpm_n = 1
+    for idx in range(0, vtpm_n):
+        if idx < len(vtpm):
+            d = vtpm[idx]
+            instance = d.get('instance')
+            if instance == "VTPMD":
+                instance = "0"
+            else:
+                try:
+                    if int(instance) == 0:
+                        opts.err('VM config error: vTPM instance must not be 
0.')
+                except ValueError:
+                    opts.err('VM config error: could not parse instance 
number.')
+            backend = d.get('backend')
+            config_vtpm = ['vtpm']
+            if instance:
+                config_vtpm.append(['instance', instance])
+            if backend:
+                config_vtpm.append(['backend', backend])
+            config_devs.append(['device', config_vtpm])
+
+def configure_tpmif(opts, config_devs, vals):
+    """Create the config for virtual TPM interfaces.
+    """
+    tpmif = vals.tpmif
+    tpmif_n = 1
+    for idx in range(0, tpmif_n):
+        if idx < len(tpmif):
+            d = tpmif[idx]
+            frontend = d.get('frontend')
+            config_tpmif = ['tpmif']
+            if frontend:
+                config_tpmif.append(['frontend', frontend])
+            config_devs.append(['device', config_tpmif])
+
 
 def randomMAC():
     """Generate a random MAC address.
@@ -442,7 +499,7 @@
     """
     args = [ 'memmap', 'device_model', 'cdrom',
             'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga', 
-             'isa', 'nographic', 'vnc', 'sdl', 'display']        
+             'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display']   
     for a in args:
        if (vals.__dict__[a]):
            config_devs.append([a, vals.__dict__[a]])
@@ -479,6 +536,8 @@
         config.append(['backend', ['blkif']])
     if vals.netif:
         config.append(['backend', ['netif']])
+    if vals.tpmif:
+        config.append(['backend', ['tpmif']])
     if vals.restart:
         config.append(['restart', vals.restart])
 
@@ -491,6 +550,7 @@
     configure_pci(opts, config_devs, vals)
     configure_vifs(opts, config_devs, vals)
     configure_usb(opts, config_devs, vals)
+    configure_vtpm(opts, config_devs, vals)
     configure_vmx(opts, config_devs, vals)
     config += config_devs
 
@@ -539,6 +599,38 @@
         vifs.append(d)
     vals.vif = vifs
 
+def preprocess_vtpm(opts, vals):
+    if not vals.vtpm: return
+    vtpms = []
+    for vtpm in vals.vtpm:
+        d = {}
+        a = vtpm.split(',')
+        for b in a:
+            (k, v) = b.strip().split('=', 1)
+            k = k.strip()
+            v = v.strip()
+            if k not in ['backend', 'instance']:
+                opts.err('Invalid vtpm specifier: ' + vtpm)
+            d[k] = v
+        vtpms.append(d)
+    vals.vtpm = vtpms
+
+def preprocess_tpmif(opts, vals):
+    if not vals.tpmif: return
+    tpmifs = []
+    for tpmif in vals.tpmif:
+        d = {}
+        a = tpmif.split(',')
+        for b in a:
+            (k, v) = b.strip().split('=', 1)
+            k = k.strip()
+            v = v.strip()
+            if k not in ['frontend']:
+                opts.err('Invalid tpmif specifier: ' + tpmif)
+            d[k] = v
+        tpmifs.append(d)
+    vals.tpmif = tpmifs
+
 def preprocess_ip(opts, vals):
     if vals.ip or vals.dhcp != 'off':
         dummy_nfs_server = '1.2.3.4'
@@ -606,7 +698,7 @@
     """If vnc was specified, spawn a vncviewer in listen mode
     and pass its address to the domain on the kernel command line.
     """
-    if not vals.vnc or vals.dryrun: return
+    if not (vals.vnc and vals.vncviewer) or vals.dryrun: return
     vnc_display = choose_vnc_display()
     if not vnc_display:
         opts.warn("No free vnc display")
@@ -627,6 +719,8 @@
     preprocess_ip(opts, vals)
     preprocess_nfs(opts, vals)
     preprocess_vnc(opts, vals)
+    preprocess_vtpm(opts, vals)
+    preprocess_tpmif(opts, vals)
          
 def make_domain(opts, config):
     """Create, build and start a domain.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Sep  8 15:18:40 2005
+++ b/tools/python/xen/xm/main.py       Fri Sep  9 16:30:54 2005
@@ -715,9 +715,9 @@
             err("Most commands need root access.  Please try again as root")
             sys.exit(1)
         except XendError, ex:
-            if args[0] == "bogus":
-                args.remove("bogus")
             if len(args) > 0:
+                if args[0] == "bogus":
+                    args.remove("bogus")
                 handle_xend_error(argv[1], args[0], ex)
             else:
                 print "Unexpected error:", sys.exc_info()[0]
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/Makefile
--- a/tools/security/Makefile   Thu Sep  8 15:18:40 2005
+++ b/tools/security/Makefile   Fri Sep  9 16:30:54 2005
@@ -45,6 +45,7 @@
        $(MAKE) secpol_xml2bin
        chmod 700 ./setlabel.sh
        chmod 700 ./updategrub.sh
+       chmod 700 ./getlabel.sh
 
 secpol_tool : secpol_tool.c secpol_compat.h
        $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $<
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c      Thu Sep  8 15:18:40 2005
+++ b/tools/security/secpol_tool.c      Fri Sep  9 16:30:54 2005
@@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <getopt.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -40,6 +41,17 @@
 #define PERROR(_m, _a...) \
 fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,  \
                 errno, strerror(errno))
+
+void usage(char *progname)
+{
+    printf("Use: %s \n"
+           "\t getpolicy\n"
+           "\t dumpstats\n"
+           "\t loadpolicy <binary policy file>\n"
+           "\t getssid -d <domainid> [-f]\n"
+                  "\t getssid -s <ssidref> [-f]\n", progname);
+    exit(-1);
+}
 
 static inline int do_policycmd(int xc_handle, unsigned int cmd,
                                unsigned long data)
@@ -320,7 +332,7 @@
 
         if (ret)
             printf
-                ("ERROR setting policy. Use 'xm dmesg' to see details.\n");
+                ("ERROR setting policy. Try 'xm dmesg' to see details.\n");
         else
             printf("Successfully changed policy.\n");
 
@@ -370,7 +382,7 @@
 
     if (ret < 0)
     {
-        printf("ERROR dumping policy stats. Use 'xm dmesg' to see details.\n");
+        printf("ERROR dumping policy stats. Try 'xm dmesg' to see details.\n");
         return ret;
     }
     stats = (struct acm_stats_buffer *) stats_buffer;
@@ -421,17 +433,121 @@
     }
     return ret;
 }
+/************************ get ssidref & types ******************************/
+/*
+ * the ssid (types) can be looked up either by domain id or by ssidref
+ */
+int acm_domain_getssid(int xc_handle, int argc, char * const argv[])
+{
+    /* this includes header and a set of types */
+    #define MAX_SSIDBUFFER  2000
+    int ret, i;
+    acm_op_t op;
+    struct acm_ssid_buffer *hdr;
+    unsigned char *buf;
+       int nice_print = 1;
+
+    op.cmd = ACM_GETSSID;
+    op.interface_version = ACM_INTERFACE_VERSION;
+       op.u.getssid.get_ssid_by = UNSET;
+       /* arguments
+          -d ... domain id to look up
+          -s ... ssidref number to look up
+          -f ... formatted print (scripts depend on this format)
+       */
+       while (1)
+    {
+               int c = getopt(argc, argv, "d:s:f");
+               if (c == -1)
+                       break;
+               if (c == 'd')
+        {
+                       if (op.u.getssid.get_ssid_by != UNSET)
+                               usage(argv[0]);
+                       op.u.getssid.get_ssid_by = DOMAINID;
+                       op.u.getssid.id.domainid = strtoul(optarg, NULL, 0);
+               }
+               else if (c== 's')
+        {
+                       if (op.u.getssid.get_ssid_by != UNSET)
+                               usage(argv[0]);
+                       op.u.getssid.get_ssid_by = SSIDREF;
+                       op.u.getssid.id.ssidref = strtoul(optarg, NULL, 0);
+               }
+               else if (c== 'f')
+               {
+                       nice_print = 0;
+               }
+               else
+                       usage(argv[0]);
+       }
+       if (op.u.getssid.get_ssid_by == UNSET)
+               usage(argv[0]);
+
+       buf = malloc(MAX_SSIDBUFFER);
+    if (!buf)
+        return -ENOMEM;
+
+    /* dump it and then push it down into xen/acm */
+    op.u.getssid.ssidbuf = buf;   /* out */
+    op.u.getssid.ssidbuf_size = MAX_SSIDBUFFER;
+    ret = do_acm_op(xc_handle, &op);
+
+    if (ret)
+    {
+        printf("ERROR getting ssidref. Try 'xm dmesg' to see details.\n");
+        goto out;
+    }
+    hdr = (struct acm_ssid_buffer *)buf;
+    if (hdr->len > MAX_SSIDBUFFER)
+    {
+        printf("ERROR: Buffer length inconsistent (ret=%d, hdr->len=%d)!\n",
+               ret, hdr->len);
+        ret = -EIO; goto out;
+    }
+       if (nice_print)
+    {
+               printf("SSID: ssidref = 0x%08x \n", hdr->ssidref);
+               printf("      P: %s, max_types = %d\n",
+                          ACM_POLICY_NAME(hdr->primary_policy_code), 
hdr->primary_max_types);
+               printf("          Types: ");
+               for (i=0; i< hdr->primary_max_types; i++)
+                       if (buf[hdr->primary_types_offset + i])
+                               printf("%02x ", i);
+                       else
+                               printf("-- ");
+               printf("\n");
+
+               printf("      S: %s, max_types = %d\n",
+                          ACM_POLICY_NAME(hdr->secondary_policy_code), 
hdr->secondary_max_types);
+               printf("          Types: ");
+               for (i=0; i< hdr->secondary_max_types; i++)
+                       if (buf[hdr->secondary_types_offset + i])
+                               printf("%02x ", i);
+                       else
+                               printf("-- ");
+               printf("\n");
+       }
+       else
+    {
+               /* formatted print for use with scripts (.sh)
+                *  update scripts when updating here (usually
+                *  used in combination with -d to determine a
+                *  running domain's label
+                */
+               printf("SSID: ssidref = 0x%08x \n", hdr->ssidref);
+       }
+
+    /* return STE ssidref */
+    if (hdr->primary_policy_code == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+        ret = (hdr->ssidref) & 0xffff;
+    else if (hdr->secondary_policy_code == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+        ret = (hdr->ssidref) >> 16;
+ out:
+    free(buf); return ret;
+}
 
 /***************************** main **************************************/
-
-void usage(char *progname)
-{
-    printf("Use: %s \n"
-           "\t getpolicy\n"
-           "\t dumpstats\n"
-           "\t loadpolicy <binary policy file>\n", progname);
-    exit(-1);
-}
 
 int main(int argc, char **argv)
 {
@@ -459,6 +575,8 @@
         if (argc != 2)
             usage(argv[0]);
         ret = acm_domain_dumpstats(acm_cmd_fd);
+    } else if (!strcmp(argv[1], "getssid")) {
+        ret = acm_domain_getssid(acm_cmd_fd, argc, argv);
     } else
         usage(argv[0]);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/setlabel.sh
--- a/tools/security/setlabel.sh        Thu Sep  8 15:18:40 2005
+++ b/tools/security/setlabel.sh        Fri Sep  9 16:30:54 2005
@@ -34,275 +34,27 @@
        exec sh -c "bash $0 $*"
 fi
 
+export PATH=$PATH:.
+source labelfuncs.sh
 
 usage ()
 {
-       echo "Usage: $0 [Option] <vmfile> <label> <policy name> "
-       echo "    or $0 -l <policy name>"
+       echo "Usage: $0 [Option] <vmfile> <label> [<policy name>]"
+       echo "    or $0 -l [<policy name>]"
        echo ""
-       echo "Valid Options are:"
+       echo "Valid options are:"
        echo "-r          : to relabel a file without being prompted"
        echo ""
        echo "vmfile      : XEN vm configuration file"
-       echo "label       : the label to map"
+       echo "label       : the label to map to an ssidref"
        echo "policy name : the name of the policy, i.e. 'chwall'"
+       echo "              If the policy name is omitted, it is attempted"
+       echo "              to find the current policy's name in grub.conf."
        echo ""
-       echo "-l <policy name> is used to show valid labels in the map file"
+       echo "-l [<policy name>] is used to show valid labels in the map file 
of"
+       echo "                   the given or current policy."
        echo ""
 }
-
-
-findMapFile ()
-{
-       mapfile="./$1.map"
-       if [ -r "$mapfile" ]; then
-               return 1
-       fi
-
-       mapfile="./policies/$1/$1.map"
-       if [ -r "$mapfile" ]; then
-               return 1
-       fi
-
-       return 0
-}
-
-showLabels ()
-{
-       mapfile=$1
-       if [ ! -r "$mapfile" -o "$mapfile" == "" ]; then
-               echo "Cannot read from vm configuration file $vmfile."
-               return -1
-       fi
-
-       getPrimaryPolicy $mapfile
-       getSecondaryPolicy $mapfile
-
-       echo "The following labels are available:"
-       let line=1
-       while [ 1 ]; do
-               ITEM=`cat $mapfile |         \
-                     awk -vline=$line       \
-                         -vprimary=$primary \
-                     '{                     \
-                        if ($1 == "LABEL->SSID" &&  \
-                            $2 == "VM" &&           \
-                            $3 == primary ) {       \
-                          ctr++;                    \
-                          if (ctr == line) {        \
-                            print $4;               \
-                          }                         \
-                        }                           \
-                      } END {                       \
-                      }'`
-
-               if [ "$ITEM" == "" ]; then
-                       break
-               fi
-               if [ "$secondary" != "NULL" ]; then
-                       LABEL=`cat $mapfile |     \
-                              awk -vitem=$ITEM   \
-                              '{
-                                 if ($1 == "LABEL->SSID" && \
-                                     $2 == "VM" &&          \
-                                     $3 == "CHWALL" &&      \
-                                     $4 == item ) {         \
-                                   result = item;           \
-                                 }                          \
-                               } END {                      \
-                                   print result             \
-                               }'`
-               else
-                       LABEL=$ITEM
-               fi
-
-               if [ "$LABEL" != "" ]; then
-                       echo "$LABEL"
-                       found=1
-               fi
-               let line=line+1
-       done
-       if [ "$found" != "1" ]; then
-               echo "No labels found."
-       fi
-}
-
-getPrimaryPolicy ()
-{
-       mapfile=$1
-       primary=`cat $mapfile  |   \
-                awk '             \
-                 {                \
-                   if ( $1 == "PRIMARY" ) { \
-                     res=$2;                \
-                   }                        \
-                 } END {                    \
-                   print res;               \
-                 } '`
-}
-
-getSecondaryPolicy ()
-{
-       mapfile=$1
-       secondary=`cat $mapfile  |   \
-                awk '             \
-                 {                \
-                   if ( $1 == "SECONDARY" ) { \
-                     res=$2;                \
-                   }                        \
-                 } END {                    \
-                   print res;               \
-                 } '`
-}
-
-
-getDefaultSsid ()
-{
-       mapfile=$1
-       pol=$2
-       RES=`cat $mapfile    \
-            awk -vpol=$pol  \
-             {              \
-               if ($1 == "LABEL->SSID" && \
-                   $2 == "ANY"         && \
-                   $3 == pol           && \
-                   $4 == "DEFAULT"       ) {\
-                     res=$5;                \
-               }                            \
-             } END {                        \
-               printf "%04x", strtonum(res) \
-            }'`
-       echo "default NULL mapping is $RES"
-       defaultssid=$RES
-}
-
-relabel ()
-{
-       vmfile=$1
-       label=$2
-       mapfile=$3
-       mode=$4
-
-       if [ ! -r "$vmfile" ]; then
-               echo "Cannot read from vm configuration file $vmfile."
-               return -1
-       fi
-
-       if [ ! -w "$vmfile" ]; then
-               echo "Cannot write to vm configuration file $vmfile."
-               return -1
-       fi
-
-       if [ ! -r "$mapfile" ] ; then
-               echo "Cannot read mapping file $mapfile."
-               return -1
-       fi
-
-       # Determine which policy is primary, which sec.
-       getPrimaryPolicy $mapfile
-       getSecondaryPolicy $mapfile
-
-       # Calculate the primary policy's SSIDREF
-       if [ "$primary" == "NULL" ]; then
-               SSIDLO="0000"
-       else
-               SSIDLO=`cat $mapfile |                    \
-                       awk -vlabel=$label                \
-                           -vprimary=$primary            \
-                          '{                             \
-                             if ( $1 == "LABEL->SSID" && \
-                                  $2 == "VM" &&          \
-                                  $3 == primary  &&      \
-                                  $4 == label ) {        \
-                               result=$5                 \
-                             }                           \
-                          } END {                        \
-                            if (result != "" )           \
-                              {printf "%04x", strtonum(result)}\
-                          }'`
-       fi
-
-       # Calculate the secondary policy's SSIDREF
-       if [ "$secondary" == "NULL" ]; then
-               SSIDHI="0000"
-       else
-               SSIDHI=`cat $mapfile |                    \
-                       awk -vlabel=$label                \
-                           -vsecondary=$secondary        \
-                          '{                             \
-                             if ( $1 == "LABEL->SSID" && \
-                                  $2 == "VM"          && \
-                                  $3 == secondary     && \
-                                  $4 == label ) {        \
-                               result=$5                 \
-                             }                           \
-                           }  END {                      \
-                             if (result != "" )          \
-                               {printf "%04x", strtonum(result)}\
-                           }'`
-       fi
-
-       if [ "$SSIDLO" == "" -o \
-            "$SSIDHI" == "" ]; then
-               echo "Could not map the given label '$label'."
-               return -1
-       fi
-
-       ACM_POLICY=`cat $mapfile |             \
-           awk ' { if ( $1 == "POLICY" ) {    \
-                     result=$2                \
-                   }                          \
-                 }                            \
-                 END {                        \
-                   if (result != "") {        \
-                     printf result            \
-                   }                          \
-                 }'`
-
-       if [ "$ACM_POLICY" == "" ]; then
-               echo "Could not find 'POLICY' entry in map file."
-               return -1
-       fi
-
-       SSIDREF="0x$SSIDHI$SSIDLO"
-
-       if [ "$mode" != "relabel" ]; then
-               RES=`cat $vmfile |  \
-                    awk '{         \
-                      if ( substr($1,0,7) == "ssidref" ) {\
-                        print $0;             \
-                      }                       \
-                    }'`
-               if [ "$RES" != "" ]; then
-                       echo "Do you want to overwrite the existing mapping 
($RES)? (y/N)"
-                       read user
-                       if [ "$user" != "y" -a "$user" != "Y" ]; then
-                               echo "Aborted."
-                               return 0
-                       fi
-               fi
-       fi
-
-       #Write the output
-       vmtmp1="/tmp/__setlabel.tmp1"
-       vmtmp2="/tmp/__setlabel.tmp2"
-       touch $vmtmp1
-       touch $vmtmp2
-       if [ ! -w "$vmtmp1" -o ! -w "$vmtmp2" ]; then
-               echo "Cannot create temporary files. Aborting."
-               return -1
-       fi
-       RES=`sed -e '/^#ACM_POLICY/d' $vmfile > $vmtmp1`
-       RES=`sed -e '/^#ACM_LABEL/d' $vmtmp1 > $vmtmp2`
-       RES=`sed -e '/^ssidref/d' $vmtmp2 > $vmtmp1`
-       echo "#ACM_POLICY=$ACM_POLICY" >> $vmtmp1
-       echo "#ACM_LABEL=$label" >> $vmtmp1
-       echo "ssidref = $SSIDREF" >> $vmtmp1
-       mv -f $vmtmp1 $vmfile
-       rm -rf $vmtmp1 $vmtmp2
-       echo "Mapped label '$label' to ssidref '$SSIDREF'."
-}
-
 
 
 if [ "$1" == "-r" ]; then
@@ -317,10 +69,25 @@
 
 if [ "$mode" == "show" ]; then
        if [ "$1" == "" ]; then
-               usage
-               exit -1;
+               findGrubConf
+               ret=$?
+               if [ $ret -eq 0 ]; then
+                       echo "Could not find grub.conf"
+                       exit -1;
+               fi
+               findPolicyInGrub $grubconf
+               if [ "$policy" != "" ]; then
+                       echo "Assuming policy to be '$policy'.";
+               else
+                       echo "Could not find policy."
+                       exit -1;
+               fi
+       else
+               policy=$1;
        fi
-       findMapFile $1
+
+
+       findMapFile $policy
        res=$?
        if [ "$res" != "0" ]; then
                showLabels $mapfile
@@ -330,11 +97,29 @@
 elif [ "$mode" == "usage" ]; then
        usage
 else
+       if [ "$2" == "" ]; then
+               usage
+               exit -1
+       fi
        if [ "$3" == "" ]; then
-               usage
-               exit -1;
+               findGrubConf
+               ret=$?
+               if [ $ret -eq 0 ]; then
+                       echo "Could not find grub.conf"
+                       exit -1;
+               fi
+               findPolicyInGrub $grubconf
+               if [ "$policy" != "" ]; then
+                       echo "Assuming policy to be '$policy'.";
+               else
+                       echo "Could not find policy."
+                       exit -1;
+               fi
+
+       else
+               policy=$3;
        fi
-       findMapFile $3
+       findMapFile $policy
        res=$?
        if [ "$res" != "0" ]; then
                relabel $1 $2 $mapfile $mode
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile    Thu Sep  8 15:18:40 2005
+++ b/tools/xcutils/Makefile    Fri Sep  9 16:30:54 2005
@@ -18,8 +18,6 @@
 PROGRAMS_INSTALL_DIR   = /usr/libexec/xen
 
 INCLUDES += -I $(XEN_LIBXC)
-
-CC := gcc
 
 CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
 CFLAGS += $(INCLUDES)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Thu Sep  8 15:18:40 2005
+++ b/tools/xcutils/xc_restore.c        Fri Sep  9 16:30:54 2005
@@ -17,22 +17,27 @@
 int
 main(int argc, char **argv)
 {
-    unsigned int xc_fd, io_fd, domid, nr_pfns, evtchn;
+    unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn;
     int ret;
-    unsigned long mfn;
+    unsigned long store_mfn, console_mfn;
 
-    if (argc != 6)
-       errx(1, "usage: %s xcfd iofd domid nr_pfns evtchn", argv[0]);
+    if (argc != 7)
+       errx(1,
+            "usage: %s xcfd iofd domid nr_pfns store_evtchn console_evtchn",
+            argv[0]);
 
     xc_fd = atoi(argv[1]);
     io_fd = atoi(argv[2]);
     domid = atoi(argv[3]);
     nr_pfns = atoi(argv[4]);
-    evtchn = atoi(argv[5]);
+    store_evtchn = atoi(argv[5]);
+    console_evtchn = atoi(argv[6]);
 
-    ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, evtchn, &mfn);
+    ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+                          &store_mfn, console_evtchn, &console_mfn);
     if (ret == 0) {
-       printf("store-mfn %li\n", mfn);
+       printf("store-mfn %li\n", store_mfn);
+       printf("console-mfn %li\n", console_mfn);
        fflush(stdout);
     }
     return ret;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Thu Sep  8 15:18:40 2005
+++ b/tools/xcutils/xc_save.c   Fri Sep  9 16:30:54 2005
@@ -17,14 +17,17 @@
 int
 main(int argc, char **argv)
 {
-    unsigned int xc_fd, io_fd, domid;
+    unsigned int xc_fd, io_fd, domid, maxit, max_f, flags; 
 
-    if (argc != 4)
-       errx(1, "usage: %s xcfd iofd domid", argv[0]);
+    if (argc != 7)
+       errx(1, "usage: %s xcfd iofd domid maxit maxf flags", argv[0]);
 
     xc_fd = atoi(argv[1]);
     io_fd = atoi(argv[2]);
     domid = atoi(argv[3]);
+    maxit = atoi(argv[4]);
+    max_f = atoi(argv[5]);
+    flags = atoi(argv[6]);
 
-    return xc_linux_save(xc_fd, io_fd, domid);
+    return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags);
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstat/Makefile
--- a/tools/xenstat/Makefile    Thu Sep  8 15:18:40 2005
+++ b/tools/xenstat/Makefile    Fri Sep  9 16:30:54 2005
@@ -3,7 +3,11 @@
 
 SUBDIRS :=
 SUBDIRS += libxenstat
+
+# This doesn't cross-compile (cross-compile environments rarely have curses)
+ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
 SUBDIRS += xentop
+endif
 
 .PHONY: all install clean
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/Makefile   Fri Sep  9 16:30:54 2005
@@ -8,15 +8,16 @@
 INSTALL_DIR     = $(INSTALL) -d -m0755
 
 PROFILE=#-pg
-BASECFLAGS=-Wall -W -g 
+BASECFLAGS=-Wall -W -g -Werror
 # Make gcc generate dependencies.
 BASECFLAGS += -Wp,-MD,.$(@F).d
 PROG_DEP = .*.d
-#BASECFLAGS+= -O3 $(PROFILE)
+BASECFLAGS+= -O3 $(PROFILE)
 #BASECFLAGS+= -I$(XEN_ROOT)/tools
 BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc
 BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public
 BASECFLAGS+= -I.
+BASECFLAGS+= -I$(XEN_ROOT)/linux-2.6-xen-sparse/include/asm-xen/linux-public
 
 CFLAGS  += $(BASECFLAGS)
 LDFLAGS += $(PROFILE) -L$(XEN_LIBXC)
@@ -24,15 +25,22 @@
 TESTFLAGS= -DTESTING
 TESTENV  = XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
 
-all: xen xenstored libxenstore.so
+CLIENTS := xenstore-read xenstore-rm xenstore-write
+CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
 
-testcode: xen xs_test xenstored_test xs_random xs_dom0_test
+all: libxenstore.so xenstored $(CLIENTS)
 
-xen:
-       ln -sf $(XEN_ROOT)/xen/include/public $@
+testcode: xs_test xenstored_test xs_random xs_dom0_test
 
 xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
+
+$(CLIENTS): libxenstore.so
+$(CLIENTS): xenstore-%: xenstore_%.o
+       $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -L. -lxenstore -o $@
+
+$(CLIENTS_OBJS): xenstore_%.o: xenstore_client.c
+       $(COMPILE.c) -DCLIENT_$(*F) -o $@ $<
 
 xenstored_test: xenstored_core_test.o xenstored_watch_test.o 
xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o 
fake_libxc.o utils.o
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
@@ -58,7 +66,7 @@
 
 clean: testsuite-clean
        rm -f *.o *.opic *.so
-       rm -f xen xenstored xs_random xs_stress xs_crashme
+       rm -f xenstored xs_random xs_stress xs_crashme
        rm -f xs_test xenstored_test xs_dom0_test
        $(RM) $(PROG_DEP)
 
@@ -72,10 +80,10 @@
 
 fullcheck: testsuite-run randomcheck stresstest
 
-testsuite-run: xen xenstored_test xs_test
+testsuite-run: xenstored_test xs_test
        $(TESTENV) testsuite/test.sh && echo
 
-testsuite-fast: xen xenstored_test xs_test
+testsuite-fast: xenstored_test xs_test
        @$(TESTENV) testsuite/test.sh --fast
 
 testsuite-clean:
@@ -111,12 +119,14 @@
 tarball: clean
        cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/
 
-install: xenstored libxenstore.so
+install: libxenstore.so xenstored $(CLIENTS)
        $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
        $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
+       $(INSTALL_DIR) -p $(DESTDIR)/usr/bin
        $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
        $(INSTALL_DIR) -p $(DESTDIR)/usr/include
        $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
+       $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin
        $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_DATA) libxenstore.so $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_DATA) xs.h $(DESTDIR)/usr/include
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored.h
--- a/tools/xenstore/xenstored.h        Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xenstored.h        Fri Sep  9 16:30:54 2005
@@ -42,7 +42,7 @@
        XS_OP_READ_ONLY = XS_TRANSACTION_END,
        XS_INTRODUCE,
        XS_RELEASE,
-       XS_GETDOMAINPATH,
+       XS_GET_DOMAIN_PATH,
        XS_WRITE,
        XS_MKDIR,
        XS_RM,
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xenstored_core.c   Fri Sep  9 16:30:54 2005
@@ -49,6 +49,8 @@
 #include "xenstored_watch.h"
 #include "xenstored_transaction.h"
 #include "xenstored_domain.h"
+#include "xenctrl.h"
+#include "xen/io/domain_controller.h"
 
 static bool verbose;
 LIST_HEAD(connections);
@@ -140,7 +142,7 @@
        case XS_TRANSACTION_END: return "TRANSACTION_END";
        case XS_INTRODUCE: return "INTRODUCE";
        case XS_RELEASE: return "RELEASE";
-       case XS_GETDOMAINPATH: return "GETDOMAINPATH";
+       case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
        case XS_WRITE: return "WRITE";
        case XS_MKDIR: return "MKDIR";
        case XS_RM: return "RM";
@@ -719,7 +721,7 @@
        char *slash = strrchr(node + 1, '/');
        if (!slash)
                return talloc_strdup(node, "/");
-       return talloc_asprintf(node, "%.*s", slash - node, node);
+       return talloc_asprintf(node, "%.*s", (int)(slash - node), node);
 }
 
 static enum xs_perm_type perm_for_id(domid_t id,
@@ -828,6 +830,15 @@
        return false;
 }
 
+bool check_event_node(const char *node)
+{
+       if (!node || !strstarts(node, "@")) {
+               errno = EINVAL;
+               return false;
+       }
+       return true;
+}
+
 static void send_directory(struct connection *conn, const char *node)
 {
        char *path, *reply;
@@ -901,7 +912,7 @@
        if (slash)
                *slash = '\0';
 
-       dest = talloc_asprintf(dir, "%.*s", dot - dir, dir);
+       dest = talloc_asprintf(dir, "%.*s", (int)(dot - dir), dir);
        return rename(dir, dest) == 0;
 }
 
@@ -1277,7 +1288,7 @@
                do_release(conn, onearg(in));
                break;
 
-       case XS_GETDOMAINPATH:
+       case XS_GET_DOMAIN_PATH:
                do_get_domain_path(conn, onearg(in));
                break;
 
@@ -1295,8 +1306,12 @@
 
 static void consider_message(struct connection *conn)
 {
-       struct buffered_data *in = NULL;
-       enum xsd_sockmsg_type type = conn->in->hdr.msg.type;
+       /*
+        * 'volatile' qualifier prevents register allocation which fixes:
+        *   warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
+        */
+       struct buffered_data *volatile in = NULL;
+       enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
        jmp_buf talloc_fail;
 
        assert(conn->state == OK);
@@ -1434,7 +1449,11 @@
 
 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
 {
-       struct connection *new;
+       /*
+        * 'volatile' qualifier prevents register allocation which fixes:
+        *   warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
+        */
+       struct connection *volatile new;
        jmp_buf talloc_fail;
 
        new = talloc(talloc_autofree_context(), struct connection);
@@ -1628,12 +1647,13 @@
 }
 
 
-static struct option options[] = { { "no-fork", 0, NULL, 'N' },
-                                  { "verbose", 0, NULL, 'V' },
-                                  { "output-pid", 0, NULL, 'P' },
-                                  { "trace-file", 1, NULL, 'T' },
-                                  { "pid-file", 1, NULL, 'F' },
-                                  { NULL, 0, NULL, 0 } };
+static struct option options[] = {
+       { "pid-file", 1, NULL, 'F' },
+       { "no-fork", 0, NULL, 'N' },
+       { "output-pid", 0, NULL, 'P' },
+       { "trace-file", 1, NULL, 'T' },
+       { "verbose", 0, NULL, 'V' },
+       { NULL, 0, NULL, 0 } };
 
 int main(int argc, char *argv[])
 {
@@ -1644,13 +1664,14 @@
        bool outputpid = false;
        const char *pidfile = NULL;
 
-       while ((opt = getopt_long(argc, argv, "DVT:", options, NULL)) != -1) {
+       while ((opt = getopt_long(argc, argv, "F:NPT:V", options,
+                                 NULL)) != -1) {
                switch (opt) {
+               case 'F':
+                       pidfile = optarg;
+                       break;
                case 'N':
                        dofork = false;
-                       break;
-               case 'V':
-                       verbose = true;
                        break;
                case 'P':
                        outputpid = true;
@@ -1662,8 +1683,9 @@
                                            optarg);
                         write(tracefd, "\n***\n", strlen("\n***\n"));
                        break;
-               case 'F':
-                       pidfile = optarg;
+               case 'V':
+                       verbose = true;
+                       break;
                }
        }
        if (optind != argc)
@@ -1812,6 +1834,7 @@
                /* If transactions ended, we might be able to do more work. */
                unblock_connections();
 
-               max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd);
-       }
-}
+               max = initialize_set(&inset, &outset, *sock, *ro_sock,
+                                    event_fd);
+       }
+}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xenstored_core.h   Fri Sep  9 16:30:54 2005
@@ -133,6 +133,9 @@
 bool check_node_perms(struct connection *conn, const char *node,
                      enum xs_perm_type perm);
 
+/* Check if node is an event node. */
+bool check_event_node(const char *node);
+
 /* Path to this node outside transaction. */
 char *node_dir_outside_transaction(const char *node);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xenstored_domain.c Fri Sep  9 16:30:54 2005
@@ -1,4 +1,4 @@
-/* 
+/*
     Domain communications for Xen Store Daemon.
     Copyright (C) 2005 Rusty Russell IBM Corporation
 
@@ -33,10 +33,12 @@
 #include "talloc.h"
 #include "xenstored_core.h"
 #include "xenstored_domain.h"
+#include "xenstored_watch.h"
 #include "xenstored_test.h"
 
 static int *xc_handle;
 static int eventchn_fd;
+static int virq_port;
 static unsigned int ringbuf_datasize;
 
 struct domain
@@ -216,17 +218,6 @@
        return 0;
 }
 
-static struct domain *find_domain(u16 port)
-{
-       struct domain *i;
-
-       list_for_each_entry(i, &domains, list) {
-               if (i->port == port)
-                       return i;
-       }
-       return NULL;
-}
-
 /* We scan all domains rather than use the information given here. */
 void handle_event(int event_fd)
 {
@@ -234,6 +225,10 @@
 
        if (read(event_fd, &port, sizeof(port)) != sizeof(port))
                barf_perror("Failed to read from event fd");
+
+       if (port == virq_port)
+               domain_cleanup();
+
 #ifndef TESTING
        if (write(event_fd, &port, sizeof(port)) != sizeof(port))
                barf_perror("Failed to write to event fd");
@@ -319,6 +314,9 @@
 
        /* Now domain belongs to its connection. */
        talloc_steal(domain->conn, domain);
+
+       fire_watches(conn, "@introduceDomain", false);
+
        send_ack(conn, XS_INTRODUCE);
 }
 
@@ -367,7 +365,30 @@
        }
 
        talloc_free(domain->conn);
+
+       fire_watches(NULL, "@releaseDomain", false);
+
        send_ack(conn, XS_RELEASE);
+}
+
+void domain_cleanup(void)
+{
+       xc_dominfo_t dominfo;
+       struct domain *domain, *tmp;
+       int released = 0;
+
+       list_for_each_entry_safe(domain, tmp, &domains, list) {
+               if (xc_domain_getinfo(*xc_handle, domain->domid, 1,
+                                     &dominfo) == 1 &&
+                   dominfo.domid == domain->domid &&
+                   !dominfo.dying && !dominfo.crashed && !dominfo.shutdown)
+                       continue;
+               talloc_free(domain->conn);
+               released++;
+       }
+
+       if (released)
+               fire_watches(NULL, "@releaseDomain", false);
 }
 
 void do_get_domain_path(struct connection *conn, const char *domid_str)
@@ -386,10 +407,10 @@
        else
                domain = find_domain_by_domid(domid);
 
-       if (!domain) 
+       if (!domain)
                send_error(conn, ENOENT);
        else
-               send_reply(conn, XS_GETDOMAINPATH, domain->path,
+               send_reply(conn, XS_GET_DOMAIN_PATH, domain->path,
                           strlen(domain->path) + 1);
 }
 
@@ -412,26 +433,55 @@
 {
 }
 
+#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
+#define EVTCHN_DEV_MAJOR 10
+#define EVTCHN_DEV_MINOR 201
+
 /* Returns the event channel handle. */
 int domain_init(void)
 {
+       struct stat st;
+
        /* The size of the ringbuffer: half a page minus head structure. */
        ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
 
        xc_handle = talloc(talloc_autofree_context(), int);
        if (!xc_handle)
                barf_perror("Failed to allocate domain handle");
+
        *xc_handle = xc_interface_open();
        if (*xc_handle < 0)
                barf_perror("Failed to open connection to hypervisor");
+
        talloc_set_destructor(xc_handle, close_xc_handle);
 
 #ifdef TESTING
        eventchn_fd = fake_open_eventchn();
 #else
-       eventchn_fd = open("/dev/xen/evtchn", O_RDWR);
+       /* Make sure any existing device file links to correct device. */
+       if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+           (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
+               (void)unlink(EVTCHN_DEV_NAME);
+
+ reopen:
+       eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
+       if (eventchn_fd == -1) {
+               if ((errno == ENOENT) &&
+                   ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+                   (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
+                          makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
+                       goto reopen;
+               return -errno;
+       }
 #endif
        if (eventchn_fd < 0)
-               barf_perror("Failed to open connection to hypervisor");
+               barf_perror("Failed to open evtchn device");
+
+       if (xc_evtchn_bind_virq(*xc_handle, VIRQ_DOM_EXC, &virq_port))
+               barf_perror("Failed to bind to domain exception virq");
+
+       if (ioctl(eventchn_fd, EVENTCHN_BIND, virq_port) != 0)
+               barf_perror("Failed to bind to domain exception virq port");
+
        return eventchn_fd;
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_domain.h
--- a/tools/xenstore/xenstored_domain.h Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xenstored_domain.h Fri Sep  9 16:30:54 2005
@@ -28,6 +28,10 @@
 /* domid */
 void do_release(struct connection *conn, const char *domid_str);
 
+/* Enumerate domains and release connections for non-existent or dying
+ * domains. */
+void domain_cleanup(void);
+
 /* domid */
 void do_get_domain_path(struct connection *conn, const char *domid_str);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c  Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xenstored_watch.c  Fri Sep  9 16:30:54 2005
@@ -103,7 +103,8 @@
        /* Check read permission: no permission, no watch event.
         * If it doesn't exist, we need permission to read parent.
         */
-       if (!check_node_perms(conn, node, XS_PERM_READ|XS_PERM_ENOENT_OK)) {
+       if (!check_node_perms(conn, node, XS_PERM_READ|XS_PERM_ENOENT_OK) &&
+           !check_event_node(node)) {
                fprintf(stderr, "No permission for %s\n", node);
                return;
        }
@@ -131,7 +132,7 @@
        struct watch *watch;
 
        /* During transactions, don't fire watches. */
-       if (conn->transaction)
+       if (conn && conn->transaction)
                return;
 
        /* Create an event for each watch.  Don't send to self. */
@@ -213,11 +214,16 @@
                return;
        }
 
-       relative = !strstarts(vec[0], "/");
-       vec[0] = canonicalize(conn, vec[0]);
-       if (!is_valid_nodename(vec[0])) {
-               send_error(conn, errno);
-               return;
+       if (strstarts(vec[0], "@")) {
+               relative = false;
+               /* check if valid event */
+       } else {
+               relative = !strstarts(vec[0], "/");
+               vec[0] = canonicalize(conn, vec[0]);
+               if (!is_valid_nodename(vec[0])) {
+                       send_error(conn, errno);
+                       return;
+               }
        }
 
        watch = talloc(conn, struct watch);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c       Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xs.c       Fri Sep  9 16:30:54 2005
@@ -31,14 +31,17 @@
 #include <signal.h>
 #include <stdint.h>
 #include <errno.h>
+#include <sys/ioctl.h>
 #include "xs.h"
 #include "xenstored.h"
 #include "xs_lib.h"
 #include "utils.h"
+#include "xenbus_dev.h"
 
 struct xs_handle
 {
        int fd;
+       enum { SOCK, DEV } type;
 };
 
 /* Get the socket from the store daemon handle.
@@ -65,17 +68,39 @@
                h = malloc(sizeof(*h));
                if (h) {
                        h->fd = sock;
+                       h->type = SOCK;
                        return h;
                }
        }
 
        saved_errno = errno;
        close(sock);
-       free(h);
        errno = saved_errno;
        return NULL;
 }
 
+static struct xs_handle *get_dev(const char *connect_to)
+{
+       int fd, saved_errno;
+       struct xs_handle *h = NULL;
+
+       fd = open(connect_to, O_RDONLY);
+       if (fd < 0)
+               return NULL;
+
+       h = malloc(sizeof(*h));
+       if (h) {
+               h->fd = fd;
+               h->type = DEV;
+               return h;
+       }
+
+       saved_errno = errno;
+       close(fd);
+       errno = saved_errno;
+       return NULL;
+}
+
 struct xs_handle *xs_daemon_open(void)
 {
        return get_socket(xs_daemon_socket());
@@ -84,6 +109,11 @@
 struct xs_handle *xs_daemon_open_readonly(void)
 {
        return get_socket(xs_daemon_socket_ro());
+}
+
+struct xs_handle *xs_domain_open(void)
+{
+       return get_dev(xs_domain_dev());
 }
 
 void xs_daemon_close(struct xs_handle *h)
@@ -160,9 +190,9 @@
 }
 
 /* Send message to xs, get malloc'ed reply.  NULL and set errno on error. */
-static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type,
-                     const struct iovec *iovec, unsigned int num_vecs,
-                     unsigned int *len)
+static void *xs_talkv_sock(struct xs_handle *h, enum xsd_sockmsg_type type,
+                          const struct iovec *iovec, unsigned int num_vecs,
+                          unsigned int *len)
 {
        struct xsd_sockmsg msg;
        void *ret = NULL;
@@ -220,6 +250,54 @@
        close(h->fd);
        h->fd = -1;
        errno = saved_errno;
+       return NULL;
+}
+
+/* Send message to xs, get malloc'ed reply.  NULL and set errno on error. */
+static void *xs_talkv_dev(struct xs_handle *h, enum xsd_sockmsg_type type,
+                         const struct iovec *iovec, unsigned int num_vecs,
+                         unsigned int *len)
+{
+       struct xenbus_dev_talkv dt;
+       char *buf;
+       int err, buflen = 1024;
+
+ again:
+       buf = malloc(buflen);
+       if (buf == NULL) {
+               errno = ENOMEM;
+               return NULL;
+       }
+       dt.type = type;
+       dt.iovec = (struct kvec *)iovec;
+       dt.num_vecs = num_vecs;
+       dt.buf = buf;
+       dt.len = buflen;
+       err = ioctl(h->fd, IOCTL_XENBUS_DEV_TALKV, &dt);
+       if (err < 0) {
+               free(buf);
+               errno = err;
+               return NULL;
+       }
+       if (err > buflen) {
+               free(buf);
+               buflen = err;
+               goto again;
+       }
+       if (len)
+               *len = err;
+       return buf;
+}
+
+/* Send message to xs, get malloc'ed reply.  NULL and set errno on error. */
+static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type,
+                     const struct iovec *iovec, unsigned int num_vecs,
+                     unsigned int *len)
+{
+       if (h->type == SOCK)
+               return xs_talkv_sock(h, type, iovec, num_vecs, len);
+       if (h->type == DEV)
+               return xs_talkv_dev(h, type, iovec, num_vecs, len);
        return NULL;
 }
 
@@ -535,13 +613,23 @@
        return xs_bool(xs_single(h, XS_RELEASE, domid_str, NULL));
 }
 
+char *xs_get_domain_path(struct xs_handle *h, domid_t domid)
+{
+       char domid_str[MAX_STRLEN(domid)];
+
+       sprintf(domid_str, "%u", domid);
+
+       return xs_single(h, XS_GET_DOMAIN_PATH, domid_str, NULL);
+}
+
 bool xs_shutdown(struct xs_handle *h)
 {
        bool ret = xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL));
        if (ret) {
                char c;
                /* Wait for it to actually shutdown. */
-               read(h->fd, &c, 1);
+               while ((read(h->fd, &c, 1) < 0) && (errno == EINTR))
+                       continue;
        }
        return ret;
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h       Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xs.h       Fri Sep  9 16:30:54 2005
@@ -30,6 +30,7 @@
  * Returns a handle or NULL.
  */
 struct xs_handle *xs_daemon_open(void);
+struct xs_handle *xs_domain_open(void);
 
 /* Connect to the xs daemon (readonly for non-root clients).
  * Returns a handle or NULL.
@@ -133,6 +134,10 @@
  */
 bool xs_release_domain(struct xs_handle *h, domid_t domid);
 
+/* Query the home path of a domain.
+ */
+char *xs_get_domain_path(struct xs_handle *h, domid_t domid);
+
 /* Only useful for DEBUG versions */
 char *xs_debug_command(struct xs_handle *h, const char *cmd,
                       void *data, unsigned int len);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs_lib.c
--- a/tools/xenstore/xs_lib.c   Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xs_lib.c   Fri Sep  9 16:30:54 2005
@@ -64,6 +64,12 @@
        static char buf[PATH_MAX];
        sprintf(buf, "%s/transactions", xs_daemon_rootdir());
        return buf;
+}
+
+const char *xs_domain_dev(void)
+{
+       char *s = getenv("XENSTORED_DOMAIN_DEV");
+       return (s ? s : "/proc/xen/xenbus");
 }
 
 /* Simple routines for writing to sockets, etc. */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs_lib.h
--- a/tools/xenstore/xs_lib.h   Thu Sep  8 15:18:40 2005
+++ b/tools/xenstore/xs_lib.h   Fri Sep  9 16:30:54 2005
@@ -48,6 +48,7 @@
 const char *xs_daemon_socket_ro(void);
 const char *xs_daemon_store(void);
 const char *xs_daemon_transactions(void);
+const char *xs_domain_dev(void);
 
 /* Simple write function: loops for you. */
 bool xs_write_all(int fd, const void *data, unsigned int len);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xentrace/formats
--- a/tools/xentrace/formats    Thu Sep  8 15:18:40 2005
+++ b/tools/xentrace/formats    Fri Sep  9 16:30:54 2005
@@ -15,3 +15,7 @@
 0x00080001     CPU%(cpu)d      %(tsc)d         VMX_VMEXIT              [ domid 
= 0x%(1)08x, eip = 0x%(2)08x, reason = 0x%(3)08x ]
 0x00080002     CPU%(cpu)d      %(tsc)d         VMX_VECTOR              [ domid 
= 0x%(1)08x, eip = 0x%(2)08x, vector = 0x%(3)08x ]
 0x00080003     CPU%(cpu)d      %(tsc)d         VMX_INT                 [ domid 
= 0x%(1)08x, trap = 0x%(2)08x, va = 0x%(3)08x ]
+
+0x00090001      CPU%(cpu)d      %(tsc)d         VMENTRY                 
0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x
+0x00090002      CPU%(cpu)d      %(tsc)d         VMEXIT                  
0x%(1)08x 0x%(2)08x 0x%(3)08x 
+
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Thu Sep  8 15:18:40 2005
+++ b/tools/xentrace/xentrace.c Fri Sep  9 16:30:54 2005
@@ -525,7 +525,7 @@
     }
 
     if (opts.cpu_mask != 0) {
-        set_mask(opts.evt_mask, 1);
+        set_mask(opts.cpu_mask, 1);
     }
 
     if ( opts.outfile )
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/Rules.mk
--- a/xen/Rules.mk      Thu Sep  8 15:18:40 2005
+++ b/xen/Rules.mk      Fri Sep  9 16:30:54 2005
@@ -7,7 +7,6 @@
 perfc       ?= n
 perfc_arrays?= n
 trace       ?= n
-optimize    ?= y
 domu_debug  ?= n
 crash_debug ?= n
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_chinesewall_hooks.c
--- a/xen/acm/acm_chinesewall_hooks.c   Thu Sep  8 15:18:40 2005
+++ b/xen/acm/acm_chinesewall_hooks.c   Fri Sep  9 16:30:54 2005
@@ -310,6 +310,28 @@
        return 0;
 }
 
+static int
+chwall_dump_ssid_types(ssidref_t ssidref, u8 *buf, u16 len)
+{
+    int i;
+
+    /* fill in buffer */
+    if (chwall_bin_pol.max_types > len)
+        return -EFAULT;
+
+       if (ssidref >= chwall_bin_pol.max_ssidrefs)
+               return -EFAULT;
+
+    /* read types for chwall ssidref */
+    for(i=0; i< chwall_bin_pol.max_types; i++) {
+        if (chwall_bin_pol.ssidrefs[ssidref * chwall_bin_pol.max_types + i])
+            buf[i] = 1;
+        else
+            buf[i] = 0;
+    }
+    return chwall_bin_pol.max_types;
+}
+
 /***************************
  * Authorization functions
  ***************************/
@@ -492,6 +514,7 @@
        .dump_binary_policy             = chwall_dump_policy,
        .set_binary_policy              = chwall_set_policy,
        .dump_statistics                = chwall_dump_stats,
+    .dump_ssid_types        = chwall_dump_ssid_types,
        /* domain management control hooks */
        .pre_domain_create              = chwall_pre_domain_create,
        .post_domain_create             = chwall_post_domain_create,
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c        Thu Sep  8 15:18:40 2005
+++ b/xen/acm/acm_core.c        Fri Sep  9 16:30:54 2005
@@ -64,16 +64,17 @@
 void acm_set_endian(void)
 {
     u32 test = 1;
-    if (*((u8 *)&test) == 1) {
+    if (*((u8 *)&test) == 1)
+    {
        printk("ACM module running in LITTLE ENDIAN.\n");
-       little_endian = 1;
-    } else {
-       printk("ACM module running in BIG ENDIAN.\n");
-       little_endian = 0;
-    }
-}
-
-#if (ACM_USE_SECURITY_POLICY != ACM_NULL_POLICY)
+        little_endian = 1;
+    }
+    else
+    {
+        printk("ACM module running in BIG ENDIAN.\n");
+        little_endian = 0;
+    }
+}
 
 /* initialize global security policy for Xen; policy write-locked already */
 static void
@@ -101,7 +102,8 @@
      * Try all modules and see whichever could be the binary policy.
      * Adjust the initrdidx if module[1] is the binary policy.
      */
-    for (i = mbi->mods_count-1; i >= 1; i--) {
+    for (i = mbi->mods_count-1; i >= 1; i--)
+    {
         struct acm_policy_buffer *pol;
         char *_policy_start; 
         unsigned long _policy_len;
@@ -117,23 +119,32 @@
                continue; /* not a policy */
 
         pol = (struct acm_policy_buffer *)_policy_start;
-        if (ntohl(pol->magic) == ACM_MAGIC) {
+        if (ntohl(pol->magic) == ACM_MAGIC)
+        {
             rc = acm_set_policy((void *)_policy_start,
                                 (u16)_policy_len,
                                 0);
-            if (rc == ACM_OK) {
+            if (rc == ACM_OK)
+            {
                 printf("Policy len  0x%lx, start at 
%p.\n",_policy_len,_policy_start);
-                if (i == 1) {
-                    if (mbi->mods_count > 2) {
+                if (i == 1)
+                {
+                    if (mbi->mods_count > 2)
+                    {
                         *initrdidx = 2;
-                    } else {
+                    }
+                    else {
                         *initrdidx = 0;
                     }
-                } else {
+                }
+                else
+                {
                     *initrdidx = 1;
                 }
                 break;
-            } else {
+            }
+            else
+            {
                printk("Invalid policy. %d.th module line.\n", i+1);
             }
         } /* end if a binary policy definition, i.e., (ntohl(pol->magic) == 
ACM_MAGIC ) */
@@ -147,56 +158,84 @@
          const multiboot_info_t *mbi,
          unsigned long initial_images_start)
 {
-       int ret = -EINVAL;
-
-       acm_set_endian();
+       int ret = ACM_OK;
+
+    acm_set_endian();
        write_lock(&acm_bin_pol_rwlock);
-
-       if (ACM_USE_SECURITY_POLICY == ACM_CHINESE_WALL_POLICY) {
-               acm_init_binary_policy(NULL, NULL);
-               acm_init_chwall_policy();
+    acm_init_binary_policy(NULL, NULL);
+
+    /* set primary policy component */
+    switch ((ACM_USE_SECURITY_POLICY) & 0x0f)
+    {
+
+    case ACM_CHINESE_WALL_POLICY:
+        acm_init_chwall_policy();
                acm_bin_pol.primary_policy_code = ACM_CHINESE_WALL_POLICY;
                acm_primary_ops = &acm_chinesewall_ops;
+        break;
+
+    case ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY:
+        acm_init_ste_policy();
+               acm_bin_pol.primary_policy_code = 
ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY;
+               acm_primary_ops = &acm_simple_type_enforcement_ops;
+        break;
+
+    default:
+        /* NULL or Unknown policy not allowed primary;
+         * NULL/NULL will not compile this code */
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* secondary policy component part */
+    switch ((ACM_USE_SECURITY_POLICY) >> 4) {
+    case ACM_NULL_POLICY:
                acm_bin_pol.secondary_policy_code = ACM_NULL_POLICY;
                acm_secondary_ops = &acm_null_ops;
-               ret = ACM_OK;
-       } else if (ACM_USE_SECURITY_POLICY == 
ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY) {
-               acm_init_binary_policy(NULL, NULL);
+               break;
+
+    case ACM_CHINESE_WALL_POLICY:
+        if (acm_bin_pol.primary_policy_code == ACM_CHINESE_WALL_POLICY)
+        {   /* not a valid combination */
+            ret = -EINVAL;
+            goto out;
+        }
+               acm_init_chwall_policy();
+        acm_bin_pol.secondary_policy_code = ACM_CHINESE_WALL_POLICY;
+               acm_secondary_ops = &acm_chinesewall_ops;
+        break;
+
+    case ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY:
+        if (acm_bin_pol.primary_policy_code == 
ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+        {   /* not a valid combination */
+            ret = -EINVAL;
+            goto out;
+        }
                acm_init_ste_policy();
-               acm_bin_pol.primary_policy_code = 
ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY;
-               acm_primary_ops = &acm_simple_type_enforcement_ops;
-               acm_bin_pol.secondary_policy_code = ACM_NULL_POLICY;
-               acm_secondary_ops = &acm_null_ops;
-               ret = ACM_OK;
-       } else if (ACM_USE_SECURITY_POLICY == 
ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY) {
-               acm_init_binary_policy(NULL, NULL);
-               acm_init_chwall_policy();
-               acm_init_ste_policy();
-               acm_bin_pol.primary_policy_code = ACM_CHINESE_WALL_POLICY;
-               acm_primary_ops = &acm_chinesewall_ops;
                acm_bin_pol.secondary_policy_code = 
ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY;
                acm_secondary_ops = &acm_simple_type_enforcement_ops;
-               ret = ACM_OK;
-       } else if (ACM_USE_SECURITY_POLICY == ACM_NULL_POLICY) {
-               acm_init_binary_policy(NULL, NULL);
-               acm_bin_pol.primary_policy_code = ACM_NULL_POLICY;
-               acm_primary_ops = &acm_null_ops;
-               acm_bin_pol.secondary_policy_code = ACM_NULL_POLICY;
-               acm_secondary_ops = &acm_null_ops;
-               ret = ACM_OK;
+        break;
+
+    default:
+        ret = -EINVAL;
+        goto out;
+    }
+
+ out:
+       write_unlock(&acm_bin_pol_rwlock);
+
+       if (ret != ACM_OK)
+    {
+        printk("%s: Error setting policies.\n", __func__);
+        /* here one could imagine a clean panic */
+               return -EINVAL;
        }
-       write_unlock(&acm_bin_pol_rwlock);
-
-       if (ret != ACM_OK)
-               return -EINVAL;         
        acm_setup(initrdidx, mbi, initial_images_start);
        printk("%s: Enforcing Primary %s, Secondary %s.\n", __func__, 
-              ACM_POLICY_NAME(acm_bin_pol.primary_policy_code), 
ACM_POLICY_NAME(acm_bin_pol.secondary_policy_code));
+              ACM_POLICY_NAME(acm_bin_pol.primary_policy_code),
+           ACM_POLICY_NAME(acm_bin_pol.secondary_policy_code));
        return ret;
 }
-
-
-#endif
 
 int
 acm_init_domain_ssid(domid_t id, ssidref_t ssidref)
@@ -205,7 +244,8 @@
        struct domain *subj = find_domain_by_id(id);
        int ret1, ret2;
        
-       if (subj == NULL) {
+       if (subj == NULL)
+    {
                printk("%s: ACM_NULL_POINTER ERROR (id=%x).\n", __func__, id);
                return ACM_NULL_POINTER_ERROR;
        }
@@ -235,14 +275,16 @@
        else
                ret2 = ACM_OK;
 
-       if ((ret1 != ACM_OK) || (ret2 != ACM_OK)) {
+       if ((ret1 != ACM_OK) || (ret2 != ACM_OK))
+    {
                printk("%s: ERROR instantiating individual ssids for domain 
0x%02x.\n",
                       __func__, subj->domain_id);
                acm_free_domain_ssid(ssid);     
                put_domain(subj);
                return ACM_INIT_SSID_ERROR;
        }
-       printk("%s: assigned domain %x the ssidref=%x.\n", __func__, id, 
ssid->ssidref);
+       printk("%s: assigned domain %x the ssidref=%x.\n",
+           __func__, id, ssid->ssidref);
        put_domain(subj);
        return ACM_OK;
 }
@@ -254,11 +296,12 @@
        domid_t id;
 
        /* domain is already gone, just ssid is left */
-       if (ssid == NULL) {
+       if (ssid == NULL)
+    {
                printk("%s: ACM_NULL_POINTER ERROR.\n", __func__);
                return ACM_NULL_POINTER_ERROR;
        }
-               id = ssid->domainid;
+    id = ssid->domainid;
        ssid->subject        = NULL;
 
        if (acm_primary_ops->free_domain_ssid != NULL) /* null policy */
@@ -268,6 +311,7 @@
                acm_secondary_ops->free_domain_ssid(ssid->secondary_ssid);
        ssid->secondary_ssid = NULL;
        xfree(ssid);
-       printkd("%s: Freed individual domain ssid (domain=%02x).\n",__func__, 
id);
+       printkd("%s: Freed individual domain ssid (domain=%02x).\n",
+            __func__, id);
        return ACM_OK;
 }
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_null_hooks.c
--- a/xen/acm/acm_null_hooks.c  Thu Sep  8 15:18:40 2005
+++ b/xen/acm/acm_null_hooks.c  Fri Sep  9 16:30:54 2005
@@ -14,13 +14,13 @@
 #include <acm/acm_hooks.h>
 
 static int
-null_init_domain_ssid(void **chwall_ssid, ssidref_t ssidref)
+null_init_domain_ssid(void **ssid, ssidref_t ssidref)
 {
        return ACM_OK;
 }
 
 static void
-null_free_domain_ssid(void *chwall_ssid)
+null_free_domain_ssid(void *ssid)
 {
        return;
 }
@@ -44,6 +44,14 @@
        return 0;
 }
 
+static int
+null_dump_ssid_types(ssidref_t ssidref, u8 *buffer, u16 buf_size)
+{
+    /* no types */
+    return 0;
+}
+
+
 /* now define the hook structure similarly to LSM */
 struct acm_operations acm_null_ops = {
        .init_domain_ssid               = null_init_domain_ssid,
@@ -51,6 +59,7 @@
        .dump_binary_policy             = null_dump_binary_policy,
        .set_binary_policy              = null_set_binary_policy,
        .dump_statistics                = null_dump_stats,
+    .dump_ssid_types        = null_dump_ssid_types,
        /* domain management control hooks */
        .pre_domain_create              = NULL,
        .post_domain_create             = NULL,
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_policy.c
--- a/xen/acm/acm_policy.c      Thu Sep  8 15:18:40 2005
+++ b/xen/acm/acm_policy.c      Fri Sep  9 16:30:54 2005
@@ -26,8 +26,8 @@
 #include <xen/lib.h>
 #include <xen/delay.h>
 #include <xen/sched.h>
+#include <acm/acm_core.h>
 #include <public/acm_ops.h>
-#include <acm/acm_core.h>
 #include <acm/acm_hooks.h>
 #include <acm/acm_endian.h>
 
@@ -37,14 +37,16 @@
        u8 *policy_buffer = NULL;
        struct acm_policy_buffer *pol;
        
-       if (buf_size < sizeof(struct acm_policy_buffer))
+    if (buf_size < sizeof(struct acm_policy_buffer))
                return -EFAULT;
 
        /* 1. copy buffer from domain */
        if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL)
-           goto error_free;
+           return -ENOMEM;
+
        if (isuserbuffer) {
-               if (copy_from_user(policy_buffer, buf, buf_size)) {
+               if (copy_from_user(policy_buffer, buf, buf_size))
+        {
                        printk("%s: Error copying!\n",__func__);
                        goto error_free;
                }
@@ -57,11 +59,13 @@
        if ((ntohl(pol->magic) != ACM_MAGIC) || 
            (ntohl(pol->policy_version) != ACM_POLICY_VERSION) ||
            (ntohl(pol->primary_policy_code) != 
acm_bin_pol.primary_policy_code) ||
-           (ntohl(pol->secondary_policy_code) != 
acm_bin_pol.secondary_policy_code)) {
+           (ntohl(pol->secondary_policy_code) != 
acm_bin_pol.secondary_policy_code))
+    {
                printkd("%s: Wrong policy magics or versions!\n", __func__);
                goto error_free;
        }
-       if (buf_size != ntohl(pol->len)) {
+       if (buf_size != ntohl(pol->len))
+    {
                printk("%s: ERROR in buf size.\n", __func__);
                goto error_free;
        }
@@ -72,27 +76,25 @@
        /* 3. set primary policy data */
        if (acm_primary_ops->set_binary_policy(buf + 
ntohl(pol->primary_buffer_offset),
                                                
ntohl(pol->secondary_buffer_offset) -
-                                              
ntohl(pol->primary_buffer_offset))) {
+                                              
ntohl(pol->primary_buffer_offset)))
                goto error_lock_free;
-       }
+
        /* 4. set secondary policy data */
        if (acm_secondary_ops->set_binary_policy(buf + 
ntohl(pol->secondary_buffer_offset),
                                                 ntohl(pol->len) - 
-                                                
ntohl(pol->secondary_buffer_offset))) {
+                                                
ntohl(pol->secondary_buffer_offset)))
                goto error_lock_free;
-       }
+
        write_unlock(&acm_bin_pol_rwlock);
-       if (policy_buffer != NULL)
-               xfree(policy_buffer);
+       xfree(policy_buffer);
        return ACM_OK;
 
  error_lock_free:
        write_unlock(&acm_bin_pol_rwlock);
  error_free:
        printk("%s: Error setting policy.\n", __func__);
-       if (policy_buffer != NULL)
-               xfree(policy_buffer);
-       return -ENOMEM;
+    xfree(policy_buffer);
+       return -EFAULT;
 }
 
 int
@@ -102,11 +104,14 @@
      int ret;
      struct acm_policy_buffer *bin_pol;
        
+    if (buf_size < sizeof(struct acm_policy_buffer))
+               return -EFAULT;
+
      if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL)
            return -ENOMEM;
 
      read_lock(&acm_bin_pol_rwlock);
-     /* future: read policy from file and set it */
+
      bin_pol = (struct acm_policy_buffer *)policy_buffer;
      bin_pol->magic = htonl(ACM_MAGIC);
      bin_pol->primary_policy_code = htonl(acm_bin_pol.primary_policy_code);
@@ -118,27 +123,30 @@
      
      ret = acm_primary_ops->dump_binary_policy (policy_buffer + 
ntohl(bin_pol->primary_buffer_offset),
                                       buf_size - 
ntohl(bin_pol->primary_buffer_offset));
-     if (ret < 0) {
-            printk("%s: ERROR creating chwallpolicy buffer.\n", __func__);
-            read_unlock(&acm_bin_pol_rwlock);
-            return -1;
-     }
+     if (ret < 0)
+         goto error_free_unlock;
+
      bin_pol->len = htonl(ntohl(bin_pol->len) + ret);
      bin_pol->secondary_buffer_offset = htonl(ntohl(bin_pol->len));
 
      ret = acm_secondary_ops->dump_binary_policy(policy_buffer + 
ntohl(bin_pol->secondary_buffer_offset),
                                    buf_size - 
ntohl(bin_pol->secondary_buffer_offset));
-     if (ret < 0) {
-            printk("%s: ERROR creating chwallpolicy buffer.\n", __func__);
-            read_unlock(&acm_bin_pol_rwlock);
-            return -1;
-     }
+     if (ret < 0)
+         goto error_free_unlock;
+
      bin_pol->len = htonl(ntohl(bin_pol->len) + ret);
-     read_unlock(&acm_bin_pol_rwlock);
      if (copy_to_user(buf, policy_buffer, ntohl(bin_pol->len)))
-            return -EFAULT;
+            goto error_free_unlock;
+
+     read_unlock(&acm_bin_pol_rwlock);
      xfree(policy_buffer);
      return ACM_OK;
+
+ error_free_unlock:
+     read_unlock(&acm_bin_pol_rwlock);
+     printk("%s: Error getting policy.\n", __func__);
+     xfree(policy_buffer);
+     return -EFAULT;
 }
 
 int
@@ -185,4 +193,62 @@
      return -EFAULT;
 }
 
+
+int
+acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size)
+{
+    /* send stats to user space */
+     u8 *ssid_buffer;
+     int ret;
+     struct acm_ssid_buffer *acm_ssid;
+     if (buf_size < sizeof(struct acm_ssid_buffer))
+               return -EFAULT;
+
+     if ((ssid_buffer = xmalloc_array(u8, buf_size)) == NULL)
+           return -ENOMEM;
+
+     read_lock(&acm_bin_pol_rwlock);
+
+     acm_ssid = (struct acm_ssid_buffer *)ssid_buffer;
+     acm_ssid->len = sizeof(struct acm_ssid_buffer);
+     acm_ssid->ssidref = ssidref;
+     acm_ssid->primary_policy_code = acm_bin_pol.primary_policy_code;
+     acm_ssid->secondary_policy_code = acm_bin_pol.secondary_policy_code;
+     acm_ssid->primary_types_offset = acm_ssid->len;
+
+     /* ret >= 0 --> ret == max_types */
+     ret = acm_primary_ops->dump_ssid_types(ACM_PRIMARY(ssidref),
+                                            ssid_buffer + 
acm_ssid->primary_types_offset,
+                                            buf_size - 
acm_ssid->primary_types_offset);
+     if (ret < 0)
+         goto error_free_unlock;
+
+     acm_ssid->len += ret;
+     acm_ssid->primary_max_types = ret;
+
+     acm_ssid->secondary_types_offset = acm_ssid->len;
+
+     ret = acm_secondary_ops->dump_ssid_types(ACM_SECONDARY(ssidref),
+                                              ssid_buffer + 
acm_ssid->secondary_types_offset,
+                                              buf_size - 
acm_ssid->secondary_types_offset);
+     if (ret < 0)
+         goto error_free_unlock;
+
+     acm_ssid->len += ret;
+     acm_ssid->secondary_max_types = ret;
+
+     if (copy_to_user(buf, ssid_buffer, acm_ssid->len))
+            goto error_free_unlock;
+
+     read_unlock(&acm_bin_pol_rwlock);
+     xfree(ssid_buffer);
+     return ACM_OK;
+
+ error_free_unlock:
+     read_unlock(&acm_bin_pol_rwlock);
+     printk("%s: Error getting ssid.\n", __func__);
+     xfree(ssid_buffer);
+     return -ENOMEM;
+}
+
 /*eof*/
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_simple_type_enforcement_hooks.c
--- a/xen/acm/acm_simple_type_enforcement_hooks.c       Thu Sep  8 15:18:40 2005
+++ b/xen/acm/acm_simple_type_enforcement_hooks.c       Fri Sep  9 16:30:54 2005
@@ -383,6 +383,27 @@
     return sizeof(struct acm_ste_stats_buffer);
 }
 
+static int
+ste_dump_ssid_types(ssidref_t ssidref, u8 *buf, u16 len)
+{
+    int i;
+
+    /* fill in buffer */
+    if (ste_bin_pol.max_types > len)
+        return -EFAULT;
+
+       if (ssidref >= ste_bin_pol.max_ssidrefs)
+               return -EFAULT;
+
+    /* read types for chwall ssidref */
+    for(i=0; i< ste_bin_pol.max_types; i++) {
+               if (ste_bin_pol.ssidrefs[ssidref * ste_bin_pol.max_types + i])
+            buf[i] = 1;
+        else
+            buf[i] = 0;
+    }
+    return ste_bin_pol.max_types;
+}
 
 /* we need to go through this before calling the hooks,
  * returns 1 == cache hit */
@@ -625,22 +646,23 @@
        /* policy management services */
        .init_domain_ssid               = ste_init_domain_ssid,
        .free_domain_ssid               = ste_free_domain_ssid,
-       .dump_binary_policy             = ste_dump_policy,
-       .set_binary_policy              = ste_set_policy,
+       .dump_binary_policy     = ste_dump_policy,
+       .set_binary_policy      = ste_set_policy,
        .dump_statistics                = ste_dump_stats,
+    .dump_ssid_types        = ste_dump_ssid_types,
        /* domain management control hooks */
        .pre_domain_create              = ste_pre_domain_create,
-       .post_domain_create             = NULL,
-       .fail_domain_create             = NULL,
-       .post_domain_destroy            = ste_post_domain_destroy,
+       .post_domain_create         = NULL,
+       .fail_domain_create     = NULL,
+       .post_domain_destroy    = ste_post_domain_destroy,
        /* event channel control hooks */
-       .pre_eventchannel_unbound       = ste_pre_eventchannel_unbound,
+       .pre_eventchannel_unbound   = ste_pre_eventchannel_unbound,
        .fail_eventchannel_unbound      = NULL,
        .pre_eventchannel_interdomain   = ste_pre_eventchannel_interdomain,
        .fail_eventchannel_interdomain  = NULL,
        /* grant table control hooks */
-       .pre_grant_map_ref              = ste_pre_grant_map_ref,
-       .fail_grant_map_ref             = NULL,
-       .pre_grant_setup                = ste_pre_grant_setup,
-       .fail_grant_setup               = NULL,
+       .pre_grant_map_ref      = ste_pre_grant_map_ref,
+       .fail_grant_map_ref     = NULL,
+       .pre_grant_setup        = ste_pre_grant_setup,
+       .fail_grant_setup       = NULL,
 };
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/Makefile     Fri Sep  9 16:30:54 2005
@@ -17,7 +17,7 @@
 
 OBJS := $(patsubst shadow%.o,,$(OBJS)) # drop all
 ifeq ($(TARGET_SUBARCH),x86_64) 
- OBJS += shadow.o shadow_public.o      # x86_64: new code
+ OBJS += shadow.o shadow_public.o shadow_guest32.o     # x86_64: new code
 endif
 ifeq ($(TARGET_SUBARCH),x86_32) 
  ifneq ($(pae),n)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/Rules.mk
--- a/xen/arch/x86/Rules.mk     Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/Rules.mk     Fri Sep  9 16:30:54 2005
@@ -13,10 +13,8 @@
 CFLAGS  += -I$(BASEDIR)/include/asm-x86/mach-generic
 CFLAGS  += -I$(BASEDIR)/include/asm-x86/mach-default
 
-ifeq ($(optimize),y)
+ifneq ($(debug),y)
 CFLAGS  += -O3 -fomit-frame-pointer
-else
-x86_32/usercopy.o: CFLAGS += -O1
 endif
 
 # Prevent floating-point variables from creeping into Xen.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/boot/x86_32.S        Fri Sep  9 16:30:54 2005
@@ -9,6 +9,8 @@
                .text
 
 ENTRY(start)
+ENTRY(stext)
+ENTRY(_stext)
         jmp __start
 
         .align 4
@@ -260,6 +262,3 @@
         .org 0x2000 + STACK_SIZE + PAGE_SIZE
 
 #endif /* CONFIG_X86_PAE */
-
-ENTRY(stext)
-ENTRY(_stext)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/boot/x86_64.S        Fri Sep  9 16:30:54 2005
@@ -10,6 +10,8 @@
         .code32
 
 ENTRY(start)
+ENTRY(stext)
+ENTRY(_stext)
         jmp __start
 
         .org    0x004
@@ -267,5 +269,3 @@
 
         .org 0x4000 + STACK_SIZE + PAGE_SIZE
         .code64
-ENTRY(stext)
-ENTRY(_stext)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/cdb.c
--- a/xen/arch/x86/cdb.c        Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/cdb.c        Fri Sep  9 16:30:54 2005
@@ -21,7 +21,7 @@
    debugger. so avoid it. */
 #define dbg_printk(...)
 
-static unsigned char opt_cdb[30] = "none";
+static char opt_cdb[30] = "none";
 string_param("cdb", opt_cdb);
 
 struct xendbg_context {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/dom0_ops.c   Fri Sep  9 16:30:54 2005
@@ -19,6 +19,7 @@
 #include <xen/console.h>
 #include <asm/shadow.h>
 #include <asm/irq.h>
+#include <asm/processor.h>
 #include <public/sched_ctl.h>
 
 #include <asm/mtrr.h>
@@ -188,9 +189,11 @@
         pi->total_pages      = max_page;
         pi->free_pages       = avail_domheap_pages();
         pi->cpu_khz          = cpu_khz;
-
-        copy_to_user(u_dom0_op, op, sizeof(*op));
+        memset( pi->hw_cap, 0, sizeof(pi->hw_cap) );
+        memcpy( pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4 );
         ret = 0;
+        if( copy_to_user(u_dom0_op, op, sizeof(*op)) )
+           ret = -EINVAL;
     }
     break;
     
@@ -389,9 +392,31 @@
     }
     break;
 
+    case DOM0_PHYSICAL_MEMORY_MAP:
+    {
+        struct dom0_memory_map_entry entry;
+        int i;
+
+        for ( i = 0; i < e820.nr_map; i++ )
+        {
+            if ( i >= op->u.physical_memory_map.max_map_entries )
+                break;
+            entry.start  = e820.map[i].addr;
+            entry.end    = e820.map[i].addr + e820.map[i].size;
+            entry.is_ram = (e820.map[i].type == E820_RAM);
+            (void)copy_to_user(
+                &op->u.physical_memory_map.memory_map[i],
+                &entry, sizeof(entry));
+        }
+
+        op->u.physical_memory_map.nr_map_entries = i;
+        (void)copy_to_user(u_dom0_op, op, sizeof(*op));
+    }
+    break;
+
     default:
         ret = -ENOSYS;
-
+        break;
     }
 
     return ret;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/domain.c     Fri Sep  9 16:30:54 2005
@@ -255,13 +255,13 @@
     v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
     v->cpumap = CPUMAP_RUNANYWHERE;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
-    machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
-                           PAGE_SHIFT] = INVALID_M2P_ENTRY;
+    set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
+            INVALID_M2P_ENTRY);
     
     d->arch.mm_perdomain_pt = alloc_xenheap_page();
     memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE);
-    machine_to_phys_mapping[virt_to_phys(d->arch.mm_perdomain_pt) >> 
-                           PAGE_SHIFT] = INVALID_M2P_ENTRY;
+    set_pfn_from_mfn(virt_to_phys(d->arch.mm_perdomain_pt) >> PAGE_SHIFT,
+            INVALID_M2P_ENTRY);
     v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
     v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
         l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
@@ -381,11 +381,13 @@
 out:
     free_vmcs(vmcs);
     if(v->arch.arch_vmx.io_bitmap_a != 0) {
-        free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000));
+        free_xenheap_pages(
+            v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000));
         v->arch.arch_vmx.io_bitmap_a = 0;
     }
     if(v->arch.arch_vmx.io_bitmap_b != 0) {
-        free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000));
+        free_xenheap_pages(
+            v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000));
         v->arch.arch_vmx.io_bitmap_b = 0;
     }
     v->arch.arch_vmx.vmcs = 0;
@@ -885,8 +887,13 @@
     return switch_required;
 }
 
-void sync_lazy_execstate_cpu(unsigned int cpu)
-{
+void sync_vcpu_execstate(struct vcpu *v)
+{
+    unsigned int cpu = v->processor;
+
+    if ( !cpu_isset(cpu, v->domain->cpumask) )
+        return;
+
     if ( cpu == smp_processor_id() )
     {
         (void)__sync_lazy_execstate();
@@ -967,11 +974,13 @@
     BUG_ON(v->arch.arch_vmx.vmcs == NULL);
     free_vmcs(v->arch.arch_vmx.vmcs);
     if(v->arch.arch_vmx.io_bitmap_a != 0) {
-        free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000));
+        free_xenheap_pages(
+            v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000));
         v->arch.arch_vmx.io_bitmap_a = 0;
     }
     if(v->arch.arch_vmx.io_bitmap_b != 0) {
-        free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000));
+        free_xenheap_pages(
+            v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000));
         v->arch.arch_vmx.io_bitmap_b = 0;
     }
     v->arch.arch_vmx.vmcs = 0;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/domain_build.c       Fri Sep  9 16:30:54 2005
@@ -20,6 +20,7 @@
 #include <asm/processor.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
+#include <asm/physdev.h>
 #include <asm/shadow.h>
 
 static long dom0_nrpages;
@@ -74,15 +75,12 @@
     struct pfn_info *page;
     unsigned int order;
     /*
-     * Allocate up to 2MB at a time:
-     *  1. This prevents overflow of get_order() when allocating more than
-     *     4GB to domain 0 on a PAE machine.
-     *  2. It prevents allocating very large chunks from DMA pools before
-     *     the >4GB pool is fully depleted.
+     * Allocate up to 2MB at a time: It prevents allocating very large chunks
+     * from DMA pools before the >4GB pool is fully depleted.
      */
     if ( max_pages > (2UL << (20 - PAGE_SHIFT)) )
         max_pages = 2UL << (20 - PAGE_SHIFT);
-    order = get_order(max_pages << PAGE_SHIFT);
+    order = get_order_from_pages(max_pages);
     if ( (max_pages & (max_pages-1)) != 0 )
         order--;
     while ( (page = alloc_domheap_pages(d, order, 0)) == NULL )
@@ -217,14 +215,14 @@
     vinitrd_start    = round_pgup(dsi.v_end);
     vinitrd_end      = vinitrd_start + initrd_len;
     vphysmap_start   = round_pgup(vinitrd_end);
-    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
-    vpt_start        = round_pgup(vphysmap_end);
+    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
+    vstartinfo_start = round_pgup(vphysmap_end);
+    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
+    vpt_start        = vstartinfo_end;
     for ( nr_pt_pages = 2; ; nr_pt_pages++ )
     {
         vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
-        vstartinfo_start = vpt_end;
-        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
-        vstack_start     = vstartinfo_end;
+        vstack_start     = vpt_end;
         vstack_end       = vstack_start + PAGE_SIZE;
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
@@ -251,7 +249,7 @@
 #endif
     }
 
-    order = get_order(v_end - dsi.v_start);
+    order = get_order_from_bytes(v_end - dsi.v_start);
     if ( (1UL << order) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
@@ -271,15 +269,15 @@
            " Loaded kernel: %p->%p\n"
            " Init. ramdisk: %p->%p\n"
            " Phys-Mach map: %p->%p\n"
+           " Start info:    %p->%p\n"
            " Page tables:   %p->%p\n"
-           " Start info:    %p->%p\n"
            " Boot stack:    %p->%p\n"
            " TOTAL:         %p->%p\n",
            _p(dsi.v_kernstart), _p(dsi.v_kernend), 
            _p(vinitrd_start), _p(vinitrd_end),
            _p(vphysmap_start), _p(vphysmap_end),
+           _p(vstartinfo_start), _p(vstartinfo_end),
            _p(vpt_start), _p(vpt_end),
-           _p(vstartinfo_start), _p(vstartinfo_end),
            _p(vstack_start), _p(vstack_end),
            _p(dsi.v_start), _p(v_end));
     printk(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
@@ -592,8 +590,7 @@
     if ( opt_dom0_translate )
     {
         si->shared_info  = d->next_io_page << PAGE_SHIFT;
-        set_machinetophys(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
-                          d->next_io_page);
+        set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, 
d->next_io_page);
         d->next_io_page++;
     }
     else
@@ -613,8 +610,8 @@
         if ( !opt_dom0_translate && (pfn > REVERSE_START) )
             mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
-        ((u32 *)vphysmap_start)[pfn] = mfn;
-        machine_to_phys_mapping[mfn] = pfn;
+        ((unsigned long *)vphysmap_start)[pfn] = mfn;
+        set_pfn_from_mfn(mfn, pfn);
     }
     while ( pfn < nr_pages )
     {
@@ -626,8 +623,8 @@
 #ifndef NDEBUG
 #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
 #endif
-            ((u32 *)vphysmap_start)[pfn] = mfn;
-            machine_to_phys_mapping[mfn] = pfn;
+            ((unsigned long *)vphysmap_start)[pfn] = mfn;
+            set_pfn_from_mfn(mfn, pfn);
 #undef pfn
             page++; pfn++;
         }
@@ -708,6 +705,18 @@
         printk("dom0: shadow setup done\n");
     }
 
+    /*
+     * Modify I/O port access permissions.
+     */
+    /* Master Interrupt Controller (PIC). */
+    physdev_modify_ioport_access_range(dom0, 0, 0x20, 2);
+    /* Slave Interrupt Controller (PIC). */
+    physdev_modify_ioport_access_range(dom0, 0, 0xA0, 2);
+    /* Interval Timer (PIT). */
+    physdev_modify_ioport_access_range(dom0, 0, 0x40, 4);
+    /* PIT Channel 2 / PC Speaker Control. */
+    physdev_modify_ioport_access_range(dom0, 0, 0x61, 1);
+
     return 0;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/mm.c Fri Sep  9 16:30:54 2005
@@ -1450,9 +1450,9 @@
                          ((type & PGT_type_mask) != PGT_l1_page_table) )
                         MEM_LOG("Bad type (saw %" PRtype_info
                                 "!= exp %" PRtype_info ") "
-                                "for mfn %lx (pfn %x)",
+                                "for mfn %lx (pfn %lx)",
                                 x, type, page_to_pfn(page),
-                                machine_to_phys_mapping[page_to_pfn(page)]);
+                                get_pfn_from_mfn(page_to_pfn(page)));
                     return 0;
                 }
                 else if ( (x & PGT_va_mask) == PGT_va_mutable )
@@ -2206,7 +2206,7 @@
                 printk("privileged guest dom%d requests pfn=%lx to "
                        "map mfn=%lx for dom%d\n",
                        d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id);
-                set_machinetophys(mfn, gpfn);
+                set_pfn_from_mfn(mfn, gpfn);
                 set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
                 okay = 1;
                 shadow_unlock(FOREIGNDOM);
@@ -2225,7 +2225,7 @@
                 break;
             }
 
-            set_machinetophys(mfn, gpfn);
+            set_pfn_from_mfn(mfn, gpfn);
             okay = 1;
 
             /*
@@ -3185,7 +3185,7 @@
     struct pfn_info *page;
     l1_pgentry_t     pte;
     l2_pgentry_t    *pl2e, l2e;
-    int              which;
+    int              which, flags;
     unsigned long    l2_idx;
 
     if ( unlikely(shadow_mode_enabled(d)) )
@@ -3206,8 +3206,24 @@
     pfn  = l1e_get_pfn(pte);
     page = &frame_table[pfn];
 
+#ifdef CONFIG_X86_64
+#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT | _PAGE_USER)
+#else
+#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT)
+#endif
+
+    /*
+     * Check the required flags for a valid wrpt mapping. If the page is
+     * already writable then we can return straight to the guest (SMP race).
+     * We decide whether or not to propagate the fault by testing for write
+     * permissions in page directories by writing back to the linear mapping.
+     */
+    if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
+        return !__put_user(
+            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1);
+
     /* We are looking only for read-only mappings of p.t. pages. */
-    if ( ((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) != _PAGE_PRESENT) ||
+    if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
          ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
          ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
          (page_get_owner(page) != d) )
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/setup.c      Fri Sep  9 16:30:54 2005
@@ -12,6 +12,8 @@
 #include <xen/trace.h>
 #include <xen/multiboot.h>
 #include <xen/domain_page.h>
+#include <xen/compile.h>
+#include <public/version.h>
 #include <asm/bitops.h>
 #include <asm/smp.h>
 #include <asm/processor.h>
@@ -90,6 +92,8 @@
 unsigned long mmu_cr4_features = X86_CR4_PSE;
 #endif
 EXPORT_SYMBOL(mmu_cr4_features);
+
+int hvm_enabled = 0; /* can we run unmodified guests */
 
 struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
 
@@ -529,6 +533,45 @@
     startup_cpu_idle_loop();
 }
 
+void arch_get_xen_caps(xen_capabilities_info_t *info)
+{
+    char *p=info->caps;
+
+    *p=0;
+
+#ifdef CONFIG_X86_32
+
+#ifndef CONFIG_X86_PAE       
+    p+=sprintf(p,"xen_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION);    
+    if(hvm_enabled)
+    {
+        p+=sprintf(p,"hvm_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION);    
+    }
+#else
+    p+=sprintf(p,"xen_%d.%d_x86_32p ",XEN_VERSION,XEN_SUBVERSION);
+    if(hvm_enabled)
+    {
+        //p+=sprintf(p,"hvm_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION);    
+        //p+=sprintf(p,"hvm_%d.%d_x86_32p ",XEN_VERSION,XEN_SUBVERSION);    
+    }
+
+#endif        
+
+#else /* !CONFIG_X86_32 */
+    p+=sprintf(p,"xen_%d.%d_x86_64 ",XEN_VERSION,XEN_SUBVERSION);
+    if(hvm_enabled)
+    {
+        //p+=sprintf(p,"hvm_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION);    
+        //p+=sprintf(p,"hvm_%d.%d_x86_32p ",XEN_VERSION,XEN_SUBVERSION);    
+        p+=sprintf(p,"hvm_%d.%d_x86_64 ",XEN_VERSION,XEN_SUBVERSION);    
+    }
+#endif
+    
+    BUG_ON((p-info->caps)>sizeof(*info));
+
+    if(p>info->caps) *(p-1) = 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/shadow.c     Fri Sep  9 16:30:54 2005
@@ -53,6 +53,9 @@
     struct domain *d, unsigned long gpfn, unsigned long gmfn);
 static void shadow_map_into_current(struct vcpu *v,
     unsigned long va, unsigned int from, unsigned int to);
+static inline void validate_bl2e_change( struct domain *d,
+       guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
+
 #endif
 
 /********
@@ -217,10 +220,38 @@
         }
         else
         {
-            page = alloc_domheap_page(NULL);
-            void *l1 = map_domain_page(page_to_pfn(page));
-            memset(l1, 0, PAGE_SIZE);
-            unmap_domain_page(l1);
+            if (d->arch.ops->guest_paging_levels == PAGING_L2)
+            {
+#if CONFIG_PAGING_LEVELS >= 4
+                /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1
+                 * So need allocate 2 continues shadow L1 each time.
+                 */
+                page = alloc_domheap_pages(NULL, SL1_ORDER, 0);
+                if (!page)
+                    domain_crash_synchronous();
+
+                void *l1_0 = map_domain_page(page_to_pfn(page));
+                memset(l1_0,0,PAGE_SIZE);
+                unmap_domain_page(l1_0);
+                void *l1_1 = map_domain_page(page_to_pfn(page+1));
+                memset(l1_1,0,PAGE_SIZE);
+                unmap_domain_page(l1_1);
+#else
+                page = alloc_domheap_page(NULL);
+                if (!page)
+                    domain_crash_synchronous();
+                void *l1 = map_domain_page(page_to_pfn(page));
+                memset(l1, 0, PAGE_SIZE);
+                unmap_domain_page(l1);
+#endif
+            }
+            else
+            {
+                page = alloc_domheap_page(NULL);
+                void *l1 = map_domain_page(page_to_pfn(page));
+                memset(l1, 0, PAGE_SIZE);
+                unmap_domain_page(l1);
+            }
         }
     }
     else {
@@ -331,7 +362,21 @@
   fail:
     FSH_LOG("promotion of pfn=%lx mfn=%lx failed!  external gnttab refs?",
             gpfn, gmfn);
-    free_domheap_page(page);
+    if (psh_type == PGT_l1_shadow)
+    {
+        if (d->arch.ops->guest_paging_levels == PAGING_L2)
+        {
+#if CONFIG_PAGING_LEVELS >=4
+            free_domheap_pages(page, SL1_ORDER);
+#else
+            free_domheap_page(page);
+#endif
+        }
+        else
+            free_domheap_page(page);
+    }
+    else
+        free_domheap_page(page);
     return 0;
 }
 
@@ -478,13 +523,15 @@
 { 
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    l1_pgentry_t *gpl1e, *spl1e;
-    l2_pgentry_t gl2e, sl2e;
+    l1_pgentry_t *spl1e;
+    l2_pgentry_t sl2e;
+    guest_l1_pgentry_t *gpl1e;
+    guest_l2_pgentry_t gl2e;
     unsigned long gl1pfn, gl1mfn, sl1mfn;
     int i, init_table = 0;
 
     __guest_get_l2e(v, va, &gl2e);
-    ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
+    ASSERT(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT);
     gl1pfn = l2e_get_pfn(gl2e);
 
     if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
@@ -523,28 +570,49 @@
     ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
 #endif
 
-    if ( !get_shadow_ref(sl1mfn) )
-        BUG();
-    l2pde_general(d, &gl2e, &sl2e, sl1mfn);
-    __guest_set_l2e(v, va, &gl2e);
-    __shadow_set_l2e(v, va, &sl2e);
+#if CONFIG_PAGING_LEVELS >=4
+    if (d->arch.ops->guest_paging_levels == PAGING_L2)
+    {
+        /* for 32-bit VMX guest on 64-bit host, 
+         * need update two L2 entries each time
+         */
+        if ( !get_shadow_ref(sl1mfn))
+                BUG();
+        l2pde_general(d, &gl2e, &sl2e, sl1mfn);
+        __guest_set_l2e(v, va, &gl2e);
+        __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e);
+        if ( !get_shadow_ref(sl1mfn+1))
+            BUG();
+        sl2e = l2e_empty();
+        l2pde_general(d, &gl2e, &sl2e, sl1mfn+1);
+        __shadow_set_l2e(v,((va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1)) + (1 << 
L2_PAGETABLE_SHIFT)) , &sl2e);
+    } else
+#endif
+    {
+        if ( !get_shadow_ref(sl1mfn) )
+            BUG();
+        l2pde_general(d, &gl2e, &sl2e, sl1mfn);
+        __guest_set_l2e(v, va, &gl2e);
+        __shadow_set_l2e(v, va , &sl2e);
+    }
 
     if ( init_table )
     {
         l1_pgentry_t sl1e;
-        int index = l1_table_offset(va);
+        int index = guest_l1_table_offset(va);
         int min = 1, max = 0;
         
         unsigned long entries, pt_va;
         l1_pgentry_t tmp_sl1e;
-        l1_pgentry_t tmp_gl1e;//Prepare for double compile
-
-
-        entries = PAGE_SIZE / sizeof(l1_pgentry_t);
+        guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile
+
+
+        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t);
         pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << 
L1_PAGETABLE_SHIFT;
-        gpl1e = (l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
-
-        entries = PAGE_SIZE / sizeof(l1_pgentry_t);
+        gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
+
+        /* If the PGT_l1_shadow has two continual pages */
+        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); //1024 entry!!!
         pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << 
L1_PAGETABLE_SHIFT;
         spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
 
@@ -555,7 +623,7 @@
         spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
                                      ~(L1_PAGETABLE_ENTRIES-1)]);*/
 
-        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+        for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
         {
             l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
             if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
@@ -584,7 +652,7 @@
     }
 }
 
-static void 
+static void
 shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
 {
     struct vcpu *v = current;
@@ -616,7 +684,7 @@
                 perfc_incrc(shadow_set_l1e_unlinked);
                 if ( !get_shadow_ref(sl1mfn) )
                     BUG();
-                l2pde_general(d, &gpde, &sl2e, sl1mfn);
+                l2pde_general(d, (guest_l2_pgentry_t *)&gpde, &sl2e, sl1mfn);
                 __guest_set_l2e(v, va, &gpde);
                 __shadow_set_l2e(v, va, &sl2e);
             }
@@ -651,6 +719,7 @@
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 
+#if CONFIG_PAGING_LEVELS <= 3
 static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
 {
     struct domain *d = v->domain;
@@ -671,6 +740,7 @@
                          sizeof(gpte))) {*/
     if (unlikely(!__guest_get_l1e(v, va, &gpte))) {
         perfc_incrc(shadow_invlpg_faults);
+        shadow_unlock(d);
         return;
     }
     l1pte_propagate_from_guest(d, gpte, &spte);
@@ -678,6 +748,7 @@
 
     shadow_unlock(d);
 }
+#endif
 
 static struct out_of_sync_entry *
 shadow_alloc_oos_entry(struct domain *d)
@@ -758,8 +829,8 @@
     length = max - min + 1;
     perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
 
-    min *= sizeof(l1_pgentry_t);
-    length *= sizeof(l1_pgentry_t);
+    min *= sizeof(guest_l1_pgentry_t);
+    length *= sizeof(guest_l1_pgentry_t);
 
     original = map_domain_page(gmfn);
     snapshot = map_domain_page(smfn);
@@ -840,7 +911,7 @@
 
         __shadow_get_l4e(v, va, &sl4e);
         if ( !(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
-            shadow_map_into_current(v, va, L3, L4);
+            shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
         }
 
         if (!__shadow_get_l3e(v, va, &sl3e)) {
@@ -848,7 +919,7 @@
         }
 
         if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
-            shadow_map_into_current(v, va, L2, L3);
+            shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
         }
     }
 #endif
@@ -886,11 +957,11 @@
  * Returns 0 otherwise.
  */
 static int snapshot_entry_matches(
-    struct domain *d, l1_pgentry_t *guest_pt,
+    struct domain *d, guest_l1_pgentry_t *guest_pt,
     unsigned long gpfn, unsigned index)
 {
     unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
-    l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
+    guest_l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
     int entries_match;
 
     perfc_incrc(snapshot_entry_matches_calls);
@@ -907,7 +978,7 @@
     // This could probably be smarter, but this is sufficent for
     // our current needs.
     //
-    entries_match = !l1e_has_changed(gpte, snapshot[index],
+    entries_match = !guest_l1e_has_changed(gpte, snapshot[index],
                                      PAGE_FLAG_MASK);
 
     unmap_domain_page(snapshot);
@@ -935,10 +1006,10 @@
     unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
 #endif
     unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
-    l2_pgentry_t l2e;
+    guest_l2_pgentry_t l2e;
     unsigned long l1pfn, l1mfn;
-    l1_pgentry_t *guest_pt;
-    l1_pgentry_t tmp_gle;
+    guest_l1_pgentry_t *guest_pt;
+    guest_l1_pgentry_t tmp_gle;
     unsigned long pt_va;
 
     ASSERT(shadow_lock_is_acquired(d));
@@ -947,7 +1018,7 @@
     perfc_incrc(shadow_out_of_sync_calls);
 
 #if CONFIG_PAGING_LEVELS >= 4
-    if (d->arch.ops->guest_paging_levels == L4) { /* Mode F */
+    if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */
         pgentry_64_t le;
         unsigned long gmfn;
         unsigned long gpfn;
@@ -955,9 +1026,9 @@
 
         gmfn = l2mfn;
         gpfn = l2pfn;
-        guest_pt = (l1_pgentry_t *)v->arch.guest_vtable;
-
-        for (i = L4; i >= L3; i--) {
+        guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable;
+
+        for (i = PAGING_L4; i >= PAGING_L3; i--) {
             if ( page_out_of_sync(&frame_table[gmfn]) &&
               !snapshot_entry_matches(
                   d, guest_pt, gpfn, table_offset_64(va, i)) )
@@ -971,7 +1042,7 @@
             if ( !VALID_MFN(gmfn) )
                 return 0;
             /* Todo: check!*/
-            guest_pt = (l1_pgentry_t *)map_domain_page(gmfn);
+            guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn);
 
         }
 
@@ -985,13 +1056,13 @@
 #endif
 
     if ( page_out_of_sync(&frame_table[l2mfn]) &&
-         !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
-                                 l2pfn, l2_table_offset(va)) )
+         !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
+                                 l2pfn, guest_l2_table_offset(va)) )
         return 1;
 
     __guest_get_l2e(v, va, &l2e);
-    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || 
-         (l2e_get_flags(l2e) & _PAGE_PSE))
+    if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) || 
+         (guest_l2e_get_flags(l2e) & _PAGE_PSE))
         return 0;
 
     l1pfn = l2e_get_pfn(l2e);
@@ -1000,20 +1071,20 @@
     // If the l1 pfn is invalid, it can't be out of sync...
     if ( !VALID_MFN(l1mfn) )
         return 0;
-    
-    pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(L1_PAGETABLE_ENTRIES - 1))
+
+    pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1))
       << L1_PAGETABLE_SHIFT;
-    guest_pt = (l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
+    guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
 
     if ( page_out_of_sync(&frame_table[l1mfn]) &&
          !snapshot_entry_matches(
-             d, guest_pt, l1pfn, l1_table_offset(va)) )
+             d, guest_pt, l1pfn, guest_l1_table_offset(va)) )
         return 1;
 
     return 0;
 }
 
-#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
+#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / 
sizeof(guest_l1_pgentry_t)))
 static inline unsigned long
 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
 {
@@ -1107,7 +1178,7 @@
         return (found == max_refs_to_find);
     }
 
-    i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
+    i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
     if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
     {
         perfc_incrc(remove_write_fast_exit);
@@ -1116,7 +1187,7 @@
         return found;
     }
  
-    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+    for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
     {
         if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
             break;
@@ -1281,15 +1352,15 @@
         switch ( stype ) {
         case PGT_l1_shadow:
         {
-            l1_pgentry_t *guest1 = guest;
+            guest_l1_pgentry_t *guest1 = guest;
             l1_pgentry_t *shadow1 = shadow;
-            l1_pgentry_t *snapshot1 = snapshot;
+            guest_l1_pgentry_t *snapshot1 = snapshot;
 
             ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
                    shadow_mode_write_all(d));
 
             if ( !shadow_mode_refcounts(d) )
-                revalidate_l1(d, guest1, snapshot1);
+                revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t 
*)snapshot1);
 
             if ( !smfn )
                 break;
@@ -1300,7 +1371,7 @@
             for ( i = min_shadow; i <= max_shadow; i++ )
             {
                 if ( (i < min_snapshot) || (i > max_snapshot) ||
-                     l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
+                     guest_l1e_has_changed(guest1[i], snapshot1[i], 
PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_pte_change(d, guest1[i], 
&shadow1[i]);
 
@@ -1430,32 +1501,36 @@
         {
             int max = -1;
 
-            l4_pgentry_t *guest4 = guest;
+            guest_root_pgentry_t *guest_root = guest;
             l4_pgentry_t *shadow4 = shadow;
-            l4_pgentry_t *snapshot4 = snapshot;
+            guest_root_pgentry_t *snapshot_root = snapshot;
 
             changed = 0;
-            for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+            for ( i = 0; i < GUEST_ROOT_PAGETABLE_ENTRIES; i++ )
             {
                 if ( !is_guest_l4_slot(i) && !external )
                     continue;
-                l4_pgentry_t new_l4e = guest4[i];
-                if ( l4e_has_changed(new_l4e, snapshot4[i], PAGE_FLAG_MASK))
+                guest_root_pgentry_t new_root_e = guest_root[i];
+                if ( root_entry_has_changed(
+                        new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
                 {
-                    need_flush |= validate_entry_change(
-                      d, (pgentry_64_t *)&new_l4e,
-                      (pgentry_64_t *)&shadow4[i], 
shadow_type_to_level(stype));
-
+                    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
+                        need_flush |= validate_entry_change(
+                          d, (pgentry_64_t *)&new_root_e,
+                          (pgentry_64_t *)&shadow4[i], 
shadow_type_to_level(stype));
+                    } else {
+                        validate_bl2e_change(d, &new_root_e, shadow, i);
+                    }
                     changed++;
                     ESH_LOG("%d: shadow4 mfn: %lx, shadow root: %lx\n", i,
                       smfn, pagetable_get_paddr(current->arch.shadow_table));
                 }
-                if ( l4e_get_intpte(new_l4e) != 0 ) /* FIXME: check flags? */
+                if ( guest_root_get_intpte(new_root_e) != 0 ) /* FIXME: check 
flags? */
                     max = i;
 
                 //  Need a better solution in the long term.
-                if ( !(l4e_get_flags(new_l4e) & _PAGE_PRESENT) &&
-                  unlikely(l4e_get_intpte(new_l4e) != 0) &&
+                if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) &&
+                  unlikely(guest_root_get_intpte(new_root_e) != 0) &&
                   !unshadow &&
                   (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
                     unshadow = 1;
@@ -1554,8 +1629,14 @@
     if ( shadow_mode_translate(d) )
         need_flush |= resync_all(d, PGT_hl2_shadow);
 #endif
-    need_flush |= resync_all(d, PGT_l2_shadow);
-    need_flush |= resync_all(d, PGT_l3_shadow);
+
+    /*
+     * Fixme: for i386 host
+     */
+    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
+        need_flush |= resync_all(d, PGT_l2_shadow);
+        need_flush |= resync_all(d, PGT_l3_shadow);
+    }
     need_flush |= resync_all(d, PGT_l4_shadow);
 
     if ( need_flush && !unlikely(shadow_mode_external(d)) )
@@ -1565,11 +1646,11 @@
 }
 
 static inline int l1pte_write_fault(
-    struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
+    struct vcpu *v, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
     unsigned long va)
 {
     struct domain *d = v->domain;
-    l1_pgentry_t gpte = *gpte_p;
+    guest_l1_pgentry_t gpte = *gpte_p;
     l1_pgentry_t spte;
     unsigned long gpfn = l1e_get_pfn(gpte);
     unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
@@ -1583,9 +1664,9 @@
         return 0;
     }
 
-    ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
-    l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
-    spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
+    ASSERT(guest_l1e_get_flags(gpte) & _PAGE_RW);
+    guest_l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
+    spte = l1e_from_pfn(gmfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
 
     SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
              l1e_get_intpte(spte), l1e_get_intpte(gpte));
@@ -1603,9 +1684,9 @@
 }
 
 static inline int l1pte_read_fault(
-    struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
+    struct domain *d, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
 { 
-    l1_pgentry_t gpte = *gpte_p;
+    guest_l1_pgentry_t gpte = *gpte_p;
     l1_pgentry_t spte = *spte_p;
     unsigned long pfn = l1e_get_pfn(gpte);
     unsigned long mfn = __gpfn_to_mfn(d, pfn);
@@ -1617,10 +1698,10 @@
         return 0;
     }
 
-    l1e_add_flags(gpte, _PAGE_ACCESSED);
-    spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
-
-    if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
+    guest_l1e_add_flags(gpte, _PAGE_ACCESSED);
+    spte = l1e_from_pfn(mfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
+
+    if ( shadow_mode_log_dirty(d) || !(guest_l1e_get_flags(gpte) & 
_PAGE_DIRTY) ||
          mfn_is_page_table(mfn) )
     {
         l1e_remove_flags(spte, _PAGE_RW);
@@ -1633,7 +1714,7 @@
 
     return 1;
 }
-
+#if CONFIG_PAGING_LEVELS <= 3
 static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
 {
     l1_pgentry_t gpte, spte, orig_gpte;
@@ -1767,6 +1848,7 @@
     shadow_unlock(d);
     return 0;
 }
+#endif
 
 static int do_update_va_mapping(unsigned long va,
                                 l1_pgentry_t val,
@@ -1786,7 +1868,7 @@
     //
     __shadow_sync_va(v, va);
 
-    l1pte_propagate_from_guest(d, val, &spte);
+    l1pte_propagate_from_guest(d, *(guest_l1_pgentry_t *)&val, &spte);
     shadow_set_l1e(va, spte, 0);
 
     /*
@@ -1847,7 +1929,7 @@
 #if CONFIG_PAGING_LEVELS == 2
     unsigned long hl2mfn;
 #endif
-  
+
     int max_mode = ( shadow_mode_external(d) ? SHM_external
                      : shadow_mode_translate(d) ? SHM_translate
                      : shadow_mode_enabled(d) ? SHM_enable
@@ -1953,17 +2035,6 @@
 #endif
 }
 
-struct shadow_ops MODE_A_HANDLER = {
-    .guest_paging_levels        = 2,
-    .invlpg                     = shadow_invlpg_32,
-    .fault                      = shadow_fault_32,
-    .update_pagetables          = shadow_update_pagetables,
-    .sync_all                   = sync_all,
-    .remove_all_write_access    = remove_all_write_access,
-    .do_update_va_mapping       = do_update_va_mapping,
-    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
-    .is_out_of_sync             = is_out_of_sync,
-};
 
 /************************************************************************/
 /************************************************************************/
@@ -2444,12 +2515,90 @@
     BUG();                      /* not implemenated yet */
     return 42;
 }
+static unsigned long gva_to_gpa_pae(unsigned long gva)
+{
+    BUG();
+    return 43;
+}
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4
 /****************************************************************************/
 /* 64-bit shadow-mode code testing */
 /****************************************************************************/
+/*
+ * validate_bl2e_change()
+ * The code is for 32-bit VMX gues on 64-bit host.
+ * To sync guest L2.
+ */
+
+static inline void
+validate_bl2e_change(
+  struct domain *d,
+  guest_root_pgentry_t *new_gle_p,
+  pgentry_64_t *shadow_l3,
+  int index)
+{
+    int sl3_idx, sl2_idx;
+    unsigned long sl2mfn, sl1mfn;
+    pgentry_64_t *sl2_p;
+
+    /* Using guest l2 pte index to get shadow l3&l2 index
+     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
+     */
+    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
+    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
+
+    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
+    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
+
+    validate_pde_change(
+        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
+
+    /* Mapping the second l1 shadow page */
+    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
+       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
+       sl2_p[sl2_idx + 1] =
+            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
+    }
+    unmap_domain_page(sl2_p);
+
+}
+
+/*
+ * init_bl2() is for 32-bit VMX guest on 64-bit host
+ * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
+ */
+static inline unsigned long init_bl2(l4_pgentry_t *spl4e, unsigned long smfn)
+{
+    unsigned int count;
+    unsigned long sl2mfn;
+    struct pfn_info *page;
+
+    memset(spl4e, 0, PAGE_SIZE);
+
+    /* Map the self entry, L4&L3 share the same page */
+    spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+
+    /* Allocate 4 shadow L2s */
+    page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
+    if (!page)
+        domain_crash_synchronous();
+
+    for (count = 0; count < PDP_ENTRIES; count++)
+    {
+        sl2mfn = page_to_pfn(page+count);
+        void *l2 = map_domain_page(sl2mfn);
+        memset(l2, 0, PAGE_SIZE);
+        unmap_domain_page(l2);
+        spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
+    }
+
+    unmap_domain_page(spl4e);
+    return smfn;
+
+
+}
 
 static unsigned long shadow_l4_table(
   struct domain *d, unsigned long gpfn, unsigned long gmfn)
@@ -2463,11 +2612,16 @@
 
     if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
     {
-        printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n", gpfn, 
gmfn);
+        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, 
gmfn);
         BUG(); /* XXX Deal gracefully with failure. */
     }
 
     spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
+        return init_bl2(spl4e, smfn);
+    }
+
     /* Install hypervisor and 4x linear p.t. mapings. */
     if ( (PGT_base_page_table == PGT_l4_page_table) &&
       !shadow_mode_external(d) )
@@ -2575,7 +2729,7 @@
     pgentry_64_t gle, sle;
     unsigned long gpfn, smfn;
 
-    if (from == L1 && to == L2) {
+    if (from == PAGING_L1 && to == PAGING_L2) {
         shadow_map_l1_into_current_l2(va);
         return;
     }
@@ -2607,7 +2761,7 @@
     if (!(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
         if (create_l2_shadow) {
             perfc_incrc(shadow_set_l3e_force_map);
-            shadow_map_into_current(v, va, L3, L4);
+            shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
             __shadow_get_l4e(v, va, &sl4e);
         } else {
             printk("For non VMX shadow, create_l1_shadow:%d\n", 
create_l2_shadow);
@@ -2618,7 +2772,7 @@
     if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
          if (create_l2_shadow) {
             perfc_incrc(shadow_set_l2e_force_map);
-            shadow_map_into_current(v, va, L2, L3);
+            shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
             __shadow_get_l3e(v, va, &sl3e);
         } else {
             printk("For non VMX shadow, create_l1_shadow:%d\n", 
create_l2_shadow);
@@ -2654,8 +2808,15 @@
     l1_pgentry_t old_spte;
     l1_pgentry_t sl1e = *(l1_pgentry_t *)sl1e_p;
     int i;
-
-    for (i = L4; i >= L2; i--) {
+    unsigned long orig_va = 0;
+
+    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
+        /* This is for 32-bit VMX guest on 64-bit host */
+        orig_va = va;
+        va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1));
+    }
+
+    for (i = PAGING_L4; i >= PAGING_L2; i--) {
         if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) {
             printk("<%s> i = %d\n", __func__, i);
             BUG();
@@ -2671,9 +2832,13 @@
 #endif
             }
         }
-        if(i < L4)
+        if(i < PAGING_L4)
             shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, 
i));
         sle_up = sle;
+    }
+
+    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
+        va = orig_va;
     }
 
     if ( shadow_mode_refcounts(d) )
@@ -2691,9 +2856,13 @@
     }
 
     __shadow_set_l1e(v, va, &sl1e);
-    shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, L1));
-}
-
+
+    shadow_update_min_max(entry_get_pfn(sle_up), guest_l1_table_offset(va));
+}
+
+/* As 32-bit guest don't support 4M page yet,
+ * we don't concern double compile for this function
+ */
 static inline int l2e_rw_fault(
     struct vcpu *v, l2_pgentry_t *gl2e_p, unsigned long va, int rw)
 {
@@ -2824,12 +2993,120 @@
 
 }
 
+/*
+ * Check P, R/W, U/S bits in the guest page table.
+ * If the fault belongs to guest return 1,
+ * else return 0.
+ */
+#if defined( GUEST_PGENTRY_32 )
+static inline int guest_page_fault(struct vcpu *v,
+  unsigned long va, unsigned int error_code, 
+  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
+{
+    /* The following check for 32-bit guest on 64-bit host */
+
+    __guest_get_l2e(v, va, gpl2e);
+
+    /* Check the guest L2 page-table entry first*/
+    if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)))
+        return 1;
+
+    if (error_code & ERROR_W) {
+        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)))
+            return 1;
+    }
+    if (error_code & ERROR_U) {
+        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)))
+            return 1;
+    }
+
+    if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE)
+        return 0;
+
+    __guest_get_l1e(v, va, gpl1e);
+
+    /* Then check the guest L1 page-table entry */
+    if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)))
+        return 1;
+
+    if (error_code & ERROR_W) {
+        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)))
+            return 1;
+    }
+    if (error_code & ERROR_U) {
+        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)))
+            return 1;
+    }
+
+    return 0;
+}
+#else
+static inline int guest_page_fault(struct vcpu *v,
+  unsigned long va, unsigned int error_code, 
+  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
+{
+    struct domain *d = v->domain;
+    pgentry_64_t gle, *lva;
+    unsigned long mfn;
+    int i;
+
+    __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
+    if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
+        return 1;
+
+    if (error_code & ERROR_W) {
+        if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
+            return 1;
+    }
+    if (error_code & ERROR_U) {
+        if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
+            return 1;
+    }
+    for (i = PAGING_L3; i >= PAGING_L1; i--) {
+        /*
+         * If it's not external mode, then mfn should be machine physical.
+         */
+        mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT));
+
+        lva = (pgentry_64_t *) phys_to_virt(
+          mfn << PAGE_SHIFT);
+        gle = lva[table_offset_64(va, i)];
+
+        if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
+            return 1;
+
+        if (error_code & ERROR_W) {
+            if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
+                return 1;
+        }
+        if (error_code & ERROR_U) {
+            if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
+                return 1;
+        }
+
+        if (i == PAGING_L2) {
+            if (gpl2e)
+                gpl2e->l2 = gle.lo;
+
+            if (likely(entry_get_flags(gle) & _PAGE_PSE))
+                return 0;
+
+        }
+
+        if (i == PAGING_L1)
+            if (gpl1e)
+                gpl1e->l1 = gle.lo;
+    }
+    return 0;
+}
+#endif
 static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    l2_pgentry_t gl2e;
-    l1_pgentry_t sl1e, gl1e;
+    guest_l2_pgentry_t gl2e;
+    guest_l1_pgentry_t gl1e;
+    l1_pgentry_t sl1e;
 
     perfc_incrc(shadow_fault_calls);
 
@@ -2852,12 +3129,11 @@
      * STEP 2. Check if the fault belongs to guest
      */
     if ( guest_page_fault(
-            v, va, regs->error_code, 
-            (pgentry_64_t *)&gl2e, (pgentry_64_t *)&gl1e) ) {
+            v, va, regs->error_code, &gl2e, &gl1e) ) {
         goto fail;
     }
     
-    if ( unlikely(!(l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
+    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
         /*
          * Handle 4K pages here
          */
@@ -2891,11 +3167,11 @@
          */
         /* Write fault? */
         if ( regs->error_code & 2 ) {
-            if ( !l2e_rw_fault(v, &gl2e, va, WRITE_FAULT) ) {
+            if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) {
                 goto fail;
             }
         } else {
-            l2e_rw_fault(v, &gl2e, va, READ_FAULT);
+            l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
         }
 
         /*
@@ -2943,7 +3219,27 @@
     shadow_unlock(d);
 }
 
-#ifndef PGENTRY_32
+static unsigned long gva_to_gpa_64(unsigned long gva)
+{
+    struct vcpu *v = current;
+    guest_l1_pgentry_t gl1e = {0};
+    guest_l2_pgentry_t gl2e = {0};
+    unsigned long gpa;
+
+    if (guest_page_fault(v, gva, 0, &gl2e, &gl1e))
+        return 0;
+    
+    if (guest_l2e_get_flags(gl2e) & _PAGE_PSE)
+        gpa = guest_l2e_get_paddr(gl2e) + (gva & ((1 << 
GUEST_L2_PAGETABLE_SHIFT) - 1));
+    else
+        gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK);
+
+    return gpa;
+
+}
+
+#ifndef GUEST_PGENTRY_32
+
 struct shadow_ops MODE_F_HANDLER = {
     .guest_paging_levels              = 4,
     .invlpg                     = shadow_invlpg_64,
@@ -2954,10 +3250,42 @@
     .do_update_va_mapping       = do_update_va_mapping,
     .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
     .is_out_of_sync             = is_out_of_sync,
+    .gva_to_gpa                 = gva_to_gpa_64,
 };
 #endif
 
 #endif
+
+#if CONFIG_PAGING_LEVELS == 2
+struct shadow_ops MODE_A_HANDLER = {
+    .guest_paging_levels        = 2,
+    .invlpg                     = shadow_invlpg_32,
+    .fault                      = shadow_fault_32,
+    .update_pagetables          = shadow_update_pagetables,
+    .sync_all                   = sync_all,
+    .remove_all_write_access    = remove_all_write_access,
+    .do_update_va_mapping       = do_update_va_mapping,
+    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
+    .is_out_of_sync             = is_out_of_sync,
+    .gva_to_gpa                 = gva_to_gpa_64,
+};
+
+#elif CONFIG_PAGING_LEVELS == 3
+struct shadow_ops MODE_B_HANDLER = {
+    .guest_paging_levels              = 3,
+    .invlpg                     = shadow_invlpg_32,
+    .fault                      = shadow_fault_32,
+    .update_pagetables          = shadow_update_pagetables,
+    .sync_all                   = sync_all,
+    .remove_all_write_access    = remove_all_write_access,
+    .do_update_va_mapping       = do_update_va_mapping,
+    .mark_mfn_out_of_sync       = mark_mfn_out_of_sync,
+    .is_out_of_sync             = is_out_of_sync,
+    .gva_to_gpa                 = gva_to_gpa_pae,
+};
+
+#endif
+
 
 /*
  * Local variables:
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/shadow32.c   Fri Sep  9 16:30:54 2005
@@ -827,7 +827,7 @@
     {
         page = list_entry(list_ent, struct pfn_info, list);
         mfn = page_to_pfn(page);
-        pfn = machine_to_phys_mapping[mfn];
+        pfn = get_pfn_from_mfn(mfn);
         ASSERT(pfn != INVALID_M2P_ENTRY);
         ASSERT(pfn < (1u<<20));
 
@@ -841,7 +841,7 @@
     {
         page = list_entry(list_ent, struct pfn_info, list);
         mfn = page_to_pfn(page);
-        pfn = machine_to_phys_mapping[mfn];
+        pfn = get_pfn_from_mfn(mfn);
         if ( (pfn != INVALID_M2P_ENTRY) &&
              (pfn < (1u<<20)) )
         {
@@ -1685,6 +1685,7 @@
     if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
                          sizeof(gpte))) {
         perfc_incrc(shadow_invlpg_faults);
+        shadow_unlock(d);
         return;
     }
     l1pte_propagate_from_guest(d, gpte, &spte);
@@ -1917,8 +1918,10 @@
     snapshot = map_domain_page(smfn);
 
     if (__copy_from_user(&gpte, &guest_pt[index],
-                         sizeof(gpte)))
+                         sizeof(gpte))) {
+        unmap_domain_page(snapshot);
         return 0;
+    }
 
     // This could probably be smarter, but this is sufficent for
     // our current needs.
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/shadow_public.c      Fri Sep  9 16:30:54 2005
@@ -33,11 +33,15 @@
 #if CONFIG_PAGING_LEVELS >= 3
 #include <asm/shadow_64.h>
 
+#endif
+#if CONFIG_PAGING_LEVELS == 4
 extern struct shadow_ops MODE_F_HANDLER;
+extern struct shadow_ops MODE_D_HANDLER;
 #endif
 
 extern struct shadow_ops MODE_A_HANDLER;
 
+#define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - 
((_encoded) >> 16))
 /****************************************************************************/
 /************* export interface functions ***********************************/
 /****************************************************************************/
@@ -48,7 +52,7 @@
     shadow_lock(d);
 
     switch(levels) {
-#if CONFIG_PAGING_LEVELS >= 4 
+#if CONFIG_PAGING_LEVELS >= 4
     case 4:
        if ( d->arch.ops != &MODE_F_HANDLER )
            d->arch.ops = &MODE_F_HANDLER;
@@ -56,9 +60,14 @@
         return 1;
 #endif
     case 3:
-    case 2:                     
+    case 2:
+#if CONFIG_PAGING_LEVELS == 2
        if ( d->arch.ops != &MODE_A_HANDLER )
            d->arch.ops = &MODE_A_HANDLER;
+#elif CONFIG_PAGING_LEVELS == 4
+       if ( d->arch.ops != &MODE_D_HANDLER )
+           d->arch.ops = &MODE_D_HANDLER;
+#endif
        shadow_unlock(d);
         return 1;
    default:
@@ -122,13 +131,17 @@
     return d->arch.ops->is_out_of_sync(v, va);
 }
 
+unsigned long gva_to_gpa(unsigned long gva)
+{
+    struct domain *d = current->domain;
+    return d->arch.ops->gva_to_gpa(gva);
+}
 /****************************************************************************/
 /****************************************************************************/
 #if CONFIG_PAGING_LEVELS >= 4
 /*
  * Convert PAE 3-level page-table to 4-level page-table
  */
-#define PDP_ENTRIES   4
 static pagetable_t page_table_convert(struct domain *d)
 {
     struct pfn_info *l4page, *l3page;
@@ -203,19 +216,41 @@
 /*
  * Free l2, l3, l4 shadow tables
  */
+
+void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
+
 static void inline
 free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
 {
     pgentry_64_t *ple = map_domain_page(smfn);
     int i, external = shadow_mode_external(d);
-
-    for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
-        if ( external || is_guest_l4_slot(i) )
-            if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
-                put_shadow_ref(entry_get_pfn(ple[i]));
-
-    unmap_domain_page(ple);
-}
+    struct pfn_info *page = &frame_table[smfn];
+
+    if (d->arch.ops->guest_paging_levels == PAGING_L2)
+    {
+#if CONFIG_PAGING_LEVELS >=4
+        for ( i = 0; i < PDP_ENTRIES; i++ )
+        {
+            if (entry_get_flags(ple[i]) & _PAGE_PRESENT )
+                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
+        }
+   
+        page = &frame_table[entry_get_pfn(ple[0])];
+        free_domheap_pages(page, SL2_ORDER);
+        unmap_domain_page(ple);
+#endif
+    }
+    else
+    {
+        for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
+            if ( external || is_guest_l4_slot(i) )
+                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
+                        put_shadow_ref(entry_get_pfn(ple[i]));
+
+        unmap_domain_page(ple);
+    }
+}
+
 
 void free_monitor_pagetable(struct vcpu *v)
 {
@@ -453,7 +488,12 @@
     struct pfn_info *spage = pfn_to_page(smfn);
     u32 min_max = spage->tlbflush_timestamp;
     int min = SHADOW_MIN(min_max);
-    int max = SHADOW_MAX(min_max);
+    int max;
+    
+    if (d->arch.ops->guest_paging_levels == PAGING_L2)
+        max = SHADOW_MAX_GUEST32(min_max);
+    else
+        max = SHADOW_MAX(min_max);
 
     for ( i = min; i <= max; i++ )
     {
@@ -512,9 +552,24 @@
     unmap_domain_page(pl2e);
 }
 
+void free_fake_shadow_l2(struct domain *d, unsigned long smfn)
+{
+    pgentry_64_t *ple = map_domain_page(smfn);
+    int i;
+
+    for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 )
+    {
+        if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
+            put_shadow_ref(entry_get_pfn(ple[i]));
+    }
+
+    unmap_domain_page(ple);
+}
+
 void free_shadow_page(unsigned long smfn)
 {
     struct pfn_info *page = &frame_table[smfn];
+
     unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
     struct domain *d = page_get_owner(pfn_to_page(gmfn));
     unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
@@ -531,6 +586,7 @@
             gpfn |= (1UL << 63);
     }
 #endif
+
     delete_shadow_status(d, gpfn, gmfn, type);
 
     switch ( type )
@@ -687,7 +743,7 @@
     int                   i;
     struct shadow_status *x;
     struct vcpu          *v;
- 
+
     /*
      * WARNING! The shadow page table must not currently be in use!
      * e.g., You are expected to have paused the domain and synchronized CR3.
@@ -794,7 +850,16 @@
         perfc_decr(free_l1_pages);
 
         struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
-        free_domheap_page(page);
+       if (d->arch.ops->guest_paging_levels == PAGING_L2)
+       {
+#if CONFIG_PAGING_LEVELS >=4
+        free_domheap_pages(page, SL1_ORDER);
+#else
+       free_domheap_page(page);
+#endif
+       }
+       else
+       free_domheap_page(page);
     }
 
     shadow_audit(d, 0);
@@ -1191,7 +1256,7 @@
     {
         DPRINTK("Don't try to do a shadow op on yourself!\n");
         return -EINVAL;
-    }   
+    }
 
     domain_pause(d);
 
@@ -1311,7 +1376,7 @@
     {
         page = list_entry(list_ent, struct pfn_info, list);
         mfn = page_to_pfn(page);
-        pfn = machine_to_phys_mapping[mfn];
+        pfn = get_pfn_from_mfn(mfn);
         ASSERT(pfn != INVALID_M2P_ENTRY);
         ASSERT(pfn < (1u<<20));
 
@@ -1325,7 +1390,7 @@
     {
         page = list_entry(list_ent, struct pfn_info, list);
         mfn = page_to_pfn(page);
-        pfn = machine_to_phys_mapping[mfn];
+        pfn = get_pfn_from_mfn(mfn);
         if ( (pfn != INVALID_M2P_ENTRY) &&
              (pfn < (1u<<20)) )
         {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/time.c       Fri Sep  9 16:30:54 2005
@@ -792,6 +792,13 @@
     tsc_elapsed64   = curr_tsc - prev_tsc;
 
     /*
+     * Weirdness can happen if we lose sync with the platform timer.
+     * We could be smarter here: resync platform timer with local timer?
+     */
+    if ( ((s64)stime_elapsed64 < (EPOCH / 2)) )
+        goto out;
+
+    /*
      * Calculate error-correction factor. This only slows down a fast local
      * clock (slow clocks are warped forwards). The scale factor is clamped
      * to >= 0.5.
@@ -854,6 +861,7 @@
     cpu_time[cpu].stime_local_stamp  = curr_local_stime;
     cpu_time[cpu].stime_master_stamp = curr_master_stime;
 
+ out:
     set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH);
 
     if ( cpu == 0 )
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/traps.c      Fri Sep  9 16:30:54 2005
@@ -101,6 +101,14 @@
 static int debug_stack_lines = 20;
 integer_param("debug_stack_lines", debug_stack_lines);
 
+#ifdef CONFIG_X86_32
+#define stack_words_per_line 8
+#define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
+#else
+#define stack_words_per_line 4
+#define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->esp)
+#endif
+
 int is_kernel_text(unsigned long addr)
 {
     extern char _stext, _etext;
@@ -117,17 +125,16 @@
     return (unsigned long) &_etext;
 }
 
-void show_guest_stack(void)
+static void show_guest_stack(struct cpu_user_regs *regs)
 {
     int i;
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
     unsigned long *stack = (unsigned long *)regs->esp, addr;
 
     printk("Guest stack trace from "__OP"sp=%p:\n   ", stack);
 
-    for ( i = 0; i < (debug_stack_lines*8); i++ )
-    {
-        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
+    for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
+    {
+        if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
             break;
         if ( get_user(addr, stack) )
         {
@@ -137,7 +144,7 @@
             i = 1;
             break;
         }
-        if ( (i != 0) && ((i % 8) == 0) )
+        if ( (i != 0) && ((i % stack_words_per_line) == 0) )
             printk("\n   ");
         printk("%p ", _p(addr));
         stack++;
@@ -147,40 +154,100 @@
     printk("\n");
 }
 
-void show_trace(unsigned long *esp)
-{
-    unsigned long *stack = esp, addr;
-    int i = 0;
-
-    printk("Xen call trace from "__OP"sp=%p:\n   ", stack);
-
-    while ( ((long) stack & (STACK_SIZE-1)) != 0 )
+#ifdef NDEBUG
+
+static void show_trace(struct cpu_user_regs *regs)
+{
+    unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
+
+    printk("Xen call trace:\n   ");
+
+    printk("[<%p>]", _p(regs->eip));
+    print_symbol(" %s\n   ", regs->eip);
+
+    while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
     {
         addr = *stack++;
         if ( is_kernel_text(addr) )
         {
             printk("[<%p>]", _p(addr));
             print_symbol(" %s\n   ", addr);
-            i++;
-        }
-    }
-    if ( i == 0 )
-        printk("Trace empty.");
+        }
+    }
+
     printk("\n");
 }
 
-void show_stack(unsigned long *esp)
-{
-    unsigned long *stack = esp, addr;
+#else
+
+static void show_trace(struct cpu_user_regs *regs)
+{
+    unsigned long *frame, next, addr, low, high;
+
+    printk("Xen call trace:\n   ");
+
+    printk("[<%p>]", _p(regs->eip));
+    print_symbol(" %s\n   ", regs->eip);
+
+    /* Bounds for range of valid frame pointer. */
+    low  = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
+    high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info));
+
+    /* The initial frame pointer. */
+    next = regs->ebp;
+
+    for ( ; ; )
+    {
+        /* Valid frame pointer? */
+        if ( (next < low) || (next > high) )
+        {
+            /*
+             * Exception stack frames have a different layout, denoted by an
+             * inverted frame pointer.
+             */
+            next = ~next;
+            if ( (next < low) || (next > high) )
+                break;
+            frame = (unsigned long *)next;
+            next  = frame[0];
+            addr  = frame[(offsetof(struct cpu_user_regs, eip) -
+                           offsetof(struct cpu_user_regs, ebp))
+                         / BYTES_PER_LONG];
+        }
+        else
+        {
+            /* Ordinary stack frame. */
+            frame = (unsigned long *)next;
+            next  = frame[0];
+            addr  = frame[1];
+        }
+
+        printk("[<%p>]", _p(addr));
+        print_symbol(" %s\n   ", addr);
+
+        low = (unsigned long)&frame[2];
+    }
+
+    printk("\n");
+}
+
+#endif
+
+void show_stack(struct cpu_user_regs *regs)
+{
+    unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
     int i;
 
+    if ( GUEST_MODE(regs) )
+        return show_guest_stack(regs);
+
     printk("Xen stack trace from "__OP"sp=%p:\n   ", stack);
 
-    for ( i = 0; i < (debug_stack_lines*8); i++ )
-    {
-        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
-            break;
-        if ( (i != 0) && ((i % 8) == 0) )
+    for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
+    {
+        if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
+            break;
+        if ( (i != 0) && ((i % stack_words_per_line) == 0) )
             printk("\n   ");
         addr = *stack++;
         printk("%p ", _p(addr));
@@ -189,7 +256,7 @@
         printk("Stack empty.");
     printk("\n");
 
-    show_trace(esp);
+    show_trace(regs);
 }
 
 /*
@@ -403,20 +470,32 @@
     return EXCRET_fault_fixed;
 }
 
-asmlinkage int do_page_fault(struct cpu_user_regs *regs)
-{
-    unsigned long addr, fixup;
-    struct vcpu *v = current;
+#ifdef HYPERVISOR_VIRT_END
+#define IN_HYPERVISOR_RANGE(va) \
+    (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
+#else
+#define IN_HYPERVISOR_RANGE(va) \
+    (((va) >= HYPERVISOR_VIRT_START))
+#endif
+
+static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+{
+    struct vcpu   *v = current;
     struct domain *d = v->domain;
 
-    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
-
-    DEBUGGER_trap_entry(TRAP_page_fault, regs);
-
-    perfc_incrc(page_faults);
-
-    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
-                !shadow_mode_enabled(d)) )
+    if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
+    {
+        if ( shadow_mode_external(d) && GUEST_CONTEXT(v, regs) )
+            return shadow_fault(addr, regs);
+        if ( (addr >= PERDOMAIN_VIRT_START) && (addr < PERDOMAIN_VIRT_END) )
+            return handle_perdomain_mapping_fault(
+                addr - PERDOMAIN_VIRT_START, regs);
+    }
+    else if ( unlikely(shadow_mode_enabled(d)) )
+    {
+        return shadow_fault(addr, regs);
+    }
+    else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
     {
         LOCK_BIGLOCK(d);
         if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
@@ -428,14 +507,9 @@
             return EXCRET_fault_fixed;
         }
 
-        if ( ((addr < HYPERVISOR_VIRT_START) 
-#if defined(__x86_64__)
-              || (addr >= HYPERVISOR_VIRT_END)
-#endif        
-            )     
-             &&
-             KERNEL_MODE(v, regs) &&
-             ((regs->error_code & 3) == 3) && /* write-protection fault */
+        if ( KERNEL_MODE(v, regs) &&
+             /* Protection violation on write? No reserved-bit violation? */
+             ((regs->error_code & 0xb) == 0x3) &&
              ptwr_do_page_fault(d, addr, regs) )
         {
             UNLOCK_BIGLOCK(d);
@@ -444,43 +518,51 @@
         UNLOCK_BIGLOCK(d);
     }
 
-    if ( unlikely(shadow_mode_enabled(d)) &&
-         ((addr < HYPERVISOR_VIRT_START) ||
-#if defined(__x86_64__)
-          (addr >= HYPERVISOR_VIRT_END) ||
-#endif
-          (shadow_mode_external(d) && GUEST_CONTEXT(v, regs))) &&
-         shadow_fault(addr, regs) )
-        return EXCRET_fault_fixed;
-
-    if ( unlikely(addr >= PERDOMAIN_VIRT_START) &&
-         unlikely(addr < PERDOMAIN_VIRT_END) &&
-         handle_perdomain_mapping_fault(addr - PERDOMAIN_VIRT_START, regs) )
-        return EXCRET_fault_fixed;
-
-    if ( !GUEST_MODE(regs) )
-        goto xen_fault;
+    return 0;
+}
+
+/*
+ * #PF error code:
+ *  Bit 0: Protection violation (=1) ; Page not present (=0)
+ *  Bit 1: Write access
+ *  Bit 2: Supervisor mode
+ *  Bit 3: Reserved bit violation
+ *  Bit 4: Instruction fetch
+ */
+asmlinkage int do_page_fault(struct cpu_user_regs *regs)
+{
+    unsigned long addr, fixup;
+    int rc;
+
+    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
+
+    DEBUGGER_trap_entry(TRAP_page_fault, regs);
+
+    perfc_incrc(page_faults);
+
+    if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
+        return rc;
+
+    if ( unlikely(!GUEST_MODE(regs)) )
+    {
+        if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
+        {
+            perfc_incrc(copy_user_faults);
+            regs->eip = fixup;
+            return 0;
+        }
+
+        DEBUGGER_trap_fatal(TRAP_page_fault, regs);
+
+        show_registers(regs);
+        show_page_walk(addr);
+        panic("CPU%d FATAL PAGE FAULT\n"
+              "[error_code=%04x]\n"
+              "Faulting linear address: %p\n",
+              smp_processor_id(), regs->error_code, addr);
+    }
 
     propagate_page_fault(addr, regs->error_code);
-    return 0;
-
- xen_fault:
-
-    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
-    {
-        perfc_incrc(copy_user_faults);
-        regs->eip = fixup;
-        return 0;
-    }
-
-    DEBUGGER_trap_fatal(TRAP_page_fault, regs);
-
-    show_registers(regs);
-    show_page_walk(addr);
-    panic("CPU%d FATAL PAGE FAULT\n"
-          "[error_code=%04x]\n"
-          "Faulting linear address: %p\n",
-          smp_processor_id(), regs->error_code, addr);
     return 0;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/vmx.c        Fri Sep  9 16:30:54 2005
@@ -49,6 +49,15 @@
 int vmcs_size;
 unsigned int opt_vmx_debug_level = 0;
 integer_param("vmx_debug", opt_vmx_debug_level);
+
+extern int hvm_enabled;
+
+#ifdef TRACE_BUFFER
+static unsigned long trace_values[NR_CPUS][4];
+#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
+#else
+#define TRACE_VMEXIT(index,value) ((void)0)
+#endif
 
 #ifdef __x86_64__
 static struct msr_state percpu_msr[NR_CPUS];
@@ -338,6 +347,8 @@
 
     vmx_save_init_msrs();
 
+    hvm_enabled = 1;
+
     return 1;
 }
 
@@ -351,7 +362,7 @@
  * Not all cases receive valid value in the VM-exit instruction length field.
  */
 #define __get_instruction_length(len) \
-    __vmread(INSTRUCTION_LEN, &(len)); \
+    __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
      if ((len) < 1 || (len) > 15) \
         __vmx_bug(&regs);
 
@@ -381,6 +392,7 @@
 
     if (!vmx_paging_enabled(current)){
         handle_mmio(va, va);
+        TRACE_VMEXIT (2,2);
         return 1;
     }
     gpa = gva_to_gpa(va);
@@ -389,21 +401,22 @@
     if ( mmio_space(gpa) ){
         if (gpa >= 0xFEE00000) { /* workaround for local APIC */
             u32 inst_len;
-            __vmread(INSTRUCTION_LEN, &(inst_len));
+            __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
             __update_guest_eip(inst_len);
             return 1;
         }
+        TRACE_VMEXIT (2,2);
         handle_mmio(va, gpa);
         return 1;
     }
 
     result = shadow_fault(va, regs);
-
+    TRACE_VMEXIT (2,result);
 #if 0
     if ( !result )
     {
         __vmread(GUEST_RIP, &eip);
-        printk("vmx pgfault to guest va=%p eip=%p\n", va, eip);
+        printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
     }
 #endif
 
@@ -447,7 +460,16 @@
         clear_bit(X86_FEATURE_PSE, &edx);
         clear_bit(X86_FEATURE_PAE, &edx);
         clear_bit(X86_FEATURE_PSE36, &edx);
+#else
+        struct vcpu *d = current;
+        if (d->domain->arch.ops->guest_paging_levels == PAGING_L2)
+        {
+            clear_bit(X86_FEATURE_PSE, &edx);
+            clear_bit(X86_FEATURE_PAE, &edx);
+            clear_bit(X86_FEATURE_PSE36, &edx);
+        }
 #endif
+
     }
 
     regs->eax = (unsigned long) eax;
@@ -542,7 +564,7 @@
     int i, inst_len;
     int inst_copy_from_guest(unsigned char *, unsigned long, int);
 
-    __vmread(INSTRUCTION_LEN, &inst_len);
+    __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
     memset(inst, 0, MAX_INST_LEN);
     if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
         printf("check_for_null_selector: get guest instruction failed\n");
@@ -584,15 +606,66 @@
     return 0;
 }
 
+void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
+       unsigned long count, int size, long value, int dir, int pvalid)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == NULL) {
+        printk("bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+       printf("VMX I/O has not yet completed\n");
+       domain_crash_synchronous();
+    }
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+
+    p = &vio->vp_ioreq;
+    p->dir = dir;
+    p->pdata_valid = pvalid;
+
+    p->type = IOREQ_TYPE_PIO;
+    p->size = size;
+    p->addr = port;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+        if (vmx_paging_enabled(current))
+            p->u.pdata = (void *) gva_to_gpa(value);
+        else
+            p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+        p->u.data = value;
+
+    p->state = STATE_IOREQ_READY;
+
+    if (vmx_portio_intercept(p)) {
+        /* no blocking & no evtchn notification */
+        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+        return;
+    }
+
+    evtchn_send(iopacket_port(v->domain));
+    vmx_wait_io();
+}
+
 static void vmx_io_instruction(struct cpu_user_regs *regs, 
                    unsigned long exit_qualification, unsigned long inst_len) 
 {
-    struct vcpu *d = current;
-    vcpu_iodata_t *vio;
-    ioreq_t *p;
-    unsigned long addr;
+    struct mi_per_cpu_info *mpcip;
     unsigned long eip, cs, eflags;
+    unsigned long port, size, dir;
     int vm86;
+
+    mpcip = &current->domain->arch.vmx_platform.mpci;
+    mpcip->instr = INSTR_PIO;
+    mpcip->flags = 0;
 
     __vmread(GUEST_RIP, &eip);
     __vmread(GUEST_CS_SELECTOR, &cs);
@@ -605,104 +678,93 @@
                 vm86, cs, eip, exit_qualification);
 
     if (test_bit(6, &exit_qualification))
-        addr = (exit_qualification >> 16) & (0xffff);
+        port = (exit_qualification >> 16) & 0xFFFF;
     else
-        addr = regs->edx & 0xffff;
-
-    vio = get_vio(d->domain, d->vcpu_id);
-    if (vio == 0) {
-        printk("bad shared page: %lx", (unsigned long) vio);
-        domain_crash_synchronous(); 
-    }
-    p = &vio->vp_ioreq;
-    p->dir = test_bit(3, &exit_qualification); /* direction */
-
-    p->pdata_valid = 0;
-    p->count = 1;
-    p->size = (exit_qualification & 7) + 1;
+        port = regs->edx & 0xffff;
+    TRACE_VMEXIT(2, port);
+    size = (exit_qualification & 7) + 1;
+    dir = test_bit(3, &exit_qualification); /* direction */
 
     if (test_bit(4, &exit_qualification)) { /* string instruction */
-       unsigned long laddr;
-
-       __vmread(GUEST_LINEAR_ADDRESS, &laddr);
+       unsigned long addr, count = 1;
+       int sign = regs->eflags & EF_DF ? -1 : 1;
+
+       __vmread(GUEST_LINEAR_ADDRESS, &addr);
+
         /*
          * In protected mode, guest linear address is invalid if the
          * selector is null.
          */
-        if (!vm86 && check_for_null_selector(eip)) {
-            laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
-        }
-        p->pdata_valid = 1;
-
-        p->u.data = laddr;
-        if (vmx_paging_enabled(d))
-                p->u.pdata = (void *) gva_to_gpa(p->u.data);
-        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
-
-        if (test_bit(5, &exit_qualification)) /* "rep" prefix */
-           p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
-
-        /*
-         * Split up string I/O operations that cross page boundaries. Don't
-         * advance %eip so that "rep insb" will restart at the next page.
-         */
-        if ((p->u.data & PAGE_MASK) != 
-               ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
-           VMX_DBG_LOG(DBG_LEVEL_2,
-               "String I/O crosses page boundary (cs:eip=0x%lx:0x%lx)\n",
-               cs, eip);
-            if (p->u.data & (p->size - 1)) {
-               printf("Unaligned string I/O operation (cs:eip=0x%lx:0x%lx)\n",
-                       cs, eip);
-                domain_crash_synchronous();     
-            }
-            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
-        } else {
-            __update_guest_eip(inst_len);
-        }
-    } else if (p->dir == IOREQ_WRITE) {
-        p->u.data = regs->eax;
+        if (!vm86 && check_for_null_selector(eip))
+            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
+
+        if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
+           mpcip->flags |= REPZ;
+           count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+       }
+
+       /*
+        * Handle string pio instructions that cross pages or that
+        * are unaligned. See the comments in vmx_platform.c/handle_mmio()
+        */
+       if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+           unsigned long value = 0;
+
+           mpcip->flags |= OVERLAP;
+           if (dir == IOREQ_WRITE)
+               vmx_copy(&value, addr, size, VMX_COPY_IN);
+           send_pio_req(regs, port, 1, size, value, dir, 0);
+       } else {
+           if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+                if (sign > 0)
+                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+                else
+                    count = (addr & ~PAGE_MASK) / size;
+           } else
+               __update_guest_eip(inst_len);
+
+           send_pio_req(regs, port, count, size, addr, dir, 1);
+       }
+    } else {
         __update_guest_eip(inst_len);
-    } else
-        __update_guest_eip(inst_len);
-
-    p->addr = addr;
-    p->port_mm = 0;
-
-    /* Check if the packet needs to be intercepted */
-    if (vmx_portio_intercept(p))
-       /* no blocking & no evtchn notification */
-        return;
-
-    set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
-    p->state = STATE_IOREQ_READY;
-    evtchn_send(iopacket_port(d->domain));
-    vmx_wait_io();
-}
-
-enum { COPY_IN = 0, COPY_OUT };
-
-static inline int
+       send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
+    }
+}
+
+int
 vmx_copy(void *buf, unsigned long laddr, int size, int dir)
 {
+    unsigned long gpa, mfn;
     char *addr;
-    unsigned long mfn;
-
-    if ( (size + (laddr & (PAGE_SIZE - 1))) >= PAGE_SIZE )
-    {
-       printf("vmx_copy exceeds page boundary\n");
-        return 0;
-    }
-
-    mfn = phys_to_machine_mapping(laddr >> PAGE_SHIFT);
-    addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
-
-    if (dir == COPY_IN)
-           memcpy(buf, addr, size);
-    else
-           memcpy(addr, buf, size);
-
-    unmap_domain_page(addr);
+    int count;
+
+    while (size > 0) {
+       count = PAGE_SIZE - (laddr & ~PAGE_MASK);
+       if (count > size)
+           count = size;
+
+       if (vmx_paging_enabled(current)) {
+               gpa = gva_to_gpa(laddr);
+               mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT);
+       } else
+               mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
+       if (mfn == INVALID_MFN)
+               return 0;
+
+       addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
+
+       if (dir == VMX_COPY_IN)
+           memcpy(buf, addr, count);
+       else
+           memcpy(addr, buf, count);
+
+       unmap_domain_page(addr);
+
+       laddr += count;
+       buf += count;
+       size -= count;
+    }
+
     return 1;
 }
 
@@ -712,7 +774,7 @@
     unsigned long inst_len;
     int error = 0;
 
-    error |= __vmread(INSTRUCTION_LEN, &inst_len);
+    error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
     error |= __vmread(GUEST_RIP, &c->eip);
     c->eip += inst_len; /* skip transition instruction */
     error |= __vmread(GUEST_RSP, &c->esp);
@@ -795,7 +857,7 @@
         * removed some translation or changed page attributes.
         * We simply invalidate the shadow.
         */
-       mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
+       mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
        if (mfn != pagetable_get_pfn(d->arch.guest_table)) {
            printk("Invalid CR3 value=%x", c->cr3);
            domain_crash_synchronous();
@@ -813,7 +875,7 @@
            domain_crash_synchronous(); 
            return 0;
        }
-       mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
+       mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
        d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
        update_pagetables(d);
        /* 
@@ -889,7 +951,7 @@
     u32 cp;
 
     /* make sure vmxassist exists (this is not an error) */
-    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
+    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN))
        return 0;
     if (magic != VMXASSIST_MAGIC)
        return 0;
@@ -903,20 +965,20 @@
      */
     case VMX_ASSIST_INVOKE:
        /* save the old context */
-       if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+       if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
            goto error;
        if (cp != 0) {
            if (!vmx_world_save(d, &c))
                goto error;
-           if (!vmx_copy(&c, cp, sizeof(c), COPY_OUT))
+           if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT))
                goto error;
        }
 
        /* restore the new context, this should activate vmxassist */
-       if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), COPY_IN))
+       if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN))
            goto error;
        if (cp != 0) {
-            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
                goto error;
            if (!vmx_world_restore(d, &c))
                goto error;
@@ -930,10 +992,10 @@
      */
     case VMX_ASSIST_RESTORE:
        /* save the old context */
-       if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+       if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
            goto error;
        if (cp != 0) {
-            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
                goto error;
            if (!vmx_world_restore(d, &c))
                goto error;
@@ -968,7 +1030,7 @@
         /*
          * The guest CR3 must be pointing to the guest physical.
          */
-        if ( !VALID_MFN(mfn = phys_to_machine_mapping(
+        if ( !VALID_MFN(mfn = get_mfn_from_pfn(
                             d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
              !get_page(pfn_to_page(mfn), d->domain) )
         {
@@ -996,6 +1058,15 @@
 
 #if CONFIG_PAGING_LEVELS >= 4 
             if(!shadow_set_guest_paging_levels(d->domain, 4)) {
+                printk("Unsupported guest paging levels\n");
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+#endif
+        }
+        else
+        {
+#if CONFIG_PAGING_LEVELS >= 4
+            if(!shadow_set_guest_paging_levels(d->domain, 2)) {
                 printk("Unsupported guest paging levels\n");
                 domain_crash_synchronous(); /* need to take a clean path */
             }
@@ -1164,7 +1235,7 @@
              * removed some translation or changed page attributes.
              * We simply invalidate the shadow.
              */
-            mfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
+            mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(d->arch.guest_table))
                 __vmx_bug(regs);
             shadow_sync_all(d->domain);
@@ -1175,7 +1246,7 @@
              */
             VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
             if ( ((value >> PAGE_SHIFT) > d->domain->max_pages ) ||
-                 !VALID_MFN(mfn = phys_to_machine_mapping(value >> 
PAGE_SHIFT)) ||
+                 !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) ||
                  !get_page(pfn_to_page(mfn), d->domain) )
             {
                 printk("Invalid CR3 value=%lx", value);
@@ -1282,13 +1353,20 @@
     case TYPE_MOV_TO_CR:
         gp = exit_qualification & CONTROL_REG_ACCESS_REG;
         cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+        TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
+        TRACE_VMEXIT(2,cr);
+        TRACE_VMEXIT(3,gp);
         return mov_to_cr(gp, cr, regs);
     case TYPE_MOV_FROM_CR:
         gp = exit_qualification & CONTROL_REG_ACCESS_REG;
         cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+        TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
+        TRACE_VMEXIT(2,cr);
+        TRACE_VMEXIT(3,gp);
         mov_from_cr(cr, gp, regs);
         break;
     case TYPE_CLTS:
+        TRACE_VMEXIT(1,TYPE_CLTS);
         clts();
         setup_fpu(current);
 
@@ -1301,6 +1379,7 @@
         __vmwrite(CR0_READ_SHADOW, value);
         break;
     case TYPE_LMSW:
+        TRACE_VMEXIT(1,TYPE_LMSW);
         __vmread(CR0_READ_SHADOW, &value);
        value = (value & ~0xF) |
                (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
@@ -1518,15 +1597,18 @@
 
     __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
     if (idtv_info_field & INTR_INFO_VALID_MASK) {
-       if ((idtv_info_field & 0x0700) != 0x400) { /* exclude soft ints */
-            __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
-
-           if (idtv_info_field & 0x800) { /* valid error code */
-               unsigned long error_code;
-               __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
-               __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
-           } 
-       }
+       __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+       __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
+       if (inst_len >= 1 && inst_len <= 15) 
+           __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+
+       if (idtv_info_field & 0x800) { /* valid error code */
+           unsigned long error_code;
+           __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
+           __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+       } 
+
         VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
     }
 
@@ -1544,6 +1626,7 @@
 
     __vmread(GUEST_RIP, &eip);
     TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
+    TRACE_VMEXIT(0,exit_reason);
 
     switch (exit_reason) {
     case EXIT_REASON_EXCEPTION_NMI:
@@ -1562,6 +1645,7 @@
             __vmx_bug(&regs);
         vector &= 0xff;
 
+        TRACE_VMEXIT(1,vector);
         perfc_incra(cause_vector, vector);
 
         TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
@@ -1606,6 +1690,10 @@
         {
             __vmread(EXIT_QUALIFICATION, &va);
             __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
+            
+           TRACE_VMEXIT(3,regs.error_code);
+           TRACE_VMEXIT(4,va);
+
             VMX_DBG_LOG(DBG_LEVEL_VMMU, 
                         "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
                         (unsigned long)regs.eax, (unsigned long)regs.ebx,
@@ -1680,6 +1768,8 @@
                 eip, inst_len, exit_qualification);
         if (vmx_cr_access(exit_qualification, &regs))
            __update_guest_eip(inst_len);
+        TRACE_VMEXIT(3,regs.error_code);
+        TRACE_VMEXIT(4,exit_qualification);
         break;
     }
     case EXIT_REASON_DR_ACCESS:
@@ -1692,6 +1782,7 @@
         __vmread(EXIT_QUALIFICATION, &exit_qualification);
         __get_instruction_length(inst_len);
         vmx_io_instruction(&regs, exit_qualification, inst_len);
+        TRACE_VMEXIT(4,exit_qualification);
         break;
     case EXIT_REASON_MSR_READ:
         __get_instruction_length(inst_len);
@@ -1726,6 +1817,25 @@
 #endif
 }
 
+#ifdef TRACE_BUFFER
+asmlinkage void trace_vmentry (void)
+{
+    TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
+          
trace_values[current->processor][1],trace_values[current->processor][2],
+          
trace_values[current->processor][3],trace_values[current->processor][4]);
+    TRACE_VMEXIT(0,9);
+    TRACE_VMEXIT(1,9);
+    TRACE_VMEXIT(2,9);
+    TRACE_VMEXIT(3,9);
+    TRACE_VMEXIT(4,9);
+    return;
+}
+asmlinkage void trace_vmexit (void)
+{
+    TRACE_3D(TRC_VMEXIT,0,0,0);
+    return;
+}
+#endif 
 #endif /* CONFIG_VMX */
 
 /*
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/vmx_intercept.c      Fri Sep  9 16:30:54 2005
@@ -172,7 +172,7 @@
 
     if (p->size != 1 ||
         p->pdata_valid ||
-        p->port_mm)
+       p->type != IOREQ_TYPE_PIO)
         return 0;
     
     if (p->addr == PIT_MODE &&
@@ -284,7 +284,5 @@
         if (!reinit)
            register_portio_handler(0x40, 4, intercept_pit_io); 
     }
-
-}
-
+}
 #endif /* CONFIG_VMX */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/vmx_io.c     Fri Sep  9 16:30:54 2005
@@ -33,6 +33,7 @@
 #include <asm/vmx_platform.h>
 #include <asm/vmx_virpit.h>
 #include <asm/apic.h>
+#include <asm/shadow.h>
 
 #include <public/io/ioreq.h>
 #include <public/io/vmx_vlapic.h>
@@ -123,7 +124,6 @@
             regs->esp &= 0xFFFF0000;
             regs->esp |= (value & 0xFFFF);
             break;
-
         case 5:
             regs->ebp &= 0xFFFF0000;
             regs->ebp |= (value & 0xFFFF);
@@ -207,7 +207,6 @@
             *reg &= ~0xFFFF;
             *reg |= (value & 0xFFFF);
             break;
-
         case LONG:
             *reg &= ~0xFFFFFFFF;
             *reg |= (value & 0xFFFFFFFF);
@@ -322,13 +321,319 @@
 }
 #endif
 
+extern long get_reg_value(int size, int index, int seg, struct cpu_user_regs 
*regs);
+
+static inline void set_eflags_CF(int size, unsigned long v1,
+       unsigned long v2, struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) > (v2 & mask))
+       regs->eflags |= X86_EFLAGS_CF;
+    else
+       regs->eflags &= ~X86_EFLAGS_CF;
+}
+
+static inline void set_eflags_OF(int size, unsigned long v1,
+       unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
+{
+    if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1)))
+       regs->eflags |= X86_EFLAGS_OF;
+}
+
+static inline void set_eflags_AF(int size, unsigned long v1,
+       unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
+{
+    if ((v1 ^ v2 ^ v3) & 0x10)
+       regs->eflags |= X86_EFLAGS_AF;
+}
+
+static inline void set_eflags_ZF(int size, unsigned long v1,
+       struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) == 0)
+       regs->eflags |= X86_EFLAGS_ZF;
+}
+
+static inline void set_eflags_SF(int size, unsigned long v1,
+       struct cpu_user_regs *regs)
+{
+    if (v1 & (1 << ((8 * size) - 1)))
+       regs->eflags |= X86_EFLAGS_SF;
+}
+
+static char parity_table[256] = {
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+static inline void set_eflags_PF(int size, unsigned long v1,
+       struct cpu_user_regs *regs)
+{
+    if (parity_table[v1 & 0xFF])
+       regs->eflags |= X86_EFLAGS_PF;
+}
+
+static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                                       struct mi_per_cpu_info *mpcip)
+{
+    unsigned long old_eax;
+    int sign = p->df ? -1 : 1;
+
+    if (p->dir == IOREQ_WRITE) {
+        if (p->pdata_valid) {
+            regs->esi += sign * p->count * p->size;
+           if (mpcip->flags & REPZ)
+               regs->ecx -= p->count;
+        }
+    } else {
+       if (mpcip->flags & OVERLAP) {
+           unsigned long addr;
+
+            regs->edi += sign * p->count * p->size;
+           if (mpcip->flags & REPZ)
+               regs->ecx -= p->count;
+
+           addr = regs->edi;
+           if (sign > 0)
+               addr -= p->size;
+           vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
+       } else if (p->pdata_valid) {
+            regs->edi += sign * p->count * p->size;
+           if (mpcip->flags & REPZ)
+               regs->ecx -= p->count;
+        } else {
+           old_eax = regs->eax;
+           switch (p->size) {
+            case 1:
+                regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
+                break;
+            case 2:
+                regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
+                break;
+            case 4:
+                regs->eax = (p->u.data & 0xffffffff);
+                break;
+            default:
+               printk("Error: %s unknown port size\n", __FUNCTION__);
+               domain_crash_synchronous();
+           }
+       }
+    }
+}
+
+static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                                       struct mi_per_cpu_info *mpcip)
+{
+    int sign = p->df ? -1 : 1;
+    int size = -1, index = -1;
+    unsigned long value = 0, diff = 0;
+    unsigned long src, dst;
+
+    src = mpcip->operand[0];
+    dst = mpcip->operand[1];
+    size = operand_size(src);
+
+    switch (mpcip->instr) {
+    case INSTR_MOV:
+       if (dst & REGISTER) {
+           index = operand_index(dst);
+           set_reg_value(size, index, 0, regs, p->u.data);
+       }
+       break;
+
+    case INSTR_MOVZ:
+       if (dst & REGISTER) {
+           index = operand_index(dst);
+           switch (size) {
+           case BYTE: p->u.data = p->u.data & 0xFFULL; break;
+           case WORD: p->u.data = p->u.data & 0xFFFFULL; break;
+           case LONG: p->u.data = p->u.data & 0xFFFFFFFFULL; break;
+           }
+           set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
+       }
+       break;
+
+    case INSTR_MOVS:
+       sign = p->df ? -1 : 1;
+       regs->esi += sign * p->count * p->size;
+       regs->edi += sign * p->count * p->size;
+
+       if ((mpcip->flags & OVERLAP) && p->dir == IOREQ_READ) {
+           unsigned long addr = regs->edi;
+
+           if (sign > 0)
+               addr -= p->size;
+           vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
+       }
+
+       if (mpcip->flags & REPZ)
+           regs->ecx -= p->count;
+       break;
+
+    case INSTR_STOS:
+       sign = p->df ? -1 : 1;
+       regs->edi += sign * p->count * p->size;
+       if (mpcip->flags & REPZ)
+           regs->ecx -= p->count;
+       break;
+
+    case INSTR_AND:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data & value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data & value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data & value;
+           set_reg_value(size, index, 0, regs, diff);
+       }
+
+       /*
+        * The OF and CF flags are cleared; the SF, ZF, and PF
+        * flags are set according to the result. The state of
+        * the AF flag is undefined.
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_OR:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data | value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data | value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data | value;
+           set_reg_value(size, index, 0, regs, diff);
+       }
+
+       /*
+        * The OF and CF flags are cleared; the SF, ZF, and PF
+        * flags are set according to the result. The state of
+        * the AF flag is undefined.
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_XOR:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data ^ value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data ^ value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data ^ value;
+           set_reg_value(size, index, 0, regs, diff);
+       }
+
+       /*
+        * The OF and CF flags are cleared; the SF, ZF, and PF
+        * flags are set according to the result. The state of
+        * the AF flag is undefined.
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_CMP:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+           diff = (unsigned long) p->u.data - value;
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+           diff = (unsigned long) p->u.data - value;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+           diff = value - (unsigned long) p->u.data;
+       }
+
+       /*
+        * The CF, OF, SF, ZF, AF, and PF flags are set according
+        * to the result
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_CF(size, value, (unsigned long) p->u.data, regs);
+       set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs);
+       set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+
+    case INSTR_TEST:
+       if (src & REGISTER) {
+           index = operand_index(src);
+           value = get_reg_value(size, index, 0, regs);
+       } else if (src & IMMEDIATE) {
+           value = mpcip->immediate;
+       } else if (src & MEMORY) {
+           index = operand_index(dst);
+           value = get_reg_value(size, index, 0, regs);
+       }
+       diff = (unsigned long) p->u.data & value;
+
+       /*
+        * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
+        */
+       regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                         X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+       set_eflags_ZF(size, diff, regs);
+       set_eflags_SF(size, diff, regs);
+       set_eflags_PF(size, diff, regs);
+       break;
+    }
+
+    load_cpu_user_regs(regs);
+}
+
 void vmx_io_assist(struct vcpu *v) 
 {
     vcpu_iodata_t *vio;
     ioreq_t *p;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax;
-    int sign;
     struct mi_per_cpu_info *mpci_p;
     struct cpu_user_regs *inst_decoder_regs;
 
@@ -340,80 +645,26 @@
     if (vio == 0) {
         VMX_DBG_LOG(DBG_LEVEL_1, 
                     "bad shared page: %lx", (unsigned long) vio);
+       printf("bad shared page: %lx\n", (unsigned long) vio);
         domain_crash_synchronous();
     }
+
     p = &vio->vp_ioreq;
-
-    if (p->state == STATE_IORESP_HOOK){
+    if (p->state == STATE_IORESP_HOOK)
         vmx_hooks_assist(v);
-    }
 
     /* clear IO wait VMX flag */
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
-        if (p->state != STATE_IORESP_READY) {
-                /* An interrupt send event raced us */
-                return;
-        } else {
-            p->state = STATE_INVALID;
-        }
-        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
-    } else {
-        return;
-    }
-
-    sign = (p->df) ? -1 : 1;
-    if (p->port_mm) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            regs->edi += sign * p->count * p->size;
-        } else {
-            if (p->dir == IOREQ_WRITE) {
-                return;
-            }
-            int size = -1, index = -1;
-
-            size = operand_size(v->domain->arch.vmx_platform.mpci.mmio_target);
-            index = 
operand_index(v->domain->arch.vmx_platform.mpci.mmio_target);
-
-            if (v->domain->arch.vmx_platform.mpci.mmio_target & WZEROEXTEND) {
-                p->u.data = p->u.data & 0xffff;
-            }        
-            set_reg_value(size, index, 0, regs, p->u.data);
-
-        }
-        load_cpu_user_regs(regs);
-        return;
-    }
-
-    if (p->dir == IOREQ_WRITE) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            regs->ecx -= p->count;
-        }
-        return;
-    } else {
-        if (p->pdata_valid) {
-            regs->edi += sign * p->count * p->size;
-            regs->ecx -= p->count;
-            return;
-        }
-    }
-
-    old_eax = regs->eax;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        printk("Error: %s unknwon port size\n", __FUNCTION__);
-        domain_crash_synchronous();
+        if (p->state == STATE_IORESP_READY) {
+           p->state = STATE_INVALID;
+            clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+
+           if (p->type == IOREQ_TYPE_PIO)
+               vmx_pio_assist(regs, p, mpci_p);
+           else
+               vmx_mmio_assist(regs, p, mpci_p);
+       }
+       /* else an interrupt send event raced us */
     }
 }
 
@@ -456,8 +707,9 @@
     int port = iopacket_port(current->domain);
 
     do {
-        if(!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
+        if (!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
             do_block();
+
         vmx_check_events(current);
         if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
             break;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/vmx_platform.c       Fri Sep  9 16:30:54 2005
@@ -64,37 +64,37 @@
         case QUAD:
             return (long)(reg);
         default:
-            printk("Error: <__get_reg_value>Invalid reg size\n");
+       printf("Error: (__get_reg_value) Invalid reg size\n");
             domain_crash_synchronous();
     }
 }
 
-static long get_reg_value(int size, int index, int seg, struct cpu_user_regs 
*regs) 
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) 
 {
     if (size == BYTE) {
         switch (index) { 
-            case 0: //%al
+       case 0: /* %al */
                 return (char)(regs->rax & 0xFF);
-            case 1: //%cl  
+       case 1: /* %cl */
                 return (char)(regs->rcx & 0xFF);
-            case 2: //%dl
+       case 2: /* %dl */
                 return (char)(regs->rdx & 0xFF); 
-            case 3: //%bl
+       case 3: /* %bl */
                 return (char)(regs->rbx & 0xFF);
-            case 4: //%ah
+       case 4: /* %ah */
                 return (char)((regs->rax & 0xFF00) >> 8);
-            case 5: //%ch 
+       case 5: /* %ch */
                 return (char)((regs->rcx & 0xFF00) >> 8);
-            case 6: //%dh
+       case 6: /* %dh */
                 return (char)((regs->rdx & 0xFF00) >> 8);
-            case 7: //%bh
+       case 7: /* %bh */
                 return (char)((regs->rbx & 0xFF00) >> 8);
             default:
-                printk("Error: (get_reg_value)Invalid index value\n"); 
+           printf("Error: (get_reg_value) Invalid index value\n"); 
                 domain_crash_synchronous();
         }
-
-    }
+    }
+
     switch (index) {
         case 0: return __get_reg_value(regs->rax, size);
         case 1: return __get_reg_value(regs->rcx, size);
@@ -113,7 +113,7 @@
         case 14: return __get_reg_value(regs->r14, size);
         case 15: return __get_reg_value(regs->r15, size);
         default:
-            printk("Error: (get_reg_value)Invalid index value\n"); 
+       printf("Error: (get_reg_value) Invalid index value\n"); 
             domain_crash_synchronous();
     }
 }
@@ -129,117 +129,91 @@
     __vmread(GUEST_RIP, &regs->eip);
 }
 
-static long get_reg_value(int size, int index, int seg, struct cpu_user_regs 
*regs)
+static inline long __get_reg_value(unsigned long reg, int size)
 {                    
-    /*               
-     * Reference the db_reg[] table
-     */              
-    switch (size) {  
-    case BYTE: 
+    switch(size) {
+    case WORD:
+       return (short)(reg & 0xFFFF);
+    case LONG:
+       return (int)(reg & 0xFFFFFFFF);
+    default:
+       printf("Error: (__get_reg_value) Invalid reg size\n");
+       domain_crash_synchronous();
+    }
+}
+
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{                    
+    if (size == BYTE) {
         switch (index) { 
-        case 0: //%al
+       case 0: /* %al */
             return (char)(regs->eax & 0xFF);
-        case 1: //%cl  
+       case 1: /* %cl */
             return (char)(regs->ecx & 0xFF);
-        case 2: //%dl
+       case 2: /* %dl */
             return (char)(regs->edx & 0xFF); 
-        case 3: //%bl
+       case 3: /* %bl */
             return (char)(regs->ebx & 0xFF);
-        case 4: //%ah
+       case 4: /* %ah */
             return (char)((regs->eax & 0xFF00) >> 8);
-        case 5: //%ch 
+       case 5: /* %ch */
             return (char)((regs->ecx & 0xFF00) >> 8);
-        case 6: //%dh
+       case 6: /* %dh */
             return (char)((regs->edx & 0xFF00) >> 8);
-        case 7: //%bh
+       case 7: /* %bh */
             return (char)((regs->ebx & 0xFF00) >> 8);
         default:
-            printk("Error: (get_reg_value)size case 0 error\n"); 
+           printf("Error: (get_reg_value) Invalid index value\n"); 
             domain_crash_synchronous();
         }
-    case WORD:
+        }
+
         switch (index) {
-        case 0: //%ax
-            return (short)(regs->eax & 0xFFFF);
-        case 1: //%cx
-            return (short)(regs->ecx & 0xFFFF);
-        case 2: //%dx
-            return (short)(regs->edx & 0xFFFF);
-        case 3: //%bx
-            return (short)(regs->ebx & 0xFFFF);
-        case 4: //%sp
-            return (short)(regs->esp & 0xFFFF);
-            break;
-        case 5: //%bp
-            return (short)(regs->ebp & 0xFFFF);
-        case 6: //%si
-            return (short)(regs->esi & 0xFFFF);
-        case 7: //%di
-            return (short)(regs->edi & 0xFFFF);
-        default:
-            printk("Error: (get_reg_value)size case 1 error\n");
-            domain_crash_synchronous();
-        }
-    case LONG:
-        switch (index) {
-        case 0: //%eax
-            return regs->eax;
-        case 1: //%ecx
-            return regs->ecx;
-        case 2: //%edx
-            return regs->edx;
-
-        case 3: //%ebx
-            return regs->ebx;
-        case 4: //%esp
-            return regs->esp;
-        case 5: //%ebp
-            return regs->ebp;
-        case 6: //%esi
-            return regs->esi;
-        case 7: //%edi
-            return regs->edi;
-        default:
-            printk("Error: (get_reg_value)size case 2 error\n");
-            domain_crash_synchronous();
-        }
+    case 0: return __get_reg_value(regs->eax, size);
+    case 1: return __get_reg_value(regs->ecx, size);
+    case 2: return __get_reg_value(regs->edx, size);
+    case 3: return __get_reg_value(regs->ebx, size);
+    case 4: return __get_reg_value(regs->esp, size);
+    case 5: return __get_reg_value(regs->ebp, size);
+    case 6: return __get_reg_value(regs->esi, size);
+    case 7: return __get_reg_value(regs->edi, size);
     default:
-        printk("Error: (get_reg_value)size case error\n");
+       printf("Error: (get_reg_value) Invalid index value\n"); 
         domain_crash_synchronous();
     }
 }
 #endif
 
-static inline const unsigned char *check_prefix(const unsigned char *inst, 
struct instruction *thread_inst, unsigned char *rex_p)
+static inline unsigned char *check_prefix(unsigned char *inst,
+               struct instruction *thread_inst, unsigned char *rex_p)
 {
     while (1) {
         switch (*inst) {
-            /* rex prefix for em64t instructions*/
+        /* rex prefix for em64t instructions */
             case 0x40 ... 0x4e:
                 *rex_p = *inst;
                 break;
-
-            case 0xf3: //REPZ
+        case 0xf3: /* REPZ */
                thread_inst->flags = REPZ;
-                       break;
-            case 0xf2: //REPNZ
+               break;
+        case 0xf2: /* REPNZ */
                thread_inst->flags = REPNZ;
-                       break;
-            case 0xf0: //LOCK
+               break;
+        case 0xf0: /* LOCK */
                break;
-            case 0x2e: //CS
-            case 0x36: //SS
-            case 0x3e: //DS
-            case 0x26: //ES
-            case 0x64: //FS
-            case 0x65: //GS
-                       thread_inst->seg_sel = *inst;
+        case 0x2e: /* CS */
+        case 0x36: /* SS */
+        case 0x3e: /* DS */
+        case 0x26: /* ES */
+        case 0x64: /* FS */
+        case 0x65: /* GS */
+               thread_inst->seg_sel = *inst;
                 break;
-            case 0x66: //32bit->16bit
+        case 0x66: /* 32bit->16bit */
                 thread_inst->op_size = WORD;
                 break;
             case 0x67:
-                       printf("Error: Not handling 0x67 (yet)\n");
+               printf("Error: Not handling 0x67 (yet)\n");
                 domain_crash_synchronous();
                 break;
             default:
@@ -249,7 +223,7 @@
     }
 }
 
-static inline unsigned long get_immediate(int op16, const unsigned char *inst, 
int op_size)
+static inline unsigned long get_immediate(int op16,const unsigned char *inst, 
int op_size)
 {
     int mod, reg, rm;
     unsigned long val = 0;
@@ -317,275 +291,328 @@
 
 static void init_instruction(struct instruction *mmio_inst)
 {
-    memset(mmio_inst->i_name, '0', I_NAME_LEN);
-    mmio_inst->op_size =  0;
-    mmio_inst->offset = 0;
+    mmio_inst->instr = 0;
+    mmio_inst->op_size = 0;
     mmio_inst->immediate = 0;
     mmio_inst->seg_sel = 0;
-    mmio_inst->op_num = 0;
 
     mmio_inst->operand[0] = 0;
     mmio_inst->operand[1] = 0;
-    mmio_inst->operand[2] = 0;
         
     mmio_inst->flags = 0;
 }
 
 #define GET_OP_SIZE_FOR_BYTE(op_size)   \
-    do {if (rex) op_size = BYTE_64;else op_size = BYTE;} while(0)
+    do {                               \
+       if (rex)                        \
+           op_size = BYTE_64;          \
+       else                            \
+           op_size = BYTE;             \
+    } while(0)
 
 #define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
-    do {if (rex & 0x8) op_size = QUAD; else if (op_size != WORD) op_size = 
LONG;} while(0)
-
-static int vmx_decode(const unsigned char *inst, struct instruction 
*thread_inst)
+    do {                               \
+       if (rex & 0x8)                  \
+           op_size = QUAD;             \
+       else if (op_size != WORD)       \
+           op_size = LONG;             \
+    } while(0)
+
+
+/*
+ * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
+ */
+static int mem_acc(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, 0, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
+ */
+static int acc_mem(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+/*
+ * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
+ */
+static int mem_reg(unsigned char size, unsigned char *opcode,
+                       struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, index, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
+ */
+static int reg_mem(unsigned char size, unsigned char *opcode,
+                       struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, index, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+static int vmx_decode(unsigned char *opcode, struct instruction *instr)
 {
     unsigned long eflags;
     int index, vm86 = 0;
     unsigned char rex = 0;
     unsigned char tmp_size = 0;
 
-
-    init_instruction(thread_inst);
-
-    inst = check_prefix(inst, thread_inst, &rex);
+    init_instruction(instr);
+
+    opcode = check_prefix(opcode, instr, &rex);
 
     __vmread(GUEST_RFLAGS, &eflags);
     if (eflags & X86_EFLAGS_VM)
         vm86 = 1;
 
     if (vm86) { /* meaning is reversed */
-       if (thread_inst->op_size == WORD)
-           thread_inst->op_size = LONG;
-       else if (thread_inst->op_size == LONG)
-           thread_inst->op_size = WORD;
-       else if (thread_inst->op_size == 0)
-           thread_inst->op_size = WORD;
-    }
-
-    switch(*inst) {
-        case 0x81:
-            /* This is only a workaround for cmpl instruction*/
-            strcpy((char *)thread_inst->i_name, "cmp");
+       if (instr->op_size == WORD)
+           instr->op_size = LONG;
+       else if (instr->op_size == LONG)
+           instr->op_size = WORD;
+       else if (instr->op_size == 0)
+           instr->op_size = WORD;
+    }
+
+    switch (*opcode) {
+    case 0x0B: /* or m32/16, r32/16 */
+       instr->instr = INSTR_OR;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x20: /* and r8, m8 */
+       instr->instr = INSTR_AND;
+       GET_OP_SIZE_FOR_BYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x21: /* and r32/16, m32/16 */
+       instr->instr = INSTR_AND;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x23: /* and m32/16, r32/16 */
+       instr->instr = INSTR_AND;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x30: /* xor r8, m8 */
+       instr->instr = INSTR_XOR;
+       GET_OP_SIZE_FOR_BYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x31: /* xor r32/16, m32/16 */
+       instr->instr = INSTR_XOR;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x39: /* cmp r32/16, m32/16 */
+       instr->instr = INSTR_CMP;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x81:
+       if (((opcode[1] >> 3) & 7) == 7) { /* cmp $imm, m32/16 */
+           instr->instr = INSTR_CMP;
+           GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, BYTE);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+
             return DECODE_success;
-
-        case 0x88:
-            /* mov r8 to m8 */
-            thread_inst->op_size = BYTE;
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[0] = mk_operand(tmp_size, index, 0, REGISTER);
-
-            break;
-        case 0x89:
-            /* mov r32/16 to m32/16 */
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[0] = mk_operand(thread_inst->op_size, index, 
0, REGISTER);
-
-            break;
-        case 0x8a:
-            /* mov m8 to r8 */
-            thread_inst->op_size = BYTE;
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[1] = mk_operand(tmp_size, index, 0, REGISTER);
-            break;
-        case 0x8b:
-            /* mov r32/16 to m32/16 */
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 
0, REGISTER);
-            break;
-        case 0x8c:
-        case 0x8e:
-            printk("%x, This opcode hasn't been handled yet!", *inst);
-            return DECODE_failure;
-            /* Not handle it yet. */
-        case 0xa0:
-            /* mov byte to al */
-            thread_inst->op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[1] = mk_operand(tmp_size, 0, 0, REGISTER);
-            break;
-        case 0xa1:
-            /* mov word/doubleword to ax/eax */
-           GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-           thread_inst->operand[1] = mk_operand(thread_inst->op_size, 0, 0, 
REGISTER);
-
-            break;
-        case 0xa2:
-            /* mov al to (seg:offset) */
-            thread_inst->op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(tmp_size);
-            thread_inst->operand[0] = mk_operand(tmp_size, 0, 0, REGISTER);
-            break;
-        case 0xa3:
-            /* mov ax/eax to (seg:offset) */
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, 
REGISTER);
-            break;
-        case 0xa4:
-            /* movsb */
-            thread_inst->op_size = BYTE;
-            strcpy((char *)thread_inst->i_name, "movs");
-            return DECODE_success;
-        case 0xa5:
-            /* movsw/movsl */
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-           strcpy((char *)thread_inst->i_name, "movs");
-            return DECODE_success;
-        case 0xaa:
-            /* stosb */
-            thread_inst->op_size = BYTE;
-            strcpy((char *)thread_inst->i_name, "stosb");
-            return DECODE_success;
-       case 0xab:
-            /* stosw/stosl */
-            if (thread_inst->op_size == WORD) {
-                strcpy((char *)thread_inst->i_name, "stosw");
-            } else {
-                thread_inst->op_size = LONG;
-                strcpy((char *)thread_inst->i_name, "stosl");
-            }
-            return DECODE_success;
-        case 0xc6:
-            /* mov imm8 to m8 */
-            thread_inst->op_size = BYTE;
-            thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
-            thread_inst->immediate = get_immediate(vm86,
-                                       (inst+1), thread_inst->op_size);
-            break;
-        case 0xc7:
-            /* mov imm16/32 to m16/32 */
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, 
IMMEDIATE);
-            thread_inst->immediate = get_immediate(vm86, (inst+1), 
thread_inst->op_size);
+       } else
+           return DECODE_failure;
+
+    case 0x84:  /* test m8, r8 */
+       instr->instr = INSTR_TEST;
+       instr->op_size = BYTE;
+       GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return mem_reg(tmp_size, opcode, instr, rex);
+
+    case 0x88: /* mov r8, m8 */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return reg_mem(tmp_size, opcode, instr, rex);
+
+    case 0x89: /* mov r32/16, m32/16 */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x8A: /* mov m8, r8 */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return mem_reg(tmp_size, opcode, instr, rex);
+
+    case 0x8B: /* mov m32/16, r32/16 */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0xA0: /* mov <addr>, al */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return mem_acc(tmp_size, instr);
+
+    case 0xA1: /* mov <addr>, ax/eax */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return mem_acc(instr->op_size, instr);
+
+    case 0xA2: /* mov al, <addr> */
+       instr->instr = INSTR_MOV;
+       instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(tmp_size);
+       return acc_mem(tmp_size, instr);
+
+    case 0xA3: /* mov ax/eax, <addr> */
+       instr->instr = INSTR_MOV;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return acc_mem(instr->op_size, instr);
+
+    case 0xA4: /* movsb */
+       instr->instr = INSTR_MOVS;
+       instr->op_size = BYTE;
+        return DECODE_success;
             
-            break;
-        case 0x0f:
-            break;
-        default:
-            printk("%x, This opcode hasn't been handled yet!", *inst);
-            return DECODE_failure;
-    }
+    case 0xA5: /* movsw/movsl */
+       instr->instr = INSTR_MOVS;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return DECODE_success;
     
-    strcpy((char *)thread_inst->i_name, "mov");
-    if (*inst != 0x0f) {
+    case 0xAA: /* stosb */
+       instr->instr = INSTR_STOS;
+       instr->op_size = BYTE;
         return DECODE_success;
-    }
-
-    inst++;
-    switch (*inst) {
+
+    case 0xAB: /* stosw/stosl */
+       instr->instr = INSTR_STOS;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       return DECODE_success;
                     
-        /* movz */
-        case 0xb6:
-            index = get_index((inst + 1), rex);
-            GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-            thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 
0, REGISTER);
-            thread_inst->op_size = BYTE;
-            strcpy((char *)thread_inst->i_name, "movzb");
+    case 0xC6:
+       if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */
+           instr->instr = INSTR_MOV;
+           instr->op_size = BYTE;
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
             
             return DECODE_success;
-        case 0xb7:
-           index = get_index((inst + 1), rex);
-           if (rex & 0x8) {
-                   thread_inst->op_size = LONG;
-                   thread_inst->operand[1] = mk_operand(QUAD, index, 0, 
REGISTER);
-           } else {
-                   thread_inst->op_size = WORD;
-                   thread_inst->operand[1] = mk_operand(LONG, index, 0, 
REGISTER);
-           }
+       } else
+           return DECODE_failure;
             
-            strcpy((char *)thread_inst->i_name, "movzw");
+    case 0xC7:
+       if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */
+           instr->instr = INSTR_MOV;
+           GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
             
             return DECODE_success;
-        default:
-            printk("0f %x, This opcode hasn't been handled yet!", *inst);
-            return DECODE_failure;
-    }
-
-    /* will never reach here */
-    return DECODE_failure;
+       } else
+           return DECODE_failure;
+
+    case 0xF6:
+       if (((opcode[1] >> 3) & 7) == 0) { /* testb $imm8, m8 */
+           instr->instr = INSTR_TEST;
+           instr->op_size = BYTE;
+
+           instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+           instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
+           instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+
+           return DECODE_success;
+       } else
+           return DECODE_failure;
+
+    case 0x0F:
+       break;
+
+    default:
+       printf("%x, This opcode isn't handled yet!\n", *opcode);
+        return DECODE_failure;
+    }
+
+    switch (*++opcode) {
+    case 0xB6: /* movz m8, r16/r32 */
+       instr->instr = INSTR_MOVZ;
+       GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+       index = get_index(opcode + 1, rex);
+       instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
+       instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
+       return DECODE_success;
+
+    case 0xB7: /* movz m16, r32 */
+       instr->instr = INSTR_MOVZ;
+       index = get_index(opcode + 1, rex);
+       if (rex & 0x8) {
+          instr->op_size = LONG;
+          instr->operand[1] = mk_operand(QUAD, index, 0, REGISTER);
+       } else {
+          instr->op_size = WORD;
+          instr->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+       }
+       instr->operand[0] = mk_operand(instr->op_size, 0, 0, MEMORY);
+       return DECODE_success;
+
+    default:
+       printf("0f %x, This opcode isn't handled yet\n", *opcode);
+       return DECODE_failure;
+    }
 }
 
 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int 
inst_len)
 {
-    unsigned long gpa;
-    unsigned long mfn;
-    unsigned char *inst_start;
-    int remaining = 0;
-        
-    if ( (inst_len > MAX_INST_LEN) || (inst_len <= 0) )
+    if (inst_len > MAX_INST_LEN || inst_len <= 0)
         return 0;
-
-    if ( vmx_paging_enabled(current) )
-    {
-        gpa = gva_to_gpa(guest_eip);
-        mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT);
-
-        /* Does this cross a page boundary ? */
-        if ( (guest_eip & PAGE_MASK) != ((guest_eip + inst_len) & PAGE_MASK) )
-        {
-            remaining = (guest_eip + inst_len) & ~PAGE_MASK;
-            inst_len -= remaining;
-        }
-    }
-    else
-    {
-        mfn = phys_to_machine_mapping(guest_eip >> PAGE_SHIFT);
-    }
-
-    inst_start = map_domain_page(mfn);
-    memcpy((char *)buf, inst_start + (guest_eip & ~PAGE_MASK), inst_len);
-    unmap_domain_page(inst_start);
-
-    if ( remaining )
-    {
-        gpa = gva_to_gpa(guest_eip+inst_len+remaining);
-        mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT);
-
-        inst_start = map_domain_page(mfn);
-        memcpy((char *)buf+inst_len, inst_start, remaining);
-        unmap_domain_page(inst_start);
-    }
-
-    return inst_len+remaining;
-}
-
-static int read_from_mmio(struct instruction *inst_p)
-{
-    // Only for mov instruction now!!!
-    if (inst_p->operand[1] & REGISTER)
-        return 1;
-
-    return 0;
-}
-
-// dir:  1 read from mmio
-//       0 write to mmio
-static void send_mmio_req(unsigned long gpa, 
-                   struct instruction *inst_p, long value, int dir, int pvalid)
+    if (!vmx_copy(buf, guest_eip, inst_len, VMX_COPY_IN))
+        return 0;
+    return inst_len;
+}
+
+void send_mmio_req(unsigned char type, unsigned long gpa, 
+          unsigned long count, int size, long value, int dir, int pvalid)
 {
     struct vcpu *d = current;
     vcpu_iodata_t *vio;
     ioreq_t *p;
     int vm86;
-    struct mi_per_cpu_info *mpci_p;
-    struct cpu_user_regs *inst_decoder_regs;
+    struct cpu_user_regs *regs;
     extern long evtchn_send(int lport);
 
-    mpci_p = &current->domain->arch.vmx_platform.mpci;
-    inst_decoder_regs = mpci_p->inst_decoder_regs;
+    regs = current->domain->arch.vmx_platform.mpci.inst_decoder_regs;
 
     vio = get_vio(d->domain, d->vcpu_id);
-
     if (vio == NULL) {
-        printk("bad shared page\n");
+        printf("bad shared page\n");
         domain_crash_synchronous(); 
     }
+
     p = &vio->vp_ioreq;
 
-    vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM;
+    vm86 = regs->eflags & X86_EFLAGS_VM;
 
     if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) {
         printf("VMX I/O has not yet completed\n");
@@ -596,24 +623,21 @@
     p->dir = dir;
     p->pdata_valid = pvalid;
 
-    p->port_mm = 1;
-    p->size = inst_p->op_size;
+    p->type = type;
+    p->size = size;
     p->addr = gpa;
-    p->u.data = value;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+       if (vmx_paging_enabled(current))
+           p->u.pdata = (void *) gva_to_gpa(value);
+        else
+           p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+       p->u.data = value;
 
     p->state = STATE_IOREQ_READY;
-
-    if (inst_p->flags & REPZ) {
-        if (vm86)
-            p->count = inst_decoder_regs->ecx & 0xFFFF;
-        else
-            p->count = inst_decoder_regs->ecx;
-        p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0;
-    } else
-        p->count = 1;
-
-    if ((pvalid) && vmx_paging_enabled(current))
-        p->u.pdata = (void *) gva_to_gpa(p->u.data);
 
     if (vmx_mmio_intercept(p)){
         p->state = STATE_IORESP_READY;
@@ -625,21 +649,53 @@
     vmx_wait_io();
 }
 
+static void mmio_operands(int type, unsigned long gpa, struct instruction 
*inst,
+               struct mi_per_cpu_info *mpcip, struct cpu_user_regs *regs)
+{
+    unsigned long value = 0;
+    int index, size;
+    
+    size = operand_size(inst->operand[0]);
+
+    mpcip->flags = inst->flags;
+    mpcip->instr = inst->instr;
+    mpcip->operand[0] = inst->operand[0]; /* source */
+    mpcip->operand[1] = inst->operand[1]; /* destination */
+
+    if (inst->operand[0] & REGISTER) { /* dest is memory */
+       index = operand_index(inst->operand[0]);
+       value = get_reg_value(size, index, 0, regs);
+       send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+    } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
+       value = inst->immediate;
+       send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+    } else if (inst->operand[0] & MEMORY) { /* dest is register */
+       /* send the request and wait for the value */
+       send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0);
+    } else {
+       printf("mmio_operands: invalid operand\n");
+       domain_crash_synchronous();
+    }
+}
+
+#define GET_REPEAT_COUNT() \
+     (mmio_inst.flags & REPZ ? (vm86 ? regs->ecx & 0xFFFF : regs->ecx) : 1)
+       
 void handle_mmio(unsigned long va, unsigned long gpa)
 {
     unsigned long eip, eflags, cs;
     unsigned long inst_len, inst_addr;
-    struct mi_per_cpu_info *mpci_p;
-    struct cpu_user_regs *inst_decoder_regs;
+    struct mi_per_cpu_info *mpcip;
+    struct cpu_user_regs *regs;
     struct instruction mmio_inst;
     unsigned char inst[MAX_INST_LEN];
-    int vm86, ret;
+    int i, vm86, ret;
      
-    mpci_p = &current->domain->arch.vmx_platform.mpci;
-    inst_decoder_regs = mpci_p->inst_decoder_regs;
+    mpcip = &current->domain->arch.vmx_platform.mpci;
+    regs = mpcip->inst_decoder_regs;
 
     __vmread(GUEST_RIP, &eip);
-    __vmread(INSTRUCTION_LEN, &inst_len);
+    __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
     __vmread(GUEST_RFLAGS, &eflags);
     vm86 = eflags & X86_EFLAGS_VM;
 
@@ -647,108 +703,142 @@
         __vmread(GUEST_CS_SELECTOR, &cs);
         inst_addr = (cs << 4) + eip;
     } else
-        inst_addr = eip; /* XXX should really look at GDT[cs].base too */
-
-    memset(inst, '0', MAX_INST_LEN);
+        inst_addr = eip;
+
+    memset(inst, 0, MAX_INST_LEN);
     ret = inst_copy_from_guest(inst, inst_addr, inst_len);
     if (ret != inst_len) {
-        printk("handle_mmio - EXIT: get guest instruction fault\n");
+        printf("handle_mmio - EXIT: get guest instruction fault\n");
         domain_crash_synchronous();
     }
-
 
     init_instruction(&mmio_inst);
     
     if (vmx_decode(inst, &mmio_inst) == DECODE_failure) {
-        printk("vmx decode failure: eip=%lx, va=%lx\n %x %x %x %x\n", eip, va, 
-               inst[0], inst[1], inst[2], inst[3]);
+       printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:",
+               va, gpa, inst_len);
+       for (i = 0; i < inst_len; i++)
+           printf(" %02x", inst[i] & 0xFF);
+       printf("\n");
         domain_crash_synchronous();
     }
 
-    __vmwrite(GUEST_RIP, eip + inst_len);
-    store_cpu_user_regs(inst_decoder_regs);
-
-    // Only handle "mov" and "movs" instructions!
-    if (!strncmp((char *)mmio_inst.i_name, "movz", 4)) {
-        if (read_from_mmio(&mmio_inst)) {
-            // Send the request and waiting for return value.
-            mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND;
-            send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0);
-            return ;
-        } else {
-            printk("handle_mmio - EXIT: movz error!\n");
-            domain_crash_synchronous();
-        }
-    }
-
-    if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) {
+    store_cpu_user_regs(regs);
+    regs->eip += inst_len; /* advance %eip */
+
+    switch (mmio_inst.instr) {
+    case INSTR_MOV:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_MOVS:
+    {
+       unsigned long count = GET_REPEAT_COUNT();
+       unsigned long size = mmio_inst.op_size;
+       int sign = regs->eflags & EF_DF ? -1 : 1;
        unsigned long addr = 0;
        int dir;
 
+       /* determine non-MMIO address */
        if (vm86) {
            unsigned long seg;
 
            __vmread(GUEST_ES_SELECTOR, &seg);
-           if (((seg << 4) + (inst_decoder_regs->edi & 0xFFFF)) == va) {
+           if (((seg << 4) + (regs->edi & 0xFFFF)) == va) {
                dir = IOREQ_WRITE;
                __vmread(GUEST_DS_SELECTOR, &seg);
-               addr = (seg << 4) + (inst_decoder_regs->esi & 0xFFFF);
+               addr = (seg << 4) + (regs->esi & 0xFFFF);
            } else {
                dir = IOREQ_READ;
-               addr = (seg << 4) + (inst_decoder_regs->edi & 0xFFFF);
+               addr = (seg << 4) + (regs->edi & 0xFFFF);
            }
-       } else { /* XXX should really look at GDT[ds/es].base too */
-           if (va == inst_decoder_regs->edi) {
+       } else {
+           if (va == regs->edi) {
                dir = IOREQ_WRITE;
-               addr = inst_decoder_regs->esi;
+               addr = regs->esi;
            } else {
                dir = IOREQ_READ;
-               addr = inst_decoder_regs->edi;
+               addr = regs->edi;
            }
        }
 
-       send_mmio_req(gpa, &mmio_inst, addr, dir, 1);
-        return;
-    }
-
-    if (!strncmp((char *)mmio_inst.i_name, "mov", 3)) {
-        long value = 0;
-        int size, index;
-
-        if (read_from_mmio(&mmio_inst)) {
-            // Send the request and waiting for return value.
-            mpci_p->mmio_target = mmio_inst.operand[1];
-            send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0);
-            return;
-        } else {
-            // Write to MMIO
-            if (mmio_inst.operand[0] & IMMEDIATE) {
-                value = mmio_inst.immediate;
-            } else if (mmio_inst.operand[0] & REGISTER) {
-                size = operand_size(mmio_inst.operand[0]);
-                index = operand_index(mmio_inst.operand[0]);
-                value = get_reg_value(size, index, 0, inst_decoder_regs);
-            } else {
-                domain_crash_synchronous();
-            }
-            send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0);
-            return;
-        }
-    }
-
-    if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
-        send_mmio_req(gpa, &mmio_inst,
-            inst_decoder_regs->eax, IOREQ_WRITE, 0);
-        return;
-    }
-    /* Workaround for cmp instruction */
-    if (!strncmp((char *)mmio_inst.i_name, "cmp", 3)) {
-        inst_decoder_regs->eflags &= ~X86_EFLAGS_ZF;
-        __vmwrite(GUEST_RFLAGS, inst_decoder_regs->eflags);
-        return;
-    }
-
-    domain_crash_synchronous();
+       mpcip->flags = mmio_inst.flags;
+       mpcip->instr = mmio_inst.instr;
+
+       /*
+        * In case of a movs spanning multiple pages, we break the accesses
+        * up into multiple pages (the device model works with non-continguous
+        * physical guest pages). To copy just one page, we adjust %ecx and
+        * do not advance %eip so that the next "rep movs" copies the next page.
+        * Unaligned accesses, for example movsl starting at PGSZ-2, are
+        * turned into a single copy where we handle the overlapping memory
+        * copy ourself. After this copy succeeds, "rep movs" is executed
+        * again.
+        */
+       if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+           unsigned long value = 0;
+
+           mpcip->flags |= OVERLAP;
+
+           regs->eip -= inst_len; /* do not advance %eip */
+
+           if (dir == IOREQ_WRITE)
+               vmx_copy(&value, addr, size, VMX_COPY_IN);
+           send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0);
+       } else {
+           if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+               regs->eip -= inst_len; /* do not advance %eip */
+
+               if (sign > 0)
+                   count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+               else
+                   count = (addr & ~PAGE_MASK) / size;
+           }
+
+           send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1);
+       }
+        break;
+    }
+
+    case INSTR_MOVZ:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_STOS:
+       /*
+        * Since the destination is always in (contiguous) mmio space we don't
+        * need to break it up into pages.
+        */
+       mpcip->flags = mmio_inst.flags;
+       mpcip->instr = mmio_inst.instr;
+        send_mmio_req(IOREQ_TYPE_COPY, gpa,
+           GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0);
+       break;
+
+    case INSTR_OR:
+       mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_AND:
+       mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_XOR:
+       mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_CMP:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    case INSTR_TEST:
+       mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs);
+       break;
+
+    default:
+       printf("Unhandled MMIO instruction\n");
+       domain_crash_synchronous();
+    }
 }
 
 #endif /* CONFIG_VMX */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Fri Sep  9 16:30:54 2005
@@ -44,7 +44,7 @@
 
     rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
     vmcs_size = vmx_msr_high & 0x1fff;
-    vmcs = alloc_xenheap_pages(get_order(vmcs_size)); 
+    vmcs = alloc_xenheap_pages(get_order_from_bytes(vmcs_size)); 
     memset((char *)vmcs, 0, vmcs_size); /* don't remove this */
 
     vmcs->vmcs_revision_id = vmx_msr_low;
@@ -55,7 +55,7 @@
 {
     int order;
 
-    order = get_order(vmcs_size);
+    order = get_order_from_bytes(vmcs_size);
     free_xenheap_pages(vmcs, order);
 }
 
@@ -76,8 +76,8 @@
     error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
 
     /* need to use 0x1000 instead of PAGE_SIZE */
-    io_bitmap_a = (void*) alloc_xenheap_pages(get_order(0x1000)); 
-    io_bitmap_b = (void*) alloc_xenheap_pages(get_order(0x1000)); 
+    io_bitmap_a = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000)); 
+    io_bitmap_b = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000)); 
     memset(io_bitmap_a, 0xff, 0x1000);
     /* don't bother debug port access */
     clear_bit(PC_DEBUG_PORT, io_bitmap_a);
@@ -148,7 +148,7 @@
     offset = (addr & ~PAGE_MASK);
     addr = round_pgdown(addr);
 
-    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
+    mpfn = get_mfn_from_pfn(addr >> PAGE_SHIFT);
     p = map_domain_page(mpfn);
 
     e820p = (struct e820entry *) ((unsigned long) p + offset); 
@@ -175,7 +175,7 @@
     unmap_domain_page(p);        
 
     /* Initialise shared page */
-    mpfn = phys_to_machine_mapping(gpfn);
+    mpfn = get_mfn_from_pfn(gpfn);
     p = map_domain_page(mpfn);
     d->domain->arch.vmx_platform.shared_page_va = (unsigned long)p;
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_32/asm-offsets.c Fri Sep  9 16:30:54 2005
@@ -71,6 +71,9 @@
     OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
     BLANK();
 
+    DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+    BLANK();
+
     OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code);
     OFFSET(TRAPBOUNCE_cr2, struct trap_bounce, cr2);
     OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_32/entry.S       Fri Sep  9 16:30:54 2005
@@ -61,6 +61,11 @@
 #include <asm/page.h>
 #include <public/xen.h>
 
+#define GET_GUEST_REGS(reg)                     \
+        movl $~(STACK_SIZE-1),reg;              \
+        andl %esp,reg;                          \
+        orl  $(STACK_SIZE-CPUINFO_sizeof),reg;
+
 #define GET_CURRENT(reg)         \
         movl $STACK_SIZE-4, reg; \
         orl  %esp, reg;          \
@@ -121,6 +126,9 @@
 ENTRY(vmx_asm_vmexit_handler)
         /* selectors are restored/saved by VMX */
         VMX_SAVE_ALL_NOSEGREGS
+#ifdef TRACE_BUFFER
+        call trace_vmexit
+#endif
         call vmx_vmexit_handler
         jmp vmx_asm_do_resume
 
@@ -142,6 +150,9 @@
 /* vmx_restore_all_guest */
         call vmx_intr_assist
         call load_cr2
+#ifdef TRACE_BUFFER
+        call trace_vmentry
+#endif
         .endif
         VMX_RESTORE_ALL_NOSEGREGS
         /* 
@@ -273,7 +284,41 @@
         GET_CURRENT(%ebx)
         andl $(NR_hypercalls-1),%eax
         PERFC_INCR(PERFC_hypercalls, %eax)
+#ifndef NDEBUG
+        /* Deliberately corrupt parameter regs not used by this hypercall. */
+        pushl %eax
+        pushl UREGS_eip+4(%esp)
+        pushl 28(%esp) # EBP
+        pushl 28(%esp) # EDI
+        pushl 28(%esp) # ESI
+        pushl 28(%esp) # EDX
+        pushl 28(%esp) # ECX
+        pushl 28(%esp) # EBX
+        movzb hypercall_args_table(,%eax,1),%ecx
+        leal  (%esp,%ecx,4),%edi
+        subl  $6,%ecx
+        negl  %ecx
+        movl  %eax,%esi
+        movl  $0xDEADBEEF,%eax
+        rep   stosl
+        movl  %esi,%eax
+#endif
         call *hypercall_table(,%eax,4)
+#ifndef NDEBUG
+        /* Deliberately corrupt parameter regs used by this hypercall. */
+        addl  $24,%esp     # Shadow parameters
+        popl  %ecx         # Shadow EIP
+        cmpl  %ecx,UREGS_eip(%esp)
+        popl  %ecx         # Shadow hypercall index
+        jne   skip_clobber # If EIP has changed then don't clobber
+        movzb hypercall_args_table(,%ecx,1),%ecx
+        movl  %esp,%edi
+        movl  %eax,%esi
+        movl  $0xDEADBEEF,%eax
+        rep   stosl
+        movl  %esi,%eax
+skip_clobber:
+#endif
         movl %eax,UREGS_eax(%esp)       # save the return value
 
 test_all_events:
@@ -674,12 +719,14 @@
 do_arch_sched_op:
         # Ensure we return success even if we return via schedule_tail()
         xorl %eax,%eax
-        movl %eax,UREGS_eax+4(%esp)
+        GET_GUEST_REGS(%ecx)
+        movl %eax,UREGS_eax(%ecx)
         jmp  do_sched_op
 
 do_switch_vm86:
-        # Discard the return address
-        addl $4,%esp
+        # Reset the stack pointer
+        GET_GUEST_REGS(%ecx)
+        movl %ecx,%esp
 
         # GS:ESI == Ring-1 stack activation
         movl UREGS_esp(%esp),%esi
@@ -749,7 +796,7 @@
         .long do_get_debugreg
         .long do_update_descriptor  /* 10 */
         .long do_ni_hypercall
-        .long do_dom_mem_op
+        .long do_memory_op
         .long do_multicall
         .long do_update_va_mapping
         .long do_set_timer_op       /* 15 */
@@ -768,3 +815,36 @@
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
+
+ENTRY(hypercall_args_table)
+        .byte 1 /* do_set_trap_table    */  /*  0 */
+        .byte 4 /* do_mmu_update        */
+        .byte 2 /* do_set_gdt           */
+        .byte 2 /* do_stack_switch      */
+        .byte 4 /* do_set_callbacks     */
+        .byte 1 /* do_fpu_taskswitch    */  /*  5 */
+        .byte 2 /* do_arch_sched_op     */
+        .byte 1 /* do_dom0_op           */
+        .byte 2 /* do_set_debugreg      */
+        .byte 1 /* do_get_debugreg      */
+        .byte 4 /* do_update_descriptor */  /* 10 */
+        .byte 0 /* do_ni_hypercall      */
+        .byte 2 /* do_memory_op         */
+        .byte 2 /* do_multicall         */
+        .byte 4 /* do_update_va_mapping */
+        .byte 2 /* do_set_timer_op      */  /* 15 */
+        .byte 1 /* do_event_channel_op  */
+        .byte 1 /* do_xen_version       */
+        .byte 3 /* do_console_io        */
+        .byte 1 /* do_physdev_op        */
+        .byte 3 /* do_grant_table_op    */  /* 20 */
+        .byte 2 /* do_vm_assist         */
+        .byte 5 /* do_update_va_mapping_otherdomain */
+        .byte 0 /* do_switch_vm86       */
+        .byte 2 /* do_boot_vcpu         */
+        .byte 0 /* do_ni_hypercall      */  /* 25 */
+        .byte 4 /* do_mmuext_op         */
+        .byte 1 /* do_acm_op            */
+        .rept NR_hypercalls-(.-hypercall_args_table)
+        .byte 0 /* do_ni_hypercall      */
+        .endr
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_32/mm.c  Fri Sep  9 16:30:54 2005
@@ -95,7 +95,7 @@
      * Allocate and map the machine-to-phys table and create read-only mapping 
      * of MPT for guest-OS use.
      */
-    mpt_size  = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
+    mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
@@ -118,7 +118,8 @@
     }
 
     /* Set up mapping cache for domain pages. */
-    mapcache_order = get_order(MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
+    mapcache_order = get_order_from_bytes(
+        MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
     mapcache = alloc_xenheap_pages(mapcache_order);
     memset(mapcache, 0, PAGE_SIZE << mapcache_order);
     for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_32/traps.c       Fri Sep  9 16:30:54 2005
@@ -79,11 +79,8 @@
            "ss: %04lx   cs: %04lx\n",
            ds, es, fs, gs, ss, cs);
 
-    if ( GUEST_MODE(regs) )
-        show_guest_stack();
-    else
-        show_stack((unsigned long *)&regs->esp);
-} 
+    show_stack(regs);
+}
 
 void show_page_walk(unsigned long addr)
 {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_64/asm-offsets.c Fri Sep  9 16:30:54 2005
@@ -71,6 +71,9 @@
     OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
     BLANK();
 
+    DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+    BLANK();
+
     OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code);
     OFFSET(TRAPBOUNCE_cr2, struct trap_bounce, cr2);
     OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_64/entry.S       Fri Sep  9 16:30:54 2005
@@ -11,6 +11,11 @@
 #include <asm/apicdef.h>
 #include <asm/page.h>
 #include <public/xen.h>
+
+#define GET_GUEST_REGS(reg)                     \
+        movq $~(STACK_SIZE-1),reg;              \
+        andq %rsp,reg;                          \
+        orq  $(STACK_SIZE-CPUINFO_sizeof),reg;
 
 #define GET_CURRENT(reg)         \
         movq $STACK_SIZE-8, reg; \
@@ -120,10 +125,42 @@
 /*hypercall:*/
         movq  %r10,%rcx
         andq  $(NR_hypercalls-1),%rax
+#ifndef NDEBUG
+        /* Deliberately corrupt parameter regs not used by this hypercall. */
+        pushq %rdi; pushq %rsi; pushq %rdx; pushq %rcx; pushq %r8 ; pushq %r9 
+        leaq  hypercall_args_table(%rip),%r10
+        movq  $6,%rcx
+        sub   (%r10,%rax,1),%cl
+        movq  %rsp,%rdi
+        movl  $0xDEADBEEF,%eax
+        rep   stosq
+        popq  %r9 ; popq  %r8 ; popq  %rcx; popq  %rdx; popq  %rsi; popq  %rdi
+        movq  UREGS_rax(%rsp),%rax
+        andq  $(NR_hypercalls-1),%rax
+        pushq %rax
+        pushq UREGS_rip+8(%rsp)
+#endif
         leaq  hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax)
         callq *(%r10,%rax,8)
-        movq %rax,UREGS_rax(%rsp)       # save the return value
+#ifndef NDEBUG
+        /* Deliberately corrupt parameter regs used by this hypercall. */
+        popq  %r10         # Shadow RIP
+        cmpq  %r10,UREGS_rip(%rsp)
+        popq  %rcx         # Shadow hypercall index
+        jne   skip_clobber /* If RIP has changed then don't clobber. */
+        leaq  hypercall_args_table(%rip),%r10
+        movb  (%r10,%rcx,1),%cl
+        movl  $0xDEADBEEF,%r10d
+        cmpb  $1,%cl; jb skip_clobber; movq %r10,UREGS_rdi(%rsp)
+        cmpb  $2,%cl; jb skip_clobber; movq %r10,UREGS_rsi(%rsp)
+        cmpb  $3,%cl; jb skip_clobber; movq %r10,UREGS_rdx(%rsp)
+        cmpb  $4,%cl; jb skip_clobber; movq %r10,UREGS_r10(%rsp)
+        cmpb  $5,%cl; jb skip_clobber; movq %r10,UREGS_r8(%rsp)
+        cmpb  $6,%cl; jb skip_clobber; movq %r10,UREGS_r9(%rsp)
+skip_clobber:
+#endif
+        movq  %rax,UREGS_rax(%rsp)       # save the return value
 
 /* %rbx: struct vcpu */
 test_all_events:
@@ -302,7 +339,8 @@
 1:      /* In kernel context already: push new frame at existing %rsp. */
         movq  UREGS_rsp+8(%rsp),%rsi
         andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
-2:      movq  $HYPERVISOR_VIRT_START,%rax
+2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
+        movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
         jb    1f                        # In +ve address space? Then okay.
         movq  $HYPERVISOR_VIRT_END+60,%rax
@@ -538,7 +576,8 @@
 do_arch_sched_op:
         # Ensure we return success even if we return via schedule_tail()
         xorl  %eax,%eax
-        movq  %rax,UREGS_rax+8(%rsp)
+        GET_GUEST_REGS(%r10)
+        movq  %rax,UREGS_rax(%r10)
         jmp   do_sched_op
 
 .data
@@ -578,7 +617,7 @@
         .quad do_get_debugreg
         .quad do_update_descriptor  /* 10 */
         .quad do_ni_hypercall
-        .quad do_dom_mem_op
+        .quad do_memory_op
         .quad do_multicall
         .quad do_update_va_mapping
         .quad do_set_timer_op       /* 15 */
@@ -597,3 +636,36 @@
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad do_ni_hypercall
         .endr
+
+ENTRY(hypercall_args_table)
+        .byte 1 /* do_set_trap_table    */  /*  0 */
+        .byte 4 /* do_mmu_update        */
+        .byte 2 /* do_set_gdt           */
+        .byte 2 /* do_stack_switch      */
+        .byte 3 /* do_set_callbacks     */
+        .byte 1 /* do_fpu_taskswitch    */  /*  5 */
+        .byte 2 /* do_arch_sched_op     */
+        .byte 1 /* do_dom0_op           */
+        .byte 2 /* do_set_debugreg      */
+        .byte 1 /* do_get_debugreg      */
+        .byte 2 /* do_update_descriptor */  /* 10 */
+        .byte 0 /* do_ni_hypercall      */
+        .byte 2 /* do_memory_op         */
+        .byte 2 /* do_multicall         */
+        .byte 3 /* do_update_va_mapping */
+        .byte 1 /* do_set_timer_op      */  /* 15 */
+        .byte 1 /* do_event_channel_op  */
+        .byte 1 /* do_xen_version       */
+        .byte 3 /* do_console_io        */
+        .byte 1 /* do_physdev_op        */
+        .byte 3 /* do_grant_table_op    */  /* 20 */
+        .byte 2 /* do_vm_assist         */
+        .byte 4 /* do_update_va_mapping_otherdomain */
+        .byte 0 /* do_switch_to_user    */
+        .byte 2 /* do_boot_vcpu         */
+        .byte 2 /* do_set_segment_base  */  /* 25 */
+        .byte 4 /* do_mmuext_op         */
+        .byte 1 /* do_acm_op            */
+        .rept NR_hypercalls-(.-hypercall_args_table)
+        .byte 0 /* do_ni_hypercall      */
+        .endr
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_64/mm.c  Fri Sep  9 16:30:54 2005
@@ -98,7 +98,7 @@
      * Allocate and map the machine-to-phys table.
      * This also ensures L3 is present for fixmaps.
      */
-    mpt_size  = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
+    mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Thu Sep  8 15:18:40 2005
+++ b/xen/arch/x86/x86_64/traps.c       Fri Sep  9 16:30:54 2005
@@ -15,24 +15,24 @@
 
 void show_registers(struct cpu_user_regs *regs)
 {
-    printk("CPU:    %d\nEIP:    %04x:[<%016lx>]",
+    printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
            smp_processor_id(), 0xffff & regs->cs, regs->rip);
     if ( !GUEST_MODE(regs) )
         print_symbol(" %s", regs->rip);
-    printk("\nEFLAGS: %016lx\n", regs->eflags);
-    printk("rax: %016lx   rbx: %016lx   rcx: %016lx   rdx: %016lx\n",
-           regs->rax, regs->rbx, regs->rcx, regs->rdx);
-    printk("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
-           regs->rsi, regs->rdi, regs->rbp, regs->rsp);
-    printk("r8:  %016lx   r9:  %016lx   r10: %016lx   r11: %016lx\n",
-           regs->r8,  regs->r9,  regs->r10, regs->r11);
-    printk("r12: %016lx   r13: %016lx   r14: %016lx   r15: %016lx\n",
-           regs->r12, regs->r13, regs->r14, regs->r15);
+    printk("\nRFLAGS: %016lx\n", regs->eflags);
+    printk("rax: %016lx   rbx: %016lx   rcx: %016lx\n",
+           regs->rax, regs->rbx, regs->rcx);
+    printk("rdx: %016lx   rsi: %016lx   rdi: %016lx\n",
+           regs->rdx, regs->rsi, regs->rdi);
+    printk("rbp: %016lx   rsp: %016lx   r8:  %016lx\n",
+           regs->rbp, regs->rsp, regs->r8);
+    printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
+           regs->r9,  regs->r10, regs->r11);
+    printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
+           regs->r12, regs->r13, regs->r14);
+    printk("r15: %016lx\n", regs->r15);
 
-    if ( GUEST_MODE(regs) )
-        show_guest_stack();
-    else
-        show_stack((unsigned long *)regs->rsp);
+    show_stack(regs);
 }
 
 void show_page_walk(unsigned long addr)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/Makefile
--- a/xen/common/Makefile       Thu Sep  8 15:18:40 2005
+++ b/xen/common/Makefile       Fri Sep  9 16:30:54 2005
@@ -2,7 +2,6 @@
 include $(BASEDIR)/Rules.mk
 
 ifeq ($(TARGET_ARCH),ia64)
-#OBJS := $(subst dom_mem_ops.o,,$(OBJS))
 OBJS := $(subst grant_table.o,,$(OBJS))
 endif
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/acm_ops.c
--- a/xen/common/acm_ops.c      Thu Sep  8 15:18:40 2005
+++ b/xen/common/acm_ops.c      Fri Sep  9 16:30:54 2005
@@ -19,6 +19,7 @@
 #include <xen/types.h>
 #include <xen/lib.h>
 #include <xen/mm.h>
+#include <public/acm.h>
 #include <public/acm_ops.h>
 #include <xen/sched.h>
 #include <xen/event.h>
@@ -41,7 +42,8 @@
     POLICY,                     /* access to policy interface (early drop) */
     GETPOLICY,                  /* dump policy cache */
     SETPOLICY,                  /* set policy cache (controls security) */
-    DUMPSTATS                   /* dump policy statistics */
+    DUMPSTATS,                  /* dump policy statistics */
+    GETSSID                     /* retrieve ssidref for domain id */
 } acm_operation_t;
 
 int acm_authorize_acm_ops(struct domain *d, acm_operation_t pops)
@@ -117,6 +119,35 @@
         }
         break;
 
+    case ACM_GETSSID:
+        {
+                       ssidref_t ssidref;
+
+            if (acm_authorize_acm_ops(current->domain, GETSSID))
+                return -EACCES;
+
+                       if (op->u.getssid.get_ssid_by == SSIDREF)
+                               ssidref = op->u.getssid.id.ssidref;
+                       else if (op->u.getssid.get_ssid_by == DOMAINID) {
+                               struct domain *subj = 
find_domain_by_id(op->u.getssid.id.domainid);
+                               if (!subj)
+                                       return -ESRCH; /* domain not found */
+
+                               ssidref = ((struct acm_ssid_domain 
*)(subj->ssid))->ssidref;
+                               put_domain(subj);
+                       } else
+                               return -ESRCH;
+
+            ret = acm_get_ssid(ssidref,
+                               op->u.getssid.ssidbuf,
+                               op->u.getssid.ssidbuf_size);
+            if (ret == ACM_OK)
+                ret = 0;
+            else
+                ret = -ESRCH;
+        }
+        break;
+
     default:
         ret = -ESRCH;
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/domain.c
--- a/xen/common/domain.c       Thu Sep  8 15:18:40 2005
+++ b/xen/common/domain.c       Fri Sep  9 16:30:54 2005
@@ -114,6 +114,8 @@
             sched_rem_domain(v);
         domain_relinquish_resources(d);
         put_domain(d);
+
+        send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
     }
 }
 
@@ -174,7 +176,7 @@
 void domain_shutdown(u8 reason)
 {
     struct domain *d = current->domain;
-    struct vcpu *v;
+    struct vcpu   *v;
 
     if ( d->domain_id == 0 )
     {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Thu Sep  8 15:18:40 2005
+++ b/xen/common/event_channel.c        Fri Sep  9 16:30:54 2005
@@ -250,6 +250,9 @@
 
     if ( virq >= ARRAY_SIZE(v->virq_to_evtchn) )
         return -EINVAL;
+
+    if ( d->domain_id == 0 && virq >= VIRQ_CONSOLE )
+        v = d->vcpu[0];
 
     spin_lock(&d->evtchn_lock);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Thu Sep  8 15:18:40 2005
+++ b/xen/common/grant_table.c  Fri Sep  9 16:30:54 2005
@@ -399,7 +399,7 @@
     {
         int              i;
         grant_mapping_t *new_mt;
-        grant_table_t   *lgt      = ld->grant_table;
+        grant_table_t   *lgt = ld->grant_table;
 
         if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES )
         {
@@ -437,9 +437,8 @@
             ref, dom, dev_hst_ro_flags);
 #endif
 
-    if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
-                                                  dev_hst_ro_flags,
-                                                  addr, &frame)))
+    if ( (rc = __gnttab_activate_grant_ref(ld, led, rd, ref, dev_hst_ro_flags,
+                                           addr, &frame)) >= 0 )
     {
         /*
          * Only make the maptrack live _after_ writing the pte, in case we 
@@ -807,7 +806,8 @@
     int i;
     int result = GNTST_okay;
 
-    for (i = 0; i < count; i++) {
+    for ( i = 0; i < count; i++ )
+    {
         gnttab_donate_t *gop = &uop[i];
 #if GRANT_DEBUG
         printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
@@ -815,19 +815,24 @@
 #endif
         page = &frame_table[gop->mfn];
         
-        if (unlikely(IS_XEN_HEAP_FRAME(page))) { 
+        if ( unlikely(IS_XEN_HEAP_FRAME(page)))
+        { 
             printk("gnttab_donate: xen heap frame mfn=%lx\n", 
                    (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
         }
-        if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+        
+        if ( unlikely(!pfn_valid(page_to_pfn(page))) )
+        {
             printk("gnttab_donate: invalid pfn for mfn=%lx\n", 
                    (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
         }
-        if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+
+        if ( unlikely((e = find_domain_by_id(gop->domid)) == NULL) )
+        {
             printk("gnttab_donate: can't find domain %d\n", gop->domid);
             gop->status = GNTST_bad_domain;
             continue;
@@ -881,47 +886,23 @@
          * headroom.  Also, a domain mustn't have PGC_allocated
          * pages when it is dying.
          */
-#ifdef GRANT_DEBUG
-        if (unlikely(e->tot_pages >= e->max_pages)) {
-            printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n",
-                   e->tot_pages, e->max_pages);
+        if ( unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
+             unlikely(e->tot_pages >= e->max_pages) ||
+             unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle)) )
+        {
+            DPRINTK("gnttab_donate: Transferee has no reservation headroom "
+                    "(%d,%d) or provided a bad grant ref (%08x) or "
+                    "is dying (%lx)\n",
+                    e->tot_pages, e->max_pages, gop->handle, e->domain_flags);
             spin_unlock(&e->page_alloc_lock);
             put_domain(e);
-            result = GNTST_general_error;
+            gop->status = result = GNTST_general_error;
             break;
         }
-        if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
-            printk("gnttab_donate: target domain is dying\n");
-            spin_unlock(&e->page_alloc_lock);
-            put_domain(e);
-            result = GNTST_general_error;
-            break;
-        }
-        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
-            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
-            spin_unlock(&e->page_alloc_lock);
-            put_domain(e);
-            result = GNTST_general_error;
-            break;
-        }
-#else
-        ASSERT(e->tot_pages <= e->max_pages);
-        if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
-            unlikely(e->tot_pages == e->max_pages) ||
-            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
-            printk("gnttab_donate: Transferee has no reservation headroom (%d,"
-                   "%d) or provided a bad grant ref (%08x) or is dying (%p)\n",
-                   e->tot_pages, e->max_pages, gop->handle, e->d_flags);
-            spin_unlock(&e->page_alloc_lock);
-            put_domain(e);
-            result = GNTST_general_error;
-            break;
-        }
-#endif
+
         /* Okay, add the page to 'e'. */
-        if (unlikely(e->tot_pages++ == 0)) {
+        if ( unlikely(e->tot_pages++ == 0) )
             get_knownalive_domain(e);
-        }
         list_add_tail(&page->list, &e->page_list);
         page_set_owner(page, e);
         
@@ -937,6 +918,7 @@
         
         gop->status = GNTST_okay;
     }
+
     return result;
 }
 
@@ -956,38 +938,38 @@
     
     rc = -EFAULT;
     switch ( cmd )
-        {
-        case GNTTABOP_map_grant_ref:
-            if ( unlikely(!array_access_ok(
-                              uop, count, sizeof(gnttab_map_grant_ref_t))) )
-                goto out;
-            rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
-            break;
-        case GNTTABOP_unmap_grant_ref:
-            if ( unlikely(!array_access_ok(
-                              uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
-                goto out;
-            rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, 
-                                        count);
-            break;
-        case GNTTABOP_setup_table:
-            rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
-            break;
+    {
+    case GNTTABOP_map_grant_ref:
+        if ( unlikely(!array_access_ok(
+            uop, count, sizeof(gnttab_map_grant_ref_t))) )
+            goto out;
+        rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
+        break;
+    case GNTTABOP_unmap_grant_ref:
+        if ( unlikely(!array_access_ok(
+            uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
+            goto out;
+        rc = gnttab_unmap_grant_ref(
+            (gnttab_unmap_grant_ref_t *)uop, count);
+        break;
+    case GNTTABOP_setup_table:
+        rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
+        break;
 #if GRANT_DEBUG
-        case GNTTABOP_dump_table:
-            rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
-            break;
+    case GNTTABOP_dump_table:
+        rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
+        break;
 #endif
-        case GNTTABOP_donate:
-            if (unlikely(!array_access_ok(uop, count, 
-                                          sizeof(gnttab_donate_t))))
-                goto out;
-            rc = gnttab_donate(uop, count);
-            break;
-        default:
-            rc = -ENOSYS;
-            break;
-        }
+    case GNTTABOP_donate:
+        if (unlikely(!array_access_ok(
+            uop, count, sizeof(gnttab_donate_t))))
+            goto out;
+        rc = gnttab_donate(uop, count);
+        break;
+    default:
+        rc = -ENOSYS;
+        break;
+    }
     
   out:
     UNLOCK_BIGLOCK(d);
@@ -1020,17 +1002,17 @@
     lgt = ld->grant_table;
     
 #if GRANT_DEBUG_VERBOSE
-    if ( ld->domain_ id != 0 ) {
-            DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
-                    rd->domain_id, ld->domain_id, frame, readonly);
-      }
+    if ( ld->domain_id != 0 )
+        DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
+                rd->domain_id, ld->domain_id, frame, readonly);
 #endif
     
     /* Fast exit if we're not mapping anything using grant tables */
     if ( lgt->map_count == 0 )
         return 0;
     
-    if ( get_domain(rd) == 0 ) {
+    if ( get_domain(rd) == 0 )
+    {
         DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
                 rd->domain_id);
         return 0;
@@ -1211,13 +1193,13 @@
         DPRINTK("Bad pfn (%lx)\n", pfn);
     else
     {
-        machine_to_phys_mapping[frame] = pfn;
+        set_pfn_from_mfn(frame, pfn);
 
         if ( unlikely(shadow_mode_log_dirty(ld)))
              mark_dirty(ld, frame);
 
         if (shadow_mode_translate(ld))
-            __phys_to_machine_mapping[pfn] = frame;
+            set_mfn_from_pfn(pfn, frame);
     }
     sha->frame = __mfn_to_gpfn(rd, frame);
     sha->domid = rd->domain_id;
@@ -1267,9 +1249,11 @@
     for ( i = 0; i < NR_GRANT_FRAMES; i++ )
     {
         SHARE_PFN_WITH_DOMAIN(
-            virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
-        machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] =
-            INVALID_M2P_ENTRY;
+            virt_to_page((char *)t->shared + (i * PAGE_SIZE)),
+            d);
+        set_pfn_from_mfn(
+            (virt_to_phys(t->shared) >> PAGE_SHIFT) + i,
+            INVALID_M2P_ENTRY);
     }
 
     /* Okay, install the structure. */
@@ -1306,57 +1290,53 @@
     {
         map = &gt->maptrack[handle];
 
-        if ( map->ref_and_flags & GNTMAP_device_map )
-        {
-            dom = map->domid;
-            ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
-
-            DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
-                    handle, ref,
-                    map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
-
-            if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
-                 unlikely(ld == rd) )
+        if ( !(map->ref_and_flags & GNTMAP_device_map) )
+            continue;
+
+        dom = map->domid;
+        ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+
+        DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
+                handle, ref, map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
+
+        if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+             unlikely(ld == rd) )
+        {
+            if ( rd != NULL )
+                put_domain(rd);
+            printk(KERN_WARNING "Grant release: No dom%d\n", dom);
+            continue;
+        }
+
+        act = &rd->grant_table->active[ref];
+        sha = &rd->grant_table->shared[ref];
+
+        spin_lock(&rd->grant_table->lock);
+
+        if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
+        {
+            frame = act->frame;
+
+            if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
+                 ( (act->pin & GNTPIN_devw_mask) >  0 ) )
             {
-                if ( rd != NULL )
-                    put_domain(rd);
-
-                printk(KERN_WARNING "Grant release: No dom%d\n", dom);
-                continue;
+                clear_bit(_GTF_writing, &sha->flags);
+                put_page_type(&frame_table[frame]);
             }
 
-            act = &rd->grant_table->active[ref];
-            sha = &rd->grant_table->shared[ref];
-
-            spin_lock(&rd->grant_table->lock);
-
-            if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
+            map->ref_and_flags &= ~GNTMAP_device_map;
+            act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
+            if ( act->pin == 0 )
             {
-                frame = act->frame;
-
-                if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
-                     ( (act->pin & GNTPIN_devw_mask) >  0 ) )
-                {
-                    clear_bit(_GTF_writing, &sha->flags);
-                    put_page_type(&frame_table[frame]);
-                }
-
-                act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
-
-                if ( act->pin == 0 )
-                {
-                    clear_bit(_GTF_reading, &sha->flags);
-                    map->ref_and_flags = 0;
-                    put_page(&frame_table[frame]);
-                }
-                else
-                    map->ref_and_flags &= ~GNTMAP_device_map;
+                clear_bit(_GTF_reading, &sha->flags);
+                map->ref_and_flags = 0;
+                put_page(&frame_table[frame]);
             }
-
-            spin_unlock(&rd->grant_table->lock);
-
-            put_domain(rd);
-        }
+        }
+
+        spin_unlock(&rd->grant_table->lock);
+
+        put_domain(rd);
     }
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/kernel.c
--- a/xen/common/kernel.c       Thu Sep  8 15:18:40 2005
+++ b/xen/common/kernel.c       Fri Sep  9 16:30:54 2005
@@ -46,7 +46,7 @@
         if ( optval != NULL )
             *optval++ = '\0';
 
-        for ( param = &__setup_start; param != &__setup_end; param++ )
+        for ( param = &__setup_start; param <= &__setup_end; param++ )
         {
             if ( strcmp(param->name, opt ) != 0 )
                 continue;
@@ -110,6 +110,38 @@
             return -EFAULT;
         return 0;
     }
+
+    case XENVER_capabilities:
+    {
+        xen_capabilities_info_t info;
+        extern void arch_get_xen_caps(xen_capabilities_info_t * info);
+        
+        memset(&info, 0, sizeof(info));
+        arch_get_xen_caps(&info);
+
+        if ( copy_to_user(arg, &info, sizeof(info)) )
+            return -EFAULT;
+        return 0;
+    }
+    
+    case XENVER_parameters:
+    {
+        xen_parameters_info_t info = { .virt_start = HYPERVISOR_VIRT_START };
+
+        if ( copy_to_user(arg, &info, sizeof(info)) )
+            return -EFAULT;
+        return 0;
+        
+    }
+    
+    case XENVER_changeset:
+    {
+        xen_changeset_info_t chgset;
+        safe_strcpy(chgset, XEN_CHANGESET);
+        if ( copy_to_user(arg, chgset, sizeof(chgset)) )
+            return -EFAULT;
+        return 0;
+    }
     }
 
     return -ENOSYS;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/multicall.c
--- a/xen/common/multicall.c    Thu Sep  8 15:18:40 2005
+++ b/xen/common/multicall.c    Fri Sep  9 16:30:54 2005
@@ -45,6 +45,18 @@
 
         do_multicall_call(&mcs->call);
 
+#ifndef NDEBUG
+        {
+            /*
+             * Deliberately corrupt the contents of the multicall structure.
+             * The caller must depend only on the 'result' field on return.
+             */
+            multicall_entry_t corrupt;
+            memset(&corrupt, 0xAA, sizeof(corrupt));
+            (void)__copy_to_user(&call_list[i], &corrupt, sizeof(corrupt));
+        }
+#endif
+
         if ( unlikely(__put_user(mcs->call.result, &call_list[i].result)) )
         {
             DPRINTK("Error writing result back to multicall block.\n");
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Thu Sep  8 15:18:40 2005
+++ b/xen/common/page_alloc.c   Fri Sep  9 16:30:54 2005
@@ -216,7 +216,7 @@
 #define NR_ZONES    3
 
 
-#define MAX_DMADOM_PFN 0xFFFFF
+#define MAX_DMADOM_PFN 0x7FFFFUL /* 31 addressable bits */
 #define pfn_dom_zone_type(_pfn)                                 \
     (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
 
@@ -485,43 +485,40 @@
 
 void init_domheap_pages(physaddr_t ps, physaddr_t pe)
 {
+    unsigned long s_tot, e_tot, s_dma, e_dma, s_nrm, e_nrm;
+
     ASSERT(!in_irq());
 
-    ps = round_pgup(ps) >> PAGE_SHIFT;
-    pe = round_pgdown(pe) >> PAGE_SHIFT;
-    if ( pe <= ps )
-        return;
-
-    if ( (ps < MAX_DMADOM_PFN) && (pe > MAX_DMADOM_PFN) )
-    {
-        init_heap_pages(
-            MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
-        init_heap_pages(
-            MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN), pe - MAX_DMADOM_PFN);
-    }
-    else
-    {
-        init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
-    }
+    s_tot = round_pgup(ps) >> PAGE_SHIFT;
+    e_tot = round_pgdown(pe) >> PAGE_SHIFT;
+
+    s_dma = min(s_tot, MAX_DMADOM_PFN + 1);
+    e_dma = min(e_tot, MAX_DMADOM_PFN + 1);
+    if ( s_dma < e_dma )
+        init_heap_pages(MEMZONE_DMADOM, pfn_to_page(s_dma), e_dma - s_dma);
+
+    s_nrm = max(s_tot, MAX_DMADOM_PFN + 1);
+    e_nrm = max(e_tot, MAX_DMADOM_PFN + 1);
+    if ( s_nrm < e_nrm )
+        init_heap_pages(MEMZONE_DOM, pfn_to_page(s_nrm), e_nrm - s_nrm);
 }
 
 
 struct pfn_info *alloc_domheap_pages(
     struct domain *d, unsigned int order, unsigned int flags)
 {
-    struct pfn_info *pg;
+    struct pfn_info *pg = NULL;
     cpumask_t mask;
     int i;
 
     ASSERT(!in_irq());
 
-    pg = NULL;
-    if (! (flags & ALLOC_DOM_DMA))
+    if ( !(flags & ALLOC_DOM_DMA) )
         pg = alloc_heap_pages(MEMZONE_DOM, order);
-    if (pg == NULL) {
-        if ( unlikely((pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL) )
+
+    if ( pg == NULL )
+        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL )
             return NULL;
-    }
 
     mask = pg->u.free.cpumask;
     tlbflush_filter(mask, pg->tlbflush_timestamp);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/schedule.c
--- a/xen/common/schedule.c     Thu Sep  8 15:18:40 2005
+++ b/xen/common/schedule.c     Fri Sep  9 16:30:54 2005
@@ -218,9 +218,7 @@
             && spin_is_locked(&schedule_data[v->processor].schedule_lock) )
         cpu_relax();
 
-    /* Counteract lazy context switching. */
-    if ( cpu_isset(v->processor, v->domain->cpumask) )
-        sync_lazy_execstate_cpu(v->processor);
+    sync_vcpu_execstate(v);
 }
 
 void vcpu_wake(struct vcpu *v)
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/trace.c
--- a/xen/common/trace.c        Thu Sep  8 15:18:40 2005
+++ b/xen/common/trace.c        Fri Sep  9 16:30:54 2005
@@ -66,7 +66,7 @@
     }
 
     nr_pages = num_online_cpus() * opt_tbuf_size;
-    order    = get_order(nr_pages * PAGE_SIZE);
+    order    = get_order_from_pages(nr_pages);
     
     if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
     {
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/xmalloc.c
--- a/xen/common/xmalloc.c      Thu Sep  8 15:18:40 2005
+++ b/xen/common/xmalloc.c      Fri Sep  9 16:30:54 2005
@@ -86,7 +86,7 @@
 static void *xmalloc_whole_pages(size_t size)
 {
     struct xmalloc_hdr *hdr;
-    unsigned int pageorder = get_order(size);
+    unsigned int pageorder = get_order_from_bytes(size);
 
     hdr = alloc_xenheap_pages(pageorder);
     if ( hdr == NULL )
@@ -159,7 +159,7 @@
     /* Big allocs free directly. */
     if ( hdr->size >= PAGE_SIZE )
     {
-        free_xenheap_pages(hdr, get_order(hdr->size));
+        free_xenheap_pages(hdr, get_order_from_bytes(hdr->size));
         return;
     }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Thu Sep  8 15:18:40 2005
+++ b/xen/drivers/char/console.c        Fri Sep  9 16:30:54 2005
@@ -627,7 +627,7 @@
     if ( bytes == 0 )
         return 0;
 
-    order = get_order(bytes);
+    order = get_order_from_bytes(bytes);
     debugtrace_buf = alloc_xenheap_pages(order);
     ASSERT(debugtrace_buf != NULL);
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Thu Sep  8 15:18:40 2005
+++ b/xen/drivers/char/serial.c Fri Sep  9 16:30:54 2005
@@ -366,8 +366,9 @@
 void serial_async_transmit(struct serial_port *port)
 {
     BUG_ON(!port->driver->tx_empty);
-    if ( !port->txbuf )
-        port->txbuf = alloc_xenheap_pages(get_order(SERIAL_TXBUFSZ));
+    if ( port->txbuf == NULL )
+        port->txbuf = alloc_xenheap_pages(
+            get_order_from_bytes(SERIAL_TXBUFSZ));
 }
 
 /*
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/acm/acm_core.h
--- a/xen/include/acm/acm_core.h        Thu Sep  8 15:18:40 2005
+++ b/xen/include/acm/acm_core.h        Fri Sep  9 16:30:54 2005
@@ -101,9 +101,15 @@
  *     primary ssidref   = lower 16 bit
  *      secondary ssidref = higher 16 bit
  */
+#define ACM_PRIMARY(ssidref) \
+       ((ssidref) & 0xffff)
+
+#define ACM_SECONDARY(ssidref) \
+       ((ssidref) >> 16)
+
 #define GET_SSIDREF(POLICY, ssidref) \
        ((POLICY) == acm_bin_pol.primary_policy_code) ? \
-       ((ssidref) & 0xffff) : ((ssidref) >> 16)
+       ACM_PRIMARY(ssidref) : ACM_SECONDARY(ssidref)
 
 /* macros to access ssid pointer for primary / secondary policy */
 #define GET_SSIDP(POLICY, ssid) \
@@ -116,6 +122,7 @@
 int acm_set_policy(void *buf, u16 buf_size, int isuserbuffer);
 int acm_get_policy(void *buf, u16 buf_size);
 int acm_dump_statistics(void *buf, u16 buf_size);
+int acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size);
 
 #endif
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h       Thu Sep  8 15:18:40 2005
+++ b/xen/include/acm/acm_hooks.h       Fri Sep  9 16:30:54 2005
@@ -92,6 +92,7 @@
     int  (*dump_binary_policy)         (u8 *buffer, u16 buf_size);
     int  (*set_binary_policy)          (u8 *buffer, u16 buf_size);
     int  (*dump_statistics)            (u8 *buffer, u16 buf_size);
+    int  (*dump_ssid_types)            (ssidref_t ssidref, u8 *buffer, u16 
buf_size);
     /* domain management control hooks (can be NULL) */
     int  (*pre_domain_create)          (void *subject_ssid, ssidref_t ssidref);
     void (*post_domain_create)         (domid_t domid, ssidref_t ssidref);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/asm_defns.h
--- a/xen/include/asm-x86/asm_defns.h   Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/asm_defns.h   Fri Sep  9 16:30:54 2005
@@ -6,11 +6,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/processor.h>
 
-#ifndef STR
-#define __STR(x) #x
-#define STR(x) __STR(x)
-#endif
-
 #ifdef __x86_64__
 #include <asm/x86_64/asm_defns.h>
 #else
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/bitops.h
--- a/xen/include/asm-x86/bitops.h      Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/bitops.h      Fri Sep  9 16:30:54 2005
@@ -6,11 +6,6 @@
  */
 
 #include <xen/config.h>
-
-#ifndef STR
-#define __STR(x) #x
-#define STR(x) __STR(x)
-#endif
 
 /*
  * These have to be done with inline assembly: that way the bit-setting
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/mm.h  Fri Sep  9 16:30:54 2005
@@ -255,10 +255,13 @@
  * contiguous (or near contiguous) physical memory.
  */
 #undef  machine_to_phys_mapping
-#define machine_to_phys_mapping ((u32 *)RDWR_MPT_VIRT_START)
-#define INVALID_M2P_ENTRY        (~0U)
-#define VALID_M2P(_e)            (!((_e) & (1U<<31)))
+#define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
+#define INVALID_M2P_ENTRY        (~0UL)
+#define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))
 #define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
+
+#define set_pfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
+#define get_pfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
 
 /*
  * The phys_to_machine_mapping is the reversed mapping of MPT for full
@@ -266,17 +269,17 @@
  * guests, so we steal the address space that would have normally
  * been used by the read-only MPT map.
  */
-#define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
-#define INVALID_MFN               (~0UL)
-#define VALID_MFN(_mfn)           (!((_mfn) & (1U<<31)))
-
-/* Returns the machine physical */
-static inline unsigned long phys_to_machine_mapping(unsigned long pfn) 
+#define phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
+#define INVALID_MFN             (~0UL)
+#define VALID_MFN(_mfn)         (!((_mfn) & (1U<<31)))
+
+#define set_mfn_from_pfn(pfn, mfn) (phys_to_machine_mapping[(pfn)] = (mfn))
+static inline unsigned long get_mfn_from_pfn(unsigned long pfn) 
 {
     unsigned long mfn;
     l1_pgentry_t pte;
 
-    if ( (__copy_from_user(&pte, &__phys_to_machine_mapping[pfn],
+    if ( (__copy_from_user(&pte, &phys_to_machine_mapping[pfn],
                            sizeof(pte)) == 0) &&
          (l1e_get_flags(pte) & _PAGE_PRESENT) )
        mfn = l1e_get_pfn(pte);
@@ -285,7 +288,6 @@
     
     return mfn; 
 }
-#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
 
 #ifdef MEMORY_GUARD
 void memguard_init(void);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/page-guest32.h
--- a/xen/include/asm-x86/page-guest32.h        Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/page-guest32.h        Fri Sep  9 16:30:54 2005
@@ -32,6 +32,11 @@
 /* Get pte access flags (unsigned int). */
 #define l1e_get_flags_32(x)           (get_pte_flags_32((x).l1))
 #define l2e_get_flags_32(x)           (get_pte_flags_32((x).l2))
+
+#define l1e_get_paddr_32(x)           \
+    ((physaddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_paddr_32(x)           \
+    ((physaddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
 
 /* Construct an empty pte. */
 #define l1e_empty_32()                ((l1_pgentry_32_t) { 0 })
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/page.h        Fri Sep  9 16:30:54 2005
@@ -280,12 +280,21 @@
 
 #ifndef __ASSEMBLY__
 
-static __inline__ int get_order(unsigned long size)
+static inline int get_order_from_bytes(physaddr_t size)
 {
     int order;
     size = (size-1) >> PAGE_SHIFT;
     for ( order = 0; size; order++ )
         size >>= 1;
+    return order;
+}
+
+static inline int get_order_from_pages(unsigned long nr_pages)
+{
+    int order;
+    nr_pages--;
+    for ( order = 0; nr_pages; order++ )
+        nr_pages >>= 1;
     return order;
 }
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/processor.h   Fri Sep  9 16:30:54 2005
@@ -496,9 +496,7 @@
 
 #endif
 
-void show_guest_stack();
-void show_trace(unsigned long *esp);
-void show_stack(unsigned long *esp);
+void show_stack(struct cpu_user_regs *regs);
 void show_registers(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/shadow.h      Fri Sep  9 16:30:54 2005
@@ -34,6 +34,8 @@
 #include <asm/vmx.h>
 #include <public/dom0_ops.h>
 #include <asm/shadow_public.h>
+#include <asm/page-guest32.h>
+#include <asm/shadow_ops.h>
 
 /* Shadow PT operation mode : shadow-mode variable in arch_domain. */
 
@@ -104,9 +106,9 @@
 } while (0)
 #endif
 
-#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((L1_PAGETABLE_ENTRIES - 1) - 
(_max)) << 16) | (_min))
+#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) 
- (_max)) << 16) | (_min))
 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
-#define SHADOW_MAX(_encoded) ((L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
+#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) 
>> 16))
 
 extern void shadow_mode_init(void);
 extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
@@ -132,6 +134,7 @@
                                        struct domain_mmap_cache *cache);
 #if CONFIG_PAGING_LEVELS >= 3
 #include <asm/page-guest32.h>
+extern unsigned long gva_to_gpa(unsigned long gva);
 extern void shadow_l3_normal_pt_update(struct domain *d,
                                        unsigned long pa, l3_pgentry_t l3e,
                                        struct domain_mmap_cache *cache);
@@ -269,14 +272,14 @@
 
 #define __mfn_to_gpfn(_d, mfn)                         \
     ( (shadow_mode_translate(_d))                      \
-      ? machine_to_phys_mapping[(mfn)]                 \
+      ? get_pfn_from_mfn(mfn)                                   \
       : (mfn) )
 
 #define __gpfn_to_mfn(_d, gpfn)                        \
     ({                                                 \
         ASSERT(current->domain == (_d));               \
         (shadow_mode_translate(_d))                    \
-        ? phys_to_machine_mapping(gpfn)                \
+        ? get_mfn_from_pfn(gpfn)                \
         : (gpfn);                                      \
     })
 
@@ -461,7 +464,7 @@
     // This wants the nice compact set of PFNs from 0..domain's max,
     // which __mfn_to_gpfn() only returns for translated domains.
     //
-    pfn = machine_to_phys_mapping[mfn];
+    pfn = get_pfn_from_mfn(mfn);
 
     /*
      * Values with the MSB set denote MFNs that aren't really part of the 
@@ -562,7 +565,7 @@
     old_hl2e = v->arch.hl2_vtable[index];
 
     if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
-         VALID_MFN(mfn = phys_to_machine_mapping(l2e_get_pfn(gl2e))) )
+         VALID_MFN(mfn = get_mfn_from_pfn(l2e_get_pfn(gl2e))) )
         new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
     else
         new_hl2e = l1e_empty();
@@ -794,22 +797,22 @@
 #endif
 
 static inline void l1pte_propagate_from_guest(
-    struct domain *d, l1_pgentry_t gpte, l1_pgentry_t *spte_p)
+    struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p)
 { 
     unsigned long mfn;
     l1_pgentry_t spte;
 
     spte = l1e_empty();
 
-    if ( ((l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+    if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
           (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
          VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) )
     {
         spte = l1e_from_pfn(
-            mfn, l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
+            mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
 
         if ( shadow_mode_log_dirty(d) ||
-             !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
+             !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
              mfn_is_page_table(mfn) )
         {
             l1e_remove_flags(spte, _PAGE_RW);
@@ -859,22 +862,22 @@
 
 static inline void l2pde_general(
     struct domain *d,
-    l2_pgentry_t *gpde_p,
+    guest_l2_pgentry_t *gpde_p,
     l2_pgentry_t *spde_p,
     unsigned long sl1mfn)
 {
-    l2_pgentry_t gpde = *gpde_p;
+    guest_l2_pgentry_t gpde = *gpde_p;
     l2_pgentry_t spde;
 
     spde = l2e_empty();
-    if ( (l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
+    if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
     {
         spde = l2e_from_pfn(
-            sl1mfn, 
-            (l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
+            sl1mfn,
+            (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & 
~_PAGE_AVAIL);
 
         /* N.B. PDEs do not have a dirty bit. */
-        l2e_add_flags(gpde, _PAGE_ACCESSED);
+        guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
 
         *gpde_p = gpde;
     }
@@ -887,12 +890,12 @@
 }
 
 static inline void l2pde_propagate_from_guest(
-    struct domain *d, l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
-{
-    l2_pgentry_t gpde = *gpde_p;
+    struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
+{
+    guest_l2_pgentry_t gpde = *gpde_p;
     unsigned long sl1mfn = 0;
 
-    if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
+    if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT )
         sl1mfn =  __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
     l2pde_general(d, gpde_p, spde_p, sl1mfn);
 }
@@ -904,7 +907,7 @@
 static int inline
 validate_pte_change(
     struct domain *d,
-    l1_pgentry_t new_pte,
+    guest_l1_pgentry_t new_pte,
     l1_pgentry_t *shadow_pte_p)
 {
     l1_pgentry_t old_spte, new_spte;
@@ -1004,7 +1007,7 @@
 static int inline
 validate_pde_change(
     struct domain *d,
-    l2_pgentry_t new_gpde,
+    guest_l2_pgentry_t new_gpde,
     l2_pgentry_t *shadow_pde_p)
 {
     l2_pgentry_t old_spde, new_spde;
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/shadow_64.h   Fri Sep  9 16:30:54 2005
@@ -27,6 +27,7 @@
 #ifndef _XEN_SHADOW_64_H
 #define _XEN_SHADOW_64_H
 #include <asm/shadow.h>
+#include <asm/shadow_ops.h>
 
 #define READ_FAULT  0
 #define WRITE_FAULT 1
@@ -42,14 +43,14 @@
 #define ESH_LOG(_f, _a...) ((void)0)
 #endif
 
-#define L4      4UL
-#define L3      3UL
-#define L2      2UL
-#define L1      1UL
+#define PAGING_L4      4UL
+#define PAGING_L3      3UL
+#define PAGING_L2      2UL
+#define PAGING_L1      1UL
 #define L_MASK  0xff
 
-#define ROOT_LEVEL_64   L4
-#define ROOT_LEVEL_32   L2
+#define ROOT_LEVEL_64   PAGING_L4
+#define ROOT_LEVEL_32   PAGING_L2
 
 #define SHADOW_ENTRY    (2UL << 16)
 #define GUEST_ENTRY     (1UL << 16)
@@ -58,6 +59,10 @@
 #define SET_ENTRY   (1UL << 8)
 
 #define PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
+
+/* For 32-bit VMX guest to allocate shadow L1 & L2*/
+#define SL1_ORDER   1
+#define SL2_ORDER   2
 
 typedef struct { intpte_t lo; } pgentry_64_t;
 #define shadow_level_to_type(l)    (l << 29)
@@ -76,6 +81,10 @@
 #define entry_remove_flags(x, flags) ((x).lo &= ~put_pte_flags(flags))
 #define entry_has_changed(x,y,flags) \
         ( !!(((x).lo ^ (y).lo) & 
((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+
+#define PAE_SHADOW_SELF_ENTRY   259
+#define PDP_ENTRIES   4
+
 static inline int  table_offset_64(unsigned long va, int level)
 {
     switch(level) {
@@ -86,8 +95,13 @@
         case 3:
             return  (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 
1));
 #if CONFIG_PAGING_LEVELS >= 4
+#ifndef GUEST_PGENTRY_32
         case 4:
             return  (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 
1));
+#else
+        case 4:
+            return PAE_SHADOW_SELF_ENTRY; 
+#endif
 #endif
         default:
             //printk("<table_offset_64> level %d is too big\n", level);
@@ -138,7 +152,7 @@
             return NULL;
         mfn = entry_get_value(*le_e) >> PAGE_SHIFT;
         if ((flag & GUEST_ENTRY) && shadow_mode_translate(d))
-            mfn = phys_to_machine_mapping(mfn);
+            mfn = get_mfn_from_pfn(mfn);
         le_p = (pgentry_64_t *)phys_to_virt(mfn << PAGE_SHIFT);
         index = table_offset_64(va, (level + i - 1));
         le_e = &le_p[index];
@@ -165,30 +179,30 @@
     return le_e;
 }
 #define __shadow_set_l4e(v, va, value) \
-  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L4)
+  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4)
 #define __shadow_get_l4e(v, va, sl4e) \
-  __rw_entry(v, va, sl4e, SHADOW_ENTRY | GET_ENTRY | L4)
+  __rw_entry(v, va, sl4e, SHADOW_ENTRY | GET_ENTRY | PAGING_L4)
 #define __shadow_set_l3e(v, va, value) \
-  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L3)
+  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L3)
 #define __shadow_get_l3e(v, va, sl3e) \
-  __rw_entry(v, va, sl3e, SHADOW_ENTRY | GET_ENTRY | L3)
+  __rw_entry(v, va, sl3e, SHADOW_ENTRY | GET_ENTRY | PAGING_L3)
 #define __shadow_set_l2e(v, va, value) \
-  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L2)
+  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L2)
 #define __shadow_get_l2e(v, va, sl2e) \
-  __rw_entry(v, va, sl2e, SHADOW_ENTRY | GET_ENTRY | L2)
+  __rw_entry(v, va, sl2e, SHADOW_ENTRY | GET_ENTRY | PAGING_L2)
 #define __shadow_set_l1e(v, va, value) \
-  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L1)
+  __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L1)
 #define __shadow_get_l1e(v, va, sl1e) \
-  __rw_entry(v, va, sl1e, SHADOW_ENTRY | GET_ENTRY | L1)
+  __rw_entry(v, va, sl1e, SHADOW_ENTRY | GET_ENTRY | PAGING_L1)
 
 #define __guest_set_l4e(v, va, value) \
-  __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L4)
+  __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L4)
 #define __guest_get_l4e(v, va, gl4e) \
-  __rw_entry(v, va, gl4e, GUEST_ENTRY | GET_ENTRY | L4)
+  __rw_entry(v, va, gl4e, GUEST_ENTRY | GET_ENTRY | PAGING_L4)
 #define __guest_set_l3e(v, va, value) \
-  __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L3)
+  __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L3)
 #define __guest_get_l3e(v, va, sl3e) \
-  __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | L3)
+  __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)
 
 static inline void *  __guest_set_l2e(
     struct vcpu *v, u64 va, void *value, int size)
@@ -205,7 +219,7 @@
                 return &l2va[l2_table_offset_32(va)];
             }
         case 8:
-            return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L2);
+            return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | 
PAGING_L2);
         default:
             BUG();
             return NULL;
@@ -230,7 +244,7 @@
                 return &l2va[l2_table_offset_32(va)];
             }
         case 8:
-            return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | L2);
+            return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | 
PAGING_L2);
         default:
             BUG();
             return NULL;
@@ -257,7 +271,7 @@
                 if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
                     return NULL;
 
-                l1mfn = phys_to_machine_mapping(
+                l1mfn = get_mfn_from_pfn(
                   l2e_get_pfn(gl2e));
 
                 l1va = (l1_pgentry_32_t *)
@@ -269,7 +283,7 @@
             }
 
         case 8:
-            return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L1);
+            return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | 
PAGING_L1);
         default:
             BUG();
             return NULL;
@@ -299,7 +313,7 @@
                     return NULL;
 
 
-                l1mfn = phys_to_machine_mapping(
+                l1mfn = get_mfn_from_pfn(
                   l2e_get_pfn(gl2e));
                 l1va = (l1_pgentry_32_t *) phys_to_virt(
                   l1mfn << L1_PAGETABLE_SHIFT);
@@ -310,7 +324,7 @@
             }
         case 8:
             // 64-bit guest
-            return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | L1);
+            return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | 
PAGING_L1);
         default:
             BUG();
             return NULL;
@@ -334,7 +348,7 @@
     sle = entry_empty();
     if ( (entry_get_flags(gle) & _PAGE_PRESENT) && (smfn != 0) )
     {
-        if ((entry_get_flags(gle) & _PAGE_PSE) && level == L2) {
+        if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
             sle = entry_from_pfn(smfn, entry_get_flags(gle));
             entry_remove_flags(sle, _PAGE_PSE);
 
@@ -376,7 +390,7 @@
     unsigned long smfn = 0;
 
     if ( entry_get_flags(gle) & _PAGE_PRESENT ) {
-        if ((entry_get_flags(gle) & _PAGE_PSE) && level == L2) {
+        if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
             smfn =  __shadow_status(d, entry_get_value(gle) >> PAGE_SHIFT, 
PGT_fl1_shadow);
         } else {
             smfn =  __shadow_status(d, entry_get_pfn(gle), 
@@ -421,86 +435,6 @@
     return 1;
 }
 
-/*
- * Check P, R/W, U/S bits in the guest page table.
- * If the fault belongs to guest return 1,
- * else return 0.
- */
-static inline int guest_page_fault(struct vcpu *v,
-  unsigned long va, unsigned int error_code, pgentry_64_t *gpl2e, pgentry_64_t 
*gpl1e)
-{
-    struct domain *d = v->domain;
-    pgentry_64_t gle, *lva;
-    unsigned long mfn;
-    int i;
-
-    __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | L4);
-    if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
-        return 1;
-
-    if (error_code & ERROR_W) {
-        if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
-            return 1;
-    }
-    if (error_code & ERROR_U) {
-        if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
-            return 1;
-    }
-    for (i = L3; i >= L1; i--) {
-       /*
-        * If it's not external mode, then mfn should be machine physical.
-        */
-       mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT));
-
-        lva = (pgentry_64_t *) phys_to_virt(
-           mfn << PAGE_SHIFT);
-        gle = lva[table_offset_64(va, i)];
-
-        if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
-            return 1;
-
-        if (error_code & ERROR_W) {
-            if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
-                return 1;
-        }
-        if (error_code & ERROR_U) {
-            if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
-                return 1;
-        }
-
-        if (i == L2) {
-            if (gpl2e)
-                *gpl2e = gle;
-
-            if (likely(entry_get_flags(gle) & _PAGE_PSE))
-                return 0;
-
-        }
-
-        if (i == L1)
-            if (gpl1e)
-                *gpl1e = gle;
-    }
-    return 0;
-}
-
-static inline unsigned long gva_to_gpa(unsigned long gva)
-{
-    struct vcpu *v = current;
-    pgentry_64_t gl1e = {0};
-    pgentry_64_t gl2e = {0};
-    unsigned long gpa;
-
-    if (guest_page_fault(v, gva, 0, &gl2e, &gl1e))
-        return 0;
-    if (entry_get_flags(gl2e) & _PAGE_PSE)
-        gpa = entry_get_paddr(gl2e) + (gva & ((1 << L2_PAGETABLE_SHIFT) - 1));
-    else
-        gpa = entry_get_paddr(gl1e) + (gva & ~PAGE_MASK);
-
-    return gpa;
-
-}
 #endif
 
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow_public.h
--- a/xen/include/asm-x86/shadow_public.h       Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/shadow_public.h       Fri Sep  9 16:30:54 2005
@@ -49,6 +49,7 @@
          (*mark_mfn_out_of_sync)(struct vcpu *v, unsigned long gpfn,
                               unsigned long mfn);
     int  (*is_out_of_sync)(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gpa)(unsigned long gva);
 };
 #endif
 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/vmx.h Fri Sep  9 16:30:54 2005
@@ -275,7 +275,9 @@
     return 0;
 }
 
-static inline int __vmread (unsigned long field, void *value)
+#define __vmread(x, ptr) ___vmread((x), (ptr), sizeof(*(ptr)))
+
+static always_inline int ___vmread (const unsigned long field,  void *ptr, 
const int size)
 {
     unsigned long eflags;
     unsigned long ecx = 0;
@@ -286,7 +288,23 @@
                            : "a" (field)
                            : "memory");
 
-    *((long *) value) = ecx;
+    switch (size) {
+    case 1:
+        *((u8 *) (ptr)) = ecx;
+        break;
+    case 2:
+        *((u16 *) (ptr)) = ecx;
+        break;
+    case 4:
+        *((u32 *) (ptr)) = ecx;
+        break;
+    case 8:
+        *((u64 *) (ptr)) = ecx;
+        break;
+    default:
+        domain_crash_synchronous();
+        break;
+    }
 
     __save_flags(eflags);
     if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
@@ -453,4 +471,7 @@
 void load_cpu_user_regs(struct cpu_user_regs *regs);
 void store_cpu_user_regs(struct cpu_user_regs *regs);
 
+enum { VMX_COPY_IN = 0, VMX_COPY_OUT };
+int vmx_copy(void *buf, unsigned long laddr, int size, int dir);
+
 #endif /* __ASM_X86_VMX_H__ */
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/vmx_platform.h
--- a/xen/include/asm-x86/vmx_platform.h        Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/vmx_platform.h        Fri Sep  9 16:30:54 2005
@@ -24,8 +24,7 @@
 #include <asm/vmx_virpit.h>
 #include <asm/vmx_intercept.h>
 
-#define MAX_OPERAND_NUM 3
-#define I_NAME_LEN  16
+#define MAX_OPERAND_NUM 2
 
 #define mk_operand(size, index, seg, flag) \
     (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
@@ -35,54 +34,60 @@
 
 #define operand_index(operand)  \
       ((operand >> 16) & 0xFF)
-      //For instruction.operand[].size
+
+/* for instruction.operand[].size */
 #define BYTE    1
 #define WORD    2
 #define LONG    4
 #define QUAD    8
 #define BYTE_64 16
 
-      //For instruction.operand[].flag
+/* for instruction.operand[].flag */
 #define REGISTER    0x1
 #define MEMORY      0x2
 #define IMMEDIATE   0x4
-#define WZEROEXTEND 0x8
 
-      //For instruction.flags
+/* for instruction.flags */
 #define REPZ    0x1
 #define REPNZ   0x2
+#define OVERLAP 0x4
+
+#define        INSTR_PIO       1
+#define INSTR_OR       2
+#define INSTR_AND      3
+#define INSTR_XOR      4
+#define INSTR_CMP      5
+#define INSTR_MOV      6
+#define INSTR_MOVS     7
+#define INSTR_MOVZ     8
+#define INSTR_STOS     9
+#define INSTR_TEST     10
 
 struct instruction {
-    __s8    i_name[I_NAME_LEN];  //Instruction's name
-    __s16   op_size;    //The operand's bit size, e.g. 16-bit or 32-bit.
-
-    __u64   offset;     //The effective address
-          //offset = Base + (Index * Scale) + Displacement
-
+    __s8    instr;     /* instruction type */
+    __s16   op_size;    /* the operand's bit size, e.g. 16-bit or 32-bit */
     __u64   immediate;
-
-    __u16   seg_sel;    //Segmentation selector
-
-    __u32   operand[MAX_OPERAND_NUM];   //The order of operand is from AT&T 
Assembly
-    __s16   op_num; //The operand numbers
-
-    __u32   flags; //
+    __u16   seg_sel;    /* segmentation selector */
+    __u32   operand[MAX_OPERAND_NUM];   /* order is AT&T assembly */
+    __u32   flags;
 };
 
 #define MAX_INST_LEN      32
 
-struct mi_per_cpu_info
-{
-    unsigned long          mmio_target;
-    struct cpu_user_regs        *inst_decoder_regs;
+struct mi_per_cpu_info {
+    int                    flags;
+    int                           instr;               /* instruction */
+    unsigned long          operand[2];         /* operands */
+    unsigned long          immediate;          /* immediate portion */
+    struct cpu_user_regs   *inst_decoder_regs; /* current context */
 };
 
 struct virtual_platform_def {
-    unsigned long          *real_mode_data; /* E820, etc. */
+    unsigned long          *real_mode_data;    /* E820, etc. */
     unsigned long          shared_page_va;
     struct vmx_virpit_t    vmx_pit;
     struct vmx_handler_t   vmx_handler;
-    struct mi_per_cpu_info mpci;            /* MMIO */
+    struct mi_per_cpu_info mpci;               /* MMIO */
 };
 
 extern void handle_mmio(unsigned long, unsigned long);
@@ -91,6 +96,6 @@
 extern void vmx_io_assist(struct vcpu *v);
 
 // XXX - think about this -- maybe use bit 30 of the mfn to signify an MMIO 
frame.
-#define mmio_space(gpa) (!VALID_MFN(phys_to_machine_mapping((gpa) >> 
PAGE_SHIFT)))
+#define mmio_space(gpa) (!VALID_MFN(get_mfn_from_pfn((gpa) >> PAGE_SHIFT)))
 
 #endif
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h    Fri Sep  9 16:30:54 2005
@@ -183,7 +183,7 @@
     VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
     VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
     VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
-    VM_ENTRY_INSTRUCTION_LENGTH     = 0x0000401a,
+    VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
     TPR_THRESHOLD                   = 0x0000401c,
     SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
     VM_INSTRUCTION_ERROR            = 0x00004400,
@@ -192,7 +192,7 @@
     VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
     IDT_VECTORING_INFO_FIELD        = 0x00004408,
     IDT_VECTORING_ERROR_CODE        = 0x0000440a,
-    INSTRUCTION_LEN                 = 0x0000440c,
+    VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
     VMX_INSTRUCTION_INFO            = 0x0000440e,
     GUEST_ES_LIMIT                  = 0x00004800,
     GUEST_CS_LIMIT                  = 0x00004802,
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h    Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/x86_32/asm_defns.h    Fri Sep  9 16:30:54 2005
@@ -1,56 +1,26 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
 
-/* Maybe auto-generate the following two cases (quoted vs. unquoted). */
-#ifndef __ASSEMBLY__
-
-#define __SAVE_ALL_PRE                                                  \
-        "cld;"                                                          \
-        "pushl %eax;"                                                   \
-        "pushl %ebp;"                                                   \
-        "pushl %edi;"                                                   \
-        "pushl %esi;"                                                   \
-        "pushl %edx;"                                                   \
-        "pushl %ecx;"                                                   \
-        "pushl %ebx;"                                                   \
-        "testl $"STR(X86_EFLAGS_VM)","STR(UREGS_eflags)"(%esp);"        \
-        "jz 2f;"                                                        \
-        "call setup_vm86_frame;"                                        \
-        "jmp 3f;"                                                       \
-        "2:testb $3,"STR(UREGS_cs)"(%esp);"                             \
-        "jz 1f;"                                                        \
-        "mov %ds,"STR(UREGS_ds)"(%esp);"                                \
-        "mov %es,"STR(UREGS_es)"(%esp);"                                \
-        "mov %fs,"STR(UREGS_fs)"(%esp);"                                \
-        "mov %gs,"STR(UREGS_gs)"(%esp);"                                \
-        "3:"
-
-#define SAVE_ALL_NOSEGREGS(_reg)                \
-        __SAVE_ALL_PRE                          \
-        "1:"
-
-#define SET_XEN_SEGMENTS(_reg)                                  \
-        "movl $("STR(__HYPERVISOR_DS)"),%e"STR(_reg)"x;"        \
-        "mov %e"STR(_reg)"x,%ds;"                              \
-        "mov %e"STR(_reg)"x,%es;"
-
-#define SAVE_ALL(_reg)                          \
-        __SAVE_ALL_PRE                          \
-        SET_XEN_SEGMENTS(_reg)                  \
-        "1:"
-
+#ifndef NDEBUG
+/* Indicate special exception stack frame by inverting the frame pointer. */
+#define SETUP_EXCEPTION_FRAME_POINTER           \
+        movl  %esp,%ebp;                        \
+        notl  %ebp
 #else
+#define SETUP_EXCEPTION_FRAME_POINTER
+#endif
 
 #define __SAVE_ALL_PRE                                  \
         cld;                                            \
         pushl %eax;                                     \
         pushl %ebp;                                     \
+        SETUP_EXCEPTION_FRAME_POINTER;                  \
         pushl %edi;                                     \
         pushl %esi;                                     \
         pushl %edx;                                     \
         pushl %ecx;                                     \
         pushl %ebx;                                     \
-        testl $X86_EFLAGS_VM,UREGS_eflags(%esp);        \
+        testl $(X86_EFLAGS_VM),UREGS_eflags(%esp);      \
         jz 2f;                                          \
         call setup_vm86_frame;                          \
         jmp 3f;                                         \
@@ -83,8 +53,6 @@
 #define PERFC_INCR(_name,_idx)
 #endif
 
-#endif
-
 #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
 #define XBUILD_SMP_INTERRUPT(x,v)               \
 asmlinkage void x(void);                        \
@@ -92,7 +60,7 @@
     "\n"__ALIGN_STR"\n"                         \
     STR(x) ":\n\t"                              \
     "pushl $"#v"<<16\n\t"                       \
-    SAVE_ALL(a)                                 \
+    STR(SAVE_ALL(a))                            \
     "call "STR(smp_##x)"\n\t"                   \
     "jmp ret_from_intr\n");
 
@@ -103,7 +71,7 @@
 "\n"__ALIGN_STR"\n"                             \
 STR(x) ":\n\t"                                  \
     "pushl $"#v"<<16\n\t"                       \
-    SAVE_ALL(a)                                 \
+    STR(SAVE_ALL(a))                            \
     "movl %esp,%eax\n\t"                        \
     "pushl %eax\n\t"                            \
     "call "STR(smp_##x)"\n\t"                   \
@@ -114,7 +82,7 @@
 __asm__(                                        \
     "\n" __ALIGN_STR"\n"                        \
     "common_interrupt:\n\t"                     \
-    SAVE_ALL(a)                                 \
+    STR(SAVE_ALL(a))                            \
     "movl %esp,%eax\n\t"                        \
     "pushl %eax\n\t"                            \
     "call " STR(do_IRQ) "\n\t"                  \
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/x86_64/asm_defns.h
--- a/xen/include/asm-x86/x86_64/asm_defns.h    Thu Sep  8 15:18:40 2005
+++ b/xen/include/asm-x86/x86_64/asm_defns.h    Fri Sep  9 16:30:54 2005
@@ -1,49 +1,14 @@
 #ifndef __X86_64_ASM_DEFNS_H__
 #define __X86_64_ASM_DEFNS_H__
 
-/* Maybe auto-generate the following two cases (quoted vs. unquoted). */
-#ifndef __ASSEMBLY__
-
-#define SAVE_ALL                                \
-        "cld;"                                  \
-        "pushq %rdi;"                           \
-        "pushq %rsi;"                           \
-        "pushq %rdx;"                           \
-        "pushq %rcx;"                           \
-        "pushq %rax;"                           \
-        "pushq %r8;"                            \
-        "pushq %r9;"                            \
-        "pushq %r10;"                           \
-        "pushq %r11;"                           \
-        "pushq %rbx;"                           \
-        "pushq %rbp;"                           \
-        "pushq %r12;"                           \
-        "pushq %r13;"                           \
-        "pushq %r14;"                           \
-        "pushq %r15;"
-
-#define RESTORE_ALL                             \
-        "popq  %r15;"                           \
-        "popq  %r14;"                           \
-        "popq  %r13;"                           \
-        "popq  %r12;"                           \
-        "popq  %rbp;"                           \
-        "popq  %rbx;"                           \
-        "popq  %r11;"                           \
-        "popq  %r10;"                           \
-        "popq  %r9;"                            \
-        "popq  %r8;"                            \
-        "popq  %rax;"                           \
-        "popq  %rcx;"                           \
-        "popq  %rdx;"                           \
-        "popq  %rsi;"                           \
-        "popq  %rdi;"
-
-/* Work around AMD erratum #88 */
-#define safe_swapgs                             \
-        "mfence; swapgs;"
-
+#ifndef NDEBUG
+/* Indicate special exception stack frame by inverting the frame pointer. */
+#define SETUP_EXCEPTION_FRAME_POINTER           \
+        movq  %rsp,%rbp;                        \
+        notq  %rbp
 #else
+#define SETUP_EXCEPTION_FRAME_POINTER
+#endif
 
 #define SAVE_ALL                                \
         cld;                                    \
@@ -58,6 +23,7 @@
         pushq %r11;                             \
         pushq %rbx;                             \
         pushq %rbp;                             \
+        SETUP_EXCEPTION_FRAME_POINTER;          \
         pushq %r12;                             \
         pushq %r13;                             \
         pushq %r14;                             \
@@ -90,7 +56,9 @@
 #define PERFC_INCR(_name,_idx)
 #endif
 
-#endif
+/* Work around AMD erratum #88 */
+#define safe_swapgs                             \
+        "mfence; swapgs;"
 
 #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
 #define XBUILD