# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 8799d14bef771ae236166e3c5c00a65dd6f2d44c
# Parent  5f1ed597f1072b86d5c59a588c3ac2aefd0b7450
# Parent  9fb0bad776dd3d1c1cd5eec4990a296fbe0e38dc
Merge with -unstable
diff -r 5f1ed597f107 -r 8799d14bef77 .hgignore
--- a/.hgignore Wed Aug 24 02:43:18 2005
+++ b/.hgignore Thu Aug 25 22:53:20 2005
@@ -69,25 +69,25 @@
 ^tools/blktap/blkdump$
 ^tools/blktap/blkgnbd$
 ^tools/blktap/blkimg$
-^tools/blktap/blockstore\.dat$
-^tools/blktap/blockstored$
 ^tools/blktap/bstest$
-^tools/blktap/parallax$
 ^tools/blktap/vdi\.dot$
 ^tools/blktap/vdi\.ps$
-^tools/blktap/vdi_create$
-^tools/blktap/vdi_fill$
-^tools/blktap/vdi_list$
-^tools/blktap/vdi_snap$
-^tools/blktap/vdi_snap_list$
-^tools/blktap/vdi_tree$
-^tools/blktap/vdi_validate$
+^tools/blktap/parallax/vdi_create$
+^tools/blktap/parallax/vdi_fill$
+^tools/blktap/parallax/vdi_list$
+^tools/blktap/parallax/vdi_snap$
+^tools/blktap/parallax/vdi_snap_list$
+^tools/blktap/parallax/vdi_snap_delete$
+^tools/blktap/parallax/vdi_tree$
+^tools/blktap/parallax/vdi_validate$
+^tools/blktap/parallax/parallax$
+^tools/blktap/parallax/blockstored$
 ^tools/blktap/xen/.*$
 ^tools/check/\..*$
 ^tools/cmdline/.*$
 ^tools/cmdline/xen/.*$
-^tools/consoled/consoled$
-^tools/consoled/xc_console$
+^tools/console/xenconsoled$
+^tools/console/xenconsole$
 ^tools/debugger/pdb/pdb$
 ^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$
 ^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$
@@ -116,7 +116,6 @@
 ^tools/ioemu/target-.*/Makefile$
 ^tools/ioemu/target-.*/config\..*$
 ^tools/ioemu/target-.*/qemu-dm$
-^tools/ioemu/target-.*/qemu-vgaram-bin$
 ^tools/libxc/xen/.*$
 ^tools/misc/cpuperf/cpuperf-perfcntr$
 ^tools/misc/cpuperf/cpuperf-xen$
@@ -148,6 +147,7 @@
 ^tools/xcs/xcsdump$
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
+^tools/xenstat/xentop/xentop$
 ^tools/xenstore/testsuite/tmp/.*$
 ^tools/xenstore/xen$
 ^tools/xenstore/xenstored$
diff -r 5f1ed597f107 -r 8799d14bef77 Config.mk
--- a/Config.mk Wed Aug 24 02:43:18 2005
+++ b/Config.mk Thu Aug 25 22:53:20 2005
@@ -7,13 +7,14 @@
 
 # Tools to run on system hosting the build
 HOSTCC     = gcc
-HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer 
+HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer
 
 AS         = $(CROSS_COMPILE)as
 LD         = $(CROSS_COMPILE)ld
 CC         = $(CROSS_COMPILE)gcc
 CPP        = $(CROSS_COMPILE)gcc -E
 AR         = $(CROSS_COMPILE)ar
+RANLIB     = $(CROSS_COMPILE)ranlib
 NM         = $(CROSS_COMPILE)nm
 STRIP      = $(CROSS_COMPILE)strip
 OBJCOPY    = $(CROSS_COMPILE)objcopy
@@ -35,3 +36,15 @@
 
 # Choose the best mirror to download linux kernel
 KERNEL_REPO = http://www.kernel.org
+
+# ACM_USE_SECURITY_POLICY is set to security policy of Xen
+# Supported models are:
+#      ACM_NULL_POLICY (ACM will not be built with this policy)
+#      ACM_CHINESE_WALL_POLICY
+#      ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
+#      ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
+ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
+
+# Optional components
+XENSTAT_XENTOP ?= y
+
diff -r 5f1ed597f107 -r 8799d14bef77 Makefile
--- a/Makefile  Wed Aug 24 02:43:18 2005
+++ b/Makefile  Thu Aug 25 22:53:20 2005
@@ -101,11 +101,6 @@
        for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done
        for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done
 
-install-twisted:
-       wget http://www.twistedmatrix.com/products/get-current.epy
-       tar -zxf Twisted-*.tar.gz
-       cd Twisted-* && python setup.py install
-
 install-logging: LOGGING=logging-0.4.9.2
 install-logging:
        [ -f $(LOGGING).tar.gz ] || wget 
http://www.red-dove.com/$(LOGGING).tar.gz
@@ -149,7 +144,6 @@
        @echo '  kclean           - clean guest kernel build trees'
        @echo ''
        @echo 'Dependency installation targets:'
-       @echo '  install-twisted  - install the Twisted Matrix Framework'
        @echo '  install-logging  - install the Python Logging package'
        @echo '  install-iptables - install iptables tools'
        @echo ''
@@ -178,6 +172,12 @@
        rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
        rm -rf $(D)/boot/*xen*
        rm -rf $(D)/lib/modules/*xen*
+       rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen
+       rm -rf $(D)/usr/bin/xc_shadow
+       rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen
+       rm -rf $(D)/usr/share/man/man1/xen*
+       rm -rf $(D)/usr/share/man/man8/xen*
+       rm -rf $(D)/usr/lib/xen
 
 # Legacy targets for compatibility
 linux24:
diff -r 5f1ed597f107 -r 8799d14bef77 buildconfigs/Rules.mk
--- a/buildconfigs/Rules.mk     Wed Aug 24 02:43:18 2005
+++ b/buildconfigs/Rules.mk     Thu Aug 25 22:53:20 2005
@@ -66,6 +66,7 @@
 
 PATCHDIRS := $(wildcard patches/*-*)
 
+ifneq ($(PATCHDIRS),)
 -include $(patsubst %,%/.makedep,$(PATCHDIRS))
 
 $(patsubst patches/%,patches/%/.makedep,$(PATCHDIRS)): patches/%/.makedep: 
@@ -80,6 +81,7 @@
        ([ -d patches/$* ] && \
          for i in patches/$*/*.patch ; do ( cd $(@D) ; patch -p1 <../$$i || 
exit 1 ) ; done) || true
        touch $@ # update timestamp to avoid rebuild
+endif
 
 %-build:
        $(MAKE) -f buildconfigs/mk.$* build
@@ -115,7 +117,7 @@
 ifeq ($(XEN_TARGET_X86_PAE),y)
        sed -e 's!^CONFIG_HIGHMEM4G=y$$!\# CONFIG_HIGHMEM4G is not set!;s!^\# 
CONFIG_HIGHMEM64G is not set$$!CONFIG_HIGHMEM64G=y!' $(CONFIG_FILE) > 
$(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE)
 else
-       @: # do nothing yet
+       grep '^CONFIG_HIGHMEM64G=y' $(CONFIG_FILE) >/dev/null && ( sed -e 
's!^CONFIG_HIGHMEM64G=y$$!\# CONFIG_HIGHMEM64G is not set!;s!^\# 
CONFIG_HIGHMEM4G is not set$$!CONFIG_HIGHMEM4G=y!' $(CONFIG_FILE) > 
$(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE) ) || true
 endif
 
 # never delete any intermediate files.
diff -r 5f1ed597f107 -r 8799d14bef77 docs/src/user.tex
--- a/docs/src/user.tex Wed Aug 24 02:43:18 2005
+++ b/docs/src/user.tex Thu Aug 25 22:53:20 2005
@@ -1709,8 +1709,11 @@
  For example: `com1=9600, 8n1, 0x408, 5' maps COM1 to a
  9600-baud port, 8 data bits, no parity, 1 stop bit,
  I/O port base 0x408, IRQ 5.
- If the I/O base and IRQ are standard (com1:0x3f8,4;
- com2:0x2f8,3) then they need not be specified. 
+ If some configuration options are standard (e.g., I/O base and IRQ),
+ then only a prefix of the full configuration string need be
+ specified. If the baud rate is pre-configured (e.g., by the
+ bootloader) then you can specify `auto' in place of a numeric baud
+ rate. 
 
 \item [console=$<$specifier list$>$ ] 
  Specify the destination for Xen console I/O.
@@ -1760,7 +1763,7 @@
  physical address in the memory map will be ignored. This parameter
  may be specified with a B, K, M or G suffix, representing bytes,
  kilobytes, megabytes and gigabytes respectively. The
- default unit, if no suffix is specified, is bytes.
+ default unit, if no suffix is specified, is kilobytes.
 
 \item [dom0\_mem=xxx ] 
  Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h       Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/include/hypervisor.h       Thu Aug 25 22:53:20 2005
@@ -80,16 +80,42 @@
 
 static __inline__ int HYPERVISOR_mmu_update(mmu_update_t *req, 
                                             int count, 
-                                            int *success_count)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), 
-        _a1 (req), _a2 (count), _a3 (success_count)  : "memory" );
-
-    return ret;
-}
+                                            int *success_count, 
+                                            domid_t domid)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+        : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+          "3" (success_count), "4" (domid)
+        : "memory" );
+
+    return ret;
+}
+
+/* Issue the mmuext_op hypercall; the four arguments travel in the b/c/d/S registers and the a register is returned. */
+static __inline__ int HYPERVISOR_mmuext_op(struct mmuext_op *op, 
+                                           int count, 
+                                           int *success_count, 
+                                           domid_t domid)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+    /* ign1-4 are dummy outputs tied to the argument registers, so the compiler treats those registers as modified. */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+        : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+          "3" (success_count), "4" (domid)
+        : "memory" );
+
+    return ret;
+}
+
+
 
 static __inline__ int HYPERVISOR_set_gdt(unsigned long *frame_list, int 
entries)
 {
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/include/mm.h       Thu Aug 25 22:53:20 2005
@@ -43,13 +43,27 @@
 #define PADDR_MASK              ((1UL << PADDR_BITS)-1)
 #define VADDR_MASK              ((1UL << VADDR_BITS)-1)
 
-#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
+#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> 
L1_PAGETABLE_SHIFT)
+
+#endif
+
+
+
+#ifdef __i386__
+
+#define L1_PAGETABLE_SHIFT      12
+#define L2_PAGETABLE_SHIFT      22
+
+#define L1_PAGETABLE_ENTRIES    1024
+#define L2_PAGETABLE_ENTRIES    1024
+#endif
 
 /* Given a virtual address, get an entry offset into a page table. */
 #define l1_table_offset(_a) \
   (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
 #define l2_table_offset(_a) \
   (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#ifdef __x86_64__
 #define l3_table_offset(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
 #define l4_table_offset(_a) \
@@ -67,13 +81,16 @@
 #define _PAGE_PSE      0x080UL
 #define _PAGE_GLOBAL   0x100UL
 
-#define PAGE_SHIFT      12
-#define PAGE_SIZE       (1UL << PAGE_SHIFT)
+#define L1_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | 
_PAGE_USER)
+
+#define PAGE_SIZE       (1UL << L1_PAGETABLE_SHIFT)
+#define PAGE_SHIFT      L1_PAGETABLE_SHIFT
 #define PAGE_MASK       (~(PAGE_SIZE-1))
 
-#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)    ((x) << PAGE_SHIFT)
+#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT)
+#define PFN_DOWN(x)    ((x) >> L1_PAGETABLE_SHIFT)
+#define PFN_PHYS(x)    ((x) << L1_PAGETABLE_SHIFT)
 
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
@@ -83,14 +100,14 @@
 #define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
 static __inline__ unsigned long phys_to_machine(unsigned long phys)
 {
-    unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
-    machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+    unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
+    machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
     return machine;
 }
 static __inline__ unsigned long machine_to_phys(unsigned long machine)
 {
-    unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
-    phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+    unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
+    phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK);
     return phys;
 }
 
@@ -105,7 +122,10 @@
 #define __va to_virt
 #define __pa to_phys
 
+#define virt_to_pfn(_virt)         (PFN_DOWN(to_phys(_virt)))
+
 void init_mm(void);
 unsigned long alloc_pages(int order);
+int is_mfn_mapped(unsigned long mfn);
 
 #endif /* _MM_H_ */
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/include/time.h
--- a/extras/mini-os/include/time.h     Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/include/time.h     Thu Aug 25 22:53:20 2005
@@ -28,7 +28,7 @@
  * of real time into system time 
  */
 typedef s64 s_time_t;
-#define NOW()                   ((s_time_t)get_s_time())
+#define NOW()                   ((s_time_t)monotonic_clock())
 #define SECONDS(_s)             (((s_time_t)(_s))  * 1000000000UL )
 #define TENTHS(_ts)             (((s_time_t)(_ts)) * 100000000UL )
 #define HUNDREDTHS(_hs)         (((s_time_t)(_hs)) * 10000000UL )
@@ -36,7 +36,8 @@
 #define MICROSECS(_us)          (((s_time_t)(_us)) * 1000UL )
 #define Time_Max                ((s_time_t) 0x7fffffffffffffffLL)
 #define FOREVER                 Time_Max
-
+#define NSEC_TO_USEC(_nsec)     ((_nsec) / 1000UL)        /* nanoseconds -> whole microseconds; argument parenthesised so expressions divide as a unit */
+#define NSEC_TO_SEC(_nsec)      ((_nsec) / 1000000000ULL) /* nanoseconds -> whole seconds */
 
 /* wall clock time  */
 typedef long time_t;
@@ -44,6 +45,11 @@
 struct timeval {
        time_t          tv_sec;         /* seconds */
        suseconds_t     tv_usec;        /* microseconds */
+};
+
+struct timespec {
+    time_t      ts_sec;
+    long        ts_nsec;
 };
 
 
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/kernel.c   Thu Aug 25 22:53:20 2005
@@ -132,22 +132,8 @@
     i = 0;
     for ( ; ; ) 
     {      
-        if(i >= 1000)         
-        {
-            {
-                unsigned long saved;
-                __asm__ ("movl %%esp, %0"
-                         :"=r"(saved)  /* y is output operand */
-                            /* x is input operand */);
-//                        :"a"); /* %eax is clobbered register */
-                printk("ESP=0x%lx\n", saved);
-            }
-            
-            printk("1000 bloks\n");
-            i=0;            
-        }
 //        HYPERVISOR_yield();
-        block(1);
+        block(100);
         i++;
     }
 }
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/mm.c       Thu Aug 25 22:53:20 2005
@@ -5,9 +5,9 @@
  *
  *        File: mm.c
  *      Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx)
- *     Changes: 
+ *     Changes: Grzegorz Milos
  *              
- *        Date: Aug 2003
+ *        Date: Aug 2003, changes Aug 2005
  * 
  * Environment: Xen Minimal OS
  * Description: memory management related functions
@@ -41,86 +41,18 @@
 #include <types.h>
 #include <lib.h>
 
+/* DEBUG(fmt, args...): printk the message prefixed with "mm.c" and the line number when MM_DEBUG is defined; otherwise a no-op. */
+#ifdef MM_DEBUG
+#define DEBUG(_f, _a...) \
+    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
+#else
+#define DEBUG(_f, _a...)    ((void)0)
+#endif
+
 unsigned long *phys_to_machine_mapping;
 extern char *stack;
 extern char _text, _etext, _edata, _end;
 
-static void init_page_allocator(unsigned long min, unsigned long max);
-
-void init_mm(void)
-{
-
-    unsigned long start_pfn, max_pfn, max_free_pfn;
-
-    unsigned long *pgd = (unsigned long *)start_info.pt_base;
-
-    printk("MM: Init\n");
-
-    printk("  _text:        %p\n", &_text);
-    printk("  _etext:       %p\n", &_etext);
-    printk("  _edata:       %p\n", &_edata);
-    printk("  stack start:  %p\n", &stack);
-    printk("  _end:         %p\n", &_end);
-
-    /* set up minimal memory infos */
-    start_pfn = PFN_UP(to_phys(&_end));
-    max_pfn = start_info.nr_pages;
-
-    printk("  start_pfn:    %lx\n", start_pfn);
-    printk("  max_pfn:      %lx\n", max_pfn);
-
-    /*
-     * we know where free tables start (start_pfn) and how many we 
-     * have (max_pfn). 
-     * 
-     * Currently the hypervisor stores page tables it providesin the
-     * high region of the this memory range.
-     * 
-     * next we work out how far down this goes (max_free_pfn)
-     * 
-     * XXX this assumes the hypervisor provided page tables to be in
-     * the upper region of our initial memory. I don't know if this 
-     * is always true.
-     */
-
-    max_free_pfn = PFN_DOWN(to_phys(pgd));
-#ifdef __i386__
-    {
-        unsigned long *pgd = (unsigned long *)start_info.pt_base;
-        unsigned long  pte;
-        int i;
-        printk("  pgd(pa(pgd)): %lx(%lx)", (u_long)pgd, to_phys(pgd));
-
-        for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ )
-        {
-            unsigned long pgde = *pgd++;
-            if ( !(pgde & 1) ) continue;
-            pte = machine_to_phys(pgde & PAGE_MASK);
-            printk("  PT(%x): %lx(%lx)", i, (u_long)to_virt(pte), pte);
-            if (PFN_DOWN(pte) <= max_free_pfn) 
-                max_free_pfn = PFN_DOWN(pte);
-        }
-    }
-    max_free_pfn--;
-    printk("  max_free_pfn: %lx\n", max_free_pfn);
-
-    /*
-     * now we can initialise the page allocator
-     */
-    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
-           (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), 
-           (u_long)to_virt(PFN_PHYS(max_free_pfn)), PFN_PHYS(max_free_pfn));
-    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_free_pfn));   
-#endif
-
-
-    /* Now initialise the physical->machine mapping table. */
-
-
-    printk("MM: done\n");
-
-    
-}
 
 /*********************
  * ALLOCATION BITMAP
@@ -213,6 +145,59 @@
 
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+#ifdef MM_DEBUG
+/* Dump the allocation bitmap for @nr_pages pages starting at virtual address
+ * @start: prints "1" for each page marked allocated and "0" otherwise. */
+static void print_allocation(void *start, int nr_pages)
+{
+    unsigned long first_pfn = virt_to_pfn(start);
+    int i;
+
+    for(i = 0; i < nr_pages; i++)
+    {
+        if(!allocated_in_map(first_pfn + i)) printk("0");
+        else printk("1");
+    }
+    printk("\n");
+}
+
+/* Prints free chunks (marking each with its own letter, '_' elsewhere) for
+ * @nr_pages pages (clamped to 0..1000) starting at @start (virtual). */
+static void print_chunks(void *start, int nr_pages)
+{
+    char chunks[1001], current='A';
+    int order, count;
+    chunk_head_t *head;
+    unsigned long pfn_start = virt_to_pfn(start);
+   
+    memset(chunks, (int)'_', 1000);
+    if(nr_pages < 0) nr_pages = 0; /* never index before chunks[] */
+    if(nr_pages > 1000) 
+    {
+        DEBUG("Can only pring 1000 pages. Increase buffer size.");
+        nr_pages = 1000; /* keep the terminator inside chunks[] */
+    }
+    
+    for(order=0; order < FREELIST_SIZE; order++)
+    {
+        head = free_head[order];
+        while(!FREELIST_EMPTY(head))
+        {
+            for(count = 0; count < 1<< head->level; count++)
+            {
+                if(count + virt_to_pfn(head) - pfn_start < 1000)
+                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
+            }
+            head = head->next;
+            current++;
+        }
+    }
+    chunks[nr_pages] = '\0';
+    printk("%s\n", chunks);
+}
+#endif
+
 
 
 /*
@@ -328,3 +313,198 @@
     return 0;
 }
 
+/* Return the 2^order pages at @pointer to the allocator, merging the chunk
+ * with free neighbouring chunks of the same order while that is possible. */
+void free_pages(void *pointer, int order)
+{
+    chunk_head_t *freed_ch, *to_merge_ch;
+    chunk_tail_t *freed_ct;
+    unsigned long mask;
+    
+    /* First free the chunk */
+    map_free(virt_to_pfn(pointer), 1UL << order);
+    
+    /* Create free chunk */
+    freed_ch = (chunk_head_t *)pointer;
+    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT)))-1;
+    
+    /* Now, possibly we can coalesce chunks together */
+    while(order < FREELIST_SIZE)
+    {
+        mask = 1UL << (order + PAGE_SHIFT);
+        if((unsigned long)freed_ch & mask) 
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+                    to_merge_ch->level != order)
+                break;
+            /* Merge with predecessor */
+            freed_ch = to_merge_ch;   
+        }
+        else 
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+                    to_merge_ch->level != order)
+                break;
+            /* Merge with successor: the tail record is the last
+             * chunk_tail_t inside the merged chunk, hence the -1 */
+            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask) - 1;
+        }
+        
+        /* We are committed to merging, unlink the chunk */
+        *(to_merge_ch->pprev) = to_merge_ch->next;
+        to_merge_ch->next->pprev = to_merge_ch->pprev;
+        
+        order++;
+    }
+
+    /* Link the new chunk */
+    freed_ch->level = order;
+    freed_ch->next  = free_head[order];
+    freed_ch->pprev = &free_head[order];
+    freed_ct->level = order;
+    
+    freed_ch->next->pprev = &freed_ch->next;
+    free_head[order] = freed_ch;   
+}
+/* Extend the boot page tables to map pfns the initial tables do not cover, one
+ * L1 table per loop pass. Table frames are taken from *start_pfn upwards; on return
+ * *start_pfn is the first pfn not so used. *max_pfn is lowered if it would reach Xen's region. */
+void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
+{
+    unsigned long pfn_to_map, pt_frame;
+    unsigned long mach_ptd, max_mach_ptd;
+    int count;
+    unsigned long mach_pte, virt_pte;
+    unsigned long *ptd = (unsigned long *)start_info.pt_base;
+    mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
+    struct mmuext_op pin_request;
+    /* mmu_updates holds one page-directory update plus up to L1_PAGETABLE_ENTRIES PTE updates per batch */
+    /* Firstly work out what is the first pfn that is not yet in page tables
+       NB. Assuming that builder fills whole pt_frames (which it does at the
+       moment)
+     */  
+    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
+    DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld", 
+            *start_pfn, pfn_to_map, *max_pfn);
+    /* Machine address of page table directory */
+    mach_ptd = phys_to_machine(to_phys(start_info.pt_base));
+    mach_ptd += sizeof(void *) * 
+        l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map)));
+    /* Byte offset within the directory of the entry covering *max_pfn; rebased to a machine address further down */
+    max_mach_ptd = sizeof(void *) * 
+        l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn)));
+    
+    /* Check that we are not trying to access Xen region */
+    if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START))
+    {
+        printk("WARNING: mini-os will not use all the memory supplied\n");
+        max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START);
+        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
+    }
+    max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base));
+    DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd); 
+    pt_frame = *start_pfn;
+    /* Should not happen - no empty, mapped pages */
+    if(pt_frame >= pfn_to_map)
+    {
+        printk("ERROR: Not even a single empty, mapped page\n");
+        *(int*)0=0; /* presumably a deliberate fault to stop here — TODO confirm */
+    }
+    while(mach_ptd < max_mach_ptd)
+    {
+        /* Correct protection needs to be set for the new page table frame */
+        virt_pte = (unsigned long)to_virt(PFN_PHYS(pt_frame));
+        mach_pte = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1);
+        mach_pte += sizeof(void *) * l1_table_offset(virt_pte);
+        DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, "
+                "mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame), 
+                virt_pte, mach_pte);
+        
+        /* Update the entry */
+        mmu_updates[0].ptr = mach_pte;
+        mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT | 
+                                                    (L1_PROT & ~_PAGE_RW);
+        if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
+        {
+            printk("PTE for new page table page could not be updated\n");
+            *(int*)0=0;
+        }
+        
+        /* Pin the page to provide correct protection */
+        pin_request.cmd = MMUEXT_PIN_L1_TABLE;
+        pin_request.mfn = pfn_to_mfn(pt_frame);
+        if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
+        {
+            printk("ERROR: pinning failed\n");
+            *(int*)0=0;
+        }
+        
+        /* Now fill the new page table page with entries.
+           Update the page directory as well. */
+        count = 0;
+        mmu_updates[count].ptr = mach_ptd;
+        mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
+                                                         L2_PROT;
+        count++;
+        mach_ptd += sizeof(void *);
+        mach_pte = phys_to_machine(PFN_PHYS(pt_frame++));
+        /* NOTE(review): the bound below includes pfn == *max_pfn itself — confirm this is intended */
+        for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++)
+        {
+            mmu_updates[count].ptr = mach_pte;
+            mmu_updates[count].val = 
+                pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+            if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte);
+            mach_pte += sizeof(void *);
+        }
+        if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0) 
+        {            
+            printk("ERROR: mmu_update failed\n");
+            *(int*)0=0;
+        }
+        (*start_pfn)++;
+    }
+    /* Report the first pfn not consumed as a page table; this overwrites the per-pass increment above */
+    *start_pfn = pt_frame;
+}
+
+/* Initialise memory management: record the phys-to-machine table, extend the page tables and (i386 only) set up the page allocator. */
+void init_mm(void)
+{
+    unsigned long start_pfn, max_pfn;
+
+    printk("MM: Init\n");
+
+    printk("  _text:        %p\n", &_text);
+    printk("  _etext:       %p\n", &_etext);
+    printk("  _edata:       %p\n", &_edata);
+    printk("  stack start:  %p\n", &stack);
+    printk("  _end:         %p\n", &_end);
+
+    /* set up minimal memory infos */
+    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
+   
+    /* First page follows page table pages and 3 more pages (store page etc) */
+    start_pfn = PFN_UP(__pa(start_info.pt_base)) + start_info.nr_pt_frames + 3;
+    max_pfn = start_info.nr_pages;
+
+    printk("  start_pfn:    %lx\n", start_pfn);
+    printk("  max_pfn:      %lx\n", max_pfn);
+
+    /* build_pagetable() may advance start_pfn past frames it uses and may lower max_pfn */
+    build_pagetable(&start_pfn, &max_pfn);
+    
+#ifdef __i386__
+    /*
+     * now we can initialise the page allocator
+     */
+    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
+           (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), 
+           (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
+    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));   
+#endif
+
+    printk("MM: done\n");
+}
diff -r 5f1ed597f107 -r 8799d14bef77 extras/mini-os/time.c
--- a/extras/mini-os/time.c     Wed Aug 24 02:43:18 2005
+++ b/extras/mini-os/time.c     Thu Aug 25 22:53:20 2005
@@ -43,19 +43,20 @@
  * Time functions
  *************************************************************************/
 
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-
 /* These are peridically updated in shared_info, and then copied here. */
-static u32 shadow_tsc_stamp;
-static s64 shadow_system_time;
-static u32 shadow_time_version;
-static struct timeval shadow_tv;
+struct shadow_time_info {
+       u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+       u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+       u32 tsc_to_nsec_mul;   /* Copy of Xen's tsc_to_system_mul.  */
+       u32 tsc_to_usec_mul;   /* tsc_to_nsec_mul / 1000.           */
+       int tsc_shift;         /* Shift applied to a TSC delta before scaling; negative = right shift. */
+       u32 version;           /* Xen's version value when this copy was taken. */
+};
+static struct timespec shadow_ts;
+static u32 shadow_ts_version;
+
+static struct shadow_time_info shadow;
+
 
 #ifndef rmb
 #define rmb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
@@ -63,116 +64,150 @@
 
 #define HANDLE_USEC_OVERFLOW(_tv)          \
     do {                                   \
-        while ( (_tv).tv_usec >= 1000000 ) \
+        while ( (_tv)->tv_usec >= 1000000 ) \
         {                                  \
-            (_tv).tv_usec -= 1000000;      \
-            (_tv).tv_sec++;                \
+            (_tv)->tv_usec -= 1000000;      \
+            (_tv)->tv_sec++;                \
         }                                  \
     } while ( 0 )
 
+/* Nonzero iff the shadow copy carries the same version that the shared-info
+ * page currently holds in vcpu_time[0]. */
+static inline int time_values_up_to_date(void)
+{
+       return (HYPERVISOR_shared_info->vcpu_time[0].version == shadow.version);
+}
+
+
+/*
+ * Scale a 64-bit delta: shift it by @shift, then multiply by the 32-bit
+ * fraction @mul_frac, yielding the 64-bit value (delta * mul_frac) >> 32.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+       u64 product;
+#ifdef __i386__
+       u32 tmp1, tmp2;
+#endif
+       /* Pre-shift: a negative @shift means shift right by its magnitude. */
+       if ( shift < 0 )
+               delta >>= -shift;
+       else
+               delta <<= shift;
+       /* i386 builds the result from two 32x32 multiplies (low half, then high half plus the carried upper word). */
+#ifdef __i386__
+       __asm__ (
+               "mul  %5       ; "
+               "mov  %4,%%eax ; "
+               "mov  %%edx,%4 ; "
+               "mul  %5       ; "
+               "add  %4,%%eax ; "
+               "xor  %5,%5    ; "
+               "adc  %5,%%edx ; "
+               : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+               : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#else
+       __asm__ (
+               "mul %%rdx ; shrd $32,%%rdx,%%rax"
+               : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#endif
+
+       return product;
+}
+
+/* Nanoseconds elapsed since the shadowed TSC timestamp; returned as u64 so the scaled value is not truncated to 32 bits on i386. */
+static u64 get_nsec_offset(void)
+{
+       u64 now, delta;
+       rdtscll(now);
+       delta = now - shadow.tsc_timestamp;
+       return scale_delta(delta, shadow.tsc_to_nsec_mul, shadow.tsc_shift);
+}
+
+
 static void get_time_values_from_xen(void)
 {
-    do {
-        shadow_time_version = HYPERVISOR_shared_info->time_version2;
-        rmb();
-        shadow_tv.tv_sec    = HYPERVISOR_shared_info->wc_sec;
-        shadow_tv.tv_usec   = HYPERVISOR_shared_info->wc_usec;
-        shadow_tsc_stamp    = (u32)HYPERVISOR_shared_info->tsc_timestamp;
-        shadow_system_time  = HYPERVISOR_shared_info->system_time;
-        rmb();
-    }
-    while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
-}
-
-
-#define TIME_VALUES_UP_TO_DATE \
-    (shadow_time_version == HYPERVISOR_shared_info->time_version2)
-
-static u32  get_time_delta_usecs(void)
-{
-       register unsigned long eax, edx;
-
-       /* Read the Time Stamp Counter */
-
-       rdtsc(eax,edx);
-
-       /* .. relative to previous jiffy (32 bits is enough) */
-       eax -= shadow_tsc_stamp;
-
-       /*
-        * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
-        *             = (tsc_low delta) * (usecs_per_clock)
-        *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
-        *
-        * Using a mull instead of a divl saves up to 31 clock cycles
-        * in the critical path.
-        */
-
-       __asm__("mull %2"
-               :"=a" (eax), "=d" (edx)
-               :"rm" (fast_gettimeoffset_quotient),
-                "0" (eax));
-
-       /* our adjusted time offset in microseconds */
-       return edx;
-}
-
-s64 get_s_time (void)
-{
-    u64 u_delta;
-    s64 ret;
-
- again:
-
-    u_delta = get_time_delta_usecs();
-    ret = shadow_system_time + (1000 * u_delta);
-
-    if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
-    {
-        /*
-         * We may have blocked for a long time, rendering our calculations
-         * invalid (e.g. the time delta may have overflowed). Detect that
-         * and recalculate with fresh values.
-         */
-        get_time_values_from_xen();
-        goto again;
-    }
-
-    return ret;
-}
+       struct vcpu_time_info    *src = &HYPERVISOR_shared_info->vcpu_time[0];
+
+       do {
+               shadow.version = src->version;
+               rmb();
+               shadow.tsc_timestamp     = src->tsc_timestamp;
+               shadow.system_timestamp  = src->system_time;
+               shadow.tsc_to_nsec_mul   = src->tsc_to_system_mul;
+               shadow.tsc_shift         = src->tsc_shift;
+               rmb();
+       }
+       while ((src->version & 1) | (shadow.version ^ src->version));
+
+       shadow.tsc_to_usec_mul = shadow.tsc_to_nsec_mul / 1000;
+}
+
+
+
+
+/* monotonic_clock(): returns shadow.system_timestamp plus the TSC-derived
+ *             nanoseconds elapsed since it. This function is required to
+ *             return accurate time even in the absence of multiple timer ticks.
+ */
+u64 monotonic_clock(void)
+{
+       u64 time;
+       u32 local_time_version;
+       /* Retry whenever the shadow was refreshed during the computation, so time comes from one snapshot. */
+       do {
+               local_time_version = shadow.version;
+               rmb();
+               time = shadow.system_timestamp + get_nsec_offset();
+        if (!time_values_up_to_date())
+                       get_time_values_from_xen();
+               rmb();
+       } while (local_time_version != shadow.version);
+
+       return time;
+}
+/* Copy the shared-info wall-clock seconds/nanoseconds into shadow_ts, repeating until a stable copy is read. */
+static void update_wallclock(void)
+{
+       shared_info_t *s = HYPERVISOR_shared_info;
+       /* Loop again while wc_version is odd (presumably an update in progress — confirm) or changed during the copy. */
+       do {
+               shadow_ts_version = s->wc_version;
+               rmb();
+               shadow_ts.ts_sec  = s->wc_sec;
+               shadow_ts.ts_nsec = s->wc_nsec;
+               rmb();
+       }
+       while ((s->wc_version & 1) | (shadow_ts_version ^ s->wc_version));
+}
+
 
 void gettimeofday(struct timeval *tv)
 {
-    struct timeval _tv;
-
-    do {
-        get_time_values_from_xen();
-        _tv.tv_usec = get_time_delta_usecs();
-        _tv.tv_sec   = shadow_tv.tv_sec;
-        _tv.tv_usec += shadow_tv.tv_usec;
-    }
-    while ( unlikely(!TIME_VALUES_UP_TO_DATE) );
-
-    HANDLE_USEC_OVERFLOW(_tv);
-    *tv = _tv;
-}
+    u64 nsec = monotonic_clock();
+    nsec += shadow_ts.ts_nsec;
+    
+    
+    tv->tv_sec = shadow_ts.ts_sec;
+    tv->tv_sec += NSEC_TO_SEC(nsec);
+    tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL);
+}
+
 
 static void print_current_time(void)
 {
-    struct timeval tv;
-
-    get_time_values_from_xen();
+    struct timeval tv;    
 
     gettimeofday(&tv);
     printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec);
 }
 
+
 void block(u32 millisecs)
 {
     struct timeval tv;
     gettimeofday(&tv);
-    //printk("tv.tv_sec=%ld, tv.tv_usec=%ld, shadow_system_time=%lld\n", 
tv.tv_sec, tv.tv_usec, shadow_system_time );
-    HYPERVISOR_set_timer_op(get_s_time() + 1000000LL * (s64) millisecs);
+    HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs);
     HYPERVISOR_block();
 }
 
@@ -185,7 +220,7 @@
     static int i;
 
     get_time_values_from_xen();
-
+    update_wallclock();
     i++;
     if (i >= 1000) {
         print_current_time();
@@ -197,24 +232,5 @@
 
 void init_time(void)
 {
-    u64         __cpu_khz;
-    unsigned long cpu_khz;
-
-    __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
-
-    cpu_khz = (u32) (__cpu_khz/1000);
-
-    printk("Xen reported: %lu.%03lu MHz processor.\n", 
-           cpu_khz / 1000, cpu_khz % 1000);
-       /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
-          (2^32 * 1 / (clocks/us)) */
-       {       
-               unsigned long eax=0, edx=1000;
-               __asm__("divl %2"
-                   :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
-                   :"r" (cpu_khz),
-                   "0" (eax), "1" (edx));
-       }
-
     bind_virq(VIRQ_TIMER, &timer_handler);
 }
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.4-xen-sparse/arch/xen/Makefile
--- a/linux-2.4-xen-sparse/arch/xen/Makefile    Wed Aug 24 02:43:18 2005
+++ b/linux-2.4-xen-sparse/arch/xen/Makefile    Thu Aug 25 22:53:20 2005
@@ -61,7 +61,6 @@
 SUBDIRS += arch/xen/drivers/evtchn
 SUBDIRS += arch/xen/drivers/blkif
 SUBDIRS += arch/xen/drivers/netif
-#SUBDIRS += arch/xen/drivers/usbif
 SUBDIRS += arch/xen/drivers/balloon
 ifdef CONFIG_XEN_PRIVILEGED_GUEST
 SUBDIRS += arch/xen/drivers/dom0 
@@ -72,7 +71,6 @@
 CORE_FILES += arch/xen/drivers/console/drv.o
 DRIVERS += arch/xen/drivers/blkif/drv.o
 DRIVERS += arch/xen/drivers/netif/drv.o
-DRIVERS += arch/xen/drivers/usbif/drv.o
 ifdef CONFIG_XEN_PRIVILEGED_GUEST
 CORE_FILES += arch/xen/drivers/dom0/drv.o
 endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.4-xen-sparse/arch/xen/config.in
--- a/linux-2.4-xen-sparse/arch/xen/config.in   Wed Aug 24 02:43:18 2005
+++ b/linux-2.4-xen-sparse/arch/xen/config.in   Thu Aug 25 22:53:20 2005
@@ -16,14 +16,10 @@
 comment 'Xen'
 bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
 bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
-    bool 'USB-device backend driver' CONFIG_XEN_USB_BACKEND
-fi
 bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
 bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
 bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
 bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
-bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
 endmenu
 # The IBM S/390 patch needs this.
 define_bool CONFIG_NO_IDLE_HZ y
@@ -267,7 +263,7 @@
 
 source drivers/char/Config.in
 
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; then
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
    source drivers/media/Config.in
 fi
 
@@ -302,14 +298,8 @@
    endmenu
 fi
 
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; then
-   if [ "$CONFIG_XEN_USB_FRONTEND" = "y" -o "$CONFIG_XEN_USB_BACKEND" = "y" ]; then
-       define_bool CONFIG_USB y
-   fi
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
    source drivers/usb/Config.in
-fi
-
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
    source net/bluetooth/Config.in
 fi
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.4-xen-sparse/mkbuildtree
--- a/linux-2.4-xen-sparse/mkbuildtree  Wed Aug 24 02:43:18 2005
+++ b/linux-2.4-xen-sparse/mkbuildtree  Thu Aug 25 22:53:20 2005
@@ -103,9 +103,8 @@
 rm -f mkbuildtree
 
 set ${RS}/../linux-2.6-xen-sparse
-[ "$1" == "${RS}/../linux-2.6-xen-parse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
+[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
 LINUX_26="$1"
-
 
 # Create links to the shared definitions of the Xen interfaces.
 rm -rf ${AD}/include/asm-xen/xen-public
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig     Thu Aug 25 22:53:20 2005
@@ -61,15 +61,6 @@
           with the blktap.  This option will be removed as the block drivers are
           modified to use grant tables.
 
-config XEN_BLKDEV_GRANT
-        bool "Grant table substrate for block drivers"
-        depends on !XEN_BLKDEV_TAP_BE
-        default y
-        help
-          This introduces the use of grant tables as a data exhange mechanism
-          between the frontend and backend block drivers. This currently
-          conflicts with the block tap.
-
 config XEN_NETDEV_BACKEND
        bool "Network-device backend driver"
        depends on XEN_PHYSDEV_ACCESS
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.12-xen0
-# Mon Jul 25 09:48:34 2005
+# Wed Aug  3 09:54:56 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -14,12 +14,11 @@
 CONFIG_XEN_PHYSDEV_ACCESS=y
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_NETDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-#CONFIG_XEN_NETDEV_GRANT_TX=y
-#CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -93,11 +92,11 @@
 # CONFIG_M586 is not set
 # CONFIG_M586TSC is not set
 # CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMIII is not set
 # CONFIG_MPENTIUMM is not set
-CONFIG_MPENTIUM4=y
+# CONFIG_MPENTIUM4 is not set
 # CONFIG_MK6 is not set
 # CONFIG_MK7 is not set
 # CONFIG_MK8 is not set
@@ -112,15 +111,15 @@
 # CONFIG_X86_GENERIC is not set
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
 CONFIG_X86_POPAD_OK=y
 CONFIG_X86_GOOD_APIC=y
-CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 # CONFIG_HPET_TIMER is not set
 # CONFIG_HPET_EMULATE_RTC is not set
@@ -130,6 +129,7 @@
 # CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_MICROCODE=y
 CONFIG_X86_CPUID=y
+CONFIG_SWIOTLB=y
 
 #
 # Firmware Drivers
@@ -540,7 +540,7 @@
 # CONFIG_IP_NF_MATCH_STATE is not set
 # CONFIG_IP_NF_MATCH_CONNTRACK is not set
 # CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_PHYSDEV is not set
+CONFIG_IP_NF_MATCH_PHYSDEV=y
 # CONFIG_IP_NF_MATCH_ADDRTYPE is not set
 # CONFIG_IP_NF_MATCH_REALM is not set
 # CONFIG_IP_NF_MATCH_SCTP is not set
@@ -688,7 +688,7 @@
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
 # CONFIG_R8169 is not set
-# CONFIG_SK98LIN is not set
+CONFIG_SK98LIN=y
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
 # CONFIG_BNX2 is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-xen0
-# Wed Jun 29 10:01:20 2005
+# Linux kernel version: 2.6.12.4-xen0
+# Mon Aug 15 18:57:19 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -14,10 +14,11 @@
 CONFIG_XEN_PHYSDEV_ACCESS=y
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_NETDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -50,6 +51,7 @@
 # CONFIG_IKCONFIG is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -116,9 +118,11 @@
 CONFIG_GENERIC_CPU=y
 CONFIG_X86_L1_CACHE_BYTES=128
 # CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
 # CONFIG_X86_MSR is not set
 # CONFIG_GART_IOMMU is not set
 CONFIG_DUMMY_IOMMU=y
+CONFIG_SWIOTLB=y
 # CONFIG_X86_MCE is not set
 
 #
@@ -160,6 +164,7 @@
 CONFIG_STANDALONE=y
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
 
 #
 # Memory Technology Devices (MTD)
@@ -369,7 +374,23 @@
 #
 # Multi-device support (RAID and LVM)
 #
-# CONFIG_MD is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID10 is not set
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_RAID6 is not set
+CONFIG_MD_MULTIPATH=y
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_MIRROR=y
+# CONFIG_DM_ZERO is not set
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_EMC=y
 
 #
 # Fusion MPT device support
@@ -458,7 +479,7 @@
 # CONFIG_IP_NF_MATCH_STATE is not set
 # CONFIG_IP_NF_MATCH_CONNTRACK is not set
 # CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_PHYSDEV is not set
+CONFIG_IP_NF_MATCH_PHYSDEV=y
 # CONFIG_IP_NF_MATCH_ADDRTYPE is not set
 # CONFIG_IP_NF_MATCH_REALM is not set
 # CONFIG_IP_NF_MATCH_SCTP is not set
@@ -589,7 +610,7 @@
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
 # CONFIG_R8169 is not set
-# CONFIG_SK98LIN is not set
+CONFIG_SK98LIN=y
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
 # CONFIG_BNX2 is not set
@@ -786,7 +807,107 @@
 #
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+# CONFIG_USB_STORAGE is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+
+#
+# USB ATM/DSL drivers
+#
 
 #
 # USB Gadget Support
@@ -801,7 +922,12 @@
 #
 # InfiniBand support
 #
-# CONFIG_INFINIBAND is not set
+CONFIG_INFINIBAND=y
+CONFIG_INFINIBAND_MTHCA=y
+CONFIG_INFINIBAND_MTHCA_DEBUG=y
+CONFIG_INFINIBAND_IPOIB=y
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
 
 #
 # Power management options
@@ -1036,7 +1162,22 @@
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
+# CONFIG_CHECKING is not set
+# CONFIG_INIT_DEBUG is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.12-xenU
-# Mon Jul 25 10:06:06 2005
+# Wed Aug  3 09:57:44 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -12,11 +12,10 @@
 #
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
-CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-#CONFIG_XEN_NETDEV_GRANT_TX=y
-#CONFIG_XEN_NETDEV_GRANT_RX=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -90,11 +89,11 @@
 # CONFIG_M586 is not set
 # CONFIG_M586TSC is not set
 # CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMIII is not set
 # CONFIG_MPENTIUMM is not set
-CONFIG_MPENTIUM4=y
+# CONFIG_MPENTIUM4 is not set
 # CONFIG_MK6 is not set
 # CONFIG_MK7 is not set
 # CONFIG_MK8 is not set
@@ -109,15 +108,15 @@
 # CONFIG_X86_GENERIC is not set
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
 CONFIG_X86_POPAD_OK=y
 CONFIG_X86_GOOD_APIC=y
-CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 # CONFIG_HPET_TIMER is not set
 # CONFIG_HPET_EMULATE_RTC is not set
@@ -415,7 +414,7 @@
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
+CONFIG_CRAMFS=y
 # CONFIG_VXFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64       Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.12-xenU
-# Thu Jul  7 11:43:14 2005
+# Thu Aug 18 11:15:14 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -12,9 +12,10 @@
 #
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
-CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -28,7 +29,7 @@
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
@@ -46,8 +47,10 @@
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
 CONFIG_KALLSYMS_EXTRA_PASS=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -72,6 +75,7 @@
 CONFIG_MODVERSIONS=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
 CONFIG_XENARCH="x86_64"
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -84,12 +88,15 @@
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_X86_GOOD_APIC=y
 # CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_MICROCODE is not set
 CONFIG_X86_CPUID=y
 # CONFIG_NUMA is not set
 # CONFIG_MTRR is not set
+CONFIG_HAVE_DEC_LOCK=y
 # CONFIG_X86_LOCAL_APIC is not set
 # CONFIG_X86_IO_APIC is not set
 # CONFIG_PCI is not set
@@ -112,7 +119,11 @@
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_L1_CACHE_BYTES=128
 # CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
 # CONFIG_X86_MSR is not set
+CONFIG_X86_HT=y
+# CONFIG_K8_NUMA is not set
+# CONFIG_NUMA_EMU is not set
 CONFIG_DUMMY_IOMMU=y
 # CONFIG_X86_MCE is not set
 
@@ -155,6 +166,7 @@
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
 
 #
 # Block devices
@@ -257,7 +269,10 @@
 CONFIG_IP_ROUTE_MULTIPATH=y
 # CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
 CONFIG_IP_ROUTE_VERBOSE=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPGRE_BROADCAST=y
@@ -557,7 +572,6 @@
 #
 # Old SIR device drivers
 #
-# CONFIG_IRPORT_SIR is not set
 
 #
 # Old Serial dongle support
@@ -660,14 +674,14 @@
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_XATTR=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
 CONFIG_JBD=m
 # CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
-CONFIG_REISERFS_FS=m
+CONFIG_REISERFS_FS=y
 # CONFIG_REISERFS_CHECK is not set
 CONFIG_REISERFS_PROC_INFO=y
 CONFIG_REISERFS_FS_XATTR=y
@@ -746,7 +760,7 @@
 # CONFIG_BEFS_DEBUG is not set
 CONFIG_BFS_FS=m
 CONFIG_EFS_FS=m
-CONFIG_CRAMFS=m
+CONFIG_CRAMFS=y
 CONFIG_VXFS_FS=m
 # CONFIG_HPFS_FS is not set
 CONFIG_QNX4FS_FS=m
@@ -859,17 +873,7 @@
 # Security options
 #
 # CONFIG_KEYS is not set
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_CAPABILITIES=y
-# CONFIG_SECURITY_SECLVL is not set
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_SECURITY_SELINUX_DEVELOP=y
-CONFIG_SECURITY_SELINUX_AVC_STATS=y
-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
+# CONFIG_SECURITY is not set
 
 #
 # Cryptographic options
@@ -917,5 +921,19 @@
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
+# CONFIG_INIT_DEBUG is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32        Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.12-xen
-# Thu Jul 14 21:55:53 2005
+# Wed Aug  3 10:04:25 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -14,10 +14,11 @@
 CONFIG_XEN_PHYSDEV_ACCESS=y
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_NETDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -135,6 +136,7 @@
 # CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_MICROCODE=m
 CONFIG_X86_CPUID=m
+CONFIG_SWIOTLB=y
 
 #
 # Firmware Drivers
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64        Thu Aug 25 22:53:20 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-xen
-# Fri Jul 15 00:34:21 2005
+# Linux kernel version: 2.6.12.4-xen
+# Mon Aug 15 19:54:11 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -14,10 +14,11 @@
 CONFIG_XEN_PHYSDEV_ACCESS=y
 CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_BLKDEV_TAP_BE is not set
-CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_NETDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_GRANT_TX=y
+CONFIG_XEN_NETDEV_GRANT_RX=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -33,6 +34,7 @@
 # CONFIG_CLEAN_COMPILE is not set
 CONFIG_BROKEN=y
 CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
@@ -48,10 +50,11 @@
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_KALLSYMS_EXTRA_PASS=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_BASE_FULL=y
@@ -73,8 +76,9 @@
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 CONFIG_OBSOLETE_MODPARM=y
 # CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
 CONFIG_XENARCH="x86_64"
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -87,12 +91,15 @@
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_X86_GOOD_APIC=y
 # CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
 CONFIG_MICROCODE=y
 # CONFIG_X86_CPUID is not set
 # CONFIG_NUMA is not set
 # CONFIG_MTRR is not set
+CONFIG_HAVE_DEC_LOCK=y
 CONFIG_X86_LOCAL_APIC=y
 CONFIG_X86_IO_APIC=y
 CONFIG_PCI=y
@@ -108,7 +115,7 @@
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
-# CONFIG_EARLY_PRINTK is not set
+CONFIG_EARLY_PRINTK=y
 
 #
 # Processor type and features
@@ -117,9 +124,14 @@
 CONFIG_GENERIC_CPU=y
 CONFIG_X86_L1_CACHE_BYTES=128
 # CONFIG_X86_TSC is not set
+CONFIG_X86_XEN_GENAPIC=y
 # CONFIG_X86_MSR is not set
+CONFIG_X86_HT=y
+# CONFIG_K8_NUMA is not set
+# CONFIG_NUMA_EMU is not set
 # CONFIG_GART_IOMMU is not set
 CONFIG_DUMMY_IOMMU=y
+CONFIG_SWIOTLB=y
 # CONFIG_X86_MCE is not set
 
 #
@@ -149,7 +161,7 @@
 # Executable file formats
 #
 CONFIG_BINFMT_ELF=y
-CONFIG_BINFMT_MISC=m
+CONFIG_BINFMT_MISC=y
 
 #
 # Device Drivers
@@ -160,7 +172,7 @@
 #
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=m
+CONFIG_FW_LOADER=y
 # CONFIG_DEBUG_DRIVER is not set
 
 #
@@ -174,7 +186,7 @@
 CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
 # CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set
 # CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
-# CONFIG_MTD_CMDLINE_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
 
 #
 # User Modules And Translation Layers
@@ -206,24 +218,20 @@
 # CONFIG_MTD_CFI_I8 is not set
 CONFIG_MTD_CFI_INTELEXT=m
 CONFIG_MTD_CFI_AMDSTD=m
-CONFIG_MTD_CFI_AMDSTD_RETRY=0
+CONFIG_MTD_CFI_AMDSTD_RETRY=3
 CONFIG_MTD_CFI_STAA=m
 CONFIG_MTD_CFI_UTIL=m
 CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
 CONFIG_MTD_ABSENT=m
 # CONFIG_MTD_OBSOLETE_CHIPS is not set
-# CONFIG_MTD_XIP is not set
 
 #
 # Mapping drivers for chip access
 #
 CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x8000000
-CONFIG_MTD_PHYSMAP_LEN=0x4000000
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
-CONFIG_MTD_PNC2000=m
+# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PNC2000 is not set
 CONFIG_MTD_SC520CDP=m
 CONFIG_MTD_NETSC520=m
 CONFIG_MTD_TS5500=m
@@ -231,10 +239,9 @@
 CONFIG_MTD_ELAN_104NC=m
 # CONFIG_MTD_AMD76XROM is not set
 # CONFIG_MTD_ICHXROM is not set
-# CONFIG_MTD_SCB2_FLASH is not set
-CONFIG_MTD_NETtel=m
-CONFIG_MTD_DILNETPC=m
-CONFIG_MTD_DILNETPC_BOOTSIZE=0x80000
+CONFIG_MTD_SCB2_FLASH=m
+# CONFIG_MTD_NETtel is not set
+# CONFIG_MTD_DILNETPC is not set
 # CONFIG_MTD_L440GX is not set
 CONFIG_MTD_PCI=m
 
@@ -244,19 +251,19 @@
 CONFIG_MTD_PMC551=m
 # CONFIG_MTD_PMC551_BUGFIX is not set
 # CONFIG_MTD_PMC551_DEBUG is not set
-CONFIG_MTD_SLRAM=m
-CONFIG_MTD_PHRAM=m
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
 CONFIG_MTD_MTDRAM=m
 CONFIG_MTDRAM_TOTAL_SIZE=4096
 CONFIG_MTDRAM_ERASE_SIZE=128
-CONFIG_MTD_BLKMTD=m
-# CONFIG_MTD_BLOCK2MTD is not set
+# CONFIG_MTD_BLKMTD is not set
+CONFIG_MTD_BLOCK2MTD=m
 
 #
 # Disk-On-Chip Device Drivers
 #
 CONFIG_MTD_DOC2000=m
-CONFIG_MTD_DOC2001=m
+# CONFIG_MTD_DOC2001 is not set
 CONFIG_MTD_DOC2001PLUS=m
 CONFIG_MTD_DOCPROBE=m
 CONFIG_MTD_DOCECC=m
@@ -269,10 +276,7 @@
 CONFIG_MTD_NAND=m
 # CONFIG_MTD_NAND_VERIFY_WRITE is not set
 CONFIG_MTD_NAND_IDS=m
-CONFIG_MTD_NAND_DISKONCHIP=m
-# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set
-CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0
-# CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set
+# CONFIG_MTD_NAND_DISKONCHIP is not set
 # CONFIG_MTD_NAND_NANDSIM is not set
 
 #
@@ -280,8 +284,7 @@
 #
 CONFIG_PARPORT=m
 CONFIG_PARPORT_PC=m
-CONFIG_PARPORT_SERIAL=m
-CONFIG_PARPORT_PC_FIFO=y
+# CONFIG_PARPORT_PC_FIFO is not set
 # CONFIG_PARPORT_PC_SUPERIO is not set
 CONFIG_PARPORT_NOT_PC=y
 # CONFIG_PARPORT_GSC is not set
@@ -290,13 +293,7 @@
 #
 # Plug and Play support
 #
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
+# CONFIG_PNP is not set
 
 #
 # Block devices
@@ -324,7 +321,7 @@
 CONFIG_PARIDE_FIT2=m
 CONFIG_PARIDE_FIT3=m
 CONFIG_PARIDE_EPAT=m
-# CONFIG_PARIDE_EPATC8 is not set
+CONFIG_PARIDE_EPATC8=y
 CONFIG_PARIDE_EPIA=m
 CONFIG_PARIDE_FRIQ=m
 CONFIG_PARIDE_FRPW=m
@@ -345,7 +342,7 @@
 # CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_LBD=y
@@ -360,70 +357,69 @@
 CONFIG_IOSCHED_AS=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-# CONFIG_ATA_OVER_ETH is not set
+CONFIG_ATA_OVER_ETH=m
 
 #
 # ATA/ATAPI/MFM/RLL support
 #
-CONFIG_IDE=m
-CONFIG_BLK_DEV_IDE=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
 
 #
 # Please see Documentation/ide.txt for help/info on IDE drives
 #
 # CONFIG_BLK_DEV_IDE_SATA is not set
 # CONFIG_BLK_DEV_HD_IDE is not set
-CONFIG_BLK_DEV_IDEDISK=m
-# CONFIG_IDEDISK_MULTI_MODE is not set
-CONFIG_BLK_DEV_IDECD=m
-CONFIG_BLK_DEV_IDETAPE=m
-CONFIG_BLK_DEV_IDEFLOPPY=m
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=y
 CONFIG_BLK_DEV_IDESCSI=m
 # CONFIG_IDE_TASK_IOCTL is not set
 
 #
 # IDE chipset support/bugfixes
 #
-CONFIG_IDE_GENERIC=m
+CONFIG_IDE_GENERIC=y
 CONFIG_BLK_DEV_CMD640=y
-# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
-# CONFIG_BLK_DEV_IDEPNP is not set
+CONFIG_BLK_DEV_CMD640_ENHANCED=y
 CONFIG_BLK_DEV_IDEPCI=y
 CONFIG_IDEPCI_SHARE_IRQ=y
 # CONFIG_BLK_DEV_OFFBOARD is not set
-CONFIG_BLK_DEV_GENERIC=m
-CONFIG_BLK_DEV_OPTI621=m
-CONFIG_BLK_DEV_RZ1000=m
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_RZ1000=y
 CONFIG_BLK_DEV_IDEDMA_PCI=y
 # CONFIG_BLK_DEV_IDEDMA_FORCED is not set
 CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_IDEDMA_ONLYDISK is not set
-CONFIG_BLK_DEV_AEC62XX=m
-CONFIG_BLK_DEV_ALI15X3=m
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
 # CONFIG_WDC_ALI15X3 is not set
-CONFIG_BLK_DEV_AMD74XX=m
-CONFIG_BLK_DEV_ATIIXP=m
-CONFIG_BLK_DEV_CMD64X=m
-CONFIG_BLK_DEV_TRIFLEX=m
-CONFIG_BLK_DEV_CY82C693=m
-CONFIG_BLK_DEV_CS5520=m
-CONFIG_BLK_DEV_CS5530=m
-CONFIG_BLK_DEV_HPT34X=m
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_ATIIXP=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5520=y
+CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_HPT34X=y
 # CONFIG_HPT34X_AUTODMA is not set
-CONFIG_BLK_DEV_HPT366=m
-CONFIG_BLK_DEV_SC1200=m
-CONFIG_BLK_DEV_PIIX=m
-CONFIG_BLK_DEV_NS87415=m
-CONFIG_BLK_DEV_PDC202XX_OLD=m
-CONFIG_PDC202XX_BURST=y
-CONFIG_BLK_DEV_PDC202XX_NEW=m
+CONFIG_BLK_DEV_HPT366=y
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+# CONFIG_PDC202XX_BURST is not set
+CONFIG_BLK_DEV_PDC202XX_NEW=y
 CONFIG_PDC202XX_FORCE=y
-CONFIG_BLK_DEV_SVWKS=m
-CONFIG_BLK_DEV_SIIMAGE=m
-CONFIG_BLK_DEV_SIS5513=m
-CONFIG_BLK_DEV_SLC90E66=m
-CONFIG_BLK_DEV_TRM290=m
-CONFIG_BLK_DEV_VIA82CXXX=m
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
 # CONFIG_IDE_ARM is not set
 CONFIG_BLK_DEV_IDEDMA=y
 # CONFIG_IDEDMA_IVB is not set
@@ -433,17 +429,17 @@
 #
 # SCSI device support
 #
-CONFIG_SCSI=m
+CONFIG_SCSI=y
 CONFIG_SCSI_PROC_FS=y
 
 #
 # SCSI support type (disk, tape, CD-ROM)
 #
-CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
 CONFIG_CHR_DEV_OSST=m
 CONFIG_BLK_DEV_SR=m
-# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_BLK_DEV_SR_VENDOR=y
 CONFIG_CHR_DEV_SG=m
 
 #
@@ -458,7 +454,7 @@
 #
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=m
 
 #
 # SCSI low-level drivers
@@ -468,29 +464,30 @@
 CONFIG_SCSI_ACARD=m
 CONFIG_SCSI_AACRAID=m
 CONFIG_SCSI_AIC7XXX=m
-CONFIG_AIC7XXX_CMDS_PER_DEVICE=8
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=4
 CONFIG_AIC7XXX_RESET_DELAY_MS=15000
-CONFIG_AIC7XXX_DEBUG_ENABLE=y
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
-CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
-CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_CMDS_PER_DEVICE=4
 CONFIG_AIC79XX_RESET_DELAY_MS=15000
-CONFIG_AIC79XX_ENABLE_RD_STRM=y
-CONFIG_AIC79XX_DEBUG_ENABLE=y
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
 CONFIG_AIC79XX_DEBUG_MASK=0
-CONFIG_AIC79XX_REG_PRETTY_PRINT=y
-CONFIG_SCSI_ADVANSYS=m
-# CONFIG_MEGARAID_NEWGEN is not set
-CONFIG_MEGARAID_LEGACY=m
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_ADVANSYS is not set
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
-CONFIG_SCSI_ATA_PIIX=m
+CONFIG_SCSI_ATA_PIIX=y
 CONFIG_SCSI_SATA_NV=m
 CONFIG_SCSI_SATA_PROMISE=m
-# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_QSTOR=m
 CONFIG_SCSI_SATA_SX4=m
 CONFIG_SCSI_SATA_SIL=m
 CONFIG_SCSI_SATA_SIS=m
@@ -500,17 +497,14 @@
 CONFIG_SCSI_BUSLOGIC=m
 # CONFIG_SCSI_OMIT_FLASHPOINT is not set
 # CONFIG_SCSI_CPQFCTS is not set
-CONFIG_SCSI_DMX3191D=m
-CONFIG_SCSI_EATA=m
-CONFIG_SCSI_EATA_TAGGED_QUEUE=y
-CONFIG_SCSI_EATA_LINKED_COMMANDS=y
-CONFIG_SCSI_EATA_MAX_TAGS=16
-CONFIG_SCSI_EATA_PIO=m
-CONFIG_SCSI_FUTURE_DOMAIN=m
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
 CONFIG_SCSI_GDTH=m
 CONFIG_SCSI_IPS=m
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_INITIO=m
+CONFIG_SCSI_INIA100=m
 CONFIG_SCSI_PPA=m
 CONFIG_SCSI_IMM=m
 # CONFIG_SCSI_IZIP_EPP16 is not set
@@ -520,32 +514,29 @@
 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
-CONFIG_SCSI_IPR=m
-# CONFIG_SCSI_IPR_TRACE is not set
-# CONFIG_SCSI_IPR_DUMP is not set
+# CONFIG_SCSI_IPR is not set
 # CONFIG_SCSI_PCI2000 is not set
 # CONFIG_SCSI_PCI2220I is not set
-CONFIG_SCSI_QLOGIC_ISP=m
-CONFIG_SCSI_QLOGIC_FC=m
-CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
 CONFIG_SCSI_QLOGIC_1280=m
 CONFIG_SCSI_QLOGIC_1280_1040=y
-CONFIG_SCSI_QLA2XXX=m
-# CONFIG_SCSI_QLA21XX is not set
-# CONFIG_SCSI_QLA22XX is not set
-# CONFIG_SCSI_QLA2300 is not set
-# CONFIG_SCSI_QLA2322 is not set
-# CONFIG_SCSI_QLA6312 is not set
+CONFIG_SCSI_QLA2XXX=y
+CONFIG_SCSI_QLA21XX=m
+CONFIG_SCSI_QLA22XX=m
+CONFIG_SCSI_QLA2300=m
+CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_LPFC=m
 CONFIG_SCSI_DC395x=m
 CONFIG_SCSI_DC390T=m
-CONFIG_SCSI_DEBUG=m
+# CONFIG_SCSI_DEBUG is not set
 
 #
 # Multi-device support (RAID and LVM)
 #
 CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
+CONFIG_BLK_DEV_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
@@ -579,7 +570,7 @@
 # Subsystem Options
 #
 # CONFIG_IEEE1394_VERBOSEDEBUG is not set
-# CONFIG_IEEE1394_OUI_DB is not set
+CONFIG_IEEE1394_OUI_DB=y
 CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
 CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
 
@@ -618,9 +609,9 @@
 #
 # Networking options
 #
-CONFIG_PACKET=m
+CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
-CONFIG_UNIX=m
+CONFIG_UNIX=y
 CONFIG_NET_KEY=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
@@ -784,11 +775,6 @@
 CONFIG_IP6_NF_RAW=m
 
 #
-# DECnet: Netfilter Configuration
-#
-CONFIG_DECNET_NF_GRABULATOR=m
-
-#
 # Bridge: Netfilter Configuration
 #
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -810,9 +796,9 @@
 CONFIG_BRIDGE_EBT_REDIRECT=m
 CONFIG_BRIDGE_EBT_SNAT=m
 CONFIG_BRIDGE_EBT_LOG=m
-# CONFIG_BRIDGE_EBT_ULOG is not set
+CONFIG_BRIDGE_EBT_ULOG=m
 CONFIG_XFRM=y
-CONFIG_XFRM_USER=m
+CONFIG_XFRM_USER=y
 
 #
 # SCTP Configuration (EXPERIMENTAL)
@@ -823,19 +809,18 @@
 # CONFIG_SCTP_HMAC_NONE is not set
 # CONFIG_SCTP_HMAC_SHA1 is not set
 CONFIG_SCTP_HMAC_MD5=y
-CONFIG_ATM=y
-CONFIG_ATM_CLIP=y
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
 # CONFIG_ATM_CLIP_NO_ICMP is not set
 CONFIG_ATM_LANE=m
-CONFIG_ATM_MPOA=m
+# CONFIG_ATM_MPOA is not set
 CONFIG_ATM_BR2684=m
 # CONFIG_ATM_BR2684_IPFILTER is not set
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-# CONFIG_DECNET_ROUTER is not set
+# CONFIG_DECNET is not set
 CONFIG_LLC=y
-CONFIG_LLC2=m
+# CONFIG_LLC2 is not set
 CONFIG_IPX=m
 # CONFIG_IPX_INTERN is not set
 CONFIG_ATALK=m
@@ -843,12 +828,10 @@
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
 CONFIG_IPDDP_DECAP=y
-CONFIG_X25=m
-CONFIG_LAPB=m
-# CONFIG_NET_DIVERT is not set
-CONFIG_ECONET=m
-CONFIG_ECONET_AUNUDP=y
-CONFIG_ECONET_NATIVE=y
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+CONFIG_NET_DIVERT=y
+# CONFIG_ECONET is not set
 CONFIG_WAN_ROUTER=m
 
 #
@@ -880,9 +863,9 @@
 CONFIG_NET_CLS_ROUTE=y
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
-# CONFIG_CLS_U32_PERF is not set
-# CONFIG_NET_CLS_IND is not set
-# CONFIG_CLS_U32_MARK is not set
+CONFIG_CLS_U32_PERF=y
+CONFIG_NET_CLS_IND=y
+CONFIG_CLS_U32_MARK=y
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_EMATCH=y
@@ -897,31 +880,12 @@
 #
 # Network testing
 #
-CONFIG_NET_PKTGEN=m
+# CONFIG_NET_PKTGEN is not set
 CONFIG_NETPOLL=y
 # CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NETPOLL_TRAP=y
 CONFIG_NET_POLL_CONTROLLER=y
-CONFIG_HAMRADIO=y
-
-#
-# Packet Radio protocols
-#
-CONFIG_AX25=m
-# CONFIG_AX25_DAMA_SLAVE is not set
-CONFIG_NETROM=m
-CONFIG_ROSE=m
-
-#
-# AX.25 network device drivers
-#
-CONFIG_MKISS=m
-CONFIG_6PACK=m
-CONFIG_BPQETHER=m
-CONFIG_BAYCOM_SER_FDX=m
-CONFIG_BAYCOM_SER_HDX=m
-CONFIG_BAYCOM_PAR=m
-CONFIG_YAM=m
+# CONFIG_HAMRADIO is not set
 CONFIG_IRDA=m
 
 #
@@ -937,7 +901,7 @@
 #
 CONFIG_IRDA_CACHE_LAST_LSAP=y
 CONFIG_IRDA_FAST_RR=y
-CONFIG_IRDA_DEBUG=y
+# CONFIG_IRDA_DEBUG is not set
 
 #
 # Infrared-port device drivers
@@ -1002,9 +966,9 @@
 CONFIG_BT_HCIUART=m
 CONFIG_BT_HCIUART_H4=y
 CONFIG_BT_HCIUART_BCSP=y
-# CONFIG_BT_HCIUART_BCSP_TXCRC is not set
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
 CONFIG_BT_HCIBCM203X=m
-# CONFIG_BT_HCIBPA10X is not set
+CONFIG_BT_HCIBPA10X=m
 CONFIG_BT_HCIBFUSB=m
 CONFIG_BT_HCIVHCI=m
 CONFIG_NETDEVICES=y
@@ -1012,21 +976,11 @@
 CONFIG_BONDING=m
 CONFIG_EQUALIZER=m
 CONFIG_TUN=m
-CONFIG_NET_SB1000=m
 
 #
 # ARCnet devices
 #
-CONFIG_ARCNET=m
-CONFIG_ARCNET_1201=m
-CONFIG_ARCNET_1051=m
-CONFIG_ARCNET_RAW=m
-# CONFIG_ARCNET_CAP is not set
-CONFIG_ARCNET_COM90xx=m
-CONFIG_ARCNET_COM90xxIO=m
-CONFIG_ARCNET_RIM_I=m
-CONFIG_ARCNET_COM20020=m
-CONFIG_ARCNET_COM20020_PCI=m
+# CONFIG_ARCNET is not set
 
 #
 # Ethernet (10 or 100Mbit)
@@ -1046,21 +1000,21 @@
 CONFIG_DE2104X=m
 CONFIG_TULIP=m
 # CONFIG_TULIP_MWI is not set
-# CONFIG_TULIP_MMIO is not set
+CONFIG_TULIP_MMIO=y
 # CONFIG_TULIP_NAPI is not set
 CONFIG_DE4X5=m
 CONFIG_WINBOND_840=m
 CONFIG_DM9102=m
-CONFIG_HP100=m
+# CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
 CONFIG_PCNET32=m
 CONFIG_AMD8111_ETH=m
-# CONFIG_AMD8111E_NAPI is not set
+CONFIG_AMD8111E_NAPI=y
 CONFIG_ADAPTEC_STARFIRE=m
-# CONFIG_ADAPTEC_STARFIRE_NAPI is not set
+CONFIG_ADAPTEC_STARFIRE_NAPI=y
 CONFIG_B44=m
 CONFIG_FORCEDETH=m
-# CONFIG_DGRS is not set
+CONFIG_DGRS=m
 CONFIG_EEPRO100=m
 CONFIG_E100=m
 CONFIG_FEALNX=m
@@ -1069,7 +1023,7 @@
 CONFIG_8139CP=m
 CONFIG_8139TOO=m
 CONFIG_8139TOO_PIO=y
-CONFIG_8139TOO_TUNE_TWISTER=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
 CONFIG_8139TOO_8129=y
 # CONFIG_8139_OLD_RX_RESET is not set
 CONFIG_SIS900=m
@@ -1077,21 +1031,22 @@
 CONFIG_SUNDANCE=m
 # CONFIG_SUNDANCE_MMIO is not set
 CONFIG_VIA_RHINE=m
-# CONFIG_VIA_RHINE_MMIO is not set
+CONFIG_VIA_RHINE_MMIO=y
 
 #
 # Ethernet (1000 Mbit)
 #
-# CONFIG_ACENIC is not set
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
 CONFIG_DL2K=m
 CONFIG_E1000=m
-# CONFIG_E1000_NAPI is not set
+CONFIG_E1000_NAPI=y
 CONFIG_NS83820=m
 CONFIG_HAMACHI=m
 CONFIG_YELLOWFIN=m
 CONFIG_R8169=m
-# CONFIG_R8169_NAPI is not set
-# CONFIG_R8169_VLAN is not set
+CONFIG_R8169_NAPI=y
+CONFIG_R8169_VLAN=y
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
@@ -1101,9 +1056,9 @@
 # Ethernet (10000 Mbit)
 #
 CONFIG_IXGB=m
-# CONFIG_IXGB_NAPI is not set
+CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
-# CONFIG_S2IO_NAPI is not set
+CONFIG_S2IO_NAPI=y
 # CONFIG_2BUFF_MODE is not set
 
 #
@@ -1124,7 +1079,7 @@
 #
 # Obsolete Wireless cards support (pre-802.11)
 #
-CONFIG_STRIP=m
+# CONFIG_STRIP is not set
 
 #
 # Wireless 802.11b ISA/PCI cards support
@@ -1145,35 +1100,7 @@
 #
 # Wan interfaces
 #
-CONFIG_WAN=y
-CONFIG_DSCC4=m
-CONFIG_DSCC4_PCISYNC=y
-CONFIG_DSCC4_PCI_RST=y
-CONFIG_LANMEDIA=m
-CONFIG_SYNCLINK_SYNCPPP=m
-CONFIG_HDLC=m
-CONFIG_HDLC_RAW=y
-CONFIG_HDLC_RAW_ETH=y
-CONFIG_HDLC_CISCO=y
-CONFIG_HDLC_FR=y
-CONFIG_HDLC_PPP=y
-CONFIG_HDLC_X25=y
-CONFIG_PCI200SYN=m
-CONFIG_WANXL=m
-CONFIG_PC300=m
-CONFIG_PC300_MLPPP=y
-CONFIG_FARSYNC=m
-CONFIG_DLCI=m
-CONFIG_DLCI_COUNT=24
-CONFIG_DLCI_MAX=8
-CONFIG_WAN_ROUTER_DRIVERS=y
-# CONFIG_VENDOR_SANGOMA is not set
-CONFIG_CYCLADES_SYNC=m
-CONFIG_CYCLOMX_X25=y
-CONFIG_LAPBETHER=m
-CONFIG_X25_ASY=m
-CONFIG_SBNI=m
-# CONFIG_SBNI_MULTILINE is not set
+# CONFIG_WAN is not set
 
 #
 # ATM drivers
@@ -1184,8 +1111,7 @@
 # CONFIG_ATM_ENI_DEBUG is not set
 # CONFIG_ATM_ENI_TUNE_BURST is not set
 CONFIG_ATM_FIRESTREAM=m
-CONFIG_ATM_ZATM=m
-# CONFIG_ATM_ZATM_DEBUG is not set
+# CONFIG_ATM_ZATM is not set
 CONFIG_ATM_IDT77252=m
 # CONFIG_ATM_IDT77252_DEBUG is not set
 # CONFIG_ATM_IDT77252_RCV_ALL is not set
@@ -1195,20 +1121,13 @@
 CONFIG_ATM_HORIZON=m
 # CONFIG_ATM_HORIZON_DEBUG is not set
 CONFIG_ATM_FORE200E_MAYBE=m
-CONFIG_ATM_FORE200E_PCA=y
-CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y
-# CONFIG_ATM_FORE200E_USE_TASKLET is not set
-CONFIG_ATM_FORE200E_TX_RETRY=16
-CONFIG_ATM_FORE200E_DEBUG=0
-CONFIG_ATM_FORE200E=m
+# CONFIG_ATM_FORE200E_PCA is not set
 CONFIG_ATM_HE=m
-CONFIG_ATM_HE_USE_SUNI=y
+# CONFIG_ATM_HE_USE_SUNI is not set
 CONFIG_FDDI=y
-CONFIG_DEFXX=m
+# CONFIG_DEFXX is not set
 CONFIG_SKFP=m
-CONFIG_HIPPI=y
-CONFIG_ROADRUNNER=m
-# CONFIG_ROADRUNNER_LARGE_RINGS is not set
+# CONFIG_HIPPI is not set
 CONFIG_PLIP=m
 CONFIG_PPP=m
 CONFIG_PPP_MULTILINK=y
@@ -1216,15 +1135,15 @@
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
 CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_BSDCOMP is not set
 CONFIG_PPPOE=m
 CONFIG_PPPOATM=m
 CONFIG_SLIP=m
 CONFIG_SLIP_COMPRESSED=y
 CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
+# CONFIG_SLIP_MODE_SLIP6 is not set
 CONFIG_NET_FC=y
-CONFIG_SHAPER=m
+# CONFIG_SHAPER is not set
 CONFIG_NETCONSOLE=m
 
 #
@@ -1240,16 +1159,15 @@
 CONFIG_ISDN_PPP_VJ=y
 CONFIG_ISDN_MPP=y
 CONFIG_IPPP_FILTER=y
-CONFIG_ISDN_PPP_BSDCOMP=m
+# CONFIG_ISDN_PPP_BSDCOMP is not set
 CONFIG_ISDN_AUDIO=y
 CONFIG_ISDN_TTY_FAX=y
-CONFIG_ISDN_X25=y
 
 #
 # ISDN feature submodules
 #
 CONFIG_ISDN_DRV_LOOP=m
-# CONFIG_ISDN_DIVERSION is not set
+CONFIG_ISDN_DIVERSION=m
 
 #
 # ISDN4Linux hardware drivers
@@ -1265,9 +1183,9 @@
 #
 CONFIG_HISAX_EURO=y
 CONFIG_DE_AOC=y
-# CONFIG_HISAX_NO_SENDCOMPLETE is not set
-# CONFIG_HISAX_NO_LLC is not set
-# CONFIG_HISAX_NO_KEYPAD is not set
+CONFIG_HISAX_NO_SENDCOMPLETE=y
+CONFIG_HISAX_NO_LLC=y
+CONFIG_HISAX_NO_KEYPAD=y
 CONFIG_HISAX_1TR6=y
 CONFIG_HISAX_NI1=y
 CONFIG_HISAX_MAX_CARDS=8
@@ -1342,19 +1260,12 @@
 #
 # Active Eicon DIVA Server cards
 #
-CONFIG_CAPI_EICON=y
-CONFIG_ISDN_DIVAS=m
-CONFIG_ISDN_DIVAS_BRIPCI=y
-CONFIG_ISDN_DIVAS_PRIPCI=y
-CONFIG_ISDN_DIVAS_DIVACAPI=m
-CONFIG_ISDN_DIVAS_USERIDI=m
-CONFIG_ISDN_DIVAS_MAINT=m
+# CONFIG_CAPI_EICON is not set
 
 #
 # Telephony Support
 #
-CONFIG_PHONE=m
-CONFIG_PHONE_IXJ=m
+# CONFIG_PHONE is not set
 
 #
 # Input device support
@@ -1365,27 +1276,25 @@
 # Userland interfaces
 #
 CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
 CONFIG_INPUT_JOYDEV=m
-CONFIG_INPUT_TSDEV=m
-CONFIG_INPUT_TSDEV_SCREEN_X=240
-CONFIG_INPUT_TSDEV_SCREEN_Y=320
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_EVBUG=m
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
 
 #
 # Input Device Drivers
 #
 CONFIG_INPUT_KEYBOARD=y
 CONFIG_KEYBOARD_ATKBD=y
-CONFIG_KEYBOARD_SUNKBD=m
-CONFIG_KEYBOARD_LKKBD=m
-CONFIG_KEYBOARD_XTKBD=m
-CONFIG_KEYBOARD_NEWTON=m
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
 CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=m
+CONFIG_MOUSE_PS2=y
 CONFIG_MOUSE_SERIAL=m
 CONFIG_MOUSE_VSXXXAA=m
 CONFIG_INPUT_JOYSTICK=y
@@ -1427,19 +1336,19 @@
 #
 CONFIG_SERIO=y
 CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=m
-CONFIG_SERIO_CT82C710=m
-CONFIG_SERIO_PARKBD=m
-CONFIG_SERIO_PCIPS2=m
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
 CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIO_RAW=m
+# CONFIG_SERIO_RAW is not set
 CONFIG_GAMEPORT=m
 CONFIG_GAMEPORT_NS558=m
 CONFIG_GAMEPORT_L4=m
 CONFIG_GAMEPORT_EMU10K1=m
 CONFIG_GAMEPORT_VORTEX=m
 CONFIG_GAMEPORT_FM801=m
-# CONFIG_GAMEPORT_CS461X is not set
+CONFIG_GAMEPORT_CS461X=m
 
 #
 # Character devices
@@ -1452,21 +1361,16 @@
 #
 # Serial drivers
 #
-CONFIG_SERIAL_8250=m
-# CONFIG_SERIAL_8250_ACPI is not set
-CONFIG_SERIAL_8250_NR_UARTS=4
-# CONFIG_SERIAL_8250_EXTENDED is not set
+# CONFIG_SERIAL_8250 is not set
 
 #
 # Non-8250 serial port support
 #
-CONFIG_SERIAL_CORE=m
-CONFIG_SERIAL_JSM=m
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_PRINTER=m
-# CONFIG_LP_CONSOLE is not set
+CONFIG_LP_CONSOLE=y
 CONFIG_PPDEV=m
 CONFIG_TIPAR=m
 
@@ -1500,7 +1404,7 @@
 CONFIG_WAFER_WDT=m
 CONFIG_I8XX_TCO=m
 CONFIG_SC1200_WDT=m
-CONFIG_60XX_WDT=m
+# CONFIG_60XX_WDT is not set
 CONFIG_CPU5_WDT=m
 CONFIG_W83627HF_WDT=m
 CONFIG_W83877F_WDT=m
@@ -1518,67 +1422,31 @@
 #
 CONFIG_USBPCWATCHDOG=m
 CONFIG_HW_RANDOM=m
-CONFIG_NVRAM=m
-CONFIG_RTC=m
-CONFIG_GEN_RTC=m
-CONFIG_GEN_RTC_X=y
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
 CONFIG_DTLK=m
 CONFIG_R3964=m
-CONFIG_APPLICOM=m
+# CONFIG_APPLICOM is not set
 
 #
 # Ftape, the floppy tape device driver
 #
-CONFIG_FTAPE=m
-CONFIG_ZFTAPE=m
-CONFIG_ZFT_DFLT_BLK_SZ=10240
-
-#
-# The compressor will be built as a module only!
-#
-CONFIG_ZFT_COMPRESSOR=m
-CONFIG_FT_NR_BUFFERS=3
-CONFIG_FT_PROC_FS=y
-CONFIG_FT_NORMAL_DEBUG=y
-# CONFIG_FT_FULL_DEBUG is not set
-# CONFIG_FT_NO_TRACE is not set
-# CONFIG_FT_NO_TRACE_AT_ALL is not set
-
-#
-# Hardware configuration
-#
-CONFIG_FT_STD_FDC=y
-# CONFIG_FT_MACH2 is not set
-# CONFIG_FT_PROBE_FC10 is not set
-# CONFIG_FT_ALT_FDC is not set
-CONFIG_FT_FDC_THR=8
-CONFIG_FT_FDC_MAX_RATE=2000
-CONFIG_FT_ALPHA_CLOCK=0
-CONFIG_AGP=m
-CONFIG_AGP_AMD64=m
-CONFIG_AGP_INTEL=m
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
 CONFIG_DRM=m
 CONFIG_DRM_TDFX=m
 # CONFIG_DRM_GAMMA is not set
 CONFIG_DRM_R128=m
 CONFIG_DRM_RADEON=m
-CONFIG_DRM_I810=m
-CONFIG_DRM_I830=m
-CONFIG_DRM_I915=m
-CONFIG_DRM_MGA=m
-CONFIG_DRM_SIS=m
-CONFIG_MWAVE=m
-CONFIG_RAW_DRIVER=m
+# CONFIG_MWAVE is not set
+# CONFIG_RAW_DRIVER is not set
 # CONFIG_HPET is not set
-CONFIG_MAX_RAW_DEVS=256
 CONFIG_HANGCHECK_TIMER=m
 
 #
 # TPM devices
 #
-CONFIG_TCG_TPM=m
-CONFIG_TCG_NSC=m
-CONFIG_TCG_ATMEL=m
+# CONFIG_TCG_TPM is not set
 
 #
 # I2C support
@@ -1596,24 +1464,24 @@
 #
 # I2C Hardware Bus support
 #
-CONFIG_I2C_ALI1535=m
-CONFIG_I2C_ALI1563=m
-CONFIG_I2C_ALI15X3=m
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
 CONFIG_I2C_AMD756=m
 CONFIG_I2C_AMD756_S4882=m
 CONFIG_I2C_AMD8111=m
-CONFIG_I2C_I801=m
-CONFIG_I2C_I810=m
-CONFIG_I2C_PIIX4=m
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
 CONFIG_I2C_ISA=m
 CONFIG_I2C_NFORCE2=m
-CONFIG_I2C_PARPORT=m
-CONFIG_I2C_PARPORT_LIGHT=m
+# CONFIG_I2C_PARPORT is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
 CONFIG_I2C_PROSAVAGE=m
 CONFIG_I2C_SAVAGE4=m
-CONFIG_SCx200_ACB=m
-CONFIG_I2C_SIS5595=m
-CONFIG_I2C_SIS630=m
+# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
 CONFIG_I2C_SIS96X=m
 CONFIG_I2C_STUB=m
 CONFIG_I2C_VIA=m
@@ -1648,7 +1516,7 @@
 CONFIG_SENSORS_LM92=m
 CONFIG_SENSORS_MAX1619=m
 CONFIG_SENSORS_PC87360=m
-# CONFIG_SENSORS_SMSC47B397 is not set
+CONFIG_SENSORS_SMSC47B397=m
 CONFIG_SENSORS_SIS5595=m
 CONFIG_SENSORS_SMSC47M1=m
 CONFIG_SENSORS_VIA686A=m
@@ -1682,7 +1550,7 @@
 #
 # Misc devices
 #
-CONFIG_IBM_ASM=m
+# CONFIG_IBM_ASM is not set
 
 #
 # Multimedia devices
@@ -1714,13 +1582,14 @@
 CONFIG_VIDEO_ZORAN_LML33=m
 CONFIG_VIDEO_ZORAN_LML33R10=m
 # CONFIG_VIDEO_ZR36120 is not set
-# CONFIG_VIDEO_SAA7134 is not set
+CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_SAA7134_DVB=m
 CONFIG_VIDEO_MXB=m
 CONFIG_VIDEO_DPC=m
 CONFIG_VIDEO_HEXIUM_ORION=m
 CONFIG_VIDEO_HEXIUM_GEMINI=m
 CONFIG_VIDEO_CX88=m
-# CONFIG_VIDEO_CX88_DVB is not set
+CONFIG_VIDEO_CX88_DVB=m
 CONFIG_VIDEO_OVCAMCHIP=m
 
 #
@@ -1740,7 +1609,7 @@
 # Supported SAA7146 based PCI Adapters
 #
 CONFIG_DVB_AV7110=m
-# CONFIG_DVB_AV7110_OSD is not set
+CONFIG_DVB_AV7110_OSD=y
 CONFIG_DVB_BUDGET=m
 CONFIG_DVB_BUDGET_CI=m
 CONFIG_DVB_BUDGET_AV=m
@@ -1755,7 +1624,12 @@
 CONFIG_DVB_DIBUSB_MISDESIGNED_DEVICES=y
 # CONFIG_DVB_DIBCOM_DEBUG is not set
 CONFIG_DVB_CINERGYT2=m
-# CONFIG_DVB_CINERGYT2_TUNING is not set
+CONFIG_DVB_CINERGYT2_TUNING=y
+CONFIG_DVB_CINERGYT2_STREAM_URB_COUNT=32
+CONFIG_DVB_CINERGYT2_STREAM_BUF_SIZE=512
+CONFIG_DVB_CINERGYT2_QUERY_INTERVAL=250
+CONFIG_DVB_CINERGYT2_ENABLE_RC_INPUT_DEVICE=y
+CONFIG_DVB_CINERGYT2_RC_QUERY_INTERVAL=100
 
 #
 # Supported FlexCopII (B2C2) Adapters
@@ -1822,6 +1696,7 @@
 CONFIG_VIDEO_VIDEOBUF=m
 CONFIG_VIDEO_TUNER=m
 CONFIG_VIDEO_BUF=m
+CONFIG_VIDEO_BUF_DVB=m
 CONFIG_VIDEO_BTCX=m
 CONFIG_VIDEO_IR=m
 CONFIG_VIDEO_TVEEPROM=m
@@ -1830,36 +1705,34 @@
 # Graphics support
 #
 CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=m
-CONFIG_FB_CFB_COPYAREA=m
-CONFIG_FB_CFB_IMAGEBLIT=m
-CONFIG_FB_SOFT_CURSOR=m
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+CONFIG_FB_SOFT_CURSOR=y
 # CONFIG_FB_MACMODES is not set
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
 CONFIG_FB_CIRRUS=m
-CONFIG_FB_PM2=m
-CONFIG_FB_PM2_FIFO_DISCONNECT=y
-CONFIG_FB_CYBER2000=m
+# CONFIG_FB_PM2 is not set
+# CONFIG_FB_CYBER2000 is not set
 # CONFIG_FB_ASILIANT is not set
 # CONFIG_FB_IMSTT is not set
 CONFIG_FB_VGA16=m
-# CONFIG_FB_VESA is not set
+CONFIG_FB_VESA=y
 CONFIG_VIDEO_SELECT=y
-CONFIG_FB_HGA=m
-# CONFIG_FB_HGA_ACCEL is not set
-CONFIG_FB_NVIDIA=m
-CONFIG_FB_NVIDIA_I2C=y
+# CONFIG_FB_HGA is not set
+# CONFIG_FB_NVIDIA is not set
 CONFIG_FB_RIVA=m
-CONFIG_FB_RIVA_I2C=y
-CONFIG_FB_RIVA_DEBUG=y
+# CONFIG_FB_RIVA_I2C is not set
+# CONFIG_FB_RIVA_DEBUG is not set
 CONFIG_FB_MATROX=m
 CONFIG_FB_MATROX_MILLENIUM=y
 CONFIG_FB_MATROX_MYSTIQUE=y
-# CONFIG_FB_MATROX_G is not set
+CONFIG_FB_MATROX_G=y
 CONFIG_FB_MATROX_I2C=m
+CONFIG_FB_MATROX_MAVEN=m
 CONFIG_FB_MATROX_MULTIHEAD=y
-CONFIG_FB_RADEON_OLD=m
+# CONFIG_FB_RADEON_OLD is not set
 CONFIG_FB_RADEON=m
 CONFIG_FB_RADEON_I2C=y
 # CONFIG_FB_RADEON_DEBUG is not set
@@ -1867,33 +1740,30 @@
 CONFIG_FB_ATY=m
 CONFIG_FB_ATY_CT=y
 CONFIG_FB_ATY_GENERIC_LCD=y
-CONFIG_FB_ATY_XL_INIT=y
+# CONFIG_FB_ATY_XL_INIT is not set
 CONFIG_FB_ATY_GX=y
 CONFIG_FB_SAVAGE=m
 CONFIG_FB_SAVAGE_I2C=y
 CONFIG_FB_SAVAGE_ACCEL=y
-CONFIG_FB_SIS=m
-CONFIG_FB_SIS_300=y
-CONFIG_FB_SIS_315=y
+# CONFIG_FB_SIS is not set
 CONFIG_FB_NEOMAGIC=m
 CONFIG_FB_KYRO=m
 CONFIG_FB_3DFX=m
-# CONFIG_FB_3DFX_ACCEL is not set
+CONFIG_FB_3DFX_ACCEL=y
 CONFIG_FB_VOODOO1=m
 CONFIG_FB_TRIDENT=m
-# CONFIG_FB_TRIDENT_ACCEL is not set
+CONFIG_FB_TRIDENT_ACCEL=y
 # CONFIG_FB_PM3 is not set
-CONFIG_FB_GEODE=y
-CONFIG_FB_GEODE_GX1=m
-CONFIG_FB_S1D13XXX=m
-CONFIG_FB_VIRTUAL=m
+# CONFIG_FB_GEODE is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
 
 #
 # Console display driver support
 #
 CONFIG_VGA_CONSOLE=y
 CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=m
+CONFIG_FRAMEBUFFER_CONSOLE=y
 # CONFIG_FONTS is not set
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
@@ -1901,8 +1771,15 @@
 #
 # Logo configuration
 #
-# CONFIG_LOGO is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_LOGO_LINUX_CLUT224=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=m
+CONFIG_BACKLIGHT_DEVICE=y
+CONFIG_LCD_CLASS_DEVICE=m
+CONFIG_LCD_DEVICE=y
 
 #
 # Sound
@@ -1936,7 +1813,7 @@
 CONFIG_SND_DUMMY=m
 CONFIG_SND_VIRMIDI=m
 CONFIG_SND_MTPAV=m
-CONFIG_SND_SERIAL_U16550=m
+# CONFIG_SND_SERIAL_U16550 is not set
 CONFIG_SND_MPU401=m
 
 #
@@ -1956,8 +1833,8 @@
 CONFIG_SND_CS46XX_NEW_DSP=y
 CONFIG_SND_CS4281=m
 CONFIG_SND_EMU10K1=m
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_CA0106 is not set
+CONFIG_SND_EMU10K1X=m
+CONFIG_SND_CA0106=m
 CONFIG_SND_KORG1212=m
 CONFIG_SND_MIXART=m
 CONFIG_SND_NM256=m
@@ -1982,7 +1859,7 @@
 CONFIG_SND_INTEL8X0M=m
 CONFIG_SND_SONICVIBES=m
 CONFIG_SND_VIA82XX=m
-# CONFIG_SND_VIA82XX_MODEM is not set
+CONFIG_SND_VIA82XX_MODEM=m
 CONFIG_SND_VX222=m
 CONFIG_SND_HDA_INTEL=m
 
@@ -1995,75 +1872,7 @@
 #
 # Open Sound System
 #
-CONFIG_SOUND_PRIME=m
-CONFIG_SOUND_BT878=m
-CONFIG_SOUND_CMPCI=m
-# CONFIG_SOUND_CMPCI_FM is not set
-# CONFIG_SOUND_CMPCI_MIDI is not set
-CONFIG_SOUND_CMPCI_JOYSTICK=y
-CONFIG_SOUND_EMU10K1=m
-CONFIG_MIDI_EMU10K1=y
-CONFIG_SOUND_FUSION=m
-CONFIG_SOUND_CS4281=m
-CONFIG_SOUND_ES1370=m
-CONFIG_SOUND_ES1371=m
-CONFIG_SOUND_ESSSOLO1=m
-CONFIG_SOUND_MAESTRO=m
-CONFIG_SOUND_MAESTRO3=m
-CONFIG_SOUND_ICH=m
-CONFIG_SOUND_SONICVIBES=m
-CONFIG_SOUND_TRIDENT=m
-# CONFIG_SOUND_MSNDCLAS is not set
-# CONFIG_SOUND_MSNDPIN is not set
-CONFIG_SOUND_VIA82CXXX=m
-CONFIG_MIDI_VIA82CXXX=y
-CONFIG_SOUND_OSS=m
-# CONFIG_SOUND_TRACEINIT is not set
-# CONFIG_SOUND_DMAP is not set
-# CONFIG_SOUND_AD1816 is not set
-CONFIG_SOUND_AD1889=m
-CONFIG_SOUND_SGALAXY=m
-CONFIG_SOUND_ADLIB=m
-CONFIG_SOUND_ACI_MIXER=m
-CONFIG_SOUND_CS4232=m
-CONFIG_SOUND_SSCAPE=m
-CONFIG_SOUND_GUS=m
-CONFIG_SOUND_GUS16=y
-CONFIG_SOUND_GUSMAX=y
-CONFIG_SOUND_VMIDI=m
-CONFIG_SOUND_TRIX=m
-CONFIG_SOUND_MSS=m
-CONFIG_SOUND_MPU401=m
-CONFIG_SOUND_NM256=m
-CONFIG_SOUND_MAD16=m
-CONFIG_MAD16_OLDCARD=y
-CONFIG_SOUND_PAS=m
-CONFIG_SOUND_PSS=m
-CONFIG_PSS_MIXER=y
-CONFIG_SOUND_SB=m
-# CONFIG_SOUND_AWE32_SYNTH is not set
-CONFIG_SOUND_WAVEFRONT=m
-CONFIG_SOUND_MAUI=m
-CONFIG_SOUND_YM3812=m
-CONFIG_SOUND_OPL3SA1=m
-CONFIG_SOUND_OPL3SA2=m
-CONFIG_SOUND_YMFPCI=m
-# CONFIG_SOUND_YMFPCI_LEGACY is not set
-CONFIG_SOUND_UART6850=m
-CONFIG_SOUND_AEDSP16=m
-CONFIG_SC6600=y
-CONFIG_SC6600_JOY=y
-CONFIG_SC6600_CDROM=4
-CONFIG_SC6600_CDROMBASE=0x0
-# CONFIG_AEDSP16_MSS is not set
-# CONFIG_AEDSP16_SBPRO is not set
-# CONFIG_AEDSP16_MPU401 is not set
-CONFIG_SOUND_TVMIXER=m
-CONFIG_SOUND_KAHLUA=m
-CONFIG_SOUND_ALI5455=m
-CONFIG_SOUND_FORTE=m
-CONFIG_SOUND_RME96XX=m
-CONFIG_SOUND_AD1980=m
+# CONFIG_SOUND_PRIME is not set
 
 #
 # USB support
@@ -2077,14 +1886,14 @@
 # Miscellaneous USB options
 #
 CONFIG_USB_DEVICEFS=y
-CONFIG_USB_BANDWIDTH=y
+# CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 
 #
 # USB Host Controller Drivers
 #
-CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD=m
 CONFIG_USB_EHCI_SPLIT_ISO=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
 CONFIG_USB_OHCI_HCD=m
@@ -2096,7 +1905,7 @@
 #
 # USB Device Class drivers
 #
-CONFIG_USB_AUDIO=m
+# CONFIG_USB_AUDIO is not set
 
 #
 # USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
@@ -2122,16 +1931,13 @@
 #
 # USB Input Devices
 #
-CONFIG_USB_HID=m
+CONFIG_USB_HID=y
 CONFIG_USB_HIDINPUT=y
-# CONFIG_HID_FF is not set
+CONFIG_HID_FF=y
+CONFIG_HID_PID=y
+CONFIG_LOGITECH_FF=y
+CONFIG_THRUSTMASTER_FF=y
 CONFIG_USB_HIDDEV=y
-
-#
-# USB HID Boot Protocol drivers
-#
-CONFIG_USB_KBD=y
-CONFIG_USB_MOUSE=y
 CONFIG_USB_AIPTEK=m
 CONFIG_USB_WACOM=m
 CONFIG_USB_KBTAB=m
@@ -2150,7 +1956,7 @@
 #
 # USB Multimedia devices
 #
-# CONFIG_USB_DABUSB is not set
+CONFIG_USB_DABUSB=m
 CONFIG_USB_VICAM=m
 CONFIG_USB_DSBR=m
 CONFIG_USB_IBMCAM=m
@@ -2220,30 +2026,30 @@
 CONFIG_USB_SERIAL_IR=m
 CONFIG_USB_SERIAL_EDGEPORT=m
 CONFIG_USB_SERIAL_EDGEPORT_TI=m
-# CONFIG_USB_SERIAL_GARMIN is not set
+CONFIG_USB_SERIAL_GARMIN=m
 CONFIG_USB_SERIAL_IPW=m
 CONFIG_USB_SERIAL_KEYSPAN_PDA=m
 CONFIG_USB_SERIAL_KEYSPAN=m
-# CONFIG_USB_SERIAL_KEYSPAN_MPR is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28XA is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA28XB is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19QW is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA19QI is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set
-# CONFIG_USB_SERIAL_KEYSPAN_USA49WLC is not set
+CONFIG_USB_SERIAL_KEYSPAN_MPR=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19=y
+CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
 CONFIG_USB_SERIAL_KLSI=m
 CONFIG_USB_SERIAL_KOBIL_SCT=m
 CONFIG_USB_SERIAL_MCT_U232=m
 CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_SERIAL_HP4X=m
 CONFIG_USB_SERIAL_SAFE=m
-# CONFIG_USB_SERIAL_SAFE_PADDED is not set
-# CONFIG_USB_SERIAL_TI is not set
+CONFIG_USB_SERIAL_SAFE_PADDED=y
+CONFIG_USB_SERIAL_TI=m
 CONFIG_USB_SERIAL_CYBERJACK=m
 CONFIG_USB_SERIAL_XIRCOM=m
 CONFIG_USB_SERIAL_OMNINET=m
@@ -2252,17 +2058,17 @@
 #
 # USB Miscellaneous drivers
 #
-# CONFIG_USB_EMI62 is not set
+CONFIG_USB_EMI62=m
 # CONFIG_USB_EMI26 is not set
 CONFIG_USB_AUERSWALD=m
 CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_LED=m
-CONFIG_USB_CYTHERM=m
+# CONFIG_USB_CYTHERM is not set
 CONFIG_USB_PHIDGETKIT=m
 CONFIG_USB_PHIDGETSERVO=m
-# CONFIG_USB_IDMOUSE is not set
+CONFIG_USB_IDMOUSE=m
 CONFIG_USB_SISUSBVGA=m
 CONFIG_USB_TEST=m
 
@@ -2275,33 +2081,24 @@
 #
 # USB Gadget Support
 #
-CONFIG_USB_GADGET=m
-# CONFIG_USB_GADGET_DEBUG_FILES is not set
-CONFIG_USB_GADGET_NET2280=y
-CONFIG_USB_NET2280=m
-# CONFIG_USB_GADGET_PXA2XX is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-CONFIG_USB_GADGET_DUALSPEED=y
-CONFIG_USB_ZERO=m
-CONFIG_USB_ETH=m
-CONFIG_USB_ETH_RNDIS=y
-CONFIG_USB_GADGETFS=m
-CONFIG_USB_FILE_STORAGE=m
-# CONFIG_USB_FILE_STORAGE_TEST is not set
-CONFIG_USB_G_SERIAL=m
+# CONFIG_USB_GADGET is not set
 
 #
 # MMC/SD Card support
 #
-# CONFIG_MMC is not set
+CONFIG_MMC=m
+# CONFIG_MMC_DEBUG is not set
+CONFIG_MMC_BLOCK=m
+CONFIG_MMC_WBSD=m
 
 #
 # InfiniBand support
 #
-# CONFIG_INFINIBAND is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
 
 #
 # Power management options
@@ -2335,7 +2132,7 @@
 #
 # File systems
 #
-CONFIG_EXT2_FS=m
+CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
@@ -2345,16 +2142,18 @@
 CONFIG_EXT3_FS_SECURITY=y
 CONFIG_JBD=m
 # CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=m
+CONFIG_FS_MBCACHE=y
 CONFIG_REISERFS_FS=m
 # CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
-# CONFIG_REISERFS_FS_XATTR is not set
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_JFS_FS=m
 CONFIG_JFS_POSIX_ACL=y
-# CONFIG_JFS_SECURITY is not set
+CONFIG_JFS_SECURITY=y
 # CONFIG_JFS_DEBUG is not set
-CONFIG_JFS_STATISTICS=y
+# CONFIG_JFS_STATISTICS is not set
 CONFIG_FS_POSIX_ACL=y
 
 #
@@ -2362,15 +2161,15 @@
 #
 CONFIG_XFS_FS=m
 CONFIG_XFS_EXPORT=y
-CONFIG_XFS_RT=y
+# CONFIG_XFS_RT is not set
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_MINIX_FS=m
 CONFIG_ROMFS_FS=m
 CONFIG_QUOTA=y
-CONFIG_QFMT_V1=m
-CONFIG_QFMT_V2=m
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
 CONFIG_QUOTACTL=y
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=m
@@ -2379,10 +2178,10 @@
 #
 # CD-ROM/DVD Filesystems
 #
-CONFIG_ISO9660_FS=m
+CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
-CONFIG_ZISOFS_FS=m
+CONFIG_ZISOFS_FS=y
 CONFIG_UDF_FS=m
 CONFIG_UDF_NLS=y
 
@@ -2393,10 +2192,8 @@
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-CONFIG_NTFS_FS=m
-# CONFIG_NTFS_DEBUG is not set
-# CONFIG_NTFS_RW is not set
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+# CONFIG_NTFS_FS is not set
 
 #
 # Pseudo filesystems
@@ -2404,9 +2201,7 @@
 CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_SYSFS=y
-CONFIG_DEVFS_FS=y
-# CONFIG_DEVFS_MOUNT is not set
-# CONFIG_DEVFS_DEBUG is not set
+# CONFIG_DEVFS_FS is not set
 CONFIG_DEVPTS_FS_XATTR=y
 CONFIG_DEVPTS_FS_SECURITY=y
 CONFIG_TMPFS=y
@@ -2419,8 +2214,7 @@
 #
 # Miscellaneous filesystems
 #
-CONFIG_ADFS_FS=m
-# CONFIG_ADFS_FS_RW is not set
+# CONFIG_ADFS_FS is not set
 CONFIG_AFFS_FS=m
 CONFIG_HFS_FS=m
 CONFIG_HFSPLUS_FS=m
@@ -2428,20 +2222,18 @@
 # CONFIG_BEFS_DEBUG is not set
 CONFIG_BFS_FS=m
 CONFIG_EFS_FS=m
-CONFIG_JFFS_FS=m
-CONFIG_JFFS_FS_VERBOSE=0
-CONFIG_JFFS_PROC_FS=y
+# CONFIG_JFFS_FS is not set
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
-# CONFIG_JFFS2_FS_NAND is not set
+CONFIG_JFFS2_FS_NAND=y
 # CONFIG_JFFS2_FS_NOR_ECC is not set
 # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
 CONFIG_JFFS2_ZLIB=y
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
-CONFIG_CRAMFS=y
+CONFIG_CRAMFS=m
 CONFIG_VXFS_FS=m
-CONFIG_HPFS_FS=m
+# CONFIG_HPFS_FS is not set
 CONFIG_QNX4FS_FS=m
 # CONFIG_QNX4FS_RW is not set
 CONFIG_SYSV_FS=m
@@ -2470,7 +2262,8 @@
 # CONFIG_SMB_NLS_DEFAULT is not set
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
 # CONFIG_CIFS_EXPERIMENTAL is not set
 CONFIG_NCP_FS=m
 CONFIG_NCPFS_PACKET_SIGNING=y
@@ -2478,38 +2271,29 @@
 CONFIG_NCPFS_STRONG=y
 CONFIG_NCPFS_NFS_NS=y
 CONFIG_NCPFS_OS2_NS=y
-# CONFIG_NCPFS_SMALLDOS is not set
+CONFIG_NCPFS_SMALLDOS=y
 CONFIG_NCPFS_NLS=y
 CONFIG_NCPFS_EXTRAS=y
-CONFIG_CODA_FS=m
-# CONFIG_CODA_FS_OLD_API is not set
-CONFIG_AFS_FS=m
-CONFIG_RXRPC=m
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
 
 #
 # Partition Types
 #
 CONFIG_PARTITION_ADVANCED=y
-CONFIG_ACORN_PARTITION=y
-CONFIG_ACORN_PARTITION_CUMANA=y
-# CONFIG_ACORN_PARTITION_EESOX is not set
-CONFIG_ACORN_PARTITION_ICS=y
-# CONFIG_ACORN_PARTITION_ADFS is not set
-# CONFIG_ACORN_PARTITION_POWERTEC is not set
-CONFIG_ACORN_PARTITION_RISCIX=y
+# CONFIG_ACORN_PARTITION is not set
 CONFIG_OSF_PARTITION=y
 CONFIG_AMIGA_PARTITION=y
-CONFIG_ATARI_PARTITION=y
+# CONFIG_ATARI_PARTITION is not set
 CONFIG_MAC_PARTITION=y
 CONFIG_MSDOS_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
 CONFIG_MINIX_SUBPARTITION=y
 CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_LDM_PARTITION=y
-# CONFIG_LDM_DEBUG is not set
+# CONFIG_LDM_PARTITION is not set
 CONFIG_SGI_PARTITION=y
-CONFIG_ULTRIX_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
 CONFIG_SUN_PARTITION=y
 CONFIG_EFI_PARTITION=y
 
@@ -2517,8 +2301,8 @@
 # Native Language Support
 #
 CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="cp437"
-CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_737=m
 CONFIG_NLS_CODEPAGE_775=m
 CONFIG_NLS_CODEPAGE_850=m
@@ -2541,7 +2325,7 @@
 CONFIG_NLS_ISO8859_8=m
 CONFIG_NLS_CODEPAGE_1250=m
 CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
+CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=m
 CONFIG_NLS_ISO8859_2=m
 CONFIG_NLS_ISO8859_3=m
@@ -2561,15 +2345,15 @@
 # Security options
 #
 CONFIG_KEYS=y
-# CONFIG_KEYS_DEBUG_PROC_KEYS is not set
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
-# CONFIG_SECURITY_NETWORK is not set
-CONFIG_SECURITY_CAPABILITIES=m
-CONFIG_SECURITY_ROOTPLUG=m
-CONFIG_SECURITY_SECLVL=m
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_CAPABILITIES=y
+# CONFIG_SECURITY_ROOTPLUG is not set
+# CONFIG_SECURITY_SECLVL is not set
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
 CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_SECURITY_SELINUX_DEVELOP=y
 CONFIG_SECURITY_SELINUX_AVC_STATS=y
@@ -2582,8 +2366,8 @@
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=y
 CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_WP512=m
@@ -2602,7 +2386,7 @@
 CONFIG_CRYPTO_DEFLATE=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_CRC32C=m
-CONFIG_CRYPTO_TEST=m
+# CONFIG_CRYPTO_TEST is not set
 
 #
 # Hardware crypto devices
@@ -2616,8 +2400,6 @@
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_REED_SOLOMON=m
-CONFIG_REED_SOLOMON_DEC16=y
 
 #
 # Kernel hacking
@@ -2625,7 +2407,7 @@
 # CONFIG_PRINTK_TIME is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_LOG_BUF_SHIFT=15
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_SPINLOCK is not set
@@ -2640,5 +2422,4 @@
 # CONFIG_4KSTACKS is not set
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
-# CONFIG_CHECKING is not set
 # CONFIG_INIT_DEBUG is not set
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Thu Aug 25 22:53:20 2005
@@ -533,6 +533,11 @@
          with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
          /dev/cpu/31/cpuid.
 
+config SWIOTLB
+       bool
+       depends on PCI
+       default y
+
 source "drivers/firmware/Kconfig"
 
 choice
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Thu Aug 25 22:53:20 2005
@@ -14,8 +14,7 @@
 
 c-obj-y        := semaphore.o vm86.o \
                ptrace.o sys_i386.o \
-               i387.o dmi_scan.o bootflag.o \
-               doublefault.o
+               i387.o dmi_scan.o bootflag.o
 s-obj-y        :=
 
 obj-y                          += cpu/
@@ -44,6 +43,7 @@
 c-obj-$(CONFIG_EFI)            += efi.o efi_stub.o
 c-obj-$(CONFIG_EARLY_PRINTK)   += early_printk.o
 c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
+obj-$(CONFIG_SWIOTLB)          += swiotlb.o
 
 EXTRA_AFLAGS   := -traditional
 
@@ -84,7 +84,7 @@
                        $(obj)/vsyscall-sysenter.o FORCE
        $(call if_changed,syscall)
 
-c-link := init_task.o
+c-link :=
 s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@@ -92,6 +92,7 @@
 
 $(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
 
+EXTRA_AFLAGS   += -I$(obj)
 $(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
 
 obj-y  += $(c-obj-y) $(s-obj-y)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Thu Aug 25 22:53:20 2005
@@ -19,11 +19,13 @@
 
 #include "cpu.h"
 
+#ifndef CONFIG_XEN
 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#endif
 
 static int cachesize_override __initdata = -1;
 static int disable_x86_fxsr __initdata = 0;
@@ -569,7 +571,7 @@
        for (va = gdt_descr->address, f = 0;
             va < gdt_descr->address + gdt_descr->size;
             va += PAGE_SIZE, f++) {
-               frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+               frames[f] = virt_to_mfn(va);
                make_page_readonly((void *)va);
        }
        if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Thu Aug 25 22:53:20 2005
@@ -136,9 +136,6 @@
 ENTRY(empty_zero_page)
 
 .org 0x2000
-ENTRY(swapper_pg_dir)
-
-.org 0x3000
 ENTRY(cpu_gdt_table)
        .quad 0x0000000000000000        /* NULL descriptor */
        .quad 0x0000000000000000        /* 0x0b reserved */
@@ -190,10 +187,10 @@
        .quad 0x0000000000000000        /* 0xf8 - GDT entry 31: double-fault TSS */
        .fill GDT_ENTRIES-32,8,0
 
-.org 0x4000
+.org 0x3000
 ENTRY(default_ldt)
 
-.org 0x5000
+.org 0x4000
 /*
  * Real beginning of normal "text" segment
  */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Thu Aug 25 22:53:20 2005
@@ -115,9 +115,6 @@
 EXPORT_SYMBOL(__copy_to_user_ll);
 EXPORT_SYMBOL(strnlen_user);
 
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
-
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(pci_mem_start);
 #endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c        Thu Aug 25 22:53:20 2005
@@ -80,7 +80,7 @@
                t->io_bitmap_ptr = bitmap;
 
                op.cmd = PHYSDEVOP_SET_IOBITMAP;
-               op.u.set_iobitmap.bitmap   = (unsigned long)bitmap;
+               op.u.set_iobitmap.bitmap   = (char *)bitmap;
                op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
                HYPERVISOR_physdev_op(&op);
        }
@@ -113,16 +113,12 @@
        if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
                return -EPERM;
 
-       /* Maintain OS privileges even if user attempts to relinquish them. */
-       if (new_io_pl == 0)
-               new_io_pl = 1;
-
        /* Change our version of the privilege levels. */
        current->thread.io_pl = new_io_pl;
 
        /* Force the change at ring 0. */
        op.cmd             = PHYSDEVOP_SET_IOPL;
-       op.u.set_iopl.iopl = new_io_pl;
+       op.u.set_iopl.iopl = (new_io_pl == 0) ? 1 : new_io_pl;
        HYPERVISOR_physdev_op(&op);
 
        return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c   Thu Aug 25 22:53:20 2005
@@ -198,7 +198,7 @@
 {
        struct mm_struct * mm = current->mm;
        __u32 entry_1, entry_2, *lp;
-       unsigned long mach_lp;
+       maddr_t mach_lp;
        int error;
        struct user_desc ldt_info;
 
@@ -245,7 +245,8 @@
 
        /* Install the new entry ...  */
 install:
-       error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
+       error = HYPERVISOR_update_descriptor(
+               mach_lp, (u64)entry_1 | ((u64)entry_2<<32));
 
 out_unlock:
        up(&mm->context.sem);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c       Thu Aug 25 22:53:20 2005
@@ -784,7 +784,9 @@
 
 void __init find_smp_config (void)
 {
+#ifndef CONFIG_XEN
        unsigned int address;
+#endif
 
        /*
         * FIXME: Linux assumes you have 640K of base ram..
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Thu Aug 25 22:53:20 2005
@@ -23,6 +23,104 @@
        int             flags;
        unsigned long   *bitmap;
 };
+
+/* If the given condition holds, advise 'swiotlb=force' and BUG(). */
+#define IOMMU_BUG_ON(test) do {                                \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
+
+/* Map a scatterlist for DMA: swiotlb if enabled, else a checked 1:1 map. */
+int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+               enum dma_data_direction direction)
+{
+       int i, rc;
+
+       BUG_ON(direction == DMA_NONE);
+
+       if (swiotlb) {
+               rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+       } else {
+               for (i = 0; i < nents; i++) {
+                       BUG_ON(!sg[i].page);    /* validate before use */
+                       sg[i].dma_address =
+                               page_to_phys(sg[i].page) + sg[i].offset;
+                       sg[i].dma_length  = sg[i].length;
+                       IOMMU_BUG_ON(address_needs_mapping(
+                               hwdev, sg[i].dma_address));
+               }
+               rc = nents;     /* direct path maps every entry */
+       }
+
+       flush_write_buffers();
+       return rc;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+/* Unmap a scatterlist: nothing to do unless swiotlb is in use. */
+void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+                  enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_sg(hwdev, sg, nents, direction);
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+/*
+ * Map part of a page: via swiotlb, else page_to_phys() + offset (checked).
+ */
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+                        unsigned long offset, size_t size,
+                        enum dma_data_direction direction)
+{
+       dma_addr_t bus;
+
+       BUG_ON(direction == DMA_NONE);
+
+       if (swiotlb)
+               return swiotlb_map_page(dev, page, offset, size, direction);
+
+       bus = page_to_phys(page) + offset;
+       IOMMU_BUG_ON(address_needs_mapping(dev, bus));
+       return bus;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+/* Unmap a page mapping: nothing to do unless swiotlb is in use. */
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+                    enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_page(dev, dma_address, size, direction);
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+/*
+ * Defer to swiotlb when in use; the direct path always returns 0.
+ */
+int dma_mapping_error(dma_addr_t dma_addr)
+{
+       return swiotlb ? swiotlb_dma_mapping_error(dma_addr) : 0;
+}
+EXPORT_SYMBOL(dma_mapping_error);
+
+/* Defer to swiotlb when in use; otherwise always answer 1. */
+int dma_supported(struct device *dev, u64 mask)
+{
+       if (swiotlb)
+               return swiotlb_dma_supported(dev, mask);
+       /*
+        * By default we'll BUG when an infeasible DMA is requested, and
+        * request swiotlb=force (see IOMMU_BUG_ON).
+        */
+       return 1;
+}
+EXPORT_SYMBOL(dma_supported);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
                           dma_addr_t *dma_handle, unsigned int __nocast gfp)
@@ -54,13 +152,14 @@
        ret = (void *)vstart;
 
        if (ret != NULL) {
-               xen_contig_memory(vstart, order);
+               xen_create_contiguous_region(vstart, order);
 
                memset(ret, 0, size);
                *dma_handle = virt_to_bus(ret);
        }
        return ret;
 }
+EXPORT_SYMBOL(dma_alloc_coherent);
 
 void dma_free_coherent(struct device *dev, size_t size,
                         void *vaddr, dma_addr_t dma_handle)
@@ -72,9 +171,12 @@
                int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
 
                bitmap_release_region(mem->bitmap, page, order);
-       } else
+       } else {
+               xen_destroy_contiguous_region((unsigned long)vaddr, order);
                free_pages((unsigned long)vaddr, order);
-}
+       }
+}
+EXPORT_SYMBOL(dma_free_coherent);
 
 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
                                dma_addr_t device_addr, size_t size, int flags)
@@ -153,46 +255,20 @@
 }
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 
-static LIST_HEAD(dma_map_head);
-static DEFINE_SPINLOCK(dma_map_lock);
-struct dma_map_entry {
-       struct list_head list;
-       dma_addr_t dma;
-       char *bounce, *host;
-       size_t size;
-};
-#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
-
 dma_addr_t
 dma_map_single(struct device *dev, void *ptr, size_t size,
               enum dma_data_direction direction)
 {
-       struct dma_map_entry *ent;
-       void *bnc;
        dma_addr_t dma;
-       unsigned long flags;
-
-       BUG_ON(direction == DMA_NONE);
-
-       /*
-        * Even if size is sub-page, the buffer may still straddle a page
-        * boundary. Take into account buffer start offset. All other calls are
-        * conservative and always search the dma_map list if it's non-empty.
-        */
-       if ((((unsigned int)ptr & ~PAGE_MASK) + size) <= PAGE_SIZE) {
+
+       BUG_ON(direction == DMA_NONE);
+
+       if (swiotlb) {
+               dma = swiotlb_map_single(dev, ptr, size, direction);
+       } else {
                dma = virt_to_bus(ptr);
-       } else {
-               BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, 0)) == NULL);
-               BUG_ON((ent = kmalloc(sizeof(*ent), GFP_KERNEL)) == NULL);
-               if (direction != DMA_FROM_DEVICE)
-                       memcpy(bnc, ptr, size);
-               ent->dma    = dma;
-               ent->bounce = bnc;
-               ent->host   = ptr;
-               ent->size   = size;
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_add(&ent->list, &dma_map_head);
-               spin_unlock_irqrestore(&dma_map_lock, flags);
+               IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
+               IOMMU_BUG_ON(address_needs_mapping(dev, dma));
        }
 
        flush_write_buffers();
@@ -204,30 +280,9 @@
 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
                 enum dma_data_direction direction)
 {
-       struct dma_map_entry *ent;
-       unsigned long flags;
-
-       BUG_ON(direction == DMA_NONE);
-
-       /* Fast-path check: are there any multi-page DMA mappings? */
-       if (!list_empty(&dma_map_head)) {
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_for_each_entry ( ent, &dma_map_head, list ) {
-                       if (DMA_MAP_MATCHES(ent, dma_addr)) {
-                               list_del(&ent->list);
-                               break;
-                       }
-               }
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-               if (&ent->list != &dma_map_head) {
-                       BUG_ON(dma_addr != ent->dma);
-                       BUG_ON(size != ent->size);
-                       if (direction != DMA_TO_DEVICE)
-                               memcpy(ent->host, ent->bounce, size);
-                       dma_free_coherent(dev, size, ent->bounce, ent->dma);
-                       kfree(ent);
-               }
-       }
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_single(dev, dma_addr, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_single);
 
@@ -235,23 +290,8 @@
 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
                        enum dma_data_direction direction)
 {
-       struct dma_map_entry *ent;
-       unsigned long flags, off;
-
-       /* Fast-path check: are there any multi-page DMA mappings? */
-       if (!list_empty(&dma_map_head)) {
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_for_each_entry ( ent, &dma_map_head, list )
-                       if (DMA_MAP_MATCHES(ent, dma_handle))
-                               break;
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-               if (&ent->list != &dma_map_head) {
-                       off = dma_handle - ent->dma;
-                       BUG_ON((off + size) > ent->size);
-                       /*if (direction != DMA_TO_DEVICE)*/
-                               memcpy(ent->host+off, ent->bounce+off, size);
-               }
-       }
+       if (swiotlb)
+               swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
 }
 EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
@@ -259,24 +299,17 @@
 dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
                            enum dma_data_direction direction)
 {
-       struct dma_map_entry *ent;
-       unsigned long flags, off;
-
-       /* Fast-path check: are there any multi-page DMA mappings? */
-       if (!list_empty(&dma_map_head)) {
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_for_each_entry ( ent, &dma_map_head, list )
-                       if (DMA_MAP_MATCHES(ent, dma_handle))
-                               break;
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-               if (&ent->list != &dma_map_head) {
-                       off = dma_handle - ent->dma;
-                       BUG_ON((off + size) > ent->size);
-                       /*if (direction != DMA_FROM_DEVICE)*/
-                               memcpy(ent->bounce+off, ent->host+off, size);
-               }
-       }
-
-       flush_write_buffers();
+       if (swiotlb)
+               swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
 }
 EXPORT_SYMBOL(dma_sync_single_for_device);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Thu Aug 25 22:53:20 2005
@@ -115,20 +115,12 @@
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
-       /* Ack it */
-       __get_cpu_var(cpu_state) = CPU_DEAD;
-
-       /* We shouldn't have to disable interrupts while dead, but
-        * some interrupts just don't seem to go away, and this makes
-        * it "work" for testing purposes. */
        /* Death loop */
        while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
                HYPERVISOR_yield();
 
-       local_irq_disable();
        __flush_tlb_all();
        cpu_set(smp_processor_id(), cpu_online_map);
-       local_irq_enable();
 }
 #else
 static inline void play_dead(void)
@@ -156,12 +148,19 @@
                        rmb();
 
                        if (cpu_is_offline(cpu)) {
+                               local_irq_disable();
 #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
+                               /* Ack it.  From this point on until
+                                  we get woken up, we're not allowed
+                                  to take any locks.  In particular,
+                                  don't printk. */
+                               __get_cpu_var(cpu_state) = CPU_DEAD;
                                /* Tell hypervisor to take vcpu down. */
                                HYPERVISOR_vcpu_down(cpu);
 #endif
                                play_dead();
-         }
+                               local_irq_enable();
+                       }
 
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        xen_idle();
@@ -523,23 +522,22 @@
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
         */
-#define C(i) do {                                                       \
-       if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||    \
-                    next->tls_array[i].b != prev->tls_array[i].b)) {   \
-               mcl->op      = __HYPERVISOR_update_descriptor;          \
-               mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu)  \
-                                        [GDT_ENTRY_TLS_MIN + i]);      \
-               mcl->args[1] = ((u32 *)&next->tls_array[i])[0];         \
-               mcl->args[2] = ((u32 *)&next->tls_array[i])[1];         \
-               mcl++;                                                  \
-       }                                                               \
+#define C(i) do {                                                      \
+       if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||    \
+                    next->tls_array[i].b != prev->tls_array[i].b)) {   \
+               mcl->op = __HYPERVISOR_update_descriptor;               \
+               *(u64 *)&mcl->args[0] = virt_to_machine(                \
+                       &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
+               *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i];    \
+               mcl++;                                                  \
+       }                                                               \
 } while (0)
        C(0); C(1); C(2);
 #undef C
 
        if (unlikely(prev->io_pl != next->io_pl)) {
                iopl_op.cmd             = PHYSDEVOP_SET_IOPL;
-               iopl_op.u.set_iopl.iopl = next->io_pl;
+               iopl_op.u.set_iopl.iopl = (next->io_pl == 0) ? 1 : next->io_pl;
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = (unsigned long)&iopl_op;
                mcl++;
@@ -549,7 +547,7 @@
                iobmp_op.cmd                     =
                        PHYSDEVOP_SET_IOBITMAP;
                iobmp_op.u.set_iobitmap.bitmap   =
-                       (unsigned long)next->io_bitmap_ptr;
+                       (char *)next->io_bitmap_ptr;
                iobmp_op.u.set_iobitmap.nr_ports =
                        next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op      = __HYPERVISOR_physdev_op;
@@ -791,3 +789,10 @@
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
 }
+
+/* Empty _restore_vcpu() for builds without CONFIG_X86_SMP (presumably the SMP build defines a real one elsewhere; confirm). */
+#ifndef CONFIG_X86_SMP
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Aug 25 22:53:20 2005
@@ -1575,19 +1575,20 @@
        /* Make sure we have a correctly sized P->M table. */
        if (max_pfn != xen_start_info.nr_pages) {
                phys_to_machine_mapping = alloc_bootmem_low_pages(
-                       max_pfn * sizeof(unsigned long));
+                       max_pfn * sizeof(unsigned int));
 
                if (max_pfn > xen_start_info.nr_pages) {
                        /* set to INVALID_P2M_ENTRY */
                        memset(phys_to_machine_mapping, ~0,
-                               max_pfn * sizeof(unsigned long));
+                               max_pfn * sizeof(unsigned int));
                        memcpy(phys_to_machine_mapping,
-                               (unsigned long *)xen_start_info.mfn_list,
-                               xen_start_info.nr_pages * sizeof(unsigned long));
+                               (unsigned int *)xen_start_info.mfn_list,
+                               xen_start_info.nr_pages * sizeof(unsigned int));
                } else {
                        memcpy(phys_to_machine_mapping,
-                               (unsigned long *)xen_start_info.mfn_list,
-                               max_pfn * sizeof(unsigned long));
+                               (unsigned int *)xen_start_info.mfn_list,
+                               max_pfn * sizeof(unsigned int));
+                       /* N.B. below relies on sizeof(int) == sizeof(long). */
                        if (HYPERVISOR_dom_mem_op(
                                MEMOP_decrease_reservation,
                                (unsigned long *)xen_start_info.mfn_list + max_pfn,
@@ -1597,18 +1598,17 @@
                free_bootmem(
                        __pa(xen_start_info.mfn_list), 
                        PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
-                       sizeof(unsigned long))));
+                       sizeof(unsigned int))));
        }
 
        pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
-       for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+       for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ )
        {       
             pfn_to_mfn_frame_list[j] = 
-                 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+                 virt_to_mfn(&phys_to_machine_mapping[i]);
        }
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
-            virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
-
+            virt_to_mfn(pfn_to_mfn_frame_list);
 
        /*
         * NOTE: at this point the bootmem allocator is fully available.
@@ -1636,7 +1636,7 @@
                efi_map_memmap();
 
        op.cmd             = PHYSDEVOP_SET_IOPL;
-       op.u.set_iopl.iopl = current->thread.io_pl = 1;
+       op.u.set_iopl.iopl = 1;
        HYPERVISOR_physdev_op(&op);
 
 #ifdef CONFIG_ACPI_BOOT
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Thu Aug 25 22:53:20 2005
@@ -62,6 +62,8 @@
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 
+#include <asm-xen/evtchn.h>
+
 /* Set if we find a B stepping CPU */
 static int __initdata smp_b_stepping;
 
@@ -129,15 +131,7 @@
  */
 void __init smp_alloc_memory(void)
 {
-#if 1
-       int cpu;
-
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
-               cpu_gdt_descr[cpu].address = (unsigned long)
-                       alloc_bootmem_low_pages(PAGE_SIZE);
-               /* XXX free unused pages later */
-       }
-#else
+#if 0
        trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
        /*
         * Has to be in very low memory so we can execute
@@ -859,8 +853,8 @@
        atomic_set(&init_deasserted, 0);
 
 #if 1
-       if (cpu_gdt_descr[0].size > PAGE_SIZE)
-               BUG();
+       cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+       BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
        cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
        printk("GDT: copying %d bytes from %lx to %lx\n",
                cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
@@ -878,7 +872,8 @@
        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.eip = start_eip;
        ctxt.user_regs.esp = idle->thread.esp;
-       ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+#define X86_EFLAGS_IOPL_RING1 0x1000
+       ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
 
        /* FPU is set up to default initial state. */
        memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
@@ -901,7 +896,7 @@
                for (va = cpu_gdt_descr[cpu].address, f = 0;
                     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
                     va += PAGE_SIZE, f++) {
-                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       ctxt.gdt_frames[f] = virt_to_mfn(va);
                        make_page_readonly((void *)va);
                }
                ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
@@ -917,10 +912,11 @@
        ctxt.failsafe_callback_cs  = __KERNEL_CS;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
 
-       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
+       ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
 
        boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
-       printk("boot error: %ld\n", boot_error);
+       if (boot_error)
+               printk("boot error: %ld\n", boot_error);
 
        if (!boot_error) {
                /*
@@ -1321,14 +1317,127 @@
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-#include <asm-xen/ctrl_if.h>
-
+#include <asm-xen/xenbus.h>
 /* hotplug down/up funtion pointer and target vcpu */
 struct vcpu_hotplug_handler_t {
-       void (*fn)(int vcpu);
+       void (*fn) (int vcpu);
        u32 vcpu;
 };
 static struct vcpu_hotplug_handler_t vcpu_hotplug_handler;
+
+/* Thread body: invoke the recorded hotplug action on the recorded vcpu,
+ * then clear the action.  Always returns 0. */
+static int vcpu_hotplug_cpu_process(void *unused)
+{
+       if (vcpu_hotplug_handler.fn == NULL)
+               return 0;
+       vcpu_hotplug_handler.fn(vcpu_hotplug_handler.vcpu);
+       vcpu_hotplug_handler.fn = NULL;
+       return 0;
+}
+
+/* Work callback: start a kernel thread running vcpu_hotplug_cpu_process(). */
+static void __vcpu_hotplug_handler(void *unused)
+{
+       int pid = kernel_thread(vcpu_hotplug_cpu_process, NULL,
+                               CLONE_FS | CLONE_FILES);
+
+       if (pid < 0)
+               printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
+}
+
+static void handle_vcpu_hotplug_event(struct xenbus_watch *, const char *);
+static struct notifier_block xsn_cpu;
+
+/* xenbus watch on node "cpu"; events go to handle_vcpu_hotplug_event() */
+static struct xenbus_watch cpu_watch = {
+       .node = "cpu",
+       .callback = handle_vcpu_hotplug_event
+};
+
+/* Notifier callback: register cpu_watch.  NB: Assumes xenbus_lock is held! */
+static int setup_cpu_watcher(struct notifier_block *notifier,
+                             unsigned long event, void *data)
+{
+       int err;
+
+       /* a successful trylock (0) means the caller did not hold the lock */
+       BUG_ON(down_trylock(&xenbus_lock) == 0);
+       err = register_xenbus_watch(&cpu_watch);
+       if (err)
+               printk(KERN_ERR "Failed to register watch on /cpu (%d)\n",
+                      err);
+
+       return NOTIFY_DONE;
+}
+
+static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, const char *node)
+{
+       static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
+       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
+       int err, cpu;
+       char state[8];
+       char dir[32];
+       char *cpustr;
+
+       /* get a pointer to start of cpu string */
+       if ((cpustr = strstr(node, "cpu/")) != NULL) {
+               /* find which cpu changed; bail out on a bad or out-of-range id */
+               if (sscanf(cpustr, "cpu/%d", &cpu) != 1 ||
+                   cpu < 0 || cpu >= NR_CPUS)
+                       return;
+
+               /* calc the dir for xenbus read */
+               sprintf(dir, "cpu/%d", cpu);
+
+               /* only react to changes of this cpu's availability key */
+               if ((strcmp(cpustr + strlen(dir), "/availability")) != 0)
+                       return;
+
+               /* get the state value; %7s cannot overrun state[8] */
+               xenbus_transaction_start("cpu");
+               err = xenbus_scanf(dir, "availability", "%7s", state);
+               xenbus_transaction_end(0);
+
+               if (err != 1) {
+                       printk(KERN_ERR
+                              "XENBUS: Unable to read cpu state\n");
+                       return;
+               }
+
+               /* if we detect a state change, take action */
+               if (strcmp(state, "online") == 0) {
+                       /* offline -> online */
+                       if (!cpu_isset(cpu, cpu_online_map)) {
+                               handler->vcpu = cpu;
+                               handler->fn = (void *)&cpu_up;
+                               schedule_work(&vcpu_hotplug_work);
+                       }
+               } else if (strcmp(state, "offline") == 0) {
+                       /* online -> offline */
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               handler->vcpu = cpu;
+                               handler->fn = (void *)&cpu_down;
+                               schedule_work(&vcpu_hotplug_work);
+                       }
+               } else {
+                       printk(KERN_ERR
+                              "XENBUS: unknown state(%s) on node(%s)\n", state,
+                              node);
+               }
+       }
+}
+
+/* Initcall: register xsn_cpu with setup_cpu_watcher() as its callback. */
+static int __init setup_vcpu_hotplug_event(void)
+{
+       xsn_cpu.notifier_call = setup_cpu_watcher;
+       register_xenstore_notifier(&xsn_cpu);
+
+       return 0;
+}
+
+subsys_initcall(setup_vcpu_hotplug_event);
 
 /* must be called with the cpucontrol mutex held */
 static int __devinit cpu_enable(unsigned int cpu)
@@ -1398,77 +1507,6 @@
        printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
 
-static int vcpu_hotplug_cpu_process(void *unused)
-{
-       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
-
-       if (handler->fn) {
-               (*(handler->fn))(handler->vcpu);
-               handler->fn = NULL;
-       }
-       return 0;
-}
-
-static void __vcpu_hotplug_handler(void *unused)
-{
-       int err;
-
-       err = kernel_thread(vcpu_hotplug_cpu_process, 
-                           NULL, CLONE_FS | CLONE_FILES);
-       if (err < 0)
-               printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
-
-}
-
-static void vcpu_hotplug_event_handler(ctrl_msg_t *msg, unsigned long id)
-{
-       static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
-       vcpu_hotplug_t *req = (vcpu_hotplug_t *)&msg->msg[0];
-       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
-       ssize_t ret;
-
-       if (msg->length != sizeof(vcpu_hotplug_t))
-               goto parse_error;
-
-       /* grab target vcpu from msg */
-       handler->vcpu = req->vcpu;
-
-       /* determine which function to call based on msg subtype */
-       switch (msg->subtype) {
-        case CMSG_VCPU_HOTPLUG_OFF:
-               handler->fn = (void *)&cpu_down;
-               ret = schedule_work(&vcpu_hotplug_work);
-               req->status = (u32) ret;
-               break;
-        case CMSG_VCPU_HOTPLUG_ON:
-               handler->fn = (void *)&cpu_up;
-               ret = schedule_work(&vcpu_hotplug_work);
-               req->status = (u32) ret;
-               break;
-        default:
-               goto parse_error;
-       }
-
-       ctrl_if_send_response(msg);
-       return;
- parse_error:
-       msg->length = 0;
-       ctrl_if_send_response(msg);
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
-       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
-
-       handler->fn = NULL;
-       ctrl_if_register_receiver(CMSG_VCPU_HOTPLUG,
-                                 vcpu_hotplug_event_handler, 0);
-
-       return 0;
-}
-
-__initcall(setup_vcpu_hotplug_event);
-
 #else /* ... !CONFIG_HOTPLUG_CPU */
 int __cpu_disable(void)
 {
@@ -1529,20 +1567,66 @@
 extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
 extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
 
-void __init smp_intr_init(void)
+void smp_intr_init(void)
 {
        int cpu = smp_processor_id();
 
        per_cpu(resched_irq, cpu) =
-               bind_ipi_on_cpu_to_irq(RESCHEDULE_VECTOR);
+               bind_ipi_to_irq(RESCHEDULE_VECTOR);
        sprintf(resched_name[cpu], "resched%d", cpu);
        BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
                           SA_INTERRUPT, resched_name[cpu], NULL));
 
        per_cpu(callfunc_irq, cpu) =
-               bind_ipi_on_cpu_to_irq(CALL_FUNCTION_VECTOR);
+               bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
        sprintf(callfunc_name[cpu], "callfunc%d", cpu);
        BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
                           smp_call_function_interrupt,
                           SA_INTERRUPT, callfunc_name[cpu], NULL));
 }
+
+static void smp_intr_exit(void)
+{
+       int cpu = smp_processor_id();
+
+       free_irq(per_cpu(resched_irq, cpu), NULL);
+       unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+       free_irq(per_cpu(callfunc_irq, cpu), NULL);
+       unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
+void smp_suspend(void)
+{
+       /* XXX todo: take down time and ipi's on all cpus */
+       local_teardown_timer_irq();
+       smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+       /* XXX todo: restore time and ipi's on all cpus */
+       smp_intr_init();
+       local_setup_timer_irq();
+}
+
+DECLARE_PER_CPU(int, timer_irq);
+
+void _restore_vcpu(void)
+{
+       int cpu = smp_processor_id();
+       extern atomic_t vcpus_rebooting;
+
+       /* We are the first thing the vcpu runs when it comes back,
+          and we are supposed to restore the IPIs and timer
+          interrupts etc.  When we return, the vcpu's idle loop will
+          start up again. */
+       _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
+       _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
+       _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
+       _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
+       atomic_dec(&vcpus_rebooting);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Thu Aug 25 22:53:20 2005
@@ -70,6 +70,8 @@
 
 #include "io_ports.h"
 
+#include <asm-xen/evtchn.h>
+
 extern spinlock_t i8259A_lock;
 int pit_latch_buggy;              /* extern */
 
@@ -113,26 +115,15 @@
        u32 version;
 };
 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-static struct timeval shadow_tv;
+static struct timespec shadow_tv;
+static u32 shadow_tv_version;
 
 /* Keep track of last time we did processing/updating of jiffies and xtime. */
 static u64 processed_system_time;   /* System time (ns) at last processing. */
 static DEFINE_PER_CPU(u64, processed_system_time);
 
-#define NS_PER_TICK (1000000000ULL/HZ)
-
-#define HANDLE_USEC_UNDERFLOW(_tv) do {                \
-       while ((_tv).tv_usec < 0) {             \
-               (_tv).tv_usec += USEC_PER_SEC;  \
-               (_tv).tv_sec--;                 \
-       }                                       \
-} while (0)
-#define HANDLE_USEC_OVERFLOW(_tv) do {         \
-       while ((_tv).tv_usec >= USEC_PER_SEC) { \
-               (_tv).tv_usec -= USEC_PER_SEC;  \
-               (_tv).tv_sec++;                 \
-       }                                       \
-} while (0)
+#define NS_PER_TICK (1000000000L/HZ)
+
 static inline void __normalize_time(time_t *sec, s64 *nsec)
 {
        while (*nsec >= NSEC_PER_SEC) {
@@ -153,8 +144,6 @@
        return 1;
 }
 __setup("independent_wallclock", __independent_wallclock);
-#define INDEPENDENT_WALLCLOCK() \
-    (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
 
 int tsc_disable __initdata = 0;
 
@@ -175,25 +164,40 @@
        .delay = delay_tsc,
 };
 
-static inline u32 down_shift(u64 time, int shift)
-{
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+       u64 product;
+#ifdef __i386__
+       u32 tmp1, tmp2;
+#endif
+
        if ( shift < 0 )
-               return (u32)(time >> -shift);
-       return (u32)((u32)time << shift);
-}
-
-/*
- * 32-bit multiplication of integer multiplicand and fractional multiplier
- * yielding 32-bit integer product.
- */
-static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
-{
-       u32 product_int, product_frac;
+               delta >>= -shift;
+       else
+               delta <<= shift;
+
+#ifdef __i386__
        __asm__ (
-               "mul %3"
-               : "=a" (product_frac), "=d" (product_int)
-               : "0" (multiplicand), "r" (multiplier) );
-       return product_int;
+               "mul  %5       ; "
+               "mov  %4,%%eax ; "
+               "mov  %%edx,%4 ; "
+               "mul  %5       ; "
+               "add  %4,%%eax ; "
+               "xor  %5,%5    ; "
+               "adc  %5,%%edx ; "
+               : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+               : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#else
+       __asm__ (
+               "mul %%rdx ; shrd $32,%%rdx,%%rax"
+               : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#endif
+
+       return product;
 }
 
 void init_cpu_khz(void)
@@ -201,55 +205,43 @@
        u64 __cpu_khz = 1000000ULL << 32;
        struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
        do_div(__cpu_khz, info->tsc_to_system_mul);
-       cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
-       printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
-              cpu_khz / 1000, cpu_khz % 1000);
+       if ( info->tsc_shift < 0 )
+               cpu_khz = __cpu_khz << -info->tsc_shift;
+       else
+               cpu_khz = __cpu_khz >> info->tsc_shift;
 }
 
 static u64 get_nsec_offset(struct shadow_time_info *shadow)
 {
-       u64 now;
-       u32 delta;
+       u64 now, delta;
        rdtscll(now);
-       delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
-       return mul_frac(delta, shadow->tsc_to_nsec_mul);
+       delta = now - shadow->tsc_timestamp;
+       return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
 }
 
 static unsigned long get_usec_offset(struct shadow_time_info *shadow)
 {
-       u64 now;
-       u32 delta;
+       u64 now, delta;
        rdtscll(now);
-       delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
-       return mul_frac(delta, shadow->tsc_to_usec_mul);
-}
-
-static void update_wallclock(void)
-{
-       shared_info_t *s = HYPERVISOR_shared_info;
+       delta = now - shadow->tsc_timestamp;
+       return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
+}
+
+static void __update_wallclock(time_t sec, long nsec)
+{
        long wtm_nsec, xtime_nsec;
        time_t wtm_sec, xtime_sec;
-       u64 tmp, usec;
-
-       shadow_tv.tv_sec  = s->wc_sec;
-       shadow_tv.tv_usec = s->wc_usec;
-
-       if (INDEPENDENT_WALLCLOCK())
-               return;
-
-       if ((time_status & STA_UNSYNC) != 0)
-               return;
+       u64 tmp, wc_nsec;
 
        /* Adjust wall-clock time base based on wall_jiffies ticks. */
-       usec = processed_system_time;
-       do_div(usec, 1000);
-       usec += (u64)shadow_tv.tv_sec * 1000000ULL;
-       usec += (u64)shadow_tv.tv_usec;
-       usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
+       wc_nsec = processed_system_time;
+       wc_nsec += (u64)sec * 1000000000ULL;
+       wc_nsec += (u64)nsec;
+       wc_nsec -= (jiffies - wall_jiffies) * (u64)(NSEC_PER_SEC / HZ);
 
        /* Split wallclock base into seconds and nanoseconds. */
-       tmp = usec;
-       xtime_nsec = do_div(tmp, 1000000) * 1000ULL;
+       tmp = wc_nsec;
+       xtime_nsec = do_div(tmp, 1000000000);
        xtime_sec  = (time_t)tmp;
 
        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
@@ -257,13 +249,35 @@
 
        set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+       time_adjust = 0;                /* stop active adjtime() */
+       time_status |= STA_UNSYNC;
+       time_maxerror = NTP_PHASE_LIMIT;
+       time_esterror = NTP_PHASE_LIMIT;
+}
+
+static void update_wallclock(void)
+{
+       shared_info_t *s = HYPERVISOR_shared_info;
+
+       do {
+               shadow_tv_version = s->wc_version;
+               rmb();
+               shadow_tv.tv_sec  = s->wc_sec;
+               shadow_tv.tv_nsec = s->wc_nsec;
+               rmb();
+       }
+       while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
+
+       if (!independent_wallclock)
+               __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
 }
 
 /*
  * Reads a consistent set of time-base values from Xen, into a shadow data
- * area. Must be called with the xtime_lock held for writing.
+ * area.
  */
-static void __get_time_values_from_xen(void)
+static void get_time_values_from_xen(void)
 {
        shared_info_t           *s = HYPERVISOR_shared_info;
        struct vcpu_time_info   *src;
@@ -273,7 +287,7 @@
        dst = &per_cpu(shadow_time, smp_processor_id());
 
        do {
-               dst->version = src->time_version2;
+               dst->version = src->version;
                rmb();
                dst->tsc_timestamp     = src->tsc_timestamp;
                dst->system_timestamp  = src->system_time;
@@ -281,13 +295,9 @@
                dst->tsc_shift         = src->tsc_shift;
                rmb();
        }
-       while (dst->version != src->time_version1);
+       while ((src->version & 1) | (dst->version ^ src->version));
 
        dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
-
-       if ((shadow_tv.tv_sec != s->wc_sec) ||
-           (shadow_tv.tv_usec != s->wc_usec))
-               update_wallclock();
 }
 
 static inline int time_values_up_to_date(int cpu)
@@ -298,7 +308,7 @@
        src = &HYPERVISOR_shared_info->vcpu_time[cpu]; 
        dst = &per_cpu(shadow_time, cpu); 
 
-       return (dst->version == src->time_version2);
+       return (dst->version == src->version);
 }
 
 /*
@@ -339,10 +349,10 @@
        unsigned long seq;
        unsigned long usec, sec;
        unsigned long max_ntp_tick;
-       unsigned long flags;
        s64 nsec;
        unsigned int cpu;
        struct shadow_time_info *shadow;
+       u32 local_time_version;
 
        cpu = get_cpu();
        shadow = &per_cpu(shadow_time, cpu);
@@ -350,6 +360,7 @@
        do {
                unsigned long lost;
 
+               local_time_version = shadow->version;
                seq = read_seqbegin(&xtime_lock);
 
                usec = get_usec_offset(shadow);
@@ -385,12 +396,11 @@
                         * overflowed). Detect that and recalculate
                         * with fresh values.
                         */
-                       write_seqlock_irqsave(&xtime_lock, flags);
-                       __get_time_values_from_xen();
-                       write_sequnlock_irqrestore(&xtime_lock, flags);
+                       get_time_values_from_xen();
                        continue;
                }
-       } while (read_seqretry(&xtime_lock, seq));
+       } while (read_seqretry(&xtime_lock, seq) ||
+                (local_time_version != shadow->version));
 
        put_cpu();
 
@@ -407,18 +417,14 @@
 
 int do_settimeofday(struct timespec *tv)
 {
-       time_t wtm_sec, sec = tv->tv_sec;
-       long wtm_nsec;
+       time_t sec;
        s64 nsec;
-       struct timespec xentime;
        unsigned int cpu;
        struct shadow_time_info *shadow;
+       dom0_op_t op;
 
        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
-
-       if (!INDEPENDENT_WALLCLOCK())
-               return 0; /* Silent failure? */
 
        cpu = get_cpu();
        shadow = &per_cpu(shadow_time, cpu);
@@ -430,50 +436,30 @@
         * overflows. If that were to happen then our shadow time values would
         * be stale, so we can retry with fresh ones.
         */
- again:
-       nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
-       if (unlikely(!time_values_up_to_date(cpu))) {
-               __get_time_values_from_xen();
-               goto again;
-       }
-
+       for ( ; ; ) {
+               nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+               if (time_values_up_to_date(cpu))
+                       break;
+               get_time_values_from_xen();
+       }
+       sec = tv->tv_sec;
        __normalize_time(&sec, &nsec);
-       set_normalized_timespec(&xentime, sec, nsec);
-
-       /*
-        * This is revolting. We need to set "xtime" correctly. However, the
-        * value in this location is the value at the most recent update of
-        * wall time.  Discover what correction gettimeofday() would have
-        * made, and then undo it!
-        */
-       nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
-       nsec -= (shadow->system_timestamp - processed_system_time);
-
-       __normalize_time(&sec, &nsec);
-       wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-       wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-       set_normalized_timespec(&xtime, sec, nsec);
-       set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-       time_adjust = 0;                /* stop active adjtime() */
-       time_status |= STA_UNSYNC;
-       time_maxerror = NTP_PHASE_LIMIT;
-       time_esterror = NTP_PHASE_LIMIT;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if (xen_start_info.flags & SIF_INITDOMAIN) {
-               dom0_op_t op;
+
+       if ((xen_start_info.flags & SIF_INITDOMAIN) &&
+           !independent_wallclock) {
                op.cmd = DOM0_SETTIME;
-               op.u.settime.secs        = xentime.tv_sec;
-               op.u.settime.usecs       = xentime.tv_nsec / NSEC_PER_USEC;
+               op.u.settime.secs        = sec;
+               op.u.settime.nsecs       = nsec;
                op.u.settime.system_time = shadow->system_timestamp;
-               write_sequnlock_irq(&xtime_lock);
                HYPERVISOR_dom0_op(&op);
-       } else
-#endif
-               write_sequnlock_irq(&xtime_lock);
+               update_wallclock();
+       } else if (independent_wallclock) {
+               nsec -= shadow->system_timestamp;
+               __normalize_time(&sec, &nsec);
+               __update_wallclock(sec, nsec);
+       }
+
+       write_sequnlock_irq(&xtime_lock);
 
        put_cpu();
 
@@ -489,6 +475,9 @@
        int retval;
 
        WARN_ON(irqs_disabled());
+
+       if (!(xen_start_info.flags & SIF_INITDOMAIN))
+               return 0;
 
        /* gets recalled with irq locally disabled */
        spin_lock_irq(&rtc_lock);
@@ -515,21 +504,21 @@
 {
        int cpu = get_cpu();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
-       s64 off;
-       unsigned long flags;
-       
-       for ( ; ; ) {
-               off = get_nsec_offset(shadow);
-               if (time_values_up_to_date(cpu))
-                       break;
-               write_seqlock_irqsave(&xtime_lock, flags);
-               __get_time_values_from_xen();
-               write_sequnlock_irqrestore(&xtime_lock, flags);
-       }
+       u64 time;
+       u32 local_time_version;
+
+       do {
+               local_time_version = shadow->version;
+               smp_rmb();
+               time = shadow->system_timestamp + get_nsec_offset(shadow);
+               if (!time_values_up_to_date(cpu))
+                       get_time_values_from_xen();
+               smp_rmb();
+       } while (local_time_version != shadow->version);
 
        put_cpu();
 
-       return shadow->system_timestamp + off;
+       return time;
 }
 EXPORT_SYMBOL(monotonic_clock);
 
@@ -551,19 +540,16 @@
 EXPORT_SYMBOL(profile_pc);
 #endif
 
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(int irq, void *dev_id,
-                                       struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
        s64 delta, delta_cpu;
        int cpu = smp_processor_id();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
 
+       write_seqlock(&xtime_lock);
+
        do {
-               __get_time_values_from_xen();
+               get_time_values_from_xen();
 
                delta = delta_cpu = 
                        shadow->system_timestamp + get_nsec_offset(shadow);
@@ -572,7 +558,7 @@
        }
        while (!time_values_up_to_date(cpu));
 
-       if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
+       if (unlikely(delta < (s64)-1000000) || unlikely(delta_cpu < 0)) {
                printk("Timer ISR/%d: Time went backwards: "
                       "delta=%lld cpu_delta=%lld shadow=%lld "
                       "off=%lld processed=%lld cpu_processed=%lld\n",
@@ -583,7 +569,6 @@
                for (cpu = 0; cpu < num_online_cpus(); cpu++)
                        printk(" %d: %lld\n", cpu,
                               per_cpu(processed_system_time, cpu));
-               return;
        }
 
        /* System-wide jiffy work. */
@@ -593,32 +578,25 @@
                do_timer(regs);
        }
 
-       /* Local CPU jiffy work. */
+       if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+               update_wallclock();
+               clock_was_set();
+       }
+
+       write_sequnlock(&xtime_lock);
+
+       /*
+         * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
+         * if there is risk of deadlock if we do (since update_process_times
+         * may do scheduler rebalancing work and thus acquire runqueue locks).
+         */
        while (delta_cpu >= NS_PER_TICK) {
                delta_cpu -= NS_PER_TICK;
                per_cpu(processed_system_time, cpu) += NS_PER_TICK;
                update_process_times(user_mode(regs));
                profile_tick(CPU_PROFILING, regs);
        }
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-       /*
-        * Here we are in the timer irq handler. We just have irqs locally
-        * disabled but we don't know if the timer_bh is running on the other
-        * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-        * the irq version of write_lock because as just said we have irq
-        * locally disabled. -arca
-        */
-       write_seqlock(&xtime_lock);
-       do_timer_interrupt(irq, NULL, regs);
-       write_sequnlock(&xtime_lock);
+
        return IRQ_HANDLED;
 }
 
@@ -767,7 +745,7 @@
 #endif
 
 /* Dynamically-mapped IRQ. */
-static DEFINE_PER_CPU(int, timer_irq);
+DEFINE_PER_CPU(int, timer_irq);
 
 static struct irqaction irq_timer = {
        timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
@@ -786,15 +764,16 @@
                return;
        }
 #endif
-       __get_time_values_from_xen();
-       xtime.tv_sec = shadow_tv.tv_sec;
-       xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
-       set_normalized_timespec(&wall_to_monotonic,
-               -xtime.tv_sec, -xtime.tv_nsec);
+       get_time_values_from_xen();
+
        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;
 
+       update_wallclock();
+
        init_cpu_khz();
+       printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
+              cpu_khz / 1000, cpu_khz % 1000);
 
 #if defined(__x86_64__)
        vxtime.mode = VXTIME_TSC;
@@ -860,6 +839,8 @@
 void time_suspend(void)
 {
        /* nothing */
+       teardown_irq(per_cpu(timer_irq, 0), &irq_timer);
+       unbind_virq_from_irq(VIRQ_TIMER);
 }
 
 /* No locking required. We are only CPU running, and interrupts are off. */
@@ -867,17 +848,31 @@
 {
        init_cpu_khz();
 
-       /* Get timebases for new environment. */ 
-       __get_time_values_from_xen();
-
-       /* Reset our own concept of passage of system time. */
-       processed_system_time =
-               per_cpu(shadow_time, smp_processor_id()).system_timestamp;
+       get_time_values_from_xen();
+
+       processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;
+
+       update_wallclock();
+
+       per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
+       (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
 }
 
 #ifdef CONFIG_SMP
 static char timer_name[NR_CPUS][15];
+void local_setup_timer_irq(void)
+{
+       int cpu = smp_processor_id();
+
+       if (cpu == 0)
+               return;
+       per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
+       sprintf(timer_name[cpu], "timer%d", cpu);
+       BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
+                          SA_INTERRUPT, timer_name[cpu], NULL));
+}
+
 void local_setup_timer(void)
 {
        int seq, cpu = smp_processor_id();
@@ -888,10 +883,17 @@
                        per_cpu(shadow_time, cpu).system_timestamp;
        } while (read_seqretry(&xtime_lock, seq));
 
-       per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
-       sprintf(timer_name[cpu], "timer%d", cpu);
-       BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
-                          SA_INTERRUPT, timer_name[cpu], NULL));
+       local_setup_timer_irq();
+}
+
+void local_teardown_timer_irq(void)
+{
+       int cpu = smp_processor_id();
+
+       if (cpu == 0)
+               return;
+       free_irq(per_cpu(timer_irq, cpu), NULL);
+       unbind_virq_from_irq(VIRQ_TIMER);
 }
 #endif
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Aug 25 22:53:20 2005
@@ -871,6 +871,7 @@
        }
 }
 
+#ifndef CONFIG_XEN
 fastcall void setup_x86_bogus_stack(unsigned char * stk)
 {
        unsigned long *switch16_ptr, *switch32_ptr;
@@ -915,6 +916,7 @@
        memcpy(stack32, stack16, len);
        return stack32;
 }
+#endif
 
 /*
  *  'math_state_restore()' saves the current math information in the
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Thu Aug 25 22:53:20 2005
@@ -281,7 +281,7 @@
        siginfo_t info;
 
        /* Set the "privileged fault" bit to something sane. */
-       error_code &= 3;
+       error_code &= ~4;
        error_code |= (regs->xcs & 2) << 1;
        if (regs->eflags & X86_EFLAGS_VM)
                error_code |= 4;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c   Thu Aug 25 22:53:20 2005
@@ -41,8 +41,7 @@
        if (!pte_none(*(kmap_pte-idx)))
                BUG();
 #endif
-       set_pte(kmap_pte-idx, mk_pte(page, prot));
-       __flush_tlb_one(vaddr);
+       set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
 
        return (void*) vaddr;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Thu Aug 25 22:53:20 2005
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/balloon.h>
+#include <linux/module.h>
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
@@ -58,124 +59,124 @@
 #ifndef CONFIG_XEN_SHADOW_MODE
 void xen_l1_entry_update(pte_t *ptr, pte_t val)
 {
-    mmu_update_t u;
-    u.ptr = virt_to_machine(ptr);
-    u.val = pte_val_ma(val);
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+       mmu_update_t u;
+       u.ptr = virt_to_machine(ptr);
+       u.val = pte_val_ma(val);
+       BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
 {
-    mmu_update_t u;
-    u.ptr = virt_to_machine(ptr);
-    u.val = pmd_val_ma(val);
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+       mmu_update_t u;
+       u.ptr = virt_to_machine(ptr);
+       u.val = pmd_val_ma(val);
+       BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
 #ifdef CONFIG_X86_PAE
 void xen_l3_entry_update(pud_t *ptr, pud_t val)
 {
-    mmu_update_t u;
-    u.ptr = virt_to_machine(ptr);
-    u.val = pud_val_ma(val);
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+       mmu_update_t u;
+       u.ptr = virt_to_machine(ptr);
+       u.val = pud_val_ma(val);
+       BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 #endif
 
 #ifdef CONFIG_X86_64
 void xen_l3_entry_update(pud_t *ptr, pud_t val)
 {
-    mmu_update_t u;
-    u.ptr = virt_to_machine(ptr);
-    u.val = val.pud;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+       mmu_update_t u;
+       u.ptr = virt_to_machine(ptr);
+       u.val = val.pud;
+       BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
 {
-    mmu_update_t u;
-    u.ptr = virt_to_machine(ptr);
-    u.val = val.pgd;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+       mmu_update_t u;
+       u.ptr = virt_to_machine(ptr);
+       u.val = val.pgd;
+       BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 #endif /* CONFIG_X86_64 */
 #endif /* CONFIG_XEN_SHADOW_MODE */
 
 void xen_machphys_update(unsigned long mfn, unsigned long pfn)
 {
-    mmu_update_t u;
-    u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-    u.val = pfn;
-    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+       mmu_update_t u;
+       u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+       u.val = pfn;
+       BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pt_switch(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_NEW_BASEPTR;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_NEW_BASEPTR;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_new_user_pt(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_NEW_USER_BASEPTR;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_NEW_USER_BASEPTR;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_tlb_flush(void)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_invlpg(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_INVLPG_LOCAL;
-    op.linear_addr = ptr & PAGE_MASK;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_INVLPG_LOCAL;
+       op.linear_addr = ptr & PAGE_MASK;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 #ifdef CONFIG_SMP
 
 void xen_tlb_flush_all(void)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_TLB_FLUSH_ALL;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_TLB_FLUSH_ALL;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_tlb_flush_mask(cpumask_t *mask)
 {
-    struct mmuext_op op;
-    if ( cpus_empty(*mask) )
-        return;
-    op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-    op.vcpumask = mask->bits;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       if ( cpus_empty(*mask) )
+               return;
+       op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+       op.vcpumask = mask->bits;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_invlpg_all(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_INVLPG_ALL;
-    op.linear_addr = ptr & PAGE_MASK;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_INVLPG_ALL;
+       op.linear_addr = ptr & PAGE_MASK;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
 {
-    struct mmuext_op op;
-    if ( cpus_empty(*mask) )
-        return;
-    op.cmd = MMUEXT_INVLPG_MULTI;
-    op.vcpumask = mask->bits;
-    op.linear_addr = ptr & PAGE_MASK;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       if ( cpus_empty(*mask) )
+               return;
+       op.cmd = MMUEXT_INVLPG_MULTI;
+       op.vcpumask = mask->bits;
+       op.linear_addr = ptr & PAGE_MASK;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 #endif /* CONFIG_SMP */
@@ -183,181 +184,233 @@
 #ifndef CONFIG_XEN_SHADOW_MODE
 void xen_pgd_pin(unsigned long ptr)
 {
-    struct mmuext_op op;
+       struct mmuext_op op;
 #ifdef CONFIG_X86_64
-    op.cmd = MMUEXT_PIN_L4_TABLE;
+       op.cmd = MMUEXT_PIN_L4_TABLE;
 #elif defined(CONFIG_X86_PAE)
-    op.cmd = MMUEXT_PIN_L3_TABLE;
+       op.cmd = MMUEXT_PIN_L3_TABLE;
 #else
-    op.cmd = MMUEXT_PIN_L2_TABLE;
+       op.cmd = MMUEXT_PIN_L2_TABLE;
 #endif
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pgd_unpin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_UNPIN_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_UNPIN_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pte_pin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_PIN_L1_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_PIN_L1_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pte_unpin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_UNPIN_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_UNPIN_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 #ifdef CONFIG_X86_64
 void xen_pud_pin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_PIN_L3_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_PIN_L3_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pud_unpin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_UNPIN_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_UNPIN_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pmd_pin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_PIN_L2_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_PIN_L2_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void xen_pmd_unpin(unsigned long ptr)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_UNPIN_TABLE;
-    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+       struct mmuext_op op;
+       op.cmd = MMUEXT_UNPIN_TABLE;
+       op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 #endif /* CONFIG_X86_64 */
 #endif /* CONFIG_XEN_SHADOW_MODE */
 
 void xen_set_ldt(unsigned long ptr, unsigned long len)
 {
-    struct mmuext_op op;
-    op.cmd = MMUEXT_SET_LDT;
-    op.linear_addr = ptr;
-    op.nr_ents = len;
-    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_contig_memory(unsigned long vstart, unsigned int order)
-{
-    /*
-     * Ensure multi-page extents are contiguous in machine memory. This code 
-     * could be cleaned up some, and the number of hypercalls reduced.
-     */
-    pgd_t         *pgd; 
-    pud_t         *pud; 
-    pmd_t         *pmd;
-    pte_t         *pte;
-    unsigned long  mfn, i, flags;
-
-    scrub_pages(vstart, 1 << order);
-
-    balloon_lock(flags);
-
-    /* 1. Zap current PTEs, giving away the underlying pages. */
-    for (i = 0; i < (1<<order); i++) {
-        pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
-        pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
-        pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
-        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
-        mfn = pte_mfn(*pte);
-        HYPERVISOR_update_va_mapping(
-            vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
-        phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
-            INVALID_P2M_ENTRY;
-        BUG_ON(HYPERVISOR_dom_mem_op(
-            MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
-    }
-
-    /* 2. Get a new contiguous memory extent. */
-    BUG_ON(HYPERVISOR_dom_mem_op(
-              MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1);
-
-    /* 3. Map the new extent in place of old pages. */
-    for (i = 0; i < (1<<order); i++) {
-        HYPERVISOR_update_va_mapping(
-            vstart + (i*PAGE_SIZE),
-            __pte_ma(((mfn+i)<<PAGE_SHIFT)|__PAGE_KERNEL), 0);
-        xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
-        phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn+i;
-    }
-
-    flush_tlb_all();
-
-    balloon_unlock(flags);
-}
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-
-unsigned long allocate_empty_lowmem_region(unsigned long pages)
-{
-    pgd_t         *pgd;
-    pud_t         *pud; 
-    pmd_t         *pmd;
-    pte_t         *pte;
-    unsigned long *pfn_array;
-    unsigned long  vstart;
-    unsigned long  i;
-    unsigned int   order = get_order(pages*PAGE_SIZE);
-
-    vstart = __get_free_pages(GFP_KERNEL, order);
-    if ( vstart == 0 )
-        return 0UL;
-
-    scrub_pages(vstart, 1 << order);
-
-    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
-    if ( pfn_array == NULL )
-        BUG();
-
-    for ( i = 0; i < (1<<order); i++ )
-    {
-        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
-        pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
-        pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
-        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); 
-        pfn_array[i] = pte_mfn(*pte);
-#ifdef CONFIG_X86_64
-        xen_l1_entry_update(pte, __pte(0));
-#else
-        HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
-#endif
-        phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
-            INVALID_P2M_ENTRY;
-    }
-
-    flush_tlb_all();
-
-    balloon_put_pages(pfn_array, 1 << order);
-
-    vfree(pfn_array);
-
-    return vstart;
-}
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+       struct mmuext_op op;
+       op.cmd = MMUEXT_SET_LDT;
+       op.linear_addr = ptr;
+       op.nr_ents = len;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+/*
+ * Bitmap is indexed by page number. If bit is set, the page is part of a
+ * xen_create_contiguous_region() area of memory.
+ */
+unsigned long *contiguous_bitmap;
+
+static void contiguous_bitmap_set(
+       unsigned long first_page, unsigned long nr_pages)
+{
+       unsigned long start_off, end_off, curr_idx, end_idx;
+
+       curr_idx  = first_page / BITS_PER_LONG;
+       start_off = first_page & (BITS_PER_LONG-1);
+       end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
+       end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
+
+       if (curr_idx == end_idx) {
+               contiguous_bitmap[curr_idx] |=
+                       ((1UL<<end_off)-1) & -(1UL<<start_off);
+       } else {
+               contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
+               while ( ++curr_idx < end_idx )
+                       contiguous_bitmap[curr_idx] = ~0UL;
+               contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
+       }
+}
+
+static void contiguous_bitmap_clear(
+       unsigned long first_page, unsigned long nr_pages)
+{
+       unsigned long start_off, end_off, curr_idx, end_idx;
+
+       curr_idx  = first_page / BITS_PER_LONG;
+       start_off = first_page & (BITS_PER_LONG-1);
+       end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
+       end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
+
+       if (curr_idx == end_idx) {
+               contiguous_bitmap[curr_idx] &=
+                       -(1UL<<end_off) | ((1UL<<start_off)-1);
+       } else {
+               contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
+               while ( ++curr_idx != end_idx )
+                       contiguous_bitmap[curr_idx] = 0;
+               contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
+       }
+}
+
+/* Ensure multi-page extents are contiguous in machine memory. */
+void xen_create_contiguous_region(unsigned long vstart, unsigned int order)
+{
+       pgd_t         *pgd; 
+       pud_t         *pud; 
+       pmd_t         *pmd;
+       pte_t         *pte;
+       unsigned long  mfn, i, flags;
+
+       scrub_pages(vstart, 1 << order);
+
+       balloon_lock(flags);
+
+       /* 1. Zap current PTEs, giving away the underlying pages. */
+       for (i = 0; i < (1<<order); i++) {
+               pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+               pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+               pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+               pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+               mfn = pte_mfn(*pte);
+               BUG_ON(HYPERVISOR_update_va_mapping(
+                       vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+               phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+                       INVALID_P2M_ENTRY;
+               BUG_ON(HYPERVISOR_dom_mem_op(
+                       MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+       }
+
+       /* 2. Get a new contiguous memory extent. */
+       BUG_ON(HYPERVISOR_dom_mem_op(
+               MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1);
+
+       /* 3. Map the new extent in place of old pages. */
+       for (i = 0; i < (1<<order); i++) {
+               BUG_ON(HYPERVISOR_update_va_mapping(
+                       vstart + (i*PAGE_SIZE),
+                       pfn_pte_ma(mfn+i, PAGE_KERNEL), 0));
+               xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
+               phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn+i;
+       }
+
+       flush_tlb_all();
+
+       contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
+
+       balloon_unlock(flags);
+}
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+       pgd_t         *pgd; 
+       pud_t         *pud; 
+       pmd_t         *pmd;
+       pte_t         *pte;
+       unsigned long  mfn, i, flags;
+
+       scrub_pages(vstart, 1 << order);
+
+       balloon_lock(flags);
+
+       contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
+
+       /* 1. Zap current PTEs, giving away the underlying pages. */
+       for (i = 0; i < (1<<order); i++) {
+               pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+               pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+               pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+               pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+               mfn = pte_mfn(*pte);
+               BUG_ON(HYPERVISOR_update_va_mapping(
+                       vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
+               phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+                       INVALID_P2M_ENTRY;
+               BUG_ON(HYPERVISOR_dom_mem_op(
+                       MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+       }
+
+       /* 2. Map new pages in place of old pages. */
+       for (i = 0; i < (1<<order); i++) {
+               BUG_ON(HYPERVISOR_dom_mem_op(
+                       MEMOP_increase_reservation, &mfn, 1, 0) != 1);
+               BUG_ON(HYPERVISOR_update_va_mapping(
+                       vstart + (i*PAGE_SIZE),
+                       pfn_pte_ma(mfn, PAGE_KERNEL), 0));
+               xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i);
+               phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn;
+       }
+
+       flush_tlb_all();
+
+       balloon_unlock(flags);
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Thu Aug 25 22:53:20 2005
@@ -41,6 +41,14 @@
 #include <asm/sections.h>
 #include <asm-xen/hypervisor.h>
 
+extern unsigned long *contiguous_bitmap;
+
+#if defined(CONFIG_SWIOTLB)
+extern void swiotlb_init(void);
+int swiotlb;
+EXPORT_SYMBOL(swiotlb);
+#endif
+
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -334,18 +342,18 @@
 extern void __init remap_numa_kva(void);
 #endif
 
+pgd_t *swapper_pg_dir;
+
 static void __init pagetable_init (void)
 {
        unsigned long vaddr;
-       pgd_t *pgd_base = swapper_pg_dir;
-       pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
-
-#ifdef CONFIG_X86_PAE
+       pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
        int i;
-       /* Init entries of the first-level page table to the zero page */
-       for (i = 0; i < PTRS_PER_PGD; i++)
-               set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-#endif
+
+       swapper_pg_dir = pgd_base;
+       init_mm.pgd    = pgd_base;
+       for (i = 0; i < NR_CPUS; i++)
+               per_cpu(cur_pgd, i) = pgd_base;
 
        /* Enable PSE if available */
        if (cpu_has_pse) {
@@ -358,44 +366,6 @@
                __PAGE_KERNEL |= _PAGE_GLOBAL;
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }
-
-       /*
-        * Switch to proper mm_init page directory. Initialise from the current
-        * page directory, write-protect the new page directory, then switch to
-        * it. We clean up by write-enabling and then freeing the old page dir.
-        */
-#ifndef CONFIG_X86_PAE
-       memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
-       make_page_readonly(pgd_base);
-       xen_pgd_pin(__pa(pgd_base));
-       load_cr3(pgd_base);
-       xen_pgd_unpin(__pa(old_pgd));
-       make_page_writable(old_pgd);
-       __flush_tlb_all();
-       free_bootmem(__pa(old_pgd), PAGE_SIZE);
-#else
-       {
-               pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
-               pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
-               pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
-
-               memcpy(new_pmd,  old_pmd, PAGE_SIZE);
-               memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
-               set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
-
-               make_page_readonly(new_pmd);
-               make_page_readonly(pgd_base);
-               xen_pgd_pin(__pa(pgd_base));
-               load_cr3(pgd_base);
-               xen_pgd_unpin(__pa(old_pgd));
-               make_page_writable(old_pgd);
-               make_page_writable(old_pmd);
-               __flush_tlb_all();
-
-               free_bootmem(__pa(old_pgd), PAGE_SIZE);
-               free_bootmem(__pa(old_pmd), PAGE_SIZE);
-       }
-#endif
 
        init_mm.context.pinned = 1;
        kernel_physical_mapping_init(pgd_base);
@@ -409,17 +379,6 @@
        page_table_range_init(vaddr, 0, pgd_base);
 
        permanent_kmaps_init(pgd_base);
-
-#if 0 /* def CONFIG_X86_PAE */
-       /*
-        * Add low memory identity-mappings - SMP needs it when
-        * starting up on an AP from real-mode. In the non-PAE
-        * case we already have these mappings through head.S.
-        * All user-space mappings are explicitly cleared after
-        * SMP startup.
-        */
-       set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
 }
 
 #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
@@ -630,6 +589,15 @@
        int tmp;
        int bad_ppro;
        unsigned long pfn;
+
+       contiguous_bitmap = alloc_bootmem_low_pages(
+               (max_low_pfn + 2*BITS_PER_LONG) >> 3);
+       BUG_ON(!contiguous_bitmap);
+       memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3);
+
+#if defined(CONFIG_SWIOTLB)
+       swiotlb_init(); 
+#endif
 
 #ifndef CONFIG_DISCONTIGMEM
        if (!mem_map)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Thu Aug 25 22:53:20 2005
@@ -36,6 +36,8 @@
 {
 }
 
+#ifdef __i386__
+
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
        return NULL;
@@ -44,6 +46,8 @@
 void __init bt_iounmap(void *addr, unsigned long size)
 {
 }
+
+#endif /* __i386__ */
 
 #else
 
@@ -58,7 +62,7 @@
        extern unsigned long max_low_pfn;
        unsigned long mfn = address >> PAGE_SHIFT;
        unsigned long pfn = mfn_to_pfn(mfn);
-       return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
+       return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn));
 }
 
 /*
@@ -126,10 +130,12 @@
                return NULL;
        area->phys_addr = phys_addr;
        addr = (void __iomem *) area->addr;
+       flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+#ifdef __x86_64__
+       flags |= _PAGE_USER;
+#endif
        if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
-                                   size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
-                                                  _PAGE_DIRTY | _PAGE_ACCESSED
-                                                  | flags), domid)) {
+                                   size, __pgprot(flags), domid)) {
                vunmap((void __force *) addr);
                return NULL;
        }
@@ -218,6 +224,8 @@
        kfree(p); 
 }
 
+#ifdef __i386__
+
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
        unsigned long offset, last_addr;
@@ -289,6 +297,8 @@
        }
 }
 
+#endif /* __i386__ */
+
 #endif /* CONFIG_XEN_PHYSDEV_ACCESS */
 
 /* These hacky macros avoid phys->machine translations. */
@@ -298,90 +308,20 @@
 #define direct_mk_pte_phys(physpage, pgprot) \
   __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
 
-static inline void direct_remap_area_pte(pte_t *pte, 
-                                        unsigned long address, 
-                                        unsigned long size,
-                                        mmu_update_t **v)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
-       if (address >= end)
-               BUG();
-
-       do {
-               (*v)->ptr = virt_to_machine(pte);
-               (*v)++;
-               address += PAGE_SIZE;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int direct_remap_area_pmd(struct mm_struct *mm,
-                                       pmd_t *pmd, 
-                                       unsigned long address, 
-                                       unsigned long size,
-                                       mmu_update_t **v)
-{
-       unsigned long end;
-
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
-       if (address >= end)
-               BUG();
-       do {
-               pte_t *pte = (mm == &init_mm) ? 
-                       pte_alloc_kernel(mm, pmd, address) :
-                       pte_alloc_map(mm, pmd, address);
-               if (!pte)
-                       return -ENOMEM;
-               direct_remap_area_pte(pte, address, end - address, v);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+
+static int direct_remap_area_pte_fn(pte_t *pte, 
+                                   struct page *pte_page,
+                                   unsigned long address, 
+                                   void *data)
+{
+       mmu_update_t **v = (mmu_update_t **)data;
+
+       (*v)->ptr = ((maddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+                    PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
+       (*v)++;
+
        return 0;
 }
- 
-int __direct_remap_area_pages(struct mm_struct *mm,
-                             unsigned long address, 
-                             unsigned long size, 
-                             mmu_update_t *v)
-{
-       pgd_t * dir;
-       unsigned long end = address + size;
-       int error;
-
-       dir = pgd_offset(mm, address);
-       if (address >= end)
-               BUG();
-       spin_lock(&mm->page_table_lock);
-       do {
-               pud_t *pud;
-               pmd_t *pmd;
-
-               error = -ENOMEM;
-               pud = pud_alloc(mm, dir, address);
-               if (!pud)
-                       break;
-               pmd = pmd_alloc(mm, pud, address);
-               if (!pmd)
-                       break;
-               error = 0;
-               direct_remap_area_pmd(mm, pmd, address, end - address, &v);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               dir++;
-
-       } while (address && (address < end));
-       spin_unlock(&mm->page_table_lock);
-       return error;
-}
-
 
 int direct_remap_area_pages(struct mm_struct *mm,
                            unsigned long address, 
@@ -393,7 +333,7 @@
        int i;
        unsigned long start_address;
 #define MAX_DIRECTMAP_MMU_QUEUE 130
-       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
+       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u, *w = u;
 
        start_address = address;
 
@@ -402,11 +342,10 @@
        for (i = 0; i < size; i += PAGE_SIZE) {
                if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
                        /* Fill in the PTE pointers. */
-                       __direct_remap_area_pages(mm,
-                                                 start_address, 
-                                                 address-start_address, 
-                                                 u);
- 
+                       generic_page_range(mm, start_address, 
+                                          address - start_address,
+                                          direct_remap_area_pte_fn, &w);
+                       w = u;
                        if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
                                return -EFAULT;
                        v = u;
@@ -417,7 +356,7 @@
                 * Fill in the machine address: PTE ptr is done later by
                 * __direct_remap_area_pages(). 
                 */
-               v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+               v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, prot));
 
                machine_addr += PAGE_SIZE;
                address += PAGE_SIZE; 
@@ -426,10 +365,8 @@
 
        if (v != u) {
                /* get the ptep's filled in */
-               __direct_remap_area_pages(mm,
-                                         start_address, 
-                                         address-start_address, 
-                                         u);
+               generic_page_range(mm, start_address, address - start_address,
+                                  direct_remap_area_pte_fn, &w);
                if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
                        return -EFAULT;
        }
@@ -440,3 +377,48 @@
 }
 
 EXPORT_SYMBOL(direct_remap_area_pages);
+
+static int lookup_pte_fn(
+       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       unsigned long *ptep = (unsigned long *)data;
+       if (ptep)
+               *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
+                        PAGE_SHIFT) |
+                       ((unsigned long)pte & ~PAGE_MASK);
+       return 0;
+}
+
+int create_lookup_pte_addr(struct mm_struct *mm, 
+                          unsigned long address,
+                          unsigned long *ptep)
+{
+       return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
+}
+
+EXPORT_SYMBOL(create_lookup_pte_addr);
+
+static int noop_fn(
+       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       return 0;
+}
+
+int touch_pte_range(struct mm_struct *mm,
+                   unsigned long address,
+                   unsigned long size)
+{
+       return generic_page_range(mm, address, size, noop_fn, NULL);
+} 
+
+EXPORT_SYMBOL(touch_pte_range);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Thu Aug 25 22:53:20 2005
@@ -25,6 +25,7 @@
 #include <asm/mmu_context.h>
 
 #include <asm-xen/foreign_page.h>
+#include <asm-xen/hypervisor.h>
 
 void show_mem(void)
 {
@@ -169,7 +170,7 @@
        __flush_tlb_one(vaddr);
 }
 
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
 {
        unsigned long address = __fix_to_virt(idx);
 
@@ -221,8 +222,8 @@
        unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
 
        if (!pte_write(*virt_to_ptep(va)))
-               HYPERVISOR_update_va_mapping(
-                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+               BUG_ON(HYPERVISOR_update_va_mapping(
+                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
 
        ClearPageForeign(pte);
        set_page_count(pte, 1);
@@ -274,6 +275,11 @@
 {
        unsigned long flags;
 
+#ifdef CONFIG_X86_PAE
+       /* this gives us a page below 4GB */
+       xen_create_contiguous_region((unsigned long)pgd, 0);
+#endif
+
        if (!HAVE_SHARED_KERNEL_PMD)
                spin_lock_irqsave(&pgd_lock, flags);
 
@@ -349,16 +355,17 @@
 
        if (!pte_write(*ptep)) {
                xen_pgd_unpin(__pa(pgd));
-               HYPERVISOR_update_va_mapping(
+               BUG_ON(HYPERVISOR_update_va_mapping(
                        (unsigned long)pgd,
                        pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
-                       0);
+                       0));
        }
 
        /* in the PAE case user pgd entries are overwritten before usage */
        if (PTRS_PER_PMD > 1) {
                for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                        pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
+                       make_page_writable(pmd);
                        kmem_cache_free(pmd_cache, pmd);
                }
                if (!HAVE_SHARED_KERNEL_PMD) {
@@ -444,9 +451,9 @@
 
        if (PageHighMem(page))
                return;
-       HYPERVISOR_update_va_mapping(
+       BUG_ON(HYPERVISOR_update_va_mapping(
                (unsigned long)__va(pfn << PAGE_SHIFT),
-               pfn_pte(pfn, flags), 0);
+               pfn_pte(pfn, flags), 0));
 }
 
 static void mm_walk(struct mm_struct *mm, pgprot_t flags)
@@ -485,10 +492,10 @@
     spin_lock(&mm->page_table_lock);
 
     mm_walk(mm, PAGE_KERNEL_RO);
-    HYPERVISOR_update_va_mapping(
+    BUG_ON(HYPERVISOR_update_va_mapping(
         (unsigned long)mm->pgd,
         pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
-        UVMF_TLB_FLUSH);
+        UVMF_TLB_FLUSH));
     xen_pgd_pin(__pa(mm->pgd));
     mm->context.pinned = 1;
     spin_lock(&mm_unpinned_lock);
@@ -503,9 +510,9 @@
     spin_lock(&mm->page_table_lock);
 
     xen_pgd_unpin(__pa(mm->pgd));
-    HYPERVISOR_update_va_mapping(
+    BUG_ON(HYPERVISOR_update_va_mapping(
         (unsigned long)mm->pgd,
-        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
     mm_walk(mm, PAGE_KERNEL);
     xen_tlb_flush();
     mm->context.pinned = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile   Thu Aug 25 22:53:20 2005
@@ -17,7 +17,7 @@
 c-pci-$(CONFIG_X86_VISWS)      := visws.o fixup.o
 pci-$(CONFIG_X86_VISWS)                :=
 c-pci-$(CONFIG_X86_NUMAQ)      := numa.o
-pci-$(CONFIG_X86_NUMAQ)                := irq.o
+l-pci-$(CONFIG_X86_NUMAQ)      := irq.o
 
 obj-y                          += $(pci-y)
 c-obj-y                                += $(c-pci-y) common.o
@@ -27,6 +27,7 @@
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
        @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
 
-obj-y  += $(c-obj-y) $(l-pci-y)
+# Make sure irq.o gets linked in before common.o
+obj-y  += $(patsubst common.o,$(l-pci-y) common.o,$(c-obj-y))
 
 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Thu Aug 25 22:53:20 2005
@@ -116,9 +116,9 @@
 #elif defined (__x86_64__)
 #define IRQ_REG orig_rax
 #endif
-#define do_IRQ(irq, regs) do {         \
-    (regs)->IRQ_REG = (irq);           \
-    do_IRQ((regs));                    \
+#define do_IRQ(irq, regs) do {                  \
+    (regs)->IRQ_REG = (irq);                    \
+    do_IRQ((regs));                             \
 } while (0)
 #endif
 
@@ -137,14 +137,14 @@
 /* NB. Interrupts are disabled on entry. */
 asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
 {
-    u32           l1, l2;
+    u32     l1, l2;
     unsigned int   l1i, l2i, port;
     int            irq, cpu = smp_processor_id();
     shared_info_t *s = HYPERVISOR_shared_info;
     vcpu_info_t   *vcpu_info = &s->vcpu_data[cpu];
 
     vcpu_info->evtchn_upcall_pending = 0;
-    
+
     /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
     l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
     while ( l1 != 0 )
@@ -158,9 +158,9 @@
             l2 &= ~(1 << l2i);
             
             port = (l1i << 5) + l2i;
-            if ( (irq = evtchn_to_irq[port]) != -1 )
+            if ( (irq = evtchn_to_irq[port]) != -1 ) {
                 do_IRQ(irq, regs);
-            else
+           } else
                 evtchn_device_upcall(port);
         }
     }
@@ -229,13 +229,14 @@
         if ( HYPERVISOR_event_channel_op(&op) != 0 )
             panic("Failed to unbind virtual IRQ %d\n", virq);
 
-       /* This is a slight hack.  Interdomain ports can be allocated
-          directly by userspace, and at that point they get bound by
-          Xen to vcpu 0.  We therefore need to make sure that if we
-          get an event on an event channel we don't know about vcpu 0
-          handles it.  Binding channels to vcpu 0 when closing them
-          achieves this. */
-       bind_evtchn_to_cpu(evtchn, 0);
+        /*
+         * This is a slight hack. Interdomain ports can be allocated directly 
+         * by userspace, and at that point they get bound by Xen to vcpu 0. We 
+         * therefore need to make sure that if we get an event on an event 
+         * channel we don't know about vcpu 0 handles it. Binding channels to 
+         * vcpu 0 when closing them achieves this.
+         */
+        bind_evtchn_to_cpu(evtchn, 0);
         evtchn_to_irq[evtchn] = -1;
         irq_to_evtchn[irq]    = -1;
         per_cpu(virq_to_irq, cpu)[virq]     = -1;
@@ -244,7 +245,75 @@
     spin_unlock(&irq_mapping_update_lock);
 }
 
-int bind_ipi_on_cpu_to_irq(int ipi)
+/* This is only used when restoring a vcpu from an xm save.  The ipi is
+   expected to have been bound before we suspended, and so all of the
+   xenolinux state is set up; we only need to restore the Xen side of
+   things.  The irq number has to be the same, but the evtchn number
+   can change. */
+void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
+{
+    evtchn_op_t op;
+    int evtchn;
+
+    spin_lock(&irq_mapping_update_lock);
+    /* Ask Xen for a new IPI port; only op.cmd is filled in for this call. */
+    op.cmd = EVTCHNOP_bind_ipi;
+    if ( HYPERVISOR_event_channel_op(&op) != 0 )
+       panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
+    evtchn = op.u.bind_ipi.port;
+
+    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
+          ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
+          evtchn);
+    /* Invalidate the old evtchn <-> irq mapping in both directions... */
+    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+    irq_to_evtchn[irq] = -1;
+    /* ...then tie the same irq to the port Xen just handed back. */
+    evtchn_to_irq[evtchn] = irq;
+    irq_to_evtchn[irq]    = evtchn;
+
+    printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
+          evtchn_to_irq[evtchn]);
+    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
+
+    bind_evtchn_to_cpu(evtchn, vcpu);
+
+    spin_unlock(&irq_mapping_update_lock);
+    /* Done outside the lock: clear the port's mask and pending bits. */
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
+}
+
+void _bind_virq_to_irq(int virq, int cpu, int irq)
+{
+    evtchn_op_t op;
+    int evtchn;
+    /* Bind virq to a new event channel and re-point the existing irq at it. */
+    spin_lock(&irq_mapping_update_lock);
+
+    op.cmd              = EVTCHNOP_bind_virq;
+    op.u.bind_virq.virq = virq;
+    if ( HYPERVISOR_event_channel_op(&op) != 0 )
+            panic("Failed to bind virtual IRQ %d\n", virq);
+    evtchn = op.u.bind_virq.port;
+    /* Forget the irq's old evtchn mapping (both directions)... */
+    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+    irq_to_evtchn[irq] = -1;
+    /* ...and record the new evtchn <-> irq pairing. */
+    evtchn_to_irq[evtchn] = irq;
+    irq_to_evtchn[irq]    = evtchn;
+
+    per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+    bind_evtchn_to_cpu(evtchn, cpu);
+
+    spin_unlock(&irq_mapping_update_lock);
+    /* Done outside the lock: clear the port's mask and pending bits. */
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
+}
+
+int bind_ipi_to_irq(int ipi)
 {
     evtchn_op_t op;
     int evtchn, irq;
@@ -269,7 +338,7 @@
     } 
     else
     {
-       irq = evtchn_to_irq[evtchn];
+        irq = evtchn_to_irq[evtchn];
     }
 
     irq_bindcount[irq]++;
@@ -284,29 +353,29 @@
     evtchn_op_t op;
     int cpu    = smp_processor_id();
     int evtchn = per_cpu(ipi_to_evtchn, cpu)[ipi];
-    int irq    = irq_to_evtchn[evtchn];
+    int irq    = evtchn_to_irq[evtchn];
 
     spin_lock(&irq_mapping_update_lock);
 
     if ( --irq_bindcount[irq] == 0 )
     {
-       op.cmd          = EVTCHNOP_close;
-       op.u.close.dom  = DOMID_SELF;
-       op.u.close.port = evtchn;
-       if ( HYPERVISOR_event_channel_op(&op) != 0 )
-           panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
-
-       /* See comments in unbind_virq_from_irq */
-       bind_evtchn_to_cpu(evtchn, 0);
+        op.cmd          = EVTCHNOP_close;
+        op.u.close.dom  = DOMID_SELF;
+        op.u.close.port = evtchn;
+        if ( HYPERVISOR_event_channel_op(&op) != 0 )
+            panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
+
+        /* See comments in unbind_virq_from_irq */
+        bind_evtchn_to_cpu(evtchn, 0);
         evtchn_to_irq[evtchn] = -1;
         irq_to_evtchn[irq]    = -1;
-       per_cpu(ipi_to_evtchn, cpu)[ipi] = 0;
+        per_cpu(ipi_to_evtchn, cpu)[ipi] = 0;
     }
 
     spin_unlock(&irq_mapping_update_lock);
 }
 
-int bind_evtchn_to_irq(int evtchn)
+int bind_evtchn_to_irq(unsigned int evtchn)
 {
     int irq;
 
@@ -326,7 +395,7 @@
     return irq;
 }
 
-void unbind_evtchn_from_irq(int evtchn)
+void unbind_evtchn_from_irq(unsigned int evtchn)
 {
     int irq = evtchn_to_irq[evtchn];
 
@@ -341,9 +410,36 @@
     spin_unlock(&irq_mapping_update_lock);
 }
 
+int bind_evtchn_to_irqhandler(
+    unsigned int evtchn,
+    irqreturn_t (*handler)(int, void *, struct pt_regs *),
+    unsigned long irqflags,
+    const char *devname,
+    void *dev_id)
+{
+    /* Map the event channel onto an irq, then attach the handler to it. */
+    unsigned int irq = bind_evtchn_to_irq(evtchn);
+    int rc = request_irq(irq, handler, irqflags, devname, dev_id);
+
+    if ( rc == 0 )
+        return 0;
+    /* request_irq() failed: undo the binding and pass its error code on. */
+    unbind_evtchn_from_irq(evtchn);
+    return rc;
+}
+
+void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id)
+{
+    /* Release the handler first, then drop the evtchn -> irq binding. */
+    free_irq(evtchn_to_irq[evtchn], dev_id);
+    unbind_evtchn_from_irq(evtchn);
+}
+
+#ifdef CONFIG_SMP
 static void do_nothing_function(void *ign)
 {
 }
+#endif
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
 static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
@@ -354,38 +450,37 @@
     spin_lock(&irq_mapping_update_lock);
     evtchn = irq_to_evtchn[irq];
     if (!VALID_EVTCHN(evtchn)) {
-       spin_unlock(&irq_mapping_update_lock);
-       return;
-    }
-
-    /* Tell Xen to send future instances of this interrupt to the
-       other vcpu */
+        spin_unlock(&irq_mapping_update_lock);
+        return;
+    }
+
+    /* Tell Xen to send future instances of this interrupt to other vcpu. */
     op.cmd = EVTCHNOP_bind_vcpu;
     op.u.bind_vcpu.port = evtchn;
     op.u.bind_vcpu.vcpu = tcpu;
 
-    /* If this fails, it usually just indicates that we're dealing
-       with a virq or IPI channel, which don't actually need to be
-       rebound.  Ignore it, but don't do the xenlinux-level rebind
-       in that case. */
+    /*
+     * If this fails, it usually just indicates that we're dealing with a virq 
+     * or IPI channel, which don't actually need to be rebound. Ignore it, 
+     * but don't do the xenlinux-level rebind in that case.
+     */
     if (HYPERVISOR_event_channel_op(&op) >= 0)
-       bind_evtchn_to_cpu(evtchn, tcpu);
+        bind_evtchn_to_cpu(evtchn, tcpu);
 
     spin_unlock(&irq_mapping_update_lock);
 
-    /* Now send the new target processor a NOP IPI.  When this
-       returns, it will check for any pending interrupts, and so
-       service any that got delivered to the wrong processor by
-       mistake. */
-    /* XXX: The only time this is called with interrupts disabled is
-       from the hotplug/hotunplug path.  In that case, all cpus are
-       stopped with interrupts disabled, and the missed interrupts
-       will be picked up when they start again.  This is kind of a
-       hack.
-    */
-    if (!irqs_disabled()) {
-       smp_call_function(do_nothing_function, NULL, 0, 0);
-    }
+    /*
+     * Now send the new target processor a NOP IPI. When this returns, it 
+     * will check for any pending interrupts, and so service any that got 
+     * delivered to the wrong processor by mistake.
+     * 
+     * XXX: The only time this is called with interrupts disabled is from the 
+     * hotplug/hotunplug path. In that case, all cpus are stopped with 
+     * interrupts disabled, and the missed interrupts will be picked up when 
+     * they start again. This is kind of a hack.
+     */
+    if (!irqs_disabled())
+        smp_call_function(do_nothing_function, NULL, 0, 0);
 }
 
 
@@ -585,6 +680,16 @@
     set_affinity_irq
 };
 
+void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
+{
+    shared_info_t *shinfo = HYPERVISOR_shared_info;
+    int port = irq_to_evtchn[i];
+    if ( VALID_EVTCHN(port) ) { /* irqs without a valid evtchn are ignored */
+        BUG_ON(!synch_test_bit(port, &shinfo->evtchn_mask[0]));
+        synch_set_bit(port, &shinfo->evtchn_pending[0]);
+    }
+}
+
 void irq_suspend(void)
 {
     int pirq, virq, irq, evtchn;
@@ -631,7 +736,7 @@
         evtchn = op.u.bind_virq.port;
         
         /* Record the new mapping. */
-       bind_evtchn_to_cpu(evtchn, 0);
+        bind_evtchn_to_cpu(evtchn, 0);
         evtchn_to_irq[evtchn] = irq;
         irq_to_evtchn[irq]    = evtchn;
 
@@ -655,9 +760,9 @@
 #endif
 
     for ( cpu = 0; cpu < NR_CPUS; cpu++ ) {
-       /* No VIRQ -> IRQ mappings. */
-       for ( i = 0; i < NR_VIRQS; i++ )
-           per_cpu(virq_to_irq, cpu)[i] = -1;
+        /* No VIRQ -> IRQ mappings. */
+        for ( i = 0; i < NR_VIRQS; i++ )
+            per_cpu(virq_to_irq, cpu)[i] = -1;
     }
 
     /* No event-channel -> IRQ mappings. */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Thu Aug 25 22:53:20 2005
@@ -34,44 +34,90 @@
 
 
 EXPORT_SYMBOL(gnttab_grant_foreign_access);
+EXPORT_SYMBOL(gnttab_end_foreign_access_ref);
 EXPORT_SYMBOL(gnttab_end_foreign_access);
 EXPORT_SYMBOL(gnttab_query_foreign_access);
 EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
+EXPORT_SYMBOL(gnttab_end_foreign_transfer_ref);
 EXPORT_SYMBOL(gnttab_end_foreign_transfer);
 EXPORT_SYMBOL(gnttab_alloc_grant_references);
 EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_reference);
 EXPORT_SYMBOL(gnttab_claim_grant_reference);
 EXPORT_SYMBOL(gnttab_release_grant_reference);
 EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
 EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
 
-static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
+
+static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+static int gnttab_free_count = NR_GRANT_ENTRIES;
 static grant_ref_t gnttab_free_head;
+static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
 
 static grant_entry_t *shared;
 
-/*
- * Lock-free grant-entry allocator
- */
-
-static inline int
-get_free_entry(
-    void)
-{
-    grant_ref_t fh, nfh = gnttab_free_head;
-    do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
-    while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
-                                    gnttab_free_list[fh])) != fh) );
-    return fh;
+static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
+
+/* Unlink `count` refs from the head of the free list as one chain ending in
+ * GNTTAB_LIST_END. Returns the first ref, or -1 if count < 1 or too few free. */
+static int
+get_free_entries(int count)
+{
+    unsigned long flags;
+    int ref = -1;
+    grant_ref_t head;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    if (count >= 1 && gnttab_free_count >= count) { /* <1 would leak a ref */
+        ref = head = gnttab_free_head;
+        gnttab_free_count -= count;
+        while (count-- > 1)
+            head = gnttab_list[head];
+        gnttab_free_head = gnttab_list[head];
+        gnttab_list[head] = GNTTAB_LIST_END;
+    }
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
+    return ref;
+}
+
+#define get_free_entry() get_free_entries(1)
+
+static void
+do_free_callbacks(void) /* run each queued callback whose count is now free */
+{
+    struct gnttab_free_callback *callback = gnttab_free_callback_list, *next;
+    gnttab_free_callback_list = NULL; /* detach; unsatisfied ones re-queue */
+    while (callback) {
+       next = callback->next; /* saved first: ->next is rewritten below */
+       if (gnttab_free_count >= callback->count) {
+           callback->next = NULL;
+           callback->fn(callback->arg);
+       } else {
+           callback->next = gnttab_free_callback_list; /* still waiting */
+           gnttab_free_callback_list = callback;
+       }
+       callback = next;
+    }
 }
 
 static inline void
-put_free_entry(
-    grant_ref_t ref)
-{
-    grant_ref_t fh, nfh = gnttab_free_head;
-    do { gnttab_free_list[ref] = fh = nfh; wmb(); }
-    while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+check_free_callbacks(void)
+{
+    if (unlikely(gnttab_free_callback_list))
+       do_free_callbacks();
+}
+
+static void
+put_free_entry(grant_ref_t ref) /* return one ref to the free list */
+{
+    unsigned long flags;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    gnttab_list[ref] = gnttab_free_head; /* push ref at the list head */
+    gnttab_free_head = ref;
+    gnttab_free_count++;
+    check_free_callbacks(); /* called with gnttab_list_lock still held */
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
 /*
@@ -79,8 +125,7 @@
  */
 
 int
-gnttab_grant_foreign_access(
-    domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
 {
     int ref;
     
@@ -96,8 +141,8 @@
 }
 
 void
-gnttab_grant_foreign_access_ref(
-    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+                               unsigned long frame, int readonly)
 {
     shared[ref].frame = frame;
     shared[ref].domid = domid;
@@ -107,7 +152,7 @@
 
 
 int
-gnttab_query_foreign_access( grant_ref_t ref )
+gnttab_query_foreign_access(grant_ref_t ref)
 {
     u16 nflags;
 
@@ -117,7 +162,7 @@
 }
 
 void
-gnttab_end_foreign_access( grant_ref_t ref, int readonly )
+gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
 {
     u16 flags, nflags;
 
@@ -127,13 +172,17 @@
             printk(KERN_ALERT "WARNING: g.e. still in use!\n");
     }
     while ( (nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags );
-
+}
+
+void
+gnttab_end_foreign_access(grant_ref_t ref, int readonly)
+{
+    gnttab_end_foreign_access_ref(ref, readonly);
     put_free_entry(ref);
 }
 
 int
-gnttab_grant_foreign_transfer(
-    domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
 {
     int ref;
 
@@ -149,8 +198,8 @@
 }
 
 void
-gnttab_grant_foreign_transfer_ref(
-    grant_ref_t ref, domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+                                 unsigned long pfn)
 {
     shared[ref].frame = pfn;
     shared[ref].domid = domid;
@@ -159,21 +208,13 @@
 }
 
 unsigned long
-gnttab_end_foreign_transfer(
-    grant_ref_t ref)
+gnttab_end_foreign_transfer_ref(grant_ref_t ref)
 {
     unsigned long frame = 0;
     u16           flags;
 
     flags = shared[ref].flags;
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    /*
-     * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
-     * if gnttab_donate executes without interruption???
-     */
-#else
-    ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-#endif
+
     /*
      * If a transfer is committed then wait for the frame address to appear.
      * Otherwise invalidate the grant entry against future use.
@@ -183,65 +224,91 @@
         while ( unlikely((frame = shared[ref].frame) == 0) )
             cpu_relax();
 
+    return frame;
+}
+
+unsigned long
+gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+    unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
     put_free_entry(ref);
-
     return frame;
 }
 
 void
-gnttab_free_grant_references( u16 count, grant_ref_t head )
-{
-    /* TODO: O(N)...? */
-    grant_ref_t to_die = 0, next = head;
-    int i;
-
-    for ( i = 0; i < count; i++ )
-    {
-        to_die = next;
-        next = gnttab_free_list[next];
-        put_free_entry( to_die );
+gnttab_free_grant_reference(grant_ref_t ref)
+{
+
+    put_free_entry(ref);
+}
+
+void
+gnttab_free_grant_references(grant_ref_t head)
+{
+    grant_ref_t ref;
+    unsigned long flags;
+    int count = 1;
+    if (head == GNTTAB_LIST_END)
+       return;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    ref = head;
+    while (gnttab_list[ref] != GNTTAB_LIST_END) {
+       ref = gnttab_list[ref];
+       count++;
     }
-}
-
-int
-gnttab_alloc_grant_references( u16 count,
-                               grant_ref_t *head,
-                               grant_ref_t *terminal )
-{
-    int i;
-    grant_ref_t h = gnttab_free_head;
-
-    for ( i = 0; i < count; i++ )
-        if ( unlikely(get_free_entry() == -1) )
-            goto not_enough_refs;
+    gnttab_list[ref] = gnttab_free_head;
+    gnttab_free_head = head;
+    gnttab_free_count += count;
+    check_free_callbacks();
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+int
+gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+    int h = get_free_entries(count);
+
+    if (h == -1)
+       return -ENOSPC;
 
     *head = h;
-    *terminal = gnttab_free_head;
 
     return 0;
-
-not_enough_refs:
-    gnttab_free_head = h;
-    return -ENOSPC;
-}
-
-int
-gnttab_claim_grant_reference( grant_ref_t *private_head,
-                              grant_ref_t  terminal )
-{
-    grant_ref_t g;
-    if ( unlikely((g = *private_head) == terminal) )
+}
+
+int
+gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+    grant_ref_t g = *private_head;
+    if (unlikely(g == GNTTAB_LIST_END))
         return -ENOSPC;
-    *private_head = gnttab_free_list[g];
+    *private_head = gnttab_list[g];
     return g;
 }
 
 void
-gnttab_release_grant_reference( grant_ref_t *private_head,
-                                grant_ref_t  release )
-{
-    gnttab_free_list[release] = *private_head;
+gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
+{
+    gnttab_list[release] = *private_head;
     *private_head = release;
+}
+
+void
+gnttab_request_free_callback(struct gnttab_free_callback *callback,
+                            void (*fn)(void *), void *arg, u16 count)
+{ /* Queue fn(arg) to be called once at least `count` refs are free. */
+    unsigned long flags;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    if (callback->next) /* non-NULL ->next is treated as "already queued" */
+       goto out;       /* NOTE(review): a queued tail entry has next == NULL */
+    callback->fn = fn;
+    callback->arg = arg;
+    callback->count = count;
+    callback->next = gnttab_free_callback_list; /* push at the list head */
+    gnttab_free_callback_list = callback;
+    check_free_callbacks(); /* may fire at once if enough refs are free */
+ out:
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
 /*
@@ -252,8 +319,9 @@
 
 static struct proc_dir_entry *grant_pde;
 
-static int grant_ioctl(struct inode *inode, struct file *file,
-                       unsigned int cmd, unsigned long data)
+static int
+grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+           unsigned long data)
 {
     int                     ret;
     privcmd_hypercall_t     hypercall;
@@ -291,8 +359,9 @@
     ioctl:  grant_ioctl,
 };
 
-static int grant_read(char *page, char **start, off_t off,
-                      int count, int *eof, void *data)
+static int
+grant_read(char *page, char **start, off_t off, int count, int *eof,
+          void *data)
 {
     int             len;
     unsigned int    i;
@@ -321,8 +390,9 @@
     return len;
 }
 
-static int grant_write(struct file *file, const char __user *buffer,
-                       unsigned long count, void *data)
+static int
+grant_write(struct file *file, const char __user *buffer, unsigned long count,
+           void *data)
 {
     /* TODO: implement this */
     return -ENOSYS;
@@ -330,7 +400,8 @@
 
 #endif /* CONFIG_PROC_FS */
 
-int gnttab_resume(void)
+int
+gnttab_resume(void)
 {
     gnttab_setup_table_t setup;
     unsigned long        frames[NR_GRANT_FRAMES];
@@ -349,7 +420,8 @@
     return 0;
 }
 
-int gnttab_suspend(void)
+int
+gnttab_suspend(void)
 {
     int i;
 
@@ -359,7 +431,8 @@
     return 0;
 }
 
-static int __init gnttab_init(void)
+static int __init
+gnttab_init(void)
 {
     int i;
 
@@ -368,7 +441,7 @@
     shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
 
     for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
-        gnttab_free_list[i] = i + 1;
+        gnttab_list[i] = i + 1;
     
 #ifdef CONFIG_PROC_FS
     /*
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Thu Aug 25 22:53:20 2005
@@ -1,7 +1,4 @@
-
 #define __KERNEL_SYSCALLS__
-static int errno;
-#include <linux/errno.h>
 #include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -9,13 +6,23 @@
 #include <linux/module.h>
 #include <linux/reboot.h>
 #include <linux/sysrq.h>
+#include <linux/stringify.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
-#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/xen-public/dom0_ops.h>
 #include <asm-xen/linux-public/suspend.h>
 #include <asm-xen/queues.h>
+#include <asm-xen/xenbus.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+
+#define SHUTDOWN_INVALID  -1
+#define SHUTDOWN_POWEROFF  0
+#define SHUTDOWN_REBOOT    1
+#define SHUTDOWN_SUSPEND   2
 
 void machine_restart(char * __unused)
 {
@@ -51,30 +58,76 @@
  */
 
 /* Ignore multiple shutdown requests. */
-static int shutting_down = -1;
-
-static void __do_suspend(void)
+static int shutting_down = SHUTDOWN_INVALID;
+
+#ifndef CONFIG_HOTPLUG_CPU
+#define cpu_down(x) (-EOPNOTSUPP)
+#define cpu_up(x) (-EOPNOTSUPP)
+#endif
+
+static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{ /* Pickle vcpu's context into ctxt, then turn its machine refs physical. */
+    int r; /* hypercall result, later reused as the GDT frame index */
+    int gdt_pages;
+    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+    if (r != 0)
+       panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+    /* Translate from machine to physical addresses where necessary,
+       so that they can be translated to our new machine address space
+       after resume.  libxc is responsible for doing this to vcpu0,
+       but we do it to the others. */
+    gdt_pages = (ctxt->gdt_ents + 511) / 512; /* round up, 512 per frame */
+    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+       ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+void _restore_vcpu(int cpu);
+
+atomic_t vcpus_rebooting;
+
+static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int r;
+    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+    /* Restart vcpu from ctxt; returns 0, or -1 if the boot hypercall fails.
+       Entry is redirected through _restore_vcpu.  This is kind of a hack,
+       and implicitly relies on the fact that the vcpu stops in a place
+       where all of the call clobbered registers are already dead. */
+    ctxt->user_regs.esp -= 4; /* one 4-byte slot to hold the saved eip */
+    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
+
+    /* De-canonicalise.  libxc handles this for vcpu 0, but we need
+       to do it for the other vcpus. */
+    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+       ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+    atomic_set(&vcpus_rebooting, 1); /* presumably cleared by the new vcpu */
+    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+    if (r != 0) {
+       printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+       return -1; /* NOTE(review): vcpus_rebooting is left at 1 here */
+    }
+
+    /* Make sure we wait for the new vcpu to come up before trying to do
+       anything with it or starting the next one. */
+    while (atomic_read(&vcpus_rebooting))
+       barrier();
+
+    return 0;
+}
+
+static int __do_suspend(void *ignore)
 {
     int i, j;
     suspend_record_t *suspend_record;
+    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
 
     /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
        /* XXX SMH: yes it would :-( */ 
-#ifdef CONFIG_XEN_BLKDEV_FRONTEND
-    extern void blkdev_suspend(void);
-    extern void blkdev_resume(void);
-#else
-#define blkdev_suspend() do{}while(0)
-#define blkdev_resume()  do{}while(0)
-#endif
-
-#ifdef CONFIG_XEN_NETDEV_FRONTEND
-    extern void netif_suspend(void);
-    extern void netif_resume(void);  
-#else
-#define netif_suspend() do{}while(0)
-#define netif_resume()  do{}while(0)
-#endif
 
 #ifdef CONFIG_XEN_USB_FRONTEND
     extern void usbif_resume();
@@ -82,37 +135,88 @@
 #define usbif_resume() do{}while(0)
 #endif
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     extern int gnttab_suspend(void);
     extern int gnttab_resume(void);
-#else
-#define gnttab_suspend() do{}while(0)
-#define gnttab_resume()  do{}while(0)
-#endif
-
+
+#ifdef CONFIG_SMP
+    extern void smp_suspend(void);
+    extern void smp_resume(void);
+#endif
     extern void time_suspend(void);
     extern void time_resume(void);
     extern unsigned long max_pfn;
     extern unsigned int *pfn_to_mfn_frame_list;
 
+    cpumask_t prev_online_cpus, prev_present_cpus;
+    int err = 0;
+
+    BUG_ON(smp_processor_id() != 0);
+    BUG_ON(in_interrupt());
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
+    if (num_online_cpus() > 1) {
+       printk(KERN_WARNING "Can't suspend SMP guests without CONFIG_HOTPLUG_CPU\n");
+       return -EOPNOTSUPP;
+    }
+#endif
+
     suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
     if ( suspend_record == NULL )
         goto out;
 
+    /* Take all of the other cpus offline.  We need to be careful not
+       to get preempted between the final test for num_online_cpus()
+       == 1 and disabling interrupts, since otherwise userspace could
+       bring another cpu online, and then we'd be stuffed.  At the
+       same time, cpu_down can reschedule, so we need to enable
+       preemption while doing that.  This kind of sucks, but should be
+       correct. */
+    /* (We don't need to worry about other cpus bringing stuff up,
+       since by the time num_online_cpus() == 1, there aren't any
+       other cpus) */
+    cpus_clear(prev_online_cpus);
+    preempt_disable();
+    while (num_online_cpus() > 1) {
+       preempt_enable();
+       for_each_online_cpu(i) {
+           if (i == 0)
+               continue;
+           err = cpu_down(i);
+           if (err != 0) {
+               printk(KERN_CRIT "Failed to take all CPUs down: %d.\n", err);
+               goto out_reenable_cpus;
+           }
+           cpu_set(i, prev_online_cpus);
+       }
+       preempt_disable();
+    }
+
     suspend_record->nr_pfns = max_pfn; /* final number of pfns */
 
     __cli();
+
+    preempt_enable();
+
+    cpus_clear(prev_present_cpus);
+    for_each_present_cpu(i) {
+       if (i == 0)
+           continue;
+       save_vcpu_context(i, &suspended_cpu_records[i]);
+       cpu_set(i, prev_present_cpus);
+    }
 
 #ifdef __i386__
     mm_pin_all();
     kmem_cache_shrink(pgd_cache);
 #endif
 
-    netif_suspend();
-
-    blkdev_suspend();
-
     time_suspend();
+
+#ifdef CONFIG_SMP
+    smp_suspend();
+#endif
+
+    xenbus_suspend();
 
     ctrl_if_suspend();
 
@@ -126,9 +230,11 @@
     memcpy(&suspend_record->resume_info, &xen_start_info,
            sizeof(xen_start_info));
 
-    HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
-
-    shutting_down = -1; 
+    /* We'll stop somewhere inside this hypercall.  When it returns,
+       we'll start resuming after the restore. */
+    HYPERVISOR_suspend(virt_to_mfn(suspend_record));
+
+    shutting_down = SHUTDOWN_INVALID; 
 
     memcpy(&xen_start_info, &suspend_record->resume_info,
            sizeof(xen_start_info));
@@ -142,10 +248,10 @@
     for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
     {
         pfn_to_mfn_frame_list[j] = 
-            virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+            virt_to_mfn(&phys_to_machine_mapping[i]);
     }
     HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
-        virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+        virt_to_mfn(pfn_to_mfn_frame_list);
 
     gnttab_resume();
 
@@ -153,19 +259,36 @@
 
     ctrl_if_resume();
 
+    xenbus_resume();
+
+#ifdef CONFIG_SMP
+    smp_resume();
+#endif
+
     time_resume();
 
-    blkdev_resume();
-
-    netif_resume();
-
     usbif_resume();
 
+    for_each_cpu_mask(i, prev_present_cpus) {
+       restore_vcpu_context(i, &suspended_cpu_records[i]);
+    }
+
     __sti();
+
+ out_reenable_cpus:
+    for_each_cpu_mask(i, prev_online_cpus) {
+       j = cpu_up(i);
+       if (j != 0) {
+           printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
+                  i, j);
+           err = j;
+       }
+    }
 
  out:
     if ( suspend_record != NULL )
         free_page((unsigned long)suspend_record);
+    return err;
 }
 
 static int shutdown_process(void *__unused)
@@ -186,7 +309,7 @@
 
     switch ( shutting_down )
     {
-    case CMSG_SHUTDOWN_POWEROFF:
+    case SHUTDOWN_POWEROFF:
         if ( execve("/sbin/poweroff", poweroff_argv, envp) < 0 )
         {
             sys_reboot(LINUX_REBOOT_MAGIC1,
@@ -196,7 +319,7 @@
         }
         break;
 
-    case CMSG_SHUTDOWN_REBOOT:
+    case SHUTDOWN_REBOOT:
         if ( execve("/sbin/reboot", restart_argv, envp) < 0 )
         {
             sys_reboot(LINUX_REBOOT_MAGIC1,
@@ -207,16 +330,28 @@
         break;
     }
 
-    shutting_down = -1; /* could try again */
+    shutting_down = SHUTDOWN_INVALID; /* could try again */
 
     return 0;
 }
 
+/* Run f(arg) in a new kthread bound to cpu; returns ERR_PTR on failure. */
+static struct task_struct *kthread_create_on_cpu(
+    int (*f)(void *arg), void *arg, const char *name, int cpu)
+{
+    struct task_struct *p = kthread_create(f, arg, name);
+    if (!IS_ERR(p)) { /* an ERR_PTR is not a task: never bind or wake it */
+        kthread_bind(p, cpu);
+        wake_up_process(p);
+    }
+    return p;
+}
+
 static void __shutdown_handler(void *unused)
 {
     int err;
 
-    if ( shutting_down != CMSG_SHUTDOWN_SUSPEND )
+    if ( shutting_down != SHUTDOWN_SUSPEND )
     {
         err = kernel_thread(shutdown_process, NULL, CLONE_FS | CLONE_FILES);
         if ( err < 0 )
@@ -224,46 +359,121 @@
     }
     else
     {
-        __do_suspend();
-    }
-}
-
-static void shutdown_handler(ctrl_msg_t *msg, unsigned long id)
+       kthread_create_on_cpu(__do_suspend, NULL, "suspender", 0);
+    }
+}
+
+static void shutdown_handler(struct xenbus_watch *watch, const char *node)
 {
     static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
 
-    if ( msg->subtype == CMSG_SHUTDOWN_SYSRQ )
-    {
-       int sysrq = ((shutdown_sysrq_t *)&msg->msg[0])->key;
-       
+    char *str;
+
+    str = (char *)xenbus_read("control", "shutdown", NULL);
+    /* Ignore read errors. */
+    if (IS_ERR(str))
+        return;
+    if (strlen(str) == 0) {
+        kfree(str);
+        return;
+    }
+
+    xenbus_write("control", "shutdown", "", O_CREAT);
+
+    if (strcmp(str, "poweroff") == 0)
+        shutting_down = SHUTDOWN_POWEROFF;
+    else if (strcmp(str, "reboot") == 0)
+        shutting_down = SHUTDOWN_REBOOT;
+    else if (strcmp(str, "suspend") == 0)
+        shutting_down = SHUTDOWN_SUSPEND;
+    else {
+        printk("Ignoring shutdown request: %s\n", str);
+        shutting_down = SHUTDOWN_INVALID;
+    }
+
+    kfree(str);
+
+    if (shutting_down != SHUTDOWN_INVALID)
+        schedule_work(&shutdown_work);
+}
+
 #ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_handler(struct xenbus_watch *watch, const char *node)
+{
+    char sysrq_key = '\0';
+    
+    if (!xenbus_scanf("control", "sysrq", "%c", &sysrq_key)) {
+        printk(KERN_ERR "Unable to read sysrq code in control/sysrq\n");
+        return;
+    }
+
+    xenbus_printf("control", "sysrq", "%c", '\0');
+
+    if (sysrq_key != '\0') {
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-       handle_sysrq(sysrq, NULL, NULL);
+        handle_sysrq(sysrq_key, NULL, NULL);
 #else
-       handle_sysrq(sysrq, NULL, NULL, NULL);
-#endif
-#endif
-    }
-    else if ( (shutting_down == -1) &&
-         ((msg->subtype == CMSG_SHUTDOWN_POWEROFF) ||
-          (msg->subtype == CMSG_SHUTDOWN_REBOOT) ||
-          (msg->subtype == CMSG_SHUTDOWN_SUSPEND)) )
-    {
-        shutting_down = msg->subtype;
-        schedule_work(&shutdown_work);
-    }
-    else
-    {
-        printk("Ignore spurious shutdown request\n");
-    }
-
-    ctrl_if_send_response(msg);
+        handle_sysrq(sysrq_key, NULL, NULL, NULL);
+#endif
+    }
+}
+#endif
+
+static struct xenbus_watch shutdown_watch = {
+    .node = "control/shutdown",
+    .callback = shutdown_handler
+};
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static struct xenbus_watch sysrq_watch = {
+    .node ="control/sysrq",
+    .callback = sysrq_handler
+};
+#endif
+
+static struct notifier_block xenstore_notifier;
+
+/* Notifier callback: register the control/shutdown watch and, with
+   CONFIG_MAGIC_SYSRQ, the control/sysrq watch.
+   NB: Assumes xenbus_lock is held!  Always returns NOTIFY_DONE. */
+static int setup_shutdown_watcher(struct notifier_block *notifier,
+                                  unsigned long event,
+                                  void *data)
+{
+    int err1 = 0;
+#ifdef CONFIG_MAGIC_SYSRQ
+    int err2 = 0;
+#endif
+
+    BUG_ON(down_trylock(&xenbus_lock) == 0); /* 0: lock was free, not held */
+
+    err1 = register_xenbus_watch(&shutdown_watch);
+#ifdef CONFIG_MAGIC_SYSRQ
+    err2 = register_xenbus_watch(&sysrq_watch);
+#endif
+
+    if (err1) { /* failures are only logged, never returned to the caller */
+        printk(KERN_ERR "Failed to set shutdown watcher\n");
+    }
+    
+#ifdef CONFIG_MAGIC_SYSRQ
+    if (err2) {
+        printk(KERN_ERR "Failed to set sysrq watcher\n");
+    }
+#endif
+
+    return NOTIFY_DONE;
 }
 
 static int __init setup_shutdown_event(void)
 {
-    ctrl_if_register_receiver(CMSG_SHUTDOWN, shutdown_handler, 0);
+    
+    xenstore_notifier.notifier_call = setup_shutdown_watcher;
+
+    register_xenstore_notifier(&xenstore_notifier);
+    
     return 0;
 }
 
-__initcall(setup_shutdown_event);
+subsys_initcall(setup_shutdown_event);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c     Thu Aug 25 22:53:20 2005
@@ -5,8 +5,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
@@ -14,34 +12,86 @@
 #include <linux/init.h>
 #include <asm/io.h>
 #include <asm/page.h>
-
-EXPORT_SYMBOL(__dev_alloc_skb);
+#include <asm-xen/hypervisor.h>
 
 /* Referenced in netback.c. */
 /*static*/ kmem_cache_t *skbuff_cachep;
 
-/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
-#define XEN_SKB_SIZE \
-    ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
+#define MAX_SKBUFF_ORDER 2
+static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
 
 struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
 {
-    struct sk_buff *skb;
-    skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
-    if ( likely(skb != NULL) )
-        skb_reserve(skb, 16);
-    return skb;
+       struct sk_buff *skb;
+       int order;   /* page order of the buffer; indexes skbuff_order_cachep[] */
+
+       length = SKB_DATA_ALIGN(length + 16);   /* +16 covers the headroom reserved below */
+       order = get_order(length + sizeof(struct skb_shared_info));   /* size includes struct skb_shared_info */
+       if (order > MAX_SKBUFF_ORDER) {   /* no cache is created for larger orders */
+               printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
+                      "Increase MAX_SKBUFF_ORDER.\n", order);
+               return NULL;
+       }
+
+       skb = alloc_skb_from_cache(
+               skbuff_order_cachep[order], length, gfp_mask);
+       if (skb != NULL)
+               skb_reserve(skb, 16);   /* reserve the 16 bytes added to length above */
+
+       return skb;   /* NULL if the order was too large or the allocation failed */
 }
 
 static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
 {
-    scrub_pages(buf, 1);
+       int order = 0;   /* becomes the index of cachep in skbuff_order_cachep[] */
+
+       while (skbuff_order_cachep[order] != cachep)   /* unbounded search: cachep must be in the table */
+               order++;
+
+       if (order != 0)   /* order-0 objects skip the contiguous-region call */
+               xen_create_contiguous_region((unsigned long)buf, order);
+
+       scrub_pages(buf, 1 << order);   /* scrub every page of the object */
+}
+
+static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
+{
+       int order = 0;   /* becomes the index of cachep in skbuff_order_cachep[] */
+
+       while (skbuff_order_cachep[order] != cachep)   /* unbounded search: cachep must be in the table */
+               order++;
+
+       if (order != 0)   /* mirrors skbuff_ctor: only objects above order 0 were made contiguous */
+               xen_destroy_contiguous_region((unsigned long)buf, order);
 }
 
 static int __init skbuff_init(void)
 {
-    skbuff_cachep = kmem_cache_create(
-        "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL);
-    return 0;
+       static char name[MAX_SKBUFF_ORDER + 1][20];   /* static storage: one name string per cache */
+       unsigned long size;
+       int order;
+
+       for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {   /* one cache per order, 0..MAX_SKBUFF_ORDER */
+               size = PAGE_SIZE << order;   /* object size in bytes; also passed as the alignment */
+               sprintf(name[order], "xen-skb-%lu", size);
+               skbuff_order_cachep[order] = kmem_cache_create(   /* result is not checked for NULL */
+                       name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+       }
+
+       skbuff_cachep = skbuff_order_cachep[0];   /* the exported pointer now aliases the order-0 cache */
+
+       return 0;
 }
 __initcall(skbuff_init);
+
+EXPORT_SYMBOL(__dev_alloc_skb);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig      Thu Aug 25 22:53:20 2005
@@ -125,6 +125,10 @@
 config X86_IO_APIC
        bool
        default XEN_PRIVILEGED_GUEST
+
+config X86_XEN_GENAPIC
+       bool
+       default XEN_PRIVILEGED_GUEST || SMP
 
 config X86_LOCAL_APIC
        bool
@@ -325,12 +329,12 @@
 # need this always enabled with GART_IOMMU for the VIA workaround
 config SWIOTLB
        bool
-       depends on GART_IOMMU
+       depends on PCI
        default y
 
 config DUMMY_IOMMU
        bool
-       depends on !GART_IOMMU && !SWIOTLB
+       depends on !GART_IOMMU
        default y
        help
          Don't use IOMMU code. This will cause problems when you have more than 4GB
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Makefile     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Makefile     Thu Aug 25 22:53:20 2005
@@ -79,14 +79,15 @@
 CFLAGS += $(xenflags-y)
 AFLAGS += $(xenflags-y)
 
-prepare: include/asm-$(XENARCH)/asm_offset.h
-CLEAN_FILES += include/asm-$(XENARCH)/asm_offset.h
+prepare: include/asm-$(XENARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(XENARCH)/asm_offsets.h
 
 arch/$(XENARCH)/kernel/asm-offsets.s: include/asm include/.asm-ignore \
        include/linux/version.h include/config/MARKER
 
+include/asm-$(XENARCH)/offset.h: arch/$(XENARCH)/kernel/asm-offsets.s
+       $(call filechk,gen-asm-offsets)
 
-include/asm-$(XENARCH)/asm_offset.h: arch/xen/x86_64/kernel/asm-offsets.s
-       $(call filechk,gen-asm-offsets)
-       ln -fsn asm_offset.h include/asm-$(XENARCH)/offset.h
-
+include/asm-$(XENARCH)/asm_offsets.h: include/asm-$(XENARCH)/offset.h
+       ln -fsn offset.h include/asm-$(XENARCH)/asm_offsets.h
+       ln -fsn offset.h include/asm-$(XENARCH)/asm_offset.h
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile        Thu Aug 25 22:53:20 2005
@@ -36,8 +36,8 @@
        $(call if_changed,syscall)
 
 AFLAGS_vsyscall-int80.o = -m32 -I$(obj)
-AFLAGS_vsyscall-sysenter.o = -m32
-AFLAGS_vsyscall-syscall.o = -m32
+AFLAGS_vsyscall-sysenter.o = -m32 -I$(obj)
+AFLAGS_vsyscall-syscall.o = -m32 -I$(obj)
 CFLAGS_ia32_ioctl.o += -Ifs/
 
 s-link := vsyscall-syscall.o vsyscall-sysenter.o vsyscall-sigreturn.o
@@ -48,13 +48,11 @@
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
        @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
 
-$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S ../../i386/kernel/vsyscall-note.S
-$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S
-$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-int80.o $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-syscall.o: \
+       $(obj)/vsyscall-sigreturn.S $(obj)/../../i386/kernel/vsyscall-note.S
 
-../../i386/kernel/vsyscall-note.S:
-       @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $(srctree)/arch/xen/i386/kernel/$(notdir $@)
-       make -C arch/xen/i386/kernel vsyscall-note.S
+$(obj)/../../i386/kernel/vsyscall-note.S:
+       @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
 
 obj-y  += $(c-obj-y) $(s-obj-y)
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c     Thu Aug 25 22:53:20 2005
@@ -128,8 +128,12 @@
 #endif
        return 0;
 } 
-       
-__initcall(init_syscall32); 
+
+/*
+ * This must be done early in case we have an initrd containing 32-bit
+ * binaries (e.g., hotplug). This could be pushed upstream to arch/x86_64.
+ */    
+core_initcall(init_syscall32); 
 
 /* May not be __init: called during resume */
 void syscall32_cpu_init(void)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Thu Aug 25 22:53:20 2005
@@ -25,30 +25,32 @@
 c-obj-$(CONFIG_X86_MSR)                += msr.o
 obj-$(CONFIG_MICROCODE)                += microcode.o
 obj-$(CONFIG_X86_CPUID)                += cpuid.o
-#obj-$(CONFIG_SMP)             += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP)              += smp.o smpboot.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o
-c-obj-$(CONFIG_X86_IO_APIC)    += genapic.o genapic_cluster.o genapic_flat.o
+obj-$(CONFIG_X86_XEN_GENAPIC)  += genapic.o genapic_xen.o
+c-obj-$(CONFIG_X86_IO_APIC)    += genapic_cluster.o genapic_flat.o
 #obj-$(CONFIG_PM)              += suspend.o
 #obj-$(CONFIG_SOFTWARE_SUSPEND)        += suspend_asm.o
 #obj-$(CONFIG_CPU_FREQ)                += cpufreq/
 #obj-$(CONFIG_EARLY_PRINTK)    += early_printk.o
 #obj-$(CONFIG_GART_IOMMU)      += pci-gart.o aperture.o
-c-obj-$(CONFIG_DUMMY_IOMMU)    += pci-nommu.o pci-dma.o
-#obj-$(CONFIG_SWIOTLB)         += swiotlb.o
+obj-$(CONFIG_DUMMY_IOMMU)      += pci-nommu.o
+i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o
+i386-obj-$(CONFIG_SWIOTLB)     += swiotlb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)     += pmtimer.o
 
 c-obj-$(CONFIG_MODULES)                += module.o
 
-#obj-y                         += topology.o
+obj-y                          += topology.o
 c-obj-y                                += intel_cacheinfo.o
 
 bootflag-y                     += ../../../i386/kernel/bootflag.o
 cpuid-$(subst m,y,$(CONFIG_X86_CPUID))  += ../../../i386/kernel/cpuid.o
 topology-y                     += ../../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB)      += ../../../ia64/lib/swiotlb.o
+#swiotlb-$(CONFIG_SWIOTLB)      += ../../../ia64/lib/swiotlb.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE))  += ../../../i386/kernel/microcode.o
 intel_cacheinfo-y              += ../../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y                       += ../../i386/kernel/quirks.o
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c        Thu Aug 25 22:53:20 2005
@@ -45,10 +45,11 @@
 
 void smp_local_timer_interrupt(struct pt_regs *regs)
 {
+
+       profile_tick(CPU_PROFILING, regs);
+#ifndef CONFIG_XEN
        int cpu = smp_processor_id();
 
-       profile_tick(CPU_PROFILING, regs);
-#if 0
        if (--per_cpu(prof_counter, cpu) <= 0) {
                /*
                 * The multiplier may have changed since the last time we got
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c        Thu Aug 25 22:53:20 2005
@@ -46,7 +46,7 @@
        e820.nr_map++;
 }
 
-#if 0
+#ifndef CONFIG_XEN
 extern char _end[];
 
 /* 
@@ -235,7 +235,6 @@
        }
 }
 
-
 void __init e820_print_map(char *who)
 {
        int i;
@@ -516,9 +515,31 @@
        printk(KERN_INFO "BIOS-provided physical RAM map:\n");
        e820_print_map(who);
 }
+
+#else  /* CONFIG_XEN */
+extern unsigned long xen_override_max_pfn;
+extern union xen_start_info_union xen_start_info_union;
+/*
+ * Guest physical starts from 0. Returns the larger of xen_override_max_pfn and xen_start_info.nr_pages.
+ */
+unsigned long __init e820_end_of_ram(void)
+{
+        unsigned long max_end_pfn = xen_start_info.nr_pages;
+
+       if ( xen_override_max_pfn <  max_end_pfn)
+               xen_override_max_pfn = max_end_pfn;   /* side effect: raises the global override */
+       
+        return xen_override_max_pfn;
+}
+
+
+
+void __init e820_reserve_resources(void) 
+{
+       return;                 /* Xen won't have reserved entries */
+}
+
 #endif
-
-extern unsigned long xen_override_max_pfn;
 
 void __init parse_memopt(char *p, char **from) 
 { 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c        Thu Aug 25 22:53:20 2005
@@ -5,6 +5,8 @@
 #include <linux/string.h>
 #include <asm/io.h>
 #include <asm/processor.h>
+
+#ifndef CONFIG_XEN
 
 /* Simple VGA output */
 
@@ -59,7 +61,6 @@
        .index =        -1,
 };
 
-#ifndef CONFIG_XEN
 /* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ 
 
 static int early_serial_base = 0x3f8;  /* ttyS0 */
@@ -148,7 +149,8 @@
        outb((divisor >> 8) & 0xff, early_serial_base + DLH); 
        outb(c & ~DLAB, early_serial_base + LCR);
 }
-#else
+
+#else /* CONFIG_XEN */
 
 static void
 early_serial_write(struct console *con, const char *s, unsigned count)
@@ -167,6 +169,13 @@
 static __init void early_serial_init(char *s)
 {
 }
+
+/*
+ * No early VGA console on Xen, as we do not have convenient ISA-space
+ * mappings. Someone should fix this for domain 0. For now, use fake serial.
+ */
+#define early_vga_console early_serial_console
+
 #endif
 
 static struct console early_serial_console = {
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S       Thu Aug 25 22:53:20 2005
@@ -128,7 +128,6 @@
          * #define VGCF_IN_SYSCALL (1<<8) 
          */
         .macro SWITCH_TO_USER flag
-        movl $0,%gs:pda_kernel_mode     # change to user mode
         subq $8*4,%rsp                   # reuse rip, cs, rflags, rsp, ss in the stack
         movq %rax,(%rsp)
         movq %r11,1*8(%rsp)
@@ -139,7 +138,6 @@
         .endm
 
         .macro SWITCH_TO_KERNEL ssoff,adjust=0
-       btsq $0,%gs:pda_kernel_mode
        jc  1f
        orb  $1,\ssoff-\adjust+4(%rsp)
 1:
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Thu Aug 25 22:53:20 2005
@@ -28,8 +28,6 @@
 #include <asm/page.h>
 #include <asm/msr.h>
 #include <asm/cache.h>
-/* #include <asm/thread_info.h> */
-        
        
 /* we are not able to switch in one step to the final KERNEL ADRESS SPACE
  * because we need identity-mapped pages on setup so define __START_KERNEL to
@@ -43,10 +41,9 @@
 startup_64:
 ENTRY(_start)
         cld                
-       movq init_rsp(%rip),%rsp
        /* Copy the necessary stuff from xen_start_info structure. */
        movq  $xen_start_info_union,%rdi
-       movq  $64,%rcx          /* sizeof (union xen_start_info_union) / sizeof (long) */
+       movq  $256,%rcx
        rep movsq
 
 #ifdef CONFIG_SMP
@@ -54,6 +51,7 @@
        cld
 #endif /* CONFIG_SMP */
 
+       movq init_rsp(%rip),%rsp
        /* zero EFLAGS after setting rsp */
        pushq $0
        popfq
@@ -116,15 +114,81 @@
 ENTRY(init_level4_user_pgt)
        .fill   512,8,0
 
+       /*
+        * In Xen the following pre-initialized pgt entries are re-initialized.
+        */
+.org 0x3000
+ENTRY(level3_kernel_pgt)
+       .fill   510,8,0
+       /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+       .quad   0x0000000000105007              /* -> level2_kernel_pgt */
+       .fill   1,8,0
+
+.org 0x4000
+ENTRY(level2_ident_pgt)
+       /* 40MB for bootup.     */
+       .quad   0x0000000000000283
+       .quad   0x0000000000200183
+       .quad   0x0000000000400183
+       .quad   0x0000000000600183
+       .quad   0x0000000000800183
+       .quad   0x0000000000A00183
+       .quad   0x0000000000C00183
+       .quad   0x0000000000E00183
+       .quad   0x0000000001000183
+       .quad   0x0000000001200183
+       .quad   0x0000000001400183
+       .quad   0x0000000001600183
+       .quad   0x0000000001800183
+       .quad   0x0000000001A00183
+       .quad   0x0000000001C00183
+       .quad   0x0000000001E00183
+       .quad   0x0000000002000183
+       .quad   0x0000000002200183
+       .quad   0x0000000002400183
+       .quad   0x0000000002600183
+       /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
+       .globl temp_boot_pmds
+temp_boot_pmds:
+       .fill   492,8,0
+
+.org 0x5000
+ENTRY(level2_kernel_pgt)
+       /* 40MB kernel mapping. The kernel code cannot be bigger than that.
+          When you change this change KERNEL_TEXT_SIZE in page.h too. */
+       /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
+       .quad   0x0000000000000183
+       .quad   0x0000000000200183
+       .quad   0x0000000000400183
+       .quad   0x0000000000600183
+       .quad   0x0000000000800183
+       .quad   0x0000000000A00183
+       .quad   0x0000000000C00183
+       .quad   0x0000000000E00183
+       .quad   0x0000000001000183
+       .quad   0x0000000001200183
+       .quad   0x0000000001400183
+       .quad   0x0000000001600183
+       .quad   0x0000000001800183
+       .quad   0x0000000001A00183
+       .quad   0x0000000001C00183
+       .quad   0x0000000001E00183
+       .quad   0x0000000002000183
+       .quad   0x0000000002200183
+       .quad   0x0000000002400183
+       .quad   0x0000000002600183
+       /* Module mapping starts here */
+       .fill   492,8,0
+       
         /*
          * This is used for vsyscall area mapping as we have a different
          * level4 page table for user.
          */
-.org 0x3000
+.org 0x6000
 ENTRY(level3_user_pgt)
         .fill  512,8,0
 
-.org 0x4000
+.org 0x7000
 ENTRY(cpu_gdt_table)
 /* The TLS descriptors are currently at a different place compared to i386.
    Hopefully nobody expects them at a fixed place (Wine?) */
@@ -140,26 +204,34 @@
        .quad   0,0                     /* TSS */
        .quad   0,0                     /* LDT */
        .quad   0,0,0                   /* three TLS descriptors */ 
-
-gdt_end:       
+       .quad   0                       /* unused now?   __KERNEL16_CS - 16bit PM for S3 wakeup. */
+
+gdt_end:
+#if 0
        /* asm/segment.h:GDT_ENTRIES must match this */ 
        /* This should be a multiple of the cache line size */
        /* GDTs of other CPUs: */       
        .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
-
-.org 0x5000
+#endif
+
+.org 0x8000
 ENTRY(empty_zero_page)
 
-.org 0x6000
+.org 0x9000
 ENTRY(empty_bad_page)
 
-.org 0x7000
+.org 0xa000
 ENTRY(empty_bad_pte_table)
 
-.org 0x8000
+.org 0xb000
 ENTRY(empty_bad_pmd_table)
 
-       .org 0x9000
+.org 0xc000
+ENTRY(level3_physmem_pgt)
+       .quad   0x0000000000105007              /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
+
+       
+       .org 0xd000
 #ifdef CONFIG_ACPI_SLEEP
 ENTRY(wakeup_level4_pgt)
        .quad   0x0000000000102007              /* -> level3_ident_pgt */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c      Thu Aug 25 22:53:20 2005
@@ -30,7 +30,6 @@
         unsigned int old_io_pl = current->thread.io_pl;
         physdev_op_t op;
 
-
        if (new_io_pl > 3)
                return -EINVAL;
 
@@ -38,16 +37,12 @@
        if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
                return -EPERM;
 
-       /* Maintain OS privileges even if user attempts to relinquish them. */
-       if (new_io_pl == 0)
-               new_io_pl = 1;
-
        /* Change our version of the privilege levels. */
        current->thread.io_pl = new_io_pl;
 
        /* Force the change at ring 0. */
        op.cmd             = PHYSDEVOP_SET_IOPL;
-       op.u.set_iopl.iopl = new_io_pl;
+       op.u.set_iopl.iopl = (new_io_pl == 0) ? 1 : new_io_pl;
        HYPERVISOR_physdev_op(&op);
 
        return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c Thu Aug 25 22:53:20 2005
@@ -21,6 +21,11 @@
 
 atomic_t irq_err_count;
 
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
 
 /*
  * Generic, controller-independent functions:
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c   Thu Aug 25 22:53:20 2005
@@ -61,6 +61,7 @@
 EXPORT_SYMBOL(dma_free_coherent);
 #endif
 
+#if 0
 int dma_supported(struct device *hwdev, u64 mask)
 {
         /*
@@ -76,6 +77,7 @@
        return 1;
 } 
 EXPORT_SYMBOL(dma_supported);
+#endif
 
 int dma_get_cache_alignment(void)
 {
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c     Thu Aug 25 22:53:20 2005
@@ -148,7 +148,6 @@
                        if (cpu_is_offline(cpu))
                                play_dead();
 
-                        __IRQ_STAT(cpu,idle_timestamp) = jiffies;
                        xen_idle();
                }
 
@@ -454,8 +453,8 @@
 #define C(i) do {                                                      \
        if (unlikely(next->tls_array[i] != prev->tls_array[i])) {       \
                mcl->op      = __HYPERVISOR_update_descriptor;          \
-               mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu)  \
-                                              [GDT_ENTRY_TLS_MIN + i]); \
+               mcl->args[0] = virt_to_machine(                         \
+                       &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
                mcl->args[1] = next->tls_array[i];                      \
                mcl++;                                                  \
        }                                                               \
@@ -465,7 +464,7 @@
 
        if (unlikely(prev->io_pl != next->io_pl)) {
                iopl_op.cmd             = PHYSDEVOP_SET_IOPL;
-               iopl_op.u.set_iopl.iopl = next->io_pl;
+               iopl_op.u.set_iopl.iopl = (next->io_pl == 0) ? 1 : next->io_pl;
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = (unsigned long)&iopl_op;
                mcl++;
@@ -475,7 +474,7 @@
                iobmp_op.cmd                     =
                        PHYSDEVOP_SET_IOBITMAP;
                iobmp_op.u.set_iobitmap.bitmap   =
-                       (unsigned long)next->io_bitmap_ptr;
+                       (char *)next->io_bitmap_ptr;
                iobmp_op.u.set_iobitmap.nr_ports =
                        next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op      = __HYPERVISOR_physdev_op;
@@ -744,3 +743,9 @@
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
 }
+
+#ifndef CONFIG_SMP
+void _restore_vcpu(void)   /* empty stub, compiled only when CONFIG_SMP is not set */
+{
+}
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Aug 25 22:53:20 2005
@@ -40,7 +40,6 @@
 #include <linux/acpi.h>
 #include <linux/kallsyms.h>
 #include <linux/edd.h>
-#include <linux/percpu.h>
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -58,27 +57,36 @@
 #include <asm/setup.h>
 #include <asm/mach_apic.h>
 #include <asm/numa.h>
+#ifdef CONFIG_XEN
+#include <linux/percpu.h>
 #include <asm-xen/xen-public/physdev.h>
 #include "setup_arch_pre.h"
 #include <asm-xen/hypervisor.h>
-
 #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
-
+#define end_pfn_map end_pfn
 #include <asm/mach-xen/setup_arch_post.h>
 
 extern unsigned long start_pfn;
-
-#if 0
-struct edid_info {
-        unsigned char dummy[128];
-};
-#endif
-
 extern struct edid_info edid_info;
+
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
 
 /* Allows setting of maximum possible memory size  */
 unsigned long xen_override_max_pfn;
+
+u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
+DEFINE_PER_CPU(int, nr_multicall_ents);
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
+#endif
+
 /*
  * Machine setup..
  */
@@ -166,7 +174,7 @@
 
 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
 static struct resource system_rom_resource = {
        .name = "System ROM",
        .start = 0xf0000,
@@ -200,12 +208,14 @@
 #define ADAPTER_ROM_RESOURCES \
        (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
 
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
 static struct resource video_rom_resource = {
        .name = "Video ROM",
        .start = 0xc0000,
        .end = 0xc7fff,
        .flags = IORESOURCE_ROM,
 };
+#endif
 
 static struct resource video_ram_resource = {
        .name = "Video RAM area",
@@ -214,7 +224,7 @@
        .flags = IORESOURCE_RAM,
 };
 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
 
 static int __init romchecksum(unsigned char *rom, unsigned long length)
@@ -292,33 +302,24 @@
 }
 #endif
 
-/*
- * Point at the empty zero page to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
-
-u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
-
-EXPORT_SYMBOL(phys_to_machine_mapping);
-
-DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
-DEFINE_PER_CPU(int, nr_multicall_ents);
-
-/* Raw start-of-day parameters from the hypervisor. */
-union xen_start_info_union xen_start_info_union;
 
 static __init void parse_cmdline_early (char ** cmdline_p)
 {
        char c = ' ', *to = command_line, *from = COMMAND_LINE;
-       int len = 0, max_cmdline;
-
+       int len = 0;
+
+       /* Save unparsed command line copy for /proc/cmdline */
+#ifdef CONFIG_XEN
+       int max_cmdline;
+       
        if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
                max_cmdline = COMMAND_LINE_SIZE;
        memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
-       /* Save unparsed command line copy for /proc/cmdline */
        saved_command_line[max_cmdline-1] = '\0';
+#else
+       memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
+       saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+#endif
 
        for (;;) {
                if (c != ' ') 
@@ -376,8 +377,7 @@
                        acpi_skip_timer_override = 1;
 #endif
 #endif
-
-#if 0
+#ifndef CONFIG_XEN
                if (!memcmp(from, "nolapic", 7) ||
                    !memcmp(from, "disableapic", 11))
                        disable_apic = 1;
@@ -389,8 +389,7 @@
                        skip_ioapic_setup = 0;
                        ioapic_force = 1;
                }
-#endif
-                       
+#endif                 
                if (!memcmp(from, "mem=", 4))
                        parse_memopt(from+4, &from); 
 
@@ -424,34 +423,28 @@
 }
 
 #ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_XEN
 static void __init contig_initmem_init(void)
 {
+        unsigned long bootmap_size = init_bootmem(start_pfn, end_pfn);   /* size returned by init_bootmem(); used in the reservation below */
+        free_bootmem(0, end_pfn << PAGE_SHIFT);   /* first free the whole range from 0 up to end_pfn */
+        reserve_bootmem(HIGH_MEMORY,   /* then reserve from HIGH_MEMORY up to PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 */
+                        (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1)
+                        - HIGH_MEMORY);
+}
+#else
+static void __init contig_initmem_init(void)
+{
         unsigned long bootmap_size, bootmap; 
-
-        /*
-        * partially used pages are not usable - thus
-        * we are rounding upwards:
-        */
-
         bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
-        bootmap = start_pfn;
-        bootmap_size = init_bootmem(bootmap, end_pfn);
+        bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
+        if (bootmap == -1L) 
+                panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+        e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT); 
         reserve_bootmem(bootmap, bootmap_size);
-        
-        free_bootmem(start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) << PAGE_SHIFT);   
-        printk("Registering memory for bootmem: from  %lx, size = %lx\n",
-                     start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) << PAGE_SHIFT);
-        /* 
-         * This should cover kernel_end
-         */
-#if 0
-        reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
-                                      bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
-#endif
-        reserve_bootmem(0, (PFN_PHYS(start_pfn) +
-                            bootmap_size + PAGE_SIZE-1));
-
 } 
+#endif /* !CONFIG_XEN */
 #endif
 
 /* Use inline assembly to define this because the nops are defined 
@@ -543,7 +536,7 @@
 }
 #endif
 
-#if 0
+#ifndef CONFIG_XEN
 #define EBDA_ADDR_POINTER 0x40E
 static void __init reserve_ebda_region(void)
 {
@@ -559,73 +552,17 @@
 }
 #endif
 
-/*
- * Guest physical starts from 0.
- */
-
-unsigned long __init xen_end_of_ram(void)
-{
-        unsigned long max_end_pfn = xen_start_info.nr_pages;
-
-       if ( xen_override_max_pfn <  max_end_pfn)
-               xen_override_max_pfn = max_end_pfn;
-       
-        return xen_override_max_pfn;
-}
-
-static void __init print_memory_map(char *who)
-{
-        int i;
-
-        for (i = 0; i < e820.nr_map; i++) {
-                early_printk(" %s: %016Lx - %016Lx ", who,
-                        e820.map[i].addr,
-                        e820.map[i].addr + e820.map[i].size);
-                switch (e820.map[i].type) {
-                case E820_RAM:  early_printk("(usable)\n");
-                                break;
-                case E820_RESERVED:
-                                early_printk("(reserved)\n");
-                                break;
-                case E820_ACPI:
-                                early_printk("(ACPI data)\n");
-                                break;
-                case E820_NVS:
-                                early_printk("(ACPI NVS)\n");
-                                break;
-                default:        early_printk("type %u\n", e820.map[i].type);
-                                break;
-                }
-        }
-}
-
 void __init setup_arch(char **cmdline_p)
 {
-       int i, j;
-       physdev_op_t op;
-
-#if 0
-       ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
-#else
+       unsigned long kernel_end;
+
+#ifdef CONFIG_XEN
        ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); 
-#endif
        drive_info = DRIVE_INFO;
-
+       kernel_end = 0;         /* dummy */
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
        screen_info = SCREEN_INFO;
-#endif
-       edid_info = EDID_INFO;
-       saved_video_mode = SAVED_VIDEO_MODE;
-       bootloader_type = LOADER_TYPE;
-
-#ifdef CONFIG_BLK_DEV_RAM
-       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
-       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
-       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
-/*        register_console(&xen_console); */
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
        /* This is drawn from a dump from vgacon:startup in standard Linux. */
        screen_info.orig_video_mode = 3; 
        screen_info.orig_video_isVGA = 1;
@@ -633,27 +570,56 @@
        screen_info.orig_video_cols = 80;
        screen_info.orig_video_ega_bx = 3;
        screen_info.orig_video_points = 16;
-#endif       
+#endif
+       edid_info = EDID_INFO;
+       saved_video_mode = SAVED_VIDEO_MODE;
+       bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+
+
+#endif
+
+       HYPERVISOR_vm_assist(VMASST_CMD_enable,
+                            VMASST_TYPE_writable_pagetables);
+
         ARCH_SETUP
-        print_memory_map(machine_specific_memory_setup());
-
-        /*     copy_edd();  */
+#else
+       ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+       drive_info = DRIVE_INFO;
+       screen_info = SCREEN_INFO;
+       edid_info = EDID_INFO;
+       saved_video_mode = SAVED_VIDEO_MODE;
+       bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+       setup_memory_region();
+       copy_edd();
+#endif /* !CONFIG_XEN */
 
        if (!MOUNT_ROOT_RDONLY)
                root_mountflags &= ~MS_RDONLY;
        init_mm.start_code = (unsigned long) &_text;
        init_mm.end_code = (unsigned long) &_etext;
        init_mm.end_data = (unsigned long) &_edata;
-/*     init_mm.brk = (unsigned long) &_end; */
+#ifdef CONFIG_XEN
         init_mm.brk = start_pfn << PAGE_SHIFT;
-
-
-#if 0  /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
+#else
+       init_mm.brk = (unsigned long) &_end;    
+
        code_resource.start = virt_to_phys(&_text);
        code_resource.end = virt_to_phys(&_etext)-1;
        data_resource.start = virt_to_phys(&_etext);
        data_resource.end = virt_to_phys(&_edata)-1;
 #endif
+
        parse_cmdline_early(cmdline_p);
 
        early_identify_cpu(&boot_cpu_data);
@@ -662,15 +628,11 @@
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-#if 0
        end_pfn = e820_end_of_ram();
-#else
-        end_pfn = xen_end_of_ram();
-#endif
 
        check_efer();
 
-       init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
+       init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
 
 #ifdef CONFIG_ACPI_NUMA
        /*
@@ -685,24 +647,25 @@
        contig_initmem_init(); 
 #endif
 
-       /* Reserve direct mapping and shared info etc. */
-//     reserve_bootmem_generic(table_start << PAGE_SHIFT, (table_end + 1 - table_start) << PAGE_SHIFT);
-
-//     reserve_bootmem_generic(0, (table_end + 1) << PAGE_SHIFT);
+#ifndef CONFIG_XEN
+       /* Reserve direct mapping */
+       reserve_bootmem_generic(table_start << PAGE_SHIFT, 
+                               (table_end - table_start) << PAGE_SHIFT);
 
        /* reserve kernel */
-//     kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
-
-#if 0
+       kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
+       reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
+
        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
         */
        reserve_bootmem_generic(0, PAGE_SIZE);
-#endif
 
        /* reserve ebda region */
-/*     reserve_ebda_region(); */
+       reserve_ebda_region();
+#endif
+
 
 #ifdef CONFIG_SMP
        /*
@@ -722,6 +685,7 @@
         */
        acpi_reserve_bootmem();
 #endif
+#ifdef CONFIG_XEN
 #ifdef CONFIG_BLK_DEV_INITRD
        if (xen_start_info.mod_start) {
                if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
@@ -738,6 +702,25 @@
                }
        }
 #endif
+#else  /* CONFIG_XEN */
+#ifdef CONFIG_BLK_DEV_INITRD
+       if (LOADER_TYPE && INITRD_START) {
+               if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
+                       reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
+                       initrd_start =
+                               INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+                       initrd_end = initrd_start+INITRD_SIZE;
+               }
+               else {
+                       printk(KERN_ERR "initrd extends beyond end of memory "
+                           "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+                           (unsigned long)(INITRD_START + INITRD_SIZE),
+                           (unsigned long)(end_pfn << PAGE_SHIFT));
+                       initrd_start = 0;
+               }
+       }
+#endif
+#endif /* !CONFIG_XEN */
        paging_init();
 #ifdef CONFIG_X86_LOCAL_APIC
        /*
@@ -745,30 +728,36 @@
         */
        find_smp_config();
 #endif
-       /* Make sure we have a large enough P->M table. */
-       if (end_pfn > xen_start_info.nr_pages) {
-               phys_to_machine_mapping = alloc_bootmem(
-                       max_pfn * sizeof(unsigned long));
-               memset(phys_to_machine_mapping, ~0,
-                       max_pfn * sizeof(unsigned long));
-               memcpy(phys_to_machine_mapping,
-                       (unsigned long *)xen_start_info.mfn_list,
-                       xen_start_info.nr_pages * sizeof(unsigned long));
-               free_bootmem(
-                       __pa(xen_start_info.mfn_list), 
-                       PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
-                       sizeof(unsigned long))));
-       }
-
-       pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
-
-       for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
-       {       
-            pfn_to_mfn_frame_list[j] = 
-                 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
-       }
-
-#if 0
+#ifdef CONFIG_XEN
+       {
+               int i, j;
+               /* Make sure we have a large enough P->M table. */
+               if (end_pfn > xen_start_info.nr_pages) {
+                       phys_to_machine_mapping = alloc_bootmem(
+                               max_pfn * sizeof(u32));
+                       memset(phys_to_machine_mapping, ~0,
+                              max_pfn * sizeof(u32));
+                       memcpy(phys_to_machine_mapping,
+                              (u32 *)xen_start_info.mfn_list,
+                              xen_start_info.nr_pages * sizeof(u32));
+                       free_bootmem(
+                               __pa(xen_start_info.mfn_list), 
+                               PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                                               sizeof(u32))));
+               }
+
+               pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
+
+               for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ )
+               {       
+                       pfn_to_mfn_frame_list[j] = 
+                               virt_to_mfn(&phys_to_machine_mapping[i]);
+               }
+
+       }
+#endif
+
+#ifndef CONFIG_XEN
        check_ioapic();
 #endif
 
@@ -784,6 +773,7 @@
         */
        acpi_boot_init();
 #endif
+
 #ifdef CONFIG_X86_LOCAL_APIC
        /*
         * get boot-time SMP configuration:
@@ -795,18 +785,14 @@
 #endif
 #endif
 
-        /* XXX Disable irqdebug until we have a way to avoid interrupt
-        * conflicts. */
-/*     noirqdebug_setup(""); */
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
        /*
         * Request address space for all standard RAM and ROM resources
         * and also for regions reported as reserved by the e820.
         */
        probe_roms();
-#endif
-/*     e820_reserve_resources();  */
+       e820_reserve_resources();
+#endif
 
        request_resource(&iomem_resource, &video_ram_resource);
 
@@ -823,14 +809,40 @@
        iommu_hole_init();
 #endif
 
-       op.cmd             = PHYSDEVOP_SET_IOPL;
-       op.u.set_iopl.iopl = current->thread.io_pl = 1;
-       HYPERVISOR_physdev_op(&op);
-
-       if (xen_start_info.flags & SIF_INITDOMAIN) {
-               if (!(xen_start_info.flags & SIF_PRIVILEGED))
-                       panic("Xen granted us console access "
-                             "but not privileged status");
+#ifdef CONFIG_XEN
+       {
+              physdev_op_t op;
+
+              op.cmd             = PHYSDEVOP_SET_IOPL;
+              op.u.set_iopl.iopl = 1;
+              HYPERVISOR_physdev_op(&op);
+
+              if (xen_start_info.flags & SIF_INITDOMAIN) {
+                      if (!(xen_start_info.flags & SIF_PRIVILEGED))
+                              panic("Xen granted us console access "
+                                    "but not privileged status");
+                      
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+              conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+              conswitchp = &dummy_con;
+#endif
+#endif
+              } else {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+                      extern const struct consw xennull_con;
+                      extern int console_use_vt;
+#if defined(CONFIG_VGA_CONSOLE)
+               /* disable VGA driver */
+                      ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
+#endif
+                      conswitchp = &xennull_con;
+                      console_use_vt = 0;
+#endif
+              }
+       }
+#else  /* CONFIG_XEN */
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
@@ -839,18 +851,8 @@
        conswitchp = &dummy_con;
 #endif
 #endif
-       } else {
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-               extern const struct consw xennull_con;
-               extern int console_use_vt;
-#if defined(CONFIG_VGA_CONSOLE)
-               /* disable VGA driver */
-               ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
-#endif
-               conswitchp = &xennull_con;
-               console_use_vt = 0;
-#endif
-       }
+
+#endif /* !CONFIG_XEN */
 }
 
 static int __init get_model_name(struct cpuinfo_x86 *c)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Thu Aug 25 22:53:20 2005
@@ -30,9 +30,9 @@
 #include <asm/proto.h>
 #include <asm/mman.h>
 #include <asm/numa.h>
-
+#ifdef CONFIG_XEN
 #include <asm-xen/hypervisor.h>
-
+#endif
 char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
 
 cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
@@ -123,16 +123,61 @@
        }
 } 
 
+#ifdef CONFIG_XEN
+static void switch_pt(void)
+{
+       xen_pt_switch(__pa(init_level4_pgt));
+        xen_new_user_pt(__pa(init_level4_user_pgt));
+}
+
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+       unsigned long frames[16];
+       unsigned long va;
+       int f;
+
+       for (va = gdt_descr->address, f = 0;
+            va < gdt_descr->address + gdt_descr->size;
+            va += PAGE_SIZE, f++) {
+               frames[f] = virt_to_mfn(va);
+               make_page_readonly((void *)va);
+       }
+       if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
+                               sizeof (struct desc_struct)))
+               BUG();
+}
+#else
+static void switch_pt(void)
+{
+       asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+}
+
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+#ifdef CONFIG_SMP
+       int cpu = stack_smp_processor_id();
+#else
+       int cpu = smp_processor_id();
+#endif
+
+       asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+       asm volatile("lidt %0" :: "m" (idt_descr));
+}
+#endif
+
+
 void pda_init(int cpu)
 { 
-        pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
        struct x8664_pda *pda = &cpu_pda[cpu];
 
        /* Setup up data that may be needed in __get_free_pages early */
        asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 
+#ifndef CONFIG_XEN
+       wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+#else
         HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, 
                                     (unsigned long)(cpu_pda + cpu));
-
+#endif
        pda->me = pda;
        pda->cpunumber = cpu; 
        pda->irqcount = -1;
@@ -140,21 +185,14 @@
                (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; 
        pda->active_mm = &init_mm;
        pda->mmu_state = 0;
-        pda->kernel_mode = 1;
 
        if (cpu == 0) {
-                memcpy((void *)init_level4_pgt, 
-                       (void *) xen_start_info.pt_base, PAGE_SIZE);
+#ifdef CONFIG_XEN
+               xen_init_pt();
+#endif
                /* others are initialized in smpboot.c */
                pda->pcurrent = &init_task;
                pda->irqstackptr = boot_cpu_stack; 
-                make_page_readonly(init_level4_pgt);
-                make_page_readonly(init_level4_user_pgt);
-                make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
-                xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
-                xen_pud_pin(__pa_symbol(level3_user_pgt));
-                set_pgd((pgd_t *)(init_level4_user_pgt + 511), 
-                        mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
        } else {
                pda->irqstackptr = (char *)
                        __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
@@ -162,20 +200,7 @@
                        panic("cannot allocate irqstack for cpu %d", cpu); 
        }
 
-       xen_pt_switch(__pa(init_level4_pgt));
-        xen_new_user_pt(__pa(init_level4_user_pgt));
-
-       if (cpu == 0) {
-                xen_pgd_unpin(__pa(old_level4));
-#if 0
-                early_printk("__pa: %x, <machine_phys> old_level 4 %x\n", 
-                             __pa(xen_start_info.pt_base),
-                             pfn_to_mfn(__pa(old_level4) >> PAGE_SHIFT));
-#endif
-//                make_page_writable(old_level4);
-//                free_bootmem(__pa(old_level4), PAGE_SIZE);
-        }
-
+       switch_pt();
        pda->irqstackptr += IRQSTACKSIZE-64;
 } 
 
@@ -185,6 +210,18 @@
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+#ifndef CONFIG_XEN
+       /* 
+        * LSTAR and STAR live in a bit strange symbiosis.
+        * They both write to the same internal register. STAR allows to set CS/DS
+        * but only a 32bit target. LSTAR sets the 64bit rip.    
+        */ 
+       wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
+       wrmsrl(MSR_LSTAR, system_call); 
+
+       /* Flags to clear on syscall */
+       wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); 
+#endif
 #ifdef CONFIG_IA32_EMULATION                   
        syscall32_cpu_init ();
 #endif
@@ -197,27 +234,8 @@
        rdmsrl(MSR_EFER, efer); 
         if (!(efer & EFER_NX) || do_not_nx) { 
                 __supported_pte_mask &= ~_PAGE_NX; 
-
         }       
 }
-
-void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
-{
-       unsigned long frames[16];
-       unsigned long va;
-       int f;
-
-       for (va = gdt_descr->address, f = 0;
-            va < gdt_descr->address + gdt_descr->size;
-            va += PAGE_SIZE, f++) {
-               frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
-               make_page_readonly((void *)va);
-       }
-       if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
-                               sizeof (struct desc_struct)))
-               BUG();
-}
-
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
@@ -247,36 +265,32 @@
 
        me = current;
 
-       if (test_and_set_bit(cpu, &cpu_initialized))
+       if (cpu_test_and_set(cpu, cpu_initialized))
                panic("CPU#%d already initialized!\n", cpu);
 
        printk("Initializing CPU#%d\n", cpu);
 
-#if 0
                clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-#endif
+
        /*
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
         */
+#ifndef CONFIG_XEN 
        if (cpu) {
                memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
        }       
 
        cpu_gdt_descr[cpu].size = GDT_SIZE;
        cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
-       asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
-       asm volatile("lidt %0" :: "m" (idt_descr));
-#endif
-        cpu_gdt_init(&cpu_gdt_descr[cpu]);
-
-#if 0
+
        memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
-
-#endif
-       memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
+#else
+       memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
            GDT_ENTRY_TLS_ENTRIES * 8);
+
+       cpu_gdt_init(&cpu_gdt_descr[cpu]);
+#endif
        
        /*
         * Delete NT
@@ -284,12 +298,12 @@
 
        asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
 
-       if (cpu == 0) 
-               early_identify_cpu(&boot_cpu_data);
-
        syscall_init();
 
+       wrmsrl(MSR_FS_BASE, 0);
+       wrmsrl(MSR_KERNEL_GS_BASE, 0);
        barrier(); 
+
        check_efer();
 
        /*
@@ -321,19 +335,22 @@
                BUG();
        enter_lazy_tlb(&init_mm, me);
 
+#ifndef CONFIG_XEN
+       set_tss_desc(cpu, t);
+       load_TR_desc();
+#endif
        load_LDT(&init_mm.context);
 
        /*
         * Clear all 6 debug registers:
         */
-#define CD(register) HYPERVISOR_set_debugreg(register, 0)
-
-       CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+
+       set_debug(0UL, 0);
+       set_debug(0UL, 1);
+       set_debug(0UL, 2);
+       set_debug(0UL, 3);
+       set_debug(0UL, 6);
+       set_debug(0UL, 7);
+
        fpu_init(); 
-
-#ifdef CONFIG_NUMA
-       numa_add_cpu(cpu);
-#endif
-}
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c Thu Aug 25 22:53:20 2005
@@ -28,7 +28,12 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/apicdef.h>
-
+#ifdef CONFIG_XEN
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+#else
 /*
  *     Smarter SMP flushing macros. 
  *             c/o Linus Torvalds.
@@ -44,6 +49,7 @@
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
 #define FLUSH_ALL      -1ULL
+#endif
 
 /*
  * We cannot call mmdrop() because we are in interrupt context, 
@@ -57,6 +63,7 @@
        load_cr3(swapper_pg_dir);
 }
 
+#ifndef CONFIG_XEN
 /*
  *
  * The flush IPI assumes that a thread switch happens in this order:
@@ -250,6 +257,18 @@
 {
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 }
+#else
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
 
 void smp_kdb_stop(void)
 {
@@ -310,13 +329,21 @@
 
        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
+#ifndef CONFIG_XEN
                cpu_relax();
+#else
+               barrier();
+#endif
 
        if (!wait)
                return;
 
        while (atomic_read(&data.finished) != cpus)
+#ifndef CONFIG_XEN
                cpu_relax();
+#else
+               barrier();
+#endif
 }
 
 /*
@@ -350,7 +377,11 @@
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        local_irq_disable();
+#ifndef CONFIG_XEN
        disable_local_APIC();
+#else
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#endif
        local_irq_enable(); 
 }
 
@@ -364,8 +395,10 @@
 void smp_send_stop(void)
 {
        int nolock = 0;
+#ifndef CONFIG_XEN
        if (reboot_force)
                return;
+#endif
        /* Don't deadlock on the call lock in panic */
        if (!spin_trylock(&call_lock)) {
                /* ignore locking because we have paniced anyways */
@@ -376,7 +409,11 @@
                spin_unlock(&call_lock);
 
        local_irq_disable();
+#ifdef CONFIG_XEN
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable();
 }
 
@@ -385,18 +422,32 @@
  * all the work is done automatically when
  * we return from the interrupt.
  */
+#ifndef CONFIG_XEN
 asmlinkage void smp_reschedule_interrupt(void)
-{
+#else
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
+#endif
+{
+#ifndef CONFIG_XEN
        ack_APIC_irq();
-}
-
+#else
+       return IRQ_HANDLED;
+#endif
+}
+
+#ifndef CONFIG_XEN
 asmlinkage void smp_call_function_interrupt(void)
+#else
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
+#endif
 {
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;
 
+#ifndef CONFIG_XEN
        ack_APIC_irq();
+#endif
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
@@ -413,10 +464,16 @@
                mb();
                atomic_inc(&call_data->finished);
        }
+#ifdef CONFIG_XEN
+       return IRQ_HANDLED;
+#endif
 }
 
 int safe_smp_processor_id(void)
 {
+#ifdef CONFIG_XEN
+       return smp_processor_id();
+#else
        int apicid, i;
 
        if (disable_apic)
@@ -437,4 +494,5 @@
                return 0;
 
        return 0; /* Should not happen */
-}
+#endif
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Thu Aug 25 22:53:20 2005
@@ -47,6 +47,9 @@
 #include <linux/bootmem.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
+#ifdef CONFIG_XEN
+#include <linux/interrupt.h>
+#endif
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -57,11 +60,20 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/nmi.h>
+#ifdef CONFIG_XEN
+#include <asm/arch_hooks.h>
+
+#include <asm-xen/evtchn.h>
+#endif
 
 /* Change for real CPU hotplug. Note other files need to be fixed
    first too. */
 #define __cpuinit __init
 #define __cpuinitdata __initdata
+
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       unsigned int maxcpus = NR_CPUS;
+#endif
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -96,6 +108,7 @@
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 
+#ifndef CONFIG_XEN
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -115,6 +128,7 @@
        memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(tramp);
 }
+#endif
 
 /*
  * The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +144,7 @@
        print_cpu_info(c);
 }
 
+#ifndef CONFIG_XEN
 /*
  * New Funky TSC sync algorithm borrowed from IA64.
  * Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +346,7 @@
        return 0;
 }
 __setup("notscsync", notscsync_setup);
+#endif
 
 static atomic_t init_deasserted __cpuinitdata;
 
@@ -343,6 +359,7 @@
        int cpuid, phys_id;
        unsigned long timeout;
 
+#ifndef CONFIG_XEN
        /*
         * If waken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
@@ -352,10 +369,15 @@
        while (!atomic_read(&init_deasserted))
                cpu_relax();
 
+#endif
        /*
         * (This works even if the APIC is not enabled.)
         */
+#ifndef CONFIG_XEN
        phys_id = GET_APIC_ID(apic_read(APIC_ID));
+#else
+       phys_id = smp_processor_id();
+#endif
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +411,7 @@
                        cpuid);
        }
 
+#ifndef CONFIG_XEN
        /*
         * the boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
@@ -398,6 +421,7 @@
 
        Dprintk("CALLIN, before setup_local_APIC().\n");
        setup_local_APIC();
+#endif
 
        /*
         * Get our bogomips.
@@ -405,7 +429,9 @@
        calibrate_delay();
        Dprintk("Stack at about %p\n",&cpuid);
 
+#ifndef CONFIG_XEN
        disable_APIC_timer();
+#endif
 
        /*
         * Save our processor parameters
@@ -417,6 +443,29 @@
         */
        cpu_set(cpuid, cpu_callin_map);
 }
+
+#ifdef CONFIG_XEN
+static irqreturn_t ldebug_interrupt(
+       int irq, void *dev_id, struct pt_regs *regs)
+{
+       return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+       sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+       BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+                          SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+extern void local_setup_timer(void);
+#endif
 
 /*
  * Setup code on secondary processor (after comming out of the trampoline)
@@ -434,6 +483,7 @@
        /* otherwise gcc will move up the smp_processor_id before the cpu_init */
        barrier();
 
+#ifndef CONFIG_XEN
        Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());         
        setup_secondary_APIC_clock();
 
@@ -446,6 +496,12 @@
        }
 
        enable_APIC_timer();
+#else
+       local_setup_timer();
+       ldebug_setup();
+       smp_intr_init();
+       local_irq_enable();
+#endif
 
        /*
         * Allow the master to continue.
@@ -453,10 +509,12 @@
        cpu_set(smp_processor_id(), cpu_online_map);
        mb();
 
+#ifndef CONFIG_XEN
        /* Wait for TSC sync to not schedule things before.
           We still process interrupts, which could see an inconsistent
           time in that window unfortunately. */
        tsc_sync_wait();
+#endif
 
        cpu_idle();
 }
@@ -464,6 +522,7 @@
 extern volatile unsigned long init_rsp;
 extern void (*initial_code)(void);
 
+#ifndef CONFIG_XEN
 #if APIC_DEBUG
 static void inquire_remote_apic(int apicid)
 {
@@ -627,6 +686,7 @@
 
        return (send_status | accept_status);
 }
+#endif
 
 /*
  * Boot one CPU.
@@ -637,6 +697,14 @@
        unsigned long boot_error;
        int timeout;
        unsigned long start_rip;
+#ifdef CONFIG_XEN
+       vcpu_guest_context_t ctxt;
+       extern void startup_64_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern void smp_trap_init(trap_info_t *);
+       int i;
+#endif
        /*
         * We can't use kernel_thread since we must avoid to
         * reschedule the child.
@@ -649,7 +717,11 @@
 
        cpu_pda[cpu].pcurrent = idle;
 
+#ifndef CONFIG_XEN
        start_rip = setup_trampoline();
+#else
+       start_rip = (unsigned long)startup_64_smp;
+#endif
 
        init_rsp = idle->thread.rsp;
        per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +738,95 @@
 
        atomic_set(&init_deasserted, 0);
 
+#ifdef CONFIG_XEN
+       cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+       BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+               (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.flags = VGCF_IN_KERNEL;
+       ctxt.user_regs.ds = __USER_DS;
+       ctxt.user_regs.es = __USER_DS;
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS|0x3;
+       ctxt.user_regs.cs = __KERNEL_CS|0x3;
+       ctxt.user_regs.rip = start_rip;
+       ctxt.user_regs.rsp = idle->thread.rsp;
+#define X86_EFLAGS_IOPL_RING3 0x3000
+       ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING3;
+
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_mfn(va);
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = GDT_ENTRIES;
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = idle->thread.rsp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+       ctxt.syscall_callback_eip  = (unsigned long)system_call;
+
+       ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+       if (boot_error)
+               printk("boot error: %ld\n", boot_error);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+#else
        Dprintk("Setting warm reset code and vector.\n");
 
        CMOS_WRITE(0xa, 0xf);
@@ -729,6 +890,7 @@
 #endif
                }
        }
+#endif
        if (boot_error) {
                cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
                clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -790,6 +952,7 @@
        }
 }
 
+#ifndef CONFIG_XEN
 /*
  * Cleanup possible dangling ends...
  */
@@ -817,6 +980,7 @@
        free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
 #endif
 }
+#endif
 
 /*
  * Fall back to non SMP mode after errors.
@@ -827,10 +991,12 @@
 {
        cpu_present_map = cpumask_of_cpu(0);
        cpu_possible_map = cpumask_of_cpu(0);
+#ifndef CONFIG_XEN
        if (smp_found_config)
                phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
        else
                phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
        cpu_set(0, cpu_sibling_map[0]);
        cpu_set(0, cpu_core_map[0]);
 }
@@ -857,6 +1023,7 @@
  */
 static int __cpuinit smp_sanity_check(unsigned max_cpus)
 {
+#ifndef CONFIG_XEN
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                       hard_smp_processor_id());
@@ -896,13 +1063,19 @@
                nr_ioapics = 0;
                return -1;
        }
+#endif
 
        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
+#ifdef CONFIG_XEN
+               HYPERVISOR_shared_info->n_vcpu = 1;
+#endif
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy 
APIC emulation.\n");
+#ifndef CONFIG_XEN
                nr_ioapics = 0;
+#endif
                return -1;
        }
 
@@ -917,7 +1090,10 @@
 {
        int i;
 
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
        nmi_watchdog_default();
+#endif
        current_cpu_data = boot_cpu_data;
        current_thread_info()->cpu = 0;  /* needed? */
 
@@ -927,8 +1103,12 @@
         * Fill in cpu_present_mask
         */
        for (i = 0; i < NR_CPUS; i++) {
+#ifndef CONFIG_XEN
                int apicid = cpu_present_to_apicid(i);
                if (physid_isset(apicid, phys_cpu_present_map)) {
+#else
+               if (i < HYPERVISOR_shared_info->n_vcpu) {
+#endif
                        cpu_set(i, cpu_present_map);
                        /* possible map would be different if we supported real
                           CPU hotplug. */
@@ -942,6 +1122,9 @@
                return;
        }
 
+#ifdef CONFIG_XEN
+       smp_intr_init();
+#else
 
        /*
         * Switch from PIC to APIC mode.
@@ -954,20 +1137,26 @@
                      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
                /* Or can we switch back to PIC here? */
        }
+#endif
 
        /*
         * Now start the IO-APICs
         */
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
        else
                nr_ioapics = 0;
+#endif
 
        /*
         * Set up local APIC timer on boot CPU.
         */
 
+#ifndef CONFIG_XEN
        setup_boot_APIC_clock();
+#endif
 }
 
 /*
@@ -989,17 +1178,23 @@
 int __cpuinit __cpu_up(unsigned int cpu)
 {
        int err;
+#ifndef CONFIG_XEN
        int apicid = cpu_present_to_apicid(cpu);
+#else
+       int apicid = cpu;
+#endif
 
        WARN_ON(irqs_disabled());
 
        Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
 
+#ifndef CONFIG_XEN
        if (apicid == BAD_APICID || apicid == boot_cpu_id ||
            !physid_isset(apicid, phys_cpu_present_map)) {
                printk("__cpu_up: bad cpu %d\n", cpu);
                return -EINVAL;
        }
+#endif
 
        /* Boot it! */
        err = do_boot_cpu(cpu, apicid);
@@ -1021,15 +1216,82 @@
  */
 void __cpuinit smp_cpus_done(unsigned int max_cpus)
 {
+#ifndef CONFIG_XEN
        zap_low_mappings();
        smp_cleanup_boot();
 
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
 #endif
+#endif
 
        detect_siblings();
+#ifndef CONFIG_XEN
        time_init_gtod();
 
        check_nmi_watchdog();
-}
+#endif
+}
+
+#ifdef CONFIG_XEN
+extern int bind_ipi_to_irq(int ipi);
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void smp_intr_init(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(resched_irq, cpu) =
+               bind_ipi_to_irq(RESCHEDULE_VECTOR);
+       sprintf(resched_name[cpu], "resched%d", cpu);
+       BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+                          SA_INTERRUPT, resched_name[cpu], NULL));
+
+       per_cpu(callfunc_irq, cpu) =
+               bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+       sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+       BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+                          smp_call_function_interrupt,
+                          SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
+
+static void smp_intr_exit(void)
+{
+       int cpu = smp_processor_id();
+
+       free_irq(per_cpu(resched_irq, cpu), NULL);
+       unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+       free_irq(per_cpu(callfunc_irq, cpu), NULL);
+       unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
+void smp_suspend(void)
+{
+       /* XXX todo: take down time and IPIs on all cpus */
+       local_teardown_timer_irq();
+       smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+       /* XXX todo: restore time and IPIs on all cpus */
+       smp_intr_init();
+       local_setup_timer_irq();
+}
+
+void _restore_vcpu(void)
+{
+       /* XXX need to write this */
+}
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c       Wed Aug 24 
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c       Thu Aug 25 
22:53:20 2005
@@ -953,6 +953,17 @@
        cpu_init();
 }
 
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+       trap_info_t *t = trap_table;
+
+       for (t = trap_table; t->address; t++) {
+               trap_ctxt[t->vector].flags = t->flags;
+               trap_ctxt[t->vector].cs = t->cs;
+               trap_ctxt[t->vector].address = t->address;
+       }
+}
+
 
 /* Actual parsing is done early in setup.c. */
 static int __init oops_dummy(char *s)
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c    Wed Aug 24 
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c    Thu Aug 25 
22:53:20 2005
@@ -210,15 +210,16 @@
        __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
 }
 
-extern void __set_fixmap_user (enum fixed_addresses, unsigned long, pgprot_t);
-
+#ifdef CONFIG_XEN
 static void __init map_vsyscall_user(void)
 {
+       extern void __set_fixmap_user(enum fixed_addresses, unsigned long, 
pgprot_t);
        extern char __vsyscall_0;
        unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
 
        __set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0, 
PAGE_KERNEL_VSYSCALL);
 }
+#endif
 
 static int __init vsyscall_init(void)
 {
@@ -227,7 +228,10 @@
        BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
        BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
        map_vsyscall();
-        map_vsyscall_user();    /* establish tranlation for user address space 
*/
+#ifdef CONFIG_XEN
+       map_vsyscall_user();
+       sysctl_vsyscall = 0; /* disable vgettimeofday() */
+#endif
 #ifdef CONFIG_SYSCTL
        register_sysctl_table(kernel_root_table2, 0);
 #endif
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S   Wed Aug 24 
02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S   Thu Aug 25 
22:53:20 2005
@@ -8,11 +8,14 @@
 #define sizeof_vcpu_shift              3
 
 #ifdef CONFIG_SMP
-#define preempt_disable(reg)   incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg)    decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg)  decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
 #define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp)                   ; \
                                movq %gs:pda_cpunumber,reg              ; \
-                               shl  $sizeof_vcpu_shift,reg             ; \
+                               shl  $32, reg                           ; \
+                               shr  $32-sizeof_vcpu_shift,reg          ; \
                                addq HYPERVISOR_shared_info,reg
 #define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp)                    ; \
 #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile  Thu Aug 25 22:53:20 2005
@@ -6,10 +6,10 @@
 
 CFLAGS += -Iarch/$(XENARCH)/mm
 
-obj-y  := init.o fault.o ioremap.o pageattr.o
+obj-y  := init.o fault.o pageattr.o
 c-obj-y        := extable.o
 
-i386-obj-y := hypervisor.o
+i386-obj-y := hypervisor.o ioremap.o
 
 #obj-y  := init.o fault.o ioremap.o extable.o pageattr.o
 #c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Thu Aug 25 22:53:20 2005
@@ -250,7 +250,11 @@
           happen within a race in page table update. In the later
           case just flush. */
 
-       pgd = pgd_offset(current->mm ?: &init_mm, address);
+       /* On Xen the line below does not always work. Needs investigating! */
+       /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
+       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+       pgd += pgd_index(address);
+
        pgd_ref = pgd_offset_k(address);
        if (pgd_none(*pgd_ref))
                return -1;
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Thu Aug 25 22:53:20 2005
@@ -40,12 +40,14 @@
 #include <asm/proto.h>
 #include <asm/smp.h>
 
+extern unsigned long *contiguous_bitmap;
+
+#if defined(CONFIG_SWIOTLB)
+extern void swiotlb_init(void);
+#endif
+
 #ifndef Dprintk
 #define Dprintk(x...)
-#endif
-
-#ifdef CONFIG_GART_IOMMU
-extern int swiotlb;
 #endif
 
 extern char _stext[];
@@ -280,7 +282,7 @@
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
-        xen_l1_entry_update(pte, new_pte);
+        set_pte(pte, new_pte);
 
        /*
         * It's enough to flush this one mapping.
@@ -439,6 +441,31 @@
        *dst = val;
 }
 
+static inline int make_readonly(unsigned long paddr)
+{
+    int readonly = 0;
+
+    /* Make new page tables read-only. */
+    if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
+        (paddr >= (table_start << PAGE_SHIFT)))
+        readonly = 1;
+
+    /* Make old page tables read-only. */
+    if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
+                  (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
+        (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
+        readonly = 1;
+
+    /*
+     * No need for writable mapping of kernel image. This also ensures that
+     * page and descriptor tables embedded inside don't have writable mappings.
+     */
+    if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+        readonly = 1;
+
+    return readonly;
+}
+
 void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
 { 
         long i, j, k; 
@@ -475,9 +502,7 @@
                         pte = alloc_low_page(&pte_phys);
                         pte_save = pte;
                         for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += 
PTE_SIZE) {
-                                if (paddr < (table_start << PAGE_SHIFT) 
-                                    + tables_space)
-                                {
+                                if (make_readonly(paddr)) {
                                         __set_pte(pte, 
                                                 __pte(paddr | (_KERNPG_TABLE & 
~_PAGE_RW)));
                                         continue;
@@ -511,75 +536,106 @@
                          round_up(ptes * 8, PAGE_SIZE); 
 }
 
+void __init xen_init_pt(void)
+{
+       unsigned long addr, *page;
+       int i;
+
+       for (i = 0; i < NR_CPUS; i++)
+               per_cpu(cur_pgd, i) = init_mm.pgd;
+
+       memset((void *)init_level4_pgt,   0, PAGE_SIZE);
+       memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
+       memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);
+
+       /* Find the initial pte page that was built for us. */
+       page = (unsigned long *)xen_start_info.pt_base;
+       addr = page[pgd_index(__START_KERNEL_map)];
+       addr_to_page(addr, page);
+       addr = page[pud_index(__START_KERNEL_map)];
+       addr_to_page(addr, page);
+
+       /* Construct mapping of initial pte page in our own directories. */
+       init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
+               mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
+       level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
+               __pud(__pa_symbol(level2_kernel_pgt) |
+                     _KERNPG_TABLE | _PAGE_USER);
+        memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
+
+       make_page_readonly(init_level4_pgt);
+       make_page_readonly(init_level4_user_pgt);
+       make_page_readonly(level3_kernel_pgt);
+       make_page_readonly(level3_user_pgt);
+       make_page_readonly(level2_kernel_pgt);
+
+       xen_pgd_pin(__pa_symbol(init_level4_pgt));
+       xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
+       xen_pud_pin(__pa_symbol(level3_kernel_pgt));
+       xen_pud_pin(__pa_symbol(level3_user_pgt));
+       xen_pmd_pin(__pa_symbol(level2_kernel_pgt));
+
+       set_pgd((pgd_t *)(init_level4_user_pgt + 511), 
+               mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+}
 
 /*
  * Extend kernel mapping to access pages for page tables.  The initial
  * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
  * mapping for early initialization.
  */
-
-#define MIN_INIT_SIZE  0x800000
 static unsigned long current_size, extended_size;
 
 void __init extend_init_mapping(void) 
 {
        unsigned long va = __START_KERNEL_map;
-       unsigned long addr, *pte_page;
-
-       unsigned long phys;
+       unsigned long phys, addr, *pte_page;
         pmd_t *pmd;
        pte_t *pte, new_pte;
        unsigned long *page = (unsigned long *) init_level4_pgt;
        int i;
 
-       addr = (unsigned long) page[pgd_index(va)];
+       addr = page[pgd_index(va)];
        addr_to_page(addr, page);
-
        addr = page[pud_index(va)];
        addr_to_page(addr, page);
 
        for (;;) {
+               pmd = (pmd_t *)&page[pmd_index(va)];
+               if (!pmd_present(*pmd))
+                       break;
+               addr = page[pmd_index(va)];
+               addr_to_page(addr, pte_page);
+               for (i = 0; i < PTRS_PER_PTE; i++) {
+                       pte = (pte_t *) &pte_page[pte_index(va)];
+                       if (!pte_present(*pte))
+                               break;
+                       va += PAGE_SIZE;
+                       current_size += PAGE_SIZE;
+               }
+       }
+
+       while (va < __START_KERNEL_map + current_size + tables_space) {
                pmd = (pmd_t *) &page[pmd_index(va)];
-               if (pmd_present(*pmd)) {
-                       /*
-                        * if pmd is valid, check pte.
-                        */
-                       addr = page[pmd_index(va)];
-                       addr_to_page(addr, pte_page);
-                       
-                       for (i = 0; i < PTRS_PER_PTE; i++) {
-                               pte = (pte_t *) &pte_page[pte_index(va)];
-                               
-                               if (pte_present(*pte)) {
-                                       va += PAGE_SIZE;
-                                       current_size += PAGE_SIZE;
-                               } else
-                                   break;
-                       }
-
-               } else
-                   break;
-       }
-
-       for (; va < __START_KERNEL_map + current_size + tables_space; ) {
-               pmd = (pmd_t *) &page[pmd_index(va)];
-
-               if (pmd_none(*pmd)) {
-                       pte_page = (unsigned long *) alloc_static_page(&phys);
-                       make_page_readonly(pte_page);
-                       xen_pte_pin(phys);
-                       set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
-
-                       for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
-                               new_pte = pfn_pte((va -  __START_KERNEL_map) >> 
PAGE_SHIFT, 
-                                                 __pgprot(_KERNPG_TABLE | 
_PAGE_USER));
-
-                               pte = (pte_t *) &pte_page[pte_index(va)];
-                               xen_l1_entry_update(pte, new_pte);
-                               extended_size += PAGE_SIZE;
-                       }
-               } 
-       }
+               if (!pmd_none(*pmd))
+                       continue;
+               pte_page = (unsigned long *) alloc_static_page(&phys);
+               make_page_readonly(pte_page);
+               xen_pte_pin(phys);
+               set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+               for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+                       new_pte = pfn_pte(
+                               (va - __START_KERNEL_map) >> PAGE_SHIFT, 
+                               __pgprot(_KERNPG_TABLE | _PAGE_USER));
+                       pte = (pte_t *)&pte_page[pte_index(va)];
+                       xen_l1_entry_update(pte, new_pte);
+                       extended_size += PAGE_SIZE;
+               }
+       }
+
+       /* Kill mapping of low 1MB. */
+       for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += 
PAGE_SIZE)
+               HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
 }
 
 
@@ -620,10 +676,6 @@
 
         start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
 
-        /*
-         * TBD: Need to calculate at runtime
-         */
-
        __flush_tlb_all();
         init_mapping_done = 1;
 }
@@ -670,7 +722,7 @@
                                set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                        else
                                __set_fixmap(FIX_ISAMAP_BEGIN - i,
-                                            virt_to_machine(empty_zero_page),
+                                            virt_to_mfn(empty_zero_page) << 
PAGE_SHIFT,
                                             PAGE_KERNEL_RO);
        }
 #endif
@@ -720,8 +772,6 @@
         return 1;
 }
 
-extern int swiotlb_force;
-
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;
 
@@ -730,14 +780,13 @@
        int codesize, reservedpages, datasize, initsize;
        int tmp;
 
-#ifdef CONFIG_SWIOTLB
-       if (swiotlb_force)
-               swiotlb = 1;
-       if (!iommu_aperture &&
-           (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
-              swiotlb = 1;
-       if (swiotlb)
-               swiotlb_init(); 
+       contiguous_bitmap = alloc_bootmem_low_pages(
+               (end_pfn + 2*BITS_PER_LONG) >> 3);
+       BUG_ON(!contiguous_bitmap);
+       memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3);
+
+#if defined(CONFIG_SWIOTLB)
+       swiotlb_init(); 
 #endif
 
        /* How many end-of-memory variables you have, grandma! */
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Thu Aug 25 22:53:20 2005
@@ -30,8 +30,9 @@
 $(patsubst %.o,$(obj)/%.c,$(c-i386-obj-y)):
        @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
 
-obj-y  += $(c-i386-obj-y) $(c-obj-y)
-obj-y  += $(c-xen-obj-y)
+# Make sure irq.o gets linked in before common.o
+obj-y  += $(patsubst common.o,$(c-xen-obj-y) common.o,$(c-i386-obj-y))
+obj-y  += $(c-obj-y)
 
 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
 clean-files += $(patsubst %.o,%.c,$(c-i386-obj-y) $(c-i386-obj-))
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Wed Aug 24 
02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Aug 25 
22:53:20 2005
@@ -81,20 +81,17 @@
 static DECLARE_WORK(balloon_worker, balloon_process, NULL);
 static struct timer_list balloon_timer;
 
-/* Flag for dom0 xenstore workaround */
-static int balloon_xenbus_init=0;
-
-/* Init Function */
-void balloon_init_watcher(void);
-
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 /* Use the private and mapping fields of struct page as a list. */
-#define PAGE_TO_LIST(p) ( (struct list_head *)&p->private )
-#define LIST_TO_PAGE(l) ( list_entry( ((unsigned long *)l),   \
-                                      struct page, private ) )
-#define UNLIST_PAGE(p)  do { list_del(PAGE_TO_LIST(p));       \
-                             p->mapping = NULL;               \
-                             p->private = 0; } while(0)
+#define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
+#define LIST_TO_PAGE(l)                                \
+       (list_entry(((unsigned long *)l), struct page, private))
+#define UNLIST_PAGE(p)                         \
+       do {                                    \
+               list_del(PAGE_TO_LIST(p));      \
+               p->mapping = NULL;              \
+               p->private = 0;                 \
+       } while(0)
 #else
 /* There's a dedicated list field in struct page we can use.    */
 #define PAGE_TO_LIST(p) ( &p->list )
@@ -110,56 +107,53 @@
 #endif
 
 #define IPRINTK(fmt, args...) \
-    printk(KERN_INFO "xen_mem: " fmt, ##args)
+       printk(KERN_INFO "xen_mem: " fmt, ##args)
 #define WPRINTK(fmt, args...) \
-    printk(KERN_WARNING "xen_mem: " fmt, ##args)
+       printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
 /* balloon_append: add the given page to the balloon. */
 static void balloon_append(struct page *page)
 {
-    /* Low memory is re-populated first, so highmem pages go at list tail. */
-    if ( PageHighMem(page) )
-    {
-        list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
-        balloon_high++;
-    }
-    else
-    {
-        list_add(PAGE_TO_LIST(page), &ballooned_pages);
-        balloon_low++;
-    }
+       /* Lowmem is re-populated first, so highmem pages go at list tail. */
+       if (PageHighMem(page)) {
+               list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+               balloon_high++;
+       } else {
+               list_add(PAGE_TO_LIST(page), &ballooned_pages);
+               balloon_low++;
+       }
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
 static struct page *balloon_retrieve(void)
 {
-    struct page *page;
-
-    if ( list_empty(&ballooned_pages) )
-        return NULL;
-
-    page = LIST_TO_PAGE(ballooned_pages.next);
-    UNLIST_PAGE(page);
-
-    if ( PageHighMem(page) )
-        balloon_high--;
-    else
-        balloon_low--;
-
-    return page;
+       struct page *page;
+
+       if (list_empty(&ballooned_pages))
+               return NULL;
+
+       page = LIST_TO_PAGE(ballooned_pages.next);
+       UNLIST_PAGE(page);
+
+       if (PageHighMem(page))
+               balloon_high--;
+       else
+               balloon_low--;
+
+       return page;
 }
 
 static void balloon_alarm(unsigned long unused)
 {
-    schedule_work(&balloon_worker);
+       schedule_work(&balloon_worker);
 }
 
 static unsigned long current_target(void)
 {
-    unsigned long target = min(target_pages, hard_limit);
-    if ( target > (current_pages + balloon_low + balloon_high) )
-        target = current_pages + balloon_low + balloon_high;
-    return target;
+       unsigned long target = min(target_pages, hard_limit);
+       if (target > (current_pages + balloon_low + balloon_high))
+               target = current_pages + balloon_low + balloon_high;
+       return target;
 }
 
 /*
@@ -170,353 +164,336 @@
  */
 static void balloon_process(void *unused)
 {
-    unsigned long *mfn_list, pfn, i, flags;
-    struct page   *page;
-    long           credit, debt, rc;
-    void          *v;
-
-    down(&balloon_mutex);
+       unsigned long *mfn_list, pfn, i, flags;
+       struct page   *page;
+       long           credit, debt, rc;
+       void          *v;
+
+       down(&balloon_mutex);
 
  retry:
-    mfn_list = NULL;
-
-    if ( (credit = current_target() - current_pages) > 0 )
-    {
-        mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
-        if ( mfn_list == NULL )
-            goto out;
-
-        balloon_lock(flags);
-        rc = HYPERVISOR_dom_mem_op(
-            MEMOP_increase_reservation, mfn_list, credit, 0);
-        balloon_unlock(flags);
-        if ( rc < credit )
-        {
-            /* We hit the Xen hard limit: reprobe. */
-            if ( HYPERVISOR_dom_mem_op(
-                MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
-                BUG();
-            hard_limit = current_pages + rc - driver_pages;
-            vfree(mfn_list);
-            goto retry;
-        }
-
-        for ( i = 0; i < credit; i++ )
-        {
-            if ( (page = balloon_retrieve()) == NULL )
-                BUG();
-
-            pfn = page - mem_map;
-            if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
-                BUG();
-
-            /* Update P->M and M->P tables. */
-            phys_to_machine_mapping[pfn] = mfn_list[i];
-            xen_machphys_update(mfn_list[i], pfn);
+       mfn_list = NULL;
+
+       if ((credit = current_target() - current_pages) > 0) {
+               mfn_list = vmalloc(credit * sizeof(*mfn_list));
+               if (mfn_list == NULL)
+                       goto out;
+
+               balloon_lock(flags);
+               rc = HYPERVISOR_dom_mem_op(
+                       MEMOP_increase_reservation, mfn_list, credit, 0);
+               balloon_unlock(flags);
+               if (rc < credit) {
+                       /* We hit the Xen hard limit: reprobe. */
+                       BUG_ON(HYPERVISOR_dom_mem_op(
+                               MEMOP_decrease_reservation,
+                               mfn_list, rc, 0) != rc);
+                       hard_limit = current_pages + rc - driver_pages;
+                       vfree(mfn_list);
+                       goto retry;
+               }
+
+               for (i = 0; i < credit; i++) {
+                       page = balloon_retrieve();
+                       BUG_ON(page == NULL);
+
+                       pfn = page - mem_map;
+                       if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
+                               BUG();
+
+                       /* Update P->M and M->P tables. */
+                       phys_to_machine_mapping[pfn] = mfn_list[i];
+                       xen_machphys_update(mfn_list[i], pfn);
             
-            /* Link back into the page tables if it's not a highmem page. */
-            if ( pfn < max_low_pfn )
-            {
-                HYPERVISOR_update_va_mapping(
-                    (unsigned long)__va(pfn << PAGE_SHIFT),
-                    __pte_ma((mfn_list[i] << PAGE_SHIFT) |
-                             pgprot_val(PAGE_KERNEL)),
-                    0);
-            }
-
-            /* Finally, relinquish the memory back to the system allocator. */
-            ClearPageReserved(page);
-            set_page_count(page, 1);
-            __free_page(page);
-        }
-
-        current_pages += credit;
-    }
-    else if ( credit < 0 )
-    {
-        debt = -credit;
-
-        mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
-        if ( mfn_list == NULL )
-            goto out;
-
-        for ( i = 0; i < debt; i++ )
-        {
-            if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
-            {
-                debt = i;
-                break;
-            }
-
-            pfn = page - mem_map;
-            mfn_list[i] = phys_to_machine_mapping[pfn];
-
-            if ( !PageHighMem(page) )
-            {
-                v = phys_to_virt(pfn << PAGE_SHIFT);
-                scrub_pages(v, 1);
-                HYPERVISOR_update_va_mapping(
-                    (unsigned long)v, __pte_ma(0), 0);
-            }
+                       /* Link back into the page tables if not highmem. */
+                       if (pfn < max_low_pfn)
+                               BUG_ON(HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(pfn << PAGE_SHIFT),
+                                       pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
+                                       0));
+
+                       /* Relinquish the page back to the allocator. */
+                       ClearPageReserved(page);
+                       set_page_count(page, 1);
+                       __free_page(page);
+               }
+
+               current_pages += credit;
+       } else if (credit < 0) {
+               debt = -credit;
+
+               mfn_list = vmalloc(debt * sizeof(*mfn_list));
+               if (mfn_list == NULL)
+                       goto out;
+
+               for (i = 0; i < debt; i++) {
+                       if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
+                               debt = i;
+                               break;
+                       }
+
+                       pfn = page - mem_map;
+                       mfn_list[i] = phys_to_machine_mapping[pfn];
+
+                       if (!PageHighMem(page)) {
+                               v = phys_to_virt(pfn << PAGE_SHIFT);
+                               scrub_pages(v, 1);
+                               BUG_ON(HYPERVISOR_update_va_mapping(
+                                       (unsigned long)v, __pte_ma(0), 0));
+                       }
 #ifdef CONFIG_XEN_SCRUB_PAGES
-            else
-            {
-                v = kmap(page);
-                scrub_pages(v, 1);
-                kunmap(page);
-            }
+                       else {
+                               v = kmap(page);
+                               scrub_pages(v, 1);
+                               kunmap(page);
+                       }
 #endif
-        }
-
-        /* Ensure that ballooned highmem pages don't have cached mappings. */
-        kmap_flush_unused();
-        flush_tlb_all();
-
-        /* No more mappings: invalidate pages in P2M and add to balloon. */
-        for ( i = 0; i < debt; i++ )
-        {
-            pfn = mfn_to_pfn(mfn_list[i]);
-            phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
-            balloon_append(pfn_to_page(pfn));
-        }
-
-        if ( HYPERVISOR_dom_mem_op(
-            MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
-            BUG();
-
-        current_pages -= debt;
-    }
+               }
+
+               /* Ensure that ballooned highmem pages don't have kmaps. */
+               kmap_flush_unused();
+               flush_tlb_all();
+
+               /* No more mappings: invalidate P2M and add to balloon. */
+               for (i = 0; i < debt; i++) {
+                       pfn = mfn_to_pfn(mfn_list[i]);
+                       phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+                       balloon_append(pfn_to_page(pfn));
+               }
+
+               BUG_ON(HYPERVISOR_dom_mem_op(
+                       MEMOP_decrease_reservation,mfn_list, debt, 0) != debt);
+
+               current_pages -= debt;
+       }
 
  out:
-    if ( mfn_list != NULL )
-        vfree(mfn_list);
-
-    /* Schedule more work if there is some still to be done. */
-    if ( current_target() != current_pages )
-        mod_timer(&balloon_timer, jiffies + HZ);
-
-    up(&balloon_mutex);
+       if (mfn_list != NULL)
+               vfree(mfn_list);
+
+       /* Schedule more work if there is some still to be done. */
+       if (current_target() != current_pages)
+               mod_timer(&balloon_timer, jiffies + HZ);
+
+       up(&balloon_mutex);
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
-    /* No need for lock. Not read-modify-write updates. */
-    hard_limit   = ~0UL;
-    target_pages = target;
-    schedule_work(&balloon_worker);
-}
-
-static struct xenbus_watch xb_watch =
-{
-    .node = "memory"
-};
-
-/* FIXME: This is part of a dom0 sequencing workaround */
-static struct xenbus_watch root_watch =
-{
-    .node = "/"
+       /* No need for lock. Not read-modify-write updates. */
+       hard_limit   = ~0UL;
+       target_pages = target;
+       schedule_work(&balloon_worker);
+}
+
+static struct xenbus_watch target_watch =
+{
+       .node = "memory/target"
 };
 
 /* React to a change in the target key */
 static void watch_target(struct xenbus_watch *watch, const char *node)
 {
-    unsigned long new_target;
-    int err;
-
-    if(watch == &root_watch)
-    {
-        /* FIXME: This is part of a dom0 sequencing workaround */
-        if(register_xenbus_watch(&xb_watch) == 0)
-        {
-            /* 
-               We successfully set a watch on memory/target:
-               now we can stop watching root 
-            */
-            unregister_xenbus_watch(&root_watch);
-            balloon_xenbus_init=1;
-        } 
-        else 
-        {
-            return;
-        }
-    }
-
-    err = xenbus_scanf("memory", "target", "%lu", &new_target);
+       unsigned long new_target;
+       int err;
+
+       err = xenbus_scanf("memory", "target", "%lu", &new_target);
+       if (err != 1) {
+               printk(KERN_ERR "Unable to read memory/target\n");
+               return;
+       } 
         
-    if(err != 1) 
-    {
-        IPRINTK("Unable to read memory/target\n");
-        return;
-    } 
-        
-    set_new_target(new_target >> PAGE_SHIFT);
+       set_new_target(new_target >> PAGE_SHIFT);
     
 }
 
-/* 
-   Try to set up our watcher, if not already set
-   
+/* Setup our watcher
+   NB: Assumes xenbus_lock is held!
 */
-void balloon_init_watcher(void) 
-{
-    int err;
-
-    if(!xen_start_info.store_evtchn)
-    {
-        IPRINTK("Delaying watcher init until xenstore is available\n");
-        return;
-    }
-
-    down(&xenbus_lock);
-
-    if(! balloon_xenbus_init) 
-    {
-        err = register_xenbus_watch(&xb_watch);
-        if(err) 
-        {
-            /* BIG FAT FIXME: dom0 sequencing workaround
-             * dom0 can't set a watch on memory/target until
-             * after the tools create it.  So, we have to watch
-             * the whole store until that happens.
-             *
-             * This will go away when we have the ability to watch
-             * non-existant keys
-             */
-            register_xenbus_watch(&root_watch);
-        } 
-        else
-        {
-            IPRINTK("Balloon xenbus watcher initialized\n");
-            balloon_xenbus_init = 1;
-        }
-    }
-
-    up(&xenbus_lock);
-
-}
-
-EXPORT_SYMBOL(balloon_init_watcher);
+int balloon_init_watcher(struct notifier_block *notifier,
+                         unsigned long event,
+                         void *data)
+{
+       int err;
+
+       BUG_ON(down_trylock(&xenbus_lock) == 0);
+
+       err = register_xenbus_watch(&target_watch);
+       if (err)
+               printk(KERN_ERR "Failed to set balloon watcher\n");
+
+       return NOTIFY_DONE;
+    
+}
 
 static int balloon_write(struct file *file, const char __user *buffer,
                          unsigned long count, void *data)
 {
-    char memstring[64], *endchar;
-    unsigned long long target_bytes;
-
-    if ( !capable(CAP_SYS_ADMIN) )
-        return -EPERM;
-
-    if ( count <= 1 )
-        return -EBADMSG; /* runt */
-    if ( count > sizeof(memstring) )
-        return -EFBIG;   /* too long */
-
-    if ( copy_from_user(memstring, buffer, count) )
-        return -EFAULT;
-    memstring[sizeof(memstring)-1] = '\0';
-
-    target_bytes = memparse(memstring, &endchar);
-    set_new_target(target_bytes >> PAGE_SHIFT);
-
-    return count;
+       char memstring[64], *endchar;
+       unsigned long long target_bytes;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (count <= 1)
+               return -EBADMSG; /* runt */
+       if (count > sizeof(memstring))
+               return -EFBIG;   /* too long */
+
+       if (copy_from_user(memstring, buffer, count))
+               return -EFAULT;
+       memstring[sizeof(memstring)-1] = '\0';
+
+       target_bytes = memparse(memstring, &endchar);
+       set_new_target(target_bytes >> PAGE_SHIFT);
+
+       return count;
 }
 
 static int balloon_read(char *page, char **start, off_t off,
                         int count, int *eof, void *data)
 {
-    int len;
-
-    len = sprintf(
-        page,
-        "Current allocation: %8lu kB\n"
-        "Requested target:   %8lu kB\n"
-        "Low-mem balloon:    %8lu kB\n"
-        "High-mem balloon:   %8lu kB\n"
-        "Xen hard limit:     ",
-        PAGES2KB(current_pages), PAGES2KB(target_pages), 
-        PAGES2KB(balloon_low), PAGES2KB(balloon_high));
-
-    if ( hard_limit != ~0UL )
-        len += sprintf(
-            page + len, 
-            "%8lu kB (inc. %8lu kB driver headroom)\n",
-            PAGES2KB(hard_limit), PAGES2KB(driver_pages));
-    else
-        len += sprintf(
-            page + len,
-            "     ??? kB\n");
-
-    *eof = 1;
-    return len;
-}
+       int len;
+
+       len = sprintf(
+               page,
+               "Current allocation: %8lu kB\n"
+               "Requested target:   %8lu kB\n"
+               "Low-mem balloon:    %8lu kB\n"
+               "High-mem balloon:   %8lu kB\n"
+               "Xen hard limit:     ",
+               PAGES2KB(current_pages), PAGES2KB(target_pages), 
+               PAGES2KB(balloon_low), PAGES2KB(balloon_high));
+
+       if (hard_limit != ~0UL) {
+               len += sprintf(
+                       page + len, 
+                       "%8lu kB (inc. %8lu kB driver headroom)\n",
+                       PAGES2KB(hard_limit), PAGES2KB(driver_pages));
+       } else {
+               len += sprintf(
+                       page + len,
+                       "     ??? kB\n");
+       }
+
+       *eof = 1;
+       return len;
+}
+
+static struct notifier_block xenstore_notifier;
 
 static int __init balloon_init(void)
 {
-    unsigned long pfn;
-    struct page *page;
-
-    IPRINTK("Initialising balloon driver.\n");
-
-    current_pages = min(xen_start_info.nr_pages, max_pfn);
-    target_pages  = current_pages;
-    balloon_low   = 0;
-    balloon_high  = 0;
-    driver_pages  = 0UL;
-    hard_limit    = ~0UL;
-
-    init_timer(&balloon_timer);
-    balloon_timer.data = 0;
-    balloon_timer.function = balloon_alarm;
+       unsigned long pfn;
+       struct page *page;
+
+       IPRINTK("Initialising balloon driver.\n");
+
+       current_pages = min(xen_start_info.nr_pages, max_pfn);
+       target_pages  = current_pages;
+       balloon_low   = 0;
+       balloon_high  = 0;
+       driver_pages  = 0UL;
+       hard_limit    = ~0UL;
+
+       init_timer(&balloon_timer);
+       balloon_timer.data = 0;
+       balloon_timer.function = balloon_alarm;
     
-    if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
-    {
-        WPRINTK("Unable to create /proc/xen/balloon.\n");
-        return -1;
-    }
-
-    balloon_pde->read_proc  = balloon_read;
-    balloon_pde->write_proc = balloon_write;
+       if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
+               WPRINTK("Unable to create /proc/xen/balloon.\n");
+               return -1;
+       }
+
+       balloon_pde->read_proc  = balloon_read;
+       balloon_pde->write_proc = balloon_write;
     
-    /* Initialise the balloon with excess memory space. */
-    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
-    {
-        page = &mem_map[pfn];
-        if ( !PageReserved(page) )
-            balloon_append(page);
-    }
-
-    xb_watch.callback = watch_target;
-    root_watch.callback = watch_target;
-
-    balloon_init_watcher();
-
-    return 0;
+       /* Initialise the balloon with excess memory space. */
+       for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) {
+               page = &mem_map[pfn];
+               if (!PageReserved(page))
+                       balloon_append(page);
+       }
+
+       target_watch.callback = watch_target;
+       xenstore_notifier.notifier_call = balloon_init_watcher;
+
+       register_xenstore_notifier(&xenstore_notifier);
+    
+       return 0;
 }
 
 subsys_initcall(balloon_init);
 
 void balloon_update_driver_allowance(long delta)
 {
-    unsigned long flags;
-    balloon_lock(flags);
-    driver_pages += delta; /* non-atomic update */
-    balloon_unlock(flags);
-}
-
-void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
-{
-    unsigned long flags;
-
-    balloon_lock(flags);
-    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
-                               mfn_list, nr_mfns, 0) != nr_mfns )
-        BUG();
-    current_pages -= nr_mfns; /* non-atomic update */
-    balloon_unlock(flags);
-
-    schedule_work(&balloon_worker);
+       unsigned long flags;
+       balloon_lock(flags);
+       driver_pages += delta; /* non-atomic update */
+       balloon_unlock(flags);
+}
+
+static int dealloc_pte_fn(
+       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       unsigned long mfn = pte_mfn(*pte);
+       set_pte(pte, __pte_ma(0));
+       phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
+               INVALID_P2M_ENTRY;
+       BUG_ON(HYPERVISOR_dom_mem_op(
+               MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+       return 0;
+}
+
+struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+{
+       unsigned long vstart, flags;
+       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+
+       vstart = __get_free_pages(GFP_KERNEL, order);
+       if (vstart == 0)
+               return NULL;
+
+       scrub_pages(vstart, 1 << order);
+
+       balloon_lock(flags);
+       BUG_ON(generic_page_range(
+               &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL));
+       current_pages -= 1UL << order;
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
+
+       flush_tlb_all();
+
+       return virt_to_page(vstart);
+}
+
+void balloon_dealloc_empty_page_range(
+       struct page *page, unsigned long nr_pages)
+{
+       unsigned long i, flags;
+       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+
+       balloon_lock(flags);
+       for (i = 0; i < (1UL << order); i++)
+               balloon_append(page + i);
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
 }
 
 EXPORT_SYMBOL(balloon_update_driver_allowance);
-EXPORT_SYMBOL(balloon_put_pages);
+EXPORT_SYMBOL(balloon_alloc_empty_page_range);
+EXPORT_SYMBOL(balloon_dealloc_empty_page_range);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Thu Aug 25 22:53:20 2005
@@ -1,2 +1,2 @@
 
-obj-y  := blkback.o control.o interface.o vbd.o
+obj-y  := blkback.o xenbus.o interface.o vbd.o
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu Aug 25 22:53:20 2005
@@ -11,11 +11,9 @@
  * Copyright (c) 2005, Christopher Clark
  */
 
+#include <linux/spinlock.h>
+#include <asm-xen/balloon.h>
 #include "common.h"
-#include <asm-xen/evtchn.h>
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-#include <asm-xen/xen-public/grant_table.h>
-#endif
 
 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
@@ -67,9 +65,6 @@
 static PEND_RING_IDX pending_prod, pending_cons;
 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static kmem_cache_t *buffer_head_cachep;
-#else
 static request_queue_t *plugged_queue;
 static inline void flush_plugged_queue(void)
 {
@@ -82,9 +77,7 @@
         plugged_queue = NULL;
     }
 }
-#endif
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+
 /* When using grant tables to map a frame for device access then the
  * handle returned must be used to unmap the frame. This is needed to
  * drop the ref count on the frame.
@@ -93,7 +86,6 @@
 #define pending_handle(_idx, _i) \
     (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
 #define BLKBACK_INVALID_HANDLE (0xFFFF)
-#endif
 
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
 /*
@@ -108,14 +100,12 @@
 #endif
 
 static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
 static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
 static void make_response(blkif_t *blkif, unsigned long id, 
                           unsigned short op, int st);
 
 static void fast_flush_area(int idx, int nr_pages)
 {
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
     unsigned int i, invcount = 0;
     u16 handle;
@@ -124,31 +114,16 @@
     {
         if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
         {
-            unmap[i].host_virt_addr = MMAP_VADDR(idx, i);
+            unmap[i].host_addr      = MMAP_VADDR(idx, i);
             unmap[i].dev_bus_addr   = 0;
             unmap[i].handle         = handle;
-            pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+            pending_handle(idx, i)  = BLKBACK_INVALID_HANDLE;
             invcount++;
         }
     }
     if ( unlikely(HYPERVISOR_grant_table_op(
                     GNTTABOP_unmap_grant_ref, unmap, invcount)))
         BUG();
-#else
-
-    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    int               i;
-
-    for ( i = 0; i < nr_pages; i++ )
-    {
-       MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
-                               __pte(0), 0);
-    }
-
-    mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
-        BUG();
-#endif
 }
 
 
@@ -205,11 +180,7 @@
     blkif_t          *blkif;
     struct list_head *ent;
 
-    daemonize(
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-        "xenblkd"
-#endif
-        );
+    daemonize("xenblkd");
 
     for ( ; ; )
     {
@@ -236,11 +207,7 @@
         }
 
         /* Push the batch through to disc. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-        run_task_queue(&tq_disk);
-#else
         flush_plugged_queue();
-#endif
     }
 }
 
@@ -289,13 +256,6 @@
     }
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static void end_block_io_op(struct buffer_head *bh, int uptodate)
-{
-    __end_block_io_op(bh->b_private, uptodate);
-    kmem_cache_free(buffer_head_cachep, bh);
-}
-#else
 static int end_block_io_op(struct bio *bio, unsigned int done, int error)
 {
     if ( bio->bi_size != 0 )
@@ -304,7 +264,6 @@
     bio_put(bio);
     return error;
 }
-#endif
 
 
 /******************************************************************************
@@ -351,10 +310,6 @@
         case BLKIF_OP_READ:
         case BLKIF_OP_WRITE:
             dispatch_rw_block_io(blkif, req);
-            break;
-
-        case BLKIF_OP_PROBE:
-            dispatch_probe(blkif, req);
             break;
 
         default:
@@ -369,72 +324,6 @@
     return more_to_do;
 }
 
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
-{
-    int rsp = BLKIF_RSP_ERROR;
-    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
-    /* We expect one buffer only. */
-    if ( unlikely(req->nr_segments != 1) )
-        goto out;
-
-    /* Make sure the buffer is page-sized. */
-    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
-         (blkif_last_sect(req->frame_and_sects[0]) != 7) )
-        goto out;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    {
-        struct gnttab_map_grant_ref map;
-
-        map.host_virt_addr = MMAP_VADDR(pending_idx, 0);
-        map.flags = GNTMAP_host_map;
-        map.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
-        map.dom = blkif->domid;
-
-        if ( unlikely(HYPERVISOR_grant_table_op(
-                        GNTTABOP_map_grant_ref, &map, 1)))
-            BUG();
-
-        if ( map.handle < 0 )
-            goto out;
-
-        pending_handle(pending_idx, 0) = map.handle;
-    }
-#else /* else CONFIG_XEN_BLKDEV_GRANT */
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
-    /* Grab the real frontend out of the probe message. */
-    if (req->frame_and_sects[1] == BLKTAP_COOKIE) 
-        blkif->is_blktap = 1;
-#endif
-
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
-    if ( HYPERVISOR_update_va_mapping_otherdomain(
-        MMAP_VADDR(pending_idx, 0),
-        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
-        0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
-        
-        goto out;
-#else
-    if ( HYPERVISOR_update_va_mapping_otherdomain(
-        MMAP_VADDR(pending_idx, 0),
-        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
-        0, blkif->domid) ) 
-        
-        goto out;
-#endif
-#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
-   
-    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
-                    PAGE_SIZE / sizeof(vdisk_t));
-
- out:
-    fast_flush_area(pending_idx, 1);
-    make_response(blkif, req->id, req->operation, rsp);
-}
-
 static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
 {
     extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
@@ -442,24 +331,15 @@
     unsigned long fas = 0;
     int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
     pending_req_t *pending_req;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-#else
-    unsigned long remap_prot;
-    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-#endif
     struct phys_req preq;
     struct { 
         unsigned long buf; unsigned int nsec;
     } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
     unsigned int nseg;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-    struct buffer_head *bh;
-#else
     struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
     int nbio = 0;
     request_queue_t *q;
-#endif
 
     /* Check that number of segments is sane. */
     nseg = req->nr_segments;
@@ -470,11 +350,10 @@
         goto bad_descriptor;
     }
 
-    preq.dev           = req->device;
+    preq.dev           = req->handle;
     preq.sector_number = req->sector_number;
     preq.nr_sects      = 0;
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     for ( i = 0; i < nseg; i++ )
     {
         fas         = req->frame_and_sects[i];
@@ -484,7 +363,7 @@
             goto bad_descriptor;
         preq.nr_sects += seg[i].nsec;
 
-        map[i].host_virt_addr = MMAP_VADDR(pending_idx, i);
+        map[i].host_addr = MMAP_VADDR(pending_idx, i);
         map[i].dom = blkif->domid;
         map[i].ref = blkif_gref_from_fas(fas);
         map[i].flags = GNTMAP_host_map;
@@ -506,25 +385,15 @@
         }
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME(map[i].dev_bus_addr);
+            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
 
         pending_handle(pending_idx, i) = map[i].handle;
     }
-#endif
 
     for ( i = 0; i < nseg; i++ )
     {
         fas         = req->frame_and_sects[i];
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-        seg[i].buf  = (map[i].dev_bus_addr << PAGE_SHIFT) |
-                      (blkif_first_sect(fas) << 9);
-#else
-        seg[i].buf  = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
-        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
-        if ( seg[i].nsec <= 0 )
-            goto bad_descriptor;
-        preq.nr_sects += seg[i].nsec;
-#endif
+        seg[i].buf  = map[i].dev_bus_addr | (blkif_first_sect(fas) << 9);
     }
 
     if ( vbd_translate(&preq, blkif, operation) != 0 )
@@ -534,40 +403,6 @@
                 preq.sector_number + preq.nr_sects, preq.dev); 
         goto bad_descriptor;
     }
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    if ( operation == READ )
-        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
-    else
-        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
-
-
-    for ( i = 0; i < nseg; i++ )
-    {
-       MULTI_update_va_mapping_otherdomain(
-           mcl+i, MMAP_VADDR(pending_idx, i),
-           pfn_pte_ma(seg[i].buf >> PAGE_SHIFT, __pgprot(remap_prot)),
-           0, blkif->domid);
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
-        if ( blkif->is_blktap )
-            mcl[i].args[MULTI_UVMDOMID_INDEX] = ID_TO_DOM(req->id);
-#endif
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
-    }
-
-    BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0);
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        if ( unlikely(mcl[i].result != 0) )
-        {
-            DPRINTK("invalid buffer -- could not remap it\n");
-            fast_flush_area(pending_idx, nseg);
-            goto bad_descriptor;
-        }
-    }
-#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
 
     pending_req = &pending_reqs[pending_idx];
     pending_req->blkif     = blkif;
@@ -575,49 +410,6 @@
     pending_req->operation = operation;
     pending_req->status    = BLKIF_RSP_OKAY;
     pending_req->nr_pages  = nseg;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-
-    atomic_set(&pending_req->pendcnt, nseg);
-    pending_cons++;
-    blkif_get(blkif);
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
-        if ( unlikely(bh == NULL) )
-        {
-            __end_block_io_op(pending_req, 0);
-            continue;
-        }
-
-        memset(bh, 0, sizeof (struct buffer_head));
-
-        init_waitqueue_head(&bh->b_wait);
-        bh->b_size          = seg[i].nsec << 9;
-        bh->b_dev           = preq.dev;
-        bh->b_rdev          = preq.dev;
-        bh->b_rsector       = (unsigned long)preq.sector_number;
-        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
-            (seg[i].buf & ~PAGE_MASK);
-        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
-        bh->b_end_io        = end_block_io_op;
-        bh->b_private       = pending_req;
-
-        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
-            (1 << BH_Req) | (1 << BH_Launder);
-        if ( operation == WRITE )
-            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
-
-        atomic_set(&bh->b_count, 1);
-
-        /* Dispatch a single request. We'll flush it to disc later. */
-        generic_make_request(operation, bh);
-
-        preq.sector_number += seg[i].nsec;
-    }
-
-#else
 
     for ( i = 0; i < nseg; i++ )
     {
@@ -667,8 +459,6 @@
     for ( i = 0; i < nbio; i++ )
         submit_bio(operation, biolist[i]);
 
-#endif
-
     return;
 
  bad_descriptor:
@@ -712,6 +502,7 @@
 static int __init blkif_init(void)
 {
     int i;
+    struct page *page;
 
     if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
          !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
@@ -719,8 +510,9 @@
 
     blkif_interface_init();
 
-    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
-        BUG();
+    page = balloon_alloc_empty_page_range(MMAP_PAGES);
+    BUG_ON(page == NULL);
+    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
 
     pending_cons = 0;
     pending_prod = MAX_PENDING_REQS;
@@ -734,18 +526,9 @@
     if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
         BUG();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-    buffer_head_cachep = kmem_cache_create(
-        "buffer_head_cache", sizeof(struct buffer_head),
-        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-#endif
-
-    blkif_ctrlif_init();
-    
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+    blkif_xenbus_init();
+
     memset( pending_grant_handles,  BLKBACK_INVALID_HANDLE, MMAP_PAGES );
-    printk(KERN_ALERT "Blkif backend is using grant tables.\n");
-#endif
 
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
     printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Aug 25 22:53:20 2005
@@ -5,17 +5,18 @@
 #include <linux/config.h>
 #include <linux/version.h>
 #include <linux/module.h>
-#include <linux/rbtree.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/vmalloc.h>
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/xen-public/io/blkif.h>
 #include <asm-xen/xen-public/io/ring.h>
+#include <asm-xen/gnttab.h>
 
 #if 0
 #define ASSERT(_p) \
@@ -28,12 +29,13 @@
 #define DPRINTK(_f, _a...) ((void)0)
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-typedef struct rb_root rb_root_t;
-typedef struct rb_node rb_node_t;
-#else
-struct block_device;
-#endif
+struct vbd {
+    blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
+    unsigned char  readonly;    /* Non-zero -> read-only */
+    unsigned char  type;        /* VDISK_xxx */
+    blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
+    struct block_device *bdev;
+}; 
 
 typedef struct blkif_st {
     /* Unique identifier for this interface. */
@@ -42,34 +44,25 @@
     /* Physical parameters of the comms window. */
     unsigned long     shmem_frame;
     unsigned int      evtchn;
-    int               irq;
+    unsigned int      remote_evtchn;
     /* Comms information. */
     blkif_back_ring_t blk_ring;
     /* VBDs attached to this interface. */
-    rb_root_t         vbd_rb;        /* Mapping from 16-bit vdevices to VBDs.*/
-    spinlock_t        vbd_lock;      /* Protects VBD mapping. */
+    struct vbd        vbd;
     /* Private fields. */
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */
-    u8               disconnect_rspid;
+    enum { DISCONNECTED, CONNECTED } status;
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
     /* Is this a blktap frontend */
     unsigned int     is_blktap;
 #endif
-    struct blkif_st *hash_next;
     struct list_head blkdev_list;
     spinlock_t       blk_ring_lock;
     atomic_t         refcnt;
 
-    struct work_struct work;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct work_struct free_work;
     u16 shmem_handle;
-    memory_t shmem_vaddr;
+    unsigned long shmem_vaddr;
     grant_ref_t shmem_ref;
-#endif
 } blkif_t;
 
 void blkif_create(blkif_be_create_t *create);
@@ -77,18 +70,25 @@
 void blkif_connect(blkif_be_connect_t *connect);
 int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
 void blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+blkif_t *alloc_blkif(domid_t domid);
+void free_blkif_callback(blkif_t *blkif);
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
+
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define blkif_put(_b)                             \
     do {                                          \
         if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            blkif_disconnect_complete(_b);        \
+            free_blkif_callback(_b);             \
     } while (0)
 
-void vbd_create(blkif_be_vbd_create_t *create); 
-void vbd_destroy(blkif_be_vbd_destroy_t *delete); 
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
-void destroy_all_vbds(blkif_t *blkif);
+/* Create a vbd. */
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice,
+              int readonly);
+void vbd_free(struct vbd *vbd);
+
+unsigned long vbd_size(struct vbd *vbd);
+unsigned int vbd_info(struct vbd *vbd);
+unsigned long vbd_secsize(struct vbd *vbd);
 
 struct phys_req {
     unsigned short       dev;
@@ -100,9 +100,10 @@
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
 
 void blkif_interface_init(void);
-void blkif_ctrlif_init(void);
 
 void blkif_deschedule(blkif_t *blkif);
+
+void blkif_xenbus_init(void);
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Thu Aug 25 22:53:20 2005
@@ -7,289 +7,137 @@
  */
 
 #include "common.h"
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+#include <asm-xen/evtchn.h>
 
 static kmem_cache_t *blkif_cachep;
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
 
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+blkif_t *alloc_blkif(domid_t domid) /* Allocate a zeroed, DISCONNECTED blkif for 'domid' from blkif_cachep. */
 {
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
+    blkif_t *blkif;
+
+    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+    if (!blkif)
+           return ERR_PTR(-ENOMEM); /* failure is an ERR_PTR, not NULL: test with IS_ERR() */
+
+    memset(blkif, 0, sizeof(*blkif));
+    blkif->domid = domid;
+    blkif->status = DISCONNECTED;
+    spin_lock_init(&blkif->blk_ring_lock);
+    atomic_set(&blkif->refcnt, 1); /* starts with one reference, presumably the caller's -- confirm */
+
     return blkif;
 }
 
-static void __blkif_disconnect_complete(void *arg)
+static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
+                            unsigned long shared_page) /* Grant-map 'shared_page' from blkif->domid at 'localaddr'; 0 or a negative handle. */
 {
-    blkif_t              *blkif = (blkif_t *)arg;
-    ctrl_msg_t            cmsg;
-    blkif_be_disconnect_t disc;
+    struct gnttab_map_grant_ref op;
+    op.host_addr = localaddr;
+    op.flags = GNTMAP_host_map;
+    op.ref = shared_page; /* despite its name, 'shared_page' is passed as the grant reference */
+    op.dom = blkif->domid;
 
-    /*
-     * These can't be done in blkif_disconnect() because at that point there
-     * may be outstanding requests at the disc whose asynchronous responses
-     * must still be notified to the remote driver.
-     */
-    unbind_evtchn_from_irq(blkif->evtchn);
+    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); /* a failing hypercall is treated as fatal */
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    {
-        /*
-         * Release the shared memory page.
-         */
-        struct gnttab_unmap_grant_ref op;
+    if (op.handle < 0) { /* a negative handle reports a failed mapping */
+       DPRINTK(" Grant table operation failure !\n");
+       return op.handle; /* the negative handle doubles as the error code */
+    }
 
-        op.host_virt_addr = blkif->shmem_vaddr;
-        op.handle         = blkif->shmem_handle;
-        op.dev_bus_addr   = 0;
-
-        if(unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 
1))) {
-            BUG();
-        }
-    }
-#endif
-    vfree(blkif->blk_ring.sring);
-
-    /* Construct the deferred response message. */
-    cmsg.type         = CMSG_BLKIF_BE;
-    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
-    cmsg.id           = blkif->disconnect_rspid;
-    cmsg.length       = sizeof(blkif_be_disconnect_t);
-    disc.domid        = blkif->domid;
-    disc.blkif_handle = blkif->handle;
-    disc.status       = BLKIF_BE_STATUS_OKAY;
-    memcpy(cmsg.msg, &disc, sizeof(disc));
-
-    /*
-     * Make sure message is constructed /before/ status change, because
-     * after the status change the 'blkif' structure could be deallocated at
-     * any time. Also make sure we send the response /after/ status change,
-     * as otherwise a subsequent CONNECT request could spuriously fail if
-     * another CPU doesn't see the status change yet.
-     */
-    mb();
-    if ( blkif->status != DISCONNECTING )
-        BUG();
-    blkif->status = DISCONNECTED;
-    mb();
-
-    /* Send the successful response. */
-    ctrl_if_send_response(&cmsg);
+    blkif->shmem_ref = shared_page;
+    blkif->shmem_handle = op.handle; /* read back by unmap_frontend_page() */
+    blkif->shmem_vaddr = localaddr; /* read back by unmap_frontend_page() */
+    return 0;
 }
 
-void blkif_disconnect_complete(blkif_t *blkif)
+static void unmap_frontend_page(blkif_t *blkif) /* Undo map_frontend_page() using the address and handle saved in 'blkif'. */
 {
-    INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
-    schedule_work(&blkif->work);
+    struct gnttab_unmap_grant_ref op;
+
+    op.host_addr = blkif->shmem_vaddr;
+    op.handle = blkif->shmem_handle;
+    op.dev_bus_addr = 0;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); /* a failing hypercall is treated as fatal */
 }
 
-void blkif_create(blkif_be_create_t *create)
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) /* Map the shared ring, bind the event channel, mark CONNECTED; 0 on success. */
 {
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t     **pblkif, *blkif;
+    struct vm_struct *vma;
+    blkif_sring_t *sring;
+    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+    int err;
 
-    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
-    {
-        DPRINTK("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
+    BUG_ON(blkif->remote_evtchn); /* remote_evtchn is only set below on success, so non-zero means already mapped */
+
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) /* one page of kernel virtual space for the ring */
+       return -ENOMEM;
+
+    err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
+    if (err) {
+        vfree(vma->addr); /* mapping failed: give the virtual area back */
+       return err;
     }
 
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;
-    spin_lock_init(&blkif->vbd_lock);
-    spin_lock_init(&blkif->blk_ring_lock);
-    atomic_set(&blkif->refcnt, 0);
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTK("Could not create blkif: already exists\n");
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            kmem_cache_free(blkif_cachep, blkif);
-            return;
-        }
-        pblkif = &(*pblkif)->hash_next;
+    op.u.bind_interdomain.dom1 = DOMID_SELF;
+    op.u.bind_interdomain.dom2 = blkif->domid;
+    op.u.bind_interdomain.port1 = 0; /* presumably 0 asks Xen to choose the local port -- confirm */
+    op.u.bind_interdomain.port2 = evtchn;
+    err = HYPERVISOR_event_channel_op(&op);
+    if (err) {
+       unmap_frontend_page(blkif); /* undo map_frontend_page() before releasing the area */
+       vfree(vma->addr);
+       return err;
     }
 
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
+    blkif->evtchn = op.u.bind_interdomain.port1; /* local port as left in 'op' after the hypercall */
+    blkif->remote_evtchn = evtchn;
 
-    DPRINTK("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pblkif = blkif->hash_next;
-    destroy_all_vbds(blkif);
-    kmem_cache_free(blkif_cachep, blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_connect(blkif_be_connect_t *connect)
-{
-    domid_t        domid  = connect->domid;
-    unsigned int   handle = connect->blkif_handle;
-    unsigned int   evtchn = connect->evtchn;
-    unsigned long  shmem_frame = connect->shmem_frame;
-    struct vm_struct *vma;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    int ref = connect->shmem_ref;
-#else
-    pgprot_t       prot;
-    int            error;
-#endif
-    blkif_t       *blkif;
-    blkif_sring_t *sring;
-
-    blkif = blkif_find_by_handle(domid, handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 
-                connect->domid, connect->blkif_handle); 
-        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    prot = __pgprot(_KERNPG_TABLE);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-        else
-            connect->status = BLKIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-#else
-    { /* Map: Use the Grant table reference */
-        struct gnttab_map_grant_ref op;
-        op.host_virt_addr = VMALLOC_VMADDR(vma->addr);
-        op.flags          = GNTMAP_host_map;
-        op.ref            = ref;
-        op.dom            = domid;
-       
-        BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-       
-        handle = op.handle;
-       
-        if (op.handle < 0) {
-            DPRINTK(" Grant table operation failure !\n");
-            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-            vfree(vma->addr);
-            return;
-        }
-
-        blkif->shmem_ref = ref;
-        blkif->shmem_handle = handle;
-        blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
-    }
-#endif
-
-    if ( blkif->status != DISCONNECTED )
-    {
-        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
     sring = (blkif_sring_t *)vma->addr;
     SHARED_RING_INIT(sring);
     BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
-    
-    blkif->evtchn        = evtchn;
-    blkif->irq           = bind_evtchn_to_irq(evtchn);
-    blkif->shmem_frame   = shmem_frame;
+
+    bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
+                             blkif); /* NOTE(review): the result is ignored; a failed bind would go unnoticed -- confirm */
     blkif->status        = CONNECTED;
-    blkif_get(blkif);
+    blkif->shmem_frame   = shared_page;
 
-    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
-
-    connect->status = BLKIF_BE_STATUS_OKAY;
+    return 0;
 }
 
-int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+static void free_blkif(void *arg) /* Work handler: tear down and free the blkif passed as 'arg'. */
 {
-    domid_t       domid  = disconnect->domid;
-    unsigned int  handle = disconnect->blkif_handle;
-    blkif_t      *blkif;
+    evtchn_op_t op = { .cmd = EVTCHNOP_close };
+    blkif_t *blkif = (blkif_t *)arg;
 
-    blkif = blkif_find_by_handle(domid, handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("blkif_disconnect attempted for non-existent blkif"
-                " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 
-        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
+    op.u.close.port = blkif->evtchn; /* NOTE(review): closed even when evtchn is 0 (never bound) -- confirm this is harmless */
+    op.u.close.dom = DOMID_SELF;
+    HYPERVISOR_event_channel_op(&op); /* result deliberately not checked */
+    op.u.close.port = blkif->remote_evtchn;
+    op.u.close.dom = blkif->domid;
+    HYPERVISOR_event_channel_op(&op); /* result deliberately not checked */
+
+    vbd_free(&blkif->vbd);
+
+    if (blkif->evtchn) /* NOTE(review): handler is unbound only after the port was closed above -- confirm ordering */
+        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+
+    if (blkif->blk_ring.sring) { /* non-NULL only once blkif_map() has set up the ring */
+       unmap_frontend_page(blkif);
+       vfree(blkif->blk_ring.sring);
+       blkif->blk_ring.sring = NULL;
     }
 
-    if ( blkif->status == CONNECTED )
-    {
-        blkif->status = DISCONNECTING;
-        blkif->disconnect_rspid = rsp_id;
-        wmb(); /* Let other CPUs see the status change. */
-        free_irq(blkif->irq, blkif);
-        blkif_deschedule(blkif);
-        blkif_put(blkif);
-        return 0; /* Caller should not send response message. */
-    }
+    kmem_cache_free(blkif_cachep, blkif); /* 'blkif' must not be touched after this */
+}
 
-    disconnect->status = BLKIF_BE_STATUS_OKAY;
-    return 1;
+void free_blkif_callback(blkif_t *blkif) /* Queue free_blkif() for 'blkif' on the shared workqueue. */
+{
+    INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif); /* work item lives inside the blkif itself */
+    schedule_work(&blkif->free_work); /* the actual teardown runs later, from the work handler */
 }
 
 void __init blkif_interface_init(void)
 {
     blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
                                      0, 0, NULL, NULL);
-    memset(blkif_hash, 0, sizeof(blkif_hash));
 }
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Thu Aug 25 22:53:20 2005
@@ -3,104 +3,61 @@
  * 
  * Routines for managing virtual block devices (VBDs).
  * 
- * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups 
- * in vbd_translate.  All other lookups are implicitly protected because the 
- * only caller (the control message dispatch routine) serializes the calls.
- * 
  * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
  */
 
 #include "common.h"
+#include <asm-xen/xenbus.h>
 
-struct vbd { 
-    blkif_vdev_t   vdevice;     /* what the domain refers to this vbd as */
-    unsigned char  readonly;    /* Non-zero -> read-only */
-    unsigned char  type;        /* VDISK_xxx */
-    blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
-    struct block_device *bdev;
-    rb_node_t      rb;          /* for linking into R-B tree lookup struct */
-}; 
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 static inline dev_t vbd_map_devnum(blkif_pdev_t cookie)
-{ return MKDEV(cookie>>8, cookie&0xff); }
+{
+    return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie)); /* split the cookie into major/minor and build a dev_t */
+}
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ? \
     (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
 #define bdev_put(_b) blkdev_put(_b)
-#else
-#define vbd_sz(_v)   (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2)
-#define bdev_put(_b) ((void)0)
-#define bdev_hardsect_size(_b) 512
-#endif
 
-void vbd_create(blkif_be_vbd_create_t *create) 
+unsigned long vbd_size(struct vbd *vbd) /* Function wrapper around the vbd_sz() macro. */
 {
-    struct vbd  *vbd; 
-    rb_node_t  **rb_p, *rb_parent = NULL;
-    blkif_t     *blkif;
-    blkif_vdev_t vdevice = create->vdevice;
+       return vbd_sz(vbd); /* bd_part->nr_sects if the device has a partition, else bd_disk->capacity */
+}
 
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
+unsigned int vbd_info(struct vbd *vbd) /* Return the vbd's type flags, with VDISK_READONLY added for read-only devices. */
+{
+       return vbd->type | (vbd->readonly?VDISK_READONLY:0);
+}
 
-    rb_p = &blkif->vbd_rb.rb_node;
-    while ( *rb_p != NULL )
-    {
-        rb_parent = *rb_p;
-        vbd = rb_entry(rb_parent, struct vbd, rb);
-        if ( vdevice < vbd->vdevice )
-        {
-            rb_p = &rb_parent->rb_left;
-        }
-        else if ( vdevice > vbd->vdevice )
-        {
-            rb_p = &rb_parent->rb_right;
-        }
-        else
-        {
-            DPRINTK("vbd_create attempted for already existing vbd\n");
-            create->status = BLKIF_BE_STATUS_VBD_EXISTS;
-            return;
-        }
-    }
+unsigned long vbd_secsize(struct vbd *vbd) /* Return bdev_hardsect_size() of the backing block device. */
+{
+       return bdev_hardsect_size(vbd->bdev); /* assumes vbd->bdev is a valid, opened device */
+}
 
-    if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
-    {
-        DPRINTK("vbd_create: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle,
+              blkif_pdev_t pdevice, int readonly)
+{
+    struct vbd *vbd;
 
-    vbd->vdevice  = vdevice; 
-    vbd->readonly = create->readonly;
+    vbd = &blkif->vbd;
+    vbd->handle   = handle; 
+    vbd->readonly = readonly;
     vbd->type     = 0;
 
-    /* Mask to 16-bit for compatibility with old tools */
-    vbd->pdevice  = create->pdevice & 0xffff;
+    vbd->pdevice  = pdevice;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
     vbd->bdev = open_by_devnum(
         vbd_map_devnum(vbd->pdevice),
         vbd->readonly ? FMODE_READ : FMODE_WRITE);
     if ( IS_ERR(vbd->bdev) )
     {
         DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
-        return;
+        return -ENOENT;
     }
 
     if ( (vbd->bdev->bd_disk == NULL) )
     {
         DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
-        bdev_put(vbd->bdev);
-        return;
+       vbd_free(vbd);
+        return -ENOENT;
     }
 
     if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
@@ -108,181 +65,27 @@
     if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
         vbd->type |= VDISK_REMOVABLE;
 
-#else
-    if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
-        return;
-    }
-#endif
-
-    spin_lock(&blkif->vbd_lock);
-    rb_link_node(&vbd->rb, rb_parent, rb_p);
-    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
-    spin_unlock(&blkif->vbd_lock);
-
-    DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
-            vdevice, create->domid);
-    create->status = BLKIF_BE_STATUS_OKAY;
+    DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+            handle, blkif->domid);
+    return 0;
 }
 
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 
+void vbd_free(struct vbd *vbd) /* Release the block device held by 'vbd', if any; safe to call more than once. */
 {
-    blkif_t           *blkif;
-    struct vbd        *vbd;
-    rb_node_t         *rb;
-    blkif_vdev_t       vdevice = destroy->vdevice;
-
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, struct vbd, rb);
-        if ( vdevice < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( vdevice > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            goto found;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-    return;
-
- found:
-    spin_lock(&blkif->vbd_lock);
-    rb_erase(rb, &blkif->vbd_rb);
-    spin_unlock(&blkif->vbd_lock);
-    bdev_put(vbd->bdev);
-    kfree(vbd);
+    if (vbd->bdev && !IS_ERR(vbd->bdev)) /* vbd_create() leaves an ERR_PTR here when open_by_devnum() fails */
+       bdev_put(vbd->bdev);
+    vbd->bdev = NULL; /* also clears a stale ERR_PTR, so a later call is a no-op */
 }
-
-
-void destroy_all_vbds(blkif_t *blkif)
-{
-    struct vbd *vbd;
-    rb_node_t  *rb;
-
-    spin_lock(&blkif->vbd_lock);
-
-    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
-    {
-        vbd = rb_entry(rb, struct vbd, rb);
-        rb_erase(rb, &blkif->vbd_rb);
-        spin_unlock(&blkif->vbd_lock);
-        bdev_put(vbd->bdev);
-        kfree(vbd);
-        spin_lock(&blkif->vbd_lock);
-    }
-
-    spin_unlock(&blkif->vbd_lock);
-}
-
-
-static void vbd_probe_single(
-    blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd)
-{
-    vbd_info->device      = vbd->vdevice; 
-    vbd_info->info        = vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
-    vbd_info->capacity    = vbd_sz(vbd);
-    vbd_info->sector_size = bdev_hardsect_size(vbd->bdev);
-}
-
-
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
-{
-    int        rc = 0, nr_vbds = 0;
-    rb_node_t *rb;
-
-    spin_lock(&blkif->vbd_lock);
-
-    if ( (rb = blkif->vbd_rb.rb_node) == NULL )
-        goto out;
-
- new_subtree:
-    /* STEP 1. Find least node (it'll be left-most). */
-    while ( rb->rb_left != NULL )
-        rb = rb->rb_left;
-
-    for ( ; ; )
-    {
-        /* STEP 2. Dealt with left subtree. Now process current node. */
-        vbd_probe_single(blkif, &vbd_info[nr_vbds],
-                         rb_entry(rb, struct vbd, rb));
-        if ( ++nr_vbds == max_vbds )
-            goto out;
-
-        /* STEP 3. Process right subtree, if any. */
-        if ( rb->rb_right != NULL )
-        {
-            rb = rb->rb_right;
-            goto new_subtree;
-        }
-
-        /* STEP 4. Done both subtrees. Head back through ancesstors. */
-        for ( ; ; ) 
-        {
-            /* We're done when we get back to the root node. */
-            if ( rb->rb_parent == NULL )
-                goto out;
-            /* If we are left of parent, then parent is next to process. */
-            if ( rb->rb_parent->rb_left == rb )
-                break;
-            /* If we are right of parent, then we climb to grandparent. */
-            rb = rb->rb_parent;
-        }
-
-        rb = rb->rb_parent;
-    }
-
- out:
-    spin_unlock(&blkif->vbd_lock);
-    return (rc == 0) ? nr_vbds : rc;  
-}
-
 
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
 {
-    struct vbd *vbd;
-    rb_node_t  *rb;
-    int         rc = -EACCES;
+    struct vbd *vbd = &blkif->vbd;
+    int rc = -EACCES;
 
-    /* Take the vbd_lock because another thread could be updating the tree. */
-    spin_lock(&blkif->vbd_lock);
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, struct vbd, rb);
-        if ( req->dev < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( req->dev > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            goto found;
-    }
-
-    DPRINTK("vbd_translate; domain %u attempted to access "
-            "non-existent VBD.\n", blkif->domid);
-    rc = -ENODEV;
-    goto out;
-
- found:
-
-    if ( (operation == WRITE) && vbd->readonly )
+    if ((operation == WRITE) && vbd->readonly)
         goto out;
 
-    if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) )
+    if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
         goto out;
 
     req->dev  = vbd->pdevice;
@@ -290,6 +93,5 @@
     rc = 0;
 
  out:
-    spin_unlock(&blkif->vbd_lock);
     return rc;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu Aug 25 22:53:20 2005
@@ -53,47 +53,26 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <scsi/scsi.h>
-#include <asm-xen/ctrl_if.h>
 #include <asm-xen/evtchn.h>
-#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xenbus.h>
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
-#endif
 
 typedef unsigned char byte; /* from linux/ide.h */
 
 /* Control whether runtime update of vbds is enabled. */
 #define ENABLE_VBD_UPDATE 1
 
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#else
-static void vbd_update(void){};
-#endif
-
-#define BLKIF_STATE_CLOSED       0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED    2
-
-static int blkif_handle = 0;
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn = 0;
-static unsigned int blkif_irq = 0;
-
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
-
-static blkif_front_ring_t blk_ring;
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED    1
+
+static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;
 
 #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-static domid_t rdomid = 0;
-static grant_ref_t gref_head, gref_terminal;
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
 #define GRANTREF_INVALID (1<<15)
-#endif
 
 static struct blk_shadow {
     blkif_request_t req;
@@ -104,9 +83,9 @@
 
 static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
 
-static void kick_pending_request_queues(void);
-
-int __init xlblk_init(void);
+static void kick_pending_request_queues(struct blkfront_info *info);
+
+static int __init xlblk_init(void);
 
 static void blkif_completion(struct blk_shadow *s);
 
@@ -131,7 +110,7 @@
 
 /* Kernel-specific definitions used in the common code */
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define DISABLE_SCATTERGATHER() 
+#define DISABLE_SCATTERGATHER()
 #else
 static int sg_operation = -1;
 #define DISABLE_SCATTERGATHER() (sg_operation = -1)
@@ -139,38 +118,22 @@
 
 static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
 {
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    int i;
-#endif
 
     s->req = *r;
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    for ( i = 0; i < r->nr_segments; i++ )
-        s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
-#endif
 }
 
 static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
 {
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    int i;
-#endif
 
     *r = s->req;
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    for ( i = 0; i < s->req.nr_segments; i++ )
-        r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
-#endif
-}
-
-
-static inline void flush_requests(void)
+}
+
+
+static inline void flush_requests(struct blkfront_info *info) /* Publish queued requests on info->ring and signal info->evtchn. */
 {
     DISABLE_SCATTERGATHER();
-    RING_PUSH_REQUESTS(&blk_ring);
-    notify_via_evtchn(blkif_evtchn);
+    RING_PUSH_REQUESTS(&info->ring); /* push first, so the requests are on the ring before the notification */
+    notify_via_evtchn(info->evtchn);
 }
 
 
@@ -180,58 +143,45 @@
 
 module_init(xlblk_init);
 
-#if ENABLE_VBD_UPDATE
-static void update_vbds_task(void *unused)
-{ 
-    xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
-    static DECLARE_WORK(update_tq, update_vbds_task, NULL);
-    schedule_work(&update_tq);
-}
-#endif /* ENABLE_VBD_UPDATE */
-
-static struct xlbd_disk_info *head_waiting = NULL;
-static void kick_pending_request_queues(void)
-{
-    struct xlbd_disk_info *di;
-    while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) )
-    {
-        head_waiting = di->next_waiting;
-        di->next_waiting = NULL;
-        /* Re-enable calldowns. */
-        blk_start_queue(di->rq);
-        /* Kick things off immediately. */
-        do_blkif_request(di->rq);
-    }
+static void kick_pending_request_queues(struct blkfront_info *info) /* Restart info->rq if its ring has room again. */
+{
+       if (!RING_FULL(&info->ring)) { /* nothing to do while the ring is still full */
+               /* Re-enable calldowns. */
+               blk_start_queue(info->rq);
+               /* Kick things off immediately. */
+               do_blkif_request(info->rq);
+       }
+}
+
+static void blkif_restart_queue(void *arg) /* Kick the queue of the blkfront_info in 'arg' under blkif_io_lock. */
+{
+       struct blkfront_info *info = (struct blkfront_info *)arg;
+       spin_lock_irq(&blkif_io_lock); /* kick_pending_request_queues() is called with the I/O lock held */
+       kick_pending_request_queues(info);
+       spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_restart_queue_callback(void *arg) /* Registered via gnttab_request_free_callback() in blkif_queue_request(). */
+{
+       struct blkfront_info *info = (struct blkfront_info *)arg;
+       schedule_work(&info->work); /* presumably info->work runs blkif_restart_queue() -- its setup is not visible here */
 }
 
 int blkif_open(struct inode *inode, struct file *filep)
 {
-    struct gendisk *gd = inode->i_bdev->bd_disk;
-    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
-    /* Update of usage count is protected by per-device semaphore. */
-    di->mi->usage++;
-    
-    return 0;
+       // struct gendisk *gd = inode->i_bdev->bd_disk;
+       // struct xlbd_disk_info *di = (struct xlbd_disk_info 
*)gd->private_data;
+
+       /* Usage counting is disabled for now (see the commented-out lines); the old update was protected by a per-device semaphore. */
+       // di->mi->usage++;
+
+       return 0;
 }
 
 
 int blkif_release(struct inode *inode, struct file *filep)
 {
-    struct gendisk *gd = inode->i_bdev->bd_disk;
-    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
-    /*
-     * When usage drops to zero it may allow more VBD updates to occur.
-     * Update of usage count is protected by a per-device semaphore.
-     */
-    if ( --di->mi->usage == 0 )
-        vbd_update();
-
+    /* FIXME: This is where we can actually free up majors, etc. --RR */ /* Until then this is a no-op that always returns 0. */
     return 0;
 }
 
@@ -242,8 +192,8 @@
     int i;
 
     DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long)argument, inode->i_rdev); 
-  
+                  command, (long)argument, inode->i_rdev);
+
     switch ( command )
     {
     case HDIO_GETGEO:
@@ -269,7 +219,7 @@
 /*
  * blkif_queue_request
  *
- * request block io 
+ * request block io
  * 
  * id: for guest use only.
  * operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -278,7 +228,7 @@
  */
 static int blkif_queue_request(struct request *req)
 {
-    struct xlbd_disk_info *di = req->rq_disk->private_data;
+    struct blkfront_info *info = req->rq_disk->private_data;
     unsigned long buffer_ma;
     blkif_request_t *ring_req;
     struct bio *bio;
@@ -286,23 +236,29 @@
     int idx;
     unsigned long id;
     unsigned int fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     int ref;
-#endif
-
-    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+    grant_ref_t gref_head;
+
+    if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
         return 1;
 
+    if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
+                                     &gref_head) < 0) {
+           gnttab_request_free_callback(&info->callback,
+                                        blkif_restart_queue_callback, info,
+                                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
+           return 1;
+    }
+
     /* Fill out a communications ring structure. */
-    ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+    ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
     id = GET_ID_FROM_FREELIST();
     blk_shadow[id].request = (unsigned long)req;
 
     ring_req->id = id;
-    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
-        BLKIF_OP_READ;
+    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ;
     ring_req->sector_number = (blkif_sector_t)req->sector;
-    ring_req->device = di->xd_device;
+    ring_req->handle = info->handle;
 
     ring_req->nr_segments = 0;
     rq_for_each_bio(bio, req)
@@ -314,38 +270,35 @@
             buffer_ma = page_to_phys(bvec->bv_page);
             fsect = bvec->bv_offset >> 9;
             lsect = fsect + (bvec->bv_len >> 9) - 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
             /* install a grant reference. */
-            ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+            ref = gnttab_claim_grant_reference(&gref_head);
             ASSERT( ref != -ENOSPC );
 
             gnttab_grant_foreign_access_ref(
                         ref,
-                        rdomid,
+                        info->backend_id,
                         buffer_ma >> PAGE_SHIFT,
                         rq_data_dir(req) );
 
             blk_shadow[id].frame[ring_req->nr_segments] =
                 buffer_ma >> PAGE_SHIFT;
 
-            ring_req->frame_and_sects[ring_req->nr_segments++] =
+            ring_req->frame_and_sects[ring_req->nr_segments] =
                 blkif_fas_from_gref(ref, fsect, lsect);
 
-#else
-            ring_req->frame_and_sects[ring_req->nr_segments++] =
-                blkif_fas(buffer_ma, fsect, lsect);
-#endif
+           ring_req->nr_segments++;
         }
     }
 
-    blk_ring.req_prod_pvt++;
-    
+    info->ring.req_prod_pvt++;
+
     /* Keep a private copy so we can reissue requests when recovering. */
     pickle_request(&blk_shadow[id], ring_req);
 
+    gnttab_free_grant_references(gref_head);
+
     return 0;
 }
-
 
 /*
  * do_blkif_request
@@ -353,24 +306,26 @@
  */
 void do_blkif_request(request_queue_t *rq)
 {
-    struct xlbd_disk_info *di;
+    struct blkfront_info *info = NULL;
     struct request *req;
     int queued;
 
-    DPRINTK("Entered do_blkif_request\n"); 
+    DPRINTK("Entered do_blkif_request\n");
 
     queued = 0;
 
     while ( (req = elv_next_request(rq)) != NULL )
     {
+       info = req->rq_disk->private_data;
+
         if ( !blk_fs_request(req) )
         {
             end_request(req, 0);
             continue;
         }
 
-        if ( RING_FULL(&blk_ring) )
-            goto wait;
+       if (RING_FULL(&info->ring))
+               goto wait;
 
         DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
                 req, req->cmd, req->sector, req->current_nr_sectors,
@@ -378,25 +333,19 @@
                 rq_data_dir(req) ? "write" : "read");
 
         blkdev_dequeue_request(req);
-        if ( blkif_queue_request(req) )
-        {
+        if (blkif_queue_request(req)) {
+               blk_requeue_request(rq, req);
         wait:
-            di = req->rq_disk->private_data;
-            if ( di->next_waiting == NULL )
-            {
-                di->next_waiting = head_waiting;
-                head_waiting = di;
-                /* Avoid pointless unplugs. */
-                blk_stop_queue(rq);
-            }
-            break;
+               /* Avoid pointless unplugs. */
+               blk_stop_queue(rq);
+               break;
         }
 
         queued++;
     }
 
     if ( queued != 0 )
-        flush_requests();
+        flush_requests(info);
 }
 
 
@@ -405,25 +354,24 @@
     struct request *req;
     blkif_response_t *bret;
     RING_IDX i, rp;
-    unsigned long flags; 
-    
-    spin_lock_irqsave(&blkif_io_lock, flags);     
-
-    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || 
-         unlikely(recovery) )
-    {
+    unsigned long flags;
+    struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
         spin_unlock_irqrestore(&blkif_io_lock, flags);
         return IRQ_HANDLED;
     }
-    
-    rp = blk_ring.sring->rsp_prod;
+
+    rp = info->ring.sring->rsp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-    for ( i = blk_ring.rsp_cons; i != rp; i++ )
+    for ( i = info->ring.rsp_cons; i != rp; i++ )
     {
         unsigned long id;
 
-        bret = RING_GET_RESPONSE(&blk_ring, i);
+        bret = RING_GET_RESPONSE(&info->ring, i);
         id   = bret->id;
         req  = (struct request *)blk_shadow[id].request;
 
@@ -440,25 +388,21 @@
                         bret->status);
 
             if ( unlikely(end_that_request_first
-                          (req, 
+                          (req,
                            (bret->status == BLKIF_RSP_OKAY),
                            req->hard_nr_sectors)) )
                 BUG();
             end_that_request_last(req);
 
             break;
-        case BLKIF_OP_PROBE:
-            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
-            blkif_control_rsp_valid = 1;
-            break;
         default:
             BUG();
         }
     }
 
-    blk_ring.rsp_cons = i;
-
-    kick_pending_request_queues();
+    info->ring.rsp_cons = i;
+
+    kick_pending_request_queues(info);
 
     spin_unlock_irqrestore(&blkif_io_lock, flags);
 
@@ -484,56 +428,34 @@
 #define blkif_io_lock io_request_lock
 
 
 /*============================================================================*/
-#if ENABLE_VBD_UPDATE
-
-/*
- * blkif_update_int/update-vbds_task - handle VBD update events.
- *  Schedule a task for keventd to run, which will update the VBDs and perform 
- *  the corresponding updates to our view of VBD state.
- */
-static void update_vbds_task(void *unused)
-{ 
-    xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
-    static struct tq_struct update_tq;
-    update_tq.routine = update_vbds_task;
-    schedule_task(&update_tq);
-}
-
-#endif /* ENABLE_VBD_UPDATE */
-/*============================================================================*/
-
 static void kick_pending_request_queues(void)
 {
     /* We kick pending request queues if the ring is reasonably empty. */
-    if ( (nr_pending != 0) && 
-         (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) )
+    if ( (nr_pending != 0) &&
+         (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) )
     {
         /* Attempt to drain the queue, but bail if the ring becomes full. */
-        while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
+        while ( (nr_pending != 0) && !RING_FULL(&info->ring) )
             do_blkif_request(pending_queues[--nr_pending]);
     }
 }
 
 int blkif_open(struct inode *inode, struct file *filep)
 {
-    short xldev = inode->i_rdev; 
+    short xldev = inode->i_rdev;
     struct gendisk *gd = get_gendisk(xldev);
     xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-    short minor = MINOR(xldev); 
+    short minor = MINOR(xldev);
 
     if ( gd->part[minor].nr_sects == 0 )
-    { 
+    {
         /*
          * Device either doesn't exist, or has zero capacity; we use a few
          * cheesy heuristics to return the relevant error code
          */
         if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
              ((minor & (gd->max_p - 1)) != 0) )
-        { 
+        {
             /*
              * We have a real device, but no such partition, or we just have a
              * partition number so guess this is the problem.
@@ -542,16 +464,16 @@
         }
         else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
         {
-            /* This is a removable device => assume that media is missing. */ 
+            /* This is a removable device => assume that media is missing. */
             return -ENOMEDIUM; /* media not present (this is a guess) */
-        } 
+        }
         else
-        { 
+        {
             /* Just go for the general 'no such device' error. */
             return -ENODEV;    /* no such device */
         }
     }
-    
+
     /* Update of usage count is protected by per-device semaphore. */
     disk->usage++;
 
@@ -580,24 +502,24 @@
 {
     kdev_t dev = inode->i_rdev;
     struct hd_geometry *geo = (struct hd_geometry *)argument;
-    struct gendisk *gd;     
-    struct hd_struct *part; 
+    struct gendisk *gd;
+    struct hd_struct *part;
     int i;
     unsigned short cylinders;
     byte heads, sectors;
 
     /* NB. No need to check permissions. That is done for us. */
-    
+
     DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long) argument, dev); 
-  
+                  command, (long) argument, dev);
+
     gd = get_gendisk(dev);
-    part = &gd->part[MINOR(dev)]; 
+    part = &gd->part[MINOR(dev)];
 
     switch ( command )
     {
     case BLKGETSIZE:
-        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
+        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
         return put_user(part->nr_sects, (unsigned long *) argument);
 
     case BLKGETSIZE64:
@@ -610,7 +532,7 @@
         return blkif_revalidate(dev);
 
     case BLKSSZGET:
-        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
+        return hardsect_size[MAJOR(dev)][MINOR(dev)];
 
     case BLKBSZGET:                                        /* get block size */
         DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
@@ -636,7 +558,7 @@
            values consistent with the size of the device */
 
         heads = 0xff;
-        sectors = 0x3f; 
+        sectors = 0x3f;
         cylinders = part->nr_sects / (heads * sectors);
 
         if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
@@ -646,7 +568,7 @@
 
         return 0;
 
-    case HDIO_GETGEO_BIG: 
+    case HDIO_GETGEO_BIG:
         DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
         if (!argument) return -EINVAL;
 
@@ -654,7 +576,7 @@
            values consistent with the size of the device */
 
         heads = 0xff;
-        sectors = 0x3f; 
+        sectors = 0x3f;
         cylinders = part->nr_sects / (heads * sectors);
 
         if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
@@ -678,7 +600,7 @@
         WPRINTK("ioctl %08x not supported by XL blkif\n", command);
         return -ENOSYS;
     }
-    
+
     return 0;
 }
 
@@ -698,7 +620,7 @@
     xl_disk_t *disk;
     unsigned long capacity;
     int i, rc = 0;
-    
+
     if ( (bd = bdget(dev)) == NULL )
         return -EINVAL;
 
@@ -746,7 +668,7 @@
 /*
  * blkif_queue_request
  *
- * request block io 
+ * request block io
  * 
  * id: for guest use only.
  * operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -758,7 +680,8 @@
                                char *          buffer,
                                unsigned long   sector_number,
                                unsigned short  nr_sectors,
-                               kdev_t          device)
+                               kdev_t          device,
+                              blkif_vdev_t    handle)
 {
     unsigned long       buffer_ma = virt_to_bus(buffer);
     unsigned long       xid;
@@ -766,9 +689,7 @@
     blkif_request_t    *req;
     struct buffer_head *bh;
     unsigned int        fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     int ref;
-#endif
 
     fsect = (buffer_ma & ~PAGE_MASK) >> 9;
     lsect = fsect + nr_sectors - 1;
@@ -776,12 +697,12 @@
     /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
     if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
         BUG();
-    if ( lsect > 7 )
+    if ( lsect > ((PAGE_SIZE/512)-1) )
         BUG();
 
     buffer_ma &= PAGE_MASK;
 
-    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+    if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
         return 1;
 
     switch ( operation )
@@ -789,7 +710,7 @@
 
     case BLKIF_OP_READ:
     case BLKIF_OP_WRITE:
-        gd = get_gendisk(device); 
+        gd = get_gendisk(device);
 
         /*
          * Update the sector_number we'll pass down as appropriate; note that
@@ -799,10 +720,10 @@
         sector_number += gd->part[MINOR(device)].start_sect;
 
         /*
-         * If this unit doesn't consist of virtual partitions then we clear 
+         * If this unit doesn't consist of virtual partitions then we clear
          * the partn bits from the device number.
          */
-        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
+        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
                GENHD_FL_VIRT_PARTNS) )
             device &= ~(gd->max_p - 1);
 
@@ -810,21 +731,20 @@
              (sg_dev == device) &&
              (sg_next_sect == sector_number) )
         {
-            req = RING_GET_REQUEST(&blk_ring, 
-                                   blk_ring.req_prod_pvt - 1);
+            req = RING_GET_REQUEST(&info->ring,
+                                   info->ring.req_prod_pvt - 1);
             bh = (struct buffer_head *)id;
-     
+
             bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
             blk_shadow[req->id].request = (unsigned long)id;
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
             /* install a grant reference. */
-            ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+            ref = gnttab_claim_grant_reference(&gref_head);
             ASSERT( ref != -ENOSPC );
 
             gnttab_grant_foreign_access_ref(
                         ref,
-                        rdomid,
+                        info->backend_id,
                         buffer_ma >> PAGE_SHIFT,
                         ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
 
@@ -833,10 +753,6 @@
 
             req->frame_and_sects[req->nr_segments] =
                 blkif_fas_from_gref(ref, fsect, lsect);
-#else
-            req->frame_and_sects[req->nr_segments] =
-                blkif_fas(buffer_ma, fsect, lsect);
-#endif
             if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
                 sg_next_sect += nr_sectors;
             else
@@ -847,7 +763,7 @@
 
             return 0;
         }
-        else if ( RING_FULL(&blk_ring) )
+        else if ( RING_FULL(&info->ring) )
         {
             return 1;
         }
@@ -864,7 +780,7 @@
     }
 
     /* Fill out a communications ring structure. */
-    req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+    req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 
     xid = GET_ID_FROM_FREELIST();
     blk_shadow[xid].request = (unsigned long)id;
@@ -872,31 +788,27 @@
     req->id            = xid;
     req->operation     = operation;
     req->sector_number = (blkif_sector_t)sector_number;
-    req->device        = device; 
+    req->handle        = handle;
     req->nr_segments   = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     /* install a grant reference. */
-    ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+    ref = gnttab_claim_grant_reference(&gref_head);
     ASSERT( ref != -ENOSPC );
 
     gnttab_grant_foreign_access_ref(
                 ref,
-                rdomid,
+                info->backend_id,
                 buffer_ma >> PAGE_SHIFT,
                 ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
 
     blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
 
     req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect);
-#else
-    req->frame_and_sects[0] = blkif_fas(buffer_ma, fsect, lsect);
-#endif
-
-    /* Keep a private copy so we can reissue requests when recovering. */    
+
+    /* Keep a private copy so we can reissue requests when recovering. */
     pickle_request(&blk_shadow[xid], req);
 
-    blk_ring.req_prod_pvt++;
-    
+    info->ring.req_prod_pvt++;
+
     return 0;
 }
 
@@ -911,13 +823,13 @@
     struct buffer_head *bh, *next_bh;
     int rw, nsect, full, queued = 0;
 
-    DPRINTK("Entered do_blkif_request\n"); 
+    DPRINTK("Entered do_blkif_request\n");
 
     while ( !rq->plugged && !list_empty(&rq->queue_head))
     {
-        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
+        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
             goto out;
-  
+
         DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
                 req, req->cmd, req->sector,
                 req->current_nr_sectors, req->nr_sectors, req->bh);
@@ -938,16 +850,16 @@
 
             full = blkif_queue_request(
                 (unsigned long)bh,
-                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 
+                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
                 bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
 
             if ( full )
-            { 
+            {
                 bh->b_reqnext = next_bh;
                 pending_queues[nr_pending++] = rq;
                 if ( unlikely(nr_pending >= MAX_PENDING) )
                     BUG();
-                goto out; 
+                goto out;
             }
 
             queued++;
@@ -955,7 +867,7 @@
             /* Dequeue the buffer head from the request. */
             nsect = bh->b_size >> 9;
             bh = req->bh = next_bh;
-            
+
             if ( bh != NULL )
             {
                 /* There's another buffer head to do. Update the request. */
@@ -985,27 +897,27 @@
 
 static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 {
-    RING_IDX i, rp; 
-    unsigned long flags; 
+    RING_IDX i, rp;
+    unsigned long flags;
     struct buffer_head *bh, *next_bh;
-    
-    spin_lock_irqsave(&io_request_lock, flags);     
-
-    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
+
+    spin_lock_irqsave(&io_request_lock, flags);
+
+    if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) )
     {
         spin_unlock_irqrestore(&io_request_lock, flags);
         return;
     }
 
-    rp = blk_ring.sring->rsp_prod;
+    rp = info->ring.sring->rsp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-    for ( i = blk_ring.rsp_cons; i != rp; i++ )
+    for ( i = info->ring.rsp_cons; i != rp; i++ )
     {
         unsigned long id;
         blkif_response_t *bret;
-        
-        bret = RING_GET_RESPONSE(&blk_ring, i);
+
+        bret = RING_GET_RESPONSE(&info->ring, i);
         id = bret->id;
         bh = (struct buffer_head *)blk_shadow[id].request;
 
@@ -1037,8 +949,8 @@
         }
 
     }
-    blk_ring.rsp_cons = i;
-    
+    info->ring.rsp_cons = i;
+
     kick_pending_request_queues();
 
     spin_unlock_irqrestore(&io_request_lock, flags);
@@ -1048,157 +960,29 @@
 
 /*****************************  COMMON CODE  *******************************/
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
-                              unsigned long address)
-{
-    int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
-    ASSERT( ref != -ENOSPC );
-
-    gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
-
-    req->frame_and_sects[0] = blkif_fas_from_gref(ref, 0, (PAGE_SIZE/512)-1);
-
-    blkif_control_send(req, rsp);
-}
-#endif
-
-void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
-    unsigned long flags, id;
-    blkif_request_t *req_d;
-
- retry:
-    while ( RING_FULL(&blk_ring) )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-    }
-
-    spin_lock_irqsave(&blkif_io_lock, flags);
-    if ( RING_FULL(&blk_ring) )
-    {
-        spin_unlock_irqrestore(&blkif_io_lock, flags);
-        goto retry;
-    }
-
-    DISABLE_SCATTERGATHER();
-    req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
-    *req_d = *req;    
-
-    id = GET_ID_FROM_FREELIST();
-    req_d->id = id;
-    blk_shadow[id].request = (unsigned long)req;
-
-    pickle_request(&blk_shadow[id], req);
-
-    blk_ring.req_prod_pvt++;
-    flush_requests();
-
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-    while ( !blkif_control_rsp_valid )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-    }
-
-    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
-    blkif_control_rsp_valid = 0;
-}
-
-
-/* Send a driver status notification to the domain controller. */
-static void send_driver_status(int ok)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_BLKIF_FE,
-        .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
-        .length  = sizeof(blkif_fe_driver_status_t),
-    };
-    blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
-    
-    msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
-
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Tell the controller to bring up the interface. */
-static void blkif_send_interface_connect(void)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_BLKIF_FE,
-        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
-        .length  = sizeof(blkif_fe_interface_connect_t),
-    };
-    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-    
-    msg->handle      = 0;
-    msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
-    
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    msg->shmem_ref   = gnttab_claim_grant_reference( &gref_head, gref_terminal );
-    ASSERT( msg->shmem_ref != -ENOSPC );
-    gnttab_grant_foreign_access_ref ( msg->shmem_ref , rdomid, msg->shmem_frame, 0 );
-#endif
-
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void blkif_free(void)
+static void blkif_free(struct blkfront_info *info)
 {
     /* Prevent new requests being issued until we fix things up. */
     spin_lock_irq(&blkif_io_lock);
-    recovery = 1;
-    blkif_state = BLKIF_STATE_DISCONNECTED;
+    info->connected = BLKIF_STATE_DISCONNECTED;
     spin_unlock_irq(&blkif_io_lock);
 
     /* Free resources associated with old device channel. */
-    if ( blk_ring.sring != NULL )
-    {
-        free_page((unsigned long)blk_ring.sring);
-        blk_ring.sring = NULL;
-    }
-    free_irq(blkif_irq, NULL);
-    blkif_irq = 0;
-    
-    unbind_evtchn_from_irq(blkif_evtchn);
-    blkif_evtchn = 0;
-}
-
-static void blkif_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_disconnect(void)
-{
-    blkif_sring_t *sring;
-    
-    if ( blk_ring.sring != NULL )
-        free_page((unsigned long)blk_ring.sring);
-    
-    sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
-    SHARED_RING_INIT(sring);
-    FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
-    blkif_state  = BLKIF_STATE_DISCONNECTED;
-    blkif_send_interface_connect();
-}
-
-static void blkif_reset(void)
-{
-    blkif_free();
-    blkif_disconnect();
-}
-
-static void blkif_recover(void)
+    if ( info->ring.sring != NULL )
+    {
+        free_page((unsigned long)info->ring.sring);
+        info->ring.sring = NULL;
+    }
+    unbind_evtchn_from_irqhandler(info->evtchn, NULL);
+    info->evtchn = 0;
+}
+
+static void blkif_recover(struct blkfront_info *info)
 {
     int i;
     blkif_request_t *req;
     struct blk_shadow *copy;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     int j;
-#endif
 
     /* Stage 1: Make a safe copy of the shadow state. */
     copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
@@ -1209,7 +993,7 @@
     memset(&blk_shadow, 0, sizeof(blk_shadow));
     for ( i = 0; i < BLK_RING_SIZE; i++ )
         blk_shadow[i].req.id = i+1;
-    blk_shadow_free = blk_ring.req_prod_pvt;
+    blk_shadow_free = info->ring.req_prod_pvt;
     blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
     /* Stage 3: Find pending requests and requeue them. */
@@ -1221,195 +1005,339 @@
 
         /* Grab a request slot and unpickle shadow state into it. */
         req = RING_GET_REQUEST(
-            &blk_ring, blk_ring.req_prod_pvt);
+            &info->ring, info->ring.req_prod_pvt);
         unpickle_request(req, &copy[i]);
 
         /* We get a new request id, and must reset the shadow state. */
         req->id = GET_ID_FROM_FREELIST();
         memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
         /* Rewrite any grant references invalidated by suspend/resume. */
         for ( j = 0; j < req->nr_segments; j++ )
         {
             if ( req->frame_and_sects[j] & GRANTREF_INVALID )
                 gnttab_grant_foreign_access_ref(
                     blkif_gref_from_fas(req->frame_and_sects[j]),
-                    rdomid,
+                    info->backend_id,
                     blk_shadow[req->id].frame[j],
                     rq_data_dir((struct request *)
                                 blk_shadow[req->id].request));
             req->frame_and_sects[j] &= ~GRANTREF_INVALID;
         }
         blk_shadow[req->id].req = *req;
-#endif
-
-        blk_ring.req_prod_pvt++;
+
+        info->ring.req_prod_pvt++;
     }
 
     kfree(copy);
 
     recovery = 0;
 
-    /* blk_ring->req_prod will be set when we flush_requests().*/
+    /* info->ring->req_prod will be set when we flush_requests().*/
     wmb();
 
     /* Kicks things back into life. */
-    flush_requests();
+    flush_requests(info);
 
     /* Now safe to left other people use the interface. */
-    blkif_state = BLKIF_STATE_CONNECTED;
-}
-
-static void blkif_connect(blkif_fe_interface_status_t *status)
+    info->connected = BLKIF_STATE_CONNECTED;
+}
+
+static void blkif_connect(struct blkfront_info *info, u16 evtchn)
 {
     int err = 0;
 
-    blkif_evtchn = status->evtchn;
-    blkif_irq    = bind_evtchn_to_irq(blkif_evtchn);
-
-    err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
-    if ( err )
-    {
-        WPRINTK("request_irq failed (err=%d)\n", err);
+    info->evtchn = evtchn;
+
+    err = bind_evtchn_to_irqhandler(
+        info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
+    if ( err != 0 )
+    {
+        WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
         return;
     }
-
-    if ( recovery ) 
-    {
-        blkif_recover();
-    } 
-    else 
-    {
-        /* Transition to connected in case we need to do 
-         *  a partition probe on a whole disk. */
-        blkif_state = BLKIF_STATE_CONNECTED;
-        
-        /* Probe for discs attached to the interface. */
-        xlvbd_init();
-    }
-    
-    /* Kick pending requests. */
-    spin_lock_irq(&blkif_io_lock);
-    kick_pending_request_queues();
-    spin_unlock_irq(&blkif_io_lock);
-}
-
-static void unexpected(blkif_fe_interface_status_t *status)
-{
-    DPRINTK(" Unexpected blkif status %u in state %u\n", 
-            status->status, blkif_state);
-}
-
-static void blkif_status(blkif_fe_interface_status_t *status)
-{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    rdomid       = status->domid; /* need to set rdomid early */
-#endif
-
-    if ( status->handle != blkif_handle )
-    {
-        WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
-        unexpected(status);
-        return;
-    }
-
-    switch ( status->status ) 
-    {
-    case BLKIF_INTERFACE_STATUS_CLOSED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_close();
-            break;
-        }
-        break;
-
-    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            blkif_disconnect();
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            /* unexpected(status); */ /* occurs during suspend/resume */
-            blkif_reset();
-            break;
-        }
-        break;
-
-    case BLKIF_INTERFACE_STATUS_CONNECTED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            blkif_disconnect();
-            blkif_connect(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-            blkif_connect(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_connect(status);
-            break;
-        }
-        break;
-
-    case BLKIF_INTERFACE_STATUS_CHANGED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-        case BLKIF_STATE_DISCONNECTED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            vbd_update();
-            break;
-        }
-        break;
-
-    default:
-        WPRINTK(" Invalid blkif status: %d\n", status->status);
-        break;
-    }
-}
-
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->subtype )
-    {
-    case CMSG_BLKIF_FE_INTERFACE_STATUS:
-        blkif_status((blkif_fe_interface_status_t *)
-                     &msg->msg[0]);
-        break;
-    default:
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-int wait_for_blkif(void)
+}
+
+
+static struct xenbus_device_id blkfront_ids[] = {
+       { "vbd" },
+       { "" }
+};
+
+static void watch_for_status(struct xenbus_watch *watch, const char *node)
+{
+       struct blkfront_info *info;
+       unsigned int binfo;
+       unsigned long sectors, sector_size;
+       int err;
+
+       info = container_of(watch, struct blkfront_info, watch);
+       node += strlen(watch->node);
+
+       /* FIXME: clean up when error on the other end. */
+       if (info->connected == BLKIF_STATE_CONNECTED)
+               return;
+
+       err = xenbus_gather(watch->node,
+                           "sectors", "%lu", &sectors,
+                           "info", "%u", &binfo,
+                           "sector-size", "%lu", &sector_size,
+                           NULL);
+       if (err) {
+               xenbus_dev_error(info->xbdev, err,
+                                "reading backend fields at %s", watch->node);
+               return;
+       }
+
+       xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       info->connected = BLKIF_STATE_CONNECTED;
+
+       blkif_state = BLKIF_STATE_CONNECTED;
+
+       xenbus_dev_ok(info->xbdev);
+
+       /* Kick pending requests. */
+       spin_lock_irq(&blkif_io_lock);
+       kick_pending_request_queues(info);
+       spin_unlock_irq(&blkif_io_lock);
+}
+
+static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
+{
+       blkif_sring_t *sring;
+       evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
+       int err;
+
+       sring = (void *)__get_free_page(GFP_KERNEL);
+       if (!sring) {
+               xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
+               return -ENOMEM;
+       }
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+       err = gnttab_grant_foreign_access(info->backend_id,
+                                         virt_to_mfn(info->ring.sring), 0);
+       if (err == -ENOSPC) {
+               free_page((unsigned long)info->ring.sring);
+               info->ring.sring = 0;
+               xenbus_dev_error(dev, err, "granting access to ring page");
+               return err;
+       }
+       info->ring_ref = err;
+
+       op.u.alloc_unbound.dom = info->backend_id;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               gnttab_end_foreign_access(info->ring_ref, 0);
+               free_page((unsigned long)info->ring.sring);
+               info->ring.sring = 0;
+               xenbus_dev_error(dev, err, "allocating event channel");
+               return err;
+       }
+       blkif_connect(info, op.u.alloc_unbound.port);
+       return 0;
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+                          struct blkfront_info *info)
+{
+       char *backend;
+       const char *message;
+       int err;
+
+       backend = NULL;
+       err = xenbus_gather(dev->nodename,
+                           "backend-id", "%i", &info->backend_id,
+                           "backend", NULL, &backend,
+                           NULL);
+       if (XENBUS_EXIST_ERR(err))
+               goto out;
+       if (backend && strlen(backend) == 0) {
+               err = -ENOENT;
+               goto out;
+       }
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
+                                dev->nodename);
+               goto out;
+       }
+
+       /* Create shared ring, alloc event channel. */
+       err = setup_blkring(dev, info);
+       if (err) {
+               xenbus_dev_error(dev, err, "setting up block ring");
+               goto out;
+       }
+
+       err = xenbus_transaction_start(dev->nodename);
+       if (err) {
+               xenbus_dev_error(dev, err, "starting transaction");
+               goto destroy_blkring;
+       }
+
+       err = xenbus_printf(dev->nodename, "ring-ref","%u", info->ring_ref);
+       if (err) {
+               message = "writing ring-ref";
+               goto abort_transaction;
+       }
+       err = xenbus_printf(dev->nodename,
+                           "event-channel", "%u", info->evtchn);
+       if (err) {
+               message = "writing event-channel";
+               goto abort_transaction;
+       }
+
+       info->backend = backend;
+       backend = NULL;
+
+       info->watch.node = info->backend;
+       info->watch.callback = watch_for_status;
+       err = register_xenbus_watch(&info->watch);
+       if (err) {
+               message = "registering watch on backend";
+               goto abort_transaction;
+       }
+
+       err = xenbus_transaction_end(0);
+       if (err) {
+               xenbus_dev_error(dev, err, "completing transaction");
+               goto destroy_blkring;
+       }
+
+ out:
+       if (backend)
+               kfree(backend);
+       return err;
+
+ abort_transaction:
+       xenbus_transaction_end(1);
+       /* Have to do this *outside* transaction.  */
+       xenbus_dev_error(dev, err, "%s", message);
+ destroy_blkring:
+       blkif_free(info);
+       goto out;
+}
+
+/* Setup supplies the backend dir, virtual device.
+
+   We place an event channel and shared frame entries.
+   We watch backend to wait if it's ok. */
+static int blkfront_probe(struct xenbus_device *dev,
+                         const struct xenbus_device_id *id)
+{
+       int err;
+       struct blkfront_info *info;
+       int vdevice;
+
+       /* FIXME: Use dynamic device id if this is not set. */
+       err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
+       if (XENBUS_EXIST_ERR(err))
+               return err;
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading virtual-device");
+               return err;
+       }
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               xenbus_dev_error(dev, err, "allocating info structure");
+               return err;
+       }
+       info->xbdev = dev;
+       info->vdevice = vdevice;
+       info->connected = BLKIF_STATE_DISCONNECTED;
+       info->mi = NULL;
+       INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
+
+       /* Front end dir is a number, which is used as the id. */
+       info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+       dev->data = info;
+
+       err = talk_to_backend(dev, info);
+       if (err) {
+               kfree(info);
+               dev->data = NULL;
+               return err;
+       }
+
+       /* Call once in case entries already there. */
+       watch_for_status(&info->watch, info->watch.node);
+       return 0;
+}
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->data;
+
+       if (info->backend)
+               unregister_xenbus_watch(&info->watch);
+
+       if (info->mi)
+               xlvbd_del(info);
+
+       blkif_free(info);
+
+       kfree(info->backend);
+       kfree(info);
+
+       return 0;
+}
+
+static int blkfront_suspend(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->data;
+
+       unregister_xenbus_watch(&info->watch);
+       kfree(info->backend);
+       info->backend = NULL;
+
+       recovery = 1;
+       blkif_free(info);
+
+       return 0;
+}
+
+static int blkfront_resume(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->data;
+       int err;
+
+       /* FIXME: Check geometry hasn't changed here... */
+       err = talk_to_backend(dev, info);
+       if (!err) {
+               blkif_recover(info);
+       }
+       return err;
+}
+
+static struct xenbus_driver blkfront = {
+       .name = "vbd",
+       .owner = THIS_MODULE,
+       .ids = blkfront_ids,
+       .probe = blkfront_probe,
+       .remove = blkfront_remove,
+       .resume = blkfront_resume,
+       .suspend = blkfront_suspend,
+};
+
+static void __init init_blk_xenbus(void)
+{
+       xenbus_register_device(&blkfront);
+}
+
+static int wait_for_blkif(void)
 {
     int err = 0;
     int i;
-    send_driver_status(1);
 
     /*
-     * We should read 'nr_interfaces' from response message and wait
-     * for notifications before proceeding. For now we assume that we
-     * will be notified of exactly one interface.
+     * We should figure out how many and which devices we need to
+     * proceed and only wait for those.  For now, continue once the
+     * first device is around.
      */
-    for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+    for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ )
     {
         set_current_state(TASK_INTERRUPTIBLE);
         schedule_timeout(1);
@@ -1423,17 +1351,9 @@
     return err;
 }
 
-int __init xlblk_init(void)
+static int __init xlblk_init(void)
 {
     int i;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    /* A grant for every ring slot, plus one for the ring itself. */
-    if ( 0 > gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1,
-                                           &gref_head, &gref_terminal) )
-        return 1;
-    printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
-#endif
 
     if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
          (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
@@ -1447,46 +1367,17 @@
         blk_shadow[i].req.id = i+1;
     blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
+    init_blk_xenbus();
 
     wait_for_blkif();
 
     return 0;
 }
 
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    int i, j;
-    for ( i = 0; i < BLK_RING_SIZE; i++ )
-        for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
-            blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
-#endif
-    send_driver_status(1);
-}
-
 static void blkif_completion(struct blk_shadow *s)
 {
     int i;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     for ( i = 0; i < s->req.nr_segments; i++ )
-        gnttab_release_grant_reference(
-            &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
-#else
-    /* This is a hack to get the dirty logging bits set */
-    if ( s->req.operation == BLKIF_OP_READ )
-    {
-        for ( i = 0; i < s->req.nr_segments; i++ )
-        {
-            unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
-            unsigned long mfn = phys_to_machine_mapping[pfn];
-            xen_machphys_update(mfn, pfn);
-        }
-    }
-#endif
-}
+        gnttab_free_grant_reference(
+               blkif_gref_from_fas(s->req.frame_and_sects[i]));
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Aug 25 22:53:20 2005
@@ -33,6 +33,7 @@
 #define __XEN_DRIVERS_BLOCK_H__
 
 #include <linux/config.h>
+#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -44,6 +45,8 @@
 #include <linux/blkdev.h>
 #include <linux/major.h>
 #include <linux/devfs_fs_kernel.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xenbus.h>
 #include <asm-xen/xen-public/xen.h>
 #include <asm-xen/xen-public/io/blkif.h>
 #include <asm-xen/xen-public/io/ring.h>
@@ -77,11 +80,20 @@
 #define DPRINTK_IOCTL(_f, _a...) ((void)0)
 #endif
 
-struct xlbd_type_info {
-    int partn_shift;
-    int disks_per_major;
-    char *devname;
-    char *diskname;
+/*
+ * Naming and minor-number layout shared by all majors of one device
+ * family (the IDE, SCSI and xvd tables in vbd.c).
+ */
+struct xlbd_type_info
+{
+       /* A whole disk owns 1 << partn_shift consecutive minors. */
+       int partn_shift;
+       /* Disks per major; used when deriving the drive letter of a disk. */
+       int disks_per_major;
+       /* Name passed to register_blkdev() and devfs_mk_dir(). */
+       char *devname;
+       /* Prefix of the gendisk name ("hd", "sd" or "xvd"). */
+       char *diskname;
+};
+
+/* Per-major bookkeeping, allocated on first use in vbd.c. */
+struct xlbd_major_info
+{
+       /* Block major number passed to register_blkdev(). */
+       int major;
+       /* Position of this major within its device family's majors. */
+       int index;
+       /* Count of users: raised by xlbd_get_major_info(), lowered by
+        * xlbd_put_major_info(). */
+       int usage;
+       /* Device family (IDE, SCSI or xvd) this major belongs to. */
+       struct xlbd_type_info *type;
 };
 
 /*
@@ -89,25 +101,27 @@
  * hang in private_data off the gendisk structure. We may end up
  * putting all kinds of interesting stuff here :-)
  */
-struct xlbd_major_info {
-    int major;
-    int index;
-    int usage;
-    struct xlbd_type_info *type;
+struct blkfront_info
+{
+       struct xenbus_device *xbdev;
+       /* We watch the backend */
+       struct xenbus_watch watch;
+       /* Device number built by xlvbd_add() from the virtual device id. */
+       dev_t dev;
+       int vdevice;
+       blkif_vdev_t handle;
+       int connected;
+       /* kmalloc'd string, freed on remove/suspend; NULL when not set.
+        * NOTE(review): presumably the backend's xenstore path -- confirm. */
+       char *backend;
+       int backend_id;
+       int ring_ref;
+       blkif_front_ring_t ring;
+       unsigned int evtchn;
+       /* Major bookkeeping; set by xlvbd_alloc_gendisk(), reset to NULL
+        * by xlvbd_del().  Non-NULL tells blkfront_remove() to call
+        * xlvbd_del(). */
+       struct xlbd_major_info *mi;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+       /* Request queue of the gendisk created for this device. */
+       request_queue_t *rq;
+#endif
+       struct work_struct work;
+       struct gnttab_free_callback callback;
 };
-
-struct xlbd_disk_info {
-    int xd_device;
-    struct xlbd_major_info *mi;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    struct xlbd_disk_info  *next_waiting;
-    request_queue_t        *rq;
-#endif
-};
-
-typedef struct xen_block {
-    int usage;
-} xen_block_t;
 
 extern spinlock_t blkif_io_lock;
 
@@ -117,17 +131,10 @@
                        unsigned command, unsigned long argument);
 extern int blkif_check(dev_t dev);
 extern int blkif_revalidate(dev_t dev);
-extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-extern void blkif_control_probe_send(
-    blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
-#endif
 extern void do_blkif_request (request_queue_t *rq); 
 
-extern void xlvbd_update_vbds(void);
-
 /* Virtual block-device subsystem. */
-extern int  xlvbd_init(void);
-extern void xlvbd_cleanup(void); 
-
+int xlvbd_add(blkif_sector_t capacity, int device,
+             u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
 #endif /* __XEN_DRIVERS_BLOCK_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Thu Aug 25 22:53:20 2005
@@ -43,458 +43,269 @@
 #define NUM_SCSI_MAJORS 9
 #define NUM_VBD_MAJORS 1
 
-struct lvdisk
-{
-    blkif_sector_t capacity; /*  0: Size in terms of 512-byte sectors.   */
-    blkif_vdev_t   device;   /*  8: Device number (opaque 16 bit value). */
-    u16            info; 
-    struct list_head list;
+static struct xlbd_type_info xlbd_ide_type = {
+       .partn_shift = 6,
+       .disks_per_major = 2,
+       .devname = "ide",
+       .diskname = "hd",
 };
 
-static struct xlbd_type_info xlbd_ide_type = {
-    .partn_shift = 6,
-    .disks_per_major = 2,
-    .devname = "ide",
-    .diskname = "hd",
+static struct xlbd_type_info xlbd_scsi_type = {
+       .partn_shift = 4,
+       .disks_per_major = 16,
+       .devname = "sd",
+       .diskname = "sd",
 };
 
-static struct xlbd_type_info xlbd_scsi_type = {
-    .partn_shift = 4,
-    .disks_per_major = 16,
-    .devname = "sd",
-    .diskname = "sd",
+static struct xlbd_type_info xlbd_vbd_type = {
+       .partn_shift = 4,
+       .disks_per_major = 16,
+       .devname = "xvd",
+       .diskname = "xvd",
 };
 
-static struct xlbd_type_info xlbd_vbd_type = {
-    .partn_shift = 4,
-    .disks_per_major = 16,
-    .devname = "xvd",
-    .diskname = "xvd",
-};
-
 static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
-                                         NUM_VBD_MAJORS];
-
-#define XLBD_MAJOR_IDE_START    0
-#define XLBD_MAJOR_SCSI_START   (NUM_IDE_MAJORS)
-#define XLBD_MAJOR_VBD_START    (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
-
-#define XLBD_MAJOR_IDE_RANGE    XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
-#define XLBD_MAJOR_SCSI_RANGE   XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
-#define XLBD_MAJOR_VBD_RANGE    XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
+                                         NUM_VBD_MAJORS];
+
+#define XLBD_MAJOR_IDE_START   0
+#define XLBD_MAJOR_SCSI_START  (NUM_IDE_MAJORS)
+#define XLBD_MAJOR_VBD_START   (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
+
+#define XLBD_MAJOR_IDE_RANGE   XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
+#define XLBD_MAJOR_SCSI_RANGE  XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
+#define XLBD_MAJOR_VBD_RANGE   XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
 
 /* Information about our VBDs. */
 #define MAX_VBDS 64
-struct list_head vbds_list;
-
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
-
-static struct block_device_operations xlvbd_block_fops = 
-{
-    .owner  = THIS_MODULE,
-    .open  = blkif_open,
-    .release = blkif_release,
-    .ioctl  = blkif_ioctl,
+static LIST_HEAD(vbds_list);
+
+static struct block_device_operations xlvbd_block_fops =
+{
+       .owner = THIS_MODULE,
+       .open = blkif_open,
+       .release = blkif_release,
+       .ioctl  = blkif_ioctl,
 };
 
 spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
 
-static struct lvdisk *xlvbd_device_alloc(void)
-{
-    struct lvdisk *disk;
-
-    disk = kmalloc(sizeof(*disk), GFP_KERNEL);
-    if (disk != NULL) {
-        memset(disk, 0, sizeof(*disk));
-        INIT_LIST_HEAD(&disk->list);
-    }
-    return disk;
-}
-
-static void xlvbd_device_free(struct lvdisk *disk)
-{
-    list_del(&disk->list);
-    kfree(disk);
-}
-
-static vdisk_t *xlvbd_probe(int *ret)
-{
-    blkif_response_t rsp;
-    blkif_request_t req;
-    vdisk_t *disk_info = NULL;
-    unsigned long buf;
-    int nr;
-
-    buf = __get_free_page(GFP_KERNEL);
-    if ((void *)buf == NULL)
-        goto out;
-
-    memset(&req, 0, sizeof(req));
-    req.operation = BLKIF_OP_PROBE;
-    req.nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    blkif_control_probe_send(&req, &rsp,
-                             (unsigned long)(virt_to_machine(buf)));
-#else
-    req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0, ((PAGE_SIZE/512)-1);
-
-    blkif_control_send(&req, &rsp);
-#endif
-    if ( rsp.status <= 0 ) {
-        WPRINTK("Could not probe disks (%d)\n", rsp.status);
-        goto out;
-    }
-    nr = rsp.status;
-    if ( nr > MAX_VBDS )
-        nr = MAX_VBDS;
-
-    disk_info = kmalloc(nr * sizeof(vdisk_t), GFP_KERNEL);
-    if (disk_info != NULL)
-        memcpy(disk_info, (void *) buf, nr * sizeof(vdisk_t));
-
-    if (ret != NULL)
-        *ret = nr;
-
-out:
-    free_page(buf);
-    return disk_info;
-}
-
-static struct xlbd_major_info *xlbd_alloc_major_info(
-    int major, int minor, int index)
-{
-    struct xlbd_major_info *ptr;
-
-    ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
-    if (ptr == NULL)
-        return NULL;
-
-    memset(ptr, 0, sizeof(struct xlbd_major_info));
-
-    ptr->major = major;
-
-    switch (index) {
-    case XLBD_MAJOR_IDE_RANGE:
-        ptr->type = &xlbd_ide_type;
-        ptr->index = index - XLBD_MAJOR_IDE_START;
-        break;
-    case XLBD_MAJOR_SCSI_RANGE:
-        ptr->type = &xlbd_scsi_type;
-        ptr->index = index - XLBD_MAJOR_SCSI_START;
-        break;
-    case XLBD_MAJOR_VBD_RANGE:
-        ptr->type = &xlbd_vbd_type;
-        ptr->index = index - XLBD_MAJOR_VBD_START;
-        break;
-    }
-    
-    if (register_blkdev(ptr->major, ptr->type->devname)) {
-        WPRINTK("can't get major %d with name %s\n",
-                ptr->major, ptr->type->devname);
-        kfree(ptr);
-        return NULL;
-    }
-
-    devfs_mk_dir(ptr->type->devname);
-    major_info[index] = ptr;
-    return ptr;
-}
-
-static struct xlbd_major_info *xlbd_get_major_info(int device)
-{
-    int major, minor, index;
-
-    major = MAJOR_XEN(device);
-    minor = MINOR_XEN(device);
-
-    switch (major) {
-    case IDE0_MAJOR: index = 0; break;
-    case IDE1_MAJOR: index = 1; break;
-    case IDE2_MAJOR: index = 2; break;
-    case IDE3_MAJOR: index = 3; break;
-    case IDE4_MAJOR: index = 4; break;
-    case IDE5_MAJOR: index = 5; break;
-    case IDE6_MAJOR: index = 6; break;
-    case IDE7_MAJOR: index = 7; break;
-    case IDE8_MAJOR: index = 8; break;
-    case IDE9_MAJOR: index = 9; break;
-    case SCSI_DISK0_MAJOR: index = 10; break;
-    case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
-        index = 11 + major - SCSI_DISK1_MAJOR;
-        break;
-    case SCSI_CDROM_MAJOR: index = 18; break;
-    default: index = 19; break;
-    }
-
-    return ((major_info[index] != NULL) ? major_info[index] :
-            xlbd_alloc_major_info(major, minor, index));
-}
-
-static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk)
-{
-    request_queue_t *rq;
-
-    rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
-    if (rq == NULL)
-        return -1;
-
-    elevator_init(rq, "noop");
-
-    /* Hard sector size and max sectors impersonate the equiv. hardware. */
-    blk_queue_hardsect_size(rq, disk->sector_size);
-    blk_queue_max_sectors(rq, 512);
-
-    /* Each segment in a request is up to an aligned page in size. */
-    blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-    blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-    /* Ensure a merged request will fit in a single I/O ring slot. */
-    blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-    blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-    /* Make sure buffer addresses are sector-aligned. */
-    blk_queue_dma_alignment(rq, 511);
-
-    gd->queue = rq;
-
-    return 0;
-}
-
-struct gendisk *xlvbd_alloc_gendisk(
-    struct xlbd_major_info *mi, int minor, vdisk_t *disk)
-{
-    struct gendisk *gd;
-    struct xlbd_disk_info *di;
-    int nr_minors = 1;
-
-    di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
-    if (di == NULL)
-        return NULL;
-    memset(di, 0, sizeof(*di));
-    di->mi = mi;
-    di->xd_device = disk->device;
-
-    if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
-        nr_minors = 1 << mi->type->partn_shift;
-
-    gd = alloc_disk(nr_minors);
-    if (gd == NULL)
-        goto out;
-
-    if (nr_minors > 1)
-        sprintf(gd->disk_name, "%s%c", mi->type->diskname,
-                'a' + mi->index * mi->type->disks_per_major +
-                    (minor >> mi->type->partn_shift));
-    else
-        sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
-                'a' + mi->index * mi->type->disks_per_major +
-                (minor >> mi->type->partn_shift),
-                minor & ((1 << mi->type->partn_shift) - 1));
-
-    gd->major = mi->major;
-    gd->first_minor = minor;
-    gd->fops = &xlvbd_block_fops;
-    gd->private_data = di;
-    set_capacity(gd, disk->capacity);
-
-    if (xlvbd_init_blk_queue(gd, disk)) {
-        del_gendisk(gd);
-        goto out;
-    }
-
-    di->rq = gd->queue;
-
-    if (disk->info & VDISK_READONLY)
-        set_disk_ro(gd, 1);
-
-    if (disk->info & VDISK_REMOVABLE)
-        gd->flags |= GENHD_FL_REMOVABLE;
-
-    if (disk->info & VDISK_CDROM)
-        gd->flags |= GENHD_FL_CD;
-
-    add_disk(gd);
-
-    return gd;
-
-out:
-    kfree(di);
-    return NULL;
-}
-
-static int xlvbd_device_add(struct list_head *list, vdisk_t *disk)
-{
-    struct lvdisk *new;
-    int minor;
-    dev_t device;
-    struct block_device *bd;
-    struct gendisk *gd;
-    struct xlbd_major_info *mi;
-
-    mi = xlbd_get_major_info(disk->device);
-    if (mi == NULL)
-        return -EPERM;
-
-    new = xlvbd_device_alloc();
-    if (new == NULL)
-        return -1;
-    new->capacity = disk->capacity;
-    new->device = disk->device;
-    new->info = disk->info;
-    
-    minor = MINOR_XEN(disk->device);
-    device = MKDEV(mi->major, minor);
-    
-    bd = bdget(device);
-    if (bd == NULL)
-        goto out;
-    
-    gd = xlvbd_alloc_gendisk(mi, minor, disk);
-    if (gd == NULL)
-        goto out_bd;
-
-    list_add(&new->list, list);
-out_bd:
-    bdput(bd);
-out:
-    return 0;
-}
-
-static int xlvbd_device_del(struct lvdisk *disk)
-{
-    dev_t device;
-    struct block_device *bd;
-    struct gendisk *gd;
-    struct xlbd_disk_info *di;
-    int ret = 0, unused;
-    request_queue_t *rq;
-
-    device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device));
-
-    bd = bdget(device);
-    if (bd == NULL)
-        return -1;
-
-    gd = get_gendisk(device, &unused);
-    di = gd->private_data;
-
-    if (di->mi->usage != 0) {
-        WPRINTK("disk removal failed: used [dev=%x]\n", device);
-        ret = -1;
-        goto out;
-    }
-
-    rq = gd->queue;
-    del_gendisk(gd);
-    put_disk(gd);
-    blk_cleanup_queue(rq);
-
-    xlvbd_device_free(disk);
-out:
-    bdput(bd);
-    return ret;
-}
-
-static int xlvbd_device_update(struct lvdisk *ldisk, vdisk_t *disk)
-{
-    dev_t device;
-    struct block_device *bd;
-    struct gendisk *gd;
-    int unused;
-
-    if ((ldisk->capacity == disk->capacity) && (ldisk->info == disk->info))
-        return 0;    
-
-    device = MKDEV(MAJOR_XEN(ldisk->device), MINOR_XEN(ldisk->device));
-
-    bd = bdget(device);
-    if (bd == NULL)
-        return -1;
-
-    gd = get_gendisk(device, &unused);
-    set_capacity(gd, disk->capacity);    
-    ldisk->capacity = disk->capacity;
-
-    bdput(bd);
-
-    return 0;
-}
-
-void xlvbd_refresh(void)
-{
-    vdisk_t *newdisks;
-    struct list_head *tmp, *tmp2;
-    struct lvdisk *disk;
-    int i, nr;
-
-    newdisks = xlvbd_probe(&nr);
-    if (newdisks == NULL) {
-        WPRINTK("failed to probe\n");
-        return;
-    }
-    
-    i = 0;
-    list_for_each_safe(tmp, tmp2, &vbds_list) {
-        disk = list_entry(tmp, struct lvdisk, list);
-        
-        for (i = 0; i < nr; i++) {
-            if ( !newdisks[i].device )
-                continue;
-            if ( disk->device == newdisks[i].device ) {
-                xlvbd_device_update(disk, &newdisks[i]);
-                newdisks[i].device = 0;
-                break;
-            }
-        }
-        if (i == nr) {
-            xlvbd_device_del(disk);
-            newdisks[i].device = 0;
-        }
-    }
-    for (i = 0; i < nr; i++)
-        if ( newdisks[i].device )
-            xlvbd_device_add(&vbds_list, &newdisks[i]);
-    kfree(newdisks);
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
- * state. The VBDs need to be updated in this way when the domain is
- * initialised and also each time we receive an XLBLK_UPDATE event.
- */
-void xlvbd_update_vbds(void)
-{
-    xlvbd_refresh();
-}
-
-/*
- * Set up all the linux device goop for the virtual block devices
- * (vbd's) that we know about. Note that although from the backend
- * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
- * number, the domain creation tools conventionally allocate these
- * numbers to correspond to those used by 'real' linux -- this is just
- * for convenience as it means e.g. that the same /etc/fstab can be
- * used when booting with or without Xen.
- */
-int xlvbd_init(void)
-{
-    int i, nr;
-    vdisk_t *disks;
-
-    INIT_LIST_HEAD(&vbds_list);
-
-    memset(major_info, 0, sizeof(major_info));
-    
-    disks = xlvbd_probe(&nr);
-    if (disks == NULL) {
-        WPRINTK("failed to probe\n");
-        return -1;
-    }
-
-    for (i = 0; i < nr; i++)
-        xlvbd_device_add(&vbds_list, &disks[i]);
-
-    kfree(disks);
-    return 0;
-}
+/*
+ * Allocate the bookkeeping for one block major, register the major and
+ * publish the result in major_info[index].
+ *
+ * 'index' is the slot in major_info[]; it also decides which device
+ * family (IDE, SCSI or xvd) the major belongs to.  'minor' is unused.
+ *
+ * Returns the new entry, or NULL if the allocation or register_blkdev()
+ * failed.
+ */
+static struct xlbd_major_info *
+xlbd_alloc_major_info(int major, int minor, int index)
+{
+       struct xlbd_major_info *mi;
+       struct xlbd_type_info *family = NULL;
+       int first = 0;
+
+       mi = kmalloc(sizeof(*mi), GFP_KERNEL);
+       if (!mi)
+               return NULL;
+       memset(mi, 0, sizeof(*mi));
+
+       /* Work out the device family and the first slot of its range. */
+       switch (index) {
+       case XLBD_MAJOR_IDE_RANGE:
+               family = &xlbd_ide_type;
+               first = XLBD_MAJOR_IDE_START;
+               break;
+       case XLBD_MAJOR_SCSI_RANGE:
+               family = &xlbd_scsi_type;
+               first = XLBD_MAJOR_SCSI_START;
+               break;
+       case XLBD_MAJOR_VBD_RANGE:
+               family = &xlbd_vbd_type;
+               first = XLBD_MAJOR_VBD_START;
+               break;
+       }
+
+       mi->major = major;
+       if (family) {
+               mi->type = family;
+               mi->index = index - first;
+       }
+
+       printk("Registering block device major %i\n", mi->major);
+       if (register_blkdev(mi->major, mi->type->devname)) {
+               WPRINTK("can't get major %d with name %s\n",
+                       mi->major, mi->type->devname);
+               kfree(mi);
+               return NULL;
+       }
+
+       devfs_mk_dir(mi->type->devname);
+       major_info[index] = mi;
+       return mi;
+}
+
+/*
+ * Look up the xlbd_major_info for a virtual device number, allocating
+ * and registering the major on first use, and count one more user.
+ *
+ * The major encoded in 'vdevice' selects a slot in major_info[]: the IDE
+ * majors use slots 0-9, the SCSI disk majors slots 10 upwards, the SCSI
+ * CD-ROM major slot 18, and every other major shares slot 19.
+ *
+ * Returns the entry with ->usage incremented, or NULL if the entry had
+ * to be created and xlbd_alloc_major_info() failed.  The count is
+ * dropped again with xlbd_put_major_info().
+ */
+static struct xlbd_major_info *
+xlbd_get_major_info(int vdevice)
+{
+       struct xlbd_major_info *mi;
+       int major, minor, index;
+
+       major = BLKIF_MAJOR(vdevice);
+       minor = BLKIF_MINOR(vdevice);
+
+       switch (major) {
+       case IDE0_MAJOR: index = 0; break;
+       case IDE1_MAJOR: index = 1; break;
+       case IDE2_MAJOR: index = 2; break;
+       case IDE3_MAJOR: index = 3; break;
+       case IDE4_MAJOR: index = 4; break;
+       case IDE5_MAJOR: index = 5; break;
+       case IDE6_MAJOR: index = 6; break;
+       case IDE7_MAJOR: index = 7; break;
+       case IDE8_MAJOR: index = 8; break;
+       case IDE9_MAJOR: index = 9; break;
+       case SCSI_DISK0_MAJOR: index = 10; break;
+       case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+               index = 11 + major - SCSI_DISK1_MAJOR;
+               break;
+       case SCSI_CDROM_MAJOR: index = 18; break;
+       default: index = 19; break;
+       }
+
+       mi = major_info[index];
+       if (mi == NULL)
+               mi = xlbd_alloc_major_info(major, minor, index);
+
+       /*
+        * xlbd_alloc_major_info() returns NULL on failure; only count a
+        * user when there is an entry to count it on.
+        */
+       if (mi != NULL)
+               mi->usage++;
+       return mi;
+}
+
+/*
+ * Drop one usage count taken by xlbd_get_major_info().  The major stays
+ * registered even when the count reaches zero (see the XXX below).
+ */
+static void
+xlbd_put_major_info(struct xlbd_major_info *mi)
+{
+       mi->usage--;
+       /* XXX: release major if 0 */
+}
+
+/*
+ * Create and configure the request queue for a new gendisk and store it
+ * in gd->queue.  All queues share blkif_io_lock and are served by
+ * do_blkif_request().
+ *
+ * Returns 0 on success, -1 if blk_init_queue() fails.  The result of
+ * elevator_init() is not checked.
+ */
+static int
+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+       request_queue_t *rq;
+
+       rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+       if (rq == NULL)
+               return -1;
+
+       elevator_init(rq, "noop");
+
+       /* Hard sector size and max sectors impersonate the equiv. hardware. */
+       blk_queue_hardsect_size(rq, sector_size);
+       blk_queue_max_sectors(rq, 512);
+
+       /* Each segment in a request is up to an aligned page in size. */
+       blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+       blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+       /* Ensure a merged request will fit in a single I/O ring slot. */
+       blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+       /* Make sure buffer addresses are sector-aligned. */
+       blk_queue_dma_alignment(rq, 511);
+
+       gd->queue = rq;
+
+       return 0;
+}
+
+/*
+ * Create, configure and publish the gendisk for one virtual block device.
+ *
+ * minor       - first minor of the device
+ * capacity    - value handed to set_capacity()
+ * vdevice     - virtual device number; selects the major bookkeeping
+ * vdisk_info  - VDISK_* flags (read-only, removable, CD-ROM)
+ * sector_size - hard sector size for the request queue
+ * info        - frontend state; becomes gd->private_data, and its ->mi
+ *               and ->rq fields are filled in
+ *
+ * A minor with no partition bits set names a whole disk and reserves
+ * 1 << partn_shift minors; any other minor gets a single-minor disk.
+ *
+ * Returns 0 on success.  On failure returns -ENODEV, releases whatever
+ * was acquired and leaves info->mi NULL.
+ */
+static int
+xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
+                   u16 vdisk_info, u16 sector_size,
+                   struct blkfront_info *info)
+{
+       struct gendisk *gd;
+       struct xlbd_major_info *mi;
+       int nr_minors = 1;
+       int err = -ENODEV;
+
+       mi = xlbd_get_major_info(vdevice);
+       if (mi == NULL)
+               goto out;
+       info->mi = mi;
+
+       if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
+               nr_minors = 1 << mi->type->partn_shift;
+
+       gd = alloc_disk(nr_minors);
+       if (gd == NULL)
+               goto out;
+
+       /* Whole disks are named e.g. "sda", single partitions "sda1". */
+       if (nr_minors > 1)
+               sprintf(gd->disk_name, "%s%c", mi->type->diskname,
+                       'a' + mi->index * mi->type->disks_per_major +
+                       (minor >> mi->type->partn_shift));
+       else
+               sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
+                       'a' + mi->index * mi->type->disks_per_major +
+                       (minor >> mi->type->partn_shift),
+                       minor & ((1 << mi->type->partn_shift) - 1));
+
+       gd->major = mi->major;
+       gd->first_minor = minor;
+       gd->fops = &xlvbd_block_fops;
+       gd->private_data = info;
+       set_capacity(gd, capacity);
+
+       if (xlvbd_init_blk_queue(gd, sector_size)) {
+               /*
+                * The disk has not been through add_disk() yet, so there
+                * is nothing for del_gendisk() to undo; just drop the
+                * reference obtained from alloc_disk().
+                */
+               put_disk(gd);
+               goto out;
+       }
+
+       info->rq = gd->queue;
+
+       if (vdisk_info & VDISK_READONLY)
+               set_disk_ro(gd, 1);
+
+       if (vdisk_info & VDISK_REMOVABLE)
+               gd->flags |= GENHD_FL_REMOVABLE;
+
+       if (vdisk_info & VDISK_CDROM)
+               gd->flags |= GENHD_FL_CD;
+
+       add_disk(gd);
+
+       return 0;
+
+ out:
+       if (mi)
+               xlbd_put_major_info(mi);
+       /*
+        * blkfront_remove() treats a non-NULL info->mi as "a disk exists"
+        * and would call xlvbd_del(), dropping the major a second time.
+        */
+       info->mi = NULL;
+       return err;
+}
+
+/*
+ * Entry point used by the frontend to expose one virtual block device.
+ *
+ * Records the device number in info->dev, then creates the gendisk via
+ * xlvbd_alloc_gendisk() while holding a block_device reference.
+ * Returns -ENODEV if bdget() fails, otherwise whatever
+ * xlvbd_alloc_gendisk() returns.
+ */
+int
+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+         u16 sector_size, struct blkfront_info *info)
+{
+       struct block_device *bdev;
+       int rc;
+
+       info->dev = MKDEV(BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice));
+
+       bdev = bdget(info->dev);
+       if (!bdev)
+               return -ENODEV;
+
+       rc = xlvbd_alloc_gendisk(BLKIF_MINOR(vdevice), capacity, vdevice,
+                                vdisk_info, sector_size, info);
+       bdput(bdev);
+
+       return rc;
+}
+
+/*
+ * Undo xlvbd_add(): remove the gendisk found for info->dev, clean up its
+ * request queue and drop the major usage count held through info->mi.
+ *
+ * Does nothing at all if bdget() fails.  If no gendisk can be looked up
+ * for info->dev, the disk and queue teardown is skipped but the major
+ * usage count is still dropped and info->mi reset.
+ */
+void
+xlvbd_del(struct blkfront_info *info)
+{
+       struct block_device *bd;
+       struct gendisk *gd;
+       int unused;
+       request_queue_t *rq;
+
+       bd = bdget(info->dev);
+       if (bd == NULL)
+               return;
+
+       /* get_gendisk() can come back empty-handed; do not dereference NULL. */
+       gd = get_gendisk(info->dev, &unused);
+       if (gd != NULL) {
+               /* Remember the queue: gd must not be touched after put_disk(). */
+               rq = gd->queue;
+
+               del_gendisk(gd);
+               put_disk(gd);
+               blk_cleanup_queue(rq);
+       }
+
+       xlbd_put_major_info(info->mi);
+       info->mi = NULL;
+
+       bdput(bd);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu Aug 25 22:53:20 2005
@@ -23,6 +23,9 @@
     blkif_be_driver_status_t be_st;
 
     printk(KERN_INFO "Initialising Xen block tap device\n");
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    printk(KERN_INFO "Block tap is using grant tables.\n");
+#endif
 
     DPRINTK("   tap - Backend connection init:\n");
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Thu Aug 25 22:53:20 2005
@@ -71,7 +71,6 @@
     /* Physical parameters of the comms window. */
     unsigned long       shmem_frame;
     unsigned int        evtchn;
-    int                 irq;
     /* Comms information. */
     blkif_back_ring_t   blk_ring;
     
@@ -86,6 +85,11 @@
     spinlock_t          blk_ring_lock;
     atomic_t            refcnt;
     struct work_struct work;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    u16 shmem_handle;
+    unsigned long shmem_vaddr;
+    grant_ref_t shmem_ref;
+#endif
 } blkif_t;
 
 blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
@@ -104,8 +108,6 @@
     blkif_t       *blkif;
     unsigned long  id;
     int            nr_pages;
-    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
     int            next_free;
 } active_req_t;
 
@@ -173,32 +175,7 @@
 
 
 /* -------[ Mappings to User VMA ]------------------------------------ */
-#define MAX_PENDING_REQS 64
 #define BATCH_PER_DOMAIN 16
-extern struct vm_area_struct *blktap_vma;
-
-/* The following are from blkback.c and should probably be put in a
- * header and included from there.
- * The mmap area described here is where attached data pages eill be mapped.
- */
- 
-extern unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
-    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_PAGES             \
-    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                        \
-    (mmap_vstart +                                   \
-     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * PAGE_SIZE))
-
-/* immediately before the mmap area, we have a bunch of pages reserved
- * for shared memory rings.
- */
-
-#define RING_PAGES 3 /* Ctrl, Front, and Back */ 
-extern unsigned long rings_vstart;
-
 
 /* -------[ Here be globals ]----------------------------------------- */
 extern unsigned long blktap_mode;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c       Thu Aug 25 22:53:20 2005
@@ -9,6 +9,7 @@
  */
  
 #include "blktap.h"
+#include <asm-xen/evtchn.h>
 
 static char *blkif_state_name[] = {
     [BLKIF_STATE_CLOSED]       = "closed",
@@ -16,16 +17,15 @@
     [BLKIF_STATE_CONNECTED]    = "connected",
 };
 
-static char * blkif_status_name[] = {
+static char *blkif_status_name[] = {
     [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
     [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
     [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
     [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
 };
 
-static unsigned blktap_be_irq;
-unsigned int    blktap_be_state = BLKIF_STATE_CLOSED;
-unsigned int    blktap_be_evtchn;
+unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
+unsigned int blktap_be_evtchn;
 
 /*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
 
@@ -49,13 +49,21 @@
     blkif_t              *blkif = (blkif_t *)arg;
     ctrl_msg_t            cmsg;
     blkif_be_disconnect_t disc;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct gnttab_unmap_grant_ref op;
+#endif
 
     /*
      * These can't be done in blkif_disconnect() because at that point there
      * may be outstanding requests at the disc whose asynchronous responses
      * must still be notified to the remote driver.
      */
-    unbind_evtchn_from_irq(blkif->evtchn);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    op.host_addr = blkif->shmem_vaddr;
+    op.handle         = blkif->shmem_handle;
+    op.dev_bus_addr   = 0;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+#endif
     vfree(blkif->blk_ring.sring);
 
     /* Construct the deferred response message. */
@@ -179,8 +187,12 @@
     unsigned int   evtchn = connect->evtchn;
     unsigned long  shmem_frame = connect->shmem_frame;
     struct vm_struct *vma;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    int ref = connect->shmem_ref;
+#else
     pgprot_t       prot;
     int            error;
+#endif
     blkif_t       *blkif;
     blkif_sring_t *sring;
 
@@ -201,24 +213,46 @@
         return;
     }
 
-    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+    prot = __pgprot(_KERNPG_TABLE);
     error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
                                     shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
                                     prot, domid);
     if ( error != 0 )
     {
-        WPRINTK("BE_CONNECT: error! (%d)\n", error);
         if ( error == -ENOMEM ) 
             connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT ) {
+        else if ( error == -EFAULT )
             connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-            WPRINTK("BE_CONNECT: MAPPING error!\n");
-        }
         else
             connect->status = BLKIF_BE_STATUS_ERROR;
         vfree(vma->addr);
         return;
     }
+#else
+    { /* Map: Use the Grant table reference */
+        struct gnttab_map_grant_ref op;
+        op.host_addr = VMALLOC_VMADDR(vma->addr);
+        op.flags            = GNTMAP_host_map;
+        op.ref              = ref;
+        op.dom              = domid;
+       
+        BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+       
+        handle = op.handle;
+       
+        if (op.handle < 0) {
+            DPRINTK(" Grant table operation failure !\n");
+            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+            vfree(vma->addr);
+            return;
+        }
+
+        blkif->shmem_ref = ref;
+        blkif->shmem_handle = handle;
+        blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
+    }
+#endif
 
     if ( blkif->status != DISCONNECTED )
     {
@@ -232,12 +266,12 @@
     BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
     
     blkif->evtchn        = evtchn;
-    blkif->irq           = bind_evtchn_to_irq(evtchn);
     blkif->shmem_frame   = shmem_frame;
     blkif->status        = CONNECTED;
     blkif_get(blkif);
 
-    request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
+    bind_evtchn_to_irqhandler(
+        evtchn, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
 
     connect->status = BLKIF_BE_STATUS_OKAY;
 }
@@ -264,7 +298,7 @@
         blkif->status = DISCONNECTING;
         blkif->disconnect_rspid = rsp_id;
         wmb(); /* Let other CPUs see the status change. */
-        free_irq(blkif->irq, blkif);
+        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
         blkif_deschedule(blkif);
         blkif_put(blkif);
         return 0; /* Caller should not send response message. */
@@ -286,7 +320,7 @@
     };
     blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
     msg->handle      = 0;
-    msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+    msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring);
     
     ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
 }
@@ -313,12 +347,11 @@
     int err = 0;
     
     blktap_be_evtchn = status->evtchn;
-    blktap_be_irq    = bind_evtchn_to_irq(blktap_be_evtchn);
-
-    err = request_irq(blktap_be_irq, blkif_ptbe_int, 
-                      SA_SAMPLE_RANDOM, "blkif", NULL);
+
+    err = bind_evtchn_to_irqhandler(
+        blktap_be_evtchn, blkif_ptbe_int, SA_SAMPLE_RANDOM, "blkif", NULL);
     if ( err ) {
-       WPRINTK("blkfront request_irq failed (%d)\n", err);
+       WPRINTK("blkfront bind_evtchn_to_irqhandler failed (%d)\n", err);
         return;
     } else {
        /* transtion to connected in case we need to do a 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Thu Aug 25 22:53:20 2005
@@ -280,8 +280,6 @@
     int more_to_do = 0;
     int notify_be = 0, notify_user = 0;
     
-    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
-    
     /* lock both rings */
     spin_lock_irqsave(&blkif_io_lock, flags);
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c  Thu Aug 25 22:53:20 2005
@@ -5,7 +5,6 @@
  * Control interface between the driver and a character device.
  * 
  * Copyright (c) 2004, Andrew Warfield
- *
  */
 
 #include <linux/config.h>
@@ -19,7 +18,11 @@
 #include <linux/gfp.h>
 #include <linux/poll.h>
 #include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
 #include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#endif
 
 #include "blktap.h"
 
@@ -32,11 +35,6 @@
 
 /* for poll: */
 static wait_queue_head_t blktap_wait;
-
-/* Where things are inside the device mapping. */
-struct vm_area_struct *blktap_vma = NULL;
-unsigned long mmap_vstart;
-unsigned long rings_vstart;
 
 /* Rings up to user space. */
 static blkif_front_ring_t blktap_ufe_ring;
@@ -47,6 +45,61 @@
 static int blktap_read_fe_ring(void);
 static int blktap_read_be_ring(void);
 
+
+/* -------[ mmap region ]--------------------------------------------- */
+/*
+ * We use a big chunk of address space to map in-flight requests into,
+ * and export this region up to user-space.  See the comments in blkback
+ * about this -- the two must be kept in sync if the tap is used as a 
+ * passthrough.
+ */
+
+#define MAX_PENDING_REQS 64
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+#define RING_PAGES 3 /* Ctrl, Front, and Back */ 
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma = NULL;
+unsigned long mmap_vstart;  /* Kernel pages for mapping in data. */
+unsigned long rings_vstart; /* start of mmaped vma               */
+unsigned long user_vstart;  /* start of user mappings            */
+
+#define MMAP_PAGES_PER_REQUEST \
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES             \
+    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_start, _req,_seg)                \
+    ( _start +                                       \
+     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+     ((_seg) * PAGE_SIZE))
+
+/* -------[ grant handles ]------------------------------------------- */
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+struct grant_handle_pair
+{
+    u16  kernel;
+    u16  user;
+};
+static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
+#define pending_handle(_idx, _i) \
+    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+#define BLKTAP_INVALID_HANDLE(_g) \
+    (((_g)->kernel == 0xFFFF) && ((_g)->user == 0xFFFF))
+#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
+    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+    } while(0)
+    
+#endif
+
+
 /* -------[ blktap vm ops ]------------------------------------------- */
 
 static struct page *blktap_nopage(struct vm_area_struct *vma,
@@ -76,8 +129,6 @@
     
     if ( test_and_set_bit(0, &blktap_dev_inuse) )
         return -EBUSY;
-
-    printk(KERN_ALERT "blktap open.\n");
     
     /* Allocate the ctrl ring. */
     csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
@@ -128,7 +179,7 @@
     blktap_dev_inuse = 0;
     blktap_ring_ok = 0;
 
-    printk(KERN_ALERT "blktap closed.\n");
+    DPRINTK(KERN_ALERT "blktap closed.\n");
 
     /* Free the ring page. */
     ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
@@ -140,7 +191,7 @@
     ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
     free_page((unsigned long) blktap_ube_ring.sring);
 
-    /* Clear any active mappings. */
+    /* Clear any active mappings and free foreign map table */
     if (blktap_vma != NULL) {
         zap_page_range(blktap_vma, blktap_vma->vm_start, 
                        blktap_vma->vm_end - blktap_vma->vm_start, NULL);
@@ -151,21 +202,36 @@
 }
 
 /* Note on mmap:
- * remap_pfn_range sets VM_IO on vma->vm_flags.  In trying to make libaio
- * work to do direct page access from userspace, this ended up being a
- * problem.  The bigger issue seems to be that there is no way to map
- * a foreign page in to user space and have the virtual address of that 
- * page map sanely down to a mfn.
- * Removing the VM_IO flag results in a loop in get_user_pages, as 
- * pfn_valid() always fails on a foreign page.
+ * We need to map pages to user space in a way that will allow the block
+ * subsystem to set up direct IO to them.  This couldn't be done before, because
+ * there isn't really a sane way to map a user virtual address down to a
+ * physical address when the page belongs to another domain.
+ *
+ * My first approach was to map the page in to kernel memory, add an entry
+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
+ * and then attempt to map that page up to user space.  This is disallowed
+ * by xen though, which realizes that we don't really own the machine frame
+ * underlying the physical page.
+ *
+ * The new approach is to provide explicit support for this in xen linux.
+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
+ * mapped from other vms.  vma->vm_private_data is set up as a mapping 
+ * from pages to actual page structs.  There is a new clause in get_user_pages
+ * that does the right thing for this sort of mapping.
+ * 
+ * blktap_mmap sets up this mapping.  Most of the real work is done in
+ * blktap_write_fe_ring below.
  */
 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
 {
     int size;
-
-    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+    struct page **map;
+    int i;
+
+    DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
            vma->vm_start, vma->vm_end);
 
+    vma->vm_flags |= VM_RESERVED;
     vma->vm_ops = &blktap_vm_ops;
 
     size = vma->vm_end - vma->vm_start;
@@ -177,10 +243,10 @@
     }
 
     size >>= PAGE_SHIFT;
-    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+    DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
     
     rings_vstart = vma->vm_start;
-    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+    user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
     
     /* Map the ring pages to the start of the region and reserve it. */
 
@@ -190,29 +256,44 @@
     DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
     if (remap_pfn_range(vma, vma->vm_start, 
                          __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
-    }
+                         PAGE_SIZE, vma->vm_page_prot)) 
+        goto fail;
 
 
     DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
     if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, 
                          __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        WPRINTK("be_ring: remap_pfn_range failure!\n");
-    }
+                         PAGE_SIZE, vma->vm_page_prot)) 
+        goto fail;
 
     DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
     if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), 
                          __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        WPRINTK("fe_ring: remap_pfn_range failure!\n");
-    }
-            
+                         PAGE_SIZE, vma->vm_page_prot)) 
+        goto fail;
+
+    /* Mark this VM as containing foreign pages, and set up mappings. */
+    map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+                  * sizeof(struct page *),
+                  GFP_KERNEL);
+    if (map == NULL) goto fail;
+
+    for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
+        map[i] = NULL;
+    
+    vma->vm_private_data = map;
+    vma->vm_flags |= VM_FOREIGN;
+
     blktap_vma = vma;
     blktap_ring_ok = 1;
 
     return 0;
+ fail:
+    /* Clear any active mappings. */
+    zap_page_range(vma, vma->vm_start, 
+                   vma->vm_end - vma->vm_start, NULL);
+
+    return -ENOMEM;
 }
 
 static int blktap_ioctl(struct inode *inode, struct file *filp,
@@ -263,6 +344,8 @@
              RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
              RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
 
+            flush_tlb_all();
+
             RING_PUSH_REQUESTS(&blktap_uctrl_ring);
             RING_PUSH_REQUESTS(&blktap_ufe_ring);
             RING_PUSH_RESPONSES(&blktap_ube_ring);
@@ -289,11 +372,71 @@
     
 /*-----[ Data to/from user space ]----------------------------------------*/
 
+/* Unmap the first nr_pages segment mappings of request slot idx. */
+static void fast_flush_area(int idx, int nr_pages)
+{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+    struct grant_handle_pair *handle;
+    unsigned long ptep;
+    int i, op = 0;
+
+    for (i=0; i<nr_pages; i++)
+    {
+        handle = &pending_handle(idx, i);
+        if (BLKTAP_INVALID_HANDLE(handle))
+            continue; /* no saved handles for this segment */
+
+        unmap[op].host_addr    = MMAP_VADDR(mmap_vstart, idx, i);
+        unmap[op].dev_bus_addr = 0;
+        unmap[op].handle       = handle->kernel;
+        op++;
+
+        /* No pte address: skip only the user unmap, so the kernel side
+         * and the remaining segments are still released. */
+        if (create_lookup_pte_addr(blktap_vma->vm_mm,
+                                   MMAP_VADDR(user_vstart, idx, i), 
+                                   &ptep) !=0) {
+            DPRINTK("Couldn't get a pte addr!\n");
+        } else {
+            unmap[op].host_addr    = ptep;
+            unmap[op].dev_bus_addr = 0;
+            unmap[op].handle       = handle->user;
+            op++;
+        }
+        BLKTAP_INVALIDATE_HANDLE(handle);
+    }
+    if ( unlikely(HYPERVISOR_grant_table_op(
+        GNTTABOP_unmap_grant_ref, unmap, op)))
+        BUG();
+#else
+    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int               i;
+
+    if ( nr_pages <= 0 )
+        return; /* nothing mapped; also keeps mcl[nr_pages-1] in bounds */
+    for ( i = 0; i < nr_pages; i++ )
+        MULTI_update_va_mapping(mcl+i, MMAP_VADDR(mmap_vstart, idx, i),
+                                __pte(0), 0);
+    mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+        BUG();
+#endif
+}
+
 
 int blktap_write_fe_ring(blkif_request_t *req)
 {
     blkif_request_t *target;
-    int error, i;
+    int i, ret = 0;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+    int op;
+#else
+    unsigned long remap_prot;
+    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1];
+    mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#endif
 
     /*
      * This is called to pass a request from the real frontend domain's
@@ -310,29 +453,184 @@
         return 0;
     }
 
-    target = RING_GET_REQUEST(&blktap_ufe_ring,
-            blktap_ufe_ring.req_prod_pvt);
+    flush_cache_all(); /* a noop on intel... */
+
+    target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
     memcpy(target, req, sizeof(*req));
 
-    /* Attempt to map the foreign pages directly in to the application */
+    /* Map the foreign pages directly in to the application */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    op = 0;
     for (i=0; i<target->nr_segments; i++) {
 
-        error = direct_remap_area_pages(blktap_vma->vm_mm, 
-                                        MMAP_VADDR(ID_TO_IDX(req->id), i), 
-                                        target->frame_and_sects[i] & PAGE_MASK,
-                                        PAGE_SIZE,
-                                        blktap_vma->vm_page_prot,
-                                        ID_TO_DOM(req->id));
-        if ( error != 0 ) {
-            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
-            /* the request is now dropped on the floor. */
-            return 0;
-        }
-    }
-    
+        unsigned long uvaddr;
+        unsigned long kvaddr;
+        unsigned long ptep;
+
+        uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
+        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+
+        /* Map the remote page to kernel. */
+        map[op].host_addr = kvaddr;
+        map[op].dom   = ID_TO_DOM(req->id);
+        map[op].ref   = blkif_gref_from_fas(target->frame_and_sects[i]);
+        map[op].flags = GNTMAP_host_map;
+        /* This needs a bit more thought in terms of interposition: 
+         * If we want to be able to modify pages during write using 
+         * grant table mappings, the guest will either need to allow 
+         * it, or we'll need to incur a copy. */
+        if (req->operation == BLKIF_OP_WRITE)
+            map[op].flags |= GNTMAP_readonly;
+        op++;
+
+        /* Now map it to user. */
+        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+        if (ret)
+        {
+            DPRINTK("Couldn't get a pte addr!\n");
+            goto fail;
+        }
+
+        map[op].host_addr = ptep;
+        map[op].dom       = ID_TO_DOM(req->id);
+        map[op].ref       = blkif_gref_from_fas(target->frame_and_sects[i]);
+        map[op].flags     = GNTMAP_host_map | GNTMAP_application_map
+                            | GNTMAP_contains_pte;
+        /* Above interposition comment applies here as well. */
+        if (req->operation == BLKIF_OP_WRITE)
+            map[op].flags |= GNTMAP_readonly;
+        op++;
+    }
+
+    if ( unlikely(HYPERVISOR_grant_table_op(
+            GNTTABOP_map_grant_ref, map, op)))
+        BUG();
+
+    op = 0;
+    for (i=0; i<(target->nr_segments*2); i+=2) {
+        unsigned long uvaddr;
+        unsigned long kvaddr;
+        unsigned long offset;
+        int cancel = 0;
+
+        uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2);
+        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2);
+
+        if ( unlikely(map[i].handle < 0) ) {
+            DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
+            ret = map[i].handle;
+            cancel = 1;
+        }
+
+        if ( unlikely(map[i+1].handle < 0) ) {
+            DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
+            ret = map[i+1].handle;
+            cancel = 1;
+        }
+
+        if (cancel) 
+            goto fail;
+
+        /* Set the necessary mappings in p2m and in the VM_FOREIGN 
+         * vm_area_struct to allow user vaddr -> struct page lookups
+         * to work.  This is needed for direct IO to foreign pages. */
+        phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
+            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+        ((struct page **)blktap_vma->vm_private_data)[offset] =
+            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+
+        /* Save handles for unmapping later. */
+        pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle;
+        pending_handle(ID_TO_IDX(req->id), i/2).user   = map[i+1].handle;
+    }
+    
+#else
+
+    remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+
+    for (i=0; i<target->nr_segments; i++) {
+        unsigned long buf;
+        unsigned long uvaddr;
+        unsigned long kvaddr;
+        unsigned long offset;
+        unsigned long ptep;
+
+        buf   = target->frame_and_sects[i] & PAGE_MASK;
+        uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
+        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+
+        MULTI_update_va_mapping_otherdomain(
+            mcl+i, 
+            kvaddr, 
+            pfn_pte_ma(buf >> PAGE_SHIFT, __pgprot(remap_prot)),
+            0,
+            ID_TO_DOM(req->id));
+
+        phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] =
+            FOREIGN_FRAME(buf >> PAGE_SHIFT);
+
+        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+        if (ret)
+        { 
+            DPRINTK("error getting pte\n");
+            goto fail;
+        }
+
+        mmu[i].ptr = ptep;
+        mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK)
+            | pgprot_val(blktap_vma->vm_page_prot);
+
+        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+        ((struct page **)blktap_vma->vm_private_data)[offset] =
+            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+    }
+    
+    /* Add the mmu_update call. */
+    mcl[i].op = __HYPERVISOR_mmu_update;
+    mcl[i].args[0] = (unsigned long)mmu;
+    mcl[i].args[1] = target->nr_segments;
+    mcl[i].args[2] = 0;
+    mcl[i].args[3] = ID_TO_DOM(req->id);
+
+    BUG_ON(HYPERVISOR_multicall(mcl, target->nr_segments+1) != 0);
+
+    /* Make sure it all worked. */
+    for ( i = 0; i < target->nr_segments; i++ )
+    {
+        if ( unlikely(mcl[i].result != 0) )
+        {
+            DPRINTK("invalid buffer -- could not remap it\n");
+            ret = mcl[i].result;
+            goto fail;
+        }
+    }
+    if ( unlikely(mcl[i].result != 0) )
+    {
+        DPRINTK("direct remapping of pages to /dev/blktap failed.\n");
+        ret = mcl[i].result;
+        goto fail;
+    }
+#endif /* CONFIG_XEN_BLKDEV_GRANT */
+
+    /* Mark mapped pages as reserved: */
+    for ( i = 0; i < target->nr_segments; i++ )
+    {
+        unsigned long kvaddr;
+
+        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
+        SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+    }
+
+
     blktap_ufe_ring.req_prod_pvt++;
     
     return 0;
+
+ fail:
+    fast_flush_area(ID_TO_IDX(req->id), target->nr_segments);
+    return ret;
 }
 
 int blktap_write_be_ring(blkif_response_t *rsp)
@@ -366,7 +664,7 @@
 {
     /* This is called to read responses from the UFE ring. */
 
-    RING_IDX i, rp;
+    RING_IDX i, j, rp;
     blkif_response_t *resp_s;
     blkif_t *blkif;
     active_req_t *ar;
@@ -387,7 +685,21 @@
             DPRINTK("resp->fe_ring\n");
             ar = lookup_active_req(ID_TO_IDX(resp_s->id));
             blkif = ar->blkif;
-            zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0), 
+            for (j = 0; j < ar->nr_pages; j++) {
+                unsigned long uvaddr, kvaddr;
+                struct page **map = blktap_vma->vm_private_data;
+                int offset; 
+                uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), j);
+                kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(resp_s->id), j);
+                offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+                /* Reserved was set via the kernel address, not the uvaddr. */
+                ClearPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+                map[offset] = NULL;
+            }
+
+            fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
+            zap_page_range(blktap_vma, 
+                    MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), 
                     ar->nr_pages << PAGE_SHIFT, NULL);
             write_resp_to_fe_ring(blkif, resp_s);
             blktap_ufe_ring.rsp_cons = i + 1;
@@ -462,7 +774,18 @@
 
 int blktap_init(void)
 {
-    int err;
+    int err, i, j;
+    struct page *page;
+
+    page = balloon_alloc_empty_page_range(MMAP_PAGES);
+    BUG_ON(page == NULL);
+    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    for (i=0; i<MAX_PENDING_REQS ; i++)
+        for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+            BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
+#endif
 
     err = misc_register(&blktap_miscdev);
     if ( err != 0 )
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Thu Aug 25 22:53:20 2005
@@ -240,7 +240,11 @@
 #endif
 
 /*** Useful function for console debugging -- goes straight to Xen. ***/
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 asmlinkage int xprintk(const char *fmt, ...)
+#else
+asmlinkage int xprintk(const char *fmt, ...)
+#endif
 {
     va_list args;
     int printk_len;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/netback/Makefile Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/Makefile Thu Aug 25 22:53:20 2005
@@ -1,2 +1,2 @@
 
-obj-y  := netback.o control.o interface.o loopback.o
+obj-y  := netback.o xenbus.o interface.o loopback.o
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Aug 25 22:53:20 2005
@@ -15,9 +15,17 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
 #include <asm-xen/xen-public/io/netif.h>
 #include <asm/io.h>
 #include <asm/pgalloc.h>
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#endif
+
+
 
 #if 0
 #define ASSERT(_p) \
@@ -39,9 +47,19 @@
 
     /* Physical parameters of the comms window. */
     unsigned long    tx_shmem_frame;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    u16              tx_shmem_handle;
+    unsigned long    tx_shmem_vaddr; 
+    grant_ref_t      tx_shmem_ref; 
+#endif
     unsigned long    rx_shmem_frame;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    u16              rx_shmem_handle;
+    unsigned long    rx_shmem_vaddr; 
+    grant_ref_t      rx_shmem_ref; 
+#endif
     unsigned int     evtchn;
-    int              irq;
+    unsigned int     remote_evtchn;
 
     /* The shared rings and indexes. */
     netif_tx_interface_t *tx;
@@ -65,36 +83,30 @@
     /* Miscellaneous private stuff. */
     enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
     int active;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */
-    u8               disconnect_rspid;
-    struct netif_st *hash_next;
     struct list_head list;  /* scheduling list */
     atomic_t         refcnt;
     struct net_device *dev;
     struct net_device_stats stats;
 
-    struct work_struct work;
+    struct work_struct free_work;
 } netif_t;
 
-void netif_create(netif_be_create_t *create);
-void netif_destroy(netif_be_destroy_t *destroy);
-void netif_creditlimit(netif_be_creditlimit_t *creditlimit);
-void netif_connect(netif_be_connect_t *connect);
-int  netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id);
-void netif_disconnect_complete(netif_t *netif);
-netif_t *netif_find_by_handle(domid_t domid, unsigned int handle);
+void netif_creditlimit(netif_t *netif);
+int  netif_disconnect(netif_t *netif);
+
+netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
+void free_netif_callback(netif_t *netif);
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+             unsigned long rx_ring_ref, unsigned int evtchn);
+
 #define netif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define netif_put(_b)                             \
     do {                                          \
         if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            netif_disconnect_complete(_b);        \
+            free_netif_callback(_b);              \
     } while (0)
 
-void netif_interface_init(void);
-void netif_ctrlif_init(void);
+void netif_xenbus_init(void);
 
 void netif_schedule_work(netif_t *netif);
 void netif_deschedule_work(netif_t *netif);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Thu Aug 25 22:53:20 2005
@@ -9,31 +9,14 @@
 #include "common.h"
 #include <linux/rtnetlink.h>
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-#define NETIF_HASHSZ 1024
-#define NETIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(NETIF_HASHSZ-1))
-
-static netif_t *netif_hash[NETIF_HASHSZ];
-
-netif_t *netif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    netif_t *netif = netif_hash[NETIF_HASH(domid, handle)];
-    while ( (netif != NULL) && 
-            ((netif->domid != domid) || (netif->handle != handle)) )
-        netif = netif->hash_next;
-    return netif;
-}
-
 static void __netif_up(netif_t *netif)
 {
     struct net_device *dev = netif->dev;
     spin_lock_bh(&dev->xmit_lock);
     netif->active = 1;
     spin_unlock_bh(&dev->xmit_lock);
-    (void)request_irq(netif->irq, netif_be_int, 0, dev->name, netif);
+    (void)bind_evtchn_to_irqhandler(
+        netif->evtchn, netif_be_int, 0, dev->name, netif);
     netif_schedule_work(netif);
 }
 
@@ -43,14 +26,14 @@
     spin_lock_bh(&dev->xmit_lock);
     netif->active = 0;
     spin_unlock_bh(&dev->xmit_lock);
-    free_irq(netif->irq, netif);
+    unbind_evtchn_from_irqhandler(netif->evtchn, netif);
     netif_deschedule_work(netif);
 }
 
 static int net_open(struct net_device *dev)
 {
     netif_t *netif = netdev_priv(dev);
-    if ( netif->status == CONNECTED )
+    if (netif->status == CONNECTED)
         __netif_up(netif);
     netif_start_queue(dev);
     return 0;
@@ -60,74 +43,23 @@
 {
     netif_t *netif = netdev_priv(dev);
     netif_stop_queue(dev);
-    if ( netif->status == CONNECTED )
+    if (netif->status == CONNECTED)
         __netif_down(netif);
     return 0;
 }
 
-static void __netif_disconnect_complete(void *arg)
-{
-    netif_t              *netif = (netif_t *)arg;
-    ctrl_msg_t            cmsg;
-    netif_be_disconnect_t disc;
-
-    /*
-     * These can't be done in netif_disconnect() because at that point there
-     * may be outstanding requests in the network stack whose asynchronous
-     * responses must still be notified to the remote driver.
-     */
-    unbind_evtchn_from_irq(netif->evtchn);
-    vfree(netif->tx); /* Frees netif->rx as well. */
-
-    /* Construct the deferred response message. */
-    cmsg.type         = CMSG_NETIF_BE;
-    cmsg.subtype      = CMSG_NETIF_BE_DISCONNECT;
-    cmsg.id           = netif->disconnect_rspid;
-    cmsg.length       = sizeof(netif_be_disconnect_t);
-    disc.domid        = netif->domid;
-    disc.netif_handle = netif->handle;
-    disc.status       = NETIF_BE_STATUS_OKAY;
-    memcpy(cmsg.msg, &disc, sizeof(disc));
-
-    /*
-     * Make sure message is constructed /before/ status change, because
-     * after the status change the 'netif' structure could be deallocated at
-     * any time. Also make sure we send the response /after/ status change,
-     * as otherwise a subsequent CONNECT request could spuriously fail if
-     * another CPU doesn't see the status change yet.
-     */
-    mb();
-    if ( netif->status != DISCONNECTING )
-        BUG();
-    netif->status = DISCONNECTED;
-    mb();
-
-    /* Send the successful response. */
-    ctrl_if_send_response(&cmsg);
-}
-
-void netif_disconnect_complete(netif_t *netif)
-{
-    INIT_WORK(&netif->work, __netif_disconnect_complete, (void *)netif);
-    schedule_work(&netif->work);
-}
-
-void netif_create(netif_be_create_t *create)
-{
-    int                err = 0;
-    domid_t            domid  = create->domid;
-    unsigned int       handle = create->netif_handle;
+netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
+{
+    int err = 0, i;
     struct net_device *dev;
-    netif_t          **pnetif, *netif;
-    char               name[IFNAMSIZ] = {};
+    netif_t *netif;
+    char name[IFNAMSIZ] = {};
 
     snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
     dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
-    if ( dev == NULL )
-    {
+    if (dev == NULL) {
         DPRINTK("Could not create netif: out of memory\n");
-        create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
+        return NULL;
     }
 
     netif = netdev_priv(dev);
@@ -142,19 +74,6 @@
     netif->credit_usec  = 0UL;
     init_timer(&netif->credit_timeout);
 
-    pnetif = &netif_hash[NETIF_HASH(domid, handle)];
-    while ( *pnetif != NULL )
-    {
-        if ( ((*pnetif)->domid == domid) && ((*pnetif)->handle == handle) )
-        {
-            DPRINTK("Could not create netif: already exists\n");
-            create->status = NETIF_BE_STATUS_INTERFACE_EXISTS;
-            free_netdev(dev);
-            return;
-        }
-        pnetif = &(*pnetif)->hash_next;
-    }
-
     dev->hard_start_xmit = netif_be_start_xmit;
     dev->get_stats       = netif_be_get_stats;
     dev->open            = net_open;
@@ -164,10 +83,10 @@
     /* Disable queuing. */
     dev->tx_queue_len = 0;
 
-    if ( (create->be_mac[0] == 0) && (create->be_mac[1] == 0) &&
-         (create->be_mac[2] == 0) && (create->be_mac[3] == 0) &&
-         (create->be_mac[4] == 0) && (create->be_mac[5] == 0) )
-    {
+    for (i = 0; i < ETH_ALEN; i++)
+       if (be_mac[i] != 0)
+           break;
+    if (i == ETH_ALEN) {
         /*
          * Initialise a dummy MAC address. We choose the numerically largest
          * non-broadcast address to prevent the address getting stolen by an
@@ -175,87 +94,200 @@
          */ 
         memset(dev->dev_addr, 0xFF, ETH_ALEN);
         dev->dev_addr[0] &= ~0x01;
-    }
-    else
-    {
-        memcpy(dev->dev_addr, create->be_mac, ETH_ALEN);
-    }
-
-    memcpy(netif->fe_dev_addr, create->mac, ETH_ALEN);
+    } else
+        memcpy(dev->dev_addr, be_mac, ETH_ALEN);
 
     rtnl_lock();
     err = register_netdevice(dev);
     rtnl_unlock();
-
-    if ( err != 0 )
-    {
+    if (err) {
         DPRINTK("Could not register new net device %s: err=%d\n",
                 dev->name, err);
-        create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
         free_netdev(dev);
-        return;
-    }
-
-    netif->hash_next = *pnetif;
-    *pnetif = netif;
+        return NULL;
+    }
 
     DPRINTK("Successfully created netif\n");
-    create->status = NETIF_BE_STATUS_OKAY;
-}
-
-void netif_destroy(netif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->netif_handle;
-    netif_t     **pnetif, *netif;
-
-    pnetif = &netif_hash[NETIF_HASH(domid, handle)];
-    while ( (netif = *pnetif) != NULL )
+    return netif;
+}
+
+static int map_frontend_page(netif_t *netif, unsigned long localaddr,
+                            unsigned long tx_ring_ref, unsigned long rx_ring_ref)
+{
+#if !defined(CONFIG_XEN_NETDEV_GRANT_TX)||!defined(CONFIG_XEN_NETDEV_GRANT_RX)
+    pgprot_t      prot = __pgprot(_KERNPG_TABLE);
+    int           err;
+#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX)
     {
-        if ( (netif->domid == domid) && (netif->handle == handle) )
-        {
-            if ( netif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
+        struct gnttab_map_grant_ref op;
+
+        /* Map: Use the Grant table reference */
+        op.host_addr = localaddr;
+        op.flags     = GNTMAP_host_map;
+        op.ref       = tx_ring_ref;
+        op.dom       = netif->domid;
+       
+       BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+        if (op.handle < 0) { 
+            DPRINTK(" Grant table operation failure !\n");
+            return op.handle;
         }
-        pnetif = &netif->hash_next;
-    }
-
-    destroy->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = NETIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pnetif = netif->hash_next;
+
+        netif->tx_shmem_ref    = tx_ring_ref;
+        netif->tx_shmem_handle = op.handle;
+        netif->tx_shmem_vaddr  = localaddr;
+    }
+#else 
+    err = direct_remap_area_pages(&init_mm, localaddr,
+                                 tx_ring_ref<<PAGE_SHIFT, PAGE_SIZE,
+                                 prot, netif->domid); 
+    if (err)
+       return err;
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_RX)
+    {
+        struct gnttab_map_grant_ref op;
+
+        /* Map: Use the Grant table reference */
+        op.host_addr = localaddr + PAGE_SIZE;
+        op.flags     = GNTMAP_host_map;
+        op.ref       = rx_ring_ref;
+        op.dom       = netif->domid;
+
+       BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+        if (op.handle < 0) { 
+            DPRINTK(" Grant table operation failure !\n");
+            return op.handle;
+        }
+
+        netif->rx_shmem_ref    = rx_ring_ref;
+        netif->rx_shmem_handle = op.handle;
+        netif->rx_shmem_vaddr  = localaddr + PAGE_SIZE;
+    }
+#else 
+    err = direct_remap_area_pages(&init_mm, localaddr + PAGE_SIZE,
+                                 rx_ring_ref<<PAGE_SHIFT, PAGE_SIZE,
+                                 prot, netif->domid);
+    if (err)
+       return err;
+#endif
+
+    return 0;
+}
+
+static void unmap_frontend_page(netif_t *netif)
+{
+#if defined(CONFIG_XEN_NETDEV_GRANT_RX) || defined(CONFIG_XEN_NETDEV_GRANT_TX)
+    struct gnttab_unmap_grant_ref op;
+#endif
+    /* Undo the rings' grant mappings; empty body if neither ring uses grants. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    op.host_addr    = netif->tx_shmem_vaddr;
+    op.handle       = netif->tx_shmem_handle;
+    op.dev_bus_addr = 0;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+#endif
+    /* Same for the rx ring; a failing unmap hypercall trips BUG_ON. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    op.host_addr    = netif->rx_shmem_vaddr;
+    op.handle       = netif->rx_shmem_handle;
+    op.dev_bus_addr = 0;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+#endif
+}
+
+/* Map the remote domain's tx/rx rings, bind its event channel and mark the
+ * netif CONNECTED.  Returns 0, else -ENOMEM or the map/bind error code. */
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+             unsigned long rx_ring_ref, unsigned int evtchn)
+{
+    struct vm_struct *vma;
+    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+    int err;
+    /* Reserve two pages of kernel address space: tx ring first, rx second. */
+    vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP);
+    if (vma == NULL)
+        return -ENOMEM;
+    /* Map both ring pages into that area; on failure give the area back. */
+    err = map_frontend_page(netif, (unsigned long)vma->addr, tx_ring_ref,
+                           rx_ring_ref);
+    if (err) {
+        vfree(vma->addr);
+       return err;
+    }
+    /* Bind to port 'evtchn' in netif->domid; local port is read from port1. */
+    op.u.bind_interdomain.dom1 = DOMID_SELF;
+    op.u.bind_interdomain.dom2 = netif->domid;
+    op.u.bind_interdomain.port1 = 0;
+    op.u.bind_interdomain.port2 = evtchn;
+    err = HYPERVISOR_event_channel_op(&op);
+    if (err) {
+       unmap_frontend_page(netif);
+       vfree(vma->addr);
+       return err;
+    }
+
+    netif->evtchn = op.u.bind_interdomain.port1;
+    netif->remote_evtchn = evtchn;
+    netif->tx = (netif_tx_interface_t *)vma->addr;
+    netif->rx = (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE);
+    netif->tx->resp_prod = netif->rx->resp_prod = 0;
+    netif_get(netif);
+    wmb(); /* Other CPUs see new state before interface is started. */
+    /* Flip to CONNECTED under the RTNL lock; start the interface if it is up. */
+    rtnl_lock();
+    netif->status = CONNECTED;
+    wmb();
+    if (netif_running(netif->dev))
+        __netif_up(netif);
+    rtnl_unlock();
+    return 0;
+}
+
+static void free_netif(void *arg)
+{
+    evtchn_op_t op = { .cmd = EVTCHNOP_close };
+    netif_t *netif = (netif_t *)arg;
+
+    /*
+     * These can't be done in netif_disconnect() because at that point there
+     * may be outstanding requests in the network stack whose asynchronous
+     * responses must still be notified to the remote driver.
+     */
+
+    op.u.close.port = netif->evtchn;
+    op.u.close.dom = DOMID_SELF;
+    HYPERVISOR_event_channel_op(&op);
+    op.u.close.port = netif->remote_evtchn;
+    op.u.close.dom = netif->domid;
+    HYPERVISOR_event_channel_op(&op);
+
     unregister_netdev(netif->dev);
+
+    if (netif->tx) {
+       unmap_frontend_page(netif);
+       vfree(netif->tx); /* Frees netif->rx as well. */
+    }
+
     free_netdev(netif->dev);
-    destroy->status = NETIF_BE_STATUS_OKAY;
-}
-
-void netif_creditlimit(netif_be_creditlimit_t *creditlimit)
-{
-    domid_t       domid  = creditlimit->domid;
-    unsigned int  handle = creditlimit->netif_handle;
-    netif_t      *netif;
-
-    netif = netif_find_by_handle(domid, handle);
-    if ( unlikely(netif == NULL) )
-    {
-        DPRINTK("netif_creditlimit attempted for non-existent netif"
-                " (%u,%u)\n", creditlimit->domid, creditlimit->netif_handle); 
-        creditlimit->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return; 
-    }
-
+}
+/* Queue free_netif(netif) as a work item rather than running it in the caller. */
+void free_netif_callback(netif_t *netif)
+{
+    INIT_WORK(&netif->free_work, free_netif, (void *)netif);
+    schedule_work(&netif->free_work);
+}
+
+void netif_creditlimit(netif_t *netif)
+{
+#if 0
     /* Set the credit limit (reset remaining credit to new limit). */
     netif->credit_bytes = netif->remaining_credit = creditlimit->credit_bytes;
     netif->credit_usec = creditlimit->period_usec;
 
-    if ( netif->status == CONNECTED )
-    {
+    if (netif->status == CONNECTED) {
         /*
          * Schedule work so that any packets waiting under previous credit 
          * limit are dealt with (acts like a replenishment point).
@@ -263,119 +295,22 @@
         netif->credit_timeout.expires = jiffies;
         netif_schedule_work(netif);
     }
-    
-    creditlimit->status = NETIF_BE_STATUS_OKAY;
-}
-
-void netif_connect(netif_be_connect_t *connect)
-{
-    domid_t       domid  = connect->domid;
-    unsigned int  handle = connect->netif_handle;
-    unsigned int  evtchn = connect->evtchn;
-    unsigned long tx_shmem_frame = connect->tx_shmem_frame;
-    unsigned long rx_shmem_frame = connect->rx_shmem_frame;
-    struct vm_struct *vma;
-    pgprot_t      prot;
-    int           error;
-    netif_t      *netif;
-
-    netif = netif_find_by_handle(domid, handle);
-    if ( unlikely(netif == NULL) )
-    {
-        DPRINTK("netif_connect attempted for non-existent netif (%u,%u)\n", 
-                connect->domid, connect->netif_handle); 
-        connect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( netif->status != DISCONNECTED )
-    {
-        connect->status = NETIF_BE_STATUS_INTERFACE_CONNECTED;
-        return;
-    }
-
-    if ( (vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    prot = __pgprot(_KERNPG_TABLE);
-    error  = direct_remap_area_pages(&init_mm, 
-                                     VMALLOC_VMADDR(vma->addr),
-                                     tx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                     prot, domid);
-    error |= direct_remap_area_pages(&init_mm, 
-                                     VMALLOC_VMADDR(vma->addr) + PAGE_SIZE,
-                                     rx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                     prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            connect->status = NETIF_BE_STATUS_MAPPING_ERROR;
-        else
-            connect->status = NETIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-
-    netif->evtchn         = evtchn;
-    netif->irq            = bind_evtchn_to_irq(evtchn);
-    netif->tx_shmem_frame = tx_shmem_frame;
-    netif->rx_shmem_frame = rx_shmem_frame;
-    netif->tx             = 
-        (netif_tx_interface_t *)vma->addr;
-    netif->rx             = 
-        (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE);
-    netif->tx->resp_prod = netif->rx->resp_prod = 0;
-    netif_get(netif);
-    wmb(); /* Other CPUs see new state before interface is started. */
-
-    rtnl_lock();
-    netif->status = CONNECTED;
-    wmb();
-    if ( netif_running(netif->dev) )
-        __netif_up(netif);
-    rtnl_unlock();
-
-    connect->status = NETIF_BE_STATUS_OKAY;
-}
-
-int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id)
-{
-    domid_t       domid  = disconnect->domid;
-    unsigned int  handle = disconnect->netif_handle;
-    netif_t      *netif;
-
-    netif = netif_find_by_handle(domid, handle);
-    if ( unlikely(netif == NULL) )
-    {
-        DPRINTK("netif_disconnect attempted for non-existent netif"
-                " (%u,%u)\n", disconnect->domid, disconnect->netif_handle); 
-        disconnect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
-    }
-
-    if ( netif->status == CONNECTED )
-    {
+#endif
+}
+
+int netif_disconnect(netif_t *netif)
+{
+
+    if (netif->status == CONNECTED) {
         rtnl_lock();
         netif->status = DISCONNECTING;
-        netif->disconnect_rspid = rsp_id;
         wmb();
-        if ( netif_running(netif->dev) )
+        if (netif_running(netif->dev))
             __netif_down(netif);
         rtnl_unlock();
         netif_put(netif);
         return 0; /* Caller should not send response message. */
     }
 
-    disconnect->status = NETIF_BE_STATUS_OKAY;
     return 1;
 }
-
-void netif_interface_init(void)
-{
-    memset(netif_hash, 0, sizeof(netif_hash));
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu Aug 25 22:53:20 2005
@@ -12,11 +12,6 @@
 
 #include "common.h"
 #include <asm-xen/balloon.h>
-#include <asm-xen/evtchn.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#include <linux/delay.h>
-#endif
 
 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
 #include <asm-xen/xen-public/grant_table.h>
@@ -44,7 +39,7 @@
 static int  make_rx_response(netif_t *netif, 
                              u16      id, 
                              s8       st,
-                             memory_t addr,
+                             unsigned long addr,
                              u16      size,
                              u16      csum_valid);
 
@@ -55,11 +50,15 @@
 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
 
 static struct timer_list net_timer;
+
+#define MAX_PENDING_REQS 256
 
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
-#ifndef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
+#else
 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
 #endif
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
@@ -67,7 +66,6 @@
 /* Don't currently gate addition of an interface to the tx scheduling list. */
 #define tx_work_exists(_if) (1)
 
-#define MAX_PENDING_REQS 256
 static unsigned long mmap_vstart;
 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
 
@@ -91,11 +89,9 @@
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
 static u16 grant_tx_ref[MAX_PENDING_REQS];
-#endif
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
-#endif
-#ifndef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+#else
 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
 #endif
 
@@ -153,11 +149,7 @@
 static inline int is_xen_skb(struct sk_buff *skb)
 {
     extern kmem_cache_t *skbuff_cachep;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
     kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
-#else
-    kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->list.next;
-#endif
     return (cp == skbuff_cachep);
 }
 
@@ -251,7 +243,7 @@
 #else
     struct mmuext_op *mmuext;
 #endif
-    unsigned long vdata, mdata, new_mfn;
+    unsigned long vdata, old_mfn, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
     u16 notify_list[NETIF_RX_RING_SIZE];
@@ -271,7 +263,7 @@
     {
         netif   = netdev_priv(skb->dev);
         vdata   = (unsigned long)skb->data;
-        mdata   = virt_to_machine(vdata);
+        old_mfn = virt_to_mfn(vdata);
 
         /* Memory squeeze? Back off for an arbitrary while. */
         if ( (new_mfn = alloc_mfn()) == 0 )
@@ -293,7 +285,7 @@
         mcl++;
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        gop->mfn = mdata >> PAGE_SHIFT;
+        gop->mfn = old_mfn;
         gop->domid = netif->domid;
         gop->handle = netif->rx->ring[
         MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
@@ -308,7 +300,7 @@
         mcl++;
 
         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
-        mmuext->mfn = mdata >> PAGE_SHIFT;
+        mmuext->mfn = old_mfn;
         mmuext++;
 #endif
         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
@@ -318,7 +310,7 @@
         __skb_queue_tail(&rxq, skb);
 
 #ifdef DEBUG_GRANT
-        dump_packet('a', mdata, vdata);
+        dump_packet('a', old_mfn, vdata);
 #endif
         /* Filled the batch queue? */
         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
@@ -345,10 +337,8 @@
 
     mcl = rx_mcl;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
-                                           grant_rx_op, gop - grant_rx_op))) {
-        BUG();
-    }
+    BUG_ON(HYPERVISOR_grant_table_op(
+        GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
     gop = grant_rx_op;
 #else
     mmuext = rx_mmuext;
@@ -361,10 +351,9 @@
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        mdata = (unsigned long)skb->data & ~PAGE_MASK;
-#else
-        mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
-                   ((unsigned long)skb->data & ~PAGE_MASK));
+        old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
+#else
+        old_mfn = mmuext[0].mfn;
 #endif
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
@@ -379,18 +368,20 @@
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        BUG_ON(gop->status != 0);
+        BUG_ON(gop->status != 0); /* XXX */
 #else
         if ( unlikely(mcl[1].result != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
-            free_mfn(mdata >> PAGE_SHIFT);
+            free_mfn(old_mfn);
             status = NETIF_RSP_ERROR;
         }
 #endif
         evtchn = netif->evtchn;
         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
-        if ( make_rx_response(netif, id, status, mdata,
+        if ( make_rx_response(netif, id, status,
+                              (old_mfn << PAGE_SHIFT) | /* XXX */
+                              ((unsigned long)skb->data & ~PAGE_MASK),
                               size, skb->proto_csum_valid) &&
              (rx_notify[evtchn] == 0) )
         {
@@ -493,7 +484,6 @@
 inline static void net_tx_action_dealloc(void)
 {
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    gnttab_unmap_grant_ref_t unmap_ops[MAX_PENDING_REQS];
     gnttab_unmap_grant_ref_t *gop;
 #else
     multicall_entry_t *mcl;
@@ -509,19 +499,18 @@
     /*
      * Free up any grants we have finished using
      */
-    gop = unmap_ops;
-    while (dc != dp) {
+    gop = tx_unmap_ops;
+    while ( dc != dp )
+    {
         pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-        gop->host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->host_addr    = MMAP_VADDR(pending_idx);
         gop->dev_bus_addr = 0;
-        gop->handle = grant_tx_ref[pending_idx];
+        gop->handle       = grant_tx_ref[pending_idx];
         grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
         gop++;
     }
-    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
-                                           unmap_ops, gop - unmap_ops))) {
-        BUG();
-    }
+    BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
 #else
     mcl = tx_mcl;
     while ( dc != dp )
@@ -584,7 +573,6 @@
     u16 pending_idx;
     NETIF_RING_IDX i;
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    gnttab_map_grant_ref_t map_ops[MAX_PENDING_REQS];
     gnttab_map_grant_ref_t *mop;
 #else
     multicall_entry_t *mcl;
@@ -595,7 +583,7 @@
         net_tx_action_dealloc();
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    mop = map_ops;
+    mop = tx_map_ops;
 #else
     mcl = tx_mcl;
 #endif
@@ -646,11 +634,7 @@
                 netif->credit_timeout.expires  = next_credit;
                 netif->credit_timeout.data     = (unsigned long)netif;
                 netif->credit_timeout.function = tx_credit_callback;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
                 add_timer_on(&netif->credit_timeout, smp_processor_id());
-#else
-                add_timer(&netif->credit_timeout); 
-#endif
                 break;
             }
         }
@@ -700,10 +684,10 @@
         /* Packets passed to netif_rx() must have some headroom. */
         skb_reserve(skb, 16);
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-        mop->host_virt_addr = MMAP_VADDR(pending_idx);
-        mop->dom = netif->domid;
-        mop->ref = txreq.addr >> PAGE_SHIFT;
-        mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+        mop->host_addr = MMAP_VADDR(pending_idx);
+        mop->dom       = netif->domid;
+        mop->ref       = txreq.addr >> PAGE_SHIFT;
+        mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
         mop++;
 #else
        MULTI_update_va_mapping_otherdomain(
@@ -723,7 +707,7 @@
         pending_cons++;
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-        if ((mop - map_ops) >= ARRAY_SIZE(map_ops))
+        if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
             break;
 #else
         /* Filled the batch queue? */
@@ -733,20 +717,18 @@
     }
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    if (mop == map_ops) {
+    if ( mop == tx_map_ops )
         return;
-    }
-    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-                                           map_ops, mop - map_ops))) {
-        BUG();
-    }
-    mop = map_ops;
+
+    BUG_ON(HYPERVISOR_grant_table_op(
+        GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));
+
+    mop = tx_map_ops;
 #else
     if ( mcl == tx_mcl )
         return;
 
-    if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
-        BUG();
+    BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);
 
     mcl = tx_mcl;
 #endif
@@ -758,7 +740,13 @@
 
         /* Check the remap error code. */
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-        if (unlikely(mop->dev_bus_addr == 0)) {
+        /* 
+           XXX SMH: error returns from grant operations are pretty poorly
+           specified/thought out, but the below at least conforms with 
+           what the rest of the code uses. 
+        */
+        if ( unlikely(mop->handle < 0) )
+        {
             printk(KERN_ALERT "#### netback grant fails\n");
             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
             netif_put(netif);
@@ -768,7 +756,7 @@
             continue;
         }
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
-                             FOREIGN_FRAME(mop->dev_bus_addr);
+                             FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
         grant_tx_ref[pending_idx] = mop->handle;
 #else
         if ( unlikely(mcl[0].result != 0) )
@@ -887,7 +875,7 @@
 static int make_rx_response(netif_t *netif, 
                             u16      id, 
                             s8       st,
-                            memory_t addr,
+                            unsigned long addr,
                             u16      size,
                             u16      csum_valid)
 {
@@ -966,10 +954,9 @@
     net_timer.data = 0;
     net_timer.function = net_alarm;
     
-    netif_interface_init();
-
-    mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS);
-    BUG_ON(mmap_vstart == 0);
+    page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
+    BUG_ON(page == NULL);
+    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
 
     for ( i = 0; i < MAX_PENDING_REQS; i++ )
     {
@@ -986,7 +973,7 @@
     spin_lock_init(&net_schedule_list_lock);
     INIT_LIST_HEAD(&net_schedule_list);
 
-    netif_ctrlif_init();
+    netif_xenbus_init();
 
     (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
                       netif_be_dbg, SA_SHIRQ, 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Aug 25 22:53:20 2005
@@ -48,7 +48,7 @@
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm-xen/evtchn.h>
-#include <asm-xen/ctrl_if.h>
+#include <asm-xen/xenbus.h>
 #include <asm-xen/xen-public/io/netif.h>
 #include <asm-xen/balloon.h>
 #include <asm/page.h>
@@ -59,7 +59,7 @@
 #include <asm-xen/gnttab.h>
 #ifdef GRANT_DEBUG
 static void
-dump_packet(int tag, u32 addr, u32 ap)
+dump_packet(int tag, void *addr, u32 ap)
 {
     unsigned char *p = (unsigned char *)ap;
     int i;
@@ -102,19 +102,23 @@
 #endif
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t gref_tx_head;
 static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
 #endif
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t gref_rx_head;
 static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
 #endif
 
 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
-static domid_t rdomid = 0;
 #define GRANT_INVALID_REF      (0xFFFF)
 #endif
+
+#define NETIF_STATE_DISCONNECTED 0
+#define NETIF_STATE_CONNECTED    1
+
+static unsigned int netif_state = NETIF_STATE_DISCONNECTED;
 
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
@@ -133,12 +137,11 @@
 #define xennet_proc_delif(d) ((void)0)
 #endif
 
-static struct list_head dev_list;
-
+#define netfront_info net_private
 struct net_private
 {
     struct list_head list;
-    struct net_device *dev;
+    struct net_device *netdev;
 
     struct net_device_stats stats;
     NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
@@ -152,7 +155,6 @@
 
     unsigned int handle;
     unsigned int evtchn;
-    unsigned int irq;
 
     /* What is the status of our connection to the remote backend? */
 #define BEST_CLOSED       0
@@ -177,6 +179,14 @@
      */
     struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
     struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+
+       struct xenbus_device *xbdev;
+       char *backend;
+       int backend_id;
+       struct xenbus_watch watch;
+       int tx_ring_ref;
+       int rx_ring_ref;
+       u8 mac[ETH_ALEN];
 };
 
 /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
@@ -188,20 +198,15 @@
     (_list)[0]  = (_list)[_id];                    \
     (unsigned short)_id; })
 
-static char *status_name[] = {
-    [NETIF_INTERFACE_STATUS_CLOSED]       = "closed",
-    [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
-    [NETIF_INTERFACE_STATUS_CONNECTED]    = "connected",
-    [NETIF_INTERFACE_STATUS_CHANGED]      = "changed",
-};
-
+#ifdef DEBUG
 static char *be_state_name[] = {
     [BEST_CLOSED]       = "closed",
     [BEST_DISCONNECTED] = "disconnected",
     [BEST_CONNECTED]    = "connected",
 };
-
-#if DEBUG
+#endif
+
+#ifdef DEBUG
 #define DPRINTK(fmt, args...) \
     printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
 #else
@@ -211,89 +216,6 @@
     printk(KERN_INFO "xen_net: " fmt, ##args)
 #define WPRINTK(fmt, args...) \
     printk(KERN_WARNING "xen_net: " fmt, ##args)
-
-static struct net_device *find_dev_by_handle(unsigned int handle)
-{
-    struct list_head *ent;
-    struct net_private *np;
-    list_for_each (ent, &dev_list) {
-        np = list_entry(ent, struct net_private, list);
-        if (np->handle == handle)
-            return np->dev;
-    }
-    return NULL;
-}
-
-/** Network interface info. */
-struct netif_ctrl {
-    /** Number of interfaces. */
-    int interface_n;
-    /** Number of connected interfaces. */
-    int connected_n;
-    /** Error code. */
-    int err;
-    int up;
-};
-
-static struct netif_ctrl netctrl;
-
-static void netctrl_init(void)
-{
-    memset(&netctrl, 0, sizeof(netctrl));
-    netctrl.up = NETIF_DRIVER_STATUS_DOWN;
-}
-
-/** Get or set a network interface error.
- */
-static int netctrl_err(int err)
-{
-    if ((err < 0) && !netctrl.err)
-        netctrl.err = err;
-    return netctrl.err;
-}
-
-/** Test if all network interfaces are connected.
- *
- * @return 1 if all connected, 0 if not, negative error code otherwise
- */
-static int netctrl_connected(void)
-{
-    int ok;
-
-    if (netctrl.err)
-        ok = netctrl.err;
-    else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
-        ok = (netctrl.connected_n == netctrl.interface_n);
-    else
-        ok = 0;
-
-    return ok;
-}
-
-/** Count the connected network interfaces.
- *
- * @return connected count
- */
-static int netctrl_connected_count(void)
-{
-    
-    struct list_head *ent;
-    struct net_private *np;
-    unsigned int connected;
-
-    connected = 0;
-    
-    list_for_each(ent, &dev_list) {
-        np = list_entry(ent, struct net_private, list);
-        if (np->backend_state == BEST_CONNECTED)
-            connected++;
-    }
-
-    netctrl.connected_n = connected;
-    DPRINTK("> connected_n=%d interface_n=%d\n",
-            netctrl.connected_n, netctrl.interface_n);
-    return connected;
-}
 
 /** Send a packet on a net device to encourage switches to learn the
  * MAC. We send a fake ARP request.
@@ -357,10 +279,14 @@
             id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-            if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
-                printk(KERN_ALERT "netfront: query foreign access\n");
+            if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
+                /* other domain is still using this grant - shouldn't happen
+                   but if it does, we'll try to reclaim the grant later */
+                printk(KERN_ALERT "network_tx_buf_gc: warning -- grant "
+                       "still in use by backend domain.\n");
+                goto out; 
             }
-            gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+            gnttab_end_foreign_access_ref(grant_tx_ref[id], GNTMAP_readonly);
             gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
             grant_tx_ref[id] = GRANT_INVALID_REF;
 #endif
@@ -383,6 +309,10 @@
         mb();
     } while (prod != np->tx->resp_prod);
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+  out: 
+#endif
+
     if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
         np->tx_full = 0;
         if (np->user_state == UST_OPEN)
@@ -434,16 +364,17 @@
         
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+       ref = gnttab_claim_grant_reference(&gref_rx_head);
+        if (unlikely(ref < 0)) {
             printk(KERN_ALERT "#### netfront can't claim rx reference\n");
             BUG();
         }
         grant_rx_ref[id] = ref;
-        gnttab_grant_foreign_transfer_ref(ref, rdomid,
-        virt_to_machine(skb->head) >> PAGE_SHIFT);
+        gnttab_grant_foreign_transfer_ref(ref, np->backend_id,
+                                          virt_to_mfn(skb->head));
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
 #endif
-        rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
+        rx_pfn_array[i] = virt_to_mfn(skb->head);
 
        /* Remove this page from pseudo phys map before passing back to Xen. */
        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] 
@@ -529,17 +460,19 @@
 
     tx->id   = id;
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+    ref = gnttab_claim_grant_reference(&gref_tx_head);
+    if (unlikely(ref < 0)) {
         printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
         BUG();
     }
-    mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
-    gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
-    tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+    mfn = virt_to_mfn(skb->data);
+    gnttab_grant_foreign_access_ref(ref, np->backend_id, mfn, GNTMAP_readonly);
+    tx->addr = ref << PAGE_SHIFT;
     grant_tx_ref[id] = ref;
 #else
-    tx->addr = virt_to_machine(skb->data);
-#endif
+    tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+    tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
     tx->size = skb->len;
     tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
 
@@ -639,8 +572,7 @@
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
         ref = grant_rx_ref[rx->id];
         grant_rx_ref[rx->id] = GRANT_INVALID_REF;
-
-        mfn = gnttab_end_foreign_transfer(ref);
+        mfn = gnttab_end_foreign_transfer_ref(ref);
         gnttab_release_grant_reference(&gref_rx_head, ref);
 #endif
 
@@ -675,18 +607,20 @@
                                pfn_pte_ma(mfn, PAGE_KERNEL), 0);
 #else
        MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
-                               pfn_pte_ma(rx->addr >> PAGE_SHIFT, PAGE_KERNEL), 0);
+                               pfn_pte_ma(rx->addr >> PAGE_SHIFT, 
+                                           PAGE_KERNEL), 0);
 #endif
         mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
+#else
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
-#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-            mfn;
-#else
             rx->addr >> PAGE_SHIFT;
 #endif
+
 #ifdef GRANT_DEBUG
-        printk(KERN_ALERT "#### rx_poll     enqueue vdata=%08x mfn=%08x ref=%04x\n",
+        printk(KERN_ALERT "#### rx_poll     enqueue vdata=%p mfn=%lu ref=%x\n",
                skb->data, mfn, ref);
 #endif
         __skb_queue_tail(&rxq, skb);
@@ -708,9 +642,9 @@
 
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
 #ifdef GRANT_DEBUG
-         printk(KERN_ALERT "#### rx_poll     dequeue vdata=%08x mfn=%08x\n",
-                skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
-         dump_packet('d', skb->data, (unsigned long)skb->data);
+        printk(KERN_ALERT "#### rx_poll     dequeue vdata=%p mfn=%lu\n",
+               skb->data, virt_to_mfn(skb->data));
+        dump_packet('d', skb->data, (unsigned long)skb->data);
 #endif
         /*
          * Enough room in skbuff for the data we were passed? Also, Linux 
@@ -797,7 +731,7 @@
 {
     struct net_private *np = netdev_priv(dev);
     np->user_state = UST_CLOSED;
-    netif_stop_queue(np->dev);
+    netif_stop_queue(np->netdev);
     return 0;
 }
 
@@ -809,8 +743,7 @@
 }
 
 
-static void network_connect(struct net_device *dev,
-                            netif_fe_interface_status_t *status)
+static void network_connect(struct net_device *dev)
 {
     struct net_private *np;
     int i, requeue_idx;
@@ -843,18 +776,23 @@
      * interface has been down.
      */
     for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
-            if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
-                struct sk_buff *skb = np->tx_skbs[i];
-                
-                tx = &np->tx->ring[requeue_idx++].req;
-                
-                tx->id   = i;
-                tx->addr = virt_to_machine(skb->data);
-                tx->size = skb->len;
-                
-                np->stats.tx_bytes += skb->len;
-                np->stats.tx_packets++;
-            }
+        if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
+            struct sk_buff *skb = np->tx_skbs[i];
+
+            tx = &np->tx->ring[requeue_idx++].req;
+
+            tx->id   = i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+            tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
+#else
+            tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+            tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
+            tx->size = skb->len;
+
+            np->stats.tx_bytes += skb->len;
+            np->stats.tx_packets++;
+        }
     }
     wmb();
     np->tx->req_prod = requeue_idx;
@@ -873,7 +811,7 @@
      */
     np->backend_state = BEST_CONNECTED;
     wmb();
-    notify_via_evtchn(status->evtchn);  
+    notify_via_evtchn(np->evtchn);  
     network_tx_buf_gc(dev);
 
     if (np->user_state == UST_OPEN)
@@ -883,132 +821,21 @@
     spin_unlock_irq(&np->tx_lock);
 }
 
-static void vif_show(struct net_private *np)
-{
-#if DEBUG
-    if (np) {
-        IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
-               np->handle,
-               be_state_name[np->backend_state],
-               np->user_state ? "open" : "closed",
-               np->evtchn,
-               np->irq,
-               np->tx,
-               np->rx);
-    } else {
-        IPRINTK("<vif NULL>\n");
-    }
-#endif
-}
-
-/* Send a connect message to xend to tell it to bring up the interface. */
-static void send_interface_connect(struct net_private *np)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_NETIF_FE,
-        .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
-        .length  = sizeof(netif_fe_interface_connect_t),
-    };
-    netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
-    msg->handle = np->handle;
-    msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
-    msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);
-        
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Send a driver status notification to the domain controller. */
-static int send_driver_status(int ok)
-{
-    int err = 0;
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_NETIF_FE,
-        .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
-        .length  = sizeof(netif_fe_driver_status_t),
-    };
-    netif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
-    msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
-    err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-    return err;
-}
-
-/* Stop network device and free tx/rx queues and irq.
- */
-static void vif_release(struct net_private *np)
-{
-    /* Stop old i/f to prevent errors whilst we rebuild the state. */
-    spin_lock_irq(&np->tx_lock);
-    spin_lock(&np->rx_lock);
-    netif_stop_queue(np->dev);
-    /* np->backend_state = BEST_DISCONNECTED; */
-    spin_unlock(&np->rx_lock);
-    spin_unlock_irq(&np->tx_lock);
-    
-    /* Free resources. */
-    if(np->tx != NULL){
-        free_irq(np->irq, np->dev);
-        unbind_evtchn_from_irq(np->evtchn);
-        free_page((unsigned long)np->tx);
-        free_page((unsigned long)np->rx);
-        np->irq = 0;
-        np->evtchn = 0;
-        np->tx = NULL;
-        np->rx = NULL;
-    }
-}
-
-/* Release vif resources and close it down completely.
- */
-static void vif_close(struct net_private *np)
-{
-    WPRINTK("Unexpected netif-CLOSED message in state %s\n",
-            be_state_name[np->backend_state]);
-    vif_release(np);
-    np->backend_state = BEST_CLOSED;
-    /* todo: take dev down and free. */
-    vif_show(np);
-}
-
-/* Move the vif into disconnected state.
- * Allocates tx/rx pages.
- * Sends connect message to xend.
- */
-static void vif_disconnect(struct net_private *np)
-{
-    if(np->tx) free_page((unsigned long)np->tx);
-    if(np->rx) free_page((unsigned long)np->rx);
-    // Before this np->tx and np->rx had better be null.
-    np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
-    np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
-    memset(np->tx, 0, PAGE_SIZE);
-    memset(np->rx, 0, PAGE_SIZE);
-    np->backend_state = BEST_DISCONNECTED;
-    send_interface_connect(np);
-    vif_show(np);
-}
-
-/* Begin interface recovery.
- *
- * NB. Whilst we're recovering, we turn the carrier state off.  We
- * take measures to ensure that this device isn't used for
- * anything.  We also stop the queue for this device.  Various
- * different approaches (e.g. continuing to buffer packets) have
- * been tested but don't appear to improve the overall impact on
- * TCP connections.
- *
- * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
- * is initiated by a special "RESET" message - disconnect could
- * just mean we're not allowed to use this interface any more.
- */
-static void vif_reset(struct net_private *np)
-{
-    IPRINTK("Attempting to reconnect network interface: handle=%u\n",
-            np->handle);    
-    vif_release(np);
-    vif_disconnect(np);
-    vif_show(np);
+static void show_device(struct net_private *np)
+{
+#ifdef DEBUG
+       if (np) {
+               IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
+                       np->handle,
+                       be_state_name[np->backend_state],
+                       np->user_state ? "open" : "closed",
+                       np->evtchn,
+                       np->tx,
+                       np->rx);
+       } else {
+               IPRINTK("<vif NULL>\n");
+       }
+#endif
 }
 
 /* Move the vif into connected state.
@@ -1016,26 +843,22 @@
  * Binds the irq to the event channel.
  */
 static void 
-vif_connect(struct net_private *np, netif_fe_interface_status_t *status)
-{
-    struct net_device *dev = np->dev;
-    memcpy(dev->dev_addr, status->mac, ETH_ALEN);
-    network_connect(dev, status);
-    np->evtchn = status->evtchn;
-    np->irq = bind_evtchn_to_irq(np->evtchn);
-#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
-    rdomid = status->domid;
-#endif
-    (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
-    netctrl_connected_count();
-    (void)send_fake_arp(dev);
-    vif_show(np);
+connect_device(struct net_private *np, unsigned int evtchn)
+{
+       struct net_device *dev = np->netdev;
+       memcpy(dev->dev_addr, np->mac, ETH_ALEN);
+       np->evtchn = evtchn;
+       network_connect(dev);
+       (void)bind_evtchn_to_irqhandler(
+               np->evtchn, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
+       (void)send_fake_arp(dev);
+       show_device(np);
 }
 
 static struct ethtool_ops network_ethtool_ops =
 {
-    .get_tx_csum = ethtool_op_get_tx_csum,
-    .set_tx_csum = ethtool_op_set_tx_csum,
+       .get_tx_csum = ethtool_op_get_tx_csum,
+       .set_tx_csum = ethtool_op_set_tx_csum,
 };
 
 /** Create a network device.
@@ -1043,22 +866,24 @@
  * @param val return parameter for created device
  * @return 0 on success, error code otherwise
  */
-static int create_netdev(int handle, struct net_device **val)
+static int create_netdev(int handle, struct xenbus_device *dev,
+                        struct net_device **val)
 {
     int i, err = 0;
-    struct net_device *dev = NULL;
+    struct net_device *netdev = NULL;
     struct net_private *np = NULL;
 
-    if ((dev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
+    if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
         printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
         err = -ENOMEM;
         goto exit;
     }
 
-    np                = netdev_priv(dev);
+    np                = netdev_priv(netdev);
     np->backend_state = BEST_CLOSED;
     np->user_state    = UST_CLOSED;
     np->handle        = handle;
+    np->xbdev         = dev;
     
     spin_lock_init(&np->tx_lock);
     spin_lock_init(&np->rx_lock);
@@ -1082,268 +907,47 @@
 #endif
     }
 
-    dev->open            = network_open;
-    dev->hard_start_xmit = network_start_xmit;
-    dev->stop            = network_close;
-    dev->get_stats       = network_get_stats;
-    dev->poll            = netif_poll;
-    dev->weight          = 64;
-    dev->features        = NETIF_F_IP_CSUM;
-
-    SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-
-    if ((err = register_netdev(dev)) != 0) {
+    netdev->open            = network_open;
+    netdev->hard_start_xmit = network_start_xmit;
+    netdev->stop            = network_close;
+    netdev->get_stats       = network_get_stats;
+    netdev->poll            = netif_poll;
+    netdev->weight          = 64;
+    netdev->features        = NETIF_F_IP_CSUM;
+
+    SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
+
+    if ((err = register_netdev(netdev)) != 0) {
         printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
         goto exit;
     }
 
-    if ((err = xennet_proc_addif(dev)) != 0) {
-        unregister_netdev(dev);
+    if ((err = xennet_proc_addif(netdev)) != 0) {
+        unregister_netdev(netdev);
         goto exit;
     }
 
-    np->dev = dev;
-    list_add(&np->list, &dev_list);
+    np->netdev = netdev;
 
   exit:
-    if ((err != 0) && (dev != NULL))
-        kfree(dev);
+    if ((err != 0) && (netdev != NULL))
+        kfree(netdev);
     else if (val != NULL)
-        *val = dev;
+        *val = netdev;
     return err;
 }
 
-/* Get the target interface for a status message.
- * Creates the interface when it makes sense.
- * The returned interface may be null when there is no error.
- *
- * @param status status message
- * @param np return parameter for interface state
- * @return 0 on success, error code otherwise
- */
-static int 
-target_vif(netif_fe_interface_status_t *status, struct net_private **np)
-{
-    int err = 0;
-    struct net_device *dev;
-
-    DPRINTK("> handle=%d\n", status->handle);
-    if (status->handle < 0) {
-        err = -EINVAL;
-        goto exit;
-    }
-
-    if ((dev = find_dev_by_handle(status->handle)) != NULL)
-        goto exit;
-
-    if (status->status == NETIF_INTERFACE_STATUS_CLOSED)
-        goto exit;
-    if (status->status == NETIF_INTERFACE_STATUS_CHANGED)
-        goto exit;
-
-    /* It's a new interface in a good state - create it. */
-    DPRINTK("> create device...\n");
-    if ((err = create_netdev(status->handle, &dev)) != 0)
-        goto exit;
-
-    netctrl.interface_n++;
-
-  exit:
-    if (np != NULL)
-        *np = ((dev && !err) ? netdev_priv(dev) : NULL);
-    DPRINTK("< err=%d\n", err);
-    return err;
-}
-
-/* Handle an interface status message. */
-static void netif_interface_status(netif_fe_interface_status_t *status)
-{
-    int err = 0;
-    struct net_private *np = NULL;
-    
-    DPRINTK("> status=%s handle=%d\n",
-            status_name[status->status], status->handle);
-
-    if ((err = target_vif(status, &np)) != 0) {
-        WPRINTK("Invalid netif: handle=%u\n", status->handle);
-        return;
-    }
-
-    if (np == NULL) {
-        DPRINTK("> no vif\n");
-        return;
-    }
-
-    switch (status->status) {
-    case NETIF_INTERFACE_STATUS_CLOSED:
-        switch (np->backend_state) {
-        case BEST_CLOSED:
-        case BEST_DISCONNECTED:
-        case BEST_CONNECTED:
-            vif_close(np);
-            break;
-        }
-        break;
-
-    case NETIF_INTERFACE_STATUS_DISCONNECTED:
-        switch (np->backend_state) {
-        case BEST_CLOSED:
-            vif_disconnect(np);
-            break;
-        case BEST_DISCONNECTED:
-        case BEST_CONNECTED:
-            vif_reset(np);
-            break;
-        }
-        break;
-
-    case NETIF_INTERFACE_STATUS_CONNECTED:
-        switch (np->backend_state) {
-        case BEST_CLOSED:
-            WPRINTK("Unexpected netif status %s in state %s\n",
-                    status_name[status->status],
-                    be_state_name[np->backend_state]);
-            vif_disconnect(np);
-            vif_connect(np, status);
-            break;
-        case BEST_DISCONNECTED:
-            vif_connect(np, status);
-            break;
-        }
-        break;
-
-    case NETIF_INTERFACE_STATUS_CHANGED:
-        /*
-         * The domain controller is notifying us that a device has been
-         * added or removed.
-         */
-        break;
-
-    default:
-        WPRINTK("Invalid netif status code %d\n", status->status);
-        break;
-    }
-
-    vif_show(np);
-}
-
-/*
- * Initialize the network control interface. 
- */
-static void netif_driver_status(netif_fe_driver_status_t *status)
-{
-    netctrl.up = status->status;
-    netctrl_connected_count();
-}
-
-/* Receive handler for control messages. */
-static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-
-    switch (msg->subtype) {
-    case CMSG_NETIF_FE_INTERFACE_STATUS:
-        netif_interface_status((netif_fe_interface_status_t *) &msg->msg[0]);
-        break;
-
-    case CMSG_NETIF_FE_DRIVER_STATUS:
-        netif_driver_status((netif_fe_driver_status_t *) &msg->msg[0]);
-        break;
-
-    default:
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-
-#if 1
-/* Wait for all interfaces to be connected.
- *
- * This works OK, but we'd like to use the probing mode (see below).
- */
-static int probe_interfaces(void)
-{
-    int err = 0, conn = 0;
-    int wait_i, wait_n = 100;
-
-    DPRINTK(">\n");
-
-    for (wait_i = 0; wait_i < wait_n; wait_i++) { 
-        DPRINTK("> wait_i=%d\n", wait_i);
-        conn = netctrl_connected();
-        if(conn) break;
-        DPRINTK("> schedule_timeout...\n");
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(10);
-    }
-
-    DPRINTK("> wait finished...\n");
-    if (conn <= 0) {
-        err = netctrl_err(-ENETDOWN);
-        WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
-    }
-
-    DPRINTK("< err=%d\n", err);
-
-    return err;
-}
-#else
-/* Probe for interfaces until no more are found.
- *
- * This is the mode we'd like to use, but at the moment it panics the kernel.
-*/
-static int probe_interfaces(void)
-{
-    int err = 0;
-    int wait_i, wait_n = 100;
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_NETIF_FE,
-        .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
-        .length  = sizeof(netif_fe_interface_status_t),
-    };
-    netif_fe_interface_status_t msg = {};
-    ctrl_msg_t rmsg = {};
-    netif_fe_interface_status_t *reply = (void*)rmsg.msg;
-    int state = TASK_UNINTERRUPTIBLE;
-    u32 query = -1;
-
-    DPRINTK(">\n");
-
-    netctrl.interface_n = 0;
-    for (wait_i = 0; wait_i < wait_n; wait_i++) { 
-        DPRINTK("> wait_i=%d query=%d\n", wait_i, query);
-        msg.handle = query;
-        memcpy(cmsg.msg, &msg, sizeof(msg));
-        DPRINTK("> set_current_state...\n");
-        set_current_state(state);
-        DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
-        DPRINTK("> sending...\n");
-        err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
-        DPRINTK("> err=%d\n", err);
-        if(err) goto exit;
-        DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
-        if((int)reply->handle < 0) {
-            // No more interfaces.
-            break;
-        }
-        query = -reply->handle - 2;
-        DPRINTK(">netif_interface_status ...\n");
-        netif_interface_status(reply);
-    }
-
-  exit:
-    if (err) {
-        err = netctrl_err(-ENETDOWN);
-        WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
-    }
-
-    DPRINTK("< err=%d\n", err);
-    return err;
-}
-
-#endif
+static int destroy_netdev(struct net_device *netdev)
+{
+
+#ifdef CONFIG_PROC_FS
+       xennet_proc_delif(netdev);
+#endif
+
+        unregister_netdev(netdev);
+
+       return 0;
+}
 
 /*
  * We use this notifier to send out a fake ARP reply to reset switches and
@@ -1354,19 +958,11 @@
 {
     struct in_ifaddr  *ifa = (struct in_ifaddr *)ptr; 
     struct net_device *dev = ifa->ifa_dev->dev;
-    struct list_head  *ent;
-    struct net_private *np;
-
-    if (event != NETDEV_UP)
-        goto out;
-
-    list_for_each (ent, &dev_list) {
-        np = list_entry(ent, struct net_private, list);
-        if (np->dev == dev)
-            (void)send_fake_arp(dev);
-    }
+
+    /* UP event and is it one of our devices? */
+    if (event == NETDEV_UP && dev->open == network_open)
+        (void)send_fake_arp(dev);
         
- out:
     return NOTIFY_DONE;
 }
 
@@ -1376,66 +972,315 @@
     .priority       = 0
 };
 
-static int __init netif_init(void)
-{
-    int err = 0;
-
-    if (xen_start_info.flags & SIF_INITDOMAIN)
-        return 0;
+static struct xenbus_device_id netfront_ids[] = {
+       { "vif" },
+       { "" }
+};
+
+static void watch_for_status(struct xenbus_watch *watch, const char *node)
+{
+}
+
+static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
+{
+       evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
+       int err;
+
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
-                                      &gref_tx_head, &gref_tx_terminal) < 0) {
-        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
-        return 1;
-    }
-    printk(KERN_ALERT "#### netfront tx using grant tables\n");
+       info->tx_ring_ref = GRANT_INVALID_REF;
 #endif
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
-                                      &gref_rx_head, &gref_rx_terminal) < 0) {
-        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
-        return 1;
-    }
-    printk(KERN_ALERT "#### netfront rx using grant tables\n");
-#endif
-
-    if ((err = xennet_proc_init()) != 0)
-        return err;
-
-    IPRINTK("Initialising virtual ethernet driver.\n");
-    INIT_LIST_HEAD(&dev_list);
-    (void)register_inetaddr_notifier(&notifier_inetdev);
-    netctrl_init();
-    (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-    send_driver_status(1);
-    err = probe_interfaces();
-    if (err)
-        ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
-
-    DPRINTK("< err=%d\n", err);
-    return err;
-}
-
-static void netif_exit(void)
-{
+       info->rx_ring_ref = GRANT_INVALID_REF;
+#endif
+
+       info->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
+       if (info->tx == 0) {
+               err = -ENOMEM;
+               xenbus_dev_error(dev, err, "allocating tx ring page");
+               goto out;
+       }
+       info->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
+       if (info->rx == 0) {
+               err = -ENOMEM;
+               xenbus_dev_error(dev, err, "allocating rx ring page");
+               goto out;
+       }
+       memset(info->tx, 0, PAGE_SIZE);
+       memset(info->rx, 0, PAGE_SIZE);
+       info->backend_state = BEST_DISCONNECTED;
+
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
-#endif
+       err = gnttab_grant_foreign_access(info->backend_id,
+                                         virt_to_mfn(info->tx), 0);
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "granting access to tx ring page");
+               goto out;
+       }
+       info->tx_ring_ref = err;
+#else
+       info->tx_ring_ref = virt_to_mfn(info->tx);
+#endif
+
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
-#endif
-}
-
-static void vif_suspend(struct net_private *np)
-{
+       err = gnttab_grant_foreign_access(info->backend_id,
+                                         virt_to_mfn(info->rx), 0);
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "granting access to rx ring page");
+               goto out;
+       }
+       info->rx_ring_ref = err;
+#else
+       info->rx_ring_ref = virt_to_mfn(info->rx);
+#endif
+
+       op.u.alloc_unbound.dom = info->backend_id;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               xenbus_dev_error(dev, err, "allocating event channel");
+               goto out;
+       }
+       connect_device(info, op.u.alloc_unbound.port);
+       return 0;
+
+ out:
+       if (info->tx)
+               free_page((unsigned long)info->tx);
+       info->tx = 0;
+       if (info->rx)
+               free_page((unsigned long)info->rx);
+       info->rx = 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+       if (info->tx_ring_ref != GRANT_INVALID_REF)
+               gnttab_end_foreign_access(info->tx_ring_ref, 0);
+       info->tx_ring_ref = GRANT_INVALID_REF;
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+       if (info->rx_ring_ref != GRANT_INVALID_REF)
+               gnttab_end_foreign_access(info->rx_ring_ref, 0);
+       info->rx_ring_ref = GRANT_INVALID_REF;
+#endif
+       return err;
+}
+
+static void netif_free(struct netfront_info *info)
+{
+       if (info->tx)
+               free_page((unsigned long)info->tx);
+       info->tx = 0;
+       if (info->rx)
+               free_page((unsigned long)info->rx);
+       info->rx = 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+       if (info->tx_ring_ref != GRANT_INVALID_REF)
+               gnttab_end_foreign_access(info->tx_ring_ref, 0);
+       info->tx_ring_ref = GRANT_INVALID_REF;
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+       if (info->rx_ring_ref != GRANT_INVALID_REF)
+               gnttab_end_foreign_access(info->rx_ring_ref, 0);
+       info->rx_ring_ref = GRANT_INVALID_REF;
+#endif
+       unbind_evtchn_from_irqhandler(info->evtchn, info->netdev);
+       info->evtchn = 0;
+}
+
+/* Stop network device and free tx/rx queues and irq.
+ */
+static void shutdown_device(struct net_private *np)
+{
+       /* Stop old i/f to prevent errors whilst we rebuild the state. */
+       spin_lock_irq(&np->tx_lock);
+       spin_lock(&np->rx_lock);
+       netif_stop_queue(np->netdev);
+       /* np->backend_state = BEST_DISCONNECTED; */
+       spin_unlock(&np->rx_lock);
+       spin_unlock_irq(&np->tx_lock);
+    
+       /* Free resources. */
+       netif_free(np);
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+                          struct netfront_info *info)
+{
+       char *backend, *mac, *e, *s;
+       const char *message;
+       int err, i;
+
+       backend = NULL;
+       err = xenbus_gather(dev->nodename,
+                           "backend-id", "%i", &info->backend_id,
+                           "backend", NULL, &backend,
+                           NULL);
+       if (XENBUS_EXIST_ERR(err))
+               goto out;
+       if (backend && strlen(backend) == 0) {
+               err = -ENOENT;
+               goto out;
+       }
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
+                                dev->nodename);
+               goto out;
+       }
+
+       mac = xenbus_read(dev->nodename, "mac", NULL);
+       if (IS_ERR(mac)) {
+               err = PTR_ERR(mac);
+               xenbus_dev_error(dev, err, "reading %s/mac",
+                                dev->nodename);
+               goto out;
+       }
+       s = mac;
+       for (i = 0; i < ETH_ALEN; i++) {
+               info->mac[i] = simple_strtoul(s, &e, 16);
+               if (s == e || (e[0] != ':' && e[0] != 0)) {
+                       kfree(mac);
+                       err = -ENOENT;
+                       xenbus_dev_error(dev, err, "parsing %s/mac",
+                                        dev->nodename);
+                       goto out;
+               }
+               s = &e[1];
+       }
+       kfree(mac);
+
+       /* Create shared ring, alloc event channel. */
+       err = setup_device(dev, info);
+       if (err) {
+               xenbus_dev_error(dev, err, "setting up ring");
+               goto out;
+       }
+
+       err = xenbus_transaction_start(dev->nodename);
+       if (err) {
+               xenbus_dev_error(dev, err, "starting transaction");
+               goto destroy_ring;
+       }
+
+       err = xenbus_printf(dev->nodename, "tx-ring-ref","%u",
+                           info->tx_ring_ref);
+       if (err) {
+               message = "writing tx ring-ref";
+               goto abort_transaction;
+       }
+       err = xenbus_printf(dev->nodename, "rx-ring-ref","%u",
+                           info->rx_ring_ref);
+       if (err) {
+               message = "writing rx ring-ref";
+               goto abort_transaction;
+       }
+       err = xenbus_printf(dev->nodename,
+                           "event-channel", "%u", info->evtchn);
+       if (err) {
+               message = "writing event-channel";
+               goto abort_transaction;
+       }
+
+       info->backend = backend;
+       backend = NULL;
+
+       info->watch.node = info->backend;
+       info->watch.callback = watch_for_status;
+       err = register_xenbus_watch(&info->watch);
+       if (err) {
+               message = "registering watch on backend";
+               goto abort_transaction;
+       }
+
+       err = xenbus_transaction_end(0);
+       if (err) {
+               xenbus_dev_error(dev, err, "completing transaction");
+               goto destroy_ring;
+       }
+
+       netif_state = NETIF_STATE_CONNECTED;
+
+ out:
+       if (backend)
+               kfree(backend);
+       return err;
+
+ abort_transaction:
+       xenbus_transaction_end(1);
+       /* Have to do this *outside* transaction.  */
+       xenbus_dev_error(dev, err, "%s", message);
+ destroy_ring:
+       shutdown_device(info);
+       goto out;
+}
+
+/* Setup supplies the backend dir, virtual device.
+
+   We place an event channel and shared frame entries.
+   We watch backend to wait if it's ok. */
+static int netfront_probe(struct xenbus_device *dev,
+                         const struct xenbus_device_id *id)
+{
+       int err;
+       struct net_device *netdev;
+       struct netfront_info *info;
+       unsigned int handle;
+
+       err = xenbus_scanf(dev->nodename, "handle", "%u", &handle);
+       if (XENBUS_EXIST_ERR(err))
+               return err;
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading handle");
+               return err;
+       }
+
+       err = create_netdev(handle, dev, &netdev);
+       if (err) {
+               xenbus_dev_error(dev, err, "creating netdev");
+               return err;
+       }
+
+       info = netdev_priv(netdev);
+       dev->data = info;
+
+       err = talk_to_backend(dev, info);
+       if (err) {
+               destroy_netdev(netdev);
+               kfree(netdev);
+               dev->data = NULL;
+               return err;
+       }
+
+
+       /* Call once in case entries already there. */
+       watch_for_status(&info->watch, info->watch.node);
+
+       return 0;
+}
+
+static int netfront_remove(struct xenbus_device *dev)
+{
+       struct netfront_info *info = dev->data;
+
+       if (info->backend)
+               unregister_xenbus_watch(&info->watch);
+
+       netif_free(info);
+
+       kfree(info->backend);
+       kfree(info);
+
+       return 0;
+}
+
+static int netfront_suspend(struct xenbus_device *dev)
+{
+    struct net_private *np = dev->data;
     /* Avoid having tx/rx stuff happen until we're ready. */
-    free_irq(np->irq, np->dev);
-    unbind_evtchn_from_irq(np->evtchn);
-}
-
-static void vif_resume(struct net_private *np)
-{
+    unbind_evtchn_from_irqhandler(np->evtchn, np->netdev);
+    return 0;
+}
+
+static int netfront_resume(struct xenbus_device *dev)
+{
+    struct net_private *np = dev->data;
     /*
      * Connect regardless of whether IFF_UP flag set.
      * Stop bad things from happening until we're back up.
@@ -1444,29 +1289,96 @@
     memset(np->tx, 0, PAGE_SIZE);
     memset(np->rx, 0, PAGE_SIZE);
     
-    send_interface_connect(np);
-}
-
-void netif_suspend(void)
-{
-    struct list_head *ent;
-    struct net_private *np;
-    
-    list_for_each (ent, &dev_list) {
-        np = list_entry(ent, struct net_private, list);
-        vif_suspend(np);
-    }
-}
-
-void netif_resume(void)
-{
-    struct list_head *ent;
-    struct net_private *np;
-
-    list_for_each (ent, &dev_list) {
-        np = list_entry(ent, struct net_private, list);
-        vif_resume(np);
-    }
+    // send_interface_connect(np);
+    return 0;
+}
+
+static struct xenbus_driver netfront = {       /* registered by init_net_xenbus() */
+       .name = "vif",
+       .owner = THIS_MODULE,
+       .ids = netfront_ids,
+       .probe = netfront_probe,        /* reads "handle" and creates the netdev */
+       .remove = netfront_remove,      /* drops the watch and frees the info */
+       .resume = netfront_resume,
+       .suspend = netfront_suspend,    /* unbinds the event-channel handler */
+};
+
+/*
+ * Register the netfront driver on the frontend xenbus.
+ *
+ * The function returns void, so a registration failure cannot be passed
+ * up to the caller; it is logged instead of being silently dropped.
+ */
+static void __init init_net_xenbus(void)
+{
+       int err;
+
+       err = xenbus_register_device(&netfront);
+       if (err)
+               printk(KERN_WARNING
+                      "netfront: xenbus driver registration failed (%d)\n",
+                      err);
+}
+
+static int wait_for_netif(void)
+{
+    int ticks = 0;
+
+    /*
+     * We should figure out how many and which devices we need to
+     * proceed and only wait for those.  For now, continue once the
+     * first device is around.
+     *
+     * Sleep one tick at a time, giving up after 10*HZ iterations.
+     */
+    while ( netif_state != NETIF_STATE_CONNECTED && (ticks < 10*HZ) )
+    {
+        set_current_state(TASK_INTERRUPTIBLE);
+        schedule_timeout(1);
+        ticks++;
+    }
+
+    if (netif_state == NETIF_STATE_CONNECTED)
+        return 0;
+
+    WPRINTK("Timeout connecting to device!\n");
+    return -ENOSYS;
+}
+
+/*
+ * Driver initialisation.
+ *
+ * Does nothing (returns 0) in the initial domain.  Otherwise:
+ *  - reserves one grant reference per TX/RX ring slot when the matching
+ *    CONFIG_XEN_NETDEV_GRANT_* option is enabled,
+ *  - calls xennet_proc_init(),
+ *  - registers the inetaddr notifier and the xenbus driver,
+ *  - waits in wait_for_netif(); its result is deliberately not propagated.
+ *
+ * Returns 0 on success, 1 if grant references cannot be allocated, or the
+ * error returned by xennet_proc_init().
+ *
+ * NOTE(review): the failure returns do not release grant references that
+ * were already allocated, and 1 is not a negative errno -- confirm whether
+ * both are intended.
+ */
+static int __init netif_init(void)
+{
+    int err = 0;
+
+    if (xen_start_info.flags & SIF_INITDOMAIN)
+        return 0;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    /* A grant for every ring slot */
+    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+                                      &gref_tx_head) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "Netdev frontend (TX) is using grant tables.\n");
+#endif /* CONFIG_XEN_NETDEV_GRANT_TX */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    /* A grant for every ring slot */
+    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+                                      &gref_rx_head) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "Netdev frontend (RX) is using grant tables.\n");
+#endif /* CONFIG_XEN_NETDEV_GRANT_RX */
+
+    if ((err = xennet_proc_init()) != 0)
+        return err;
+
+    IPRINTK("Initialising virtual ethernet driver.\n");
+
+    /* The archived text had "&not" decoded to a NOT SIGN here; restored. */
+    (void)register_inetaddr_notifier(&notifier_inetdev);
+
+    init_net_xenbus();
+
+    wait_for_netif();
+
+    return err;
+}
+
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_free_grant_references(gref_tx_head);
+#endif /* CONFIG_XEN_NETDEV_GRANT_TX */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_free_grant_references(gref_rx_head);
+#endif /* CONFIG_XEN_NETDEV_GRANT_RX */
 }
 
 #ifdef CONFIG_PROC_FS
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu Aug 25 22:53:20 2005
@@ -139,7 +139,7 @@
         privcmd_mmapbatch_t m;
         struct vm_area_struct *vma = NULL;
         unsigned long *p, addr;
-        unsigned long mfn;
+        unsigned long mfn, ptep;
         int i;
 
         if ( copy_from_user(&m, (void *)data, sizeof(m)) )
@@ -163,12 +163,12 @@
             if ( get_user(mfn, p) )
                 return -EFAULT;
 
-            u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
-
-            __direct_remap_area_pages(vma->vm_mm,
-                                      addr, 
-                                      PAGE_SIZE, 
-                                      &u);
+            ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
+            if (ret)
+                goto batch_err;
+
+            u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
+            u.ptr = ptep;
 
             if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
                 put_user(0xF0000000 | mfn, p);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Thu Aug 25 22:53:20 2005
@@ -37,7 +37,6 @@
     /* Physical parameters of the comms window. */
     unsigned long    shmem_frame;
     unsigned int     evtchn;
-    int              irq;
     /* Comms Information */
     usbif_back_ring_t usb_ring;
     /* Private fields. */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c      Thu Aug 25 22:53:20 2005
@@ -6,15 +6,6 @@
  * by Mark Williamson, Copyright (c) 2004
  */
 
-
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/interface.c
- * 
- * Block-device interface management.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
 #include "common.h"
 
 #define USBIF_HASHSZ 1024
@@ -42,7 +33,6 @@
      * may be outstanding requests at the device whose asynchronous responses
      * must still be notified to the remote driver.
      */
-    unbind_evtchn_from_irq(usbif->evtchn);
     vfree(usbif->usb_ring.sring);
 
     /* Construct the deferred response message. */
@@ -198,12 +188,12 @@
     BACK_RING_INIT(&up->usb_ring, sring, PAGE_SIZE);
 
     up->evtchn        = evtchn;
-    up->irq           = bind_evtchn_to_irq(evtchn);
     up->shmem_frame   = shmem_frame;
     up->status        = CONNECTED;
     usbif_get(up);
 
-    request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
+    (void)bind_evtchn_to_irqhandler(
+        evtchn, usbif_be_int, 0, "usbif-backend", up);
 
     connect->status = USBIF_BE_STATUS_OKAY;
 }
@@ -233,7 +223,7 @@
         up->status = DISCONNECTING;
         up->disconnect_rspid = rsp_id;
         wmb(); /* Let other CPUs see the status change. */
-        free_irq(up->irq, up);
+        unbind_evtchn_from_irqhandler(up->evtchn, up);
        usbif_deschedule(up);
         usbif_put(up);
         return 0; /* Caller should not send response message. */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Thu Aug 25 22:53:20 2005
@@ -657,8 +657,8 @@
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
             FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
 
-        ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
-               == buffer_mach + i << PAGE_SHIFT);
+        ASSERT(virt_to_mfn(MMAP_VADDR(pending_idx, i))
+               == ((buffer_mach >> PAGE_SHIFT) + i));
     }
 
     if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
@@ -1027,13 +1027,15 @@
 static int __init usbif_init(void)
 {
     int i;
+    struct page *page;
 
     if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
          !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
         return 0;
-    
-    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
-        BUG();
+
+    page = balloon_alloc_empty_page_range(MMAP_PAGES);
+    BUG_ON(page == NULL);
+    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
 
     pending_cons = 0;
     pending_prod = MAX_PENDING_REQS;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Thu Aug 25 22:53:20 2005
@@ -195,7 +195,7 @@
         }
 
         urb_priv->schedule = schedule;
-       req->iso_schedule = virt_to_machine(schedule);
+       req->iso_schedule = virt_to_mfn(schedule) << PAGE_SHIFT;
 
         return 0;
 }
@@ -212,7 +212,7 @@
 #if DEBUG
         printk(KERN_DEBUG
                "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
-               usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+               usbif, usbif->req_prod, virt_to_mfn(&usbif->req_prod),
                usbif->resp_prod, xhci->usb_resp_cons);
 #endif
         
@@ -232,7 +232,7 @@
         req->operation       = USBIF_OP_IO;
         req->port            = 0; /* We don't care what the port is. */
         req->id              = (unsigned long) urb->hcpriv;
-        req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+        req->transfer_buffer = virt_to_mfn(urb->transfer_buffer) << PAGE_SHIFT;
        req->devnum          = usb_pipedevice(urb->pipe);
         req->direction       = usb_pipein(urb->pipe);
        req->speed           = usb_pipeslow(urb->pipe);
@@ -280,7 +280,7 @@
        printk(KERN_DEBUG
                "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
                "resp_cons = %d\n", usbif->req_prod,
-               virt_to_machine(&usbif->req_prod),
+               virt_to_mfn(&usbif->req_prod),
               usbif->resp_prod, xhci->usb_resp_cons);
 #endif
  
@@ -1536,8 +1536,7 @@
             
             /* Clean up resources. */
             free_page((unsigned long)xhci->usb_ring.sring);
-            free_irq(xhci->irq, xhci);
-            unbind_evtchn_from_irq(xhci->evtchn);
+            unbind_evtchn_from_irqhandler(xhci->evtchn, xhci);
 
             /* Plug the ring. */
             xhci->recovery = 1;
@@ -1556,7 +1555,7 @@
         cmsg.type      = CMSG_USBIF_FE;
         cmsg.subtype   = CMSG_USBIF_FE_INTERFACE_CONNECT;
         cmsg.length    = sizeof(usbif_fe_interface_connect_t);
-        up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+        up.shmem_frame = virt_to_mfn(sring);
         memcpy(cmsg.msg, &up, sizeof(up));
         
         /* Tell the controller to bring up the interface. */
@@ -1572,7 +1571,6 @@
         }
 
         xhci->evtchn = status->evtchn;
-        xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
        xhci->bandwidth = status->bandwidth;
        xhci->rh.numports = status->num_ports;
 
@@ -1595,14 +1593,14 @@
        usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
                            1000 - xhci->bandwidth, 0);
 
-        if ( (rc = request_irq(xhci->irq, xhci_interrupt, 
+        if ( (rc = bind_evtchn_to_irqhandler(xhci->evtchn, xhci_interrupt, 
                                SA_SAMPLE_RANDOM, "usbif", xhci)) )
                 printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
 
        DPRINTK(KERN_INFO __FILE__
-                ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
-                xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
-                xhci->evtchn, xhci->irq);
+                ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d\n",
+                xhci->usb_ring.sring, virt_to_mfn(xhci->usbif),
+                xhci->evtchn);
 
         xhci->state = USBIF_STATE_CONNECTED;
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h  Thu Aug 25 22:53:20 2005
@@ -54,7 +54,6 @@
 #endif
 
         int evtchn;                        /* Interdom channel to backend */
-        int irq;                           /* Bound to evtchn */
         enum { 
                 USBIF_STATE_CONNECTED    = 2,
                 USBIF_STATE_DISCONNECTED = 1,
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile  Thu Aug 25 22:53:20 2005
@@ -4,7 +4,3 @@
 xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_xs.o
 xenbus-objs += xenbus_probe.o 
-
-XEN_TOOLS_DIR := "../tools"
-vpath %.h $(XEN_TOOLS_DIR)
-EXTRA_CFLAGS += -I $(XEN_TOOLS_DIR)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Thu Aug 25 22:53:20 2005
@@ -26,7 +26,6 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
-//#define DEBUG
 
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/evtchn.h>
@@ -49,13 +48,12 @@
 
 static inline struct ringbuf_head *outbuf(void)
 {
-       return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT);
+       return mfn_to_virt(xen_start_info.store_mfn);
 }
 
 static inline struct ringbuf_head *inbuf(void)
 {
-       return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT)
-               + PAGE_SIZE/2;
+       return mfn_to_virt(xen_start_info.store_mfn) + PAGE_SIZE/2;
 }
 
 static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
@@ -202,14 +200,17 @@
        return 0;
 }
 
-/* Set up interrpt handler off store event channel. */
+/* Set up interrupt handler off store event channel. */
 int xb_init_comms(void)
 {
-       int err, irq;
-
-       irq = bind_evtchn_to_irq(xen_start_info.store_evtchn);
-
-       err = request_irq(irq, wake_waiting, SA_SHIRQ, "xenbus", &xb_waitq);
+       int err;
+
+       if (!xen_start_info.store_evtchn)
+               return 0;
+
+       err = bind_evtchn_to_irqhandler(
+               xen_start_info.store_evtchn, wake_waiting,
+               0, "xenbus", &xb_waitq);
        if (err) {
                printk(KERN_ERR "XENBUS request irq failed %i\n", err);
                unbind_evtchn_from_irq(xen_start_info.store_evtchn);
@@ -217,8 +218,16 @@
        }
 
        /* FIXME zero out page -- domain builder should probably do this*/
-       memset(machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT),
-              0, PAGE_SIZE);
+       memset(mfn_to_virt(xen_start_info.store_mfn), 0, PAGE_SIZE);
 
        return 0;
 }
+
+void xb_suspend_comms(void)
+{
+       /* Without a store event channel there is no handler to unbind. */
+       if (xen_start_info.store_evtchn)
+               unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn,
+                                             &xb_waitq);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Thu Aug 25 22:53:20 2005
@@ -1,8 +1,36 @@
-/* Private include for xenbus communications. */
+/*
+ * Private include for xenbus communications.
+ * 
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
+
 int xs_init(void);
 int xb_init_comms(void);
+void xb_suspend_comms(void);
 
 /* Low level routines. */
 int xb_write(const void *data, unsigned len);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu Aug 25 22:53:20 2005
@@ -29,30 +29,26 @@
 
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/xenbus.h>
+#include <asm-xen/balloon.h>
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/fcntl.h>
 #include <stdarg.h>
+#include <linux/notifier.h>
 #include "xenbus_comms.h"
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
+
+static struct notifier_block *xenstore_chain;
 
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
 {
        for (; !streq(arr->devicetype, ""); arr++) {
-               if (!streq(arr->devicetype, dev->devicetype))
-                       continue;
-
-               /* If they don't care what subtype, it's a match. */
-               if (streq(arr->subtype, ""))
-                       return arr;
-
-               /* If they care, device must have (same) subtype. */
-               if (dev->subtype && streq(arr->subtype, dev->subtype))
+               if (streq(arr->devicetype, dev->devicetype))
                        return arr;
        }
        return NULL;
@@ -68,10 +64,102 @@
        return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
 }
 
+struct xen_bus_type    /* one instance per bus: see xenbus_frontend, xenbus_backend */
+{
+       char *root;     /* store directory enumerated when probing this bus */
+       unsigned int levels;    /* '/' count handed to strsep_len() by dev_changed() */
+       int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);     /* fills bus_id from a node name */
+       int (*probe)(const char *type, const char *dir);        /* called for each entry under root/<type> */
+       struct bus_type bus;    /* bus assigned to registered drivers and probed devices */
+       struct device dev;      /* set as parent of every device probed on this bus */
+};
+
+/*
+ * device/<type>/<id> => <type>-<id>
+ *
+ * Writes the bus id for frontend node @nodename into @bus_id.
+ * Returns 0 on success, or -EINVAL when the node name has no '/', when the
+ * part after the first '/' does not fit in BUS_ID_SIZE, or when that part
+ * contains no further '/'.
+ */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+       const char *tail;
+       char *sep;
+
+       /* Skip the first path component. */
+       tail = strchr(nodename, '/');
+       if (!tail || strlen(tail + 1) >= BUS_ID_SIZE) {
+               /* Report the whole node name: tail may be NULL here. */
+               printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+               return -EINVAL;
+       }
+
+       strlcpy(bus_id, tail + 1, BUS_ID_SIZE);
+
+       /* Turn the separator between <type> and <id> into a dash. */
+       sep = strchr(bus_id, '/');
+       if (!sep) {
+               printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+               return -EINVAL;
+       }
+       *sep = '-';
+       return 0;
+}
+
 /* Bus type for frontend drivers. */
-static struct bus_type xenbus_type = {
-       .name  = "xenbus",
-       .match = xenbus_match,
+static int xenbus_probe_frontend(const char *type, const char *name);
+static struct xen_bus_type xenbus_frontend = {
+       .root = "device",       /* store directory enumerated for frontend devices */
+       .levels = 2,            /* device/type/<id> */
+       .get_bus_id = frontend_bus_id,
+       .probe = xenbus_probe_frontend,
+       .bus = {        /* bus used by xenbus_register_device() */
+               .name  = "xen",
+               .match = xenbus_match,
+       },
+       .dev = {        /* parent device of all frontend devices */
+               .bus_id = "xen",
+       },
+};
+
+/*
+ * backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id>
+ *
+ * Writes the bus id for backend node @nodename into @bus_id.
+ * Returns 0 on success; -EINVAL for a malformed node name; the error from
+ * xenbus_gather(); -ERANGE for an empty "frontend" value; -ENOENT when the
+ * frontend path does not exist; -ENOSPC when the id does not fit.
+ */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+       int domid, err;
+       const char *devid, *type, *frontend;
+       unsigned int typelen;
+
+       /* <type> is the second component and must be followed by a '/'. */
+       type = strchr(nodename, '/');
+       if (!type)
+               return -EINVAL;
+       type++;
+       typelen = strcspn(type, "/");
+       if (!typelen || type[typelen] != '/')
+               return -EINVAL;
+
+       /* <id> is whatever follows the last '/' (one exists, see above). */
+       devid = strrchr(nodename, '/') + 1;
+
+       err = xenbus_gather(nodename, "frontend-id", "%i", &domid,
+                           "frontend", NULL, &frontend,
+                           NULL);
+       if (err)
+               return err;
+
+       /* The frontend path must be non-empty and present in the store. */
+       if (strlen(frontend) == 0)
+               err = -ERANGE;
+       else if (!xenbus_exists(frontend, ""))
+               err = -ENOENT;
+
+       /*
+        * frontend is only needed for the two checks above, so free it on
+        * every path; it used to be freed only when a check failed.
+        */
+       kfree(frontend);
+       if (err)
+               return err;
+
+       if (snprintf(bus_id, BUS_ID_SIZE,
+                    "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+               return -ENOSPC;
+       return 0;
+}
+
+static int xenbus_probe_backend(const char *type, const char *uuid);
+static struct xen_bus_type xenbus_backend = {
+       .root = "backend",      /* store directory enumerated for backend devices */
+       .levels = 3,            /* backend/type/<frontend>/<id> */
+       .get_bus_id = backend_bus_id,
+       .probe = xenbus_probe_backend,
+       .bus = {        /* bus used by xenbus_register_backend() */
+               .name  = "xen-backend",
+               .match = xenbus_match,
+       },
+       .dev = {        /* parent device of all backend devices */
+               .bus_id = "xen-backend",
+       },
 };
 
 static int xenbus_dev_probe(struct device *_dev)
@@ -100,12 +188,13 @@
        return drv->remove(dev);
 }
 
-int xenbus_register_driver(struct xenbus_driver *drv)
+static int xenbus_register_driver(struct xenbus_driver *drv,
+                                 struct xen_bus_type *bus)
 {
        int err;
 
        drv->driver.name = drv->name;
-       drv->driver.bus = &xenbus_type;
+       drv->driver.bus = &bus->bus;
        drv->driver.owner = drv->owner;
        drv->driver.probe = xenbus_dev_probe;
        drv->driver.remove = xenbus_dev_remove;
@@ -116,6 +205,16 @@
        return err;
 }
 
+/* Register @drv as a driver on the frontend bus. */
+int xenbus_register_device(struct xenbus_driver *drv)
+{
+       struct xen_bus_type *bus = &xenbus_frontend;
+
+       return xenbus_register_driver(drv, bus);
+}
+
+/* Register @drv as a driver on the backend bus. */
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+       struct xen_bus_type *bus = &xenbus_backend;
+
+       return xenbus_register_driver(drv, bus);
+}
+
 void xenbus_unregister_driver(struct xenbus_driver *drv)
 {
        down(&xenbus_lock);
@@ -126,52 +225,98 @@
 struct xb_find_info
 {
        struct xenbus_device *dev;
-       const char *busid;
+       const char *nodename;
 };
 
 static int cmp_dev(struct device *dev, void *data)
 {
+       struct xenbus_device *xendev = to_xenbus_device(dev);
        struct xb_find_info *info = data;
 
-       if (streq(dev->bus_id, info->busid)) {
-               info->dev = container_of(get_device(dev),
-                                        struct xenbus_device, dev);
+       if (streq(xendev->nodename, info->nodename)) {
+               info->dev = xendev;
+               get_device(dev);
                return 1;
        }
        return 0;
 }
 
-/* FIXME: device_find is fixed in 2.6.13-rc2 according to Greg KH --RR */
-struct xenbus_device *xenbus_device_find(const char *busid)
-{
-       struct xb_find_info info = { .dev = NULL, .busid = busid };
-
-       bus_for_each_dev(&xenbus_type, NULL, &info, cmp_dev);
+struct xenbus_device *xenbus_device_find(const char *nodename,
+                                        struct bus_type *bus)
+{
+       struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+       bus_for_each_dev(bus, NULL, &info, cmp_dev);
        return info.dev;
 }
 
+/*
+ * Callback for bus_for_each_dev(): select a device whose node is
+ * info->nodename itself or lies underneath it.
+ *
+ * On a match the device is stored in info->dev with a reference taken and
+ * 1 is returned; otherwise 0 is returned.
+ *
+ * A bare prefix compare is not enough: "device/vif/1" is a prefix of
+ * "device/vif/10", so the character after the prefix must also be a path
+ * boundary.
+ */
+static int cleanup_dev(struct device *dev, void *data)
+{
+       struct xenbus_device *xendev = to_xenbus_device(dev);
+       struct xb_find_info *info = data;
+       int len = strlen(info->nodename);
+
+       if (strncmp(xendev->nodename, info->nodename, len))
+               return 0;
+
+       /*
+        * The prefix matched, so xendev->nodename[len] is a valid index
+        * (at worst the terminating NUL).  Accept an exact match or a
+        * subdirectory; a path already ending in '/' needs no extra check.
+        */
+       if (!(len > 0 && info->nodename[len - 1] == '/') &&
+           xendev->nodename[len] != '\0' &&
+           xendev->nodename[len] != '/')
+               return 0;
+
+       info->dev = xendev;
+       get_device(dev);
+       return 1;
+}
+
+/*
+ * Unregister every device on @bus that cleanup_dev() selects for @path.
+ * Each pass picks one device, so the scan is repeated until none is left.
+ */
+static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
+{
+       struct xb_find_info info = { .nodename = path };
+
+       for (;;) {
+               info.dev = NULL;
+               bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+               if (info.dev == NULL)
+                       break;
+
+               device_unregister(&info.dev->dev);
+               /* Drop the reference taken by cleanup_dev(). */
+               put_device(&info.dev->dev);
+       }
+}
 
 static void xenbus_release_device(struct device *dev)
 {
        if (dev) {
                struct xenbus_device *xendev = to_xenbus_device(dev);
 
-               kfree(xendev->subtype);
                kfree(xendev);
        }
 }
-/* devices/<typename>/<name> */
-static int xenbus_probe_device(const char *dirpath, const char *devicetype,
-                              const char *name)
+
+/*
+ * Simplified asprintf: format into a freshly kmalloc'ed buffer.
+ * Returns the buffer, or NULL if the allocation fails.
+ */
+static char *kasprintf(const char *fmt, ...)
+{
+       va_list args;
+       unsigned int needed;
+       char *buf, scratch[1];
+
+       /* First pass only measures the formatted length. */
+       va_start(args, fmt);
+       /* FIXME: vsnprintf has a bug, NULL should work */
+       needed = vsnprintf(scratch, 0, fmt, args);
+       va_end(args);
+
+       buf = kmalloc(needed + 1, GFP_KERNEL);
+       if (buf != NULL) {
+               /* Second pass writes into the exactly-sized buffer. */
+               va_start(args, fmt);
+               vsprintf(buf, fmt, args);
+               va_end(args);
+       }
+       return buf;
+}
+
+static int xenbus_probe_node(struct xen_bus_type *bus,
+                            const char *type,
+                            const char *nodename)
 {
        int err;
        struct xenbus_device *xendev;
        unsigned int stringlen;
 
-       /* Nodename: /device/<typename>/<name>/ */
-       stringlen = strlen(dirpath) + strlen(devicetype) + strlen(name) + 3;
-       /* Typename */
-       stringlen += strlen(devicetype) + 1;
+       stringlen = strlen(nodename) + 1 + strlen(type) + 1;
        xendev = kmalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
        if (!xendev)
                return -ENOMEM;
@@ -179,38 +324,103 @@
 
        /* Copy the strings into the extra space. */
        xendev->nodename = (char *)(xendev + 1);
-       sprintf(xendev->nodename, "%s/%s/%s", dirpath, devicetype, name);
+       strcpy(xendev->nodename, nodename);
        xendev->devicetype = xendev->nodename + strlen(xendev->nodename) + 1;
-       strcpy(xendev->devicetype, devicetype);
-
-       /* FIXME: look for "subtype" field. */
-       snprintf(xendev->dev.bus_id, BUS_ID_SIZE, "%s-%s", devicetype, name);
-       xendev->dev.bus = &xenbus_type;
+       strcpy(xendev->devicetype, type);
+
+       xendev->dev.parent = &bus->dev;
+       xendev->dev.bus = &bus->bus;
        xendev->dev.release = xenbus_release_device;
+
+       err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
+       if (err) {
+               kfree(xendev);
+               return err;
+       }
 
        /* Register with generic device framework. */
        err = device_register(&xendev->dev);
        if (err) {
-               printk("XENBUS: Registering device %s: error %i\n",
-                      xendev->dev.bus_id, err);
+               printk("XENBUS: Registering %s device %s: error %i\n",
+                      bus->bus.name, xendev->dev.bus_id, err);
                kfree(xendev);
        }
        return err;
 }
 
-static int xenbus_probe_device_type(const char *dirpath, const char *typename)
+/*
+ * device/<typename>/<name>
+ *
+ * Build the node path for one frontend device and probe it.
+ */
+static int xenbus_probe_frontend(const char *type, const char *name)
+{
+       int ret = -ENOMEM;
+       char *path;
+
+       path = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name);
+       if (path != NULL) {
+               ret = xenbus_probe_node(&xenbus_frontend, type, path);
+               kfree(path);
+       }
+       return ret;
+}
+
+/*
+ * backend/<typename>/<frontend-uuid>/<name>
+ *
+ * Probe the single backend device called @name below directory @dir.
+ */
+static int xenbus_probe_backend_unit(const char *dir,
+                                    const char *type,
+                                    const char *name)
+{
+       int ret = -ENOMEM;
+       char *path;
+
+       path = kasprintf("%s/%s", dir, name);
+       if (path != NULL) {
+               ret = xenbus_probe_node(&xenbus_backend, type, path);
+               kfree(path);
+       }
+       return ret;
+}
+
+/*
+ * backend/<typename>/<frontend-uuid>
+ *
+ * Probe every entry listed under one frontend's backend directory,
+ * stopping at the first error.
+ */
+static int xenbus_probe_backend(const char *type, const char *uuid)
+{
+       unsigned int idx, count = 0;
+       char **entries;
+       char *path;
+       int ret = 0;
+
+       path = kasprintf("%s/%s/%s", xenbus_backend.root, type, uuid);
+       if (path == NULL)
+               return -ENOMEM;
+
+       entries = xenbus_directory(path, "", &count);
+       if (IS_ERR(entries)) {
+               ret = PTR_ERR(entries);
+               kfree(path);
+               return ret;
+       }
+
+       for (idx = 0; idx < count && ret == 0; idx++)
+               ret = xenbus_probe_backend_unit(path, type, entries[idx]);
+
+       kfree(entries);
+       kfree(path);
+       return ret;
+}
+
+static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
 {
        int err = 0;
        char **dir;
        unsigned int dir_n = 0;
        int i;
 
-       dir = xenbus_directory(dirpath, typename, &dir_n);
+       dir = xenbus_directory(bus->root, type, &dir_n);
        if (IS_ERR(dir))
                return PTR_ERR(dir);
 
        for (i = 0; i < dir_n; i++) {
-               err = xenbus_probe_device(dirpath, typename, dir[i]);
+               err = bus->probe(type, dir[i]);
                if (err)
                        break;
        }
@@ -218,18 +428,18 @@
        return err;
 }
 
-static int xenbus_probe_devices(const char *path)
+static int xenbus_probe_devices(struct xen_bus_type *bus)
 {
        int err = 0;
        char **dir;
        unsigned int i, dir_n;
 
-       dir = xenbus_directory(path, "", &dir_n);
+       dir = xenbus_directory(bus->root, "", &dir_n);
        if (IS_ERR(dir))
                return PTR_ERR(dir);
 
        for (i = 0; i < dir_n; i++) {
-               err = xenbus_probe_device_type(path, dir[i]);
+               err = xenbus_probe_device_type(bus, dir[i]);
                if (err)
                        break;
        }
@@ -247,53 +457,154 @@
        return ret;
 }
 
-static void dev_changed(struct xenbus_watch *watch, const char *node)
-{
-       char busid[BUS_ID_SIZE];
-       int exists;
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+       unsigned int i;
+
+       for (i = 0; str[i]; i++)
+               if (str[i] == c) {
+                       if (len == 0)
+                               return i;
+                       len--;
+               }
+       return (len == 0) ? i : -ERANGE;
+}
+
+static void dev_changed(const char *node, struct xen_bus_type *bus)
+{
+       int exists, rootlen;
        struct xenbus_device *dev;
-       char *p;
-
-       /* Node is of form device/<type>/<identifier>[/...] */
-       if (char_count(node, '/') != 2)
+       char type[BUS_ID_SIZE];
+       const char *p, *root;
+
+       if (char_count(node, '/') < 2)
+               return;
+
+       exists = xenbus_exists(node, "");
+       if (!exists) {
+               xenbus_cleanup_devices(node, &bus->bus);
                return;
-
-       /* Created or deleted? */
-       exists = xenbus_exists(node, "");
-
+       }
+
+       /* backend/<type>/... or device/<type>/... ("%.*s" takes an int) */
        p = strchr(node, '/') + 1;
-       if (strlen(p) + 1 > BUS_ID_SIZE) {
-               printk("Device for node %s is too big!\n", node);
+       snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
+       type[BUS_ID_SIZE-1] = '\0';
+
+       rootlen = strsep_len(node, '/', bus->levels);
+       if (rootlen < 0)
                return;
-       }
-       /* Bus ID is name with / changed to - */
-       strcpy(busid, p);
-       *strchr(busid, '/') = '-';
-
-       dev = xenbus_device_find(busid);
-       printk("xenbus: device %s %s\n", busid, dev ? "exists" : "new");
-       if (dev && !exists) {
-               printk("xenbus: Unregistering device %s\n", busid);
-               /* FIXME: free? */
-               device_unregister(&dev->dev);
-       } else if (!dev && exists) {
-               printk("xenbus: Adding device %s\n", busid);
-               /* Hack bus id back into two strings. */
-               *strrchr(busid, '-') = '\0';
-               xenbus_probe_device("device", busid, busid+strlen(busid)+1);
-       } else
-               printk("xenbus: strange, %s already %s\n", busid,
-                      exists ? "exists" : "gone");
-       if (dev)
+       root = kasprintf("%.*s", rootlen, node);
+       if (!root)
+               return;
+
+       dev = xenbus_device_find(root, &bus->bus);
+       if (!dev)
+               xenbus_probe_node(bus, type, root);
+       else
                put_device(&dev->dev);
+
+       kfree(root);
+}
+
+static void frontend_changed(struct xenbus_watch *watch, const char *node)
+{
+       dev_changed(node, &xenbus_frontend);
+}
+
+static void backend_changed(struct xenbus_watch *watch, const char *node)
+{
+       dev_changed(node, &xenbus_backend);
 }
 
 /* We watch for devices appearing and vanishing. */
-static struct xenbus_watch dev_watch = {
-       /* FIXME: Ideally we'd only watch for changes 2 levels deep... */
+static struct xenbus_watch fe_watch = {
        .node = "device",
-       .callback = dev_changed,
+       .callback = frontend_changed,
 };
+
+static struct xenbus_watch be_watch = {
+       .node = "backend",
+       .callback = backend_changed,
+};
+
+static int suspend_dev(struct device *dev, void *data)
+{
+       int err = 0;
+       struct xenbus_driver *drv;
+       struct xenbus_device *xdev;
+
+       if (dev->driver == NULL)
+               return 0;
+       drv = to_xenbus_driver(dev->driver);
+       xdev = container_of(dev, struct xenbus_device, dev);
+       if (drv->suspend)
+               err = drv->suspend(xdev);
+       if (err)
+               printk("xenbus: suspend %s failed: %i\n", dev->bus_id, err);
+       return 0;
+}
+
+static int resume_dev(struct device *dev, void *data)
+{
+       int err = 0;
+       struct xenbus_driver *drv;
+       struct xenbus_device *xdev;
+
+       if (dev->driver == NULL)
+               return 0;
+       drv = to_xenbus_driver(dev->driver);
+       xdev = container_of(dev, struct xenbus_device, dev);
+       if (drv->resume)
+               err = drv->resume(xdev);
+       if (err)
+               printk("xenbus: resume %s failed: %i\n", dev->bus_id, err);
+       return 0;
+}
+
+void xenbus_suspend(void)
+{
+       /* We keep lock, so no comms can happen as page moves. */
+       down(&xenbus_lock);
+       bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+       bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+       xb_suspend_comms();
+}
+
+void xenbus_resume(void)
+{
+       xb_init_comms();
+       reregister_xenbus_watches();
+       bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+       bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
+       up(&xenbus_lock);
+}
+
+int register_xenstore_notifier(struct notifier_block *nb)
+{
+       int ret = 0;
+
+       down(&xenbus_lock);
+
+       if (xen_start_info.store_evtchn) {
+               ret = nb->notifier_call(nb, 0, NULL);
+       } else {
+               notifier_chain_register(&xenstore_chain, nb);
+       }
+
+       up(&xenbus_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL(register_xenstore_notifier);
+
+void unregister_xenstore_notifier(struct notifier_block *nb)
+{
+       down(&xenbus_lock);
+       notifier_chain_unregister(&xenstore_chain, nb);
+       up(&xenbus_lock);
+}
+EXPORT_SYMBOL(unregister_xenstore_notifier);
 
 /* called from a thread in privcmd/privcmd.c */
 int do_xenbus_probe(void *unused)
@@ -309,21 +620,25 @@
                return err;
        }
 
-       /* Initialize non-xenbus drivers */
-       balloon_init_watcher();
-
        down(&xenbus_lock);
        /* Enumerate devices in xenstore. */
-       xenbus_probe_devices("device");
+       xenbus_probe_devices(&xenbus_frontend);
+       xenbus_probe_devices(&xenbus_backend);
        /* Watch for changes. */
-       register_xenbus_watch(&dev_watch);
+       register_xenbus_watch(&fe_watch);
+       register_xenbus_watch(&be_watch);
+       /* Notify others that xenstore is up */
+       notifier_call_chain(&xenstore_chain, 0, 0);
        up(&xenbus_lock);
        return 0;
 }
 
 static int __init xenbus_probe_init(void)
 {
-       bus_register(&xenbus_type);
+       bus_register(&xenbus_frontend.bus);
+       bus_register(&xenbus_backend.bus);
+       device_register(&xenbus_frontend.dev);
+       device_register(&xenbus_backend.dev);
 
        if (!xen_start_info.store_evtchn)
                return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Thu Aug 25 22:53:20 2005
@@ -30,7 +30,6 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
-#include "xenstore/xenstored.h"
 #include <linux/uio.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -39,6 +38,7 @@
 #include <linux/fcntl.h>
 #include <linux/kthread.h>
 #include <asm-xen/xenbus.h>
+#include "xenstored.h"
 #include "xenbus_comms.h"
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
@@ -187,6 +187,7 @@
        static char buffer[4096];
 
        BUG_ON(down_trylock(&xenbus_lock) == 0);
+       /* XXX FIXME: might not be correct if name == "" */
        BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
 
        strcpy(buffer, dir);
@@ -399,9 +400,12 @@
                        ret = PTR_ERR(p);
                        break;
                }
-               if (sscanf(p, fmt, result) == 0)
-                       ret = -EINVAL;
-               kfree(p);
+               if (fmt) {
+                       if (sscanf(p, fmt, result) == 0)
+                               ret = -EINVAL;
+                       kfree(p);
+               } else
+                       *(char **)result = p;
        }
        va_end(ap);
        return ret;
@@ -494,6 +498,18 @@
                printk(KERN_WARNING
                       "XENBUS Failed to release watch %s: %i\n",
                       watch->node, err);
+}
+
+/* Re-register callbacks to all watches. */
+void reregister_xenbus_watches(void)
+{
+       struct xenbus_watch *watch;
+       char token[sizeof(watch) * 2 + 1];
+
+       list_for_each_entry(watch, &watches, list) {
+               sprintf(token, "%lX", (long)watch);
+               xs_watch(watch->node, token);
+       }
 }
 
 static int watch_thread(void *unused)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-generic/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h        Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h        Thu Aug 25 22:53:20 2005
@@ -37,7 +37,7 @@
  */
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 do {                                                                     \
-       set_pte_at((__vma)>vm_mm, (__address), __ptep, __entry);          \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);         \
        flush_tlb_page(__vma, __address);                                 \
 } while (0)
 #endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h      Thu Aug 25 22:53:20 2005
@@ -93,7 +93,7 @@
 
 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
-#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), ((u32 *)&t->tls_array[i])[0], ((u32 *)&t->tls_array[i])[1])
+#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), *(u64 *)&t->tls_array[i])
        C(0); C(1); C(2);
 #undef C
 }
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h       Thu Aug 25 22:53:20 2005
@@ -1,11 +1,35 @@
 #ifndef _ASM_I386_DMA_MAPPING_H
 #define _ASM_I386_DMA_MAPPING_H
 
+/*
+ * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+ * documentation.
+ */
+
+#include <linux/config.h>
 #include <linux/mm.h>
-
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/scatterlist.h>
+#include <asm-i386/swiotlb.h>
+
+static inline int
+address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+{
+       dma_addr_t mask = 0xffffffff;
+       /* If the device has a mask, use it, otherwise default to 32 bits */
+       if (hwdev && hwdev->dma_mask)
+               mask = *hwdev->dma_mask;
+       return (addr & ~mask) != 0;
+}
+
+static inline int
+range_straddles_page_boundary(void *p, size_t size)
+{
+       extern unsigned long *contiguous_bitmap;
+       return (((((unsigned long)p & ~PAGE_MASK) + size) > PAGE_SIZE) &&
+               !test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap));
+}
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
@@ -24,46 +48,18 @@
 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
                 enum dma_data_direction direction);
 
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-          enum dma_data_direction direction)
-{
-       int i;
+extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+                     int nents, enum dma_data_direction direction);
+extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+                        int nents, enum dma_data_direction direction);
 
-       BUG_ON(direction == DMA_NONE);
+extern dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+            size_t size, enum dma_data_direction direction);
 
-       for (i = 0; i < nents; i++ ) {
-               BUG_ON(!sg[i].page);
-
-               sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
-       }
-
-       flush_write_buffers();
-       return nents;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
-            size_t size, enum dma_data_direction direction)
-{
-       BUG_ON(direction == DMA_NONE);
-       return page_to_phys(page) + offset;
-}
-
-static inline void
+extern void
 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-              enum dma_data_direction direction)
-{
-       BUG_ON(direction == DMA_NONE);
-}
-
-
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-            enum dma_data_direction direction)
-{
-       BUG_ON(direction == DMA_NONE);
-}
+              enum dma_data_direction direction);
 
 extern void
 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
@@ -93,34 +89,25 @@
 dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
                    enum dma_data_direction direction)
 {
+       if (swiotlb)
+               swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
+       flush_write_buffers();
 }
 
 static inline void
 dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
                    enum dma_data_direction direction)
 {
+       if (swiotlb)
+               swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
        flush_write_buffers();
 }
 
-static inline int
-dma_mapping_error(dma_addr_t dma_addr)
-{
-       return 0;
-}
+extern int
+dma_mapping_error(dma_addr_t dma_addr);
 
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
-        /*
-         * we fall back to GFP_DMA when the mask isn't all 1s,
-         * so we can't guarantee allocations that must be
-         * within a tighter range than GFP_DMA..
-         */
-        if(mask < 0x00ffffff)
-                return 0;
-
-       return 1;
-}
+extern int
+dma_supported(struct device *dev, u64 mask);
 
 static inline int
 dma_set_mask(struct device *dev, u64 mask)
@@ -133,6 +120,7 @@
        return 0;
 }
 
+#ifdef __i386__
 static inline int
 dma_get_cache_alignment(void)
 {
@@ -140,6 +128,9 @@
         * maximum possible, to be safe */
        return (1 << L1_CACHE_SHIFT_MAX);
 }
+#else
+extern int dma_get_cache_alignment(void);
+#endif
 
 #define dma_is_consistent(d)   (1)
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h    Thu Aug 25 22:53:20 2005
@@ -102,8 +102,8 @@
        __end_of_fixed_addresses
 };
 
-extern void __set_fixmap (enum fixed_addresses idx,
-                                       unsigned long phys, pgprot_t flags);
+extern void __set_fixmap(
+       enum fixed_addresses idx, maddr_t phys, pgprot_t flags);
 
 #define set_fixmap(idx, phys) \
                __set_fixmap(idx, phys, PAGE_KERNEL)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Aug 25 22:53:20 2005
@@ -163,7 +163,7 @@
         TRAP_INSTR
         : "=a" (ret), "=b" (ign)
        : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
-       : "memory" );
+       : "memory", "ecx" );
 
     return ret;
 }
@@ -178,7 +178,7 @@
         TRAP_INSTR
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
-       : "memory" );
+       : "memory", "ecx" );
 
     return ret;
 }
@@ -194,7 +194,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
-        : "memory" );
+        : "memory", "ecx" );
 
     return ret;
 }
@@ -210,7 +210,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
-        : "memory" );
+        : "memory", "ecx" );
 
     return ret;
 }
@@ -228,7 +228,7 @@
         : "=a" (ret), "=b" (ign1), "=S" (ign2)
        : "0" (__HYPERVISOR_sched_op),
         "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
-        "S" (srec) : "memory");
+        "S" (srec) : "memory", "ecx");
 
     return ret;
 }
@@ -244,7 +244,7 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
-        : "memory" );
+        : "memory", "ecx" );
 
     return ret;
 }
@@ -316,16 +316,17 @@
 
 static inline int
 HYPERVISOR_update_descriptor(
-    unsigned long ma, unsigned long word1, unsigned long word2)
-{
-    int ret;
-    unsigned long ign1, ign2, ign3;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
-       : "0" (__HYPERVISOR_update_descriptor), "1" (ma), "2" (word1),
-         "3" (word2)
+    u64 ma, u64 desc)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+       : "0" (__HYPERVISOR_update_descriptor),
+         "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)),
+         "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32))
        : "memory" );
 
     return ret;
@@ -385,13 +386,6 @@
 #endif
          "4" (flags)
        : "memory" );
-
-    if ( unlikely(ret < 0) )
-    {
-        printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
-               va, (new_val).pte_low, flags);
-        BUG();
-    }
 
     return ret;
 }
@@ -536,12 +530,15 @@
 {
     int ret;
     unsigned long ign1;
+    /* Yes, I really do want to clobber edx here: when we resume a
+       vcpu after unpickling a multi-processor domain, it returns
+       here, but clobbers all of the call clobbered registers. */
     __asm__ __volatile__ (
         TRAP_INSTR
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
-        : "memory" );
+        : "memory", "ecx", "edx" );
 
     return ret;
 }
@@ -557,8 +554,26 @@
         : "=a" (ret), "=b" (ign1)
        : "0" (__HYPERVISOR_sched_op),
          "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+        : "memory", "ecx" );
+
+    return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+    int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+    unsigned long ign1, ign2;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+       : "0" (__HYPERVISOR_sched_op),
+         "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+         "2" (ctxt)
         : "memory" );
 
     return ret;
 }
+
 #endif /* __HYPERCALL_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h      Thu Aug 25 22:53:20 2005
@@ -124,17 +124,4 @@
 #define dynirq_to_irq(_x)      ((_x) + DYNIRQ_BASE)
 #define irq_to_dynirq(_x)      ((_x) - DYNIRQ_BASE)
 
-#ifndef __ASSEMBLY__
-/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
-extern int  bind_virq_to_irq(int virq);
-extern void unbind_virq_from_irq(int virq);
-extern int  bind_ipi_to_irq(int ipi);
-extern void unbind_ipi_from_irq(int ipi);
-extern int  bind_evtchn_to_irq(int evtchn);
-extern void unbind_evtchn_from_irq(int evtchn);
-
-extern void irq_suspend(void);
-extern void irq_resume(void);
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_IRQ_VECTORS_H */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h  Thu Aug 25 22:53:20 2005
@@ -8,23 +8,12 @@
 
 static char * __init machine_specific_memory_setup(void)
 {
-       char *who;
-       unsigned long start_pfn, max_pfn;
-
-       who = "Xen";
-
-       /* In dom0, we have to start the fake e820 map above the first
-        * 1MB, in other domains, it can start at 0. */
-       if (xen_start_info.flags & SIF_INITDOMAIN)
-               start_pfn = 0x100;
-       else
-               start_pfn = 0;
-       max_pfn = xen_start_info.nr_pages;
+       unsigned long max_pfn = xen_start_info.nr_pages;
 
        e820.nr_map = 0;
-       add_memory_region(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn) - PFN_PHYS(start_pfn), E820_RAM);
+       add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
 
-       return who;
+       return "Xen";
 }
 
 void __init machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Thu Aug 25 22:53:20 2005
@@ -34,10 +34,10 @@
         * are always kernel segments while inside the kernel. Must
         * happen before reload of cr3/ldt (i.e., not in __switch_to).
         */
-       __asm__ __volatile__ ( "mov %%fs,%0 ; mov %%gs,%1"
+       asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
                : "=m" (*(int *)&current->thread.fs),
                  "=m" (*(int *)&current->thread.gs));
-       __asm__ __volatile__ ( "mov %0,%%fs ; mov %0,%%gs"
+       asm volatile ( "mov %0,%%fs ; mov %0,%%gs"
                : : "r" (0) );
 }
 
@@ -100,7 +100,7 @@
 }
 
 #define deactivate_mm(tsk, mm) \
-       asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
+       asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
 
 #define activate_mm(prev, next) \
        switch_mm((prev),(next),NULL)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Aug 25 22:53:20 2005
@@ -60,18 +60,50 @@
 #define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY      (~0U)
+#define FOREIGN_FRAME(m)       ((m) | 0x80000000U)
 extern unsigned int *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
-#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
-static inline unsigned long phys_to_machine(unsigned long phys)
-{
-       unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+#define pfn_to_mfn(pfn)        \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+       unsigned int pfn;
+
+       /*
+        * The array access can fail (e.g., device space beyond end of RAM).
+        * In such cases it doesn't matter what we return (we return garbage),
+        * but we must handle the fault without crashing!
+        */
+       asm (
+               "1:     movl %1,%0\n"
+               "2:\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b,2b\n"
+               ".previous"
+               : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+
+       return (unsigned long)pfn;
+}
+
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
+static inline maddr_t phys_to_machine(paddr_t phys)
+{
+       maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
        machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
        return machine;
 }
-static inline unsigned long machine_to_phys(unsigned long machine)
-{
-       unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+static inline paddr_t machine_to_phys(maddr_t machine)
+{
+       paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
        phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
        return phys;
 }
@@ -86,8 +118,9 @@
 typedef struct { unsigned long long pmd; } pmd_t;
 typedef struct { unsigned long long pgd; } pgd_t;
 typedef struct { unsigned long long pgprot; } pgprot_t;
-#define __pte(x) ({ unsigned long long _x = (x); \
-    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pte(x) ({ unsigned long long _x = (x);        \
+    if (_x & 1) _x = phys_to_machine(_x);               \
+    ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
 #define __pgd(x) ({ unsigned long long _x = (x); \
     (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
 #define __pmd(x) ({ unsigned long long _x = (x); \
@@ -227,8 +260,10 @@
                 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 /* VIRT <-> MACHINE conversion */
-#define virt_to_machine(_a)    (phys_to_machine(__pa(_a)))
-#define machine_to_virt(_m)    (__va(machine_to_phys(_m)))
+#define virt_to_machine(v)     (phys_to_machine(__pa(v)))
+#define machine_to_virt(m)     (__va(machine_to_phys(m)))
+#define virt_to_mfn(v)         (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
+#define mfn_to_virt(m)         (__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 #endif /* __KERNEL__ */
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h       Thu Aug 25 22:53:20 2005
@@ -43,11 +43,8 @@
 
 struct pci_dev;
 
-/* The PCI address space does equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS    (1)
+/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
+#define PCI_DMA_BUS_IS_PHYS    (0)
 
 /* pci_unmap_{page,single} is a nop so... */
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h   Thu Aug 25 22:53:20 2005
@@ -14,9 +14,9 @@
 do {                                                                   \
        if (unlikely((mm)->context.pinned)) {                           \
                if (!PageHighMem(pte))                                  \
-                       HYPERVISOR_update_va_mapping(                   \
+                       BUG_ON(HYPERVISOR_update_va_mapping(            \
                          (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT),\
-                         pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0);\
+                         pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));\
                set_pmd(pmd, __pmd(_PAGE_TABLE +                        \
                        ((unsigned long long)page_to_pfn(pte) <<        \
                                (unsigned long long) PAGE_SHIFT)));     \
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Thu Aug 25 22:53:20 2005
@@ -14,7 +14,28 @@
  * hook is made available.
  */
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+inline static void set_pte_at(struct mm_struct *mm, unsigned long addr, 
+                      pte_t *ptep, pte_t val )
+{
+    if ( ((mm != current->mm) && (mm != &init_mm)) ||
+        HYPERVISOR_update_va_mapping( (addr), (val), 0 ) )
+    {
+        set_pte(ptep, val);
+    }
+}
+
+inline static void set_pte_at_sync(struct mm_struct *mm, unsigned long addr, 
+                      pte_t *ptep, pte_t val )
+{
+    if ( ((mm != current->mm) && (mm != &init_mm)) ||
+        HYPERVISOR_update_va_mapping( (addr), (val), UVMF_INVLPG ) )
+    {
+        set_pte(ptep, val);
+       xen_invlpg(addr);
+    }
+}
+
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 
 #ifndef CONFIG_XEN_SHADOW_MODE
@@ -42,17 +63,15 @@
  * 
  * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
  *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- *      require. In all the cases we care about, the high bit gets shifted out
- *      (e.g., phys_to_machine()) so behaviour there is correct.
+ *      require. In all the cases we care about, the FOREIGN_FRAME bit is
+ *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
  */
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
 #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
        unsigned long pfn = mfn_to_pfn(mfn);                            \
-       if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn))             \
+       if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
 })
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h    Thu Aug 25 22:53:20 2005
@@ -68,7 +68,27 @@
                xen_l1_entry_update((pteptr), (pteval))
 # define set_pte_atomic(pteptr,pteval) set_pte(pteptr,pteval)
 #endif
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+inline static void set_pte_at(struct mm_struct *mm, unsigned long addr, 
+                      pte_t *ptep, pte_t val )
+{
+    if ( ((mm != current->mm) && (mm != &init_mm)) ||
+        HYPERVISOR_update_va_mapping( (addr), (val), 0 ) )
+    {
+        set_pte(ptep, val);
+    }
+}
+
+inline static void set_pte_at_sync(struct mm_struct *mm, unsigned long addr, 
+                      pte_t *ptep, pte_t val )
+{
+    if ( ((mm != current->mm) && (mm != &init_mm)) ||
+        HYPERVISOR_update_va_mapping( (addr), (val), UVMF_INVLPG ) )
+    {
+        set_pte(ptep, val);
+       xen_invlpg(addr);
+    }
+}
 
 #ifdef CONFIG_XEN_SHADOW_MODE
 # define set_pmd(pmdptr,pmdval) \
@@ -130,14 +150,13 @@
        return !pte.pte_low && !pte.pte_high;
 }
 
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
-#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) /* FIXME */
+#define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\
+                       (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) )
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
        unsigned long pfn = mfn_to_pfn(mfn);                            \
-       if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn))             \
+       if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
 })
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Thu Aug 25 22:53:20 2005
@@ -32,7 +32,7 @@
  */
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 extern unsigned long empty_zero_page[1024];
-extern pgd_t swapper_pg_dir[1024];
+extern pgd_t *swapper_pg_dir;
 extern kmem_cache_t *pgd_cache;
 extern kmem_cache_t *pmd_cache;
 extern spinlock_t pgd_lock;
@@ -398,7 +398,7 @@
        do {                                                              \
                if (__dirty) {                                            \
                        if ( likely((__vma)->vm_mm == current->mm) ) {    \
-                           HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
+                           BUG_ON(HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits))); \
                        } else {                                          \
                             xen_l1_entry_update((__ptep), (__entry)); \
                            flush_tlb_page((__vma), (__address));         \
@@ -416,8 +416,8 @@
 #define ptep_establish_new(__vma, __address, __ptep, __entry)          \
 do {                                                                   \
        if (likely((__vma)->vm_mm == current->mm)) {                    \
-               HYPERVISOR_update_va_mapping((__address),               \
-                                            __entry, 0);               \
+               BUG_ON(HYPERVISOR_update_va_mapping((__address),        \
+                                            __entry, 0));              \
        } else {                                                        \
                xen_l1_entry_update((__ptep), (__entry));       \
        }                                                               \
@@ -450,7 +450,7 @@
 #define arbitrary_virt_to_machine(__va)                                        \
 ({                                                                     \
        pte_t *__pte = virt_to_ptep(__va);                              \
-       unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK;     \
+       maddr_t __pa = (maddr_t)pte_mfn(*__pte) << PAGE_SHIFT;          \
        __pa | ((unsigned long)(__va) & (PAGE_SIZE-1));                 \
 })
 
@@ -466,10 +466,12 @@
                             unsigned long size, 
                             pgprot_t prot,
                             domid_t  domid);
-int __direct_remap_area_pages(struct mm_struct *mm,
-                             unsigned long address, 
-                             unsigned long size, 
-                             mmu_update_t *v);
+int create_lookup_pte_addr(struct mm_struct *mm,
+                           unsigned long address,
+                           unsigned long *ptep);
+int touch_pte_range(struct mm_struct *mm,
+                    unsigned long address,
+                    unsigned long size);
 
 #define io_remap_page_range(vma,from,phys,size,prot) \
 direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h     Thu Aug 25 22:53:20 2005
@@ -1,89 +1,1 @@
-#ifndef _X8664_DMA_MAPPING_H
-#define _X8664_DMA_MAPPING_H 1
-
-/*
- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
- * documentation.
- */
-
-#include <linux/config.h>
-
-#include <asm/scatterlist.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-
-extern dma_addr_t bad_dma_address;
-#define dma_mapping_error(x) \
-       (swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address))
-
-void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-                        unsigned gfp);
-void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
-                        dma_addr_t dma_handle);
-
-extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size,
-                                enum dma_data_direction direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
-                            enum dma_data_direction direction);
-
-#define dma_map_page(dev,page,offset,size,dir) \
-       dma_map_single((dev), page_address(page)+(offset), (size), (dir))
-
-extern void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-                       enum dma_data_direction direction);
-
-extern void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
-                           enum dma_data_direction direction);
-
-static inline void dma_sync_sg_for_cpu(struct device *hwdev,
-                                      struct scatterlist *sg,
-                                      int nelems, int direction)
-{
-       if (direction == DMA_NONE)
-               out_of_line_bug();
-
-       if (swiotlb)
-               return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction);
-
-       flush_write_buffers();
-}
-
-static inline void dma_sync_sg_for_device(struct device *hwdev,
-                                         struct scatterlist *sg,
-                                         int nelems, int direction)
-{
-       if (direction == DMA_NONE)
-               out_of_line_bug();
-
-       if (swiotlb)
-               return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction);
-
-       flush_write_buffers();
-}
-
-extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
-                     int nents, int direction);
-extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
-                        int nents, int direction);
-
-#define dma_unmap_page dma_unmap_single
-
-extern int dma_supported(struct device *hwdev, u64 mask);
-extern int dma_get_cache_alignment(void);
-#define dma_is_consistent(h) 1
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
-       if (!dev->dma_mask || !dma_supported(dev, mask))
-               return -EIO;
-       *dev->dma_mask = mask;
-       return 0;
-}
-
-static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
-{
-       flush_write_buffers();
-}
-#endif
+#include <asm-i386/dma-mapping.h>
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h       Thu Aug 25 22:53:20 2005
@@ -502,4 +502,21 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_vcpu_pickle(
+    int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret)
+       : "0" ((unsigned long)__HYPERVISOR_sched_op),
+       "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+       "S" ((unsigned long)ctxt)
+       : __syscall_clobber );
+
+    return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h    Thu Aug 25 22:53:20 2005
@@ -122,17 +122,4 @@
 #define dynirq_to_irq(_x)      ((_x) + DYNIRQ_BASE)
 #define irq_to_dynirq(_x)      ((_x) - DYNIRQ_BASE)
 
-#ifndef __ASSEMBLY__
-/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
-extern int  bind_virq_to_irq(int virq);
-extern void unbind_virq_from_irq(int virq);
-extern int  bind_ipi_to_irq(int ipi);
-extern void unbind_ipi_from_irq(int ipi);
-extern int  bind_evtchn_to_irq(int evtchn);
-extern void unbind_evtchn_from_irq(int evtchn);
-
-extern void irq_suspend(void);
-extern void irq_resume(void);
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_IRQ_VECTORS_H */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Aug 25 22:53:20 2005
@@ -62,19 +62,46 @@
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY      (~0U)
+#define FOREIGN_FRAME(m)       ((m) | 0x80000000U)
 extern u32 *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned int)(_pfn)])
-#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned int)(_mfn)])
-static inline unsigned long phys_to_machine(unsigned long phys)
-{
-       unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+#define pfn_to_mfn(pfn)        \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+       unsigned int pfn;
+
+       /*
+        * The array access can fail (e.g., device space beyond end of RAM).
+        * In such cases it doesn't matter what we return (we return garbage),
+        * but we must handle the fault without crashing!
+        */
+       asm (
+               "1:     movl %1,%k0\n"
+               "2:\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 8\n"
+               "       .quad 1b,2b\n"
+               ".previous"
+               : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+
+       return (unsigned long)pfn;
+}
+
+/* Definitions for machine and pseudophysical addresses. */
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+
+static inline maddr_t phys_to_machine(paddr_t phys)
+{
+       maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
        machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
        return machine;
 }
 
-static inline unsigned long machine_to_phys(unsigned long machine)
-{
-       unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+static inline paddr_t machine_to_phys(maddr_t machine)
+{
+       paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
        phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
        return phys;
 }
@@ -211,8 +238,10 @@
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 
 /* VIRT <-> MACHINE conversion */
-#define virt_to_machine(_a)    (phys_to_machine(__pa(_a)))
-#define machine_to_virt(_m)    (__va(machine_to_phys(_m)))
+#define virt_to_machine(v)     (phys_to_machine(__pa(v)))
+#define machine_to_virt(m)     (__va(machine_to_phys(m)))
+#define virt_to_mfn(v)         (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
+#define mfn_to_virt(m)         (__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 #define VM_DATA_DEFAULT_FLAGS \
        (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h     Thu Aug 25 22:53:20 2005
@@ -79,7 +79,9 @@
 #else
 /* No IOMMU */
 
-#define PCI_DMA_BUS_IS_PHYS    1
+/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */
+#define PCI_DMA_BUS_IS_PHYS    (0)
+
 #define pci_dac_dma_supported(pci_dev, mask)    1
 
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Aug 25 22:53:20 2005
@@ -4,31 +4,20 @@
 /*
  * This file contains the functions and defines necessary to modify and use
  * the x86-64 page table tree.
- * 
- * x86-64 has a 4 level table setup. Generic linux MM only supports
- * three levels. The fourth level is currently a single static page that
- * is shared by everybody and just contains a pointer to the current
- * three level page setup on the beginning and some kernel mappings at 
- * the end. For more details see Documentation/x86_64/mm.txt
  */
 #include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <asm/bitops.h>
 #include <linux/threads.h>
+#include <linux/sched.h>
 #include <asm/pda.h>
+#ifdef CONFIG_XEN
 #include <asm-xen/hypervisor.h>
+
 extern pud_t level3_user_pgt[512];
-extern pud_t init_level4_pgt[];
 extern pud_t init_level4_user_pgt[];
-extern unsigned long __supported_pte_mask;
-
-#define swapper_pg_dir NULL
-
-extern int nonx_setup(char *str);
-extern void paging_init(void);
-extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
-
-extern unsigned long pgkern_mask;
+
+extern void xen_init_pt(void);
 
 #define virt_to_ptep(__va)                                             \
 ({                                                                     \
@@ -44,6 +33,22 @@
        unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK;     \
        __pa | ((unsigned long)(__va) & (PAGE_SIZE-1));                 \
 })
+#endif
+
+extern pud_t level3_kernel_pgt[512];
+extern pud_t level3_physmem_pgt[512];
+extern pud_t level3_ident_pgt[512];
+extern pmd_t level2_kernel_pgt[512];
+extern pgd_t init_level4_pgt[];
+extern unsigned long __supported_pte_mask;
+
+#define swapper_pg_dir init_level4_pgt
+
+extern int nonx_setup(char *str);
+extern void paging_init(void);
+extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+
+extern unsigned long pgkern_mask;
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
@@ -52,11 +57,14 @@
 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
 #define PGDIR_SHIFT    39
 #define PTRS_PER_PGD   512
 
 /*
- * PUDIR_SHIFT determines what a top-level page table entry can map
+ * PUD_SHIFT determines what a 3rd-level page table entry can map
  */
 #define PUD_SHIFT      30
 #define PTRS_PER_PUD   512
@@ -80,7 +88,7 @@
 #define pud_ERROR(e) \
        printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), pud_val(e))
 #define pgd_ERROR(e) \
-        printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
+       printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
 
 #define pgd_none(x)    (!pgd_val(x))
 #define pud_none(x)    (!pud_val(x))
@@ -90,18 +98,10 @@
 
 extern inline int pud_present(pud_t pud)       { return !pud_none(pud); }
 
-#ifdef CONFIG_SMP
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-
-#else
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-#if 0
 static inline void set_pte(pte_t *dst, pte_t val)
 {
        *dst = val;
 }
-#endif
-#endif
 
 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update(pmdptr, (pmdval))
 #define set_pud(pudptr, pudval) xen_l3_entry_update(pudptr, (pudval))
@@ -132,6 +132,9 @@
  * each domain will have separate page tables, with their own versions of
  * accessed & dirty state.
  */
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte, 0))
+
+#if 0
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
 {
         pte_t pte = *xp;
@@ -139,21 +142,22 @@
                 set_pte(xp, __pte_ma(0));
         return pte;
 }
+#endif
 
 #define pte_same(a, b)         ((a).pte == (b).pte)
 
-#define PMD_SIZE        (1UL << PMD_SHIFT)
-#define PMD_MASK        (~(PMD_SIZE-1))
-#define PUD_SIZE        (1UL << PUD_SHIFT)
-#define PUD_MASK        (~(PUD_SIZE-1))
-#define PGDIR_SIZE      (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK      (~(PGDIR_SIZE-1))
-
-#define USER_PTRS_PER_PGD       (TASK_SIZE/PGDIR_SIZE)
+#define PMD_SIZE       (1UL << PMD_SHIFT)
+#define PMD_MASK       (~(PMD_SIZE-1))
+#define PUD_SIZE       (1UL << PUD_SHIFT)
+#define PUD_MASK       (~(PUD_SIZE-1))
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
 #define FIRST_USER_ADDRESS     0
 
 #ifndef __ASSEMBLY__
-#define MAXMEM           0x3fffffffffffUL
+#define MAXMEM          0x3fffffffffffUL
 #define VMALLOC_START    0xffffc20000000000UL
 #define VMALLOC_END      0xffffe1ffffffffffUL
 #define MODULES_VADDR    0xffffffff88000000UL
@@ -262,7 +266,16 @@
        val &= ~(_PAGE_USER | _PAGE_DIRTY); 
        return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);      
 } 
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+inline static void set_pte_at(struct mm_struct *mm, unsigned long addr, 
+                      pte_t *ptep, pte_t val )
+{
+    if ( ((mm != current->mm) && (mm != &init_mm)) ||
+        HYPERVISOR_update_va_mapping( (addr), (val), 0 ) )
+    {
+        set_pte(ptep, val);
+    }
+}
 
 #define pte_none(x)    (!(x).pte)
 #define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
@@ -287,17 +300,15 @@
  * 
  * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
  *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- *      require. In all the cases we care about, the high bit gets shifted out
- *      (e.g., phys_to_machine()) so behaviour there is correct.
- */
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+ *      require. In all the cases we care about, the FOREIGN_FRAME bit is
+ *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
+ */
 #define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
        unsigned pfn = mfn_to_pfn(mfn);                                 \
-       if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn))             \
+       if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
 })
@@ -347,7 +358,7 @@
        pte_t pte = *ptep;
        int ret = pte_dirty(pte);
        if (ret)
-               xen_l1_entry_update(ptep, pte_mkclean(pte));
+               set_pte(ptep, pte_mkclean(pte));
        return ret;
 }
 
@@ -356,7 +367,7 @@
        pte_t pte = *ptep;
        int ret = pte_young(pte);
        if (ret)
-               xen_l1_entry_update(ptep, pte_mkold(pte));
+               set_pte(ptep, pte_mkold(pte));
        return ret;
 }
 
@@ -398,7 +409,7 @@
 
 /* PUD - Level3 access */
 /* to find an entry in a page-table-directory. */
-#define pud_index(address) ((address >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
 static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
 { 
@@ -413,7 +424,7 @@
 {
        unsigned long addr;
 
-       addr = pud_val(init_level4_pgt[pud_index(address)]);
+       addr = pgd_val(init_level4_pgt[pud_index(address)]);
        addr &= PHYSICAL_PAGE_MASK; /* machine physical */
         addr = machine_to_phys(addr);
        return __pud_offset_k((pud_t *)__va(addr), address);
@@ -427,9 +438,11 @@
 #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
                                   pmd_index(address))
 #define pmd_none(x)    (!pmd_val(x))
-#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+/* pmd_present doesn't just test the _PAGE_PRESENT bit, since writable
+   page tables can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
 #define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
-#define        pmd_bad(x)      ((pmd_val(x) & ~PTE_MASK) != _KERNPG_TABLE )
+#define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
 
@@ -479,11 +492,24 @@
  * race with other CPU's that might be updating the dirty
  * bit at the same time. */
 #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#if 0
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
        do {                                                              \
                if (__dirty) {                                            \
                        set_pte(__ptep, __entry);                         \
                        flush_tlb_page(__vma, __address);                 \
+               }                                                         \
+       } while (0)
+#endif
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+       do {                                                              \
+               if (__dirty) {                                            \
+                       if ( likely((__vma)->vm_mm == current->mm) ) {    \
+                           BUG_ON(HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits))); \
+                       } else {                                          \
+                            xen_l1_entry_update((__ptep), (__entry)); \
+                           flush_tlb_page((__vma), (__address));         \
+                       }                                                 \
                }                                                         \
        } while (0)
 
@@ -510,12 +536,18 @@
                               unsigned long address,
                               unsigned long size,
                               mmu_update_t *v);
+int create_lookup_pte_addr(struct mm_struct *mm,
+                           unsigned long address,
+                           unsigned long *ptep);
+int touch_pte_range(struct mm_struct *mm,
+                    unsigned long address,
+                    unsigned long size);
 
 #define io_remap_page_range(vma, vaddr, paddr, size, prot)             \
-               remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+               direct_remap_area_pages((vma)->vm_mm,vaddr,paddr,size,prot,DOMID_IO)
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)                \
-               remap_pfn_range(vma, vaddr, pfn, size, prot)
+               direct_remap_area_pages((vma)->vm_mm,vaddr,(pfn)<<PAGE_SHIFT,size,prot,DOMID_IO)
 
 #define MK_IOSPACE_PFN(space, pfn)     (pfn)
 #define GET_IOSPACE(pfn)               0
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h       Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h       Thu Aug 25 22:53:20 2005
@@ -153,6 +153,20 @@
                } while (0);
        }
 }
+
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+#ifndef CONFIG_XEN
+       mmu_cr4_features &= ~mask;
+       __asm__("movq %%cr4,%%rax\n\t"
+               "andq %0,%%rax\n\t"
+               "movq %%rax,%%cr4\n"
+               : : "irg" (~mask)
+               :"ax");
+#endif
+}
+
 
 #define load_cr3(pgdir) do {                           \
        xen_pt_switch(__pa(pgdir));                     \
@@ -283,9 +297,9 @@
        load_gs_index(0);                                                       \
        (regs)->rip = (new_rip);                                                 \
        (regs)->rsp = (new_rsp);                                                 \
-       write_pda(oldrsp, (new_rsp));                                            \
-       (regs)->cs = __USER_CS;                                                  \
-       (regs)->ss = __USER_DS;                                                  \
+       write_pda(oldrsp, (new_rsp));                                            \
+       (regs)->cs = __USER_CS;                                                  \
+       (regs)->ss = __USER_DS;                                                  \
        (regs)->eflags = 0x200;                                                  \
        set_fs(USER_DS);                                                         \
 } while(0) 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/balloon.h
--- a/linux-2.6-xen-sparse/include/asm-xen/balloon.h    Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/balloon.h    Thu Aug 25 22:53:20 2005
@@ -35,10 +35,19 @@
  * Inform the balloon driver that it should allow some slop for device-driver
  * memory activities.
  */
-extern void balloon_update_driver_allowance(long delta);
+extern void
+balloon_update_driver_allowance(
+       long delta);
 
-/* Give up unmapped pages to the balloon driver. */
-extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns);
+/* Allocate an empty low-memory page range. */
+extern struct page *
+balloon_alloc_empty_page_range(
+       unsigned long nr_pages);
+
+/* Deallocate an empty page range, adding to the balloon. */
+extern void
+balloon_dealloc_empty_page_range(
+       struct page *page, unsigned long nr_pages);
 
 /*
  * Prevent the balloon driver from changing the memory reservation during
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/evtchn.h
--- a/linux-2.6-xen-sparse/include/asm-xen/evtchn.h     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/evtchn.h     Thu Aug 25 22:53:20 2005
@@ -32,6 +32,7 @@
 #define __ASM_EVTCHN_H__
 
 #include <linux/config.h>
+#include <linux/interrupt.h>
 #include <asm-xen/hypervisor.h>
 #include <asm/ptrace.h>
 #include <asm-xen/synch_bitops.h>
@@ -41,6 +42,34 @@
 /*
  * LOW-LEVEL DEFINITIONS
  */
+
+/* Dynamically bind a VIRQ source to Linux IRQ space. */
+extern int  bind_virq_to_irq(int virq);
+extern void unbind_virq_from_irq(int virq);
+
+/* Dynamically bind an IPI source to Linux IRQ space. */
+extern int  bind_ipi_to_irq(int ipi);
+extern void unbind_ipi_from_irq(int ipi);
+
+/* Dynamically bind an event-channel port to Linux IRQ space. */
+extern int  bind_evtchn_to_irq(unsigned int evtchn);
+extern void unbind_evtchn_from_irq(unsigned int evtchn);
+
+/*
+ * Dynamically bind an event-channel port to an IRQ-like callback handler.
+ * On some platforms this may not be implemented via the Linux IRQ subsystem.
+ * You *cannot* trust the irq argument passed to the callback handler.
+ */
+extern int  bind_evtchn_to_irqhandler(
+    unsigned int evtchn,
+    irqreturn_t (*handler)(int, void *, struct pt_regs *),
+    unsigned long irqflags,
+    const char *devname,
+    void *dev_id);
+extern void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
 
 /* Entry point for notifications into Linux subsystems. */
 asmlinkage void evtchn_do_upcall(struct pt_regs *regs);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h     Thu Aug 25 22:53:20 2005
@@ -19,54 +19,48 @@
 
 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
 #define NR_GRANT_FRAMES 4
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
 
-int
-gnttab_grant_foreign_access(
-    domid_t domid, unsigned long frame, int readonly);
+struct gnttab_free_callback {
+    struct gnttab_free_callback *next;
+    void (*fn)(void *);
+    void *arg;
+    u16 count;
+};
 
-void
-gnttab_end_foreign_access(
-    grant_ref_t ref, int readonly);
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+                               int readonly);
 
-int
-gnttab_grant_foreign_transfer(
-    domid_t domid, unsigned long pfn);
+void gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
+void gnttab_end_foreign_access(grant_ref_t ref, int readonly);
 
-unsigned long
-gnttab_end_foreign_transfer(
-    grant_ref_t ref);
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
 
-int
-gnttab_query_foreign_access( 
-    grant_ref_t ref );
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
+
+int gnttab_query_foreign_access(grant_ref_t ref);
 
 /*
  * operations on reserved batches of grant references
  */
-int
-gnttab_alloc_grant_references(
-    u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
 
-void
-gnttab_free_grant_references(
-    u16 count, grant_ref_t private_head );
+void gnttab_free_grant_reference(grant_ref_t ref);
 
-int
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
-);
+void gnttab_free_grant_references(grant_ref_t head);
 
-void
-gnttab_release_grant_reference(
-    grant_ref_t *private_head, grant_ref_t release );
+int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
 
-void
-gnttab_grant_foreign_access_ref(
-    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+                                   grant_ref_t release);
 
-void
-gnttab_grant_foreign_transfer_ref(
-    grant_ref_t, domid_t domid, unsigned long pfn);
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+                                 void (*fn)(void *), void *arg, u16 count);
 
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+                                    unsigned long frame, int readonly);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+                                      unsigned long pfn);
 
 #endif /* __ASM_GNTTAB_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Thu Aug 25 22:53:20 2005
@@ -134,12 +134,8 @@
 #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
 #endif /* linux < 2.6.0 */
 
-void xen_contig_memory(unsigned long vstart, unsigned int order);
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-/* Allocate a contiguous empty region of low memory. Return virtual start. */
-unsigned long allocate_empty_lowmem_region(unsigned long pages);
-#endif
+void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
 
 #include <asm/hypercall.h>
 
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Thu Aug 25 22:53:20 2005
@@ -1,5 +1,3 @@
-#ifndef _ASM_XEN_XENBUS_H
-#define _ASM_XEN_XENBUS_H
 /******************************************************************************
  * xenbus.h
  *
@@ -28,13 +26,17 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
+
+#ifndef _ASM_XEN_XENBUS_H
+#define _ASM_XEN_XENBUS_H
+
 #include <linux/device.h>
+#include <linux/notifier.h>
 #include <asm/semaphore.h>
 
 /* A xenbus device. */
 struct xenbus_device {
        char *devicetype;
-       char *subtype;
        char *nodename;
        struct device dev;
        int has_error;
@@ -50,7 +52,6 @@
 {
        /* .../device/<device_type>/<identifier> */
        char devicetype[32];    /* General class of device. */
-       char subtype[32];       /* Contents of "subtype" for this device */
 };
 
 /* A xenbus driver. */
@@ -58,9 +59,11 @@
        char *name;
        struct module *owner;
        const struct xenbus_device_id *ids;
-       int  (*probe)    (struct xenbus_device * dev,
-                         const struct xenbus_device_id * id);
-       int  (*remove)   (struct xenbus_device * dev);
+       int (*probe)(struct xenbus_device *dev,
+                    const struct xenbus_device_id *id);
+       int (*remove)(struct xenbus_device *dev);
+       int (*suspend)(struct xenbus_device *dev);
+       int (*resume)(struct xenbus_device *dev);
        struct device_driver driver;
 };
 
@@ -69,7 +72,8 @@
        return container_of(drv, struct xenbus_driver, driver);
 }
 
-int xenbus_register_driver(struct xenbus_driver *drv);
+int xenbus_register_device(struct xenbus_driver *drv);
+int xenbus_register_backend(struct xenbus_driver *drv);
 void xenbus_unregister_driver(struct xenbus_driver *drv);
 
 /* Caller must hold this lock to call these functions: it's also held
@@ -112,7 +116,26 @@
        void (*callback)(struct xenbus_watch *, const char *node);
 };
 
+/* notifier routines for when the xenstore comes up */
+int register_xenstore_notifier(struct notifier_block *nb);
+void unregister_xenstore_notifier(struct notifier_block *nb);
+
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
+void reregister_xenbus_watches(void);
+
+/* Called from xen core code. */
+void xenbus_suspend(void);
+void xenbus_resume(void);
+/* Empty read => kfree str, set it to ERR_PTR(-ERANGE); yields IS_ERR(str). */
+#define XENBUS_IS_ERR_READ(str) ({                     \
+       if (!IS_ERR(str) && strlen(str) == 0) {         \
+               kfree(str);                             \
+               str = ERR_PTR(-ERANGE);                 \
+       }                                               \
+       IS_ERR(str);                                    \
+})
+
+#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)
 
 #endif /* _ASM_XEN_XENBUS_H */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/linux/mm.h   Thu Aug 25 22:53:20 2005
@@ -817,6 +817,12 @@
 int remap_pfn_range(struct vm_area_struct *, unsigned long,
                unsigned long, unsigned long, pgprot_t);
 
+typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr, 
+                        void *data);
+extern int generic_page_range(struct mm_struct *mm, unsigned long address, 
+                              unsigned long size, pte_fn_t fn, void *data);
+
+
 #ifdef CONFIG_PROC_FS
 void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 #else
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/mkbuildtree
--- a/linux-2.6-xen-sparse/mkbuildtree  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/mkbuildtree  Thu Aug 25 22:53:20 2005
@@ -102,10 +102,11 @@
 relative_lndir ${RS}
 rm -f mkbuildtree
 
-
 # Create links to the shared definitions of the Xen interfaces.
 rm -rf ${AD}/include/asm-xen/xen-public
 mkdir  ${AD}/include/asm-xen/xen-public
 cd     ${AD}/include/asm-xen/xen-public
 relative_lndir ../../../${RS}/../xen/include/public
 
+cd ${AD}/drivers/xen/xenbus
+ln -sf ../../../${RS}/../tools/xenstore/xenstored.h
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/mm/memory.c  Thu Aug 25 22:53:20 2005
@@ -954,8 +954,10 @@
                         i++;
                         start += PAGE_SIZE;
                         len--;
+printk(KERN_ALERT "HIT  0x%lx\n", start);
                         continue;
-                    }
+                    } 
+else printk(KERN_ALERT "MISS 0x%lx\n", start);
                 }
 
                if (!vma || (vma->vm_flags & VM_IO)
@@ -1213,6 +1215,104 @@
 }
 EXPORT_SYMBOL(remap_pfn_range);
 
+/* Call fn on every pte in [addr, end) under pmd.  Returns -ENOMEM if the
+ * pte page cannot be obtained, else 0 or the first non-zero fn result. */
+static inline int generic_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+        pte_t *pte;
+        int err;
+        struct page *pte_page;
+
+        pte = (mm == &init_mm) ?
+                pte_alloc_kernel(mm, pmd, addr) :
+                pte_alloc_map(mm, pmd, addr);
+        if (!pte)
+                return -ENOMEM;
+
+        pte_page = pmd_page(*pmd);
+
+        do {
+                /* Advance pte in the call so it is past the entry on break. */
+                err = fn(pte++, pte_page, addr, data);
+                if (err)
+                        break;
+        } while (addr += PAGE_SIZE, addr != end);
+
+        if (mm != &init_mm)
+                pte_unmap(pte-1);       /* always the last pte handed to fn */
+        return err;
+}
+
+/* Walk the pmd entries spanning [addr, end) below pud, running
+ * generic_pte_range on each piece.  Returns -ENOMEM if pmd_alloc fails,
+ * otherwise the first non-zero generic_pte_range result, or 0. */
+static inline int generic_pmd_range(struct mm_struct *mm, pud_t *pud,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+        pmd_t *pmd = pmd_alloc(mm, pud, addr);
+        unsigned long next;
+        int err;
+
+        if (!pmd)
+                return -ENOMEM;
+        for (;; pmd++, addr = next) {
+                next = pmd_addr_end(addr, end);
+                err = generic_pte_range(mm, pmd, addr, next, fn, data);
+                if (err || next == end)
+                        break;
+        }
+        return err;
+}
+
+/* Walk the pud entries spanning [addr, end) below pgd, running
+ * generic_pmd_range on each.  Returns -ENOMEM, a non-zero sub-result, or 0. */
+static inline int generic_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+        pud_t *pud = pud_alloc(mm, pgd, addr);
+        unsigned long next;
+        int err;
+
+        if (!pud)
+                return -ENOMEM;
+        for (;; pud++, addr = next) {
+                next = pud_addr_end(addr, end);
+                err = generic_pmd_range(mm, pud, addr, next, fn, data);
+                if (err || next == end)
+                        break;
+        }
+        return err;
+}
+
+/*
+ * Walk [addr, addr + size) of mm with page_table_lock held, filling in page
+ * tables as necessary, and call fn on every pte.  Stops at the first error.
+ */
+int generic_page_range(struct mm_struct *mm, unsigned long addr,
+                       unsigned long size, pte_fn_t fn, void *data)
+{
+        unsigned long end = addr + size;
+        unsigned long next;
+        pgd_t *pgd;
+        int err;
+
+        BUG_ON(addr >= end);
+        pgd = pgd_offset(mm, addr);
+        spin_lock(&mm->page_table_lock);
+        for (;; pgd++, addr = next) {
+                next = pgd_addr_end(addr, end);
+                err = generic_pud_range(mm, pgd, addr, next, fn, data);
+                if (err || next == end)
+                        break;
+        }
+        spin_unlock(&mm->page_table_lock);
+        return err;
+}
+
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
  * servicing faults for write access.  In the normal case, do always want
diff -r 5f1ed597f107 -r 8799d14bef77 tools/Makefile
--- a/tools/Makefile    Wed Aug 24 02:43:18 2005
+++ b/tools/Makefile    Thu Aug 25 22:53:20 2005
@@ -13,7 +13,8 @@
 #SUBDIRS += pygrub
 SUBDIRS += firmware
 SUBDIRS += security
-#SUBDIRS += consoled
+SUBDIRS += console
+SUBDIRS += xenstat
 
 .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/Rules.mk
--- a/tools/Rules.mk    Wed Aug 24 02:43:18 2005
+++ b/tools/Rules.mk    Thu Aug 25 22:53:20 2005
@@ -6,6 +6,7 @@
 XEN_LIBXC          = $(XEN_ROOT)/tools/libxc
 XEN_XCS            = $(XEN_ROOT)/tools/xcs
 XEN_XENSTORE       = $(XEN_ROOT)/tools/xenstore
+XEN_LIBXENSTAT     = $(XEN_ROOT)/tools/xenstat/libxenstat/src
 
 ifeq ($(XEN_TARGET_ARCH),x86_32)
 CFLAGS  += -m32 -march=i686
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c  Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/blktaplib.c  Thu Aug 25 22:53:20 2005
@@ -34,7 +34,7 @@
 #else
 #define DPRINTF(_f, _a...) ((void)0)
 #endif
-#define DEBUG_RING_IDXS 0
+#define DEBUG_RING_IDXS 1
 
 #define POLLRDNORM     0x040 
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h  Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/blktaplib.h  Thu Aug 25 22:53:20 2005
@@ -7,7 +7,7 @@
 #ifndef __BLKTAPLIB_H__
 #define __BLKTAPLIB_H__
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <sys/user.h>
 #include <xen/xen.h>
 #include <xen/io/blkif.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h       Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/parallax/block-async.h       Thu Aug 25 22:53:20 2005
@@ -7,7 +7,7 @@
 #define _BLOCKASYNC_H_
 
 #include <assert.h>
-#include <xc.h>
+#include <xenctrl.h>
 #include "vdi.h"
 
 struct io_ret
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h        Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/parallax/blockstore.h        Thu Aug 25 22:53:20 2005
@@ -10,7 +10,7 @@
 #define __BLOCKSTORE_H__
 
 #include <netinet/in.h>
-#include <xc.h>
+#include <xenctrl.h>
 
 #define BLOCK_SIZE  4096
 #define BLOCK_SHIFT   12
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c     Thu Aug 25 22:53:20 2005
@@ -35,7 +35,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
-#include <xc.h>
+#include <xenctrl.h>
 #define TRACE_ENTER /* printf("enter %s\n", __FUNCTION__) */
 long (*myptrace)(enum __ptrace_request, pid_t, long, long);
 int (*myxcwait)(int domain, int *status, int options) ;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/libxendebug/Makefile
--- a/tools/debugger/libxendebug/Makefile       Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/libxendebug/Makefile       Thu Aug 25 22:53:20 2005
@@ -20,7 +20,7 @@
 CFLAGS   += -Wp,-MD,.$(@F).d
 DEPS     = .*.d
 
-LDFLAGS  += -L$(XEN_ROOT)/tools/libxc -lxc
+LDFLAGS  += -L$(XEN_ROOT)/tools/libxc -lxenctrl
 
 LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
 PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/libxendebug/xendebug.c     Thu Aug 25 22:53:20 2005
@@ -12,7 +12,7 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/mman.h>
-#include <xc.h>
+#include <xenctrl.h>
 #include "list.h"
 
 #if defined(__i386__)
@@ -40,7 +40,7 @@
 typedef struct bwcpoint                           /* break/watch/catch point */
 {
     struct list_head list;
-    memory_t address;
+    unsigned long address;
     u32 domain;
     u8 old_value;                             /* old value for software bkpt */
 } bwcpoint_t, *bwcpoint_p;
@@ -311,7 +311,7 @@
 /* access to one page */
 static int
 xendebug_memory_page (domain_context_p ctxt, int xc_handle, u32 vcpu,
-                      int protection, memory_t address, int length, u8 *buffer)
+                      int protection, unsigned long address, int length, u8 *buffer)
 {
     vcpu_guest_context_t *vcpu_ctxt = &ctxt->context[vcpu];
     unsigned long pde, page;
@@ -407,7 +407,7 @@
 /* divide a memory operation into accesses to individual pages */
 static int
 xendebug_memory_op (domain_context_p ctxt, int xc_handle, u32 vcpu,
-                    int protection, memory_t address, int length, u8 *buffer)
+                    int protection, unsigned long address, int length, u8 *buffer)
 {
     int      remain;              /* number of bytes to touch past this page */
     int      bytes   = 0;
@@ -431,7 +431,7 @@
 xendebug_read_memory(int xc_handle,
                      u32 domid,
                      u32 vcpu,
-                     memory_t address,
+                     unsigned long address,
                      u32 length,
                      u8 *data)
 {
@@ -451,7 +451,7 @@
 xendebug_write_memory(int xc_handle,
                       u32 domid,
                       u32 vcpu,
-                      memory_t address,
+                      unsigned long address,
                       u32 length,
                       u8 *data)
 {
@@ -471,7 +471,7 @@
 xendebug_insert_memory_breakpoint(int xc_handle,
                                   u32 domid,
                                   u32 vcpu,
-                                  memory_t address,
+                                  unsigned long address,
                                   u32 length)
 {
     bwcpoint_p bkpt;
@@ -517,7 +517,7 @@
 xendebug_remove_memory_breakpoint(int xc_handle,
                                   u32 domid,
                                   u32 vcpu,
-                                  memory_t address,
+                                  unsigned long address,
                                   u32 length)
 {
     bwcpoint_p bkpt = NULL;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/libxendebug/xendebug.h
--- a/tools/debugger/libxendebug/xendebug.h     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/libxendebug/xendebug.h     Thu Aug 25 22:53:20 2005
@@ -9,7 +9,7 @@
 #ifndef _XENDEBUG_H_DEFINED
 #define _XENDEBUG_H_DEFINED
 
-#include <xc.h>
+#include <xenctrl.h>
 
 int xendebug_attach(int xc_handle,
                    u32 domid,
@@ -45,7 +45,7 @@
 int xendebug_read_memory(int xc_handle,
                         u32 domid,
                         u32 vcpu,
-                        memory_t address,
+                        unsigned long address,
                         u32 length,
                         u8 *data);
 
@@ -53,7 +53,7 @@
 int xendebug_write_memory(int xc_handle,
                          u32 domid,
                          u32 vcpu,
-                         memory_t address,
+                         unsigned long address,
                          u32 length,
                          u8 *data);
 
@@ -61,13 +61,13 @@
 int xendebug_insert_memory_breakpoint(int xc_handle,
                                      u32 domid,
                                      u32 vcpu,
-                                     memory_t address,
+                                     unsigned long address,
                                      u32 length);
 
 int xendebug_remove_memory_breakpoint(int xc_handle,
                                      u32 domid,
                                      u32 vcpu,
-                                     memory_t address,
+                                     unsigned long address,
                                      u32 length);
 
 int xendebug_query_domain_stop(int xc_handle,
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Domain.ml
--- a/tools/debugger/pdb/Domain.ml      Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Domain.ml      Thu Aug 25 22:53:20 2005
@@ -36,6 +36,7 @@
       Printf.sprintf "{domain} domain: %d, vcpu: %d"
                       ctx.domain  ctx.vcpu
 
+external read_register : context_t -> int -> int32 = "dom_read_register"
 external read_registers : context_t -> registers = "dom_read_registers"
 external write_register : context_t -> register -> int32 -> unit =
   "dom_write_register"
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Domain.mli
--- a/tools/debugger/pdb/Domain.mli     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Domain.mli     Thu Aug 25 22:53:20 2005
@@ -22,6 +22,7 @@
 
 val string_of_context : context_t -> string
 
+val read_register : context_t -> int -> int32
 val read_registers : context_t -> registers
 val write_register : context_t -> register -> int32 -> unit
 val read_memory : context_t -> int32 -> int -> int list
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Makefile
--- a/tools/debugger/pdb/Makefile       Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Makefile       Thu Aug 25 22:53:20 2005
@@ -33,7 +33,8 @@
 LIBS       += unix str
 
 # bc = byte-code, dc = debug byte-code
-all : patches dc
+# patches = patch linux domU source code
+all : dc
 
 SOURCES    += pdb_caml_xc.c 
 SOURCES    += pdb_caml_domain.c pdb_caml_process.c
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/PDB.ml
--- a/tools/debugger/pdb/PDB.ml Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/PDB.ml Thu Aug 25 22:53:20 2005
@@ -219,6 +219,17 @@
 
 (***************************************************************************)
 
+let read_register ctx register =        (* register is an int register number *)
+  match ctx with
+  | Void -> 0l                                      (* default for startup *)
+  | Domain d  -> Domain.read_register d register
+  | Process p ->
+      begin
+       Process.read_register p register;
+       raise No_reply                    (* Process.read_register returns unit *)
+      end
+  | _ -> raise (Unimplemented "read registers")
+
 let read_registers ctx =
   match ctx with
   | Void -> Intel.null_registers                    (* default for startup *)
@@ -278,14 +289,42 @@
 let insert_memory_breakpoint ctx addr len =
   match ctx with
   | Domain d  -> Domain.insert_memory_breakpoint d addr len
-  | Process p  -> Process.insert_memory_breakpoint p addr len
+  | Process p  ->
+      begin
+       Process.insert_memory_breakpoint p addr len;
+       raise No_reply
+      end
   | _ -> raise (Unimplemented "insert memory breakpoint")
 
 let remove_memory_breakpoint ctx addr len =
   match ctx with
   | Domain d  -> Domain.remove_memory_breakpoint d addr len
-  | Process p  -> Process.remove_memory_breakpoint p addr len
+  | Process p  ->
+      begin
+       Process.remove_memory_breakpoint p addr len;
+       raise No_reply
+      end
   | _ -> raise (Unimplemented "remove memory breakpoint")
+
+let insert_watchpoint ctx kind addr len =     (* Process contexts only, so far *)
+  match ctx with
+(*  | Domain d  -> Domain.insert_watchpoint d kind addr len  TODO *)
+  | Process p  ->
+      begin
+       Process.insert_watchpoint p kind addr len;
+       raise No_reply                (* Process.insert_watchpoint returns unit *)
+      end
+  | _ -> raise (Unimplemented "insert watchpoint")
+
+let remove_watchpoint ctx kind addr len =     (* Process contexts only, so far *)
+  match ctx with
+(*  | Domain d  -> Domain.remove_watchpoint d kind addr len  TODO *)
+  | Process p  ->
+      begin
+       Process.remove_watchpoint p kind addr len;
+       raise No_reply                (* Process.remove_watchpoint returns unit *)
+      end
+  | _ -> raise (Unimplemented "remove watchpoint")
 
 
 let pause ctx =
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Process.ml
--- a/tools/debugger/pdb/Process.ml     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Process.ml     Thu Aug 25 22:53:20 2005
@@ -54,6 +54,7 @@
   proc_ctx.ring   <- Xen_domain.get_ring   dom_ctx;
   _attach_debugger proc_ctx
 
+external read_register : context_t -> int -> unit = "proc_read_register"
 external read_registers : context_t -> unit = "proc_read_registers"
 external write_register : context_t -> register -> int32 -> unit =
   "proc_write_register"
@@ -69,6 +70,10 @@
   "proc_insert_memory_breakpoint"
 external remove_memory_breakpoint : context_t -> int32 -> int -> unit = 
   "proc_remove_memory_breakpoint"
+external insert_watchpoint : context_t -> int -> int32 -> int -> unit =
+  "proc_insert_watchpoint"
+external remove_watchpoint : context_t -> int -> int32 -> int -> unit =
+  "proc_remove_watchpoint"
 
 let pause ctx =
   pause_target ctx
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/Process.mli
--- a/tools/debugger/pdb/Process.mli    Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/Process.mli    Thu Aug 25 22:53:20 2005
@@ -26,7 +26,7 @@
 val detach_debugger : context_t -> unit
 val pause : context_t -> unit
 
-
+val read_register : context_t -> int -> unit
 val read_registers : context_t -> unit
 val write_register : context_t -> register -> int32 -> unit
 val read_memory : context_t -> int32 -> int -> unit
@@ -37,3 +37,5 @@
 
 val insert_memory_breakpoint : context_t -> int32 -> int -> unit
 val remove_memory_breakpoint : context_t -> int32 -> int -> unit
+val insert_watchpoint : context_t -> int -> int32 -> int -> unit
+val remove_watchpoint : context_t -> int -> int32 -> int -> unit
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/debugger.ml
--- a/tools/debugger/pdb/debugger.ml    Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/debugger.ml    Thu Aug 25 22:53:20 2005
@@ -53,10 +53,20 @@
   PDB.step ctx;
   raise No_reply
 
+(**
+   Read Register Command.
+   return register as a 4-byte value.
+ *)
+let gdb_read_register ctx command =
+  let read_reg register =
+    (Printf.sprintf "%08lx" (Util.flip_int32 (PDB.read_register ctx register)))
+  in
+  Scanf.sscanf command "p%x" read_reg
+    
 
 (**
    Read Registers Command.
-   returns 16 4-byte registers in a particular defined by gdb.
+   returns 16 4-byte registers in a particular format defined by gdb.
  *)
 let gdb_read_registers ctx =
   let regs = PDB.read_registers ctx in
@@ -100,7 +110,7 @@
     with
       Failure s -> "E02"
   in
-  Scanf.sscanf command "m%lx,%d" read_mem
+  Scanf.sscanf command "m%lx,%x" read_mem
 
 
 
@@ -218,16 +228,24 @@
 (**
    Insert Breakpoint or Watchpoint Packet
  *)
+
+let bwc_watch_write  = 102                              (* from pdb_module.h *)
+let bwc_watch_read   = 103
+let bwc_watch_access = 104
+
 let gdb_insert_bwcpoint ctx command =
   let insert cmd addr length =
     try
       match cmd with
       | 0 -> PDB.insert_memory_breakpoint ctx addr length; "OK"
+      | 2 -> PDB.insert_watchpoint ctx bwc_watch_write  addr length; "OK"
+      | 3 -> PDB.insert_watchpoint ctx bwc_watch_read   addr length; "OK"
+      | 4 -> PDB.insert_watchpoint ctx bwc_watch_access addr length; "OK"
       | _ -> ""
     with
       Failure s -> "E03"
   in
-  Scanf.sscanf command "Z%d,%lx,%d" insert
+  Scanf.sscanf command "Z%d,%lx,%x" insert
 
 (**
    Remove Breakpoint or Watchpoint Packet
@@ -237,6 +255,9 @@
     try
       match cmd with
       | 0 -> PDB.remove_memory_breakpoint ctx addr length; "OK"
+      | 2 -> PDB.remove_watchpoint ctx bwc_watch_write  addr length; "OK"
+      | 3 -> PDB.remove_watchpoint ctx bwc_watch_read   addr length; "OK"
+      | 4 -> PDB.remove_watchpoint ctx bwc_watch_access addr length; "OK"
       | _ -> ""
     with
       Failure s -> "E04"
@@ -260,6 +281,7 @@
     | 'k' -> gdb_kill ()
     | 'm' -> gdb_read_memory ctx command
     | 'M' -> gdb_write_memory ctx command
+    | 'p' -> gdb_read_register ctx command
     | 'P' -> gdb_write_register ctx command
     | 'q' -> gdb_query command
     | 's' -> gdb_step ctx
@@ -270,7 +292,7 @@
     | 'Z' -> gdb_insert_bwcpoint ctx command
     | _ -> 
        print_endline (Printf.sprintf "unknown gdb command [%s]" command);
-       "E02"
+       ""
   with
     Unimplemented s ->
       print_endline (Printf.sprintf "loser. unimplemented command [%s][%s]" 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/debug.c
--- a/tools/debugger/pdb/linux-2.6-module/debug.c       Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/debug.c       Thu Aug 25 22:53:20 2005
@@ -9,33 +9,143 @@
 #include <asm-i386/kdebug.h>
 #include <asm-xen/asm-i386/processor.h>
 #include <asm-xen/asm-i386/ptrace.h>
+#include <asm-xen/asm-i386/tlbflush.h>
 #include <asm-xen/xen-public/xen.h>
 #include "pdb_module.h"
 #include "pdb_debug.h"
 
-#define BWC_DEBUG 1
-#define BWC_INT3  3
+
+static int pdb_debug_fn (struct pt_regs *regs, long error_code,
+                         unsigned int condition);
+static int pdb_int3_fn (struct pt_regs *regs, long error_code);
+static int pdb_page_fault_fn (struct pt_regs *regs, long error_code,
+                              unsigned int condition);
+
+/***********************************************************************/
+
 typedef struct bwcpoint                           /* break/watch/catch point */
 {
     struct list_head list;
-    memory_t address;
-    u32 domain;
+    unsigned long address;         /* address in the target; 0 for BWC_DEBUG */
+    int length;                         /* bytes covered, starting at address */
+
+    u8  type;                                                     /* BWC_??? */
+    u8  mode;                   /* for BWC_PAGE, the current protection mode */
     u32 process;
-    u8  old_value;                            /* old value for software bkpt */
-    u8  type;                                                     /* BWC_??? */
+    u8  error;               /* error occurred when enabling: don't disable. */
+
+    /* original values */
+    u8    orig_bkpt;                               /* single byte breakpoint */
+    pte_t orig_pte;          /* saved pte; presumably for BWC_PAGE -- confirm */
+
+    struct list_head watchpt_read_list;     /* read watchpoints on this page */
+    struct list_head watchpt_write_list;                            /* write */
+    struct list_head watchpt_access_list;                          /* access */
+    struct list_head watchpt_disabled_list;                      /* disabled */
+
+    struct bwcpoint *parent;             /* watchpoint: bwc_watch (the page) */
+    struct bwcpoint *watchpoint;      /* bwc_watch_step: original watchpoint */
 } bwcpoint_t, *bwcpoint_p;
 
-static bwcpoint_t bwcpoint_list;
+static struct list_head bwcpoint_list = LIST_HEAD_INIT(bwcpoint_list);
+/* Allocate a zeroed bwcpoint into _var; leaves _var NULL on failure. */
+#define _pdb_bwcpoint_alloc(_var) \
+{ \
+    if ( (_var = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL)) == NULL ) \
+        printk("error: unable to allocate memory %d\n", __LINE__); \
+    else { \
+        memset(_var, 0, sizeof(bwcpoint_t)); \
+        INIT_LIST_HEAD(&_var->watchpt_read_list); \
+        INIT_LIST_HEAD(&_var->watchpt_write_list); \
+        INIT_LIST_HEAD(&_var->watchpt_access_list); \
+        INIT_LIST_HEAD(&_var->watchpt_disabled_list); \
+    } \
+}
+
+/***********************************************************************/
+
+static void _pdb_bwc_print_list (struct list_head *, char *, int);
+
+/* Print one bwcpoint, then each of its non-empty watchpoint sub-lists. */
+static void
+_pdb_bwc_print (bwcpoint_p bwc, char *label, int level)
+{
+#define __pdb_bwc_print_sub(__list, __tag) \
+    if ( !list_empty(&bwc->__list) ) \
+        _pdb_bwc_print_list(&bwc->__list, (__tag), level)
+    printk("%s%03d 0x%08lx:0x%02x %c\n", label, bwc->type,
+           bwc->address, bwc->length, bwc->error ? 'e' : '-');
+    __pdb_bwc_print_sub(watchpt_read_list,     "r");
+    __pdb_bwc_print_sub(watchpt_write_list,    "w");
+    __pdb_bwc_print_sub(watchpt_access_list,   "a");
+    __pdb_bwc_print_sub(watchpt_disabled_list, "d");
+#undef __pdb_bwc_print_sub
+}
+
+/* Print every entry of bwc_list through _pdb_bwc_print (one level deeper),
+ * or the text "empty list" when the list has no entries. */
+static void
+_pdb_bwc_print_list (struct list_head *bwc_list, char *label, int level)
+{
+    const char *lead  = level > 0 ? "  " : "";
+    const char *trail = level > 0 ? "" : "  ";
+    bwcpoint_p bwc;
+    int counter = 0;
+
+    list_for_each_entry(bwc, bwc_list, list)
+    {
+        printk("  %s[%02d]%s ", lead, counter++, trail);
+        _pdb_bwc_print(bwc, label, level+1);
+    }
+
+    if (counter == 0)
+        printk("  empty list\n");
+}
 
 void
-pdb_initialize_bwcpoint (void)
-{
-    memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t));
-    INIT_LIST_HEAD(&bwcpoint_list.list);
-
-    return;
-}
-
+pdb_bwc_print_list (void)
+{
+    _pdb_bwc_print_list(&bwcpoint_list, " ", 0);  /* whole list, top level */
+}
+
+/* Find the watchpoint of 'process' covering 'address'; NULL if none. */
+bwcpoint_p
+pdb_search_watchpoint (u32 process, unsigned long address)
+{
+    bwcpoint_p bwc_watch = (bwcpoint_p) 0;
+    bwcpoint_p bwc_entry = (bwcpoint_p) 0;
+    struct list_head *ptr;
+
+    list_for_each(ptr, &bwcpoint_list)                /* find bwc page entry */
+    {
+        bwc_watch = list_entry(ptr, bwcpoint_t, list);
+        if (bwc_watch->address == (address & PAGE_MASK)) break;
+    }
+
+    /* ptr is back at the list head iff the loop ran off the end (or the
+     * list is empty); bwc_watch is then stale or NULL, never a match. */
+    if ( ptr == &bwcpoint_list )
+        return (bwcpoint_p) 0;
+
+#define __pdb_search_watchpoint_list(__list) \
+    list_for_each(ptr, (__list))  \
+    { \
+        bwc_entry = list_entry(ptr, bwcpoint_t, list); \
+        if ( bwc_entry->process == process &&          \
+             bwc_entry->address <= address &&          \
+             bwc_entry->address + bwc_entry->length > address ) \
+            return bwc_entry; \
+    }
+
+    __pdb_search_watchpoint_list(&bwc_watch->watchpt_read_list);
+    __pdb_search_watchpoint_list(&bwc_watch->watchpt_write_list);
+    __pdb_search_watchpoint_list(&bwc_watch->watchpt_access_list);
+#undef __pdb_search_watchpoint_list
+
+    return (bwcpoint_p) 0;
+}
+
+/*************************************************************/
 
 int
 pdb_suspend (struct task_struct *target)
@@ -134,6 +244,35 @@
     *(unsigned long *) stack = value;
 
     return;
+}
+
+/* Read register op->reg of target into op->value; -1 if reg is unknown. */
+int
+pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op)
+{
+    int rc = 0;
+
+    switch (op->reg)
+    {
+    case  0: op->value = _pdb_get_register(target, LINUX_EAX); break;
+    case  1: op->value = _pdb_get_register(target, LINUX_ECX); break;
+    case  2: op->value = _pdb_get_register(target, LINUX_EDX); break;
+    case  3: op->value = _pdb_get_register(target, LINUX_EBX); break;
+    case  4: op->value = _pdb_get_register(target, LINUX_ESP); break;
+    case  5: op->value = _pdb_get_register(target, LINUX_EBP); break;
+    case  6: op->value = _pdb_get_register(target, LINUX_ESI); break;
+    case  7: op->value = _pdb_get_register(target, LINUX_EDI); break;
+    case  8: op->value = _pdb_get_register(target, LINUX_EIP); break;
+    case  9: op->value = _pdb_get_register(target, LINUX_EFL); break;
+    case 10: op->value = _pdb_get_register(target, LINUX_CS); break;
+    case 11: op->value = _pdb_get_register(target, LINUX_SS); break;
+    case 12: op->value = _pdb_get_register(target, LINUX_DS); break;
+    case 13: op->value = _pdb_get_register(target, LINUX_ES); break;
+    case 14: op->value = _pdb_get_register(target, LINUX_FS); break;
+    case 15: op->value = _pdb_get_register(target, LINUX_GS); break;
+    default: rc = -1; break;      /* unknown: op->value is left untouched */
+    }
+    return rc;
+}
 
 int
@@ -209,18 +348,14 @@
     eflags |= X86_EFLAGS_TF;
     _pdb_set_register(target, LINUX_EFL, eflags);
 
-    bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
-    if ( bkpt == NULL )
-    {
-        printk("error: unable to allocation memory\n");
-        return -1;
-    }
+    _pdb_bwcpoint_alloc(bkpt);
+    if ( bkpt == NULL )  return -1;
 
     bkpt->process = target->pid;
     bkpt->address = 0;
     bkpt->type    = BWC_DEBUG;
     
-    list_add(&bkpt->list, &bwcpoint_list.list);
+    list_add_tail(&bkpt->list, &bwcpoint_list);
 
     wake_up_process(target);
 
@@ -229,7 +364,7 @@
 
 int
 pdb_insert_memory_breakpoint (struct task_struct *target, 
-                              memory_t address, u32 length)
+                              unsigned long address, u32 length)
 {
     int rc = 0;
     bwcpoint_p bkpt;
@@ -237,38 +372,34 @@
 
     printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length);
 
-    bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL);
-    if ( bkpt == NULL )
-    {
-        printk("error: unable to allocation memory\n");
+    if ( length != 1 )
+    {
+        printk("error: breakpoint length should be 1\n");
         return -1;
     }
 
-    if ( length != 1 )
-    {
-        printk("error: breakpoint length should be 1\n");
-        kfree(bkpt);
-        return -1;
-    }
+    _pdb_bwcpoint_alloc(bkpt);
+    if ( bkpt == NULL ) return -1;
 
     bkpt->process = target->pid;
     bkpt->address = address;
     bkpt->type    = BWC_INT3;
 
-    pdb_access_memory(target, address, &bkpt->old_value, 1, 0);
-    pdb_access_memory(target, address, &breakpoint_opcode, 1, 1);
+    pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_READ);
+    pdb_access_memory(target, address, &breakpoint_opcode, 1, PDB_MEM_WRITE);
     
-    list_add(&bkpt->list, &bwcpoint_list.list);
+    list_add_tail(&bkpt->list, &bwcpoint_list);
 
     printk("breakpoint_set %d:%lx  OLD: 0x%x\n",
-           target->pid, address, bkpt->old_value);
+           target->pid, address, bkpt->orig_bkpt);
+    pdb_bwc_print_list();
 
     return rc;
 }
 
 int
 pdb_remove_memory_breakpoint (struct task_struct *target,
-                              memory_t address, u32 length)
+                              unsigned long address, u32 length)
 {
     int rc = 0;
     bwcpoint_p bkpt = NULL;
@@ -276,7 +407,7 @@
     printk ("remove breakpoint %d:%lx\n", target->pid, address);
 
     struct list_head *entry;
-    list_for_each(entry, &bwcpoint_list.list)
+    list_for_each(entry, &bwcpoint_list)
     {
         bkpt = list_entry(entry, bwcpoint_t, list);
         if ( target->pid == bkpt->process && 
@@ -285,17 +416,223 @@
             break;
     }
     
-    if (bkpt == &bwcpoint_list || bkpt == NULL)
+    if (entry == &bwcpoint_list)
     {
         printk ("error: no breakpoint found\n");
         return -1;
     }
 
+    pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_WRITE);
+
     list_del(&bkpt->list);
-
-    pdb_access_memory(target, address, &bkpt->old_value, 1, 1);
-
     kfree(bkpt);
+
+    pdb_bwc_print_list();
+
+    return rc;
+}
+
+#define PDB_PTE_UPDATE   1
+#define PDB_PTE_RESTORE  2
+
+int
+pdb_change_pte (struct task_struct *target, bwcpoint_p bwc, int mode)
+{
+    int rc = 0;                 /* never reassigned: 0 once the walk succeeds */
+    pgd_t *pgd;
+    pud_t *pud;
+    pmd_t *pmd;
+    pte_t *ptep;
+    /* walk target's page tables to the pte for bwc->address; -1..-4 on failure */
+    pgd = pgd_offset(target->mm, bwc->address);
+    if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))  return -1;
+
+    pud = pud_offset(pgd, bwc->address);
+    if (pud_none(*pud) || unlikely(pud_bad(*pud))) return -2;
+
+    pmd = pmd_offset(pud, bwc->address);
+    if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) return -3;
+
+    ptep = pte_offset_map(pmd, bwc->address);
+    if (!ptep)  return -4;
+
+    switch ( mode )
+    {
+    case PDB_PTE_UPDATE:      /* added or removed a watchpoint.  update pte. */
+    {
+        pte_t new_pte;
+
+        if ( pte_val(bwc->parent->orig_pte) == 0 )    /* new watchpoint page */
+        {
+            bwc->parent->orig_pte = *ptep;
+        }
+        /* rebuild from the saved pte, protecting per the non-empty watch lists */
+        new_pte = bwc->parent->orig_pte;
+
+        if ( !list_empty(&bwc->parent->watchpt_read_list)  || 
+             !list_empty(&bwc->parent->watchpt_access_list) )
+        {
+            new_pte = pte_rdprotect(new_pte);
+        }
+
+        if ( !list_empty(&bwc->parent->watchpt_write_list) ||
+             !list_empty(&bwc->parent->watchpt_access_list) )
+        {
+            new_pte = pte_wrprotect(new_pte);
+        }
+        
+        if ( pte_val(new_pte) != pte_val(*ptep) )  /* only write/flush on change */
+        {
+            *ptep = new_pte;
+            flush_tlb_mm(target->mm);
+        }
+        break;
+    }
+    case PDB_PTE_RESTORE :   /* suspend watchpoint by restoring original pte */
+    {
+        *ptep = bwc->parent->orig_pte;
+        flush_tlb_mm(target->mm);
+        break;
+    }
+    default :                           /* unknown mode: pte is left untouched */
+    {
+        printk("(linux) unknown mode %d %d\n", mode, __LINE__);
+        break;
+    }
+    }
+
+    pte_unmap(ptep);                /* can i flush the tlb before pte_unmap? */
+
+    return rc;
+}
+
+int
+pdb_insert_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt)
+{
+    int rc = 0;
+    /* add a watchpoint for target: 0 on success, -1 no memory, -2 bad type */
+    bwcpoint_p bwc_watch;
+    bwcpoint_p bwc_entry;
+    struct list_head *ptr;
+    unsigned long page = watchpt->address & PAGE_MASK;
+    struct list_head *watchpoint_list;
+    
+    printk("insert watchpoint: %d %x %x\n", 
+           watchpt->type, watchpt->address, watchpt->length);
+
+    list_for_each(ptr, &bwcpoint_list)   /* find this process's bwc page entry */
+    {
+        bwc_watch = list_entry(ptr, bwcpoint_t, list);
+        if (bwc_watch->type == BWC_WATCH && bwc_watch->process == target->pid &&
+            bwc_watch->address == page)  goto got_bwc_watch;
+    }
+
+    _pdb_bwcpoint_alloc(bwc_watch);                  /* create new bwc:watch */
+    if ( bwc_watch == NULL ) return -1;
+
+    bwc_watch->type    = BWC_WATCH;
+    bwc_watch->process = target->pid;
+    bwc_watch->address = page;
+
+    list_add_tail(&bwc_watch->list, &bwcpoint_list);
+
+ got_bwc_watch:
+
+    switch (watchpt->type)             /* pick the per-page list for this type */
+    {
+    case BWC_WATCH_READ:
+        watchpoint_list = &bwc_watch->watchpt_read_list; break;
+    case BWC_WATCH_WRITE: 
+        watchpoint_list = &bwc_watch->watchpt_write_list; break;
+    case BWC_WATCH_ACCESS:
+        watchpoint_list = &bwc_watch->watchpt_access_list; break;
+    default:
+        printk("unknown type %d\n", watchpt->type); return -2;
+    }
+
+    _pdb_bwcpoint_alloc(bwc_entry);                  /* create new bwc:entry */
+    if ( bwc_entry == NULL ) return -1;
+
+    bwc_entry->process = target->pid;
+    bwc_entry->address = watchpt->address;
+    bwc_entry->length  = watchpt->length;
+    bwc_entry->type    = watchpt->type;
+    bwc_entry->parent  = bwc_watch;
+
+    list_add_tail(&bwc_entry->list, watchpoint_list);
+    pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE);
+
+    pdb_bwc_print_list();
+
+    return rc;
+}
+
+int 
+pdb_remove_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt)
+{
+    int rc = 0;
+    bwcpoint_p bwc_watch = (bwcpoint_p) NULL;
+    bwcpoint_p bwc_entry = (bwcpoint_p) NULL;
+    unsigned long page = watchpt->address & PAGE_MASK;
+    struct list_head *ptr;
+    struct list_head *watchpoint_list;
+    /* remove a watchpoint of target: 0 on success, -1 not found, -2 bad type */
+    printk("remove watchpoint: %d %x %x\n", 
+           watchpt->type, watchpt->address, watchpt->length);
+
+    list_for_each(ptr, &bwcpoint_list)  /* find this process's bwc page entry */
+    {
+        bwc_watch = list_entry(ptr, bwcpoint_t, list);
+        if (bwc_watch->type == BWC_WATCH && bwc_watch->process == target->pid &&
+            bwc_watch->address == page) break;
+    }
+    if ( ptr == &bwcpoint_list )      /* loop ran off the end: nothing matched */
+    {
+        printk("(linux) delete watchpoint: can't find bwc page 0x%08x\n",
+               watchpt->address);
+        return -1;
+    }
+
+    switch (watchpt->type)
+    {
+    case BWC_WATCH_READ:
+        watchpoint_list = &bwc_watch->watchpt_read_list; break;
+    case BWC_WATCH_WRITE:
+        watchpoint_list = &bwc_watch->watchpt_write_list; break;
+    case BWC_WATCH_ACCESS:
+        watchpoint_list = &bwc_watch->watchpt_access_list; break;
+    default:
+        printk("unknown type %d\n", watchpt->type); return -2;
+    }
+
+    list_for_each(ptr, watchpoint_list)                   /* find watchpoint */
+    {
+        bwc_entry = list_entry(ptr, bwcpoint_t, list);
+        if ( bwc_entry->address == watchpt->address &&
+             bwc_entry->length  == watchpt->length ) break;
+    }
+
+    if ( ptr == watchpoint_list )     /* bwc_entry is stale here, test the cursor */
+    {
+        printk("(linux) delete watchpoint: can't find watchpoint 0x%08x\n",
+               watchpt->address);
+        return -1;
+    }
+    
+    list_del(&bwc_entry->list);
+    pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE);
+    kfree(bwc_entry);
+
+    /* drop the page entry once its read, write and access lists are all empty */
+    if ( list_empty(&bwc_watch->watchpt_read_list)  &&
+         list_empty(&bwc_watch->watchpt_write_list) &&
+         list_empty(&bwc_watch->watchpt_access_list) )
+    {
+        list_del(&bwc_watch->list);
+        kfree(bwc_watch);
+    }
+
+    pdb_bwc_print_list();
 
     return rc;
 }
@@ -312,16 +649,24 @@
        switch (val) 
     {
        case DIE_DEBUG:
-               if (pdb_debug_fn(args->regs, args->trapnr, args->err))
+               if ( pdb_debug_fn(args->regs, args->trapnr, args->err) )
                        return NOTIFY_STOP;
                break;
     case DIE_TRAP:
-               if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err))
+               if ( args->trapnr == 3 && pdb_int3_fn(args->regs, args->err) )
                        return NOTIFY_STOP;
         break;
        case DIE_INT3:          /* without kprobes, we should never see DIE_INT3 */
+               if ( pdb_int3_fn(args->regs, args->err) )
+                       return NOTIFY_STOP;
+               break;
+       case DIE_PAGE_FAULT:
+               if ( pdb_page_fault_fn(args->regs, args->trapnr, args->err) )
+                       return NOTIFY_STOP;
+               break;
        case DIE_GPF:
-       case DIE_PAGE_FAULT:
+        printk("---------------GPF\n");
+        break;
        default:
                break;
        }
@@ -330,70 +675,110 @@
 }
 
 
-int
+static int
 pdb_debug_fn (struct pt_regs *regs, long error_code, 
                    unsigned int condition)
 {
     pdb_response_t resp;
     bwcpoint_p bkpt = NULL;
-
     struct list_head *entry;
-    list_for_each(entry, &bwcpoint_list.list)
+
+    printk("pdb_debug_fn\n");
+
+    list_for_each(entry, &bwcpoint_list)
     {
         bkpt = list_entry(entry, bwcpoint_t, list);
         if ( current->pid == bkpt->process && 
-             bkpt->type == BWC_DEBUG )
+             (bkpt->type == BWC_DEBUG ||                      /* single step */
+              bkpt->type == BWC_WATCH_STEP))  /* single step over watchpoint */
             break;
     }
     
-    if (bkpt == &bwcpoint_list || bkpt == NULL)
+    if (entry == &bwcpoint_list)
     {
         printk("not my debug  0x%x 0x%lx\n", current->pid, regs->eip);
         return 0;
     }
 
-    list_del(&bkpt->list);
-
     pdb_suspend(current);
 
-    printk("(pdb) debug  pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+    printk("(pdb) %s  pid: %d, eip: 0x%08lx\n", 
+           bkpt->type == BWC_DEBUG ? "debug" : "watch-step",
+           current->pid, regs->eip);
 
     regs->eflags &= ~X86_EFLAGS_TF;
        set_tsk_thread_flag(current, TIF_SINGLESTEP);
 
-    resp.operation = PDB_OPCODE_STEP;
+    switch (bkpt->type)
+    {
+    case BWC_DEBUG:
+        resp.operation = PDB_OPCODE_STEP;
+        break;
+    case BWC_WATCH_STEP:
+    {
+        struct list_head *watchpoint_list;
+        bwcpoint_p watch_page = bkpt->watchpoint->parent;
+
+        switch (bkpt->watchpoint->type)
+        {
+        case BWC_WATCH_READ:
+            watchpoint_list = &watch_page->watchpt_read_list; break;
+        case BWC_WATCH_WRITE: 
+            watchpoint_list = &watch_page->watchpt_write_list; break;
+        case BWC_WATCH_ACCESS:
+            watchpoint_list = &watch_page->watchpt_access_list; break;
+        default:
+            printk("unknown type %d\n", bkpt->watchpoint->type); return 0;
+        }
+
+        resp.operation = PDB_OPCODE_WATCHPOINT;
+        list_del_init(&bkpt->watchpoint->list);
+        list_add_tail(&bkpt->watchpoint->list, watchpoint_list);
+        pdb_change_pte(current, bkpt->watchpoint, PDB_PTE_UPDATE);
+        pdb_bwc_print_list();
+        break;
+    }
+    default:
+        printk("unknown breakpoint type %d %d\n", __LINE__, bkpt->type);
+        return 0;
+    }
+
     resp.process   = current->pid;
     resp.status    = PDB_RESPONSE_OKAY;
 
     pdb_send_response(&resp);
 
+    list_del(&bkpt->list);
+    kfree(bkpt);
+
     return 1;
 }
 
 
-int
+static int
 pdb_int3_fn (struct pt_regs *regs, long error_code)
 {
     pdb_response_t resp;
     bwcpoint_p bkpt = NULL;
+    unsigned long address = regs->eip - 1;
 
     struct list_head *entry;
-    list_for_each(entry, &bwcpoint_list.list)
+    list_for_each(entry, &bwcpoint_list)
     {
         bkpt = list_entry(entry, bwcpoint_t, list);
         if ( current->pid == bkpt->process && 
-             regs->eip == bkpt->address    &&
+             address == bkpt->address      &&
              bkpt->type == BWC_INT3 )
             break;
     }
     
-    if (bkpt == &bwcpoint_list || bkpt == NULL)
-    {
-        printk("not my int3 bkpt  0x%x 0x%lx\n", current->pid, regs->eip);
+    if (entry == &bwcpoint_list)
+    {
+        printk("not my int3 bkpt  0x%x 0x%lx\n", current->pid, address);
         return 0;
     }
 
-    printk("(pdb) int3  pid: %d, eip: 0x%08lx\n", current->pid, regs->eip);
+    printk("(pdb) int3  pid: %d, eip: 0x%08lx\n", current->pid, address);
 
     pdb_suspend(current);
 
@@ -405,6 +790,54 @@
 
     return 1;
 }
+
+static int
+pdb_page_fault_fn (struct pt_regs *regs, long error_code, 
+                   unsigned int condition)
+{
+    unsigned long cr2;
+    bwcpoint_p bwc;
+    bwcpoint_p bkpt;
+
+    /* page fault hook: returns 0 if the fault is not ours, non-zero otherwise */
+    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
+
+    bwc = pdb_search_watchpoint(current->pid, cr2);
+    if ( !bwc )
+    {
+        return 0;                                                /* not mine */
+    }
+
+    printk("page_fault cr2:%08lx err:%lx eip:%08lx\n", 
+           cr2, error_code, regs->eip);
+
+    /*
+     * create a bwcpoint entry so pdb_debug_fn knows what to do once we regain
+     * control.  allocate it before touching any state so that a failed
+     * allocation leaves the watchpoint armed and the pte untouched.
+     */
+    _pdb_bwcpoint_alloc(bkpt);
+    if ( bkpt == NULL )  return -1;
+
+    bkpt->process    = current->pid;
+    bkpt->address    = 0;
+    bkpt->type       = BWC_WATCH_STEP;
+    bkpt->watchpoint = bwc;
+
+    /* disable the watchpoint */
+    list_del_init(&bwc->list);
+    list_add_tail(&bwc->list, &bwc->parent->watchpt_disabled_list);
+    pdb_change_pte(current, bwc, PDB_PTE_RESTORE);
+
+    /* single step the faulting instruction */
+    regs->eflags |= X86_EFLAGS_TF;
+
+    /* add to head so we see it first the next time we break */
+    list_add(&bkpt->list, &bwcpoint_list);                
+    pdb_bwc_print_list();
+    return 1;
+}
+
 
 /*
  * Local variables:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/module.c
--- a/tools/debugger/pdb/linux-2.6-module/module.c      Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/module.c      Thu Aug 25 22:53:20 2005
@@ -98,6 +98,11 @@
         printk("(linux) detach 0x%x\n", request->process);
         resp.status = PDB_RESPONSE_OKAY;
         break;
+    case PDB_OPCODE_RD_REG :
+        resp.u.rd_reg.reg = request->u.rd_reg.reg;
+        pdb_read_register(target, &resp.u.rd_reg);
+        resp.status = PDB_RESPONSE_OKAY;
+        break;
     case PDB_OPCODE_RD_REGS :
         pdb_read_registers(target, &resp.u.rd_regs);
         resp.status = PDB_RESPONSE_OKAY;
@@ -108,14 +113,16 @@
         break;
     case PDB_OPCODE_RD_MEM :
         pdb_access_memory(target, request->u.rd_mem.address,
-                          &resp.u.rd_mem.data, request->u.rd_mem.length, 0);
+                          &resp.u.rd_mem.data, request->u.rd_mem.length, 
+                          PDB_MEM_READ);
         resp.u.rd_mem.address = request->u.rd_mem.address;
         resp.u.rd_mem.length  = request->u.rd_mem.length;
         resp.status = PDB_RESPONSE_OKAY;
         break;
     case PDB_OPCODE_WR_MEM :
         pdb_access_memory(target, request->u.wr_mem.address,
-                         &request->u.wr_mem.data, request->u.wr_mem.length, 1);
+                         &request->u.wr_mem.data, request->u.wr_mem.length, 
+                          PDB_MEM_WRITE);
         resp.status = PDB_RESPONSE_OKAY;
         break;
     case PDB_OPCODE_CONTINUE :
@@ -137,6 +144,14 @@
                                      request->u.bkpt.length);
         resp.status = PDB_RESPONSE_OKAY;
         break;
+    case PDB_OPCODE_SET_WATCHPT :
+        pdb_insert_watchpoint(target, &request->u.watchpt);
+        resp.status = PDB_RESPONSE_OKAY;
+        break;
+    case PDB_OPCODE_CLR_WATCHPT :
+        pdb_remove_watchpoint(target, &request->u.watchpt);
+        resp.status = PDB_RESPONSE_OKAY;
+        break;
     default:
         printk("(pdb) unknown request operation %d\n", request->operation);
         resp.status = PDB_RESPONSE_ERROR;
@@ -184,7 +199,7 @@
 }
 
 static void
-pdb_send_connection_status(int status, memory_t ring)
+pdb_send_connection_status(int status, unsigned long ring)
 {
     ctrl_msg_t cmsg = 
     {
@@ -248,8 +263,6 @@
     pdb_sring_t *sring;
 
     printk("----\npdb initialize   %s %s\n", __DATE__, __TIME__);
-
-    pdb_initialize_bwcpoint();
 
     /*
     if ( xen_start_info.flags & SIF_INITDOMAIN )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/pdb_debug.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_debug.h   Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h   Thu Aug 25 22:53:20 2005
@@ -6,6 +6,7 @@
 void pdb_initialize_bwcpoint (void);
 int pdb_suspend (struct task_struct *target);
 int pdb_resume (struct task_struct *target);
+int pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op);
 int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op);
 int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op);
 int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req, 
@@ -17,16 +18,16 @@
 int pdb_step (struct task_struct *target);
 
 int pdb_insert_memory_breakpoint (struct task_struct *target, 
-                                  memory_t address, u32 length);
+                                  unsigned long address, u32 length);
 int pdb_remove_memory_breakpoint (struct task_struct *target,
-                                  memory_t address, u32 length);
+                                  unsigned long address, u32 length);
+int pdb_insert_watchpoint (struct task_struct *target,
+                           pdb_op_watchpt_p watchpt);
+int pdb_remove_watchpoint (struct task_struct *target,
+                           pdb_op_watchpt_p watchpt);
 
 int pdb_exceptions_notify (struct notifier_block *self, unsigned long val,
                            void *data);
-
-int pdb_debug_fn (struct pt_regs *regs, long error_code,
-                  unsigned int condition);
-int pdb_int3_fn (struct pt_regs *regs, long error_code);
 
 /* module.c */
 void pdb_send_response (pdb_response_t *response);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-module/pdb_module.h
--- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h  Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h  Thu Aug 25 22:53:20 2005
@@ -14,20 +14,27 @@
 
 #define PDB_OPCODE_DETACH 3
 
-#define PDB_OPCODE_RD_REGS 4
+#define PDB_OPCODE_RD_REG 4
+typedef struct pdb_op_rd_reg
+{
+    u32 reg;              /* register index; pdb_read_register handles 0..15 */
+    u32 value;            /* filled in with the contents of that register */
+} pdb_op_rd_reg_t, *pdb_op_rd_reg_p;
+
+#define PDB_OPCODE_RD_REGS 5
 typedef struct pdb_op_rd_regs
 {
     u32 reg[GDB_REGISTER_FRAME_SIZE];
 } pdb_op_rd_regs_t, *pdb_op_rd_regs_p;
 
-#define PDB_OPCODE_WR_REG 5
+#define PDB_OPCODE_WR_REG 6
 typedef struct pdb_op_wr_reg
 {
     u32 reg;
     u32 value;
 } pdb_op_wr_reg_t, *pdb_op_wr_reg_p;
 
-#define PDB_OPCODE_RD_MEM 6
+#define PDB_OPCODE_RD_MEM 7
 typedef struct pdb_op_rd_mem_req
 {
     u32 address;
@@ -41,7 +48,7 @@
     u8  data[1024];
 } pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p;
 
-#define PDB_OPCODE_WR_MEM 7
+#define PDB_OPCODE_WR_MEM 8
 typedef struct pdb_op_wr_mem
 {
     u32 address;
@@ -49,16 +56,33 @@
     u8  data[1024];                                             /* arbitrary */
 } pdb_op_wr_mem_t, *pdb_op_wr_mem_p;
 
-#define PDB_OPCODE_CONTINUE 8
-#define PDB_OPCODE_STEP     9
+#define PDB_OPCODE_CONTINUE 9
+#define PDB_OPCODE_STEP     10
 
-#define PDB_OPCODE_SET_BKPT 10
-#define PDB_OPCODE_CLR_BKPT 11
+#define PDB_OPCODE_SET_BKPT 11
+#define PDB_OPCODE_CLR_BKPT 12
 typedef struct pdb_op_bkpt
 {
     u32 address;
     u32 length;
 } pdb_op_bkpt_t, *pdb_op_bkpt_p;
+
+#define PDB_OPCODE_SET_WATCHPT 13
+#define PDB_OPCODE_CLR_WATCHPT 14
+#define PDB_OPCODE_WATCHPOINT  15
+typedef struct pdb_op_watchpt
+{
+#define BWC_DEBUG 1
+#define BWC_INT3  3
+#define BWC_WATCH        100                         /* pdb: watchpoint page */
+#define BWC_WATCH_STEP   101                  /* pdb: watchpoint single step */
+#define BWC_WATCH_WRITE  102
+#define BWC_WATCH_READ   103
+#define BWC_WATCH_ACCESS 104
+    u32 type;       /* BWC_WATCH_READ, BWC_WATCH_WRITE or BWC_WATCH_ACCESS */
+    u32 address;    /* address to watch; its page is found with PAGE_MASK */
+    u32 length;     /* NOTE(review): only stored and compared; units unconfirmed */
+} pdb_op_watchpt_t, *pdb_op_watchpt_p;
 
 
 typedef struct 
@@ -68,10 +92,12 @@
     union
     {
         pdb_op_attach_t     attach;
+        pdb_op_rd_reg_t     rd_reg;
         pdb_op_wr_reg_t     wr_reg;
         pdb_op_rd_mem_req_t rd_mem;
         pdb_op_wr_mem_t     wr_mem;
         pdb_op_bkpt_t       bkpt;
+        pdb_op_watchpt_t    watchpt;
     } u;
 } pdb_request_t, *pdb_request_p;
 
@@ -87,6 +113,7 @@
     s16  status;          /* PDB_RESPONSE_???    */
     union
     {
+        pdb_op_rd_reg_t      rd_reg;
         pdb_op_rd_regs_t     rd_regs;
         pdb_op_rd_mem_resp_t rd_mem;
     } u;
@@ -94,6 +121,11 @@
 
 
 DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t);
+
+
+/* from access_process_vm */
+#define PDB_MEM_READ  0
+#define PDB_MEM_WRITE 1
 
 #endif
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch
--- a/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch     Thu Aug 25 22:53:20 2005
@@ -1,7 +1,15 @@
 diff -u linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c
 --- linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c     2005-07-31 22:36:50.000000000 +0100
 +++ linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c 2005-08-01 10:57:31.000000000 +0100
-@@ -172,6 +172,7 @@
+@@ -151,6 +151,7 @@
+ /* TLB flushing */
+ EXPORT_SYMBOL(flush_tlb_page);
+ #endif
++EXPORT_SYMBOL(flush_tlb_mm);
+ 
+ #ifdef CONFIG_X86_IO_APIC
+ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+@@ -172,6 +173,7 @@
  EXPORT_SYMBOL_GPL(unset_nmi_callback);
  
  EXPORT_SYMBOL(register_die_notifier);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_domain.c
--- a/tools/debugger/pdb/pdb_caml_domain.c      Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_domain.c      Thu Aug 25 22:53:20 2005
@@ -6,7 +6,7 @@
  * PDB's OCaml interface library for debugging domains
  */
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <xendebug.h>
 #include <errno.h>
 #include <stdio.h>
@@ -41,6 +41,54 @@
 
 
 /****************************************************************************/
+
+/*
+ * dom_read_register : context_t -> int -> int32
+ */
+value
+dom_read_register (value context, value reg)
+{
+    CAMLparam2(context, reg);
+    CAMLlocal1(result);
+
+    int my_reg = Int_val(reg);
+    cpu_user_regs_t *regs;
+    context_t ctx;
+
+    decode_context(&ctx, context);
+
+    if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, &regs) )
+    {
+        printf("(pdb) read registers error!\n");  fflush(stdout);
+        failwith("read registers error");
+    }
+
+    dump_regs(regs);
+
+    /* box only the requested register; an unknown index raises Failure */
+    switch (my_reg)
+    {
+    case GDB_EAX: result = caml_copy_int32(regs->eax); break;
+    case GDB_ECX: result = caml_copy_int32(regs->ecx); break;
+    case GDB_EDX: result = caml_copy_int32(regs->edx); break;
+    case GDB_EBX: result = caml_copy_int32(regs->ebx); break;
+    case GDB_ESP: result = caml_copy_int32(regs->esp); break;
+    case GDB_EBP: result = caml_copy_int32(regs->ebp); break;
+    case GDB_ESI: result = caml_copy_int32(regs->esi); break;
+    case GDB_EDI: result = caml_copy_int32(regs->edi); break;
+    case GDB_EIP: result = caml_copy_int32(regs->eip); break;
+    case GDB_EFL: result = caml_copy_int32(regs->eflags); break;
+    case GDB_CS:  result = caml_copy_int32(regs->cs);  break;
+    case GDB_SS: result = caml_copy_int32(regs->ss); break;
+    case GDB_DS: result = caml_copy_int32(regs->ds); break;
+    case GDB_ES: result = caml_copy_int32(regs->es); break;
+    case GDB_FS: result = caml_copy_int32(regs->fs); break;
+    case GDB_GS: result = caml_copy_int32(regs->gs); break;
+    default: failwith("read register: unknown register");
+    }
+
+    CAMLreturn(result);
+}
 
 /*
  * dom_read_registers : context_t -> int32
@@ -155,7 +203,7 @@
     context_t ctx;
     int loop;
     char *buffer;
-    memory_t my_address = Int32_val(address);
+    unsigned long my_address = Int32_val(address);
     u32 my_length = Int_val(length);
 
     printf ("(pdb) read memory\n");
@@ -211,7 +259,7 @@
     context_t ctx;
 
     char buffer[4096];  /* a big buffer */
-    memory_t  my_address;
+    unsigned long  my_address;
     u32 length = 0;
 
     printf ("(pdb) write memory\n");
@@ -231,7 +279,7 @@
     }
     buffer[length++] = Int_val(Field(node, 0));
 
-    my_address = (memory_t) Int32_val(address);
+    my_address = (unsigned long) Int32_val(address);
 
     if ( xendebug_write_memory(xc_handle, ctx.domain, ctx.vcpu,
                                my_address, length, buffer) )
@@ -296,7 +344,7 @@
     CAMLparam3(context, address, length);
 
     context_t ctx;
-    memory_t my_address = (memory_t) Int32_val(address);
+    unsigned long my_address = (unsigned long) Int32_val(address);
     int my_length = Int_val(length);
 
     decode_context(&ctx, context);
@@ -325,7 +373,7 @@
 
     context_t ctx;
 
-    memory_t my_address = (memory_t) Int32_val(address);
+    unsigned long my_address = (unsigned long) Int32_val(address);
     int my_length = Int_val(length);
 
     printf ("(pdb) remove memory breakpoint 0x%lx %d\n",
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_evtchn.c
--- a/tools/debugger/pdb/pdb_caml_evtchn.c      Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_evtchn.c      Thu Aug 25 22:53:20 2005
@@ -6,7 +6,7 @@
  * PDB's OCaml interface library for event channels
  */
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_process.c
--- a/tools/debugger/pdb/pdb_caml_process.c     Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_process.c     Thu Aug 25 22:53:20 2005
@@ -15,7 +15,7 @@
 #include <caml/memory.h>
 #include <caml/mlvalues.h>
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <xen/xen.h>
 #include <xen/io/domain_controller.h>
 #include <xen/linux/privcmd.h>
@@ -113,6 +113,12 @@
         case PDB_OPCODE_DETACH :
             break;
             
+        case PDB_OPCODE_RD_REG :
+        {
+            sprintf(&msg[0], "%08x", _flip(resp->u.rd_reg.value));
+            break;
+        }
+
         case PDB_OPCODE_RD_REGS :
         {
             int loop;
@@ -161,16 +167,22 @@
         }
 
         case PDB_OPCODE_SET_BKPT :
-        {
-            break;
-        }
         case PDB_OPCODE_CLR_BKPT :
-        {
+        case PDB_OPCODE_SET_WATCHPT :
+        case PDB_OPCODE_CLR_WATCHPT :
+        {
+            break;
+        }
+
+        case PDB_OPCODE_WATCHPOINT :
+        {
+            sprintf(msg, "S05");
             break;
         }
 
         default :
-            printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n");
+            printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE %d\n",
+                   resp->operation);
             break;
         }
 
@@ -258,6 +270,32 @@
 
     CAMLreturn(Val_unit);
 }
+
+
+/*
+ * proc_read_register : context_t -> int -> unit
+ */
+value
+proc_read_register (value context, value reg)
+{
+    CAMLparam2(context, reg);       /* register every value argument with the GC */
+
+    pdb_request_t req;
+    context_t ctx;
+    int my_reg = Int_val(reg);
+
+    decode_context(&ctx, context);
+    /* only queues a PDB_OPCODE_RD_REG request; this call itself returns unit */
+    req.operation = PDB_OPCODE_RD_REG;
+    req.process = ctx.process;
+    req.u.rd_reg.reg = my_reg;
+    req.u.rd_reg.value = 0;
+
+    send_request (ctx.ring, ctx.evtchn, &req);
+
+    CAMLreturn(Val_unit);
+}
+
 
 
 /*
@@ -443,7 +481,7 @@
 
 
 /*
- * proc_insert_memory_breakpoint : context_t -> int32 -> int list -> unit
+ * proc_insert_memory_breakpoint : context_t -> int32 -> int -> unit
  */
 value
 proc_insert_memory_breakpoint (value context, value address, value length)
@@ -457,7 +495,7 @@
 
     req.operation = PDB_OPCODE_SET_BKPT;
     req.process = ctx.process;
-    req.u.bkpt.address = (memory_t) Int32_val(address);
+    req.u.bkpt.address = (unsigned long) Int32_val(address);
     req.u.bkpt.length  =  Int_val(length);
 
     send_request(ctx.ring, ctx.evtchn, &req);
@@ -466,7 +504,7 @@
 }
 
 /*
- * proc_remove_memory_breakpoint : context_t -> int32 -> int list -> unit
+ * proc_remove_memory_breakpoint : context_t -> int32 -> int -> unit
  */
 value
 proc_remove_memory_breakpoint (value context, value address, value length)
@@ -480,8 +518,56 @@
 
     req.operation = PDB_OPCODE_CLR_BKPT;
     req.process = ctx.process;
-    req.u.bkpt.address = (memory_t) Int32_val(address);
+    req.u.bkpt.address = (unsigned long) Int32_val(address);
     req.u.bkpt.length  =  Int_val(length);
+
+    send_request(ctx.ring, ctx.evtchn, &req);
+
+    CAMLreturn(Val_unit);
+}
+
+/*
+ * proc_insert_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit
+ */
+value
+proc_insert_watchpoint (value context, value kind, value address, value length)
+{
+    CAMLparam4(context, kind, address, length);   /* all four value arguments */
+
+    context_t ctx;
+    pdb_request_t req;
+
+    decode_context(&ctx, context);
+    /* queue a PDB_OPCODE_SET_WATCHPT request for the process in context */
+    req.operation = PDB_OPCODE_SET_WATCHPT;
+    req.process = ctx.process;
+    req.u.watchpt.type    =  Int_val(kind);
+    req.u.watchpt.address = (unsigned long) Int32_val(address);
+    req.u.watchpt.length  =  Int_val(length);
+
+    send_request(ctx.ring, ctx.evtchn, &req);
+
+    CAMLreturn(Val_unit);
+}
+
+/*
+ * proc_remove_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit
+ */
+value
+proc_remove_watchpoint (value context, value kind, value address, value length)
+{
+    CAMLparam3(context, address, length);
+
+    context_t ctx;
+    pdb_request_t req;
+
+    decode_context(&ctx, context);
+
+    req.operation = PDB_OPCODE_CLR_WATCHPT;
+    req.process = ctx.process;
+    req.u.watchpt.type    =  Int_val(kind);
+    req.u.watchpt.address = (unsigned long) Int32_val(address);
+    req.u.watchpt.length  =  Int_val(length);
 
     send_request(ctx.ring, ctx.evtchn, &req);
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_xc.c
--- a/tools/debugger/pdb/pdb_caml_xc.c  Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_xc.c  Thu Aug 25 22:53:20 2005
@@ -6,7 +6,7 @@
  * PDB's OCaml interface library for debugging domains
  */
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <xendebug.h>
 #include <errno.h>
 #include <stdio.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_caml_xcs.c
--- a/tools/debugger/pdb/pdb_caml_xcs.c Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_caml_xcs.c Thu Aug 25 22:53:20 2005
@@ -17,7 +17,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <errno.h>
-#include <xc.h>
+#include <xenctrl.h>
 
 #include <xen/xen.h>
 #include <xen/io/domain_controller.h>
@@ -50,7 +50,7 @@
 {
     CAMLparam2(domain, ring);
     int my_domain = Int_val(domain);
-    memory_t my_ring = Int32_val(ring);
+    unsigned long my_ring = Int32_val(ring);
 
     pdb_front_ring_t *front_ring;
     pdb_sring_t *sring;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/pdb_xen.c
--- a/tools/debugger/pdb/pdb_xen.c      Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/pdb_xen.c      Thu Aug 25 22:53:20 2005
@@ -7,7 +7,7 @@
  * PDB interface library for accessing Xen
  */
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/debugger/pdb/readme
--- a/tools/debugger/pdb/readme Wed Aug 24 02:43:18 2005
+++ b/tools/debugger/pdb/readme Thu Aug 25 22:53:20 2005
@@ -1,9 +1,9 @@
 
-PDB 0.3 
+PDB 0.3.3
 http://www.cl.cam.ac.uk/netos/pdb
 
 Alex Ho  
-June 2005
+August 2005
 
 
 This is the latest incarnation of the pervasive debugger.
@@ -79,6 +79,11 @@
 Process
 
   PDB can also debug a process running in a Linux 2.6 domain. 
+  You will need to patch the Linux 2.6 domain U tree to export some
+  additional symbols for the pdb module:
+
+  % make -C linux-2.6-patches
+
   After running PDB in domain 0, insert the pdb module in dom u:
   
   % insmod linux-2.6-module/pdb.ko
@@ -87,7 +92,14 @@
 
   (gdb) maint packet x context = process <domid> <pid>
 
+  Read, write, and access watchpoints should also work for processes;
+  use the "rwatch", "watch" and "awatch" gdb commands respectively.
+
+  If you are having trouble with GDB 5.3 (i386-redhat-linux-gnu),
+  try GDB 6.3 (configured with --target=i386-linux-gnu).
+
+  
 To Do
 
-- watchpoints
+- watchpoints for domains
 - support for SMP
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/Makefile
--- a/tools/examples/Makefile   Wed Aug 24 02:43:18 2005
+++ b/tools/examples/Makefile   Thu Aug 25 22:53:20 2005
@@ -16,7 +16,7 @@
 
 # Xen script dir and scripts to go there.
 XEN_SCRIPT_DIR = /etc/xen/scripts
-XEN_SCRIPTS = network vif-bridge
+XEN_SCRIPTS = network-bridge vif-bridge
 XEN_SCRIPTS += network-route vif-route
 XEN_SCRIPTS += block-file
 XEN_SCRIPTS += block-enbd
@@ -24,10 +24,14 @@
 XEN_BOOT_DIR = /usr/lib/xen/boot
 XEN_BOOT = mem-map.sxp
 
+XEN_HOTPLUG_DIR = /etc/hotplug.d/xen-backend
+XEN_HOTPLUG_SCRIPTS = backend.hotplug
+
 all: 
 build:
 
-install: all install-initd install-configs install-scripts install-boot
+install: all install-initd install-configs install-scripts install-boot \
+        install-hotplug
 
 install-initd:
        [ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d
@@ -60,4 +64,12 @@
            $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_BOOT_DIR); \
        done
 
+install-hotplug: # install hotplug scripts, never overwriting ones already present
+       [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
+               $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR)
+       for i in $(XEN_HOTPLUG_SCRIPTS); \
+           do [ -e $(DESTDIR)$(XEN_HOTPLUG_DIR)/$$i ] || \
+           $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \
+       done
+
 clean:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/README
--- a/tools/examples/README     Wed Aug 24 02:43:18 2005
+++ b/tools/examples/README     Thu Aug 25 22:53:20 2005
@@ -9,9 +9,20 @@
 send it (preferably with a little summary to go in this file) to
 <xen-devel@xxxxxxxxxxxxxxxxxxxxx> so we can add it to this directory.
 
+block-enbd          - binds/unbinds network block devices
+block-file          - binds/unbinds file to loopback device
+mem-map.sxp         - memory map xend configuration file.
 network             - default network setup script called by xend at startup.
+network-route       - default xen network start/stop script.
+network-nat         - default xen network start/stop script when using NAT.
 vif-bridge          - default virtual network interface setup script.
+vif-route           - default xen virtual network start/stop script
+vif-nat             - configures vif in routed-nat mode.
 xend-config.sxp     - default xend configuration file.
 xmexample1          - example configuration script for 'xm create'.
 xmexample2          - a more complex configuration script for 'xm create'.
+xmexample3          - an advanced configuration script for 'xm create' 
+                      that utilizes the vmid.
+xmexample.vmx       - a configuration script for creating a vmx domain with
+                      'xm create'.
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Wed Aug 24 02:43:18 2005
+++ b/tools/examples/vif-bridge Thu Aug 25 22:53:20 2005
@@ -74,8 +74,10 @@
     exit
 fi
 
-# Add/remove vif to/from bridge.
-brctl ${brcmd} ${bridge} ${vif}
+# Add vif to bridge. vifs are auto-removed from bridge.
+if [ "${brcmd}" = "addif" ] ; then
+    brctl ${brcmd} ${bridge} ${vif}
+fi
 ifconfig ${vif} $OP
 
 if [ ${ip} ] ; then
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Wed Aug 24 02:43:18 2005
+++ b/tools/examples/xend-config.sxp    Thu Aug 25 22:53:20 2005
@@ -28,7 +28,7 @@
 
 ## Use the following if VIF traffic is bridged.
 # The script used to start/stop networking for xend.
-(network-script    network)
+(network-script    network-bridge)
 # The default bridge that virtual interfaces should be connected to.
 (vif-bridge        xen-br0)
 # The default script used to control virtual interfaces.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Wed Aug 24 02:43:18 2005
+++ b/tools/examples/xmexample.vmx      Thu Aug 25 22:53:20 2005
@@ -10,13 +10,8 @@
 # Kernel image file.
 kernel = "/usr/lib/xen/boot/vmxloader"
 
-# Optional ramdisk.
-#ramdisk = "/boot/initrd.gz"
-
-# The domain build function. Default is 'linux'.
+# The domain build function. VMX domain uses 'vmx'.
 builder='vmx'
-#builder='linux'
-#builder='netbsd'
 
 # Initial memory allocation (in megabytes) for the new domain.
 memory = 128
@@ -26,13 +21,6 @@
 
 # Which CPU to start domain on? 
 #cpu = -1   # leave to Xen to pick
-
-#----------------------------------------------------------------------------
-# Define network interfaces.
-
-# Number of network interfaces. Default is 1.
-#nics=1
-nics=0
 
 # Optionally define mac and/or bridge for the network interfaces.
 # Random MACs are assigned if not given.
@@ -46,37 +34,7 @@
 # and MODE is r for read-only, w for read-write.
 
 #disk = [ 'phy:hda1,hda1,r' ]
-disk = [ 'file:/var/images/min-el3-i386.img,hda,w' ]
-
-#----------------------------------------------------------------------------
-# Set the kernel command line for the new domain.
-# You only need to define the IP parameters and hostname if the domain's
-# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
-# You can use 'extra' to set the runlevel and custom environment
-# variables used by custom rc scripts (e.g. VMID=, usr= ).
-
-# Set if you want dhcp to allocate the IP address.
-#dhcp="dhcp"
-# Set netmask.
-#netmask=
-# Set default gateway.
-#gateway=
-# Set the hostname.
-#hostname= "vm%d" % vmid
-
-# Set root device.
-#root = "/dev/ram0"
-root = "/dev/hda1 ro"
-
-# Root device for nfs.
-#root = "/dev/nfs"
-# The nfs server.
-#nfs_server = '169.254.1.0'  
-# Root directory on the nfs server.
-#nfs_root   = '/full/path/to/root/directory'
-
-# Sets runlevel 4.
-#extra = "acpi=off console=ttyS0 console=tty0 1"
+disk = [ 'file:/var/images/min-el3-i386.img,ioemu:hda,w' ]
 
 #----------------------------------------------------------------------------
 # Set according to whether you want the domain restarted when it exits.
@@ -125,15 +83,10 @@
 
 
 #-----------------------------------------------------------------------------
-#    set the real time clock to local time [default=utc]
-#localtime='utc'
+#    set the real time clock to local time [default=0 i.e. set to utc]
+#localtime=1
 
 
 #-----------------------------------------------------------------------------
 #    start in full screen
 #full-screen=1   
-
-#-----------------------------------------------------------------------------
-#   set the mac address of the first interface
-#macaddr=  
-
diff -r 5f1ed597f107 -r 8799d14bef77 tools/firmware/acpi/acpi2_0.h
--- a/tools/firmware/acpi/acpi2_0.h     Wed Aug 24 02:43:18 2005
+++ b/tools/firmware/acpi/acpi2_0.h     Thu Aug 25 22:53:20 2005
@@ -18,7 +18,7 @@
 #ifndef _ACPI_2_0_H_
 #define _ACPI_2_0_H_
 
-#include "xc.h"  // for u8, u16, u32, u64 definition
+#include "xenctrl.h"  // for u8, u16, u32, u64 definition
 
 #pragma pack (1)
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/i8254.c
--- a/tools/ioemu/hw/i8254.c    Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/i8254.c    Thu Aug 25 22:53:20 2005
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include "vl.h"
-#include "xc.h"
+#include "xenctrl.h"
 #include <io/ioreq.h>
 
 //#define DEBUG_PIT
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/i8259.c
--- a/tools/ioemu/hw/i8259.c    Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/i8259.c    Thu Aug 25 22:53:20 2005
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include "vl.h"
-#include "xc.h"
+#include "xenctrl.h"
 #include <io/ioreq.h>
 
 /* debug PIC */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/ide.c
--- a/tools/ioemu/hw/ide.c      Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/ide.c      Thu Aug 25 22:53:20 2005
@@ -430,6 +430,7 @@
         put_le16(p + 59, 0x100 | s->mult_sectors);
     put_le16(p + 60, s->nb_sectors);
     put_le16(p + 61, s->nb_sectors >> 16);
+    put_le16(p + 63, 0x07);
     put_le16(p + 80, (1 << 1) | (1 << 2));
     put_le16(p + 82, (1 << 14));
     put_le16(p + 83, (1 << 14));
@@ -460,7 +461,7 @@
     put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */
     put_le16(p + 49, 1 << 9); /* LBA supported, no DMA */
     put_le16(p + 53, 3); /* words 64-70, 54-58 valid */
-    put_le16(p + 63, 0x103); /* DMA modes XXX: may be incorrect */
+    put_le16(p + 63, 0x07); /* Multi-word DMA mode 2 */ 
     put_le16(p + 64, 1); /* PIO modes */
     put_le16(p + 65, 0xb4); /* minimum DMA multiword tx cycle time */
     put_le16(p + 66, 0xb4); /* recommended DMA multiword tx cycle time */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/hw/ioapic.h
--- a/tools/ioemu/hw/ioapic.h   Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/hw/ioapic.h   Thu Aug 25 22:53:20 2005
@@ -26,7 +26,7 @@
 #ifndef __IOAPIC_H
 #define __IOAPIC_H
 
-#include "xc.h"
+#include "xenctrl.h"
 #include <io/ioreq.h>
 #include <io/vmx_vlapic.h>
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c     Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/monitor.c     Thu Aug 25 22:53:20 2005
@@ -225,14 +225,10 @@
     }
 }
 
+extern void destroy_vmx_domain(void);
 static void do_quit(void)
 {
-    extern int domid;
-    extern FILE* logfile;
-    char destroy_cmd[20];
-    sprintf(destroy_cmd, "xm destroy %d", domid);
-    if (system(destroy_cmd) == -1)
-        fprintf(logfile, "%s failed.!\n", destroy_cmd);
+    destroy_vmx_domain(); /* issues "xm destroy <domid>"; defined in target-i386-dm/helper2.c */
     exit(0);
 }
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/target-i386-dm/Makefile
--- a/tools/ioemu/target-i386-dm/Makefile       Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/target-i386-dm/Makefile       Thu Aug 25 22:53:20 2005
@@ -188,7 +188,7 @@
 #########################################################
 
 DEFINES+=-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DAPIC_SUPPORT
-LIBS+=-lm -L../../libxc -lxc
+LIBS+=-lm -L../../libxc -lxenctrl
 ifndef CONFIG_USER_ONLY
 LIBS+=-lz
 endif
@@ -376,10 +376,10 @@
        $(CC) $(DEFINES) -c -o $@ $<
 
 clean:
-       rm -rf *.o  *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+       rm -rf *.o  *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp
 
 distclean:
-       rm -rf *.o  *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+       rm -rf *.o  *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp
 
 install: all 
        if [ ! -d $(INSTALL_DIR) ];then mkdir -p $(INSTALL_DIR);fi
@@ -387,8 +387,6 @@
        install -m 755 -s $(PROGS) "$(INSTALL_DIR)"
        install -m 755 qemu-dm.debug "$(INSTALL_DIR)"
        install -m 755 qemu-ifup "$(DESTDIR)$(configdir)"
-       gunzip -c qemu-vgaram-bin.gz >qemu-vgaram-bin 
-       install -m 755 qemu-vgaram-bin "$(DESTDIR)$(configdir)"
 ifneq ($(wildcard .depend),)
 include .depend
 endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/target-i386-dm/helper2.c      Thu Aug 25 22:53:20 2005
@@ -47,7 +47,7 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 
-#include "xc.h"
+#include "xenctrl.h"
 #include <io/ioreq.h>
 
 #include "cpu.h"
@@ -55,6 +55,7 @@
 #include "vl.h"
 
 shared_iopage_t *shared_page = NULL;
+extern int reset_requested;
 
 CPUX86State *cpu_86_init(void)
 {
@@ -327,7 +328,16 @@
        env->send_event = 1;
 }
 
-//static unsigned long tsc_per_tick = 1; /* XXX: calibrate */
+void
+destroy_vmx_domain(void) /* run "xm destroy <domid>"; report to logfile if system() fails */
+{
+    extern int domid;
+    extern FILE* logfile;
+    char destroy_cmd[32]; /* "xm destroy " + widest 32-bit int + NUL = 23 bytes */
+    snprintf(destroy_cmd, sizeof(destroy_cmd), "xm destroy %d", domid);
+    if (system(destroy_cmd) == -1)
+        fprintf(logfile, "%s failed.!\n", destroy_cmd);
+}
 
 int main_loop(void)
 {
@@ -348,6 +358,10 @@
                 if (vm_running) {
                     if (shutdown_requested) {
                         break;
+                    }
+                    if (reset_requested){
+                        qemu_system_reset();
+                        reset_requested = 0;
                     }
                 }
 
@@ -391,7 +405,21 @@
                        }
                }
        }
+        destroy_vmx_domain();
        return 0;
+}
+
+static void
+qemu_vmx_reset(void *unused) /* reset handler registered via qemu_register_reset() */
+{
+    char cmd[255];
+    extern int domid;
+
+    /* Pause the domain first, to avoid repeated reboot requests. */
+    xc_domain_pause (xc_handle, domid);
+
+    sprintf(cmd,"xm shutdown -R %d", domid);
+    system (cmd); /* NOTE(review): return value is ignored, so a failed command goes unreported */
 }
 
 CPUState *
@@ -400,7 +428,7 @@
        CPUX86State *env;
       
         cpu_exec_init();
-
+        qemu_register_reset(qemu_vmx_reset, NULL);
        env = malloc(sizeof(CPUX86State));
        if (!env)
                return NULL;
@@ -427,3 +455,4 @@
 
        return env;
 }
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/vl.c  Thu Aug 25 22:53:20 2005
@@ -72,7 +72,7 @@
 #endif
 #endif /* CONFIG_SDL */
 
-#include "xc.h"
+#include "xenctrl.h"
 #include "exec-all.h"
 
 //#define DO_TB_FLUSH
@@ -2030,7 +2030,7 @@
 } QEMUResetEntry;
 
 static QEMUResetEntry *first_reset_entry;
-static int reset_requested;
+int reset_requested;
 int shutdown_requested;
 
 void qemu_register_reset(QEMUResetHandler *func, void *opaque)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Wed Aug 24 02:43:18 2005
+++ b/tools/ioemu/vl.h  Thu Aug 25 22:53:20 2005
@@ -107,6 +107,7 @@
 
 void qemu_register_reset(QEMUResetHandler *func, void *opaque);
 void qemu_system_reset_request(void);
+void qemu_system_reset(void);
 void qemu_system_shutdown_request(void);
 
 void main_loop_wait(int timeout);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/Makefile      Thu Aug 25 22:53:20 2005
@@ -12,28 +12,32 @@
 XEN_ROOT = ../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-SRCS     :=
-SRCS     += xc_sedf.c
-SRCS     += xc_bvtsched.c
-SRCS     += xc_core.c
-SRCS     += xc_domain.c
-SRCS     += xc_evtchn.c
-SRCS     += xc_gnttab.c
-SRCS     += xc_load_bin.c
-SRCS     += xc_load_elf.c
-SRCS     += xc_linux_build.c
-SRCS     += xc_misc.c
-SRCS     += xc_physdev.c
-SRCS     += xc_private.c
+SRCS       :=
+BUILD_SRCS :=
+SRCS       += xc_bvtsched.c
+SRCS       += xc_core.c
+SRCS       += xc_domain.c
+SRCS       += xc_evtchn.c
+SRCS       += xc_gnttab.c
+SRCS       += xc_misc.c
+SRCS       += xc_physdev.c
+SRCS       += xc_private.c
+SRCS       += xc_sedf.c
+
 ifeq ($(XEN_TARGET_ARCH),ia64)
-SRCS     += xc_ia64_stubs.c
+BUILD_SRCS += xc_ia64_stubs.c
 else
-SRCS     += xc_load_aout9.c
-SRCS     += xc_linux_restore.c
-SRCS     += xc_linux_save.c
-SRCS     += xc_vmx_build.c
-SRCS     += xc_ptrace.c
-SRCS     += xc_ptrace_core.c
+SRCS       += xc_ptrace.c
+SRCS       += xc_ptrace_core.c
+
+BUILD_SRCS := xc_load_aout9.c
+BUILD_SRCS += xc_load_bin.c
+BUILD_SRCS += xc_load_elf.c
+BUILD_SRCS += xc_linux_build.c
+BUILD_SRCS += xc_linux_restore.c
+BUILD_SRCS += xc_linux_save.c
+BUILD_SRCS += xc_vmx_build.c
+BUILD_SRCS += xg_private.c
 endif
 
 CFLAGS   += -Wall
@@ -43,13 +47,20 @@
 CFLAGS   += $(INCLUDES) -I.
 # Get gcc to generate the dependencies for us.
 CFLAGS   += -Wp,-MD,.$(@F).d
+LDFLAGS  += -L.
 DEPS     = .*.d
 
 LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
 PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
 
-LIB      := libxc.a libxc-pic.a
-LIB      += libxc.so libxc.so.$(MAJOR) libxc.so.$(MAJOR).$(MINOR)
+LIB_BUILD_OBJS := $(patsubst %.c,%.o,$(BUILD_SRCS))
+PIC_BUILD_OBJS := $(patsubst %.c,%.opic,$(BUILD_SRCS))
+
+LIB := libxenctrl.a
+LIB += libxenctrl.so libxenctrl.so.$(MAJOR) libxenctrl.so.$(MAJOR).$(MINOR)
+
+LIB += libxenguest.a
+LIB += libxenguest.so libxenguest.so.$(MAJOR) libxenguest.so.$(MAJOR).$(MINOR)
 
 all: build
 build: check-for-zlib mk-symlinks
@@ -77,11 +88,16 @@
 install: build
        [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
        [ -d $(DESTDIR)/usr/include ] || $(INSTALL_DIR) $(DESTDIR)/usr/include
-       $(INSTALL_PROG) libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DATA) libxc.a $(DESTDIR)/usr/$(LIBDIR)
-       ln -sf libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxc.so.$(MAJOR)
-       ln -sf libxc.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxc.so
-       $(INSTALL_DATA) xc.h $(DESTDIR)/usr/include
+       $(INSTALL_PROG) libxenctrl.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+       $(INSTALL_DATA) libxenctrl.a $(DESTDIR)/usr/$(LIBDIR)
+       ln -sf libxenctrl.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenctrl.so.$(MAJOR)
+       ln -sf libxenctrl.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenctrl.so
+       $(INSTALL_DATA) xenctrl.h $(DESTDIR)/usr/include
+
+       $(INSTALL_PROG) libxenguest.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+       $(INSTALL_DATA) libxenguest.a $(DESTDIR)/usr/$(LIBDIR)
+       ln -sf libxenguest.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenguest.so.$(MAJOR)
+       ln -sf libxenguest.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenguest.so
 
 .PHONY: TAGS clean rpm install all
 
@@ -100,18 +116,30 @@
        mv staging/i386/*.rpm .
        rm -rf staging
 
-libxc.a: $(LIB_OBJS)
+# libxenctrl
+
+libxenctrl.a: $(LIB_OBJS)
        $(AR) rc $@ $^
 
-libxc-pic.a: $(PIC_OBJS)
+libxenctrl.so: libxenctrl.so.$(MAJOR)
+       ln -sf $< $@
+libxenctrl.so.$(MAJOR): libxenctrl.so.$(MAJOR).$(MINOR)
+       ln -sf $< $@
+
+libxenctrl.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
+       $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenctrl.so.$(MAJOR) -shared -o $@ $^
+
+# libxenguest
+
+libxenguest.a: $(LIB_BUILD_OBJS)
        $(AR) rc $@ $^
 
-libxc.so: libxc.so.$(MAJOR)
+libxenguest.so: libxenguest.so.$(MAJOR)
        ln -sf $< $@
-libxc.so.$(MAJOR): libxc.so.$(MAJOR).$(MINOR)
+libxenguest.so.$(MAJOR): libxenguest.so.$(MAJOR).$(MINOR)
        ln -sf $< $@
 
-libxc.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
-       $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxc.so.$(MAJOR) -shared -o $@ $^ -lz
+libxenguest.so.$(MAJOR).$(MINOR): $(PIC_BUILD_OBJS)
+       $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenguest.so.$(MAJOR) -shared -o $@ $^ -lz -lxenctrl
 
 -include $(DEPS)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/linux_boot_params.h
--- a/tools/libxc/linux_boot_params.h   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/linux_boot_params.h   Thu Aug 25 22:53:20 2005
@@ -17,6 +17,7 @@
 #define E820_NVS        4
 #define E820_IO         16
 #define E820_SHARED     17
+#define E820_XENSTORE   18
 
         u32 caching_attr;    /* used by hypervisor */
 #define MEMMAP_UC      0
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_core.c     Thu Aug 25 22:53:20 2005
@@ -1,4 +1,4 @@
-#include "xc_private.h"
+#include "xg_private.h"
 #define ELFSIZE 32
 #include "xc_elf.h"
 #include <stdlib.h>
@@ -43,7 +43,7 @@
                goto error_out;
        }
        
-       if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == 0) {
+       if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) {
                PERROR("Could not allocate dump_mem");
                goto error_out;
        }
@@ -108,9 +108,8 @@
        free(dump_mem_start);
        return 0;
  error_out:
-       if (dump_fd)
+       if (dump_fd != -1)
                close(dump_fd);
-       if (dump_mem_start)
-               free(dump_mem_start);
+       free(dump_mem_start);
        return -1;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_domain.c   Thu Aug 25 22:53:20 2005
@@ -266,7 +266,7 @@
     int err;
     unsigned int npages = mem_kb / (PAGE_SIZE/1024);
 
-    err = do_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
+    err = xc_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
                         npages, 0, domid);
     if (err == npages)
         return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_gnttab.c
--- a/tools/libxc/xc_gnttab.c   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_gnttab.c   Thu Aug 25 22:53:20 2005
@@ -40,17 +40,17 @@
 
 
 int xc_gnttab_map_grant_ref(int         xc_handle,
-                            memory_t    host_virt_addr,
+                            u64    host_virt_addr,
                             u32         dom,
                             u16         ref,
                             u16         flags,
                             s16        *handle,
-                            memory_t   *dev_bus_addr)
+                            u64   *dev_bus_addr)
 {
     struct gnttab_map_grant_ref op;
     int rc;
 
-    op.host_virt_addr = host_virt_addr;
+    op.host_addr      = host_virt_addr;
     op.dom            = (domid_t)dom;
     op.ref            = ref;
     op.flags          = flags;
@@ -67,15 +67,15 @@
 
 
 int xc_gnttab_unmap_grant_ref(int       xc_handle,
-                              memory_t  host_virt_addr,
-                              memory_t  dev_bus_addr,
+                              u64  host_virt_addr,
+                              u64  dev_bus_addr,
                               u16       handle,
                               s16      *status)
 {
     struct gnttab_unmap_grant_ref op;
     int rc;
 
-    op.host_virt_addr = host_virt_addr;
+    op.host_addr      = host_virt_addr;
     op.dev_bus_addr   = dev_bus_addr;
     op.handle         = handle;
  
@@ -92,7 +92,7 @@
                           u32        dom,
                           u16        nr_frames,
                           s16       *status,
-                          memory_t **frame_list)
+                          unsigned long **frame_list)
 {
     struct gnttab_setup_table op;
     int rc, i;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_linux_build.c      Thu Aug 25 22:53:20 2005
@@ -2,7 +2,8 @@
  * xc_linux_build.c
  */
 
-#include "xc_private.h"
+#include "xg_private.h"
+#include <xenctrl.h>
 
 #if defined(__i386__)
 #define ELFSIZE 32
@@ -318,8 +319,7 @@
     return 0;
 
  error_out:
-    if ( page_array != NULL )
-        free(page_array);
+    free(page_array);
     return -1;
 }
 #else /* x86 */
@@ -341,7 +341,7 @@
     unsigned long count, i;
     start_info_t *start_info;
     shared_info_t *shared_info;
-    mmu_t *mmu = NULL;
+    xc_mmu_t *mmu = NULL;
     int rc;
 
     unsigned long nr_pt_pages;
@@ -491,7 +491,7 @@
         }
     }
 
-    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
     /* setup page tables */
@@ -521,9 +521,9 @@
         page_array[physmap_pfn++]);
     for ( count = 0; count < nr_pages; count++ )
     {
-        if ( add_mmu_update(xc_handle, mmu,
-                            (page_array[count] << PAGE_SHIFT) | 
-                            MMU_MACHPHYS_UPDATE, count) )
+        if ( xc_add_mmu_update(xc_handle, mmu,
+                              (page_array[count] << PAGE_SHIFT) | 
+                              MMU_MACHPHYS_UPDATE, count) )
         {
             munmap(physmap, PAGE_SIZE);
             goto error_out;
@@ -603,7 +603,7 @@
     munmap(shared_info, PAGE_SIZE);
 
     /* Send the page update requests down to the hypervisor. */
-    if ( finish_mmu_updates(xc_handle, mmu) )
+    if ( xc_finish_mmu_updates(xc_handle, mmu) )
         goto error_out;
 
     free(mmu);
@@ -616,10 +616,8 @@
     return 0;
 
  error_out:
-    if ( mmu != NULL )
-        free(mmu);
-    if ( page_array != NULL )
-        free(page_array);
+    free(mmu);
+    free(page_array);
     return -1;
 }
 #endif
@@ -679,7 +677,7 @@
 
     op.cmd = DOM0_GETDOMAININFO;
     op.u.getdomaininfo.domain = (domid_t)domid;
-    if ( (do_dom0_op(xc_handle, &op) < 0) || 
+    if ( (xc_dom0_op(xc_handle, &op) < 0) || 
          ((u16)op.u.getdomaininfo.domain != domid) )
     {
         PERROR("Could not get info on domain");
@@ -719,8 +717,7 @@
         close(initrd_fd);
     if ( initrd_gfd )
         gzclose(initrd_gfd);
-    if ( image != NULL )
-        free(image);
+    free(image);
 
 #ifdef __ia64__
     /* based on new_thread in xen/arch/ia64/domain.c */
@@ -797,7 +794,7 @@
     launch_op.u.setdomaininfo.ctxt   = ctxt;
 
     launch_op.cmd = DOM0_SETDOMAININFO;
-    rc = do_dom0_op(xc_handle, &launch_op);
+    rc = xc_dom0_op(xc_handle, &launch_op);
     
     return rc;
 
@@ -806,8 +803,7 @@
         gzclose(initrd_gfd);
     else if ( initrd_fd >= 0 )
         close(initrd_fd);
-    if ( image != NULL )
-        free(image);
+    free(image);
 
     return -1;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_linux_restore.c    Thu Aug 25 22:53:20 2005
@@ -6,7 +6,12 @@
  * Copyright (c) 2003, K A Fraser.
  */
 
-#include "xc_private.h"
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include <xenctrl.h>
+
 #include <xen/linux/suspend.h>
 
 #define MAX_BATCH_SIZE 1024
@@ -32,7 +37,7 @@
 #define PPRINTF(_f, _a...)
 #endif
 
-ssize_t
+static ssize_t
 read_exact(int fd, void *buf, size_t count)
 {
     int r = 0, s;
@@ -48,7 +53,8 @@
     return r;
 }
 
-int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns)
+int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns,
+                    unsigned int store_evtchn, unsigned long *store_mfn)
 {
     dom0_op_t op;
     int rc = 1, i, n, k;
@@ -88,7 +94,7 @@
 
     char *region_base;
 
-    mmu_t *mmu = NULL;
+    xc_mmu_t *mmu = NULL;
 
     /* used by debug verify code */
     unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
@@ -131,7 +137,7 @@
     /* Get the domain's shared-info frame. */
     op.cmd = DOM0_GETDOMAININFO;
     op.u.getdomaininfo.domain = (domid_t)dom;
-    if (do_dom0_op(xc_handle, &op) < 0) {
+    if (xc_dom0_op(xc_handle, &op) < 0) {
         ERR("Could not get information on new domain");
         goto out;
     }
@@ -157,7 +163,7 @@
         goto out;
     }
 
-    mmu = init_mmu_updates(xc_handle, dom);
+    mmu = xc_init_mmu_updates(xc_handle, dom);
     if (mmu == NULL) {
         ERR("Could not initialise for MMU updates");
         goto out;
@@ -354,8 +360,9 @@
                 }
             }
 
-            if ( add_mmu_update(xc_handle, mmu,
-                                (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
+            if ( xc_add_mmu_update(xc_handle, mmu,
+                                  (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+                                  pfn) )
             {
                 printf("machpys mfn=%ld pfn=%ld\n",mfn,pfn);
                 goto out;
@@ -369,7 +376,7 @@
 
     DPRINTF("Received all pages\n");
 
-    if ( finish_mmu_updates(xc_handle, mmu) )
+    if ( xc_finish_mmu_updates(xc_handle, mmu) )
         goto out;
 
     /*
@@ -387,14 +394,14 @@
         pin[nr_pins].mfn = pfn_to_mfn_table[i];
         if ( ++nr_pins == MAX_PIN_BATCH )
         {
-            if ( do_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
+            if ( xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
                 goto out;
             nr_pins = 0;
         }
     }
 
     if ( (nr_pins != 0) &&
-         (do_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
+         (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
         goto out;
 
     DPRINTF("\b\b\b\b100%%\n");
@@ -434,7 +441,7 @@
 
        if ( count > 0 )
        {
-           if ( (rc = do_dom_mem_op( xc_handle,
+           if ( (rc = xc_dom_mem_op( xc_handle,
                                       MEMOP_decrease_reservation,
                                       pfntab, count, 0, dom )) <0 )
            {
@@ -464,10 +471,13 @@
     }
     ctxt.user_regs.esi = mfn = pfn_to_mfn_table[pfn];
     p_srec = xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_WRITE, mfn);
+        xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
     p_srec->resume_info.nr_pages    = nr_pfns;
     p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
     p_srec->resume_info.flags       = 0;
+    *store_mfn = p_srec->resume_info.store_mfn   =
+       pfn_to_mfn_table[p_srec->resume_info.store_mfn];
+    p_srec->resume_info.store_evtchn = store_evtchn;
     munmap(p_srec, PAGE_SIZE);
 
     /* Uncanonicalise each GDT frame number. */
@@ -582,7 +592,7 @@
     op.u.setdomaininfo.domain = (domid_t)dom;
     op.u.setdomaininfo.vcpu   = 0;
     op.u.setdomaininfo.ctxt   = &ctxt;
-    rc = do_dom0_op(xc_handle, &op);
+    rc = xc_dom0_op(xc_handle, &op);
 
     if ( rc != 0 )
     {
@@ -593,7 +603,7 @@
     DPRINTF("Domain ready to be unpaused\n");
     op.cmd = DOM0_UNPAUSEDOMAIN;
     op.u.unpausedomain.domain = (domid_t)dom;
-    rc = do_dom0_op(xc_handle, &op);
+    rc = xc_dom0_op(xc_handle, &op);
     if (rc == 0) {
         /* Success: print the domain id. */
         DPRINTF("DOM=%u\n", dom);
@@ -603,12 +613,9 @@
  out:
     if ( (rc != 0) && (dom != 0) )
         xc_domain_destroy(xc_handle, dom);
-    if ( mmu != NULL )
-        free(mmu);
-    if ( pfn_to_mfn_table != NULL )
-        free(pfn_to_mfn_table);
-    if ( pfn_type != NULL )
-        free(pfn_type);
+    free(mmu);
+    free(pfn_to_mfn_table);
+    free(pfn_type);
 
     DPRINTF("Restore exit with rc=%d\n", rc);
     return rc;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_linux_save.c       Thu Aug 25 22:53:20 2005
@@ -7,11 +7,15 @@
  */
 
 #include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
 #include <sys/time.h>
-#include "xc_private.h"
+
+#include "xg_private.h"
+
 #include <xen/linux/suspend.h>
 #include <xen/io/domain_controller.h>
-#include <time.h>
 
 #define BATCH_SIZE 1024   /* 1024 pages (4MB) at a time */
 
@@ -20,7 +24,7 @@
 #define DEBUG 0
 
 #if 1
-#define ERR(_f, _a...) fprintf ( stderr, _f , ## _a )
+#define ERR(_f, _a...) do { fprintf(stderr, _f , ## _a); fflush(stderr); } while (0)
 #else
 #define ERR(_f, _a...) ((void)0)
 #endif
@@ -136,7 +140,7 @@
     return (new->tv_sec * 1000000) + new->tv_usec;
 }
 
-static long long llgettimeofday()
+static long long llgettimeofday( void )
 {
     struct timeval now;
     gettimeofday(&now, NULL);
@@ -312,9 +316,9 @@
 }
 
 
-int suspend_and_state(int xc_handle, int io_fd,        int dom,              
-                      xc_dominfo_t *info,
-                      vcpu_guest_context_t *ctxt)
+static int suspend_and_state(int xc_handle, int io_fd, int dom,              
+                             xc_dominfo_t *info,
+                             vcpu_guest_context_t *ctxt)
 {
     int i=0;
     char ans[30];
@@ -429,7 +433,7 @@
        - that should be sent this iteration (unless later marked as skip); 
        - to skip this iteration because already dirty;
        - to fixup by sending at the end if not already resent; */
-    unsigned long *to_send, *to_skip, *to_fix;
+    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
     
     xc_shadow_control_stats_t stats;
 
@@ -643,6 +647,22 @@
         goto out;
     }
 
+    /* Map the suspend-record MFN to pin it. The page must be owned by 
+       dom for this to succeed. */
+    p_srec = xc_map_foreign_range(xc_handle, dom,
+                                   sizeof(*p_srec), PROT_READ | PROT_WRITE, 
+                                   ctxt.user_regs.esi);
+    if (!p_srec){
+        ERR("Couldn't map suspend record");
+        goto out;
+    }
+
+    /* Canonicalize store mfn. */
+    if ( !translate_mfn_to_pfn(&p_srec->resume_info.store_mfn) ) {
+       ERR("Store frame is not in range of pseudophys map");
+       goto out;
+    }
+
     print_stats( xc_handle, dom, 0, &stats, 0 );
 
     /* Now write out each data page, canonicalising page tables as we go... */
@@ -756,7 +776,7 @@
                 goto out;
             }
      
-            if ( get_pfn_type_batch(xc_handle, dom, batch, pfn_type) ){
+            if ( xc_get_pfn_type_batch(xc_handle, dom, batch, pfn_type) ){
                 ERR("get_pfn_type_batch failed");
                 goto out;
             }
@@ -983,16 +1003,6 @@
        }
     }
 
-    /* Map the suspend-record MFN to pin it. The page must be owned by 
-       dom for this to succeed. */
-    p_srec = xc_map_foreign_range(xc_handle, dom,
-                                   sizeof(*p_srec), PROT_READ, 
-                                   ctxt.user_regs.esi);
-    if (!p_srec){
-        ERR("Couldn't map suspend record");
-        goto out;
-    }
-
     if (nr_pfns != p_srec->nr_pfns )
     {
        ERR("Suspend record nr_pfns unexpected (%ld != %ld)",
@@ -1045,8 +1055,11 @@
     if(live_mfn_to_pfn_table) 
         munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024);
 
-    if (pfn_type != NULL) 
-        free(pfn_type);
+    free(pfn_type);
+    free(pfn_batch);
+    free(to_send);
+    free(to_fix);
+    free(to_skip);
 
     DPRINTF("Save exit rc=%d\n",rc);
     return !!rc;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c       Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_load_aout9.c       Thu Aug 25 22:53:20 2005
@@ -1,5 +1,5 @@
 
-#include "xc_private.h"
+#include "xg_private.h"
 #include "xc_aout9.h"
 
 #if defined(__i386__)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_load_bin.c Thu Aug 25 22:53:20 2005
@@ -66,7 +66,7 @@
  * Free Software Foundation, Inc.
  */
 
-#include "xc_private.h"
+#include "xg_private.h"
 #include <stdlib.h>
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_load_elf.c Thu Aug 25 22:53:20 2005
@@ -2,7 +2,7 @@
  * xc_elf_load.c
  */
 
-#include "xc_private.h"
+#include "xg_private.h"
 
 #if defined(__i386__)
 #define ELFSIZE 32
@@ -309,8 +309,7 @@
     dsi->v_end = round_pgup(maxva);
 
  out:
-    if ( p != NULL )
-        free(p);
+    free(p);
 
     return 0;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_private.c  Thu Aug 25 22:53:20 2005
@@ -64,8 +64,8 @@
 /*******************/
 
 /* NB: arr must be mlock'ed */
-int get_pfn_type_batch(int xc_handle, 
-                       u32 dom, int num, unsigned long *arr)
+int xc_get_pfn_type_batch(int xc_handle, 
+                         u32 dom, int num, unsigned long *arr)
 {
     dom0_op_t op;
     op.cmd = DOM0_GETPAGEFRAMEINFO2;
@@ -92,25 +92,40 @@
     return op.u.getpageframeinfo.type;
 }
 
-
-
-/*******************/
-
-int pin_table(
-    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
-{
-    struct mmuext_op op;
-
-    op.cmd = type;
-    op.mfn = mfn;
-
-    if ( do_mmuext_op(xc_handle, &op, 1, dom) < 0 )
-        return 1;
-
-    return 0;
-}
-
-static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
+int xc_mmuext_op(
+    int xc_handle,
+    struct mmuext_op *op,
+    unsigned int nr_ops,
+    domid_t dom)
+{
+    privcmd_hypercall_t hypercall;
+    long ret = -EINVAL;
+
+    hypercall.op     = __HYPERVISOR_mmuext_op;
+    hypercall.arg[0] = (unsigned long)op;
+    hypercall.arg[1] = (unsigned long)nr_ops;
+    hypercall.arg[2] = (unsigned long)0;
+    hypercall.arg[3] = (unsigned long)dom;
+
+    if ( mlock(op, nr_ops*sizeof(*op)) != 0 )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        goto out1;
+    }
+
+    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
+    {
+       fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
+                    " rebuild the user-space tool set?\n",ret,errno);
+    }
+
+    safe_munlock(op, nr_ops*sizeof(*op));
+
+ out1:
+    return ret;
+}    
+
+static int flush_mmu_updates(int xc_handle, xc_mmu_t *mmu)
 {
     int err = 0;
     privcmd_hypercall_t hypercall;
@@ -145,9 +160,9 @@
     return err;
 }
 
-mmu_t *init_mmu_updates(int xc_handle, domid_t dom)
-{
-    mmu_t *mmu = malloc(sizeof(mmu_t));
+xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom)
+{
+    xc_mmu_t *mmu = malloc(sizeof(xc_mmu_t));
     if ( mmu == NULL )
         return mmu;
     mmu->idx     = 0;
@@ -155,8 +170,8 @@
     return mmu;
 }
 
-int add_mmu_update(int xc_handle, mmu_t *mmu, 
-                   unsigned long ptr, unsigned long val)
+int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, 
+                     unsigned long ptr, unsigned long val)
 {
     mmu->updates[mmu->idx].ptr = ptr;
     mmu->updates[mmu->idx].val = val;
@@ -167,10 +182,47 @@
     return 0;
 }
 
-int finish_mmu_updates(int xc_handle, mmu_t *mmu)
+int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu)
 {
     return flush_mmu_updates(xc_handle, mmu);
 }
+
+int xc_dom_mem_op(int xc_handle,
+                 unsigned int memop, 
+                 unsigned int *extent_list, 
+                 unsigned int nr_extents,
+                 unsigned int extent_order,
+                 domid_t domid)
+{
+    privcmd_hypercall_t hypercall;
+    long ret = -EINVAL;
+
+    hypercall.op     = __HYPERVISOR_dom_mem_op;
+    hypercall.arg[0] = (unsigned long)memop;
+    hypercall.arg[1] = (unsigned long)extent_list;
+    hypercall.arg[2] = (unsigned long)nr_extents;
+    hypercall.arg[3] = (unsigned long)extent_order;
+    hypercall.arg[4] = (unsigned long)domid;
+
+    if ( (extent_list != NULL) && 
+         (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        goto out1;
+    }
+
+    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
+    {
+       fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
+                " rebuild the user-space tool set?\n",ret,errno);
+    }
+
+    if ( extent_list != NULL )
+        safe_munlock(extent_list, nr_extents*sizeof(unsigned long));
+
+ out1:
+    return ret;
+}    
 
 
 long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid, int vcpu )
@@ -189,19 +241,6 @@
     return op.u.getvcpucontext.cpu_time;
 }
 
-
-/* This is shared between save and restore, and may generally be useful. */
-unsigned long csum_page (void * page)
-{
-    int i;
-    unsigned long *p = page;
-    unsigned long long sum=0;
-
-    for ( i = 0; i < (PAGE_SIZE/sizeof(unsigned long)); i++ )
-        sum += p[i];
-
-    return sum ^ (sum>>32);
-}
 
 unsigned long xc_get_m2p_start_mfn ( int xc_handle )
 {
@@ -332,53 +371,6 @@
     return sz;
 }
 
-char *xc_read_kernel_image(const char *filename, unsigned long *size)
-{
-    int kernel_fd = -1;
-    gzFile kernel_gfd = NULL;
-    char *image = NULL;
-    unsigned int bytes;
-
-    if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
-    {
-        PERROR("Could not open kernel image");
-        goto out;
-    }
-
-    if ( (*size = xc_get_filesz(kernel_fd)) == 0 )
-    {
-        PERROR("Could not read kernel image");
-        goto out;
-    }
-
-    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
-    {
-        PERROR("Could not allocate decompression state for state file");
-        goto out;
-    }
-
-    if ( (image = malloc(*size)) == NULL )
-    {
-        PERROR("Could not allocate memory for kernel image");
-        goto out;
-    }
-
-    if ( (bytes = gzread(kernel_gfd, image, *size)) != *size )
-    {
-        PERROR("Error reading kernel image, could not"
-               " read the whole image (%d != %ld).", bytes, *size);
-        free(image);
-        image = NULL;
-    }
-
- out:
-    if ( kernel_gfd != NULL )
-        gzclose(kernel_gfd);
-    else if ( kernel_fd >= 0 )
-        close(kernel_fd);
-    return image;
-}
-
 void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
                    int xch, u32 dom, unsigned long *parray,
                    unsigned long vstart)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h  Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_private.h  Thu Aug 25 22:53:20 2005
@@ -1,123 +1,25 @@
 
-#ifndef __XC_PRIVATE_H__
-#define __XC_PRIVATE_H__
+#ifndef XC_PRIVATE_H
+#define XC_PRIVATE_H
 
 #include <unistd.h>
 #include <stdio.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <string.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <stdlib.h>
 #include <sys/ioctl.h>
-#include <errno.h>
-#include <string.h>
 
-#include "xc.h"
+#include "xenctrl.h"
 
 #include <xen/linux/privcmd.h>
 
-#define _PAGE_PRESENT   0x001
-#define _PAGE_RW        0x002
-#define _PAGE_USER      0x004
-#define _PAGE_PWT       0x008
-#define _PAGE_PCD       0x010
-#define _PAGE_ACCESSED  0x020
-#define _PAGE_DIRTY     0x040
-#define _PAGE_PAT       0x080
-#define _PAGE_PSE       0x080
-#define _PAGE_GLOBAL    0x100
-
-#if defined(__i386__)
-#define L1_PAGETABLE_SHIFT       12
-#define L2_PAGETABLE_SHIFT       22
-#define L1_PAGETABLE_SHIFT_PAE   12
-#define L2_PAGETABLE_SHIFT_PAE   21
-#define L3_PAGETABLE_SHIFT_PAE   30
-#elif defined(__x86_64__)
-#define L1_PAGETABLE_SHIFT      12
-#define L2_PAGETABLE_SHIFT      21
-#define L3_PAGETABLE_SHIFT      30
-#define L4_PAGETABLE_SHIFT      39
-#endif
-
-#if defined(__i386__) 
-#define ENTRIES_PER_L1_PAGETABLE 1024
-#define ENTRIES_PER_L2_PAGETABLE 1024
-#define L1_PAGETABLE_ENTRIES_PAE  512
-#define L2_PAGETABLE_ENTRIES_PAE  512
-#define L3_PAGETABLE_ENTRIES_PAE    4
-#elif defined(__x86_64__)
-#define L1_PAGETABLE_ENTRIES    512
-#define L2_PAGETABLE_ENTRIES    512
-#define L3_PAGETABLE_ENTRIES    512
-#define L4_PAGETABLE_ENTRIES    512
-#endif
- 
 #define PAGE_SHIFT              XC_PAGE_SHIFT
 #define PAGE_SIZE               (1UL << PAGE_SHIFT)
 #define PAGE_MASK               (~(PAGE_SIZE-1))
-
-typedef u32 l1_pgentry_32_t;
-typedef u32 l2_pgentry_32_t;
-typedef u64 l1_pgentry_64_t;
-typedef u64 l2_pgentry_64_t;
-typedef u64 l3_pgentry_64_t;
-typedef unsigned long l1_pgentry_t;
-typedef unsigned long l2_pgentry_t;
-#if defined(__x86_64__)
-typedef unsigned long l3_pgentry_t;
-typedef unsigned long l4_pgentry_t;
-#endif
-
-#if defined(__i386__)
-#define l1_table_offset(_a) \
-          (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
-#define l2_table_offset(_a) \
-          ((_a) >> L2_PAGETABLE_SHIFT)
-#define l1_table_offset_pae(_a) \
-  (((_a) >> L1_PAGETABLE_SHIFT_PAE) & (L1_PAGETABLE_ENTRIES_PAE - 1))
-#define l2_table_offset_pae(_a) \
-  (((_a) >> L2_PAGETABLE_SHIFT_PAE) & (L2_PAGETABLE_ENTRIES_PAE - 1))
-#define l3_table_offset_pae(_a) \
-       (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
-#elif defined(__x86_64__)
-#define l1_table_offset(_a) \
-  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
-#define l2_table_offset(_a) \
-  (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
-#define l3_table_offset(_a) \
-       (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
-#define l4_table_offset(_a) \
-       (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
-#endif
-
-struct domain_setup_info
-{
-    unsigned long v_start;
-    unsigned long v_end;
-    unsigned long v_kernstart;
-    unsigned long v_kernend;
-    unsigned long v_kernentry;
-
-    unsigned int  load_symtab;
-    unsigned int  pae_kernel;
-    unsigned long symtab_addr;
-    unsigned long symtab_len;
-};
-
-typedef int (*parseimagefunc)(char *image, unsigned long image_size,
-                             struct domain_setup_info *dsi);
-typedef int (*loadimagefunc)(char *image, unsigned long image_size, int xch,
-                            u32 dom, unsigned long *parray,
-                            struct domain_setup_info *dsi);
-
-struct load_funcs
-{
-    parseimagefunc parseimage;
-    loadimagefunc loadimage;
-};
 
 #define ERROR(_m, _a...)                                \
 do {                                                    \
@@ -186,97 +88,6 @@
     return ret;
 }
 
-static inline int do_dom_mem_op(int            xc_handle,
-                               unsigned int   memop, 
-                               unsigned int *extent_list, 
-                               unsigned int  nr_extents,
-                               unsigned int   extent_order,
-                               domid_t        domid)
-{
-    privcmd_hypercall_t hypercall;
-    long ret = -EINVAL;
-
-    hypercall.op     = __HYPERVISOR_dom_mem_op;
-    hypercall.arg[0] = (unsigned long)memop;
-    hypercall.arg[1] = (unsigned long)extent_list;
-    hypercall.arg[2] = (unsigned long)nr_extents;
-    hypercall.arg[3] = (unsigned long)extent_order;
-    hypercall.arg[4] = (unsigned long)domid;
-
-    if ( (extent_list != NULL) && 
-         (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) )
-    {
-        PERROR("Could not lock memory for Xen hypercall");
-        goto out1;
-    }
-
-    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
-    {
-       fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
-                " rebuild the user-space tool set?\n",ret,errno);
-    }
-
-    if ( extent_list != NULL )
-        safe_munlock(extent_list, nr_extents*sizeof(unsigned long));
-
- out1:
-    return ret;
-}    
-
-static inline int do_mmuext_op(
-    int xc_handle,
-    struct mmuext_op *op,
-    unsigned int nr_ops,
-    domid_t dom)
-{
-    privcmd_hypercall_t hypercall;
-    long ret = -EINVAL;
-
-    hypercall.op     = __HYPERVISOR_mmuext_op;
-    hypercall.arg[0] = (unsigned long)op;
-    hypercall.arg[1] = (unsigned long)nr_ops;
-    hypercall.arg[2] = (unsigned long)0;
-    hypercall.arg[3] = (unsigned long)dom;
-
-    if ( mlock(op, nr_ops*sizeof(*op)) != 0 )
-    {
-        PERROR("Could not lock memory for Xen hypercall");
-        goto out1;
-    }
-
-    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
-    {
-       fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
-                    " rebuild the user-space tool set?\n",ret,errno);
-    }
-
-    safe_munlock(op, nr_ops*sizeof(*op));
-
- out1:
-    return ret;
-}    
-
-
-/*
- * PFN mapping.
- */
-int get_pfn_type_batch(int xc_handle, u32 dom, int num, unsigned long *arr);
-unsigned long csum_page (void * page);
-
-/*
- * MMU updates.
- */
-#define MAX_MMU_UPDATES 1024
-typedef struct {
-    mmu_update_t updates[MAX_MMU_UPDATES];
-    int          idx;
-    domid_t      subject;
-} mmu_t;
-mmu_t *init_mmu_updates(int xc_handle, domid_t dom);
-int add_mmu_update(int xc_handle, mmu_t *mmu, 
-                   unsigned long ptr, unsigned long val);
-int finish_mmu_updates(int xc_handle, mmu_t *mmu);
-
 
 /*
  * ioctl-based mfn mapping interface
@@ -296,38 +107,4 @@
 } privcmd_mmap_t; 
 */
 
-#define mfn_mapper_queue_size 128
-
-typedef struct mfn_mapper {
-    int xc_handle;
-    int size;
-    int prot;
-    int error;
-    int max_queue_size;
-    void * addr;
-    privcmd_mmap_t ioctl; 
-    
-} mfn_mapper_t;
-
-unsigned long xc_get_m2p_start_mfn (int xc_handle);
-
-int xc_copy_to_domain_page(int xc_handle, u32 domid,
-                            unsigned long dst_pfn, void *src_page);
-
-unsigned long xc_get_filesz(int fd);
-
-char *xc_read_kernel_image(const char *filename, unsigned long *size);
-
-void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
-                   int xch, u32 dom, unsigned long *parray,
-                   unsigned long vstart);
-
-int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
-             domid_t dom);
-
-/* image loading */
-int probe_elf(char *image, unsigned long image_size, struct load_funcs *funcs);
-int probe_bin(char *image, unsigned long image_size, struct load_funcs *funcs);
-int probe_aout9(char *image, unsigned long image_size, struct load_funcs *funcs);
-
 #endif /* __XC_PRIVATE_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_ptrace.c   Thu Aug 25 22:53:20 2005
@@ -221,7 +221,7 @@
     return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
 
  error_out:
-    return 0;
+    return NULL;
 }
 
 int 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xc_vmx_build.c        Thu Aug 25 22:53:20 2005
@@ -3,7 +3,7 @@
  */
 
 #include <stddef.h>
-#include "xc_private.h"
+#include "xg_private.h"
 #define ELFSIZE 32
 #include "xc_elf.h"
 #include <stdlib.h>
@@ -37,58 +37,70 @@
     int nr_map = 0;
 
     /* XXX: Doesn't work for > 4GB yet */
-    mem_mapp->map[0].addr = 0x0;
-    mem_mapp->map[0].size = 0x9F800;
-    mem_mapp->map[0].type = E820_RAM;
-    mem_mapp->map[0].caching_attr = MEMMAP_WB;
+    mem_mapp->map[nr_map].addr = 0x0;
+    mem_mapp->map[nr_map].size = 0x9F800;
+    mem_mapp->map[nr_map].type = E820_RAM;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
     nr_map++;
 
-    mem_mapp->map[1].addr = 0x9F800;
-    mem_mapp->map[1].size = 0x800;
-    mem_mapp->map[1].type = E820_RESERVED;
-    mem_mapp->map[1].caching_attr = MEMMAP_UC;
+    mem_mapp->map[nr_map].addr = 0x9F800;
+    mem_mapp->map[nr_map].size = 0x800;
+    mem_mapp->map[nr_map].type = E820_RESERVED;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
     nr_map++;
 
-    mem_mapp->map[2].addr = 0xA0000;
-    mem_mapp->map[2].size = 0x20000;
-    mem_mapp->map[2].type = E820_IO;
-    mem_mapp->map[2].caching_attr = MEMMAP_UC;
+    mem_mapp->map[nr_map].addr = 0xA0000;
+    mem_mapp->map[nr_map].size = 0x20000;
+    mem_mapp->map[nr_map].type = E820_IO;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
     nr_map++;
 
-    mem_mapp->map[3].addr = 0xF0000;
-    mem_mapp->map[3].size = 0x10000;
-    mem_mapp->map[3].type = E820_RESERVED;
-    mem_mapp->map[3].caching_attr = MEMMAP_UC;
+    mem_mapp->map[nr_map].addr = 0xF0000;
+    mem_mapp->map[nr_map].size = 0x10000;
+    mem_mapp->map[nr_map].type = E820_RESERVED;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
     nr_map++;
 
-    mem_mapp->map[4].addr = 0x100000;
-    mem_mapp->map[4].size = mem_size - 0x100000 - PAGE_SIZE;
-    mem_mapp->map[4].type = E820_RAM;
-    mem_mapp->map[4].caching_attr = MEMMAP_WB;
+#define STATIC_PAGES    2       /* for ioreq_t and store_mfn */
+    /* Most of the ram goes here */
+    mem_mapp->map[nr_map].addr = 0x100000;
+    mem_mapp->map[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
+    mem_mapp->map[nr_map].type = E820_RAM;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
     nr_map++;
 
-    mem_mapp->map[5].addr = mem_size - PAGE_SIZE;
-    mem_mapp->map[5].size = PAGE_SIZE;
-    mem_mapp->map[5].type = E820_SHARED;
-    mem_mapp->map[5].caching_attr = MEMMAP_WB;
+    /* Statically allocated special pages */
+
+    /* Shared ioreq_t page */
+    mem_mapp->map[nr_map].addr = mem_size - PAGE_SIZE;
+    mem_mapp->map[nr_map].size = PAGE_SIZE;
+    mem_mapp->map[nr_map].type = E820_SHARED;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
     nr_map++;
 
-    mem_mapp->map[6].addr = mem_size;
-    mem_mapp->map[6].size = 0x3 * PAGE_SIZE;
-    mem_mapp->map[6].type = E820_NVS;
-    mem_mapp->map[6].caching_attr = MEMMAP_UC;
+    /* For xenstore */
+    mem_mapp->map[nr_map].addr = mem_size - 2*PAGE_SIZE;
+    mem_mapp->map[nr_map].size = PAGE_SIZE;
+    mem_mapp->map[nr_map].type = E820_XENSTORE;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
     nr_map++;
 
-    mem_mapp->map[7].addr = mem_size + 0x3 * PAGE_SIZE;
-    mem_mapp->map[7].size = 0xA * PAGE_SIZE;
-    mem_mapp->map[7].type = E820_ACPI;
-    mem_mapp->map[7].caching_attr = MEMMAP_WB;
+    mem_mapp->map[nr_map].addr = mem_size;
+    mem_mapp->map[nr_map].size = 0x3 * PAGE_SIZE;
+    mem_mapp->map[nr_map].type = E820_NVS;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
     nr_map++;
 
-    mem_mapp->map[8].addr = 0xFEC00000;
-    mem_mapp->map[8].size = 0x1400000;
-    mem_mapp->map[8].type = E820_IO;
-    mem_mapp->map[8].caching_attr = MEMMAP_UC;
+    mem_mapp->map[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
+    mem_mapp->map[nr_map].size = 0xA * PAGE_SIZE;
+    mem_mapp->map[nr_map].type = E820_ACPI;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_WB;
+    nr_map++;
+
+    mem_mapp->map[nr_map].addr = 0xFEC00000;
+    mem_mapp->map[nr_map].size = 0x1400000;
+    mem_mapp->map[nr_map].type = E820_IO;
+    mem_mapp->map[nr_map].caching_attr = MEMMAP_UC;
     nr_map++;
 
     mem_mapp->nr_map = nr_map;
@@ -212,7 +224,11 @@
                          unsigned long shared_info_frame,
                          unsigned int control_evtchn,
                          unsigned long flags,
-                         struct mem_map * mem_mapp)
+                         unsigned int vcpus,
+                         unsigned int store_evtchn,
+                         unsigned long *store_mfn,
+                         struct mem_map *mem_mapp
+                         )
 {
     l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -227,7 +243,7 @@
     shared_info_t *shared_info;
     struct linux_boot_params * boot_paramsp;
     __u16 * boot_gdtp;
-    mmu_t *mmu = NULL;
+    xc_mmu_t *mmu = NULL;
     int rc;
 
     unsigned long nr_pt_pages;
@@ -342,7 +358,7 @@
         }
     }
 
-    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
 #ifdef __i386__
@@ -443,9 +459,9 @@
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
-        if ( add_mmu_update(xc_handle, mmu,
-                            (page_array[count] << PAGE_SHIFT) | 
-                            MMU_MACHPHYS_UPDATE, count) )
+        if ( xc_add_mmu_update(xc_handle, mmu,
+                              (page_array[count] << PAGE_SHIFT) | 
+                              MMU_MACHPHYS_UPDATE, count) )
            goto error_out;
     }
     
@@ -510,7 +526,10 @@
     boot_paramsp->drive_info.dummy[14] = 32;
 
     /* memsize is in megabytes */
+    /* If you need to create a special e820map, comment this line
+       and use mem-map.sxp */
     build_e820map(mem_mapp, memsize << 20);
+    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
 #if defined (__i386__)
     if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
 #else
@@ -568,7 +587,7 @@
 #endif
 
     /* Send the page update requests down to the hypervisor. */
-    if ( finish_mmu_updates(xc_handle, mmu) )
+    if ( xc_finish_mmu_updates(xc_handle, mmu) )
         goto error_out;
 
     free(mmu);
@@ -597,17 +616,15 @@
     return 0;
 
  error_out:
-    if ( mmu != NULL )
-        free(mmu);
-    if ( page_array != NULL )
-        free(page_array);
+    free(mmu);
+    free(page_array);
     return -1;
 }
 
 
 #define VMX_FEATURE_FLAG 0x20
 
-int vmx_identify(void)
+static int vmx_identify(void)
 {
     int eax, ecx;
 
@@ -637,7 +654,10 @@
                    const char *ramdisk_name,
                    const char *cmdline,
                    unsigned int control_evtchn,
-                   unsigned long flags)
+                   unsigned long flags,
+                   unsigned int vcpus,
+                   unsigned int store_evtchn,
+                   unsigned long *store_mfn)
 {
     dom0_op_t launch_op, op;
     int initrd_fd = -1;
@@ -688,7 +708,7 @@
 
     op.cmd = DOM0_GETDOMAININFO;
     op.u.getdomaininfo.domain = (domid_t)domid;
-    if ( (do_dom0_op(xc_handle, &op) < 0) || 
+    if ( (xc_dom0_op(xc_handle, &op) < 0) || 
          ((u16)op.u.getdomaininfo.domain != domid) )
     {
         PERROR("Could not get info on domain");
@@ -712,7 +732,8 @@
                        initrd_gfd, initrd_size, nr_pages, 
                        ctxt, cmdline,
                        op.u.getdomaininfo.shared_info_frame,
-                       control_evtchn, flags, mem_mapp) < 0 )
+                       control_evtchn, flags, vcpus, store_evtchn, store_mfn,
+                       mem_mapp) < 0 )
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -722,8 +743,7 @@
         close(initrd_fd);
     if ( initrd_gfd )
         gzclose(initrd_gfd);
-    if ( image != NULL )
-        free(image);
+    free(image);
 
     ctxt->flags = VGCF_VMX_GUEST;
     /* FPU is set up to default initial state. */
@@ -769,7 +789,7 @@
     launch_op.u.setdomaininfo.ctxt   = ctxt;
 
     launch_op.cmd = DOM0_SETDOMAININFO;
-    rc = do_dom0_op(xc_handle, &launch_op);
+    rc = xc_dom0_op(xc_handle, &launch_op);
     
     return rc;
 
@@ -778,8 +798,7 @@
         gzclose(initrd_gfd);
     else if ( initrd_fd >= 0 )
         close(initrd_fd);
-    if ( image != NULL )
-        free(image);
+    free(image);
 
     return -1;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/Makefile
--- a/tools/misc/Makefile       Wed Aug 24 02:43:18 2005
+++ b/tools/misc/Makefile       Thu Aug 25 22:53:20 2005
@@ -50,4 +50,4 @@
        $(CC) -c $(CFLAGS) -o $@ $<
 
 $(TARGETS): %: %.o Makefile
-       $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc
+       $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/cpuperf/Makefile
--- a/tools/misc/cpuperf/Makefile       Wed Aug 24 02:43:18 2005
+++ b/tools/misc/cpuperf/Makefile       Thu Aug 25 22:53:20 2005
@@ -37,7 +37,7 @@
        $(CC) $(CFLAGS) -o $@ $<
 
 cpuperf-xen: cpuperf.c $(HDRS) Makefile
-       $(CC) $(CFLAGS) -I $(XEN_LIBXC) -L$(XEN_LIBXC) -lxc -DXENO -o $@ $<
+       $(CC) $(CFLAGS) -I $(XEN_LIBXC) -L$(XEN_LIBXC) -lxenctrl -DXENO -o $@ $<
 
 cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -DPERFCNTR -o $@ $<
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/cpuperf/cpuperf_xeno.h
--- a/tools/misc/cpuperf/cpuperf_xeno.h Wed Aug 24 02:43:18 2005
+++ b/tools/misc/cpuperf/cpuperf_xeno.h Thu Aug 25 22:53:20 2005
@@ -9,7 +9,7 @@
  *
  */
 
-#include <xc.h>
+#include <xenctrl.h>
 
 static int xc_handle;
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/xc_shadow.c
--- a/tools/misc/xc_shadow.c    Wed Aug 24 02:43:18 2005
+++ b/tools/misc/xc_shadow.c    Thu Aug 25 22:53:20 2005
@@ -11,7 +11,7 @@
  */
 
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/mman.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/xend
--- a/tools/misc/xend   Wed Aug 24 02:43:18 2005
+++ b/tools/misc/xend   Thu Aug 25 22:53:20 2005
@@ -24,6 +24,7 @@
 import socket
 import signal
 import time
+import commands
 
 XCS_PATH    = "/var/lib/xen/xcs_socket"
 XCS_EXEC    = "/usr/sbin/xcs"
@@ -114,6 +115,17 @@
         xcs_pidfile.close()
     except:
        return    
+
+def start_xenstored():
+    XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
+    cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
+    if XENSTORED_TRACE:
+        cmd += " -T /var/log/xenstored-trace.log"
+    s,o = commands.getstatusoutput(cmd)
+
+def start_consoled():
+    if os.fork() == 0:
+        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled'])
             
 def main():
     try:
@@ -130,9 +142,13 @@
         return status >> 8
     elif sys.argv[1] == 'start':
         start_xcs()
+        start_xenstored()
+        start_consoled()
         return daemon.start()
     elif sys.argv[1] == 'trace_start':
         start_xcs()
+        start_xenstored()
+        start_consoled()
         return daemon.start(trace=1)
     elif sys.argv[1] == 'stop':
         stop_xcs()
@@ -140,6 +156,8 @@
     elif sys.argv[1] == 'restart':
         stop_xcs()
         start_xcs()
+        start_xenstored()
+        start_consoled()
         return daemon.stop() or daemon.start()
     elif sys.argv[1] == 'status':
         return daemon.status()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/misc/xenperf.c
--- a/tools/misc/xenperf.c      Wed Aug 24 02:43:18 2005
+++ b/tools/misc/xenperf.c      Thu Aug 25 22:53:20 2005
@@ -11,7 +11,7 @@
  */
 
 
-#include <xc.h>
+#include <xenctrl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/mman.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/setup.py
--- a/tools/python/setup.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/setup.py     Thu Aug 25 22:53:20 2005
@@ -17,7 +17,7 @@
                  XEN_ROOT + "/tools/xenstore",
                  ]
 
-libraries = [ "xc", "xenstore-pic" ]
+libraries = [ "xenctrl", "xenguest", "xenstore" ]
 
 xc = Extension("xc",
                extra_compile_args = extra_compile_args,
@@ -41,7 +41,7 @@
                sources            = [ "xen/lowlevel/xs/xs.c" ])
 
 setup(name            = 'xen',
-      version         = '2.0',
+      version         = '3.0',
       description     = 'Xen',
       packages        = ['xen',
                          'xen.lowlevel',
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Aug 25 22:53:20 2005
@@ -5,7 +5,8 @@
  */
 
 #include <Python.h>
-#include <xc.h>
+#include <xenctrl.h>
+#include <xenguest.h>
 #include <zlib.h>
 #include <fcntl.h>
 #include <netinet/in.h>
@@ -297,22 +298,23 @@
     u32   dom;
     char *image, *ramdisk = NULL, *cmdline = "";
     PyObject *memmap;
-    int   control_evtchn, flags = 0;
+    int   control_evtchn, store_evtchn;
+    int flags = 0, vcpus = 1;
     int numItems, i;
     int memsize;
     struct mem_map mem_map;
-
-    static char *kwd_list[] = { "dom", "control_evtchn",
-                                "memsize",
-                                "image", "memmap",
+    unsigned long store_mfn = 0;
+
+    static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
+                                "memsize", "image", "memmap",
                                "ramdisk", "cmdline", "flags",
-                                NULL };
-
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiisO!|ssi", kwd_list, 
-                                      &dom, &control_evtchn, 
+                               "vcpus", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisO!|ssii", kwd_list, 
+                                      &dom, &control_evtchn, &store_evtchn,
                                       &memsize,
                                       &image, &PyList_Type, &memmap,
-                                     &ramdisk, &cmdline, &flags) )
+                                     &ramdisk, &cmdline, &flags, &vcpus) )
         return NULL;
 
     memset(&mem_map, 0, sizeof(mem_map));
@@ -321,7 +323,6 @@
     /* get the number of lines passed to us */
     numItems = PyList_Size(memmap) - 1;        /* removing the line 
                                           containing "memmap" */
-    printf ("numItems: %d\n", numItems);
     mem_map.nr_map = numItems;
    
     /* should raise an error here. */
@@ -365,11 +366,11 @@
     }
 
     if ( xc_vmx_build(xc->xc_handle, dom, memsize, image, &mem_map,
-                        ramdisk, cmdline, control_evtchn, flags) != 0 )
-        return PyErr_SetFromErrno(xc_error);
-    
-    Py_INCREF(zero);
-    return zero;
+                        ramdisk, cmdline, control_evtchn, flags,
+                        vcpus, store_evtchn, &store_mfn) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+    
+    return Py_BuildValue("{s:l}", "store_mfn", store_mfn); /* 'l': store_mfn is an unsigned long, not an int */
 }
 
 static PyObject *pyxc_bvtsched_global_set(PyObject *self,
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Thu Aug 25 22:53:20 2005
@@ -1,6 +1,21 @@
 /* 
  * Python interface to the Xen Store Daemon.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
  * Copyright (C) 2005 Mike Wray Hewlett-Packard
+ *
  */
 
 #include <Python.h>
@@ -253,12 +268,10 @@
     }
     val = PyList_New(perms_n);
     for (i = 0; i < perms_n; i++, perms++) {
-        PyObject *p = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i}",
-                                    "dom",    perms->id,
-                                    "read",   (perms->perms & XS_PERM_READ),
-                                    "write",  (perms->perms & XS_PERM_WRITE),
-                                    "create", (perms->perms & XS_PERM_CREATE),
-                                    "owner",  (perms->perms & XS_PERM_OWNER));
+        PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
+                                    "dom",   perms->id,
+                                    "read",  (perms->perms & XS_PERM_READ),
+                                    "write",  (perms->perms & XS_PERM_WRITE));
         PyList_SetItem(val, i, p);
     }
  exit:
@@ -281,8 +294,7 @@
     static char *arg_spec = "sO";
     char *path = NULL;
     PyObject *perms = NULL;
-    static char *perm_names[] = { "dom", "read", "write", "create", "owner",
-                                 NULL };
+    static char *perm_names[] = { "dom", "read", "write", NULL };
     static char *perm_spec = "i|iiii";
 
     struct xs_handle *xh = xshandle(self);
@@ -315,15 +327,9 @@
         int dom = 0;
         /* Read/write perms. Set these. */
         int p_read = 0, p_write = 0;
-        /* Create/owner perms. Ignore them.
-         * This is so the output from get_permissions() can be used
-         * as input to set_permissions().
-         */
-        int p_create = 0, p_owner = 0;
         PyObject *p = PyList_GetItem(perms, i);
         if (!PyArg_ParseTupleAndKeywords(tuple0, p, perm_spec, perm_names,
-                                        &dom, &p_read, &p_write, &p_create,
-                                        &p_owner))
+                                        &dom, &p_read, &p_write))
             goto exit;
         xsperms[i].id = dom;
         if (p_read)
@@ -343,7 +349,6 @@
 #define xspy_watch_doc "\n"                                            \
        "Watch a path, get notifications when it changes.\n"            \
        " path     [string] : xenstore path.\n"                         \
-       " priority [int]    : watch priority (default 0).\n"            \
        " token    [string] : returned in watch notification.\n"        \
        "\n"                                                            \
        "Returns: [int] 0 on success.\n"                                \
@@ -352,10 +357,9 @@
 
 static PyObject *xspy_watch(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", "priority", "token", NULL };
+    static char *kwd_spec[] = { "path", "token", NULL };
     static char *arg_spec = "s|is";
     char *path = NULL;
-    int priority = 0;
     char *token = "";
 
     struct xs_handle *xh = xshandle(self);
@@ -365,7 +369,7 @@
     if (!xh)
        goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, 
-                                     &path, &priority, &token))
+                                     &path, &token))
         goto exit;
     xsval = xs_watch(xh, path, token);
     val = pyvalue_int(xsval);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/lowlevel/xu/xu.c
--- a/tools/python/xen/lowlevel/xu/xu.c Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/lowlevel/xu/xu.c Thu Aug 25 22:53:20 2005
@@ -21,7 +21,7 @@
 #include <unistd.h>
 #include <errno.h>
 #include <signal.h>
-#include <xc.h>
+#include <xenctrl.h>
 
 #include <xen/xen.h>
 #include <xen/io/domain_controller.h>
@@ -655,7 +655,9 @@
     case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_CONNECT):
         C2P(netif_fe_interface_connect_t, handle,         Int, Long);
         C2P(netif_fe_interface_connect_t, tx_shmem_frame, Int, Long);
+        C2P(netif_fe_interface_connect_t, tx_shmem_ref,   Int, Long);
         C2P(netif_fe_interface_connect_t, rx_shmem_frame, Int, Long);
+        C2P(netif_fe_interface_connect_t, rx_shmem_ref,   Int, Long);
         return dict;
     case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_DISCONNECT):
         C2P(netif_fe_interface_disconnect_t, handle, Int, Long);
@@ -681,7 +683,9 @@
         C2P(netif_be_connect_t, domid,          Int, Long);
         C2P(netif_be_connect_t, netif_handle,   Int, Long);
         C2P(netif_be_connect_t, tx_shmem_frame, Int, Long);
+        C2P(netif_be_connect_t, tx_shmem_ref,   Int, Long);
         C2P(netif_be_connect_t, rx_shmem_frame, Int, Long);
+        C2P(netif_be_connect_t, rx_shmem_ref,   Int, Long);
         C2P(netif_be_connect_t, evtchn,         Int, Long);
         C2P(netif_be_connect_t, status,         Int, Long);
         return dict;
@@ -840,7 +844,7 @@
     case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CONNECT):
         P2C(blkif_be_connect_t, domid,        u32);
         P2C(blkif_be_connect_t, blkif_handle, u32);
-        P2C(blkif_be_connect_t, shmem_frame,  memory_t);
+        P2C(blkif_be_connect_t, shmem_frame,  unsigned long);
         P2C(blkif_be_connect_t, shmem_ref,    u32);
         P2C(blkif_be_connect_t, evtchn,       u16);
         break;
@@ -902,9 +906,11 @@
     case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT):
         P2C(netif_be_connect_t, domid,          u32);
         P2C(netif_be_connect_t, netif_handle,   u32);
-        P2C(netif_be_connect_t, tx_shmem_frame, memory_t);
-        P2C(netif_be_connect_t, rx_shmem_frame, memory_t);
-        P2C(netif_be_connect_t, evtchn,         u16);
+        P2C(netif_be_connect_t, tx_shmem_frame, unsigned long);
+        P2C(netif_be_connect_t, tx_shmem_ref,   u32); 
+        P2C(netif_be_connect_t, rx_shmem_frame, unsigned long);
+        P2C(netif_be_connect_t, rx_shmem_ref,   u32); 
+        P2C(netif_be_connect_t, evtchn,         u16); 
         break;
     case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DISCONNECT):
         P2C(netif_be_disconnect_t, domid,        u32);
@@ -936,7 +942,7 @@
         P2C(usbif_fe_driver_status_changed_t, status, u32);
         break;
     case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT):
-        P2C(usbif_fe_interface_connect_t, shmem_frame, memory_t);
+        P2C(usbif_fe_interface_connect_t, shmem_frame, unsigned long);
         break;
     case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT):
         break;
@@ -950,7 +956,7 @@
         break;
     case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT):
         P2C(usbif_be_connect_t, domid, domid_t);
-        P2C(usbif_be_connect_t, shmem_frame, memory_t);
+        P2C(usbif_be_connect_t, shmem_frame, unsigned long);
         P2C(usbif_be_connect_t, evtchn, u32);
         P2C(usbif_be_connect_t, bandwidth, u32);
         P2C(usbif_be_connect_t, status, u32);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/CreateDomain.py
--- a/tools/python/xen/sv/CreateDomain.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/CreateDomain.py       Thu Aug 25 22:53:20 2005
@@ -17,26 +17,56 @@
                    CreateFinish ]
     
        Wizard.__init__( self, urlWriter, "Create Domain", sheets )
-       
+
+    def op_finish( self, request ):
+        pass
+    
 class CreatePage0( Sheet ):
 
+    title = "General"
+    
     def __init__( self, urlWriter ):
         Sheet.__init__( self, urlWriter, "General", 0 )
         self.addControl( InputControl( 'name', 'VM Name', 'VM Name:', 
"[\\w|\\S]+", "You must enter a name in this field" ) )
         self.addControl( InputControl( 'memory', '64', 'Memory (Mb):', 
"[\\d]+", "You must enter a number in this field" ) )
         self.addControl( InputControl( 'cpu', '0', 'CPU:', "[\\d]+", "You must 
enter a number in this feild" ) )
         self.addControl( InputControl( 'cpu_weight', '1', 'CPU Weight:', 
"[\\d]+", "You must enter a number in this feild" ) )
+        self.addControl( InputControl( 'vcpus', '1', 'Virtual CPUs:', 
'[\\d]+', "You must enter a number in this feild") )
                         
 class CreatePage1( Sheet ):
 
+    title = "Setup Kernel Image"
+
     def __init__( self, urlWriter ):
         Sheet.__init__( self, urlWriter, "Setup Kernel Image", 1 )
-# For now we don't need to select a builder...
-#        self.addControl( ListControl( 'builder', [('linux', 'Linux'), 
('netbsd', 'NetBSD')], 'Kernel Type:' ) )
-        self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.9-xenU', 
'Kernel Image:' ) )
+        self.addControl( ListControl( 'builder', [('linux', 'Linux'), 
('netbsd', 'NetBSD')], 'Domain Builder:' ) )
+        self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.12-xenU', 
'Kernel Image:' ) )
         self.addControl( InputControl( 'extra', '', 'Kernel Command Line 
Parameters:' ) )
+        self.addControl( ListControl( 'use-initrd', [('yes', 'Yes'), ('no', 
'No')], 'Use an Initial Ram Disk?:' ) )
+        self.addControl( FileControl( 'initrd', 
'/boot/initrd-2.6.12-xenU.img', 'Initial Ram Disk:' ) )
+
+    def validate( self, request ):
+        # Returns True when every control accepts its value from the passback.
+        if not self.passback: self.parseForm( request )
+        check = True
+        # Build the map before any lookup; it used to be read one line too early.
+        previous_values = ssxp2hash( string2sxp( self.passback ) )
+        if DEBUG: print previous_values
+        for (feild, control) in self.feilds:
+            if feild == 'initrd' and previous_values.get( 'use-initrd' ) != 'no':
+                # NOTE(review): this fails the sheet when initrd DOES validate - confirm intent.
+                if control.validate( previous_values.get( feild ) ):
+                    check = False
+            elif not control.validate( previous_values.get( feild ) ):
+                check = False
+
+            if DEBUG: print "> %s = %s" % (feild, previous_values.get( feild ))
+        return check
+                                                 
 
 class CreatePage2( Sheet ):
+
+    title = "Choose number of VBDS"
 
     def __init__( self, urlWriter ):
        Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 2 )
@@ -44,10 +74,12 @@
 
 class CreatePage3( Sheet ):
 
+    title = "Setup VBDS"
+
     def __init__( self, urlWriter ):
         Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 3 )
         
-    def write_BODY( self, request, err ):
+    def write_BODY( self, request ):
         if not self.passback: self.parseForm( request )
     
        previous_values = sxp2hash( string2sxp( self.passback ) ) #get the hash 
for quick reference
@@ -61,9 +93,11 @@
             
         self.addControl( InputControl( 'root', '/dev/sda1', 'Root device (in 
VM):' ) )
         
-        Sheet.write_BODY( self, request, err )
+        Sheet.write_BODY( self, request )
                 
 class CreatePage4( Sheet ):
+
+    title = "Network Setting"
 
     def __init__( self, urlWriter ):        
         Sheet.__init__( self, urlWriter, "Network settings", 4 )
@@ -76,26 +110,27 @@
                  
 class CreateFinish( Sheet ):
 
+    title = "Finish"
+
     def __init__( self, urlWriter ):
         Sheet.__init__( self, urlWriter, "All Done", 5 )
         
-    def write_BODY( self, request, err ):
+    def write_BODY( self, request ):
     
         if not self.passback: self.parseForm( request )
         
         xend_sxp = self.translate_sxp( string2sxp( self.passback ) )
+
+        request.write( "<pre>%s</pre>" % sxp2prettystring( xend_sxp ) )
         
         try:
-            dom_sxp = server.xend_domain_create( xend_sxp )
-            success = "Your domain was successfully created.\n"
-        except:
-            success = "There was an error creating your domain.\nThe 
configuration used is as follows:\n"
-            dom_sxp = xend_sxp
-            
-            
-        
-        pt = PreTab( success + sxp2prettystring( dom_sxp ) )
-        pt.write_BODY( request )
+            server.xend_domain_create( xend_sxp )
+            request.write( "<p>Your domain has been successfully created.</p>" )
+        except Exception, e:
+            request.write( "<p>There was an error creating your 
domain.<br/>The configuration used is as follows:\n</p>" )
+            request.write( "<pre>%s</pre>" % sxp2prettystring( xend_sxp ) )
+            request.write( "<p>The error was:</p>" )
+            request.write( "<pre>%s</pre>" % str( e ) )
 
         request.write( "<input type='hidden' name='passback' 
value=\"%s\"></p>" % self.passback )
         request.write( "<input type='hidden' name='sheet' value='%s'></p>" % 
self.location )
@@ -117,6 +152,7 @@
         vals.maxmem =   get( 'maxmem' )
         vals.cpu =     get( 'cpu' )
         vals.cpu_weight = get( 'cpu_weight' )
+        vals.vcpus = get( 'vcpus' )
         
         vals.builder =  get( 'builder' )       
         vals.kernel =   get( 'kernel' )
@@ -128,7 +164,7 @@
         vbds = []
         
         for i in range( int( get( 'num_vbds' ) ) ):
-            vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ), 
get( 'vbd%s_mode' % i ) ) )
+            vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ), 
get( 'vbd%s_mode' % i ), None ) )
         
         vals.disk = vbds    
             
@@ -141,6 +177,9 @@
         vals.restart = None
         vals.console = None
         vals.ramdisk = None
+        vals.ssidref = -1
+        vals.bootloader = None
+        vals.usb = []
         
         #setup vifs
         
@@ -155,9 +194,11 @@
         dhcp = get( 'dhcp' )
         
         vals.cmdline_ip = "%s:%s:%s:%s:%s:eth0:%s" % (ip, nfs, gate, mask, 
host, dhcp)
+
+        opts = None
         
         try:
-            return make_config( vals )
-        except:
-            return [["Error creating domain config."]]    
-        
+            return make_config( opts, vals )
+        except Exception, e:
+            return [["There was an error creating the domain config SXP.  This 
is typically due to an interface change in xm/create.py:make_config", e]]    
+        
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/DomInfo.py
--- a/tools/python/xen/sv/DomInfo.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/DomInfo.py    Thu Aug 25 22:53:20 2005
@@ -4,6 +4,7 @@
 from xen.sv.HTMLBase import HTMLBase
 from xen.sv.util import *
 from xen.sv.GenTabbed import *
+from xen.sv.Wizard import *
 
 DEBUG=1
 
@@ -12,33 +13,69 @@
     def __init__( self, urlWriter ):
         
         self.dom = 0;
-    
-        def tabUrlWriter( tab ):
-            return urlWriter( "&dom=%s%s" % ( self.dom, tab ) )
-        
-        GenTabbed.__init__( self, "Domain Info", tabUrlWriter, [ 'General', 
'SXP', 'Devices' ], [ DomGeneralTab, DomSXPTab, NullTab ]  )
+                   
+        GenTabbed.__init__( self, "Domain Info", urlWriter, [ 'General', 
'SXP', 'Devices', 'Migrate', 'Save' ], [ DomGeneralTab, DomSXPTab, 
DomDeviceTab, DomMigrateTab, DomSaveTab ]  )
 
     def write_BODY( self, request ):
-        dom = request.args.get('dom')
-        
-        if dom is None or len(dom) != 1:
+        try:
+            dom = int( getVar( 'dom', request ) )
+        except:
             request.write( "<p>Please Select a Domain</p>" )
             return None
-        else:
-            self.dom = dom[0]
-        
+       
         GenTabbed.write_BODY( self, request )
         
     def write_MENU( self, request ):
-        pass
-
+       domains = []
+
+       try:
+           domains = server.xend_domains()
+           domains.sort()
+       except:
+           pass
+
+       request.write( "\n<table style='border:0px solid white' cellspacing='0' 
cellpadding='0' border='0' width='100%'>\n" )
+       request.write( "<tr class='domainInfoHead'>" )
+       request.write( "<td class='domainInfoHead' 
align='center'>Domain</td>\n" )
+       request.write( "<td class='domainInfoHead' align='center'>Name</td>\n" )
+       request.write( "<td class='domainInfoHead' align='center'>State</td>\n" 
)
+       request.write( "<td class='domainInfoHead' align='center'></td>\n" )
+       request.write( "</tr>" )
+
+       odd = True
+       if not domains is None:
+           for domain in domains:
+               odd = not odd;
+               if odd:
+                   request.write( "<tr class='domainInfoOdd'>\n" )
+               else:
+                   request.write( "<tr class='domainInfoEven'>\n" )
+               domInfo = getDomInfo( domain )
+               request.write( "<td class='domainInfo' 
align='center'>%(id)s</td>\n" % domInfo )
+               url = self.urlWriter( "&dom=%(id)s" % domInfo )
+               request.write( "<td class='domainInfo' align='center'><a 
href='%s'>%s</a></td>\n" % ( url, domInfo['name'] ) )
+               request.write( "<td class='domainInfo' 
align='center'>%(state)5s</td>\n" % domInfo )
+               if domInfo[ 'id' ] != "0":
+                   request.write( "<td class='domainInfo' align='center'>" )
+                   if domInfo[ 'state' ][ 2 ] == "-":
+                       request.write( "<img src='images/small-pause.png' 
onclick='doOp2( \"pause\", \"%(dom)-4s\" )'>" % domInfo )
+                   else:
+                       request.write( "<img src='images/small-unpause.png' 
onclick='doOp2( \"unpause\", \"%(dom)-4s\" )'>" % domInfo )
+                   request.write( "<img src='images/small-destroy.png' 
onclick='doOp2( \"destroy\", \"%(dom)-4s\" )'></td>" % domInfo )
+               else:
+                   request.write( "<td> </td>" )
+               request.write( "</tr>\n" )
+       else:
+           request.write( "<tr colspan='10'><p class='small'>Error getting 
domain list<br/>Perhaps XenD not running?</p></tr>")
+       request.write( "</table>" )
+       
 class DomGeneralTab( CompositeTab ):
-    def __init__( self ):
-       CompositeTab.__init__( self, [ DomGenTab, DomActionTab ] )        
-        
+    def __init__( self, urlWriter ):
+       CompositeTab.__init__( self, [ DomGenTab, DomActionTab ], urlWriter )   
     
+       
 class DomGenTab( GeneralTab ):
 
-    def __init__( self ):
+    def __init__( self, urlWriter ):
     
         titles = {}
     
@@ -60,13 +97,13 @@
             request.write( "<p>Please Select a Domain</p>" )
             return None
             
-        self.dict = getDomInfoHash( self.dom )
+        self.dict = getDomInfo( self.dom )
         
         GeneralTab.write_BODY( self, request )
             
 class DomSXPTab( PreTab ):
 
-    def __init__( self ):
+    def __init__( self, urlWriter ):
         self.dom = 0
         PreTab.__init__( self, "" )
 
@@ -86,15 +123,15 @@
         self.source = sxp2prettystring( domInfo )
         
         PreTab.write_BODY( self, request )
-        
+       
 class DomActionTab( ActionTab ):
 
-    def __init__( self ):
-       actions = { "shutdown" : "shutdown",
-                   "reboot" : "reboot",
-                    "pause" : "pause",
-                    "unpause" : "unpause",
-                    "destroy" : "destroy" }
+    def __init__( self, urlWriter ):
+       actions = { "shutdown" : "Shutdown",
+                   "reboot" : "Reboot",
+                    "pause" : "Pause",
+                    "unpause" : "Unpause",
+                    "destroy" : "Destroy" }
         ActionTab.__init__( self, actions )    
         
     def op_shutdown( self, request ):
@@ -141,8 +178,91 @@
                server.xend_domain_destroy( int( dom ), "halt" )
            except:
                pass
-        
-    
-    
-        
-
+
+class DomDeviceTab( CompositeTab ):
+
+    def __init__( self, urlWriter ):
+        CompositeTab.__init__( self, [ DomDeviceListTab, DomDeviceOptionsTab, 
DomDeviceActionTab ], urlWriter )
+
+class DomDeviceListTab( NullTab ):
+
+    title = "Device List"
+
+    def __init__( self, urlWriter ):
+        pass
+
+class DomDeviceOptionsTab( NullTab ):
+
+    title = "Device Options"
+
+    def __init__( self, urlWriter ):
+        pass
+
+class DomDeviceActionTab( ActionTab ):
+
+    def __init__( self, urlWriter ):
+        ActionTab.__init__( self, { "addvcpu" : "Add VCPU", "addvbd" : "Add 
VBD", "addvif" : "Add VIF" } )
+
+class DomMigrateTab( CompositeTab ):
+
+    def __init__( self, urlWriter ):
+        CompositeTab.__init__( self, [ DomMigrateExtraTab, DomMigrateActionTab 
], urlWriter ) 
+
+class DomMigrateExtraTab( Sheet ):
+
+    def __init__( self, urlWriter ):
+        Sheet.__init__( self, urlWriter, "Configure Migration", 0)
+        self.addControl( TickControl('live', 'True', 'Live migrate:') )
+        self.addControl( InputControl('rate', '0', 'Rate limit:') )
+        self.addControl( InputControl( 'dest', 'host.domain', 'Name or IP 
address:', ".*") )
+                                                                               
                             
+class DomMigrateActionTab( ActionTab ):
+
+    def __init__( self, urlWriter ):
+        actions = { "migrate" : "Migrate" }
+        ActionTab.__init__( self, actions )
+                
+    def op_migrate( self, request ):
+        try:
+            domid = int( getVar( 'dom', request ) )
+            live  = getVar( 'live', request )
+            rate  = getVar( 'rate', request )
+            dest  = getVar( 'dest', request )
+            dom_sxp = server.xend_domain_migrate( domid, dest, live == 'True', 
rate )
+            success = "Your domain was successfully Migrated.\n"
+        except Exception, e:
+            success = "There was an error migrating your domain\n"
+            dom_sxp = str(e)
+                                                        
+class DomSaveTab( CompositeTab ):
+
+    def __init__( self, urlWriter ):
+        CompositeTab.__init__( self, [ DomSaveExtraTab, DomSaveActionTab ], 
urlWriter ) 
+
+class DomSaveExtraTab( Sheet ):
+
+    title = "Save location"
+
+    def __init__( self, urlWriter ):
+        Sheet.__init__( self, urlWriter, "Save Domain to file", 0 )
+        self.addControl( InputControl( 'file', '', 'Suspend file name:', ".*") 
)
+               
+class DomSaveActionTab( ActionTab ):
+
+    def __init__( self, urlWriter ):
+        actions = { "save" : "Save" }
+        ActionTab.__init__( self, actions )
+
+    def op_save( self, request ):
+        # Save the domain named by the 'dom' request variable to the path
+        # typed into the 'file' control of DomSaveExtraTab.  The previous
+        # code indexed a `config` mapping that is not defined in this method,
+        # so the error was swallowed below and the save was never issued.
+        try:
+            dom = int( getVar( 'dom', request ) )
+            path = getVar( 'file', request )
+            dom_sxp = server.xend_domain_save( dom, path )
+            success = "Your domain was successfully saved.\n"
+        except Exception, e:
+            success = "There was an error saving your domain\n"
+            dom_sxp = str(e)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/GenTabbed.py
--- a/tools/python/xen/sv/GenTabbed.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/GenTabbed.py  Thu Aug 25 22:53:20 2005
@@ -1,7 +1,6 @@
 import types
 
 from xen.sv.HTMLBase import HTMLBase
-from xen.sv.TabView import TabView
 from xen.sv.util import getVar
 
 class GenTabbed( HTMLBase ):
@@ -12,39 +11,44 @@
         self.tabObjects = tabObjects
         self.urlWriter = urlWriter
         self.title = title
+        
+    def write_BODY( self, request ):
+        if not self.__dict__.has_key( "tab" ):
+            try:
+                self.tab = int( getVar( 'tab', request, 0 ) )
+            except:
+                self.tab = 0
+            
+        request.write( "\n<div class='title'>%s</div>" % self.title )
+        
+        TabView( self.tab, self.tabStrings, self.urlWriter ).write_BODY( 
request )
+        
+        try:
+            request.write( "\n<div class='tab'>" )
+            render_tab = self.tabObjects[ self.tab ]
+            render_tab( self.urlWriter ).write_BODY( request )
+            request.write( "\n</div>" )
+        except Exception, e:
+            request.write( "\n<p>Error Rendering Tab</p>" )
+            request.write( "\n<p>%s</p>" % str( e ) )
 
-    def write_BODY( self, request, urlWriter = None ):
-        try:
-            tab = int( getVar( 'tab', request, 0 ) )
-        except:
-            tab = 0
-            
-        request.write( "<table style='' width='100%' border='0' 
cellspacing='0' cellpadding='0'>" )
-        request.write( "<tr><td>" )
-        request.write( "<p align='center'><u>%s</u></p>" % self.title )
-        
-        TabView( tab, self.tabStrings, self.urlWriter ).write_BODY( request )
-        
-        request.write( "</td></tr><tr><td>" )
+        request.write( "\n<input type=\"hidden\" name=\"tab\" value=\"%d\">" % 
self.tab )
+
+    def perform( self, request ):
+        request.write( "Tab> perform" )
+        request.write( "<br/>op: " + str( getVar( 'op', request ) ) )
+        request.write( "<br/>args: " + str( getVar( 'args', request ) ) )
+        request.write( "<br/>tab: " + str( getVar( 'tab', request ) ) )      
 
         try:
-            render_tab = self.tabObjects[ tab ]
-            render_tab().write_BODY( request )
+            action = getVar( 'op', request, 0 )
+            if action == "tab":
+                self.tab = int( getVar( 'args', request ) )
+            else:
+                self.tab = int( getVar( 'tab', request, 0 ) )
+                self.tabObjects[ self.tab ]( self.urlWriter ).perform( request 
)
         except:
-            request.write( "<p>Error Rendering Tab</p>" )
-       
-        request.write( "</td></tr></table>" )
-       
-    def perform( self, request ):
-        try:
-            tab = int( getVar( 'tab', request, 0 ) )
-        except:
-            tab = 0;
-            
-        op_tab = self.tabObjects[ tab ]
-        
-        if op_tab:
-            op_tab().perform( request )
+            pass
         
 class PreTab( HTMLBase ):
 
@@ -53,12 +57,9 @@
         self.source = source
     
     def write_BODY( self, request ):
-        
-        request.write( "<div style='display: block; overflow: auto; border: 
0px solid black; width: 540px; padding: 5px; z-index:0; align: center'><pre>" )
-        
+        request.write( "\n<pre>" )
         request.write( self.source )
-        
-        request.write( "</pre></div>" )
+        request.write( "\n</pre>" )
 
 class GeneralTab( HTMLBase ):
                         
@@ -69,7 +70,7 @@
                         
     def write_BODY( self, request ): 
         
-        request.write( "<table width='100%' cellspacing='0' cellpadding='0' 
border='0'>" )
+        request.write( "\n<table width='100%' cellspacing='0' cellpadding='0' 
border='0'>" )
         
         def writeAttr( niceName, attr, formatter=None ):
             if type( attr ) is types.TupleType:
@@ -80,7 +81,7 @@
                     temp = formatter( self.dict[ attr ] )
                 else:
                     temp = str( self.dict[ attr ] )
-                request.write( "<tr><td width='50%%'><p>%s:</p></td><td 
width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) )
+                request.write( "\n<tr><td width='50%%'><p>%s:</p></td><td 
width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) )
         
         for niceName, attr in self.titles.items():
             writeAttr( niceName, attr )
@@ -89,16 +90,12 @@
 
 class NullTab( HTMLBase ):
     
-    def __init__( self ):
-        HTMLBase.__init__( self )
-        self.title = "Null Tab"
-
-    def __init__( self, title ):
+    def __init__( self, title="Null Tab" ):
         HTMLBase.__init__( self )
         self.title = title
-        
+
     def write_BODY( self, request ):
-        request.write( "<p>%s</p>" % self.title )
+        request.write( "\n<p>%s</p>" % self.title )
 
 class ActionTab( HTMLBase ):
 
@@ -107,29 +104,44 @@
         HTMLBase.__init__( self )
         
     def write_BODY( self, request ):
-        request.write( "<p align='center'><table cellspacing='3' 
cellpadding='2' border='0'><tr>" )
-    
-        for ( command, text ) in self.actions.items():
-            request.write( "<td style='border: 1px solid black; 
background-color: grey' onmouseover='buttonMouseOver( this )' 
onmouseout='buttonMouseOut( this )'>" )
-            request.write( "<p><a href='javascript: doOp( \"%s\" 
);'>%s</a></p></td>" % (command, text) )
- 
-        request.write("</table></p>")        
-        
+        for item in self.actions.items():
+            try:
+                ((op, attr), title) = item
+            except:
+                (op, title) = item
+                attr = ""
+            request.write( "\n<div class='button' onclick=\"doOp2( '%s', '%s' )\">%s</div>" % (op, attr, title) )
+
 class CompositeTab( HTMLBase ):
 
-    def __init__( self, tabs ):
+    def __init__( self, tabs, urlWriter ):
        HTMLBase.__init__( self )
         self.tabs = tabs
+        self.urlWriter = urlWriter
         
     def write_BODY( self, request ):
        for tab in self.tabs:
-            request.write( "<br/>" )
-            tab().write_BODY( request )
+            tab( self.urlWriter ).write_BODY( request )
             
     def perform( self, request ):
        for tab in self.tabs:
-            tab().perform( request )
-    
-    
-       
-        
+            tab( self.urlWriter ).perform( request )
+
+class TabView( HTMLBase ):
+
+        # tab - int, id into tabs of selected tab
+        # tabs - list of strings, tab names
+        # urlWriter -
+        def __init__( self, tab, tabs, urlWriter ):
+            HTMLBase.__init__(self)
+            self.tab = tab
+            self.tabs = tabs
+            self.urlWriter = urlWriter
+
+        def write_BODY( self, request ):
+            for i in range( len( self.tabs ) ):
+                if self.tab == i:
+                    at = " id='activeTab'"
+                else:
+                    at = ""
+                request.write( "\n<div%s class='tabButton' onclick=\"doOp2( 
'tab', '%d' )\">%s</div>" % ( at, i, self.tabs[ i ] ) )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/HTMLBase.py
--- a/tools/python/xen/sv/HTMLBase.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/HTMLBase.py   Thu Aug 25 22:53:20 2005
@@ -12,26 +12,17 @@
         return self.render_GET( request )
         
     def render_GET( self, request ):
-        self.write_TOP( request )
-        self.write_BODY( request )
-        self.write_BOTTOM( request )
-        return ''
-                
+        pass
+    
     def write_BODY( self, request ):
-        request.write( "BODY" )
+        pass
         
     def write_TOP( self, request ):
-        request.write( '<html><head><title>Xen</title><link rel="stylesheet" 
type="text/css" href="inc/style.css" />' )
-        request.write( '<script src="inc/script.js"></script>' )
-        request.write( '</head><body>' )
-        request.write('<form method="post" action="%s">' % request.uri)
-
+        pass
+    
     def write_BOTTOM( self, request ):
-        request.write('<input type="hidden" name="op" value="">')
-        request.write('<input type="hidden" name="args" value="">')
-        request.write('</form>')
-        request.write( "</body></html>" )
-
+        pass
+    
     def get_op_method(self, op):
         """Get the method for an operation.
         For operation 'foo' looks for 'op_foo'.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/Main.py
--- a/tools/python/xen/sv/Main.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/Main.py       Thu Aug 25 22:53:20 2005
@@ -1,113 +1,90 @@
+
 from xen.sv.HTMLBase import HTMLBase
-from xen.sv.DomList  import DomList
 from xen.sv.NodeInfo import NodeInfo
 from xen.sv.DomInfo  import DomInfo
 from xen.sv.CreateDomain import CreateDomain
-from xen.sv.MigrateDomain import MigrateDomain
-from xen.sv.SaveDomain import SaveDomain
 from xen.sv.RestoreDomain import RestoreDomain
-
-from xen.xend.XendClient import server
 
 from xen.sv.util import getVar
 
-class Main( HTMLBase ):
-    
-    isLeaf = True
+# adapter to make this all work with mod_python
+# as opposed to Twisted
+# (c) Tom Wilkie 2005
 
-    def __init__( self, urlWriter = None ):
+class Args:
+    def __init__( self, req ):
+        from mod_python.util import FieldStorage
+        self.fieldStorage = FieldStorage( req, True )
+
+    # return a list of values for the given key,
+    # or None if key not there
+    def get( self, var ):
+        retVar = self.fieldStorage.getlist( var )
+        if len( retVar ) == 0:
+            return None
+        else:
+            return retVar
+
+    # return a list of tuples,
+    # (key, value) where value is a list of values
+    def items( self ):
+        result = [];
+        for key in self.fieldStorage.keys():
+            result.append( (key, self.fieldStorage.getlist( key ) ) )
+        return result
+                                                                               
                                                                             
+class TwistedAdapter:
+    def __init__( self, req ):
+        self.args = Args( req )
+        self.uri = req.unparsed_uri
+        self.url = req.uri
+        self.write = req.write
+
+# This is the Main class
+# It pieces together all the modules
+
+class Main:
+    def __init__( self ):
         self.modules = { "node": NodeInfo, 
-                         "list": DomList, 
-                         "info": DomInfo,
                          "create": CreateDomain,
-                         "migrate" : MigrateDomain,
-                         "save" : SaveDomain,
-                         "restore" : RestoreDomain }
+                         "restore" : RestoreDomain,
+                         "info": DomInfo }
 
-        # ordered list of module menus to display
-        self.module_menus = [ "node", "create", "migrate", "save",
-                              "restore", "list" ]
-        HTMLBase.__init__(self)
-        
-    def render_POST( self, request ):
-    
-       #decide what module post'd the action
-                
-       args = getVar( 'args', request )
+        self.init_done = False
 
-        mod = getVar( 'mod', request )
-                
-        if not mod is None and args is None:
-            module = self.modules[ mod ]
-            #check module exists
-            if module:
-               module( self.mainUrlWriter ).perform( request )
-        else:
-            self.perform( request )     
-    
-        return self.render_GET( request )
+    def init_modules( self, request ):
+        for moduleName, module in self.modules.iteritems():
+            self.modules[ moduleName ] = module( self.urlWriter( moduleName, 
request.url ) )             
 
-    def mainUrlWriter( self, module ):
-       def fun( f ):
-            return "Main.rpy?mod=%s%s" % ( module, f )
-        return fun    
-        
-    def write_BODY( self, request ):
-    
-        request.write( "\n<table style='border:0px solid black; background: 
url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0' 
width='780px' height='536px'>\n" )
-        request.write( "<tr>\n" )
-        request.write( " <td width='15px'> </td>" )
-        request.write( " <td width='175px' align='center' valign'center'>" )
-        request.write( "  <table cellspacing='0' cellpadding='0' border='0' 
width='100%' height='100%'>" )
-        request.write( "   <tr><td height='140px' align='center' 
valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" )
-        request.write( "   <img src='images/xen.png' width='150' height='75' 
border='0'/></a><br/></td></tr>" )
-        request.write( "   <tr><td height='60px' align='center'><p 
class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@xxxxxxxxx'>Tom 
Wilkie</a> 2004</p></td></tr>")
-        request.write( "   <tr><td align='center' valign='top'>" )
+    def render_menu( self, request ):
+        if not self.init_done:
+            self.init_modules( request )
+            self.init_done = True
+            
+        for moduleName, module in self.modules.iteritems():
+            module.write_MENU( request )
+            request.write( "\n" )
 
-        for modName in self.module_menus:
-            self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU( 
request )
-        
-        request.write( "   </td></tr>" )
-        request.write( "  </table>" )
-        request.write( "  " )
-        request.write( " </td>\n" )
-        request.write( " <td width='15px'> </td>" )
-        request.write( " <td width='558px' align='left' valign='top'>" )
-        request.write( "  <table cellspacing='0' cellpadding='0' border='0' 
width='100%' height='100%'>" )
-        request.write( "   <tr><td height='20px'></td></tr>" )
-        request.write( "   <tr><td align='center' valign='top'>" )
-        
-        modName = getVar('mod', request)
-        
-        if modName is None:
+    def render_main( self, request ):
+        if not self.init_done:
+            self.init_modules( request )
+            self.init_done = True
+                                   
+        moduleName = getVar('mod', request)
+        if moduleName not in self.modules:
             request.write( '<p>Please select a module</p>' )
         else:
-            module = self.modules[ modName ]
-            if module:
-               module( self.mainUrlWriter( modName ) ).write_BODY( request )  
-            else:
-               request.write( '<p>Invalid module. Please select another</p>' )
-    
-        request.write( "   </td></tr>" )
-        request.write( "  </table>" )
-        request.write( " </td>\n" )
-        request.write( " <td width='17px'> </td>" )
-        request.write( "</tr>\n" )
+            module = self.modules[ moduleName ]
+            module.write_BODY( request )
+
+    def do_POST( self, request ): 
+        if not self.init_done:
+            self.init_modules( request )
+            self.init_done = True                       
         
-        request.write( "</table>\n" )
-        
-                
-    def op_destroy( self, request ):
-       dom = getVar( 'dom', request )
-        if not dom is None and dom != "0":
-            server.xend_domain_destroy( int( dom ), "halt" ) 
-                 
-    def op_pause( self, request ):
-       dom = getVar( 'dom', request )
-        if not dom is None and dom != "0":
-            server.xend_domain_pause( int( dom ) )      
-    
-    def op_unpause( self, request ):
-       dom = getVar( 'dom', request )
-        if not dom is None and dom != "0":
-            server.xend_domain_unpause( int( dom ) )      
+       moduleName = getVar( 'mod', request )      
+        if moduleName in self.modules:
+            self.modules[ moduleName ].perform( request )
+
+    def urlWriter( self, module, url ):
+        return lambda x: "%s?mod=%s%s" % ( url, module, x )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/NodeInfo.py
--- a/tools/python/xen/sv/NodeInfo.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/NodeInfo.py   Thu Aug 25 22:53:20 2005
@@ -6,18 +6,18 @@
 class NodeInfo( GenTabbed ):
 
     def __init__( self, urlWriter ):  
-        GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General', 
'Dmesg', ], [ NodeGeneralTab, NodeDmesgTab ] )
+        GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General', 
'Dmesg', 'SXP' ], [ NodeGeneralTab, NodeDmesgTab, NodeSXPTab ] )
     
     def write_MENU( self, request ):
         request.write( "<p class='small'><a href='%s'>Node details</a></p>" % 
self.urlWriter( '' ) )
 
 class NodeGeneralTab( CompositeTab ):
-    def __init__( self ):
-       CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ] )        
+    def __init__( self, urlWriter ):
+       CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ], urlWriter 
)        
         
 class NodeInfoTab( GeneralTab ):
                         
-    def __init__( self ):
+    def __init__( self, urlWriter ):
          
        nodeInfo = {}
         try:
@@ -41,7 +41,7 @@
 
 class NodeDmesgTab( PreTab ):
 
-    def __init__( self ):
+    def __init__( self, urlWriter ):
        try:
             dmesg = server.xend_node_get_dmesg()
         except:
@@ -50,7 +50,7 @@
   
 class NodeActionTab( ActionTab ):
 
-    def __init__( self ):
+    def __init__( self, urlWriter ):
         ActionTab.__init__( self, { "shutdown" : "shutdown",
                "reboot" : "reboot" } )    
         
@@ -61,3 +61,13 @@
     def op_reboot( self, request ):
         if debug: print ">NodeReboot"
         server.xend_node_reboot()
+
+class NodeSXPTab( PreTab ):
+
+    def __init__( self, urlWriter ):
+        try:
+            nodeSXP = sxp2string( server.xend_node() )
+        except:
+            nodeSXP = 'Error getting node sxp'
+
+        PreTab.__init__( self, nodeSXP )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/RestoreDomain.py
--- a/tools/python/xen/sv/RestoreDomain.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/RestoreDomain.py      Thu Aug 25 22:53:20 2005
@@ -16,6 +16,8 @@
 
 
 class ChooseRestoreDomain( Sheet ):
+    title = "Configure Restore"
+
     def __init__( self, urlWriter ):
         Sheet.__init__( self, urlWriter, "Configure Restore", 0)
         
@@ -24,6 +26,8 @@
                                        ".*") )
 
 class DoRestore( Sheet ):
+    title = "Restore Done"
+    
     def __init__(self, urlWriter ):
         Sheet.__init__(self, urlWriter, "Restore Done", 1)
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/Wizard.py
--- a/tools/python/xen/sv/Wizard.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/Wizard.py     Thu Aug 25 22:53:20 2005
@@ -1,71 +1,44 @@
 from xen.sv.util import *
 from xen.sv.HTMLBase import HTMLBase
+from xen.sv.GenTabbed import GenTabbed, ActionTab
 from xen.xend import sxp
 
 import re
 
 DEBUG = 0
 
-class Wizard( HTMLBase ):
+class Wizard( GenTabbed ):
 
     def __init__( self, urlWriter, title, sheets ):
-        HTMLBase.__init__( self )
         self.title = title
         self.sheets = sheets
         self.urlWriter = urlWriter
+        self.offset = 0
+        GenTabbed.__init__( self, title, urlWriter, map( lambda x: x.title, 
sheets ), sheets ) 
         
     def write_MENU( self, request ):
        request.write( "<p class='small'><a href='%s'>%s</a></p>" % 
(self.urlWriter( '' ), self.title) ) 
     
     def write_BODY( self, request ):
-        
-       request.write( "<table width='100%' border='0' cellspacing='0' 
cellpadding='0'><tr><td>" )
-        request.write( "<p align='center'><u>%s</u></p></td></tr><tr><td>" % 
self.title )
-        
-        currSheet = getVar( 'sheet', request )
-    
-        if not currSheet is None:
-            currSheet = int( currSheet )
-        else:
-            currSheet = 0
-            
-        sheet = self.sheets[ currSheet ]( self.urlWriter )
-        
-        err = not sheet.validate( request )
-        
-        if not err:    
-            op = getVar( 'op', request )
-        
-            if op == 'next':
-               currSheet += 1
-            elif op == 'prev':
-               currSheet -= 1
-             
-            sheet = self.sheets[ currSheet ]( self.urlWriter )
-        
-        if getVar( 'visited-sheet%s' % currSheet, request ):
-            sheet.write_BODY( request, err )
-        else:
-            sheet.write_BODY( request, False )
-
-        
-        request.write( "</td></tr><tr><td><table width='100%' border='0' 
cellspacing='0' cellpadding='0'><tr>" )
-        request.write( "<td width='80%'></td><td width='20%' align='center'><p 
align='center'>" )
-       if currSheet > 0:
-                   request.write( "<img src='images/previous.png' 
onclick='doOp( \"prev\" )' onmouseover='update( \"wizText\", \"Previous\" )' 
onmouseout='update( \"wizText\", \" \" )'> " )
-        if currSheet < ( len( self.sheets ) - 2 ):        
-            request.write( "<img src='images/next.png' onclick='doOp( \"next\" 
)' onmouseover='update( \"wizText\", \"Next\" )' onmouseout='update( 
\"wizText\", \" \" )'>" )
-        elif currSheet == ( len( self.sheets ) - 2 ):
-            request.write( "<img src='images/finish.png' onclick='doOp( 
\"next\" )' onmouseover='update( \"wizText\", \"Finish\" )' onmouseout='update( 
\"wizText\", \" \" )'>" )
-        request.write( "</p><p align='center'><span 
id='wizText'></span></p></td></tr></table>" )
-        request.write( "</td></tr></table>" )
-        
-    def op_next( self, request ):
-       pass
-        
-    def op_prev( self, request ):
-       pass
-        
+        GenTabbed.write_BODY( self, request )
+        actionTab = ActionTab( { ("tab", str(self.tab-1)) : "< Prev", ("tab", 
str(self.tab+1)) : "Next >", "finish" : "Finish" } )
+        actionTab.write_BODY( request )
+
+    def perform( self, request ):
+        try:
+            action = getVar( 'op', request, 0 )
+            if action == "tab":
+                self.tab = int( getVar( 'args', request ) )
+                oldtab = int( getVar( 'tab', request ) )
+                if not self.tabObjects[ oldtab ]( self.urlWriter ).validate( 
request ):
+                    self.tab = oldtab
+            else:
+                self.tab = int( getVar( 'tab', request, 0 ) )
+                self.tabObjects[ self.tab ]( self.urlWriter ).perform( request 
)
+                getattr( self, "op_" +  getVar( "op", request ), None )( 
request )
+        except:
+            pass
+            
     def op_finish( self, request ):
        pass  
         
@@ -80,7 +53,7 @@
         self.passback = None
         
     def parseForm( self, request ):
-       do_not_parse = [ 'mod', 'op', 'sheet', 'passback' ] 
+       do_not_parse = [ 'mod', 'op', 'passback' ] 
     
        passed_back = request.args
         
@@ -103,7 +76,7 @@
         
         if DEBUG: print self.passback
         
-    def write_BODY( self, request, err ):
+    def write_BODY( self, request ):
     
        if not self.passback: self.parseForm( request )
         
@@ -115,14 +88,13 @@
         
        for (feild, control) in self.feilds:
             control.write_Control( request, previous_values.get( feild ) )
-            if err and not control.validate( previous_values.get( feild ) ):
+            if previous_values.get( feild ) is not None and not 
control.validate( previous_values.get( feild ) ):
                control.write_Help( request )
             
         request.write( "</table>" )
             
         request.write( "<input type='hidden' name='passback' 
value=\"%s\"></p>" % self.passback )
-        request.write( "<input type='hidden' name='sheet' value='%s'></p>" % 
self.location )
-        request.write( "<input type='hidden' name='visited-sheet%s' 
value='True'></p>" % self.location )
+        #request.write( "<input type='hidden' name='visited-sheet%s' 
value='True'></p>" % self.location )
                 
     def addControl( self, control ):
        self.feilds.append( [ control.getName(), control ] )
@@ -133,7 +105,7 @@
             
        check = True
         
-        previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the 
hash for quick reference
+        previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the 
map for quick reference
        if DEBUG: print previous_values
       
        for (feild, control) in self.feilds:
@@ -258,12 +230,16 @@
         
     def write_Control( self, request, persistedValue ):
         request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>" 
% self.humanText )
+
+        #request.write( str( persistedValue ) )
+
+        #TODO: There's a problem with this: it doesn't persist an untick, 
because the browsers don't pass it back. Need a fix...
         
         if persistedValue == 'True':
            request.write( "<input type='checkbox' name='%s' value='True' 
checked>" % self.getName() )
         else:
            request.write( "<input type='checkbox' name='%s' value='True'>" % 
self.getName() )
             
-        request.write( "</select></td></tr>" )
+        request.write( "</td></tr>" )
 
       
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/sv/util.py
--- a/tools/python/xen/sv/util.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/sv/util.py       Thu Aug 25 22:53:20 2005
@@ -4,7 +4,7 @@
 
 import types
 
-def getDomInfoHash( domain ):
+def getDomInfo( domain ):
     domInfoHash = {}
     try:
         domInfoHash = sxp2hash( server.xend_domain( domain ) )
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/SrvBase.py
--- a/tools/python/xen/web/SrvBase.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/SrvBase.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import types
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/SrvDir.py
--- a/tools/python/xen/web/SrvDir.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/SrvDir.py    Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import types
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/__init__.py
--- a/tools/python/xen/web/__init__.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/__init__.py  Thu Aug 25 22:53:20 2005
@@ -1,1 +1,17 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/connection.py
--- a/tools/python/xen/web/connection.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/connection.py        Thu Aug 25 22:53:20 2005
@@ -1,7 +1,26 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import sys
 import threading
 import select
 import socket
+import fcntl
 
 from errno import EAGAIN, EINTR, EWOULDBLOCK
 
@@ -133,6 +152,9 @@
 
     def createSocket(self):
         raise NotImplementedError()
+
+    def setCloExec(self):
+        fcntl.fcntl(self.sock.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
 
     def acceptConnection(self, sock, protocol, addr):
         return SocketServerConnection(sock, protocol, addr, self)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/httpserver.py        Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 import threading
 
 import string
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/protocol.py
--- a/tools/python/xen/web/protocol.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/protocol.py  Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 class Factory:
     """Generic protocol factory.
     """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/reactor.py
--- a/tools/python/xen/web/reactor.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/reactor.py   Thu Aug 25 22:53:20 2005
@@ -1,2 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 from unix import listenUNIX, connectUNIX
-from tcp import listenTCP, connectTCP
+from tcp import listenTCP, connectTCP, SetCloExec
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/resource.py
--- a/tools/python/xen/web/resource.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/resource.py  Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import http
 
 def findResource(resource, request):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/static.py
--- a/tools/python/xen/web/static.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/static.py    Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 import os
 
 from resource import Resource
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/tcp.py
--- a/tools/python/xen/web/tcp.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/tcp.py       Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import sys
 import socket
 import types
@@ -68,6 +85,10 @@
     l.startListening()
     return l
 
+def SetCloExec(SocketListener):
+    """Set close-on-exec on the listener's socket via its setCloExec() method."""
+    SocketListener.setCloExec()
+
 def connectTCP(host, port, factory, timeout=None, bindAddress=None):
     c = TCPConnector(host, port, factory, timeout=timeout, 
bindAddress=bindAddress)
     c.connect()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/web/unix.py
--- a/tools/python/xen/web/unix.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/web/unix.py      Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import sys
 import socket
 import os
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/Args.py
--- a/tools/python/xen/xend/Args.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/Args.py     Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import types
 import StringIO
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/EventServer.py
--- a/tools/python/xen/xend/EventServer.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/EventServer.py      Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Simple publish/subscribe event server.
 
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/PrettyPrint.py
--- a/tools/python/xen/xend/PrettyPrint.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/PrettyPrint.py      Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """General pretty-printer, including support for SXP.
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/Vifctl.py
--- a/tools/python/xen/xend/Vifctl.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/Vifctl.py   Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Xend interface to networking control scripts.
 """
 import os
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py   Thu Aug 25 22:53:20 2005
@@ -6,6 +6,7 @@
 
 import errno
 import os
+import re
 import select
 import sxp
 from string import join
@@ -64,6 +65,13 @@
                 if l.rstrip() == "suspend":
                     log.info("suspending %d" % dominfo.id)
                     xd.domain_shutdown(dominfo.id, reason='suspend')
+                    if dominfo.store_channel:
+                        try:
+                            dominfo.db.releaseDomain(dominfo.id)
+                        except Exception, ex:
+                            log.warning("error in domain release on xenstore: %s",
+                                        ex)
+                            pass
                     dominfo.state_wait("suspended")
                     log.info("suspend %d done" % dominfo.id)
                     child.tochild.write("done\n")
@@ -76,6 +84,11 @@
     if child.wait() != 0:
         raise XendError("xc_save failed: %s" % lasterr)
 
+    if dominfo.store_channel:
+        dominfo.store_channel.close()
+        dominfo.db['store_channel'].delete()
+        dominfo.db.saveDB(save=True)
+        dominfo.store_channel = None
     xd.domain_destroy(dominfo.id)
     return None
 
@@ -107,8 +120,13 @@
         raise XendError(
             "not a valid guest state file: pfn count out of range")
 
+    if dominfo.store_channel:
+        evtchn = dominfo.store_channel.port2
+    else:
+        evtchn = 0
+
     cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
-           str(dominfo.id), str(nr_pfns)]
+           str(dominfo.id), str(nr_pfns), str(evtchn)]
     log.info("[xc_restore] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
     child.tochild.close()
@@ -128,7 +146,21 @@
                 lasterr = l.rstrip()
             if fd == child.fromchild.fileno():
                 l = child.fromchild.readline()
-                log.info(l.rstrip())
+                while l:
+                    m = re.match(r"^(store-mfn) (\d+)\n$", l)
+                    if m:
+                        if dominfo.store_channel:
+                            dominfo.store_mfn = int(m.group(2))
+                            if dominfo.store_mfn >= 0:
+                                dominfo.db.introduceDomain(dominfo.id,
+                                                           dominfo.store_mfn,
+                                                           dominfo.store_channel)
+                            dominfo.exportToDB(save=True, sync=True)
+                    log.info(l.rstrip())
+                    try:
+                        l = child.fromchild.readline()
+                    except:
+                        l = None
         if filter(lambda (fd, event): event & select.POLLHUP, r):
             break
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendClient.py       Thu Aug 25 22:53:20 2005
@@ -1,13 +1,27 @@
 #!/usr/bin/env python
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Client API for the HTTP interface on xend.
 Callable as a script - see main().
 Supports inet or unix connection to xend.
 
 This API is the 'control-plane' for xend.
-The 'data-plane' is done separately. For example, consoles
-are accessed via sockets on xend, but the list of consoles
-is accessible via this API.
+The 'data-plane' is done separately.
 """
 import os
 import sys
@@ -145,9 +159,6 @@
 
     def domainurl(self, id=''):
         return self.url.relative('domain/' + str(id))
-
-    def consoleurl(self, id=''):
-        return self.url.relative('console/' + str(id))
 
     def deviceurl(self, id=''):
         return self.url.relative('device/' + str(id))
@@ -213,11 +224,15 @@
         return self.xendPost(self.domainurl(id),
                              {'op'      : 'pause' })
 
-    def xend_domain_shutdown(self, id, reason, key=0):
+    def xend_domain_shutdown(self, id, reason):
         return self.xendPost(self.domainurl(id),
                              {'op'      : 'shutdown',
-                              'reason'  : reason,
-                              'key'     : key })
+                              'reason'  : reason})
+
+    def xend_domain_sysrq(self, id, key):
+        return self.xendPost(self.domainurl(id),
+                             {'op'      : 'sysrq',
+                              'key'     : key})
 
     def xend_domain_destroy(self, id, reason):
         return self.xendPost(self.domainurl(id),
@@ -317,16 +332,6 @@
                              {'op'      : 'device_configure',
                               'idx'     : idx,
                               'config'  : fileof(config) })
-
-    def xend_consoles(self):
-        return self.xendGet(self.consoleurl())
-
-    def xend_console(self, id):
-        return self.xendGet(self.consoleurl(id))
-
-    def xend_console_disconnect(self, id):
-        return self.xendPost(self.consoleurl(id),
-                             {'op'      : 'disconnect'})
 
     def xend_vnets(self):
         return self.xendGet(self.vneturl())
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDB.py
--- a/tools/python/xen/xend/XendDB.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDB.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import os
 import os.path
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDmesg.py
--- a/tools/python/xen/xend/XendDmesg.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDmesg.py        Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
- # Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Get dmesg output for this node.
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDomain.py       Thu Aug 25 22:53:20 2005
@@ -1,5 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
+#============================================================================
 
 """Handler for domain operations.
  Nothing here is persistent (across reboots).
@@ -305,8 +320,7 @@
         @param vmconfig: vm configuration
         """
         config = sxp.child_value(vmconfig, 'config')
-        uuid = sxp.child_value(vmconfig, 'uuid')
-        dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+        dominfo = XendDomainInfo.restore(self.dbmap, config)
         return dominfo
 
     def domain_restore(self, src, progress=False):
@@ -386,7 +400,7 @@
         except Exception, ex:
             raise XendError(str(ex))
     
-    def domain_shutdown(self, id, reason='poweroff', key=0):
+    def domain_shutdown(self, id, reason='poweroff'):
         """Shutdown domain (nicely).
          - poweroff: restart according to exit code and restart mode
          - reboot:   restart on exit
@@ -402,9 +416,16 @@
         eserver.inject('xend.domain.shutdown', [dominfo.name, dominfo.id, reason])
         if reason == 'halt':
             reason = 'poweroff'
-        val = dominfo.shutdown(reason, key=key)
-        if not reason in ['suspend', 'sysrq']:
+        val = dominfo.shutdown(reason)
+        if not reason in ['suspend']:
             self.domain_shutdowns()
+        return val
+
+    def domain_sysrq(self, id, key):
+        """Send a SysRq to a domain
+        """
+        dominfo = self.domain_lookup(id)
+        val = dominfo.send_sysrq(key)
         return val
 
     def domain_shutdowns(self):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Representation of a single domain.
 Includes support for domain construction, using
@@ -8,7 +23,7 @@
 
 """
 
-import string
+import string, re
 import os
 import time
 import threading
@@ -21,8 +36,10 @@
 from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
 from xen.xend.server import messages
 from xen.xend.server.channel import EventChannel, channelFactory
+from xen.util.blkif import blkdev_name_to_number, expand_dev_name
 
 from xen.xend import sxp
+from xen.xend import Blkctl
 from xen.xend.PrettyPrint import prettyprintstring
 from xen.xend.XendBootloader import bootloader
 from xen.xend.XendLogging import log
@@ -52,15 +69,6 @@
     DOMAIN_CRASH   : "crash",
     }
 
-"""Map shutdown reasons to the message type to use.
-"""
-shutdown_messages = {
-    'poweroff' : 'shutdown_poweroff_t',
-    'reboot'   : 'shutdown_reboot_t',
-    'suspend'  : 'shutdown_suspend_t',
-    'sysrq'    : 'shutdown_sysrq_t',
-    }
-
 RESTART_ALWAYS   = 'always'
 RESTART_ONREBOOT = 'onreboot'
 RESTART_NEVER    = 'never'
@@ -132,7 +140,7 @@
     if domlist and dom == domlist[0]['dom']:
         return domlist[0]
     return None
-    
+
 class XendDomainInfo:
     """Virtual machine object."""
 
@@ -152,8 +160,6 @@
         vm = cls(db)
         vm.construct(config)
         vm.saveToDB(sync=True)
-        # Flush info to xenstore immediately
-        vm.exportToDB()
 
         return vm
 
@@ -191,19 +197,22 @@
 
     recreate = classmethod(recreate)
 
-    def restore(cls, parentdb, config, uuid):
+    def restore(cls, parentdb, config, uuid=None):
         """Create a domain and a VM object to do a restore.
 
         @param parentdb:  parent db
         @param config:    domain configuration
         @param uuid:      uuid to use
         """
+        if not uuid:
+            uuid = getUuid()
         db = parentdb.addChild(uuid)
         vm = cls(db)
         ssidref = int(sxp.child_value(config, 'ssidref'))
         log.debug('restoring with ssidref='+str(ssidref))
         id = xc.domain_create(ssidref = ssidref)
         vm.setdom(id)
+        vm.clear_shutdown()
         try:
             vm.restore = True
             vm.construct(config)
@@ -227,6 +236,7 @@
         DBVar('restart_time',  ty='float'),
         DBVar('restart_count', ty='int'),
         DBVar('target',        ty='long', path="memory/target"),
+        DBVar('device_model_pid', ty='int'),
         ]
     
     def __init__(self, db):
@@ -255,6 +265,8 @@
         self.info = None
         self.blkif_backend = False
         self.netif_backend = False
+        self.netif_idx = 0
+        
         #todo: state: running, suspended
         self.state = STATE_VM_OK
         self.state_updated = threading.Condition()
@@ -268,9 +280,10 @@
         self.restart_time = None
         self.restart_count = 0
         
-        self.console_port = None
         self.vcpus = 1
+        self.vcpusdb = {}
         self.bootloader = None
+        self.device_model_pid = 0
 
     def setDB(self, db):
         self.db = db
@@ -344,9 +357,6 @@
         s += " name=" + self.name
         s += " memory=" + str(self.memory)
         s += " ssidref=" + str(self.ssidref)
-        console = self.getConsole()
-        if console:
-            s += " console=" + str(console.console_port)
         s += ">"
         return s
 
@@ -374,6 +384,71 @@
         return ctrl
 
     def createDevice(self, type, devconfig, change=False):
+        if type == 'vbd':
+            typedev = sxp.child_value(devconfig, 'dev')
+            if re.match('^ioemu:', typedev):
+               return;
+            backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
+
+            devnum = blkdev_name_to_number(sxp.child_value(devconfig, 'dev'))
+
+            # create backend db
+            backdb = backdom.db.addChild("/backend/%s/%s/%d" %
+                                         (type, self.uuid, devnum))
+
+            # create frontend db
+            db = self.db.addChild("/device/%s/%d" % (type, devnum))
+            
+            db['virtual-device'] = "%i" % devnum
+            #db['backend'] = sxp.child_value(devconfig, 'backend', '0')
+            db['backend'] = backdb.getPath()
+            db['backend-id'] = "%i" % backdom.id
+
+            backdb['frontend'] = db.getPath()
+            (type, params) = string.split(sxp.child_value(devconfig, 'uname'), ':', 1)
+            node = Blkctl.block('bind', type, params)
+            backdb['frontend-id'] = "%i" % self.id
+            backdb['physical-device'] = "%li" % blkdev_name_to_number(node)
+            backdb.saveDB(save=True)
+
+            # Ok, super gross, this really doesn't belong in the frontend db...
+            db['type'] = type
+            db['node'] = node
+            db['params'] = params
+            db.saveDB(save=True)
+            
+            return
+
+        if type == 'vif':
+            backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
+
+            log.error(devconfig)
+            
+            devnum = self.netif_idx
+            self.netif_idx += 1
+
+            # create backend db
+            backdb = backdom.db.addChild("/backend/%s/%s/%d" %
+                                         (type, self.uuid, devnum))
+
+            # create frontend db
+            db = self.db.addChild("/device/%s/%d" % (type, devnum))
+            
+            backdb['frontend'] = db.getPath()
+            backdb['frontend-id'] = "%i" % self.id
+            backdb['handle'] = "%i" % devnum
+            backdb.saveDB(save=True)
+
+            db['backend'] = backdb.getPath()
+            db['backend-id'] = "%i" % backdom.id
+            db['handle'] = "%i" % devnum
+            log.error(sxp.child_value(devconfig, 'mac'))
+            db['mac'] = sxp.child_value(devconfig, 'mac')
+
+            db.saveDB(save=True)
+
+            return
+        
         ctrl = self.findDeviceController(type)
         return ctrl.createDevice(devconfig, recreate=self.recreate,
                                  change=change)
@@ -443,9 +518,6 @@
             sxpr.append(self.store_channel.sxpr())
         if self.store_mfn:
             sxpr.append(['store_mfn', self.store_mfn])
-        console = self.getConsole()
-        if console:
-            sxpr.append(console.sxpr())
 
         if self.restart_count:
             sxpr.append(['restart_count', self.restart_count])
@@ -459,6 +531,8 @@
             sxpr.append(devs)
         if self.config:
             sxpr.append(['config', self.config])
+        if self.device_model_pid:
+            sxpr.append(['device_model_pid',self.device_model_pid])
         return sxpr
 
     def sxpr_devices(self):
@@ -519,7 +593,6 @@
 
             # Create domain devices.
             self.configure_backends()
-            self.configure_console()
             self.configure_restart()
             self.construct_image()
             self.configure()
@@ -558,6 +631,16 @@
         except:
             raise VmError('invalid vcpus value')
 
+    def exportVCPUSToDB(self, vcpus):
+        for v in range(0,vcpus):
+            path = "/cpu/%d"%(v)
+            if not self.vcpusdb.has_key(path):
+                self.vcpusdb[path] = self.db.addChild(path)
+            db = self.vcpusdb[path]
+            log.debug("writing key availability=online to path %s in store"%(path))
+            db['availability'] = "online"
+            db.saveDB(save=True)
+
     def init_image(self):
         """Create boot image handler for the domain.
         """
@@ -572,15 +655,17 @@
         self.create_channel()
         self.image.createImage()
         self.exportToDB()
-        if self.store_channel:
+        if self.store_channel and self.store_mfn >= 0:
             self.db.introduceDomain(self.id,
                                     self.store_mfn,
                                     self.store_channel)
+        # get the configured value of vcpus and update store
+        self.exportVCPUSToDB(self.vcpus)
 
     def delete(self):
         """Delete the vm's db.
         """
-        if self.dom_get(self.id):
+        if dom_get(self.id):
             return
         self.id = None
         self.saveToDB(sync=True)
@@ -629,6 +714,7 @@
                 pass
         if self.image:
             try:
+                self.device_model_pid = 0
                 self.image.destroy()
                 self.image = None
             except:
@@ -654,6 +740,21 @@
         for ctrl in self.getDeviceControllers():
             if ctrl.isDestroyed(): continue
             ctrl.destroyController(reboot=reboot)
+        ddb = self.db.addChild("/device")
+        for type in ddb.keys():
+            if type == 'vbd':
+                typedb = ddb.addChild(type)
+                for dev in typedb.keys():
+                    devdb = typedb.addChild(str(dev))
+                    Blkctl.block('unbind', devdb['type'].getData(),
+                                 devdb['node'].getData())
+                    typedb[dev].delete()
+                typedb.saveDB(save=True)
+            if type == 'vif':
+                typedb = ddb.addChild(type)
+                for dev in typedb.keys():
+                    typedb[dev].delete()
+                typedb.saveDB(save=True)
 
     def show(self):
         """Print virtual machine info.
@@ -730,7 +831,8 @@
                 ctrl.initController(reboot=True)
         else:
             self.create_configured_devices()
-        self.image.createDeviceModel()
+        if not self.device_model_pid:
+            self.device_model_pid = self.image.createDeviceModel()
 
     def device_create(self, dev_config):
         """Create a new device.
@@ -738,7 +840,7 @@
         @param dev_config: device configuration
         """
         dev_type = sxp.name(dev_config)
-        dev = self.createDevice(self, dev_config, change=True)
+        dev = self.createDevice(dev_type, dev_config, change=True)
         self.config.append(['device', dev.getConfig()])
         return dev.sxpr()
 
@@ -785,17 +887,6 @@
         """
         self.bootloader = sxp.child_value(self.config, "bootloader")
 
-    def configure_console(self):
-        """Configure the vm console port.
-        """
-        x = sxp.child_value(self.config, 'console')
-        if x:
-            try:
-                port = int(x)
-            except:
-                raise VmError('invalid console:' + str(x))
-            self.console_port = port
-
     def configure_restart(self):
         """Configure the vm restart mode.
         """
@@ -855,7 +946,7 @@
 
     def restart(self):
         """Restart the domain after it has exited.
-        Reuses the domain id and console port.
+        Reuses the domain id
 
         """
         try:
@@ -910,24 +1001,8 @@
 
         """
         self.configure_fields()
-        self.create_console()
         self.create_devices()
         self.create_blkif()
-
-    def create_console(self):
-        console = self.getConsole()
-        if not console:
-            config = ['console']
-            if self.console_port:
-                config.append(['console_port', self.console_port])
-            console = self.createDevice('console', config)
-        return console
-
-    def getConsole(self):
-        console_ctrl = self.getDeviceController("console", error=False)
-        if console_ctrl:
-            return console_ctrl.getDevice(0)
-        return None
 
     def create_blkif(self):
         """Create the block device interface (blkif) for the vm.
@@ -935,6 +1010,7 @@
         at creation time, for example when it uses NFS root.
 
         """
+        return
         blkif = self.getDeviceController("vbd", error=False)
         if not blkif:
             blkif = self.createDeviceController("vbd")
@@ -967,28 +1043,39 @@
     def vcpu_hotplug(self, vcpu, state):
         """Disable or enable VCPU in domain.
         """
-        log.error("Holly Shit! %d %d\n" % (vcpu, state))
-        if self.channel:
+        db = ""
+        try:
+            db = self.vcpusdb['/cpu/%d'%(vcpu)]
+        except:
+            log.error("Invalid VCPU")
+            return
+
+        if self.store_channel:
             if int(state) == 0:
-                msg = messages.packMsg('vcpu_hotplug_off_t', { 'vcpu' : vcpu} )
+                db['availability'] = "offline"
             else:
-                msg = messages.packMsg('vcpu_hotplug_on_t',  { 'vcpu' : vcpu} )
-
-            self.channel.writeRequest(msg)
-
-    def shutdown(self, reason, key=0):
-        msgtype = shutdown_messages.get(reason)
-        if not msgtype:
+                db['availability'] = "online"
+
+        db.saveDB(save=True)
+
+    def shutdown(self, reason):
+        if not reason in shutdown_reasons.values():
             raise XendError('invalid reason:' + reason)
-        extra = {}
-        if reason == 'sysrq':
-            extra['key'] = key
-        if self.channel:
-            msg = messages.packMsg(msgtype, extra)
-            self.channel.writeRequest(msg)
-        if not reason in ['suspend', 'sysrq']:
-            self.shutdown_pending = {'start':time.time(), 'reason':reason,
-                                     'key':key}
+        db = self.db.addChild("/control");
+        db['shutdown'] = reason;
+        db.saveDB(save=True);
+        if not reason in ['suspend']:
+            self.shutdown_pending = {'start':time.time(), 'reason':reason}
+
+    def clear_shutdown(self):
+        db = self.db.addChild("/control")
+        db['shutdown'] = ""
+        db.saveDB(save=True)
+
+    def send_sysrq(self, key=0):
+        db = self.db.addChild("/control");
+        db['sysrq'] = '%c' % key;
+        db.saveDB(save=True);        
 
     def shutdown_time_left(self, timeout):
         if not self.shutdown_pending:
@@ -1003,6 +1090,8 @@
             self.db.introduceDomain(self.id, self.store_mfn,
                                     self.store_channel)
         self.exportToDB(save=True, sync=True)
+        # get run-time value of vcpus and update store
+        self.exportVCPUSToDB(dom_get(self.id)['vcpus'])
 
 def vm_field_ignore(vm, config, val, index):
     """Dummy config field handler used for fields with built-in handling.
@@ -1048,7 +1137,6 @@
 add_config_handler('ssidref',    vm_field_ignore)
 add_config_handler('cpu',        vm_field_ignore)
 add_config_handler('cpu_weight', vm_field_ignore)
-add_config_handler('console',    vm_field_ignore)
 add_config_handler('restart',    vm_field_ignore)
 add_config_handler('image',      vm_field_ignore)
 add_config_handler('device',     vm_field_ignore)
@@ -1062,9 +1150,6 @@
 #============================================================================
 # Register device controllers and their device config types.
 
-from server import console
-controller.addDevControllerClass("console", console.ConsoleController)
-
 from server import blkif
 controller.addDevControllerClass("vbd", blkif.BlkifController)
 add_device_handler("vbd", "vbd")
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendError.py
--- a/tools/python/xen/xend/XendError.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendError.py        Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 class XendError(ValueError):
     
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendLogging.py
--- a/tools/python/xen/xend/XendLogging.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendLogging.py      Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import types
 import logging
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendNode.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Handler for node operations.
  Has some persistent state:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendProtocol.py
--- a/tools/python/xen/xend/XendProtocol.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendProtocol.py     Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import socket
 import httplib
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendRoot.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Xend root class.
 Creates the event server and handles configuration.
@@ -69,12 +84,6 @@
 
     """Default path the unix-domain server listens at."""
     xend_unix_path_default = '/var/lib/xend/xend-socket'
-
-    """Default interface address xend listens at for consoles."""
-    console_address_default   = 'localhost'
-
-    """Default port xend serves consoles at. """
-    console_port_base_default = '9600'
 
     dom0_min_mem_default = '0'
 
@@ -302,24 +311,11 @@
         """
         return self.get_config_value("xend-unix-path", self.xend_unix_path_default)
 
-    def get_console_address(self):
-        """Get the address xend listens at for its console ports.
-        This defaults to 'localhost', allowing only the localhost to connect
-        to the console ports.  Setting this to the empty string, allows all
-        hosts to connect.
-        """
-        return self.get_config_value('console-address', self.console_address_default)
-
-    def get_console_port_base(self):
-        """Get the base port number used to generate console ports for domains.
-        """
-        return self.get_config_int('console-port-base', self.console_port_base_default)
-
     def get_block_script(self, type):
         return self.get_config_value('block-%s' % type, '')
 
     def get_network_script(self):
-        return self.get_config_value('network-script', 'network')
+        return self.get_config_value('network-script', '')
 
     def get_enable_dump(self):
         return self.get_config_bool('enable-dump', 'no')
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/XendVnet.py
--- a/tools/python/xen/xend/XendVnet.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/XendVnet.py Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Handler for vnet operations.
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/encode.py
--- a/tools/python/xen/xend/encode.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/encode.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Encoding for arguments to HTTP calls.
    Uses the url-encoding with MIME type 'application/x-www-form-urlencoded'
    if the data does not include files. Otherwise it uses the encoding with
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/image.py    Thu Aug 25 22:53:20 2005
@@ -1,4 +1,22 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import os, string
+import re
 
 import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
 from xen.xend import sxp
@@ -245,7 +263,7 @@
     memmap = None
     memmap_value = []
     device_channel = None
-
+    pid = 0
     def createImage(self):
         """Create a VM for the VMX environment.
         """
@@ -257,14 +275,24 @@
         # Create an event channel
         self.device_channel = channel.eventChannel(0, self.vm.getDomain())
         log.info("VMX device model port: %d", self.device_channel.port2)
-        return xc.vmx_build(dom            = self.vm.getDomain(),
+        if self.vm.store_channel:
+            store_evtchn = self.vm.store_channel.port2
+        else:
+            store_evtchn = 0
+        ret = xc.vmx_build(dom            = self.vm.getDomain(),
                             image          = self.kernel,
                             control_evtchn = self.device_channel.port2,
+                            store_evtchn   = store_evtchn,
                             memsize        = self.vm.memory,
                             memmap         = self.memmap_value,
                             cmdline        = self.cmdline,
                             ramdisk        = self.ramdisk,
-                            flags          = self.flags)
+                            flags          = self.flags,
+                            vcpus          = self.vm.vcpus)
+        if isinstance(ret, dict):
+            self.vm.store_mfn = ret.get('store_mfn')
+            return 0
+        return ret
 
     def parseMemmap(self):
         self.memmap = sxp.child_value(self.vm.config, "memmap")
@@ -278,7 +306,7 @@
     # xm config file
     def parseDeviceModelArgs(self):
        dmargs = [ 'cdrom', 'boot', 'fda', 'fdb',
-                   'localtime', 'serial', 'macaddr', 'stdvga', 'isa' ] 
+                   'localtime', 'serial', 'stdvga', 'isa' ] 
        ret = []
        for a in dmargs:
                    v = sxp.child_value(self.vm.config, a)
@@ -295,20 +323,32 @@
                ret.append("-%s" % a)
                ret.append("%s" % v)
 
-        # Handle hd img related options
+        # Handle disk/network related options
         devices = sxp.children(self.vm.config, 'device')
         for device in devices:
-            vbdinfo = sxp.child(device, 'vbd')
-            if not vbdinfo:
-                raise VmError("vmx: missing vbd configuration")
-            uname = sxp.child_value(vbdinfo, 'uname')
-            vbddev = sxp.child_value(vbdinfo, 'dev')
-            (vbdtype, vbdparam) = string.split(uname, ':', 1)
-            vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
-            if vbdtype != 'file' or vbddev not in vbddev_list:
-                raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
-            ret.append("-%s" % vbddev)
-            ret.append("%s" % vbdparam)
+            name = sxp.name(sxp.child0(device))
+            if name == 'vbd':
+               vbdinfo = sxp.child(device, 'vbd')
+               uname = sxp.child_value(vbdinfo, 'uname')
+               typedev = sxp.child_value(vbdinfo, 'dev')
+               (vbdtype, vbdparam) = string.split(uname, ':', 1)
+               if re.match('^ioemu:', typedev):
+                  (emtype, vbddev) = string.split(typedev, ':', 1)
+               else:
+                  emtype = 'vbd'
+                  vbddev = typedev
+               if emtype != 'ioemu':
+                  continue;
+               vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+               if vbddev not in vbddev_list:
+                  raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+               ret.append("-%s" % vbddev)
+               ret.append("%s" % vbdparam)
+            if name == 'vif':
+               vifinfo = sxp.child(device, 'vif')
+               mac = sxp.child_value(vifinfo, 'mac')
+               ret.append("-macaddr")
+               ret.append("%s" % mac)
 
        # Handle graphics library related options
        vnc = sxp.child_value(self.vm.config, 'vnc')
@@ -347,6 +387,7 @@
         log.info("spawning device models: %s %s", device_model, args)
         self.pid = os.spawnve(os.P_NOWAIT, device_model, args, env)
         log.info("device model pid: %d", self.pid)
+        return self.pid
 
     def vncParams(self):
         # see if a vncviewer was specified
@@ -366,11 +407,16 @@
     def destroy(self):
         channel.eventChannelClose(self.device_channel)
         import signal
+        if not self.pid:
+            self.pid = self.vm.device_model_pid
         os.kill(self.pid, signal.SIGKILL)
         (pid, status) = os.waitpid(self.pid, 0)
+        self.pid = 0
 
     def getDomainMemory(self, mem_mb):
-        return (mem_mb * 1024) + self.getPageTableSize(mem_mb)
+        # for ioreq_t and xenstore
+        static_pages = 2
+        return (mem_mb * 1024) + self.getPageTableSize(mem_mb) + 4 * static_pages
             
     def getPageTableSize(self, mem_mb):
         """Return the size of memory needed for 1:1 page tables for physical
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/scheduler.py
--- a/tools/python/xen/xend/scheduler.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/scheduler.py        Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import threading
 
 def later(delay, fn, args=(), kwargs={}):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Thu Aug 25 22:53:20 2005
@@ -126,12 +126,8 @@
     def cleanup_xend(self, kill=False):
         return self.cleanup_process(XEND_PID_FILE, "xend", kill)
 
-    def cleanup_xenstored(self, kill=False):
-        return self.cleanup_process(XENSTORED_PID_FILE, "xenstored", kill)
-
     def cleanup(self, kill=False):
         self.cleanup_xend(kill=kill)
-        #self.cleanup_xenstored(kill=kill)
 
     def status(self):
         """Returns the status of the xend daemon.
@@ -167,31 +163,6 @@
             pidfile.write(str(pid))
             pidfile.close()
         return pid
-
-    def start_xenstored(self):
-        """Fork and exec xenstored, writing its pid to XENSTORED_PID_FILE.
-        """
-        def mkdirs(p):
-            try:
-                os.makedirs(p)
-            except:
-                pass
-        mkdirs(XENSTORED_RUN_DIR)
-        mkdirs(XENSTORED_LIB_DIR)
-        
-        pid = self.fork_pid(XENSTORED_PID_FILE)
-        if pid:
-            # Parent
-            log.info("Started xenstored, pid=%d", pid)
-        else:
-            # Child
-            if XEND_DAEMONIZE:
-                self.daemonize()
-            if XENSTORED_DEBUG:
-                os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork",
-                         "-T", "/var/log/xenstored-trace.log")
-            else:
-                os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork")
 
     def daemonize(self):
         if not XEND_DAEMONIZE: return
@@ -223,14 +194,10 @@
         4  Insufficient privileges
         """
         xend_pid = self.cleanup_xend()
-        xenstored_pid = self.cleanup_xenstored()
 
         if self.set_user():
             return 4
         os.chdir("/")
-
-        if xenstored_pid == 0:
-            self.start_xenstored()
 
         if xend_pid > 0:
             # Trying to run an already-running service is a success.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDmesg.py
--- a/tools/python/xen/xend/server/SrvDmesg.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDmesg.py  Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import os
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDomain.py Thu Aug 25 22:53:20 2005
@@ -1,10 +1,24 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 from xen.web import http
 
 from xen.xend import sxp
 from xen.xend import XendDomain
-from xen.xend import XendConsole
 from xen.xend import PrettyPrint
 from xen.xend.Args import FormFn
 
@@ -18,7 +32,6 @@
         SrvDir.__init__(self)
         self.dom = dom
         self.xd = XendDomain.instance()
-        self.xconsole = XendConsole.instance()
 
     def op_configure(self, op, req):
         """Configure an existing domain.
@@ -41,9 +54,17 @@
     def op_shutdown(self, op, req):
         fn = FormFn(self.xd.domain_shutdown,
                     [['dom',    'int'],
-                     ['reason', 'str'],
+                     ['reason', 'str']])
+        val = fn(req.args, {'dom': self.dom.id})
+        req.setResponseCode(http.ACCEPTED)
+        req.setHeader("Location", "%s/.." % req.prePathURL())
+        return val
+
+    def op_sysrq(self, op, req):
+        fn = FormFn(self.xd.domain_sysrq,
+                    [['dom',    'int'],
                      ['key',    'int']])
-        val = fn(req.args, {'dom': self.dom.id})
+        val = fn(req.args, {'dom' : self.dom.id})
         req.setResponseCode(http.ACCEPTED)
         req.setHeader("Location", "%s/.." % req.prePathURL())
         return val
@@ -208,14 +229,6 @@
             self.print_path(req)
             #self.ls()
             req.write('<p>%s</p>' % self.dom)
-            if self.dom.console:
-                cinfo = self.dom.console
-                cid = str(cinfo.console_port)
-                #todo: Local xref: need to know server prefix.
-                req.write('<p><a href="/xend/console/%s">Console %s</a></p>'
-                          % (cid, cid))
-                req.write('<p><a href="%s">Connect to console</a></p>'
-                          % cinfo.uri())
             if self.dom.config:
                 req.write("<code><pre>")
                 PrettyPrint.prettyprint(self.dom.config, out=req)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvDomainDir.py
--- a/tools/python/xen/xend/server/SrvDomainDir.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvDomainDir.py      Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import traceback
 from StringIO import StringIO
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvNode.py
--- a/tools/python/xen/xend/server/SrvNode.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvNode.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import os
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvRoot.py
--- a/tools/python/xen/xend/server/SrvRoot.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvRoot.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 from xen.xend import XendRoot
 xroot = XendRoot.instance()
@@ -15,7 +30,6 @@
     subdirs = [
         ('node',    'SrvNode'       ),
         ('domain',  'SrvDomainDir'  ),
-        ('console', 'SrvConsoleDir' ),
         ('vnet',    'SrvVnetDir'    ),
         ]
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvServer.py Thu Aug 25 22:53:20 2005
@@ -1,7 +1,22 @@
 #!/usr/bin/python
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
-"""Example xend HTTP and console server.
+"""Example xend HTTP
 
    Can be accessed from a browser or from a program.
    Do 'python SrvServer.py' to run the server.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvVnetDir.py
--- a/tools/python/xen/xend/server/SrvVnetDir.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvVnetDir.py        Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 from xen.xend import sxp
 from xen.xend.Args import FormFn
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/SrvXendLog.py
--- a/tools/python/xen/xend/server/SrvXendLog.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/SrvXendLog.py        Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 from xen.web import static
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/blkif.py     Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Support for virtual block devices.
 """
 import string
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/channel.py
--- a/tools/python/xen/xend/server/channel.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/channel.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import threading
 import select
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/controller.py
--- a/tools/python/xen/xend/server/controller.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/controller.py        Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """General support for controllers, which handle devices
 for a domain.
 """
@@ -126,7 +142,7 @@
     def createDevController(self, type, vm, recreate=False):
         cls = self.getDevControllerClass(type)
         if not cls:
-            raise XendError("unknown device type: " + type)
+            raise XendError("unknown device type: " + str(type))
         return cls.createDevController(vm, recreate=recreate)
 
 def getDevControllerTable():
@@ -267,6 +283,8 @@
         dev.attach(recreate=recreate, change=change)
         dev.exportToDB()
 
+        return dev
+
     def configureDevice(self, id, config, change=False):
         """Reconfigure an existing device.
         May be defined in subclass."""
@@ -307,9 +325,9 @@
         return self.destroyed
 
     def getDevice(self, id, error=False):
-        dev = self.devices.get(id)
+        dev = self.devices.get(int(id))
         if error and not dev:
-            raise XendError("invalid device id: " + id)
+            raise XendError("invalid device id: " + str(id))
         return dev
 
     def getDeviceIds(self):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/event.py
--- a/tools/python/xen/xend/server/event.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/event.py     Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import sys
 import StringIO
 
@@ -33,7 +50,7 @@
     def dataReceived(self, data):
         try:
             self.parser.input(data)
-            if self.parser.ready():
+            while(self.parser.ready()):
                 val = self.parser.get_val()
                 res = self.dispatch(val)
                 self.send_result(res)
@@ -128,16 +145,8 @@
     def op_pretty(self, name, req):
         self.pretty = 1
 
-    def op_console_disconnect(self, name, req):
-        id = sxp.child_value(req, 'id')
-        if not id:
-            raise XendError('Missing console id')
-        id = int(id)
-        self.daemon.console_disconnect(id)
-
     def op_info(self, name, req):
         val = ['info']
-        #val += self.daemon.consoles()
         #val += self.daemon.blkifs()
         #val += self.daemon.netifs()
         #val += self.daemon.usbifs()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/messages.py
--- a/tools/python/xen/xend/server/messages.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/messages.py  Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import sys
 import struct
 import types
@@ -16,16 +33,6 @@
 See below.
 """
 msg_formats = {}
-
-#============================================================================
-# Console message types.
-#============================================================================
-
-CMSG_CONSOLE  = 0
-
-console_formats = { 'console_data': (CMSG_CONSOLE, 0) }
-
-msg_formats.update(console_formats)
 
 #============================================================================
 # Block interface message types.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/netif.py     Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Support for virtual network interfaces.
 """
 
@@ -405,7 +421,9 @@
                         'netif_handle'   : self.vif,
                         'evtchn'         : self.getEventChannelBackend(),
                         'tx_shmem_frame' : val['tx_shmem_frame'],
-                        'rx_shmem_frame' : val['rx_shmem_frame'] })
+                        'tx_shmem_ref'   : val['tx_shmem_ref'],
+                        'rx_shmem_frame' : val['rx_shmem_frame'],
+                        'rx_shmem_ref'   : val['rx_shmem_ref'] })
         msg = self.backendChannel.requestResponse(msg)
         #todo: check return status
         self.status = NETIF_INTERFACE_STATUS_CONNECTED
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/params.py
--- a/tools/python/xen/xend/server/params.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/params.py    Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import os
 
 def getenv(var, val, conv=None):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/pciif.py     Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 import types
 
 import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/server/relocate.py  Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 import socket
 import sys
@@ -26,7 +42,7 @@
     def dataReceived(self, data):
         try:
             self.parser.input(data)
-            if self.parser.ready():
+            while(self.parser.ready()):
                 val = self.parser.get_val()
                 res = self.dispatch(val)
                 self.send_result(res)
@@ -124,7 +140,8 @@
     if xroot.get_xend_relocation_server():
         port = xroot.get_xend_relocation_port()
         interface = xroot.get_xend_relocation_address()
-        reactor.listenTCP(port, factory, interface=interface)
+        l = reactor.listenTCP(port, factory, interface=interface)
+        l.setCloExec()
 
 def setupRelocation(dst, port):
     try:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/sxp.py
--- a/tools/python/xen/xend/sxp.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/sxp.py      Thu Aug 25 22:53:20 2005
@@ -1,5 +1,21 @@
 #!/usr/bin/python
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """
 Input-driven parsing for s-expression (sxp) format.
 Create a parser: pin = Parser();
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/uuid.py
--- a/tools/python/xen/xend/uuid.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/uuid.py     Thu Aug 25 22:53:20 2005
@@ -1,3 +1,20 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Universal(ly) Unique Identifiers (UUIDs).
 """
 import commands
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/__init__.py
--- a/tools/python/xen/xend/xenstore/__init__.py        Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/__init__.py        Thu Aug 25 22:53:20 2005
@@ -1,2 +1,18 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 from xsnode import *
 from xsobj import *
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/xsnode.py
--- a/tools/python/xen/xend/xenstore/xsnode.py  Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/xsnode.py  Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 import errno
 import os
 import os.path
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/xsobj.py
--- a/tools/python/xen/xend/xenstore/xsobj.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/xsobj.py   Thu Aug 25 22:53:20 2005
@@ -1,3 +1,19 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 import string
 import types
 
@@ -307,24 +323,24 @@
         db = self.__db__
         if path is None:
             path = db.relPath()
-        print 'DBMap>introduceDomain>', dom, page, evtchn, path
+        log.info("DBMap>introduceDomain> %d %d %s %s" %(dom, page, evtchn, path))
         try:
             db.introduceDomain(dom, page, evtchn, path)
         except Exception, ex:
             import traceback
             traceback.print_exc()
-            print 'DBMap>introduceDomain>', ex
+            log.info("DBMap>introduceDomain> %s" %ex)
             pass # todo: don't ignore
         
     def releaseDomain(self, dom):
         db = self.__db__
-        print 'DBMap>releaseDomain>', dom
+        log.info("DBMap>releaseDomain> %d" %dom)
         try:
             db.releaseDomain(dom)
         except Exception, ex:
             import traceback
             traceback.print_exc()
-            print 'DBMap>releaseDomain>', ex
+            log.info("DBMap>releaseDomain> %s" %ex)
             pass # todo: don't ignore
 
     def watch(self, fn, path=""):
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xend/xenstore/xsresource.py
--- a/tools/python/xen/xend/xenstore/xsresource.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xend/xenstore/xsresource.py      Thu Aug 25 22:53:20 2005
@@ -1,3 +1,16 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
 #============================================================================
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/create.py     Thu Aug 25 22:53:20 2005
@@ -1,5 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
 # Copyright (C) 2005 Nguyen Anh Quynh <aquynh@xxxxxxxxx>
+#============================================================================
 
 """Domain creation.
 """
@@ -7,6 +22,8 @@
 import string
 import sys
 import socket
+import commands
+import time
 
 import xen.lowlevel.xc
 
@@ -16,8 +33,6 @@
 from xen.xend.XendBootloader import bootloader
 from xen.xend import XendRoot; xroot = XendRoot.instance()
 from xen.util import blkif
-
-from xen.util import console_client
 
 from xen.xm.opts import *
 
@@ -144,10 +159,6 @@
           fn=set_float, default=None,
           use="""Set the new domain's cpu weight.
           WEIGHT is a float that controls the domain's share of the cpu.""")
-
-gopts.var('console', val='PORT',
-          fn=set_int, default=None,
-          use="Console port to use. Default is 9600 + domain id.")
 
 gopts.var('restart', val='onreboot|always|never',
           fn=set_value, default=None,
@@ -370,7 +381,6 @@
 
     @return: MAC address string
     """
-    random.seed()
     mac = [ 0xaa, 0x00, 0x00,
             random.randint(0x00, 0x7f),
             random.randint(0x00, 0xff),
@@ -471,8 +481,6 @@
         config.append(['backend', ['netif']])
     if vals.restart:
         config.append(['restart', vals.restart])
-    if vals.console:
-        config.append(['console', vals.console])
 
     if vals.bootloader:
         run_bootloader(opts, config, vals)
@@ -584,9 +592,14 @@
         return d
     return None
 
+vncpid = None
+
 def spawn_vnc(display):
-    os.system("vncviewer -log *:stdout:0 -listen %d &" %
-              (VNC_BASE_PORT + display))
+    vncargs = (["vncviewer", "-log", "*:stdout:0",  # argv[0] first, then each option as its own element
+            "-listen", "%d" % (VNC_BASE_PORT + display) ])
+    global vncpid  # module-level pid, so the XendError handler can kill the viewer
+    vncpid = os.spawnvp(os.P_NOWAIT, "vncviewer", vncargs)  # P_NOWAIT: do not block on the viewer
+
     return VNC_BASE_PORT + display
     
 def preprocess_vnc(opts, vals):
@@ -620,8 +633,8 @@
 
     @param opts:   options
     @param config: configuration
-    @return: domain id, console port
-    @rtype:  (int, int)
+    @return: domain id
+    @rtype:  int
     """
 
     try:
@@ -631,22 +644,19 @@
         else:
             dominfo = server.xend_domain_create(config)
     except XendError, ex:
+        import signal
+        if vncpid:
+            os.kill(vncpid, signal.SIGKILL)
         opts.err(str(ex))
 
     dom = sxp.child_value(dominfo, 'name')
-    console_info = sxp.child(dominfo, 'console')
-    if console_info:
-        console_port = int(sxp.child_value(console_info, 'console_port'))
-    else:
-        console_port = None
 
     if not opts.vals.paused:
         if server.xend_domain_unpause(dom) < 0:
             server.xend_domain_destroy(dom)
             opts.err("Failed to unpause domain %s" % dom)
-    opts.info("Started domain %s, console on port %d"
-              % (dom, console_port))
-    return (dom, console_port)
+    opts.info("Started domain %s" % (dom))
+    return int(sxp.child_value(dominfo, 'id'))
 
 def get_dom0_alloc():
     """Return current allocation memory of dom0 (in MB). Return 0 on error"""
@@ -665,20 +675,38 @@
     return 0
 
 def balloon_out(dom0_min_mem, opts):
-    """Balloon out to get memory for domU, if necessarily"""
+    """Balloon out memory from dom0 if necessary. Returns 0 on success, 1 if enough memory could not be freed"""
     SLACK = 4
+    timeout = 20 # polling iterations of 100ms each, i.e. 2s in total
+    ret = 0
 
     xc = xen.lowlevel.xc.new()
     pinfo = xc.physinfo()
-    free_mem = pinfo['free_pages']/256
-    if free_mem < opts.vals.memory + SLACK:
-        need_mem = opts.vals.memory + SLACK - free_mem
-        cur_alloc = get_dom0_alloc()
-        if cur_alloc - need_mem >= dom0_min_mem:
-            server.xend_domain_mem_target_set(0, cur_alloc - need_mem)
+    free_mem = pinfo['free_pages'] / 256 # pages -> MB (presumably 4kB pages; confirm)
+    domU_need_mem = opts.vals.memory + SLACK
+
+    dom0_cur_alloc = get_dom0_alloc()
+    dom0_new_alloc = dom0_cur_alloc - (domU_need_mem - free_mem)
+
+    if free_mem < domU_need_mem and dom0_new_alloc < dom0_min_mem:
+        ret = 1 # dom0 would have to shrink below its configured minimum
+    if free_mem < domU_need_mem and ret == 0:
+
+        server.xend_domain_mem_target_set(0, dom0_new_alloc)
+
+        while dom0_cur_alloc > dom0_new_alloc and timeout > 0:
+            time.sleep(0.1) # sleep 100ms
+            dom0_cur_alloc = get_dom0_alloc() or dom0_cur_alloc # 0 means read error: keep last value
+            timeout -= 1
+
+        if dom0_cur_alloc > dom0_new_alloc:
+            ret = 1 # timed out before dom0 reached its new target
+
     del xc
+    return ret
 
 def main(argv):
+    random.seed()
     opts = gopts
     args = opts.parse(argv)
     if opts.vals.help:
@@ -707,12 +735,14 @@
     else:
         dom0_min_mem = xroot.get_dom0_min_mem()
         if dom0_min_mem != 0:
-            balloon_out(dom0_min_mem, opts)
-
-        (dom, console) = make_domain(opts, config)
+            if balloon_out(dom0_min_mem, opts):
+                print >>sys.stderr, "error: cannot allocate enough memory for domain"
+                sys.exit(1)
+
+        dom = make_domain(opts, config)
         if opts.vals.console_autoconnect:
-            path = "/var/lib/xend/console-%s" % console
-            console_client.connect('localhost', console, path=path)
+            cmd = "/usr/libexec/xen/xenconsole %d" % dom
+            os.execvp('/usr/libexec/xen/xenconsole', cmd.split())
         
 if __name__ == '__main__':
     main(sys.argv)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/destroy.py
--- a/tools/python/xen/xm/destroy.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/destroy.py    Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Destroy a domain.
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/help.py
--- a/tools/python/xen/xm/help.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/help.py       Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Variable definition and help support for Python defconfig files.
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/main.py       Thu Aug 25 22:53:20 2005
@@ -1,27 +1,124 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+# (C) Copyright IBM Corp. 2005
+# Copyright (C) 2004 Mike Wray
+#
+# Authors:
+#     Sean Dague <sean at dague dot net>
+#     Mike Wray <mike dot wray at hp dot com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
 """Grand unified management application for Xen.
 """
 import os
 import os.path
 import sys
+import commands
+import re
 from getopt import getopt
 import socket
 import warnings
 warnings.filterwarnings('ignore', category=FutureWarning)
-
 from xen.xend import PrettyPrint
 from xen.xend import sxp
-# this is a nasty place to stick this in, but required because
-# log file access is set up via a 5 deep import chain.  This
-# ensures the user sees a useful message instead of a stack trace
-if os.getuid() != 0:
-    print "xm requires root access to execute, please try again as root"
-    sys.exit(1)
-
-from xen.xend.XendClient import XendError, server
-from xen.xend.XendClient import main as xend_client_main
-from xen.xm import create, destroy, migrate, shutdown, sysrq
 from xen.xm.opts import *
+shorthelp = """Usage: xm <subcommand> [args]
+    Control, list, and manipulate Xen guest instances
+
+xm common subcommands:
+    console <DomId>         attach to console of DomId
+    create <CfgFile>        create a domain based on Config File
+    destroy <DomId>         terminate a domain immediately
+    help                    display this message
+    list [DomId, ...]       list information about domains
+    mem-max <DomId> <Mem>   set the maximum memory reservation for a domain
+    mem-set <DomId> <Mem>   adjust the current memory usage for a domain
+    migrate <DomId> <Host>  migrate a domain to another machine
+    pause <DomId>           pause execution of a domain
+    reboot <DomId>          reboot a domain
+    restore <File>          create a domain from a saved state file
+    save <DomId> <File>     save domain state (and config) to file
+    shutdown <DomId>        shutdown a domain
+    top                     monitor system and domains in real-time
+    unpause <DomId>         unpause a paused domain
+
+For a complete list of subcommands run 'xm help --long'
+For more help on xm see the xm(1) man page
+For more help on xm create, see the xmdomain.cfg(5) man page"""
+
+longhelp = """Usage: xm <subcommand> [args]
+    Control, list, and manipulate Xen guest instances
+
+xm full list of subcommands:
+
+  Domain Commands:
+    console <DomId>         attach to console of DomId
+    cpus-list <DomId> <VCpu>          get the list of cpus for a VCPU
+    cpus-set <DomId> <VCpu> <CPUS>    set which cpus a VCPU can use. 
+    create  <ConfigFile>      create a domain
+    destroy <DomId>           terminate a domain immediately
+    domid   <DomName>         convert a domain name to a domain id
+    domname <DomId>           convert a domain id to a domain name
+    list                      list information about domains
+    mem-max <DomId> <Mem>     set domain maximum memory limit
+    mem-set <DomId> <Mem>     set the domain's memory dynamically
+    migrate <DomId> <Host>    migrate a domain to another machine
+    pause   <DomId>           pause execution of a domain
+    reboot   [-w|-a] <DomId>  reboot a domain
+    restore <File>            create a domain from a saved state file
+    save    <DomId> <File>    save domain state (and config) to file
+    shutdown [-w|-a] <DomId>  shutdown a domain
+    sysrq   <DomId> <letter>  send a sysrq to a domain
+    unpause <DomId>           unpause a paused domain
+    vcpu-enable <DomId> <VCPU>        enable VCPU in a domain
+    vcpu-disable <DomId> <VCPU>       disable VCPU in a domain
+    vcpu-list <DomId>                 get the list of VCPUs for a domain
+
+  Xen Host Commands:
+    dmesg   [--clear]         read or clear Xen's message buffer
+    info                      get information about the xen host
+    log                       print the xend log
+    top                       monitor system and domains in real-time
+
+  Scheduler Commands:
+    bvt <options>             set BVT scheduler parameters
+    bvt_ctxallow <Allow>      set the BVT scheduler context switch allowance
+    sedf <options>            set simple EDF parameters
+
+  Virtual Device Commands:
+    block-create <DomId> <BackDev> <FrontDev> <Mode> [BackDomId]
+        Create a new virtual block device 
+    block-destroy <DomId> <DevId>  Destroy a domain's virtual block device
+    block-list    <DomId>          List virtual block devices for a domain
+    block-refresh <DomId> <DevId>  Refresh a virtual block device for a domain
+    network-limit   <DomId> <Vif> <Credit> <Period>
+        Limit the transmission rate of a virtual network interface
+    network-list    <DomId>        List virtual network interfaces for a domain
+
+For a short list of subcommands run 'xm help'
+For more help on xm see the xm(1) man page
+For more help on xm create, see the xmdomain.cfg(5) man page"""
+
+####################################################################
+#
+#  Utility functions
+#
+####################################################################
+
+def arg_check(args,num,name):
+    if len(args) < num:
+        err("'xm %s' requires %s argument(s)!\n" % (name, num))
+        usage(name)
 
 def unit(c):
     if not c.isalpha():
@@ -48,724 +145,325 @@
     else:
         return value * (base / dst_base)
 
-class Group:
-
-    name = ""
-    info = ""
-    
-    def __init__(self, xm):
-        self.xm = xm
-        self.progs = {}
-
-    def addprog(self, prog):
-        self.progs[prog.name] = prog
-
-    def getprog(self, name):
-        return self.progs.get(name)
-
-    def proglist(self):
-        kl = self.progs.keys()
-        kl.sort()
-        return [ self.getprog(k) for k in kl ]
-
-    def help(self, args):
-        if self.info:
-            print 
-            print self.info
-            print
-        else:
-            print
-        
-    def shortHelp(self, args):
-        self.help(args)
-        for p in self.proglist():
-            p.shortHelp(args)
-
-class Prog:
-    """Base class for sub-programs.
-    """
-
-    """Program group it belongs to"""
-    group = 'all'
-    """Program name."""
-    name = '??'
-    """Short program info."""
-    info = ''
-
-    def __init__(self, xm):
-        self.xm = xm
-
-    def err(self, msg):
-        self.xm.err(msg)
-
-    def help(self, args):
-        self.shortHelp(args)
-
-    def shortHelp(self, args):
-        print "%-14s %s" % (self.name, self.info)
-
-    def main(self, args):
-        """Program main entry point.
-        """
-        pass
-
-
-class ProgUnknown(Prog):
-
-    name = 'unknown'
-    info = ''
-    
-    def help(self, args):
-        self.xm.err("Unknown command: %s\nTry '%s help' for more information."
-                    % (args[0], self.xm.name))
-
-    main = help
-
-class Xm:
-    """Main application.
-    """
-
-    def __init__(self):
-        self.name = 'xm'
-        self.unknown = ProgUnknown(self)
-        self.progs = {}
-        self.groups = {}
-
-    def err(self, msg):
-        print >>sys.stderr, "Error:", msg
+def err(msg):
+    print >>sys.stderr, "Error:", msg
+
+def handle_xend_error(cmd, dom, ex):
+    error = str(ex)
+    if error == "Not found" and dom != None:
+        err("Domain '%s' not found when running 'xm %s'" % (dom, cmd))
         sys.exit(1)
-
-    def main(self, args):
-        try:
-            self.main_call(args)
-        except socket.error, ex:
-            print >>sys.stderr, ex
-            self.err("Error connecting to xend, is xend running?")
-        except XendError, ex:
-            self.err(str(ex))
-
-    def main_call(self, args):
-        """Main entry point. Dispatches to the progs.
-        """
-        self.name = args[0]
-        if len(args) < 2:
-               args.append('help')
-       help = self.helparg(args)
-        p = self.getprog(args[1], self.unknown)
-        if help or len(args) < 2: 
-            p.help(args[1:])
-        else:
-            p.main(args[1:])
-
-    def helparg(self, args):
-        for a in args:
-            if a in ['-h', '--help']:
-                return 1
-        return 0
-
-    def prog(self, pklass):
-        """Add a sub-program.
-
-        pklass  program class (Prog subclass)
-        """
-        p = pklass(self)
-        self.progs[p.name] = p
-        self.getgroup(p.group).addprog(p)
-        return p
-
-    def getprog(self, name, val=None):
-        """Get a sub-program.
-        name  Name of the sub-program (or optionally, an unambiguous
-              prefix of its name)
-        val   Default return value if no (unique) match is found
-        """
-
-        match = None
-        for progname in self.progs.keys():
-            if progname == name:
-                match = progname
-                break
-            if progname.startswith(name):
-                if not match:
-                    match = progname
-                else:
-                    return val # name is ambiguous - bail out
-
-        return self.progs.get(match, val)
-
-    def group(self, klass):
-        g = klass(self)
-        self.groups[g.name] = g
-        return g
-
-    def getgroup(self, name):
-        return self.groups[name]
-
-    def grouplist(self):
-        kl = self.groups.keys()
-        kl.sort()
-        return [ self.getgroup(k) for k in kl ]
-        
-# Create the application object, then add the sub-program classes.
-xm = Xm()
-
-class GroupAll(Group):
-
-    name = "all"
-    info = ""
-
-xm.group(GroupAll)
-
-class GroupDomain(Group):
-
-    name = "domain"
-    info = "Commands on domains:"
-    
-xm.group(GroupDomain)
-
-class GroupScheduler(Group):
-
-    name = "scheduler"
-    info = "Comands controlling scheduling:"
-
-xm.group(GroupScheduler)
-
-class GroupHost(Group):
-
-    name = "host"
-    info = "Commands related to the xen host (node):"
-
-xm.group(GroupHost)
-
-class GroupConsole(Group):
-
-    name = "console"
-    info = "Commands related to consoles:"
-
-xm.group(GroupConsole)
-
-class GroupVbd(Group):
-
-    name = "vbd"
-    info = "Commands related to virtual block devices:"
-
-xm.group(GroupVbd)
-
-class GroupVif(Group):
-
-    name = "vif"
-    info = "Commands related to virtual network interfaces:"
-
-xm.group(GroupVif)
-
-class ProgHelp(Prog):
-
-    name = "help"
-    info = "Print help."
-    
-    def help(self, args):
-        if len(args) == 2:
-            name = args[1]
-            p = self.xm.getprog(name)
-            if p:
-                p.help(args[1:])
-            else:
-                print '%s: Unknown command: %s' % (self.name, name)
-        else:
-            for g in self.xm.grouplist():
-                g.shortHelp(args)
-            print "\nTry '%s help CMD' for help on CMD" % self.xm.name
-
-    main = help
-
-xm.prog(ProgHelp)
-
-class ProgCreate(Prog):
-
-    group = 'domain'
-    name = "create"
-    info = """Create a domain."""
-
-    def help(self, args):
-        create.main([args[0], '-h'])
-
-    def main(self, args):
-        create.main(args)
-
-xm.prog(ProgCreate)
-
-class ProgSave(Prog):
-    group = 'domain'
-    name = "save"
-    info = """Save domain state (and config) to file."""
-
-    def help(self, args):
-        print args[0], "DOM FILE"
-        print """\nSave domain with id DOM to FILE."""
-        
-    def main(self, args):
-        if len(args) < 3: self.err("%s: Missing arguments" % args[0])
-        dom = args[1]
-        savefile = os.path.abspath(args[2])
-        server.xend_domain_save(dom, savefile)
-
-xm.prog(ProgSave)
-
-class ProgRestore(Prog):
-    group = 'domain'
-    name = "restore"
-    info = """Create a domain from a saved state."""
-
-    def help(self, args):
-        print args[0], "FILE"
-        print "\nRestore a domain from FILE."
-    
-    def main(self, args):
-        if len(args) < 2: self.err("%s: Missing arguments" % args[0])
-        savefile = os.path.abspath(args[1])
-        info = server.xend_domain_restore(savefile)
-        PrettyPrint.prettyprint(info)
-        id = sxp.child_value(info, 'id')
-        if id is not None:
-            server.xend_domain_unpause(id)
-
-xm.prog(ProgRestore)
-
-class ProgMigrate(Prog):
-    group = 'domain'
-    name = "migrate"
-    info = """Migrate a domain to another machine."""
-
-    def help(self, args):
-        migrate.help([self.name] + args)
-    
-    def main(self, args):
-        migrate.main(args)
-
-xm.prog(ProgMigrate)
-
-class ProgList(Prog):
-    group = 'domain'
-    name = "list"
-    info = """List information about domains."""
-
-    short_options = 'lv'
-    long_options = ['long','vcpus']
-
-    def help(self, args):
-        if help:
-            print args[0], '[options] [DOM...]'
-            print """\nGet information about domains.
-            Either all domains or the domains given.
-
-            -l, --long   Get more detailed information.
-            -v, --vcpus  Show VCPU to CPU mapping.
-            """
-            return
-        
-    def main(self, args):
-        use_long = 0
-        show_vcpus = 0
-        (options, params) = getopt(args[1:],
-                                   self.short_options,
-                                   self.long_options)
-        n = len(params)
-        for (k, v) in options:
-            if k in ['-l', '--long']:
-                use_long = 1
-            if k in ['-v', '--vcpus']:
-                show_vcpus = 1
-                
-        if n == 0:
-            doms = server.xend_domains()
-            doms.sort()
-        else:
-            doms = params
-            
-        if use_long:
-            self.long_list(doms)
-        elif show_vcpus:
-            self.show_vcpus(doms)
-        else:
-            self.brief_list(doms)
-
-    def brief_list(self, doms):
-        print 'Name              Id  Mem(MB)  CPU VCPU(s)  State  Time(s)  Console'
-        for dom in doms:
-            info = server.xend_domain(dom)
-            d = {}
-            d['dom'] = int(sxp.child_value(info, 'id', '-1'))
-            d['name'] = sxp.child_value(info, 'name', '??')
-            d['mem'] = int(sxp.child_value(info, 'memory', '0'))
-            d['cpu'] = str(sxp.child_value(info, 'cpu', '0'))
-            d['vcpus'] = int(sxp.child_value(info, 'vcpus', '0'))
-            d['state'] = sxp.child_value(info, 'state', '??')
-            d['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0'))
-            console = sxp.child(info, 'console')
-            if console:
-                d['port'] = sxp.child_value(console, 'console_port')
-            else:
-                d['port'] = ''
-            if d['vcpus'] > 1:
-                d['cpu'] = '-'
-            if ((int(sxp.child_value(info, 'ssidref', '0'))) != 0):
-                d['ssidref1'] =  int(sxp.child_value(info, 'ssidref', '0')) & 0xffff
-                d['ssidref2'] = (int(sxp.child_value(info, 'ssidref', '0')) >> 16) & 0xffff
-                print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f     %(port)4s    s:%(ssidref2)02x/p:%(ssidref1)02x" % d)
-            else:
-                print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f     %(port)4s" % d)
-
-    def show_vcpus(self, doms):
-        print 'Name              Id  VCPU  CPU  CPUMAP'
-        for dom in doms:
-            info = server.xend_domain(dom)
-            vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '-1').split('|')
-            cpumap = sxp.child_value(info, 'cpumap', [])
-            mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1
-            count = 0
-            for cpu in vcpu_to_cpu:
-                d = {}
-                d['name']   = sxp.child_value(info, 'name', '??')
-                d['dom']    = int(sxp.child_value(info, 'id', '-1'))
-                d['vcpu']   = int(count)
-                d['cpu']    = int(cpu)
-                d['cpumap'] = int(cpumap[count])&mask
-                count = count + 1
-                print ("%(name)-16s %(dom)3d  %(vcpu)4d  %(cpu)3d  0x%(cpumap)x" % d)
-
-    def long_list(self, doms):
+    else:
+        raise ex
+    
+
+#########################################################################
+#
+#  Main xm functions
+#
+#########################################################################
+
+def xm_create(args):
+    from xen.xm import create
+    # ugly hack because the opt parser apparently wants
+    # the subcommand name just to throw it away!
+    args.insert(0,"bogus")
+    create.main(args)
+
+def xm_save(args):
+    arg_check(args,2,"save")
+
+    dom = args[0] # TODO: should check if this exists
+    savefile = os.path.abspath(args[1])
+    
+    from xen.xend.XendClient import server
+    server.xend_domain_save(dom, savefile)
+    
+def xm_restore(args):
+    arg_check(args,1,"restore")
+
+    savefile = os.path.abspath(args[0])
+
+    from xen.xend.XendClient import server
+    info = server.xend_domain_restore(savefile)
+    PrettyPrint.prettyprint(info)
+    id = sxp.child_value(info, 'id')
+    if id is not None:
+        server.xend_domain_unpause(id)
+
+def xm_migrate(args):
+    # TODO: arg_check
+    from xen.xm import migrate
+    # ugly hack because the opt parser apparently wants
+    # the subcommand name just to throw it away!
+    args.insert(0,"bogus")
+    migrate.main(args)
+
+def xm_list(args):
+    use_long = 0
+    show_vcpus = 0
+    try:
+        (options, params) = getopt(args, 'lv', ['long','vcpus'])
+    except GetoptError, opterr:
+        err(opterr)
+        sys.exit(1)
+    
+    n = len(params)
+    for (k, v) in options:
+        if k in ['-l', '--long']:
+            use_long = 1
+        if k in ['-v', '--vcpus']:
+            show_vcpus = 1
+
+    domsinfo = []
+    from xen.xend.XendClient import server
+    if n == 0:
+        doms = server.xend_domains()
+        doms.sort()
+    else:
+        doms = params
+    for dom in doms:
+        info = server.xend_domain(dom)
+        domsinfo.append(parse_doms_info(info))
+               
+    if use_long:
         for dom in doms:
             info = server.xend_domain(dom)
             PrettyPrint.prettyprint(info)
-
-xm.prog(ProgList)
-
-class ProgDestroy(Prog):
-    group = 'domain'
-    name = "destroy"
-    info = """Terminate a domain immediately."""
-
-    def help(self, args):
-        destroy.main([args[0], '-h'])
-
-    def main(self, args):
-        destroy.main(args)
-
-xm.prog(ProgDestroy)
-
-class ProgShutdown(Prog):
-    group = 'domain'
-    name = "shutdown"
-    info = """Shutdown a domain."""
-
-    def help(self, args):
-        shutdown.main([args[0], '-h'])
-    
-    def main(self, args):
-        shutdown.main(args)
-
-xm.prog(ProgShutdown)
-
-class ProgSysrq(Prog):
-    group = 'domain'
-    name = "sysrq"
-    info = """Send a sysrq to a domain."""
-
-    def help(self, args):
-        sysrq.main([args[0], '-h'])
-    
-    def main(self, args):
-        sysrq.main(args)
-
-xm.prog(ProgSysrq)
-
-class ProgPause(Prog):
-    group = 'domain'
-    name = "pause"
-    info = """Pause execution of a domain."""
-
-    def help(self, args):
-        print args[0], 'DOM'
-        print '\nPause execution of domain DOM.'
-
-    def main(self, args):
-        if len(args) < 2: self.err("%s: Missing domain" % args[0])
-        dom = args[1]
-        server.xend_domain_pause(dom)
-
-xm.prog(ProgPause)
-
-class ProgUnpause(Prog):
-    group = 'domain'
-    name = "unpause"
-    info = """Unpause a paused domain."""
-
-    def help(self, args):
-        print args[0], 'DOM'
-        print '\nUnpause execution of domain DOM.'
-
-    def main(self, args):
-        if len(args) < 2: self.err("%s: Missing domain" % args[0])
-        dom = args[1]
-        server.xend_domain_unpause(dom)
-
-xm.prog(ProgUnpause)
-
-class ProgPincpu(Prog):
-    group = 'domain'
-    name = "pincpu"
-    info = """Set which cpus a VCPU can use. """
-
-    def help(self, args):
-        print args[0],'DOM VCPU CPUS'
-        print '\nSet which cpus VCPU in domain DOM can use.'
-
-    # convert list of cpus to bitmap integer value
-    def make_map(self, cpulist):
-        cpus = []
-        cpumap = 0
-        for c in cpulist.split(','):
-            if c.find('-') != -1:
-                (x,y) = c.split('-')
-                for i in range(int(x),int(y)+1):
-                    cpus.append(int(i))
-            else:
-                cpus.append(int(c))
-        cpus.sort()
-        for c in cpus:
-            cpumap = cpumap | 1<<c
-
-        return cpumap
-
-    def main(self, args):
-        if len(args) != 4: self.err("%s: Invalid argument(s)" % args[0])
-        dom  = args[1]
-        vcpu = int(args[2])
-        cpumap  = self.make_map(args[3]);
-        server.xend_domain_pincpu(dom, vcpu, cpumap)
-
-xm.prog(ProgPincpu)
-
-class ProgMaxmem(Prog):
-    group = 'domain'
-    name = 'maxmem'
-    info = """Set domain memory limit."""
-
-    def help(self, args):
-        print args[0], "DOM MEMORY"
-        print "\nSet the memory limit for domain DOM to MEMORY megabytes."
-
-    def main(self, args):
-        if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        mem = int_unit(args[2], 'm')
-        server.xend_domain_maxmem_set(dom, mem)
-
-xm.prog(ProgMaxmem)
-
-class ProgSetMem(Prog):
-    group = 'domain'
-    name  = 'set-mem'
-    info  = """Set the domain's memory footprint using the balloon driver."""
-
-    def help(self, args):
-        print args[0], "DOM MEMORY_TARGET"
-        print """\nRequest domain DOM to adjust its memory footprint to
-MEMORY_TARGET megabytes"""
-
-    def main(self, args):
-        if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        mem_target = int_unit(args[2], 'm')
-        server.xend_domain_mem_target_set(dom, mem_target)
-
-xm.prog(ProgSetMem)
-
-class ProgVcpuhotplug(Prog):
-    group = 'domain'
-    name  = 'vcpu-hotplug'
-    info  = """Enable or disable a VCPU in a domain."""
-
-    def help(self, args):
-        print args[0], "DOM VCPU [0|1]"
-        print """\nRequest virtual processor VCPU to be disabled or enabled in
-domain DOM"""
-
-    def main(self, args):
-        if len(args) != 4: self.err("%s: Invalid arguments(s)" % args[0])
-        name = args[1]
-        vcpu = int(args[2])
-        state = int(args[3])
-        dom = server.xend_domain(name)
-        id = sxp.child_value(dom, 'id')
-        server.xend_domain_vcpu_hotplug(id, vcpu, state)
-
-xm.prog(ProgVcpuhotplug)
-
-class ProgDomid(Prog):
-    group = 'domain'
-    name = 'domid'
-    info = 'Convert a domain name to a domain id.'
-
-    def help(self, args):
-        print args[0], "DOM"
-        print '\nGet the domain id for the domain with name DOM.'
+    elif show_vcpus:
+        xm_show_vcpus(domsinfo)
+    else:
+        xm_brief_list(domsinfo)
+
+def parse_doms_info(info):
+    dominfo = {}
+    dominfo['dom'] = int(sxp.child_value(info, 'id', '-1'))
+    dominfo['name'] = sxp.child_value(info, 'name', '??')
+    dominfo['mem'] = int(sxp.child_value(info, 'memory', '0'))
+    dominfo['cpu'] = str(sxp.child_value(info, 'cpu', '0'))
+    dominfo['vcpus'] = int(sxp.child_value(info, 'vcpus', '0'))
+    # if there is more than 1 cpu, the value doesn't mean much
+    if dominfo['vcpus'] > 1:
+        dominfo['cpu'] = '-'
+    dominfo['state'] = sxp.child_value(info, 'state', '??')
+    dominfo['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0'))
+    # security identifiers
+    if ((int(sxp.child_value(info, 'ssidref', '0'))) != 0):
+        dominfo['ssidref1'] =  int(sxp.child_value(info, 'ssidref', '0')) & 0xffff
+        dominfo['ssidref2'] = (int(sxp.child_value(info, 'ssidref', '0')) >> 16) & 0xffff
+    # get out the vcpu information
+    dominfo['vcpulist'] = []
+    vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '-1').split('|')
+    cpumap = sxp.child_value(info, 'cpumap', [])
+    mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1
+    count = 0
+    for cpu in vcpu_to_cpu:
+        vcpuinfo = {}
+        vcpuinfo['name']   = sxp.child_value(info, 'name', '??')
+        vcpuinfo['dom']    = int(sxp.child_value(info, 'id', '-1'))
+        vcpuinfo['vcpu']   = int(count)
+        vcpuinfo['cpu']    = int(cpu)
+        vcpuinfo['cpumap'] = int(cpumap[count])&mask
+        count = count + 1
+        dominfo['vcpulist'].append(vcpuinfo)
+    return dominfo
         
-    def main (self, args):
-        if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
-        name = args[1]
-        dom = server.xend_domain(name)
-        print sxp.child_value(dom, 'id')
-
-xm.prog(ProgDomid)
-
-class ProgDomname(Prog):
-    group = 'domain'
-    name = 'domname'
-    info = 'Convert a domain id to a domain name.'
-
-    def help(self, args):
-        print args[0], "DOM"
-        print '\nGet the name for the domain with id DOM.'
-        
-    def main (self, args):
-        if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
-        name = args[1]
-        dom = server.xend_domain(name)
-        print sxp.child_value(dom, 'name')
-
-xm.prog(ProgDomname)
-
-class ProgBvt(Prog):
-    group = 'scheduler'
-    name = "bvt"
-    info = """Set BVT scheduler parameters."""
-    
-    def help(self, args):
-        print args[0], "DOM MCUADV WARPBACK WARPVALUE WARPL WARPU"
-        print '\nSet Borrowed Virtual Time scheduler parameters.'
-
-    def main(self, args):
-        if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        v = map(long, args[2:7])
-        server.xend_domain_cpu_bvt_set(dom, *v)
-
-xm.prog(ProgBvt)
-
-class ProgBvtslice(Prog):
-    group = 'scheduler'
-    name = "bvt_ctxallow"
-    info = """Set the BVT scheduler context switch allowance."""
-
-    def help(self, args):
-        print args[0], 'CTX_ALLOW'
-        print '\nSet Borrowed Virtual Time scheduler context switch allowance.'
-
-    def main(self, args):
-        if len(args) < 2: self.err('%s: Missing context switch allowance'
-                                                            % args[0])
-        slice = int(args[1])
-        server.xend_node_cpu_bvt_slice_set(slice)
-
-xm.prog(ProgBvtslice)
-
-class ProgSedf(Prog):
-    group = 'scheduler'
-    name= "sedf"
-    info = """Set simple EDF parameters."""
-
-    def help(self, args):
-        print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT"
-        print "\nSet simple EDF parameters."
-
-    def main(self, args):
-       if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
-       dom = args[1]
-       v = map(int, args[2:7])
-       server.xend_domain_cpu_sedf_set(dom, *v)
-
-xm.prog(ProgSedf)
-
-class ProgInfo(Prog):
-    group = 'host'
-    name = "info"
-    info = """Get information about the xen host."""
-
-    def main(self, args):
-        info = server.xend_node()
-        for x in info[1:]:
-            print "%-23s:" % x[0], x[1]
-
-xm.prog(ProgInfo)
-
-class ProgConsoles(Prog):
-    group = 'console'
-    name = "consoles"
-    info = """Get information about domain consoles."""
-
-    def main(self, args):
-        l = server.xend_consoles()
-        print "Dom Port  Id Connection"
-        for x in l:
-            info = server.xend_console(x)
-            d = {}
-            d['dom'] = sxp.child(info, 'domain', '?')[1]
-            d['port'] = sxp.child_value(info, 'console_port', '?')
-            d['id'] = sxp.child_value(info, 'id', '?')
-            connected = sxp.child(info, 'connected')
-            if connected:
-                d['conn'] = '%s:%s' % (connected[1], connected[2])
-            else:
-                d['conn'] = ''
-            print "%(dom)3s %(port)4s %(id)3s %(conn)s" % d
-
-xm.prog(ProgConsoles)
-
-class ProgConsole(Prog):
-    group = 'console'
-    name = "console"
-    info = """Open a console to a domain."""
-    
-    def help(self, args):
-        print args[0], "DOM"
-        print "\nOpen a console to domain DOM."
-
-    def main(self, args):
-        if len(args) < 2: self.err("%s: Missing domain" % args[0])
-        dom = args[1]
-        info = server.xend_domain(dom)
-        console = sxp.child(info, "console")
-        if not console:
-            self.err("No console information")
-        port = sxp.child_value(console, "console_port")
-        from xen.util import console_client
-        path = "/var/lib/xend/console-%s" % port
-        console_client.connect("localhost", int(port), path=path)
-
-xm.prog(ProgConsole)
-
-class ProgCall(Prog):
-    name = "call"
-    info = "Call xend api functions."
-
-    def help (self, args):
-        print args[0], "function args..."
-        print """
-        Call a xend HTTP API function. The leading 'xend_' on the function
-can be omitted. See xen.xend.XendClient for the API functions.
-"""
-
-    def main(self, args):
-        xend_client_main(args)
-
-xm.prog(ProgCall)
-
-class ProgDmesg(Prog):
-    group = 'host'
-    name  =  "dmesg"
-    info  = """Read or clear Xen's message buffer."""
-
+def xm_brief_list(domsinfo):
+    print 'Name              Id  Mem(MB)  CPU VCPU(s)  State  Time(s)'
+    for dominfo in domsinfo:
+        if dominfo.has_key("ssidref1"):
+            print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f     s:%(ssidref2)02x/p:%(ssidref1)02x" % dominfo)
+        else:
+            print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f" % dominfo)
+
+def xm_show_vcpus(domsinfo):
+    print 'Name              Id  VCPU  CPU  CPUMAP'
+    for dominfo in domsinfo:
+        for vcpuinfo in dominfo['vcpulist']:
+            print ("%(name)-16s %(dom)3d  %(vcpu)4d  %(cpu)3d  0x%(cpumap)x" %
+                   vcpuinfo)
+
+def xm_vcpu_list(args):
+    args.insert(0,"-v")
+    xm_list(args)
+
+def xm_destroy(args):
+    arg_check(args,1,"destroy")
+
+    from xen.xm import destroy
+    # ugly hack because the opt parser apparently wants
+    # the subcommand name just to throw it away!
+    args.insert(0,"bogus")
+    destroy.main(args)
+            
+def xm_reboot(args):
+    arg_check(args,1,"reboot")
+    # ugly hack because the opt parser apparently wants
+    # the subcommand name just to throw it away!
+    args.insert(0,"bogus")
+    args.insert(2,"-R")
+    from xen.xm import shutdown
+    shutdown.main(args)
+
+def xm_shutdown(args):
+    arg_check(args,1,"shutdown")
+
+    # ugly hack because the opt parser apparently wants
+    # the subcommand name just to throw it away!
+    args.insert(0,"bogus")
+    from xen.xm import shutdown
+    shutdown.main(args)
+
+def xm_sysrq(args):
+    from xen.xm import sysrq
+    # ugly hack because the opt parser apparently wants
+    # the subcommand name just to throw it away!
+    args.insert(0,"bogus")
+    sysrq.main(args)
+
+def xm_pause(args):
+    arg_check(args, 1, "pause")
+    dom = args[0]
+
+    from xen.xend.XendClient import server
+    server.xend_domain_pause(dom)
+
+def xm_unpause(args):
+    arg_check(args, 1, "unpause")
+    dom = args[0]
+
+    from xen.xend.XendClient import server
+    server.xend_domain_unpause(dom)
+
+#############################################################
+
+def cpu_make_map(cpulist):
+    cpus = []
+    cpumap = 0
+    for c in cpulist.split(','):
+        if c.find('-') != -1:
+            (x,y) = c.split('-')
+            for i in range(int(x),int(y)+1):
+                cpus.append(int(i))
+        else:
+            cpus.append(int(c))
+    cpus.sort()
+    for c in cpus:
+        cpumap = cpumap | 1<<c
+
+    return cpumap
+
+def xm_cpus_set(args):
+    arg_check(args, 3, "cpus-set")
+    
+    dom  = args[0]
+    vcpu = int(args[1])
+    cpumap = cpu_make_map(args[2])
+    
+    from xen.xend.XendClient import server
+    server.xend_domain_pincpu(dom, vcpu, cpumap)
+
+def xm_mem_max(args):
+    arg_check(args, 2, "mem-max")
+    
+    dom = args[0]
+    mem = int_unit(args[1], 'm')
+
+    from xen.xend.XendClient import server
+    server.xend_domain_maxmem_set(dom, mem)
+    
+def xm_mem_set(args):
+    arg_check(args, 2, "mem-set")
+    
+    dom = args[0]
+    mem_target = int_unit(args[1], 'm')
+
+    from xen.xend.XendClient import server
+    server.xend_domain_mem_target_set(dom, mem_target)
+    
+# TODO: why does this lookup by name?  and what if that fails!?
+def xm_vcpu_enable(args):
+    arg_check(args, 2, "vcpu-enable")
+    
+    name = args[0]
+    vcpu = int(args[1])
+    
+    from xen.xend.XendClient import server
+    dom = server.xend_domain(name)
+    id = sxp.child_value(dom, 'id')
+    server.xend_domain_vcpu_hotplug(id, vcpu, 1)
+
+def xm_vcpu_disable(args):
+    arg_check(args, 2, "vcpu-disable")
+    
+    name = args[0]
+    vcpu = int(args[1])
+    
+    from xen.xend.XendClient import server
+    dom = server.xend_domain(name)
+    id = sxp.child_value(dom, 'id')
+    server.xend_domain_vcpu_hotplug(id, vcpu, 0)
+
+def xm_domid(args):
+    name = args[0]
+
+    from xen.xend.XendClient import server
+    dom = server.xend_domain(name)
+    print sxp.child_value(dom, 'id')
+    
+def xm_domname(args):
+    name = args[0]
+
+    from xen.xend.XendClient import server
+    dom = server.xend_domain(name)
+    print sxp.child_value(dom, 'name')
+
+def xm_bvt(args):
+    arg_check(args, 6, "bvt")
+    dom = args[0]
+    v = map(long, args[1:6])
+    from xen.xend.XendClient import server
+    server.xend_domain_cpu_bvt_set(dom, *v)
+
+def xm_bvt_ctxallow(args):
+    arg_check(args, 1, "bvt_ctxallow")
+
+    slice = int(args[0])
+    from xen.xend.XendClient import server
+    server.xend_node_cpu_bvt_slice_set(slice)
+
+def xm_sedf(args):
+    arg_check(args, 6, "sedf")
+    
+    dom = args[0]
+    v = map(int, args[1:6])
+    from xen.xend.XendClient import server
+    server.xend_domain_cpu_sedf_set(dom, *v)
+
+def xm_info(args):
+    from xen.xend.XendClient import server
+    info = server.xend_node()
+    
+    for x in info[1:]:
+        print "%-23s:" % x[0], x[1]
+
+# TODO: remove as soon as console server shows up
+def xm_console(args):
+    arg_check(args,1,"console")
+
+    dom = args[0]
+    from xen.xend.XendClient import server
+    info = server.xend_domain(dom)
+    domid = int(sxp.child_value(info, 'id', '-1'))
+    cmd = "/usr/libexec/xen/xenconsole %d" % domid
+    os.execvp('/usr/libexec/xen/xenconsole', cmd.split())
+    console = sxp.child(info, "console")
+
+def xm_top(args):
+    os.execv('/usr/sbin/xentop', ['/usr/sbin/xentop'])
+
+def xm_dmesg(args):
+    
     gopts = Opts(use="""[-c|--clear]
 
 Read Xen's message buffer (boot output, warning and error messages) or clear
@@ -775,161 +473,220 @@
     gopts.opt('clear', short='c',
               fn=set_true, default=0,
               use="Clear the contents of the Xen message buffer.")
-
-    short_options = ['-c']
-    long_options = ['--clear']
-
-    def help(self, args):
-        self.gopts.argv = args
-        self.gopts.usage()
-
-    def main(self, args):
-        self.gopts.parse(args)
-        if not (1 <= len(args) <=2):
-            self.gopts.err('Invalid arguments: ' + str(args))
-
-        if not self.gopts.vals.clear:
-            print server.xend_node_get_dmesg()
-        else:
-            server.xend_node_clear_dmesg()
-
-xm.prog(ProgDmesg)
-
-class ProgLog(Prog):
-    group = 'host'
-    name  =  "log"
-    info  = """Print the xend log."""
-
-    def main(self, args):
-        print server.xend_node_log()
-
-xm.prog(ProgLog)
-
-class ProgVifCreditLimit(Prog):
-    group = 'vif'
-    name= "vif-limit"
-    info = """Limit the transmission rate of a virtual network interface."""
-
-    def help(self, args):
-        print args[0], "DOMAIN VIF CREDIT_IN_BYTES PERIOD_IN_USECS"
-        print "\nSet the credit limit of a virtual network interface."
-
-    def main(self, args):
-        if len(args) != 5: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        v = map(int, args[2:5])
-        server.xend_domain_vif_limit(dom, *v)
-
-xm.prog(ProgVifCreditLimit)
-
-class ProgVifList(Prog):
-    group = 'vif'
-    name  = 'vif-list'
-    info  = """List virtual network interfaces for a domain."""
-
-    def help(self, args):
-        print args[0], "DOM"
-        print "\nList virtual network interfaces for domain DOM"
-
-    def main(self, args):
-        if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        for x in server.xend_domain_devices(dom, 'vif'):
-            sxp.show(x)
+    # Work around for gopts
+    args.insert(0,"bogus")
+    gopts.parse(args)
+    if not (1 <= len(args) <= 2):
+        err('Invalid arguments: ' + str(args))
+
+    from xen.xend.XendClient import server
+    if not gopts.vals.clear:
+        print server.xend_node_get_dmesg()
+    else:
+        server.xend_node_clear_dmesg()
+
+def xm_log(args):
+    from xen.xend.XendClient import server
+    print server.xend_node_log()
+
+def xm_network_limit(args):
+    arg_check(args,4,"network-limit")
+    dom = args[0]
+    v = map(int, args[1:4])
+    from xen.xend.XendClient import server
+    server.xend_domain_vif_limit(dom, *v)
+
+def xm_network_list(args):
+    arg_check(args,1,"network-list")
+    dom = args[0]
+    from xen.xend.XendClient import server
+    for x in server.xend_domain_devices(dom, 'vif'):
+        sxp.show(x)
+        print
+
+def xm_block_list(args):
+    arg_check(args,1,"block-list")
+    dom = args[0]
+    from xen.xend.XendClient import server
+    for x in server.xend_domain_devices(dom, 'vbd'):
+        sxp.show(x)
+        print
+
+def xm_block_create(args):
+    n = len(args)
+    if n < 4 or n > 5:
+        err("%s: Invalid argument(s)" % args[0])
+        usage("block-create")
+
+    dom = args[0]
+    vbd = ['vbd',
+           ['uname', args[1]],
+           ['dev',   args[2]],
+           ['mode',  args[3]]]
+    if n == 5:
+        vbd.append(['backend', args[4]])
+
+    from xen.xend.XendClient import server
+    server.xend_domain_device_create(dom, vbd)
+
+def xm_block_refresh(args):
+    arg_check(args,2,"block-refresh")
+
+    dom = args[0]
+    dev = args[1]
+
+    from xen.xend.XendClient import server
+    server.xend_domain_device_refresh(dom, 'vbd', dev)
+
+def xm_block_destroy(args):
+    arg_check(args,2,"block-destroy")
+
+    dom = args[0]
+    dev = args[1]
+
+    from xen.xend.XendClient import server
+    server.xend_domain_device_destroy(dom, 'vbd', dev)
+
+commands = {
+    # console commands
+    "console": xm_console,
+    # xenstat commands
+    "top": xm_top,
+    # domain commands
+    "domid": xm_domid,
+    "domname": xm_domname,
+    "create": xm_create,
+    "destroy": xm_destroy,
+    "restore": xm_restore,
+    "save": xm_save,
+    "shutdown": xm_shutdown,
+    "reboot": xm_reboot,
+    "list": xm_list,
+    # memory commands
+    "mem-max": xm_mem_max,
+    "mem-set": xm_mem_set,
+    # cpu commands
+    "cpus-set": xm_cpus_set,
+#    "cpus-list": xm_cpus_list,
+    "vcpu-enable": xm_vcpu_enable,
+    "vcpu-disable": xm_vcpu_disable,
+    "vcpu-list": xm_vcpu_list,
+    # migration
+    "migrate": xm_migrate,
+    # special
+    "sysrq": xm_sysrq,
+    "pause": xm_pause,
+    "unpause": xm_unpause,
+    # host commands
+    "dmesg": xm_dmesg,
+    "info": xm_info,
+    "log": xm_log,
+    # scheduler
+    "bvt": xm_bvt,
+    "bvt_ctxallow": xm_bvt_ctxallow,
+    "sedf": xm_sedf,
+    # block
+    "block-create": xm_block_create,
+    "block-destroy": xm_block_destroy,
+    "block-list": xm_block_list,
+    "block-refresh": xm_block_refresh,
+    # network
+    "network-limit": xm_network_limit,
+    "network-list": xm_network_list
+    }
+
+aliases = {
+    "balloon": "mem-set",
+    "vif-list": "network-list",
+    "vif-limit": "network-limit",
+    "vbd-create": "block-create",
+    "vbd-destroy": "block-destroy",
+    "vbd-list": "block-list",
+    "vbd-refresh": "block-refresh",
+    }
+
+help = {
+    "--long": longhelp
+   }
+
+def xm_lookup_cmd(cmd):
+    if commands.has_key(cmd):
+        return commands[cmd]
+    elif aliases.has_key(cmd):
+        deprecated(cmd,aliases[cmd])
+        return commands[aliases[cmd]]
+    else:
+        if len( cmd ) > 1:
+            matched_commands = filter( lambda (command, func): command[ 0:len(cmd) ] == cmd, commands.iteritems() )
+            if len( matched_commands ) == 1:
+                       return matched_commands[0][1]
+        err('Sub Command %s not found!' % cmd)
+        usage()
+
+def deprecated(old,new):
+    err('Option %s is deprecated, and will be removed in future!!!' % old)
+    err('Option %s is the new replacement, see "xm help %s" for more info' % (new, new))
+
+def usage(cmd=None):
+    if cmd == "full":
+        print fullhelp
+    elif help.has_key(cmd):
+        print help[cmd]
+    else:
+        print shorthelp
+    sys.exit(1)
+
+def main(argv=sys.argv):
+    if len(argv) < 2:
+        usage()
+    
+    if re.compile('-*help').match(argv[1]):
+       if len(argv) > 2 and help.has_key(argv[2]):
+           usage(argv[2])
+       else:
+           usage()
+       sys.exit(0)
+
+    cmd = xm_lookup_cmd(argv[1])
+
+    # strip off prog name and subcmd
+    args = argv[2:]
+    if cmd:
+        try:
+            from xen.xend.XendClient import XendError
+            rc = cmd(args)
+            if rc:
+                usage()
+        except socket.error, ex:
+            print >>sys.stderr, ex
+            err("Error connecting to xend, is xend running?")
+            sys.exit(1)
+        except IOError:
+            err("Most commands need root access.  Please try again as root")
+            sys.exit(1)
+        except XendError, ex:
+            if args[0] == "bogus":
+                args.remove("bogus")
+            if len(args) > 0:
+                handle_xend_error(argv[1], args[0], ex)
+            else:
+                print "Unexpected error:", sys.exc_info()[0]
+                print
+                print "Please report to xen-devel@xxxxxxxxxxxxxxxxxxx"
+                raise
+        except SystemExit:
+            sys.exit(1)
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
             print
-
-xm.prog(ProgVifList)
-
-class ProgVbdList(Prog):
-    group = 'vbd'
-    name  = 'vbd-list'
-    info  = """List virtual block devices for a domain."""
-
-    def help(self, args):
-        print args[0], "DOM"
-        print "\nList virtual block devices for domain DOM"
-
-    def main(self, args):
-        if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        for x in server.xend_domain_devices(dom, 'vbd'):
-            sxp.show(x)
-            print
-
-xm.prog(ProgVbdList)
-
-class ProgVbdCreate(Prog):
-    group = 'vbd'
-    name  = 'vbd-create'
-    info = """Create a new virtual block device for a domain"""
-
-    def help(self, args):
-        print args[0], "DOM UNAME DEV MODE [BACKEND]"
-        print """
-Create a virtual block device for a domain.
-
-  UNAME   - device to export, e.g. phy:hda2
-  DEV     - device name in the domain, e.g. sda1
-  MODE    - access mode: r for read, w for read-write
-  BACKEND - backend driver domain
-"""
-
-    def main(self, args):
-        n = len(args)
-        if n < 5 or n > 6: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        vbd = ['vbd',
-               ['uname', args[2]],
-               ['dev',   args[3]],
-               ['mode',  args[4]]]
-        if n == 6:
-            vbd.append(['backend', args[5]])
-        server.xend_domain_device_create(dom, vbd)
-
-xm.prog(ProgVbdCreate)
-
-class ProgVbdRefresh(Prog):
-    group = 'vbd'
-    name  = 'vbd-refresh'
-    info = """Refresh a virtual block device for a domain"""
-
-    def help(self, args):
-        print args[0], "DOM DEV"
-        print """
-Refresh a virtual block device for a domain.
-
-  DEV     - idx field in the device information
-"""
-
-    def main(self, args):
-        if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        dev = args[2]
-        server.xend_domain_device_refresh(dom, 'vbd', dev)
-
-xm.prog(ProgVbdRefresh)
-
-
-class ProgVbdDestroy(Prog):
-    group = 'vbd'
-    name = 'vbd-destroy'
-    info = """Destroy a domain's virtual block device"""
-
-    def help(self, args):
-        print args[0], "DOM DEV"
-        print """
-Destroy vbd DEV attached to domain DOM. Detaches the device
-from the domain, but does not destroy the device contents.
-The device indentifier DEV is the idx field in the device
-information. This is visible in 'xm vbd-list'."""
-
-    def main(self, args):
-        if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
-        dom = args[1]
-        dev = args[2]
-        server.xend_domain_device_destroy(dom, 'vbd', dev)
-
-xm.prog(ProgVbdDestroy)
-
-def main(args):
-    xm.main(args)
+            print "Please report to xen-devel@xxxxxxxxxxxxxxxxxxx"
+            raise
+                
+    else:
+        usage()
+
+if __name__ == "__main__":
+    main()
+
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py    Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/migrate.py    Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Domain migration.
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/opts.py
--- a/tools/python/xen/xm/opts.py       Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/opts.py       Thu Aug 25 22:53:20 2005
@@ -1,4 +1,20 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+
 """Object-oriented command-line option support.
 """
 from getopt import getopt, GetoptError
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py   Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/shutdown.py   Thu Aug 25 22:53:20 2005
@@ -1,4 +1,19 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+#============================================================================
 
 """Domain shutdown.
 """
diff -r 5f1ed597f107 -r 8799d14bef77 tools/python/xen/xm/sysrq.py
--- a/tools/python/xen/xm/sysrq.py      Wed Aug 24 02:43:18 2005
+++ b/tools/python/xen/xm/sysrq.py      Thu Aug 25 22:53:20 2005
@@ -21,9 +21,6 @@
          fn=set_true, default=0,
          use="Print this help.")
 
-def sysrq(dom, req):
-    server.xend_domain_shutdown(dom, 'sysrq', req)
-
 def main(argv):
     opts = gopts
     args = opts.parse(argv)
@@ -36,4 +33,4 @@
     if len(args) < 2: opts.err('Missing sysrq character')
     dom = args[0]
     req = ord(args[1][0])
-    sysrq(dom, req)
+    server.xend_domain_sysrq(dom, req)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/Makefile
--- a/tools/security/Makefile   Wed Aug 24 02:43:18 2005
+++ b/tools/security/Makefile   Thu Aug 25 22:53:20 2005
@@ -2,27 +2,71 @@
 include $(XEN_ROOT)/tools/Rules.mk
 
 SRCS     = secpol_tool.c
-CFLAGS   += -static
 CFLAGS   += -Wall
 CFLAGS   += -Werror
 CFLAGS   += -O3
 CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -I.
+CFLAGS   += -I. -I/usr/include/libxml2
+CFLAGS_XML2BIN += $(shell xml2-config --cflags --libs )
+#if above does not work, try  -L/usr/lib -lxml2 -lz -lpthread -lm
+XML2VERSION = $(shell xml2-config --version )
+VALIDATE_SCHEMA=$(shell if [[ $(XML2VERSION) < 2.6.20 ]]; then echo ""; else echo "-DVALIDATE_SCHEMA"; fi; )
 
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_NULL_POLICY)
+POLICY=null
+endif
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_CHINESE_WALL_POLICY)
+POLICY=chwall
+endif
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+POLICY=ste
+endif
+ifeq ($(ACM_USE_SECURITY_POLICY),ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)
+POLICY=chwall_ste
+endif
+POLICYFILE=./policies/$(POLICY)/$(POLICY).bin
+
+ifneq ($(ACM_USE_SECURITY_POLICY), ACM_NULL_POLICY)
 all: build
+
+install:all
+
+default:all
+else
+all:
+
+install:
+
+default:
+endif
+
 build: mk-symlinks
        $(MAKE) secpol_tool
+       $(MAKE) secpol_xml2bin
+       chmod 700 ./setlabel.sh
+       chmod 700 ./updategrub.sh
 
-default: all
-
-install: all
-
-secpol_tool : secpol_tool.c
+secpol_tool : secpol_tool.c secpol_compat.h
        $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $<
 
+secpol_xml2bin : secpol_xml2bin.c secpol_xml2bin.h secpol_compat.h
+       $(CC) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_XML2BIN) $(VALIDATE_SCHEMA) -o $@ $<
+
 clean:
-       rm -rf secpol_tool xen
+       rm -rf secpol_tool secpol_xml2bin xen
 
+policy_clean:
+       rm -rf policies/*/*.bin policies/*/*.map
+
+mrproper: clean policy_clean
+
+
+$(POLICYFILE) : build
+       @./secpol_xml2bin $(POLICY) > /dev/null
+
+boot_install: $(POLICYFILE)
+       @cp $(POLICYFILE) /boot
+       @./updategrub.sh $(POLICY) $(PWD)/$(XEN_ROOT)
 
 LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse
 mk-symlinks:
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c      Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_tool.c      Thu Aug 25 22:53:20 2005
@@ -31,18 +31,8 @@
 #include <stdlib.h>
 #include <sys/ioctl.h>
 #include <string.h>
-#include <stdint.h>
 #include <netinet/in.h>
-
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-typedef int8_t s8;
-typedef int16_t s16;
-typedef int32_t s32;
-typedef int64_t s64;
-
+#include "secpol_compat.h"
 #include <xen/acm.h>
 #include <xen/acm_ops.h>
 #include <xen/linux/privcmd.h>
@@ -270,171 +260,6 @@
     }
 }
 
-/*************************** set policy ****************************/
-
-int acm_domain_set_chwallpolicy(void *bufstart, int buflen)
-{
-#define CWALL_MAX_SSIDREFS             6
-#define CWALL_MAX_TYPES             10
-#define CWALL_MAX_CONFLICTSETS         2
-
-    struct acm_chwall_policy_buffer *chwall_bin_pol =
-        (struct acm_chwall_policy_buffer *) bufstart;
-    domaintype_t *ssidrefs, *conflicts;
-    int ret = 0;
-    int j;
-
-    chwall_bin_pol->chwall_max_types = htonl(CWALL_MAX_TYPES);
-    chwall_bin_pol->chwall_max_ssidrefs = htonl(CWALL_MAX_SSIDREFS);
-    chwall_bin_pol->policy_code = htonl(ACM_CHINESE_WALL_POLICY);
-    chwall_bin_pol->policy_version = htonl(ACM_CHWALL_VERSION);
-    chwall_bin_pol->chwall_ssid_offset =
-        htonl(sizeof(struct acm_chwall_policy_buffer));
-    chwall_bin_pol->chwall_max_conflictsets =
-        htonl(CWALL_MAX_CONFLICTSETS);
-    chwall_bin_pol->chwall_conflict_sets_offset =
-        htonl(ntohl(chwall_bin_pol->chwall_ssid_offset) +
-              sizeof(domaintype_t) * CWALL_MAX_SSIDREFS * CWALL_MAX_TYPES);
-    chwall_bin_pol->chwall_running_types_offset = 0;    /* not set */
-    chwall_bin_pol->chwall_conflict_aggregate_offset = 0;       /* not set */
-    ret += sizeof(struct acm_chwall_policy_buffer);
-    /* now push example ssids into the buffer (max_ssidrefs x max_types entries) */
-    /* check buffer size */
-    if ((buflen - ret) <
-        (CWALL_MAX_TYPES * CWALL_MAX_SSIDREFS * sizeof(domaintype_t)))
-        return -1;              /* not enough space */
-
-    ssidrefs = (domaintype_t *) (bufstart +
-                          ntohl(chwall_bin_pol->chwall_ssid_offset));
-    memset(ssidrefs, 0,
-           CWALL_MAX_TYPES * CWALL_MAX_SSIDREFS * sizeof(domaintype_t));
-
-    /* now set type j-1 for ssidref i+1 */
-    for (j = 0; j <= CWALL_MAX_SSIDREFS; j++)
-        if ((0 < j) && (j <= CWALL_MAX_TYPES))
-            ssidrefs[j * CWALL_MAX_TYPES + j - 1] = htons(1);
-
-    ret += CWALL_MAX_TYPES * CWALL_MAX_SSIDREFS * sizeof(domaintype_t);
-    if ((buflen - ret) <
-        (CWALL_MAX_CONFLICTSETS * CWALL_MAX_TYPES * sizeof(domaintype_t)))
-        return -1;              /* not enough space */
-
-    /* now the chinese wall policy conflict sets */
-    conflicts = (domaintype_t *) (bufstart +
-                                  ntohl(chwall_bin_pol->
-                                        chwall_conflict_sets_offset));
-    memset((void *) conflicts, 0,
-           CWALL_MAX_CONFLICTSETS * CWALL_MAX_TYPES *
-           sizeof(domaintype_t));
-    /* just 1 conflict set [0]={2,3}, [1]={1,5,6} */
-    if (CWALL_MAX_TYPES > 3)
-    {
-        conflicts[2] = htons(1);
-        conflicts[3] = htons(1);        /* {2,3} */
-        conflicts[CWALL_MAX_TYPES + 1] = htons(1);
-        conflicts[CWALL_MAX_TYPES + 5] = htons(1);
-        conflicts[CWALL_MAX_TYPES + 6] = htons(1);      /* {0,5,6} */
-    }
-    ret += sizeof(domaintype_t) * CWALL_MAX_CONFLICTSETS * CWALL_MAX_TYPES;
-    return ret;
-}
-
-int acm_domain_set_stepolicy(void *bufstart, int buflen)
-{
-#define STE_MAX_SSIDREFS        6
-#define STE_MAX_TYPES                  5
-
-    struct acm_ste_policy_buffer *ste_bin_pol =
-        (struct acm_ste_policy_buffer *) bufstart;
-    domaintype_t *ssidrefs;
-    int j, ret = 0;
-
-    ste_bin_pol->ste_max_types = htonl(STE_MAX_TYPES);
-    ste_bin_pol->ste_max_ssidrefs = htonl(STE_MAX_SSIDREFS);
-    ste_bin_pol->policy_code = htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
-    ste_bin_pol->policy_version = htonl(ACM_STE_VERSION);
-    ste_bin_pol->ste_ssid_offset =
-        htonl(sizeof(struct acm_ste_policy_buffer));
-    ret += sizeof(struct acm_ste_policy_buffer);
-    /* check buffer size */
-    if ((buflen - ret) <
-        (STE_MAX_TYPES * STE_MAX_SSIDREFS * sizeof(domaintype_t)))
-        return -1;              /* not enough space */
-
-    ssidrefs =
-        (domaintype_t *) (bufstart + ntohl(ste_bin_pol->ste_ssid_offset));
-    memset(ssidrefs, 0,
-           STE_MAX_TYPES * STE_MAX_SSIDREFS * sizeof(domaintype_t));
-    /* all types 1 for ssidref 1 */
-    for (j = 0; j < STE_MAX_TYPES; j++)
-        ssidrefs[1 * STE_MAX_TYPES + j] = htons(1);
-    /* now set type j-1 for ssidref j */
-    for (j = 0; j < STE_MAX_SSIDREFS; j++)
-        if ((0 < j) && (j <= STE_MAX_TYPES))
-            ssidrefs[j * STE_MAX_TYPES + j - 1] = htons(1);
-    ret += STE_MAX_TYPES * STE_MAX_SSIDREFS * sizeof(domaintype_t);
-    return ret;
-}
-
-#define MAX_PUSH_BUFFER        16384
-u8 push_buffer[MAX_PUSH_BUFFER];
-
-int acm_domain_setpolicy(int xc_handle)
-{
-    int ret;
-    struct acm_policy_buffer *bin_pol;
-    acm_op_t op;
-
-    /* future: read policy from file and set it */
-    bin_pol = (struct acm_policy_buffer *) push_buffer;
-    bin_pol->policy_version = htonl(ACM_POLICY_VERSION);
-    bin_pol->magic = htonl(ACM_MAGIC);
-    bin_pol->primary_policy_code = htonl(ACM_CHINESE_WALL_POLICY);
-    bin_pol->secondary_policy_code =
-        htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
-
-    bin_pol->len = htonl(sizeof(struct acm_policy_buffer));
-    bin_pol->primary_buffer_offset = htonl(ntohl(bin_pol->len));
-    ret =
-        acm_domain_set_chwallpolicy(push_buffer +
-                                    ntohl(bin_pol->primary_buffer_offset),
-                                    MAX_PUSH_BUFFER -
-                                    ntohl(bin_pol->primary_buffer_offset));
-    if (ret < 0)
-    {
-        printf("ERROR creating chwallpolicy buffer.\n");
-        return -1;
-    }
-    bin_pol->len = htonl(ntohl(bin_pol->len) + ret);
-    bin_pol->secondary_buffer_offset = htonl(ntohl(bin_pol->len));
-    ret = acm_domain_set_stepolicy(push_buffer +
-                                 ntohl(bin_pol->secondary_buffer_offset),
-                                 MAX_PUSH_BUFFER -
-                                 ntohl(bin_pol->secondary_buffer_offset));
-    if (ret < 0)
-    {
-        printf("ERROR creating chwallpolicy buffer.\n");
-        return -1;
-    }
-    bin_pol->len = htonl(ntohl(bin_pol->len) + ret);
-
-    /* dump it and then push it down into xen/acm */
-    acm_dump_policy_buffer(push_buffer, ntohl(bin_pol->len));
-
-    op.cmd = ACM_SETPOLICY;
-    op.interface_version = ACM_INTERFACE_VERSION;
-    op.u.setpolicy.pushcache = (void *) push_buffer;
-    op.u.setpolicy.pushcache_size = ntohl(bin_pol->len);
-    ret = do_acm_op(xc_handle, &op);
-
-    if (ret)
-        printf("ERROR setting policy. Use 'xm dmesg' to see details.\n");
-    else
-        printf("Successfully changed policy.\n");
-
-    return ret;
-}
-
 /******************************* get policy ******************************/
 
 #define PULL_CACHE_SIZE                8192
@@ -602,7 +427,6 @@
 void usage(char *progname)
 {
     printf("Use: %s \n"
-           "\t setpolicy\n"
            "\t getpolicy\n"
            "\t dumpstats\n"
            "\t loadpolicy <binary policy file>\n", progname);
@@ -612,7 +436,7 @@
 int main(int argc, char **argv)
 {
 
-    int acm_cmd_fd, ret;
+    int acm_cmd_fd, ret = 0;
 
     if (argc < 2)
         usage(argv[0]);
@@ -623,12 +447,7 @@
         exit(-1);
     }
 
-    if (!strcmp(argv[1], "setpolicy"))
-    {
-        if (argc != 2)
-            usage(argv[0]);
-        ret = acm_domain_setpolicy(acm_cmd_fd);
-    } else if (!strcmp(argv[1], "getpolicy")) {
+    if (!strcmp(argv[1], "getpolicy")) {
         if (argc != 2)
             usage(argv[0]);
         ret = acm_domain_getpolicy(acm_cmd_fd);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/sv/inc/style.css
--- a/tools/sv/inc/style.css    Wed Aug 24 02:43:18 2005
+++ b/tools/sv/inc/style.css    Thu Aug 25 22:53:20 2005
@@ -1,32 +1,95 @@
+.small  {
+       font-size: 10px
+}
 
-P       {font-family: verdana, arial; font-size: 12px; color: black}
-.small  {font-size: 10px}
+TD.domainInfo     { 
+       font-size: 10px; 
+       color: black
+}
 
-TD.domainInfo     {font-family: verdana, arial; font-size: 10px; color: black}
-TD.domainInfoHead {font-family: verdana, arial; font-size: 10px; color: white; font-face: bold}
+TD.domainInfoHead {
+       font-size: 10px; 
+       color: white; 
+       font-face: bold
+}
 
 TD.domainInfoHead {background-color: black}
 TR.domainInfoOdd  {background-color: white}
 TR.domainInfoEven {background-color: lightgrey}
 
 body { 
-       width: 670px;
-       margin: 0px;
-       padding: 0px;
-       background-color: #fff;
-       background-image: url(../images/orb_02.jpg);
-       background-repeat: repeat-y;
-       background-position: left top;
-       font-family: Arial, Helvetica, sans-serif;
-       font-weight: bold;
-       color: #333333;
-       letter-spacing: 0px;
-       scrollbar-base-color: #333333;
-       scrollbar-track-color: #666666;
-       scrollbar-face-color: #fff;
-       
-       
-       }
-        
-.button (cursor:hand)
-       
+       margin:         0px;
+       padding:        0px;
+       font-family:    Arial, Helvetica, sans-serif;
+       font-size:      12px;
+       color:          #000000;
+}
+
+div#menu {
+        position:       absolute;
+        left:           10px;
+        top:            10px;
+        width:          160px;
+        padding:        10px;
+        border:         0px solid black;
+        text-align:     center;
+}
+
+div#main {
+        position:       absolute;
+        left:           200px;
+        top:            10px;
+        right:          10px;
+        padding:        10px;
+        border:         0px solid black;
+}
+
+div.button {
+        float:          right;
+        margin:         10px 0px 0px 10px;
+        padding:        5px;
+        text-align:     center;
+        border:         1px solid black;
+        background:     gray;
+       cursor:         hand;
+}
+
+div.tabButton {
+       position:       relative;
+       top:            0px;
+        float:          left;
+        margin:         0px 10px -1px 0px;
+        padding:        5px;
+        text-align:     center;
+        border:         1px solid black;
+        background:     gray;
+       cursor:         hand;
+}
+
+div.tabButton#activeTab {
+       top:            0px;
+        background:     white;
+        border-color:   black black white black;
+}
+
+div.button:hover, div.tabButton:hover {
+        background:     white;
+}
+
+div.button a, div.tabButton a {
+        font-size:      12px;
+       font-weight:    bold;
+}
+
+div.title {
+       float:          right;
+       font-size:      14px;
+       font-weight:    bold;
+}
+
+div.tab {
+        overflow:       auto;
+        clear:          both;
+        border:         1px solid black;
+        padding:        10px;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/sv/index.psp
--- a/tools/sv/index.psp        Wed Aug 24 02:43:18 2005
+++ b/tools/sv/index.psp        Thu Aug 25 22:53:20 2005
@@ -7,158 +7,29 @@
 for path in sys.path:
     if debug: req.write( path + "<br/>" )
 
-from xen.sv.HTMLBase import HTMLBase
-from xen.sv.DomList  import DomList
-from xen.sv.NodeInfo import NodeInfo
-from xen.sv.DomInfo  import DomInfo
-from xen.sv.CreateDomain import CreateDomain
-from xen.sv.MigrateDomain import MigrateDomain
-from xen.sv.SaveDomain import SaveDomain
-from xen.sv.RestoreDomain import RestoreDomain
-
-from xen.xend.XendClient import server
-
-from xen.sv.util import getVar
-
-# adapter to make this all work with mod_python
-# (c) Tom Wilkie 2005
-
-class TwistedAdapter:
-
-    def write( self, text ):
-        req.write( text )
-
-    class Args:
-
-        from mod_python.util import FieldStorage
-
-        fieldStorage = FieldStorage( req, True )
-
-        # return a list of values for the given key,
-        # or None if key not there
-        def get( self, var ):
-            retVar = self.fieldStorage.getlist( var )
-            if len( retVar ) == 0:
-                return None
-            else:
-                return retVar
-
-        # return a list of tuples, 
-        # (key, value) where value is a list of values
-       def items( self ):
-            result = [];
-            for key in self.fieldStorage.keys():
-               result.append( (key, self.fieldStorage.getlist( key ) ) )
-           return result
-
-    args = Args()
-
-    uri = req.unparsed_uri
-
-
-class Main( HTMLBase ):
-    
-    isLeaf = True
-
-    def __init__( self, urlWriter = None ):
-        self.modules = { "node": NodeInfo, 
-                         "list": DomList, 
-                         "info": DomInfo,
-                         "create": CreateDomain,
-                         "migrate" : MigrateDomain,
-                         "save" : SaveDomain,
-                         "restore" : RestoreDomain }
-
-        # ordered list of module menus to display
-        self.module_menus = [ "node", "create", "migrate", "save",
-                              "restore", "list" ]
-        HTMLBase.__init__(self)
-        
-    def render_POST( self, request ):
-    
-       #decide what module post'd the action
-                
-       args = getVar( 'args', request )
-
-        mod = getVar( 'mod', request )
-                
-        if mod in self.modules and args is None:
-            module = self.modules[ mod ]
-            #check module exists
-            if module:
-               module( self.mainUrlWriter ).perform( request )
-        else:
-            self.perform( request )     
-    
-        return self.render_GET( request )
-
-    #TODO: need to make this get the request uri automatically
-    def mainUrlWriter( self, module ):
-       def fun( f ):
-            return "index.psp?mod=%s%s" % ( module, f )
-        return fun    
-        
-    def write_BODY( self, request ):
-    
-        request.write( "\n<table style='border:0px solid black; background: url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0' width='780px' height='536px'>\n" )
-        request.write( "<tr>\n" )
-        request.write( " <td width='15px'> </td>" )
-        request.write( " <td width='175px' align='center' valign'center'>" )
-        request.write( "  <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" )
-        request.write( "   <tr><td height='140px' align='center' valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" )
-        request.write( "   <img src='images/xen.png' width='150' height='75' border='0'/></a><br/></td></tr>" )
-        request.write( "   <tr><td height='60px' align='center'><p class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@xxxxxxxxx'>Tom Wilkie</a> 2004</p></td></tr>")
-        request.write( "   <tr><td align='center' valign='top'>" )
-
-        for modName in self.module_menus:
-            self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU( request )
-        
-        request.write( "   </td></tr>" )
-        request.write( "  </table>" )
-        request.write( "  " )
-        request.write( " </td>\n" )
-        request.write( " <td width='15px'> </td>" )
-        request.write( " <td width='558px' align='left' valign='top'>" )
-        request.write( "  <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" )
-        request.write( "   <tr><td height='20px'></td></tr>" )
-        request.write( "   <tr><td align='center' valign='top'>" )
-        
-        modName = getVar('mod', request)
-        
-        if modName not in self.modules:
-            request.write( '<p>Please select a module</p>' )
-        else:
-            module = self.modules[ modName ]
-            if module:
-               module( self.mainUrlWriter( modName ) ).write_BODY( request )  
-            else:
-               request.write( '<p>Invalid module. Please select another</p>' )
-    
-        request.write( "   </td></tr>" )
-        request.write( "  </table>" )
-        request.write( " </td>\n" )
-        request.write( " <td width='17px'> </td>" )
-        request.write( "</tr>\n" )
-        
-        request.write( "</table>\n" )
-        
-                
-    def op_destroy( self, request ):
-       dom = getVar( 'dom', request )
-        if not dom is None and dom != "0":
-            server.xend_domain_destroy( int( dom ), "halt" ) 
-                 
-    def op_pause( self, request ):
-       dom = getVar( 'dom', request )
-        if not dom is None and dom != "0":
-            server.xend_domain_pause( int( dom ) )      
-    
-    def op_unpause( self, request ):
-       dom = getVar( 'dom', request )
-        if not dom is None and dom != "0":
-            server.xend_domain_unpause( int( dom ) )      
+from xen.sv.Main import Main, TwistedAdapter
 
 main = Main()
-
-main.render_POST( TwistedAdapter() )
+request = TwistedAdapter( req )
+main.do_POST( request )
 %>
+<html>
+<head>
+       <title>XenSV</title>
+       <script src="inc/script.js"></script>
+       <link rel="StyleSheet" type="text/css" href="inc/style.css">
+</head>
+<body>
+    <form method="post" action="<%=request.uri%>">
+        <div id="menu">
+               <img src="images/xen.png">
+               <% main.render_menu( request ) %>
+       </div>
+       <div id="main">
+               <% main.render_main( request ) %>
+       </div>
+       <input type="hidden" name="op" value="">
+        <input type="hidden" name="args" value="">
+    </form>
+</body>
+</html>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/Makefile
--- a/tools/xcs/Makefile        Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/Makefile        Thu Aug 25 22:53:20 2005
@@ -34,10 +34,10 @@
 
 xcsdump: xcsdump.c dump.c
        $(CC) $(CFLAGS) -o xcsdump xcsdump.c -L$(XEN_LIBXC) \
-              ctrl_interface.c evtchn.c dump.c -lxc
+              ctrl_interface.c evtchn.c dump.c -lxenctrl
 
 $(BIN): $(OBJS)
-       $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -lxc 
+       $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -lxenctrl 
 
 $(OBJS): $(HDRS)
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/dump.h
--- a/tools/xcs/dump.h  Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/dump.h  Thu Aug 25 22:53:20 2005
@@ -20,7 +20,7 @@
 #define XENCTLD_ERROR_H
 
 #include <stdint.h>
-#include <xc.h>
+#include <xenctrl.h>
 #include <xen/io/domain_controller.h>
 
 void dump_msg(const control_msg_t *msg, uint64_t flags);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/xcs.h
--- a/tools/xcs/xcs.h   Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/xcs.h   Thu Aug 25 22:53:20 2005
@@ -11,7 +11,7 @@
 #define __XCS_H__
 
 #include <pthread.h>
-#include <xc.h>
+#include <xenctrl.h>
 #include <xen/xen.h>
 #include <xen/io/domain_controller.h>
 #include <xen/linux/privcmd.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcs/xcsdump.c
--- a/tools/xcs/xcsdump.c       Wed Aug 24 02:43:18 2005
+++ b/tools/xcs/xcsdump.c       Thu Aug 25 22:53:20 2005
@@ -16,7 +16,7 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <ctype.h>
-#include <xc.h>
+#include <xenctrl.h>
 #include <xen/xen.h>
 #include <xen/io/domain_controller.h>
 #include <getopt.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile    Wed Aug 24 02:43:18 2005
+++ b/tools/xcutils/Makefile    Thu Aug 25 22:53:20 2005
@@ -30,7 +30,7 @@
 
 PROGRAMS               = xc_restore xc_save
 
-LDLIBS                 = -L$(XEN_LIBXC) -lxc
+LDLIBS                 = -L$(XEN_LIBXC) -lxenguest -lxenctrl
 
 .PHONY: all
 all: build
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Wed Aug 24 02:43:18 2005
+++ b/tools/xcutils/xc_restore.c        Thu Aug 25 22:53:20 2005
@@ -7,24 +7,33 @@
  *
  */
 
+#include <err.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <stdio.h>
-#include <err.h>
 
-#include <xc.h>
+#include <xenguest.h>
 
 int
 main(int argc, char **argv)
 {
-    unsigned int xc_fd, io_fd, domid, nr_pfns;
+    unsigned int xc_fd, io_fd, domid, nr_pfns, evtchn;
+    int ret;
+    unsigned long mfn;
 
-    if (argc != 5)
-       errx(1, "usage: %s xcfd iofd domid nr_pfns", argv[0]);
+    if (argc != 6)
+       errx(1, "usage: %s xcfd iofd domid nr_pfns evtchn", argv[0]);
 
     xc_fd = atoi(argv[1]);
     io_fd = atoi(argv[2]);
     domid = atoi(argv[3]);
     nr_pfns = atoi(argv[4]);
+    evtchn = atoi(argv[5]);
 
-    return xc_linux_restore(xc_fd, io_fd, domid, nr_pfns);
+    ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, evtchn, &mfn);
+    if (ret == 0) {
+       printf("store-mfn %li\n", mfn);
+       fflush(stdout);
+    }
+    return ret;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Wed Aug 24 02:43:18 2005
+++ b/tools/xcutils/xc_save.c   Thu Aug 25 22:53:20 2005
@@ -7,11 +7,12 @@
  *
  */
 
+#include <err.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <stdio.h>
-#include <err.h>
 
-#include <xc.h>
+#include <xenguest.h>
 
 int
 main(int argc, char **argv)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/Makefile   Thu Aug 25 22:53:20 2005
@@ -1,6 +1,5 @@
 XEN_ROOT=../..
 include $(XEN_ROOT)/tools/Rules.mk
-LIBDIR = lib
 XEN_LIBXC          = $(XEN_ROOT)/tools/libxc
 
 INSTALL         = install
@@ -25,7 +24,7 @@
 TESTFLAGS= -DTESTING
 TESTENV  = XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
 
-all: xen xenstored libxenstore.a libxenstore-pic.a
+all: xen xenstored libxenstore.so
 
 testcode: xen xs_test xenstored_test xs_random xs_dom0_test
 
@@ -33,7 +32,7 @@
        ln -sf $(XEN_ROOT)/xen/include/public $@
 
 xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o
-       $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+       $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
 
 xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
@@ -41,9 +40,9 @@
 xs_test: xs_test.o xs_lib.o utils.o
 xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o
 xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
-xs_watch_stress: xs_watch_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
+xs_crashme: xs_crashme.o xs_lib.o talloc.o utils.o
 
-xs_test.o xs_stress.o xs_watch_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
+xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o xs_crashme.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
 
 xenstored_%_test.o: xenstored_%.c
        $(COMPILE.c) -o $@ $<
@@ -54,25 +53,30 @@
 talloc_test.o: talloc.c
        $(COMPILE.c) -o $@ $<
 
-LIB_OBJS     := xs.o xs_lib.o
-
-LIB_OBJS_A   := $(patsubst %.o,libxenstore.a(%.o),$(LIB_OBJS))
-LIB_OBJS_PIC := $(patsubst %.o,libxenstore-pic.a(%.opic),$(LIB_OBJS))
-
-libxenstore.a: $(LIB_OBJS_A)
-
-libxenstore-pic.a: $(LIB_OBJS_PIC)
+libxenstore.so: xs.opic xs_lib.opic
+       $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@ $^
 
 clean: testsuite-clean
-       rm -f *.o *.opic *.a
-       rm -f xen xenstored xs_random xs_stress xs_watch_stress
+       rm -f *.o *.opic *.so
+       rm -f xen xenstored xs_random xs_stress xs_crashme
        rm -f xs_test xenstored_test xs_dom0_test
-       -$(RM) $(PROG_DEP)
+       $(RM) $(PROG_DEP)
 
-check: testsuite-run randomcheck stresstest
+print-dir:
+       @echo -n tools/xenstore: 
+
+print-end:
+       @echo
+
+check: print-dir testsuite-fast randomcheck-fast print-end
+
+fullcheck: testsuite-run randomcheck stresstest
 
 testsuite-run: xen xenstored_test xs_test
-       $(TESTENV) testsuite/test.sh
+       $(TESTENV) testsuite/test.sh && echo
+
+testsuite-fast: xen xenstored_test xs_test
+       @$(TESTENV) testsuite/test.sh --fast
 
 testsuite-clean:
        rm -rf $(TESTDIR)
@@ -81,18 +85,25 @@
 # fail.
 RANDSEED=$(shell date +%s)
 randomcheck: xs_random xenstored_test
-       $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED)
-       $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED)
+       $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED) && echo
+       $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED) && echo
        $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
 
-stresstest: xs_stress xs_watch_stress xenstored_test
+crashme:  xs_crashme xenstored_test
+       rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog* /tmp/trace
+       export $(TESTENV); ./xs_crashme 5000 $(RANDSEED) 2>/dev/null
+       if [ -n "`cat /tmp/xs_crashme.vglog*`" ]; then echo Valgrind complained; cat /tmp/xs_crashme.vglog*; exit 1; fi
+       rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog* /tmp/trace
+
+randomcheck-fast: xs_random xenstored_test
+       @$(TESTENV) ./xs_random --fast /tmp/xs_random 2000 $(RANDSEED)
+
+stresstest: xs_stress xenstored_test
        rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
        export $(TESTENV); PID=`./xenstored_test --output-pid --trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret
-       rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
-       export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_watch_stress; ret=$$?; kill $$PID; exit $$ret
 
 xs_dom0_test: xs_dom0_test.o utils.o
-       $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+       $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
 
 TAGS:
        etags `find . -name '*.[ch]'`
@@ -100,15 +111,14 @@
 tarball: clean
        cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/
 
-install: xenstored libxenstore.a libxenstore-pic.a
+install: xenstored libxenstore.so
        $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
        $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
        $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
        $(INSTALL_DIR) -p $(DESTDIR)/usr/include
        $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
        $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DATA) libxenstore.a $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DATA) libxenstore-pic.a $(DESTDIR)/usr/$(LIBDIR)
+       $(INSTALL_DATA) libxenstore.so $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_DATA) xs.h $(DESTDIR)/usr/include
        $(INSTALL_DATA) xs_lib.h $(DESTDIR)/usr/include
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/test.sh
--- a/tools/xenstore/testsuite/test.sh  Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/test.sh  Thu Aug 25 22:53:20 2005
@@ -7,20 +7,20 @@
 {
     rm -rf $XENSTORED_ROOTDIR
     mkdir $XENSTORED_ROOTDIR
-# Weird failures with this.
-    if type valgrind >/dev/null 2>&1; then
-       valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --trace-file=testsuite/tmp/trace --no-fork 3>testsuite/tmp/vgout > /tmp/pid 2> testsuite/tmp/xenstored_errors &
+    if [ $VALGRIND -eq 1 ]; then
+       valgrind --suppressions=testsuite/vg-suppressions -q ./xenstored_test --output-pid --trace-file=testsuite/tmp/trace --no-fork > /tmp/pid 2> testsuite/tmp/xenstored_errors &
        while [ ! -s /tmp/pid ]; do sleep 0; done
        PID=`cat /tmp/pid`
        rm /tmp/pid
     else
-       PID=`./xenstored_test --output-pid`
+       # We don't get error messages from this, though. 
+       PID=`./xenstored_test --output-pid --trace-file=testsuite/tmp/trace`
     fi
-    if sh -e $2 $1; then
-       if [ -s testsuite/tmp/vgout ]; then
+    if ./xs_test $2 $1; then
+       if [ -s testsuite/tmp/xenstored_errors ]; then
            kill $PID
-           echo VALGRIND errors:
-           cat testsuite/tmp/vgout
+           echo Errors:
+           cat testsuite/tmp/xenstored_errors
            return 1
        fi
        echo shutdown | ./xs_test
@@ -33,15 +33,29 @@
     fi
 }
 
+if [ x$1 = x--fast ]; then
+    VALGRIND=0
+    SLOWTESTS=""
+    shift
+else
+    if type valgrind >/dev/null 2>&1; then
+       VALGRIND=1
+    else
+       echo "WARNING: valgrind not available" >&2
+       VALGRIND=0
+    fi
+    SLOWTESTS=testsuite/[0-9]*.slowtest
+fi
+
 MATCH=${1:-"*"}
-for f in testsuite/[0-9]*.sh; do
+for f in testsuite/[0-9]*.test $SLOWTESTS; do
     case `basename $f` in $MATCH) RUN=1;; esac
     [ -n "$RUN" ] || continue
-    if run_test $f; then
-       echo Test $f passed...
+
+    if run_test $f -x >/tmp/out; then
+       echo -n .
     else
-       echo Test $f failed, running verbosely...
-       run_test $f -x || true
+       cat /tmp/out
        # That will have filled the screen, repeat message.
        echo Test $f failed
        exit 1
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/utils.c
--- a/tools/xenstore/utils.c    Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/utils.c    Thu Aug 25 22:53:20 2005
@@ -80,30 +80,6 @@
        barf("malloc of %zu failed", size);
 }
 
-/* Stevens. */
-void daemonize(void)
-{
-       pid_t pid;
-
-       /* Separate from our parent via fork, so init inherits us. */
-       if ((pid = fork()) < 0)
-               barf_perror("Failed to fork daemon");
-       if (pid != 0)
-               exit(0);
-
-       close(STDIN_FILENO);
-       close(STDOUT_FILENO);
-       close(STDERR_FILENO);
-
-       /* Session leader so ^C doesn't whack us. */
-       setsid();
-       /* Move off any mount points we might be in. */
-       chdir("/");
-       /* Discard our parent's old-fashioned umask prejudices. */
-       umask(0);
-}
-
-
 /* This version adds one byte (for nul term) */
 void *grab_file(const char *filename, unsigned long *size)
 {
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/utils.h
--- a/tools/xenstore/utils.h    Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/utils.h    Thu Aug 25 22:53:20 2005
@@ -40,9 +40,6 @@
 void *grab_file(const char *filename, unsigned long *size);
 void release_file(void *data, unsigned long size);
 
-/* For writing daemons, based on Stevens. */
-void daemonize(void);
-
 /* Signal handling: returns fd to listen on. */
 int signal_to_fd(int signal);
 void close_signal(int fd);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored.h
--- a/tools/xenstore/xenstored.h        Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored.h        Thu Aug 25 22:53:20 2005
@@ -1,21 +1,29 @@
-/* 
-    Simple prototyle Xen Store Daemon providing simple tree-like database.
-    Copyright (C) 2005 Rusty Russell IBM Corporation
+/*
+ * Simple prototyle Xen Store Daemon providing simple tree-like database.
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
 #ifndef _XENSTORED_H
 #define _XENSTORED_H
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_core.c   Thu Aug 25 22:53:20 2005
@@ -252,6 +252,7 @@
        int ret;
        struct buffered_data *out = conn->out;
 
+       assert(conn->state != BLOCKED);
        if (out->inhdr) {
                if (verbose)
                        xprintf("Writing msg %s (%s) out to %p\n",
@@ -289,6 +290,10 @@
        talloc_free(out);
 
        queue_next_event(conn);
+
+       /* No longer busy? */
+       if (!conn->out)
+               conn->state = OK;
        return true;
 }
 
@@ -418,14 +423,24 @@
        return node_dir_inside_transaction(trans, node);
 }
 
+static char *datafile(const char *dir)
+{
+       return talloc_asprintf(dir, "%s/.data", dir);
+}
+
 static char *node_datafile(struct transaction *trans, const char *node)
 {
-       return talloc_asprintf(node, "%s/.data", node_dir(trans, node));
+       return datafile(node_dir(trans, node));
+}
+
+static char *permfile(const char *dir)
+{
+       return talloc_asprintf(dir, "%s/.perms", dir);
 }
 
 static char *node_permfile(struct transaction *trans, const char *node)
 {
-       return talloc_asprintf(node, "%s/.perms", node_dir(trans, node));
+       return permfile(node_dir(trans, node));
 }
 
 struct buffered_data *new_buffer(void *ctx)
@@ -492,6 +507,8 @@
                conn->waiting_reply = bdata;
        } else
                conn->out = bdata;
+       assert(conn->state != BLOCKED);
+       conn->state = BUSY;
 }
 
 /* Some routines (write, mkdir, etc) just need a non-error return */
@@ -504,11 +521,13 @@
 {
        unsigned int i;
 
-       for (i = 0; error != xsd_errors[i].errnum; i++)
-               if (i == ARRAY_SIZE(xsd_errors) - 1)
-                       corrupt(conn, "Unknown error %i (%s)", error,
-                               strerror(error));
-
+       for (i = 0; error != xsd_errors[i].errnum; i++) {
+               if (i == ARRAY_SIZE(xsd_errors) - 1) {
+                       eprintf("xenstored: error %i untranslatable", error);
+                       i = 0;  /* EINVAL */
+                       break;
+               }
+       }
        send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
                          strlen(xsd_errors[i].errstring) + 1);
 }
@@ -542,21 +561,20 @@
 /* We expect one arg in the input: return NULL otherwise. */
 static const char *onearg(struct buffered_data *in)
 {
-       if (get_string(in, 0) != in->used)
+       if (!in->used || get_string(in, 0) != in->used)
                return NULL;
        return in->buffer;
 }
 
 /* If it fails, returns NULL and sets errno. */
-static struct xs_permissions *get_perms(struct transaction *transaction,
-                                       const char *node, unsigned int *num)
+static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
 {
        unsigned int size;
        char *strings;
        struct xs_permissions *ret;
        int *fd;
 
-       fd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0);
+       fd = talloc_open(permfile(dir), O_RDONLY, 0);
        if (!fd)
                return NULL;
        strings = read_all(fd, &size);
@@ -564,14 +582,14 @@
                return NULL;
 
        *num = xs_count_strings(strings, size);
-       ret = talloc_array(node, struct xs_permissions, *num);
+       ret = talloc_array(dir, struct xs_permissions, *num);
        if (!xs_strings_to_perms(ret, *num, strings))
-               corrupt(NULL, "Permissions corrupt for %s", node);
+               corrupt(NULL, "Permissions corrupt for %s", dir);
 
        return ret;
 }
 
-static char *perms_to_strings(const char *node,
+static char *perms_to_strings(const void *ctx,
                              struct xs_permissions *perms, unsigned int num,
                              unsigned int *len)
 {
@@ -583,7 +601,7 @@
                if (!xs_perm_to_string(&perms[i], buffer))
                        return NULL;
 
-               strings = talloc_realloc(node, strings, char,
+               strings = talloc_realloc(ctx, strings, char,
                                         *len + strlen(buffer) + 1);
                strcpy(strings + *len, buffer);
                *len += strlen(buffer) + 1;
@@ -616,16 +634,23 @@
        return 0;
 }
 
+/* Create a self-destructing temporary path */
+static char *temppath(const char *path)
+{
+       char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+       talloc_set_destructor(tmppath, destroy_path);
+       return tmppath;
+}
+
 /* Create a self-destructing temporary file */
 static char *tempfile(const char *path, void *contents, unsigned int len)
 {
        int *fd;
-       char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+       char *tmppath = temppath(path);
 
        fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);
        if (!fd)
                return NULL;
-       talloc_set_destructor(tmppath, destroy_path);
        if (!xs_write_all(*fd, contents, len))
                return NULL;
 
@@ -705,44 +730,50 @@
 
        /* Owners and tools get it all... */
        if (!id || perms[0].id == id)
-               return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER;
+               return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
 
        for (i = 1; i < num; i++)
                if (perms[i].id == id)
                        return perms[i].perms;
 
        return perms[0].perms;
+}
+
+/* What do parents say? */
+static enum xs_perm_type ask_parents(struct connection *conn,
+                                    const char *node)
+{
+       struct xs_permissions *perms;
+       unsigned int num;
+
+       do {
+               node = get_parent(node);
+               perms = get_perms(node_dir(conn->transaction, node), &num);
+               if (perms)
+                       break;
+       } while (!streq(node, "/"));
+
+       /* No permission at root?  We're in trouble. */
+       if (!perms)
+               corrupt(conn, "No permissions file at root");
+
+       return perm_for_id(conn->id, perms, num);
 }
 
 /* We have a weird permissions system.  You can allow someone into a
  * specific node without allowing it in the parents.  If it's going to
  * fail, however, we don't want the errno to indicate any information
  * about the node. */
-static int check_with_parents(struct connection *conn, const char *node,
+static int errno_from_parents(struct connection *conn, const char *node,
                              int errnum)
 {
-       struct xs_permissions *perms;
-       unsigned int num;
-
        /* We always tell them about memory failures. */
        if (errnum == ENOMEM)
                return errnum;
 
-       do {
-               node = get_parent(node);
-               perms = get_perms(conn->transaction, node, &num);
-               if (perms)
-                       break;
-       } while (!streq(node, "/"));
-
-       /* No permission at root?  We're in trouble. */
-       if (!perms)
-               corrupt(conn, "No permissions file at root");
-
-       if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ))
-               return EACCES;
-
-       return errnum;
+       if (ask_parents(conn, node) & XS_PERM_READ)
+               return errnum;
+       return EACCES;
 }
 
 char *canonicalize(struct connection *conn, const char *node)
@@ -773,31 +804,33 @@
                return false;
        }
 
-       perms = get_perms(conn->transaction, node, &num);
-       /* No permissions.  If we want to create it and
-        * it doesn't exist, check parent directory. */
-       if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) {
-               char *parent = get_parent(node);
-               if (!parent)
-                       return false;
-
-               perms = get_perms(conn->transaction, parent, &num);
-       }
-       if (!perms) {
-               errno = check_with_parents(conn, node, errno);
+       perms = get_perms(node_dir(conn->transaction, node), &num);
+
+       if (perms) {
+               if (perm_for_id(conn->id, perms, num) & perm)
+                       return true;
+               errno = EACCES;
                return false;
        }
 
-       if (perm_for_id(conn->id, perms, num) & perm)
-               return true;
-
-       errno = check_with_parents(conn, node, EACCES);
+       /* If it's OK not to exist, we consult parents. */
+       if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
+               if (ask_parents(conn, node) & perm)
+                       return true;
+               /* Parents say they should not know. */
+               errno = EACCES;
+               return false;
+       }
+
+       /* They might not have permission to even *see* this node, in
+        * which case we return EACCES even if it's ENOENT or EIO. */
+       errno = errno_from_parents(conn, node, errno);
        return false;
 }
 
 static void send_directory(struct connection *conn, const char *node)
 {
-       char *path, *reply = talloc_strdup(node, "");
+       char *path, *reply;
        unsigned int reply_len = 0;
        DIR **dir;
        struct dirent *dirent;
@@ -815,6 +848,7 @@
                return;
        }
 
+       reply = talloc_strdup(node, "");
        while ((dirent = readdir(*dir)) != NULL) {
                int len = strlen(dirent->d_name) + 1;
 
@@ -857,44 +891,64 @@
                send_reply(conn, XS_READ, value, size);
 }
 
-/* Create a new directory.  Optionally put data in it (if data != NULL) */
-static bool new_directory(struct connection *conn,
-                         const char *node, void *data, unsigned int datalen)
+/* Commit this directory, eg. committing a/b.tmp/c renames a/b.tmp -> a/b */
+static bool commit_dir(char *dir)
+{
+       char *dot, *slash, *dest;
+
+       dot = strrchr(dir, '.');        /* start of the last ".suffix" */
+       slash = strchr(dot, '/');       /* cut off anything below it */
+       if (slash)
+               *slash = '\0';
+
+       dest = talloc_asprintf(dir, "%.*s", (int)(dot - dir), dir);
+       return rename(dir, dest) == 0;
+}
+
+/* Create a temporary directory.  Put data in it (if data != NULL) */
+static char *tempdir(struct connection *conn,
+                    const char *node, void *data, unsigned int datalen)
 {
        struct xs_permissions *perms;
        char *permstr;
        unsigned int num, len;
        int *fd;
-       char *dir = node_dir(conn->transaction, node);
-
-       if (mkdir(dir, 0750) != 0)
-               return false;
-
-       /* Set destructor so we clean up if neccesary. */
-       talloc_set_destructor(dir, destroy_path);
-
-       perms = get_perms(conn->transaction, get_parent(node), &num);
+       char *dir;
+
+       dir = temppath(node_dir(conn->transaction, node));
+       if (mkdir(dir, 0750) != 0) {
+               if (errno != ENOENT)
+                       return NULL;
+
+               dir = tempdir(conn, get_parent(node), NULL, 0);
+               if (!dir)
+                       return NULL;
+
+               dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
+               if (mkdir(dir, 0750) != 0)
+                       return NULL;
+               talloc_set_destructor(dir, destroy_path);
+       }
+
+       perms = get_perms(get_parent(dir), &num);
+       assert(perms);
        /* Domains own what they create. */
        if (conn->id)
                perms->id = conn->id;
 
        permstr = perms_to_strings(dir, perms, num, &len);
-       fd = talloc_open(node_permfile(conn->transaction, node),
-                        O_WRONLY|O_CREAT|O_EXCL, 0640);
+       fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
        if (!fd || !xs_write_all(*fd, permstr, len))
-               return false;
+               return NULL;
 
        if (data) {
-               char *datapath = node_datafile(conn->transaction, node);
+               char *datapath = datafile(dir);
 
                fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
                if (!fd || !xs_write_all(*fd, data, datalen))
-                       return false;
-       }
-
-       /* Finished! */
-       talloc_set_destructor(dir, NULL);
-       return true;
+                       return NULL;
+       }
+       return dir;
 }
 
 /* path, flags, data... */
@@ -913,8 +967,7 @@
        }
 
        node = canonicalize(conn, vec[0]);
-       if (/*suppress error on write outside transaction*/ 0 &&
-           !within_transaction(conn->transaction, node)) {
+       if (!within_transaction(conn->transaction, node)) {
                send_error(conn, EROFS);
                return;
        }
@@ -928,9 +981,9 @@
        if (streq(vec[1], XS_WRITE_NONE))
                mode = XS_PERM_WRITE;
        else if (streq(vec[1], XS_WRITE_CREATE))
-               mode = XS_PERM_WRITE|XS_PERM_CREATE;
+               mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
        else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
-               mode = XS_PERM_WRITE|XS_PERM_CREATE;
+               mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
        else {
                send_error(conn, EINVAL);
                return;
@@ -942,6 +995,8 @@
        }
 
        if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+               char *dir;
+
                /* Does not exist... */
                if (errno != ENOENT) {
                        send_error(conn, errno);
@@ -949,15 +1004,17 @@
                }
 
                /* Not going to create it? */
-               if (!(mode & XS_PERM_CREATE)) {
+               if (streq(vec[1], XS_WRITE_NONE)) {
                        send_error(conn, ENOENT);
                        return;
                }
 
-               if (!new_directory(conn, node, in->buffer + offset, datalen)) {
+               dir = tempdir(conn, node, in->buffer + offset, datalen);
+               if (!dir || !commit_dir(dir)) {
                        send_error(conn, errno);
                        return;
                }
+               
        } else {
                /* Exists... */
                if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
@@ -982,8 +1039,11 @@
 
 static void do_mkdir(struct connection *conn, const char *node)
 {
+       char *dir;
+       struct stat st;
+
        node = canonicalize(conn, node);
-       if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE)) {
+       if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
                send_error(conn, errno);
                return;
        }
@@ -996,7 +1056,14 @@
        if (transaction_block(conn, node))
                return;
 
-       if (!new_directory(conn, node, NULL, 0)) {
+       /* Must not already exist. */
+       if (lstat(node_dir(conn->transaction, node), &st) == 0) {
+               send_error(conn, EEXIST);
+               return;
+       }
+
+       dir = tempdir(conn, node, NULL, 0);
+       if (!dir || !commit_dir(dir)) {
                send_error(conn, errno);
                return;
        }
@@ -1056,7 +1123,7 @@
                return;
        }
 
-       perms = get_perms(conn->transaction, node, &num);
+       perms = get_perms(node_dir(conn->transaction, node), &num);
        if (!perms) {
                send_error(conn, errno);
                return;
@@ -1072,7 +1139,7 @@
 static void do_set_perms(struct connection *conn, struct buffered_data *in)
 {
        unsigned int num;
-       char *node;
+       char *node, *permstr;
        struct xs_permissions *perms;
 
        num = xs_count_strings(in->buffer, in->used);
@@ -1083,7 +1150,7 @@
 
        /* First arg is node name. */
        node = canonicalize(conn, in->buffer);
-       in->buffer += strlen(in->buffer) + 1;
+       permstr = in->buffer + strlen(in->buffer) + 1;
        num--;
 
        if (!within_transaction(conn->transaction, node)) {
@@ -1101,7 +1168,7 @@
        }
 
        perms = talloc_array(node, struct xs_permissions, num);
-       if (!xs_strings_to_perms(perms, num, in->buffer)) {
+       if (!xs_strings_to_perms(perms, num, permstr)) {
                send_error(conn, errno);
                return;
        }
@@ -1270,8 +1337,10 @@
        talloc_free(in);
        talloc_set_fail_handler(NULL, NULL);
        if (talloc_total_blocks(NULL)
-           != talloc_total_blocks(talloc_autofree_context()) + 1)
+           != talloc_total_blocks(talloc_autofree_context()) + 1) {
                talloc_report_full(NULL, stderr);
+               abort();
+       }
 }
 
 /* Errors in reading or allocating here mean we get out of sync, so we
@@ -1295,8 +1364,10 @@
                        return;
 
                if (in->hdr.msg.len > PATH_MAX) {
+#ifndef TESTING
                        syslog(LOG_DAEMON, "Client tried to feed us %i",
                               in->hdr.msg.len);
+#endif
                        goto bad_client;
                }
 
@@ -1347,6 +1418,7 @@
                                consider_message(i);
                        }
                        break;
+               case BUSY:
                case OK:
                        break;
                }
@@ -1372,6 +1444,7 @@
        new->state = OK;
        new->blocked_by = NULL;
        new->out = new->waiting_reply = NULL;
+       new->waiting_for_ack = NULL;
        new->fd = -1;
        new->id = 0;
        new->domain = NULL;
@@ -1451,6 +1524,7 @@
                printf("    state = %s\n",
                       i->state == OK ? "OK"
                       : i->state == BLOCKED ? "BLOCKED"
+                      : i->state == BUSY ? "BUSY"
                       : "INVALID");
                if (i->id)
                        printf("    id = %i\n", i->id);
@@ -1516,19 +1590,59 @@
                            xs_daemon_transactions());
 }
 
+static void write_pidfile(const char *pidfile)
+{
+       char buf[100];
+       int len, fd;
+
+       fd = open(pidfile, O_RDWR | O_CREAT, 0600);
+       if (fd == -1)
+               barf_perror("Opening pid file %s", pidfile);
+
+       /* We exit silently if daemon already running. */
+       if (lockf(fd, F_TLOCK, 0) == -1)
+               exit(0);
+       /* We hold the lock: drop stale contents.  fd is left open on purpose. */
+       len = sprintf(buf, "%d\n", getpid());
+       if (ftruncate(fd, 0) != 0 || write(fd, buf, len) != len)
+               barf_perror("Writing pid file %s", pidfile);
+}
+
+/* Stevens. */
+static void daemonize(void)
+{
+       pid_t pid;
+
+       /* Separate from our parent via fork, so init inherits us. */
+       if ((pid = fork()) < 0)
+               barf_perror("Failed to fork daemon");
+       if (pid != 0)
+               exit(0);
+
+       /* Session leader so ^C doesn't whack us. */
+       setsid();
+       /* Move off any mount points we might be in. */
+       chdir("/");
+       /* Discard our parent's old-fashioned umask prejudices. */
+       umask(0);
+}
+
+
 static struct option options[] = { { "no-fork", 0, NULL, 'N' },
                                   { "verbose", 0, NULL, 'V' },
                                   { "output-pid", 0, NULL, 'P' },
                                   { "trace-file", 1, NULL, 'T' },
+                                  { "pid-file", 1, NULL, 'F' },
                                   { NULL, 0, NULL, 0 } };
 
 int main(int argc, char *argv[])
 {
-       int opt, *sock, *ro_sock, event_fd, max, tmpout;
+       int opt, *sock, *ro_sock, event_fd, max;
        struct sockaddr_un addr;
        fd_set inset, outset;
        bool dofork = true;
        bool outputpid = false;
+       const char *pidfile = NULL;
 
        while ((opt = getopt_long(argc, argv, "DVT:", options, NULL)) != -1) {
                switch (opt) {
@@ -1548,10 +1662,19 @@
                                            optarg);
                         write(tracefd, "\n***\n", strlen("\n***\n"));
                        break;
+               case 'F':
+                       pidfile = optarg;
                }
        }
        if (optind != argc)
                barf("%s: No arguments desired", argv[0]);
+
+       if (dofork) {
+               openlog("xenstored", 0, LOG_DAEMON);
+               daemonize();
+       }
+       if (pidfile)
+               write_pidfile(pidfile);
 
        talloc_enable_leak_report_full();
 
@@ -1599,19 +1722,17 @@
        /* Restore existing connections. */
        restore_existing_connections();
 
-       /* Debugging: daemonize() closes standard fds, so dup here. */
-       tmpout = dup(STDOUT_FILENO);
+       if (outputpid) {
+               printf("%i\n", getpid());
+               fflush(stdout);
+       }
+
+       /* Close stdin/stdout/stderr now we're ready to accept connections. */
        if (dofork) {
-               openlog("xenstored", 0, LOG_DAEMON);
-               daemonize();
-       }
-
-       if (outputpid) {
-               char buffer[20];
-               sprintf(buffer, "%i\n", getpid());
-               write(tmpout, buffer, strlen(buffer));
-       }
-       close(tmpout);
+               close(STDIN_FILENO);
+               close(STDOUT_FILENO);
+               close(STDERR_FILENO);
+       }
 
 #ifdef TESTING
        signal(SIGUSR1, stop_failtest);
@@ -1621,6 +1742,7 @@
        max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
 
        /* Main loop. */
+       /* FIXME: Rewrite so no one can starve. */
        for (;;) {
                struct connection *i;
                struct timeval *tvp = NULL, tv;
@@ -1665,10 +1787,22 @@
                        }
                }
 
-               /* Flush output for domain connections,  */
-               list_for_each_entry(i, &connections, list)
-                       if (i->domain && i->out)
+               /* Handle all possible I/O for domain connections. */
+       more:
+               list_for_each_entry(i, &connections, list) {
+                       if (!i->domain)
+                               continue;
+
+                       if (domain_can_read(i)) {
+                               handle_input(i);
+                               goto more;
+                       }
+
+                       if (domain_can_write(i)) {
                                handle_output(i);
+                               goto more;
+                       }
+               }
 
                if (tvp) {
                        check_transaction_timeout();
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_core.h   Thu Aug 25 22:53:20 2005
@@ -51,6 +51,8 @@
 {
        /* Blocked by transaction. */
        BLOCKED,
+       /* Doing action, not listening */
+       BUSY,
        /* Completed */
        OK,
 };
@@ -65,7 +67,7 @@
        /* Who am I?  0 for socket connections. */
        domid_t id;
 
-       /* Blocked on transaction? */
+       /* Blocked on transaction?  Busy? */
        enum state state;
 
        /* Node we are waiting for (if state == BLOCKED) */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_domain.c Thu Aug 25 22:53:20 2005
@@ -227,32 +227,27 @@
        return NULL;
 }
 
+/* We scan all domains rather than use the information given here. */
 void handle_event(int event_fd)
 {
        u16 port;
-       struct domain *domain;
 
        if (read(event_fd, &port, sizeof(port)) != sizeof(port))
                barf_perror("Failed to read from event fd");
-
-       /* We have to handle *all* the data available before we ack:
-        * careful that handle_input/handle_output can destroy conn.
-        */
-       while ((domain = find_domain(port)) != NULL) {
-               if (domain->conn->state == OK
-                   && buffer_has_input(domain->input))
-                       handle_input(domain->conn);
-               else if (domain->conn->out
-                        && buffer_has_output_room(domain->output))
-                       handle_output(domain->conn);
-               else
-                       break;
-       }
-
 #ifndef TESTING
        if (write(event_fd, &port, sizeof(port)) != sizeof(port))
                barf_perror("Failed to write to event fd");
 #endif
+}
+
+bool domain_can_read(struct connection *conn)
+{
+       return conn->state == OK && buffer_has_input(conn->domain->input);
+}
+
+bool domain_can_write(struct connection *conn)
+{
+       return conn->out && buffer_has_output_room(conn->domain->output);
 }
 
 static struct domain *new_domain(void *context, domid_t domid,
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_domain.h
--- a/tools/xenstore/xenstored_domain.h Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_domain.h Thu Aug 25 22:53:20 2005
@@ -40,4 +40,8 @@
 /* Read existing connection information from store. */
 void restore_existing_connections(void);
 
+/* Can the connection attached to a domain read/write? */
+bool domain_can_read(struct connection *conn);
+bool domain_can_write(struct connection *conn);
+
 #endif /* _XENSTORED_DOMAIN_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c  Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xenstored_watch.c  Thu Aug 25 22:53:20 2005
@@ -95,9 +95,18 @@
        return 0;
 }
 
-static void add_event(struct watch *watch, const char *node)
+static void add_event(struct connection *conn,
+                     struct watch *watch, const char *node)
 {
        struct watch_event *event;
+
+       /* Check read permission: no permission, no watch event.
+        * If it doesn't exist, we need permission to read parent.
+        */
+       if (!check_node_perms(conn, node, XS_PERM_READ|XS_PERM_ENOENT_OK)) {
+               fprintf(stderr, "No permission for %s\n", node);
+               return;
+       }
 
        if (watch->relative_path) {
                node += strlen(watch->relative_path);
@@ -132,9 +141,9 @@
 
                list_for_each_entry(watch, &i->watches, list) {
                        if (is_child(node, watch->node))
-                               add_event(watch, node);
+                               add_event(i, watch, node);
                        else if (recurse && is_child(watch->node, node))
-                               add_event(watch, watch->node);
+                               add_event(i, watch, watch->node);
                        else
                                continue;
                        /* If connection not doing anything, queue this. */
@@ -206,7 +215,7 @@
 
        relative = !strstarts(vec[0], "/");
        vec[0] = canonicalize(conn, vec[0]);
-       if (!check_node_perms(conn, vec[0], XS_PERM_READ)) {
+       if (!is_valid_nodename(vec[0])) {
                send_error(conn, errno);
                return;
        }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c       Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs.c       Thu Aug 25 22:53:20 2005
@@ -2,19 +2,19 @@
     Xen Store Daemon interface providing simple tree-like database.
     Copyright (C) 2005 Rusty Russell IBM Corporation
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
 #include <sys/types.h>
@@ -204,13 +204,19 @@
                return NULL;
        }
 
-       assert(msg.type == type);
+       if (msg.type != type) {
+               free(ret);
+               saved_errno = EBADF;
+               goto close_fd;
+               
+       }
        return ret;
 
 fail:
        /* We're in a bad state, so close fd. */
        saved_errno = errno;
        sigaction(SIGPIPE, &oldact, NULL);
+close_fd:
        close(h->fd);
        h->fd = -1;
        errno = saved_errno;
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h       Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs.h       Thu Aug 25 22:53:20 2005
@@ -2,19 +2,19 @@
     Xen Store Daemon providing simple tree-like database.
     Copyright (C) 2005 Rusty Russell IBM Corporation
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
+    This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
 #ifndef _XS_H
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_dom0_test.c
--- a/tools/xenstore/xs_dom0_test.c     Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_dom0_test.c     Thu Aug 25 22:53:20 2005
@@ -3,7 +3,7 @@
 #include <sys/ioctl.h>
 #include "xs.h"
 #include "utils.h"
-#include <xc.h>
+#include <xenctrl.h>
 #include <xen/linux/privcmd.h>
 #include <stdio.h>
 #include <unistd.h>
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_lib.c
--- a/tools/xenstore/xs_lib.c   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_lib.c   Thu Aug 25 22:53:20 2005
@@ -1,3 +1,22 @@
+/* 
+    Common routines between Xen store user library and daemon.
+    Copyright (C) 2005 Rusty Russell IBM Corporation
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
 #include "xs_lib.h"
 #include <unistd.h>
 #include <stdio.h>
@@ -133,8 +152,9 @@
        unsigned int num;
        const char *p;
 
-       for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
-               num++;
+       for (p = strings, num = 0; p < strings + len; p++)
+               if (*p == '\0')
+                       num++;
 
        return num;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_lib.h
--- a/tools/xenstore/xs_lib.h   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_lib.h   Thu Aug 25 22:53:20 2005
@@ -2,19 +2,19 @@
     Common routines between Xen store user library and daemon.
     Copyright (C) 2005 Rusty Russell IBM Corporation
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
+    This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
 #ifndef _XS_LIB_H
@@ -22,7 +22,7 @@
 
 #include <stdbool.h>
 #include <limits.h>
-#include <xc.h>
+#include <xenctrl.h>
 
 /* Bitmask of permissions. */
 enum xs_perm_type {
@@ -30,7 +30,7 @@
        XS_PERM_READ = 1,
        XS_PERM_WRITE = 2,
        /* Internal use. */
-       XS_PERM_CREATE = 4,
+       XS_PERM_ENOENT_OK = 4,
        XS_PERM_OWNER = 8,
 };
 
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_random.c
--- a/tools/xenstore/xs_random.c        Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_random.c        Thu Aug 25 22:53:20 2005
@@ -303,6 +303,34 @@
        return true;
 }
 
+static char *parent_filename(const char *name)
+{
+       char *slash = strrchr(name + 1, '/');   /* never matches name[0] */
+       if (!slash)
+               return talloc_strdup(name, "/");        /* no later '/' */
+       return talloc_asprintf(name, "%.*s", (int)(slash - name), name);
+}
+
+static void make_dirs(const char *filename)
+{
+       struct stat st;
+
+       if (lstat(filename, &st) == 0 && S_ISREG(st.st_mode))
+               convert_to_dir(filename);
+
+       if (mkdir(filename, 0700) == 0) {
+               init_perms(filename);
+               return;
+       }
+       if (errno == EEXIST)
+               return;
+
+       make_dirs(parent_filename(filename));
+       if (mkdir(filename, 0700) != 0)
+               barf_perror("Failed to mkdir %s", filename);
+       init_perms(filename);
+}
+
 static bool file_write(struct file_ops_info *info,
                       const char *path, const void *data,
                       unsigned int len, int createflags)
@@ -329,6 +357,9 @@
                }
        }
 
+       if (createflags & O_CREAT)
+               make_dirs(parent_filename(filename));
+
        fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
        if (fd < 0) {
                /* FIXME: Another hack. */
@@ -349,19 +380,13 @@
 {
        char *dirname = path_to_name(info, path);
 
-       /* Same effective order as daemon, so error returns are right. */
-       if (mkdir(dirname, 0700) != 0) {
-               if (errno != ENOENT && errno != ENOTDIR)
-                       write_ok(info, path);
-               return false;
-       }
-
-       if (!write_ok(info, path)) {
-               int saved_errno = errno;
-               rmdir(dirname);
-               errno = saved_errno;
-               return false;
-       }
+       if (!write_ok(info, path))
+               return false;
+
+       make_dirs(parent_filename(dirname));
+       if (mkdir(dirname, 0700) != 0)
+               return false;
+
        init_perms(dirname);
        return true;
 }
@@ -427,7 +452,7 @@
        }
 
        if (abort) {
-               cmd = talloc_asprintf(NULL, "rm -r %s", info->transact_base);
+               cmd = talloc_asprintf(NULL, "rm -rf %s", info->transact_base);
                do_command(cmd);
                goto success;
        }
@@ -984,13 +1009,15 @@
 
 static void setup_file_ops(const char *dir)
 {
-       char *cmd = talloc_asprintf(NULL, "echo -n r0 > %s/.perms", dir);
+       struct xs_permissions perm = { .id = 0, .perms = XS_PERM_READ };
+       struct file_ops_info *h = file_handle(dir);
        if (mkdir(dir, 0700) != 0)
                barf_perror("Creating directory %s", dir);
-       if (mkdir(talloc_asprintf(cmd, "%s/tool", dir), 0700) != 0)
+       if (mkdir(talloc_asprintf(h, "%s/tool", dir), 0700) != 0)
                barf_perror("Creating directory %s/tool", dir);
-       do_command(cmd);
-       talloc_free(cmd);
+       if (!file_set_perms(h, talloc_strdup(h, "/"), &perm, 1))
+               barf_perror("Setting root perms in %s", dir);
+       file_close(h);
 }
 
 static void setup_xs_ops(void)
@@ -1009,8 +1036,8 @@
        } else {
                dup2(fds[1], STDOUT_FILENO);
                close(fds[0]);
-#if 0
-               execlp("valgrind", "valgrind", "xenstored_test", "--output-pid",
+#if 1
+               execlp("valgrind", "valgrind", "-q", "--suppressions=testsuite/vg-suppressions", "xenstored_test", "--output-pid",
                       "--no-fork", NULL);
 #else
                execlp("./xenstored_test", "xenstored_test", "--output-pid",
@@ -1112,9 +1139,6 @@
                        data->ops->close(pre);
                }
        }
-       if (data->print_progress)
-               printf("\n");
-
 out:
        data->ops->close(h);    
        return i;
@@ -1192,10 +1216,9 @@
        try = try_simple(NULL, iters, verbose, &data);
        if (try == iters) {
                cleanup_xs_ops();
-               printf("Succeeded\n");
                exit(0);
        }
-       printf("Failed on iteration %u\n", try + 1);
+       printf("Failed on iteration %u of seed %u\n", try + 1, seed);
        data.print_progress = false;
        reduce_problem(try + 1, try_simple, &data);
 }
@@ -1406,8 +1429,6 @@
                        talloc_free(fileh_pre);
                }
        }
-       if (data->print_progress)
-               printf("\n");
 
        fail = NULL;
        if (data->fast)
@@ -1435,10 +1456,9 @@
        try = try_diff(NULL, iters, verbose, &data);
        if (try == iters) {
                cleanup_xs_ops();
-               printf("Succeeded\n");
                exit(0);
        }
-       printf("Failed on iteration %u\n", try + 1);
+       printf("Failed on iteration %u of seed %u\n", try + 1, seed);
        data.print_progress = false;
        reduce_problem(try + 1, try_diff, &data);
 }
@@ -1593,8 +1613,6 @@
                xs_close(tmpxsh);
                file_close(tmpfileh);
        }
-
-       printf("Total %u of %u not aborted\n", tried - aborted, tried);
 out:
        if (xsh)
                xs_close(xsh);
@@ -1615,10 +1633,9 @@
        try = try_fail(NULL, iters, verbose, &data);
        if (try == iters) {
                cleanup_xs_ops();
-               printf("Succeeded\n");
                exit(0);
        }
-       printf("Failed on iteration %u\n", try + 1);
+       printf("Failed on iteration %u of seed %u\n", try + 1, seed);
        fflush(stdout);
        data.print_progress = false;
        reduce_problem(try + 1, try_fail, &data);
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c  Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_test.c  Thu Aug 25 22:53:20 2005
@@ -17,6 +17,7 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -28,16 +29,25 @@
 #include <stdbool.h>
 #include <stdlib.h>
 #include <sys/mman.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <string.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/time.h>
 #include "utils.h"
 #include "xs_lib.h"
+#include "list.h"
 
 #define XSTEST
 
 static struct xs_handle *handles[10] = { NULL };
-static unsigned int children;
-
-static bool timeout = true;
+
+static unsigned int timeout_ms = 200;
+static bool timeout_suppressed = true;
 static bool readonly = false;
+static bool print_input = false;
+static unsigned int linenum = 0;
 
 struct ringbuf_head
 {
@@ -178,7 +188,7 @@
 static void __attribute__((noreturn)) usage(void)
 {
        barf("Usage:\n"
-            "       xs_test [--readonly] [--notimeout]\n"
+            "       xs_test [--readonly] [--no-timeout] [-x]\n"
             "Reads commands from stdin, one per line:"
             "  dir <path>\n"
             "  read <path>\n"
@@ -190,8 +200,6 @@
             "  setperm <path> <id> <flags> ...\n"
             "  shutdown\n"
             "  watch <path> <token>\n"
-            "  async <command>...\n"
-            "  asyncwait\n"
             "  waitwatch\n"
             "  ackwatch <token>\n"
             "  unwatch <path> <token>\n"
@@ -200,7 +208,13 @@
             "  abort\n"
             "  introduce <domid> <mfn> <eventchn> <path>\n"
             "  commit\n"
-            "  sleep <seconds>\n"
+            "  sleep <milliseconds>\n"
+            "  expect <pattern>\n"
+            "  notimeout\n"
+            "  readonly\n"
+            "  readwrite\n"
+            "  noackwrite <path> <flags> <value>...\n"
+            "  readack\n"
             "  dump\n");
 }
 
@@ -218,7 +232,7 @@
        return off;
 }
 
-static char *arg(char *line, unsigned int num)
+static char *arg(const char *line, unsigned int num)
 {
        static char *args[10];
        unsigned int off, len;
@@ -236,12 +250,64 @@
        return args[num];
 }
 
+struct expect
+{
+       struct list_head list;
+       char *pattern;
+};
+static LIST_HEAD(expects);
+
 static char *command;
-static void __attribute__((noreturn)) failed(int handle)
+
+/* Trim leading and trailing whitespace in place (casts keep isspace() defined) */
+static void trim(char *str)
+{
+       while (isspace((unsigned char)str[0]))
+               memmove(str, str+1, strlen(str));
+
+       while (strlen(str) && isspace((unsigned char)str[strlen(str)-1]))
+               str[strlen(str)-1] = '\0';
+}
+
+static void output(const char *fmt, ...)
+{
+       char *str;
+       struct expect *i;
+       va_list arglist;
+
+       va_start(arglist, fmt);
+       vasprintf(&str, fmt, arglist);
+       va_end(arglist);
+
+       printf("%s", str);
+       fflush(stdout);
+       trim(str);
+       list_for_each_entry(i, &expects, list) {
+               if (fnmatch(i->pattern, str, 0) == 0) {
+                       list_del(&i->list);
+                       free(i);
+                       return;
+               }
+       }
+       barf("Unexpected output %s\n", str);
+}
+
+static void failed(int handle)
 {
        if (handle)
-               barf_perror("%i: %s", handle, command);
-       barf_perror("%s", command);
+               output("%i: %s failed: %s\n",
+                      handle, command, strerror(errno));
+       else
+               output("%s failed: %s\n", command, strerror(errno));
+}
+
+static void expect(const char *line)
+{
+       struct expect *e = malloc(sizeof(*e));
+
+       e->pattern = strdup(line + argpos(line, 1));
+       trim(e->pattern);
+       list_add(&e->list, &expects);
 }
 
 static void do_dir(unsigned int handle, char *path)
@@ -250,14 +316,16 @@
        unsigned int i, num;
 
        entries = xs_directory(handles[handle], path, &num);
-       if (!entries)
-               failed(handle);
+       if (!entries) {
+               failed(handle);
+               return;
+       }
 
        for (i = 0; i < num; i++)
                if (handle)
-                       printf("%i:%s\n", handle, entries[i]);
+                       output("%i:%s\n", handle, entries[i]);
                else
-                       printf("%s\n", entries[i]);
+                       output("%s\n", entries[i]);
        free(entries);
 }
 
@@ -267,15 +335,17 @@
        unsigned int len;
 
        value = xs_read(handles[handle], path, &len);
-       if (!value)
-               failed(handle);
+       if (!value) {
+               failed(handle);
+               return;
+       }
 
        /* It's supposed to nul terminate for us. */
        assert(value[len] == '\0');
        if (handle)
-               printf("%i:%.*s\n", handle, len, value);
+               output("%i:%.*s\n", handle, len, value);
        else
-               printf("%.*s\n", len, value);
+               output("%.*s\n", len, value);
 }
 
 static void do_write(unsigned int handle, char *path, char *flags, char *data)
@@ -297,6 +367,45 @@
                failed(handle);
 }
 
+static void do_noackwrite(unsigned int handle,
+                         char *path, const char *flags, char *data)
+{
+       struct xsd_sockmsg msg;
+
+       /* Format: Flags (as string), path, data. */
+       if (streq(flags, "none"))
+               flags = XS_WRITE_NONE;
+       else if (streq(flags, "create"))
+               flags = XS_WRITE_CREATE;
+       else if (streq(flags, "excl"))
+               flags = XS_WRITE_CREATE_EXCL;
+       else
+               barf("noackwrite flags 'none', 'create' or 'excl' only");
+
+       msg.len = strlen(path) + 1 + strlen(flags) + 1 + strlen(data);
+       msg.type = XS_WRITE;
+       if (!write_all_choice(handles[handle]->fd, &msg, sizeof(msg)))
+               failed(handle);
+       if (!write_all_choice(handles[handle]->fd, path, strlen(path) + 1))
+               failed(handle);
+       if (!write_all_choice(handles[handle]->fd, flags, strlen(flags) + 1))
+               failed(handle);
+       if (!write_all_choice(handles[handle]->fd, data, strlen(data)))
+               failed(handle);
+       /* Do not wait for ack. */
+}
+
+static void do_readack(unsigned int handle)
+{
+       enum xsd_sockmsg_type type;
+       char *ret;
+
+       ret = read_reply(handles[handle]->fd, &type, NULL);
+       if (!ret)
+               failed(handle);
+       free(ret);
+}
+
 static void do_setid(unsigned int handle, char *id)
 {
        if (!xs_bool(xs_debug_command(handles[handle], "setid", id,
@@ -322,8 +431,10 @@
        struct xs_permissions *perms;
 
        perms = xs_get_permissions(handles[handle], path, &num);
-       if (!perms)
-               failed(handle);
+       if (!perms) {
+               failed(handle);
+               return;
+       }
 
        for (i = 0; i < num; i++) {
                char *permstring;
@@ -346,9 +457,9 @@
                }
 
                if (handle)
-                       printf("%i:%i %s\n", handle, perms[i].id, permstring);
+                       output("%i:%i %s\n", handle, perms[i].id, permstring);
                else
-                       printf("%i %s\n", perms[i].id, permstring);
+                       output("%i %s\n", perms[i].id, permstring);
        }
        free(perms);
 }
@@ -396,18 +507,56 @@
                failed(handle);
 }
 
+static void set_timeout(void)
+{
+       struct itimerval timeout;
+
+       timeout.it_value.tv_sec = timeout_ms / 1000;
+       timeout.it_value.tv_usec = (timeout_ms * 1000) % 1000000;
+       timeout.it_interval.tv_sec = timeout.it_interval.tv_usec = 0;
+       setitimer(ITIMER_REAL, &timeout, NULL);
+}
+
+/* Cancel the ITIMER_REAL timeout; it_interval must be zeroed too, not garbage. */
+static void disarm_timeout(void)
+{
+       struct itimerval timeout;
+
+       memset(&timeout, 0, sizeof(timeout));
+       setitimer(ITIMER_REAL, &timeout, NULL);
+}
+
 static void do_waitwatch(unsigned int handle)
 {
        char **vec;
+       struct timeval tv = {.tv_sec = timeout_ms/1000,
+                            .tv_usec = (timeout_ms*1000)%1000000 };
+       fd_set set;
+
+       if (xs_fileno(handles[handle]) != -2) {
+               /* Select manually to time out gracefully; 0 ms waits forever. */
+               FD_ZERO(&set);
+               FD_SET(xs_fileno(handles[handle]), &set);
+               disarm_timeout();
+               if (select(xs_fileno(handles[handle])+1, &set,
+                          NULL, NULL, timeout_ms ? &tv : NULL) == 0) {
+                       errno = ETIMEDOUT;
+                       failed(handle);
+                       return;
+               }
+               set_timeout();
+       }
 
        vec = xs_read_watch(handles[handle]);
-       if (!vec)
-               failed(handle);
+       if (!vec) {
+               failed(handle);
+               return;
+       }
 
        if (handle)
-               printf("%i:%s:%s\n", handle, vec[0], vec[1]);
+               output("%i:%s:%s\n", handle, vec[0], vec[1]);
        else
-               printf("%s:%s\n", vec[0], vec[1]);
+               output("%s:%s\n", vec[0], vec[1]);
        free(vec);
 }
 
@@ -415,82 +564,6 @@
 {
        if (!xs_acknowledge_watch(handles[handle], token))
                failed(handle);
-}
-
-static bool wait_for_input(unsigned int handle)
-{
-       unsigned int i;
-       for (i = 0; i < ARRAY_SIZE(handles); i++) {
-               int fd;
-
-               if (!handles[i] || i == handle)
-                       continue;
-
-               fd = xs_fileno(handles[i]);
-               if (fd == -2) {
-                       unsigned int avail;
-                       get_input_chunk(in, in->buf, &avail);
-                       if (avail != 0)
-                               return true;
-               } else {
-                       struct timeval tv = {.tv_sec = 0, .tv_usec = 0 };
-                       fd_set set;
-
-                       FD_ZERO(&set);
-                       FD_SET(fd, &set);
-                       if (select(fd+1, &set, NULL, NULL,&tv))
-                               return true;
-               }
-       }
-       return false;
-}
-
-
-/* Async wait for watch on handle */
-static void do_command(unsigned int default_handle, char *line);
-static void do_async(unsigned int handle, char *line)
-{
-       int child;
-       unsigned int i;
-       children++;
-       if ((child = fork()) != 0) {
-               /* Wait until *something* happens, which indicates
-                * child has created an event.  V. sloppy, but we can't
-                * select on fake domain connections.
-                */
-               while (!wait_for_input(handle));
-               return;
-       }
-
-       /* Don't keep other handles open in parent. */
-       for (i = 0; i < ARRAY_SIZE(handles); i++) {
-               if (handles[i] && i != handle) {
-                       xs_daemon_close(handles[i]);
-                       handles[i] = NULL;
-               }
-       }
-
-       do_command(handle, line + argpos(line, 1));
-       exit(0);
-}
-
-static void do_asyncwait(unsigned int handle)
-{
-       int status;
-
-       if (handle)
-               barf("handle has no meaning with asyncwait");
-
-       if (children == 0)
-               barf("No children to wait for!");
-
-       if (waitpid(0, &status, 0) > 0) {
-               if (!WIFEXITED(status))
-                       barf("async died");
-               if (WEXITSTATUS(status))
-                       exit(WEXITSTATUS(status));
-       }
-       children--;
 }
 
 static void do_unwatch(unsigned int handle, const char *node, const char *token)
@@ -519,6 +592,9 @@
 {
        unsigned int i;
        int fd;
+
+       /* This mechanism is v. slow w. valgrind running. */
+       timeout_ms = 5000;
 
        /* We poll, so ignore signal */
        signal(SIGUSR2, SIG_IGN);
@@ -538,14 +614,17 @@
        *(int *)((void *)out + 32) = getpid();
        *(u16 *)((void *)out + 36) = atoi(eventchn);
 
+       if (!xs_introduce_domain(handles[handle], atoi(domid),
+                                atol(mfn), atoi(eventchn), path)) {
+               failed(handle);
+               munmap(out, getpagesize());
+               return;
+       }
+       output("handle is %i\n", i);
+
        /* Create new handle. */
        handles[i] = new(struct xs_handle);
        handles[i]->fd = -2;
-
-       if (!xs_introduce_domain(handles[handle], atoi(domid),
-                                atol(mfn), atoi(eventchn), path))
-               failed(handle);
-       printf("handle is %i\n", i);
 
        /* Read in daemon pid. */
        daemon_pid = *(int *)((void *)out + 32);
@@ -593,18 +672,20 @@
                sprintf(subnode, "%s/%s", node, dir[i]);
 
                perms = xs_get_permissions(handles[handle], subnode,&numperms);
-               if (!perms)
+               if (!perms) {
                        failed(handle);
-
-               printf("%s%s: ", spacing, dir[i]);
+                       return;
+               }
+
+               output("%s%s: ", spacing, dir[i]);
                for (j = 0; j < numperms; j++) {
                        char buffer[100];
                        if (!xs_perm_to_string(&perms[j], buffer))
                                barf("perm to string");
-                       printf("%s ", buffer);
+                       output("%s ", buffer);
                }
                free(perms);
-               printf("\n");
+               output("\n");
 
                /* Even directories can have contents. */
                contents = xs_read(handles[handle], subnode, &len);
@@ -612,14 +693,16 @@
                        if (errno != EISDIR)
                                failed(handle);
                } else {
-                       printf(" %s(%.*s)\n", spacing, len, contents);
+                       output(" %s(%.*s)\n", spacing, len, contents);
                        free(contents);
                }                       
 
                /* Every node is a directory. */
                subdirs = xs_directory(handles[handle], subnode, &subnum);
-               if (!subdirs)
+               if (!subdirs) {
                        failed(handle);
+                       return;
+               }
                dump_dir(handle, subnode, subdirs, subnum, depth+1);
                free(subdirs);
        }
@@ -631,8 +714,10 @@
        unsigned int subnum;
 
        subdirs = xs_directory(handles[handle], "/", &subnum);
-       if (!subdirs)
-               failed(handle);
+       if (!subdirs) {
+               failed(handle);
+               return;
+       }
 
        dump_dir(handle, "", subdirs, subnum, 0);
        free(subdirs);
@@ -655,6 +740,9 @@
 static void do_command(unsigned int default_handle, char *line)
 {
        char *endp;
+
+       if (print_input)
+               printf("%i> %s", ++linenum, line);
 
        if (strspn(line, " \n") == strlen(line))
                return;
@@ -667,6 +755,7 @@
        else
                handle = default_handle;
 
+       command = arg(line, 0);
        if (!handles[handle]) {
                if (readonly)
                        handles[handle] = xs_daemon_open_readonly();
@@ -675,10 +764,10 @@
                if (!handles[handle])
                        barf_perror("Opening connection to daemon");
        }
-       command = arg(line, 0);
-
-       if (timeout)
-               alarm(1);
+
+       if (!timeout_suppressed)
+               set_timeout();
+       timeout_suppressed = false;
 
        if (streq(command, "dir"))
                do_dir(handle, arg(line, 1));
@@ -703,10 +792,6 @@
                do_watch(handle, arg(line, 1), arg(line, 2));
        else if (streq(command, "waitwatch"))
                do_waitwatch(handle);
-       else if (streq(command, "async"))
-               do_async(handle, line);
-       else if (streq(command, "asyncwait"))
-               do_asyncwait(handle);
        else if (streq(command, "ackwatch"))
                do_ackwatch(handle, arg(line, 1));
        else if (streq(command, "unwatch"))
@@ -727,32 +812,70 @@
                do_release(handle, arg(line, 1));
        else if (streq(command, "dump"))
                dump(handle);
-       else if (streq(command, "sleep"))
-               sleep(atoi(arg(line, 1)));
+       else if (streq(command, "sleep")) {
+               disarm_timeout();
+               usleep(atoi(arg(line, 1)) * 1000);
+       } else if (streq(command, "expect"))
+               expect(line);
+       else if (streq(command, "notimeout"))
+               timeout_suppressed = true;
+       else if (streq(command, "readonly")) {
+               readonly = true;
+               xs_daemon_close(handles[handle]);
+               handles[handle] = NULL;
+       } else if (streq(command, "readwrite")) {
+               readonly = false;
+               xs_daemon_close(handles[handle]);
+               handles[handle] = NULL;
+       } else if (streq(command, "noackwrite"))
+               do_noackwrite(handle, arg(line,1), arg(line,2), arg(line,3));
+       else if (streq(command, "readack"))
+               do_readack(handle);
        else
                barf("Unknown command %s", command);
        fflush(stdout);
-       alarm(0);
-}
+       disarm_timeout();
+
+       /* Check expectations. */
+       if (!streq(command, "expect")) {
+               struct expect *i = list_top(&expects, struct expect, list);
+
+               if (i)
+                       barf("Expected '%s', didn't happen\n", i->pattern);
+       }
+}
+
+static struct option options[] = { { "readonly", 0, NULL, 'r' },
+                                  { "no-timeout", 0, NULL, 't' },
+                                  { NULL, 0, NULL, 0 } };
 
 int main(int argc, char *argv[])
 {
+       int opt;
        char line[1024];
 
-       if (argc > 1 && streq(argv[1], "--readonly")) {
-               readonly = true;
-               argc--;
-               argv++;
-       }
-
-       if (argc > 1 && streq(argv[1], "--no-timeout")) {
-               timeout = false;
-               argc--;
-               argv++;
-       }
-
-       if (argc != 1)
+       while ((opt = getopt_long(argc, argv, "xrt", options, NULL)) != -1) {
+               switch (opt) {
+               case 'r':
+                       readonly = true;
+                       break;
+               case 't':
+                       timeout_ms = 0;
+                       break;
+               case 'x':
+                       print_input = true;
+                       break;
+               }
+       }
+
+       if (optind + 1 == argc) {
+               int fd = open(argv[optind], O_RDONLY);
+               if (fd < 0) /* open() fails with -1; 0 is a valid descriptor */
+                       barf_perror("Opening %s", argv[optind]);
+               dup2(fd, STDIN_FILENO);
+       } else if (optind != argc)
+               usage();
+       
 
        /* The size of the ringbuffer: half a page minus head structure. */
        ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
@@ -761,7 +884,5 @@
        while (fgets(line, sizeof(line), stdin))
                do_command(0, line);
 
-       while (children)
-               do_asyncwait(0);
        return 0;
 }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile   Wed Aug 24 02:43:18 2005
+++ b/tools/xentrace/Makefile   Thu Aug 25 22:53:20 2005
@@ -36,4 +36,4 @@
        $(RM) *.a *.so *.o *.rpm $(BIN)
 
 %: %.c $(HDRS) Makefile
-       $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc
+       $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c   Wed Aug 24 02:43:18 2005
+++ b/tools/xentrace/xenctx.c   Thu Aug 25 22:53:20 2005
@@ -21,7 +21,7 @@
 #include <argp.h>
 #include <signal.h>
 
-#include "xc.h"
+#include "xenctrl.h"
 
 #ifdef __i386__
 void print_ctx(vcpu_guest_context_t *ctx1)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Wed Aug 24 02:43:18 2005
+++ b/tools/xentrace/xentrace.c Thu Aug 25 22:53:20 2005
@@ -45,6 +45,8 @@
     char *outfile;
     struct timespec poll_sleep;
     unsigned long new_data_thresh;
+    u32 evt_mask;
+    u32 cpu_mask;
 } settings_t;
 
 settings_t opts;
@@ -93,13 +95,13 @@
 
 /**
  * get_tbufs - get pointer to and size of the trace buffers
- * @mach_addr: location to store machine address if the trace buffers to
- * @size:      location to store the size of a trace buffer to
+ * @mfn:  location to store mfn of the trace buffers to
+ * @size: location to store the size of a trace buffer to
  *
  * Gets the machine address of the trace pointer area and the size of the
  * per CPU buffers.
  */
-void get_tbufs(unsigned long *mach_addr, unsigned long *size)
+void get_tbufs(unsigned long *mfn, unsigned long *size)
 {
     int ret;
     dom0_op_t op;                        /* dom0 op we'll build             */
@@ -119,19 +121,19 @@
         exit(EXIT_FAILURE);
     }
 
-    *mach_addr = op.u.tbufcontrol.mach_addr;
-    *size      = op.u.tbufcontrol.size;
+    *mfn  = op.u.tbufcontrol.buffer_mfn;
+    *size = op.u.tbufcontrol.size;
 }
 
 /**
  * map_tbufs - memory map Xen trace buffers into user space
- * @tbufs:     machine address of the trace buffers
+ * @tbufs_mfn: mfn of the trace buffers
  * @num:       number of trace buffers to map
  * @size:      size of each trace buffer
  *
  * Maps the Xen trace buffers them into process address space.
  */
-struct t_buf *map_tbufs(unsigned long tbufs_mach, unsigned int num,
+struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                         unsigned long size)
 {
     int xc_handle;                  /* file descriptor for /proc/xen/privcmd */
@@ -147,7 +149,7 @@
 
     tbufs_mapped = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */,
                                         size * num, PROT_READ,
-                                        tbufs_mach >> PAGE_SHIFT);
+                                        tbufs_mfn);
 
     xc_interface_close(xc_handle);
 
@@ -160,6 +162,41 @@
     return tbufs_mapped;
 }
 
+/**
+ * set_mask - set the cpu/event mask in HV
+ * @mask:           the new mask 
+ * @type:           the new mask type,0-event mask, 1-cpu mask
+ *
+ */
+void set_mask(u32 mask, int type)
+{
+    int ret;
+    dom0_op_t op;                        /* dom0 op we'll build             */
+    int xc_handle = xc_interface_open(); /* for accessing control interface */
+
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if (type == 1) { /* cpu mask */
+        op.u.tbufcontrol.op  = DOM0_TBUF_SET_CPU_MASK;
+        op.u.tbufcontrol.cpu_mask = mask;
+        fprintf(stderr, "change cpumask to 0x%x\n", mask);
+    }else if (type == 0) { /* event mask */
+        op.u.tbufcontrol.op  = DOM0_TBUF_SET_EVT_MASK;
+        op.u.tbufcontrol.evt_mask = mask;
+        fprintf(stderr, "change evtmask to 0x%x\n", mask);
+    }
+
+    ret = do_dom0_op(xc_handle, &op);
+
+    xc_interface_close(xc_handle);
+
+    if ( ret != 0 )
+    {
+        PERROR("Failure to get trace buffer pointer from Xen and set the new mask");
+        exit(EXIT_FAILURE);
+    }
+
+}
 
 /**
  * init_bufs_ptrs - initialises an array of pointers to the trace buffers
@@ -194,7 +231,7 @@
 
 /**
  * init_rec_ptrs - initialises data area pointers to locations in user space
- * @tbufs_mach:    machine base address of the trace buffer area
+ * @tbufs_mfn:     base mfn of the trace buffer area
  * @tbufs_mapped:  user virtual address of base of trace buffer area
  * @meta:          array of user-space pointers to struct t_buf's of metadata
  * @num:           number of trace buffers
@@ -203,7 +240,7 @@
  * mapped in user space.  Note that the trace buffer metadata contains machine
  * pointers - the array returned allows more convenient access to them.
  */
-struct t_rec **init_rec_ptrs(unsigned long tbufs_mach,
+struct t_rec **init_rec_ptrs(unsigned long tbufs_mfn,
                              struct t_buf *tbufs_mapped,
                              struct t_buf **meta,
                              unsigned int num)
@@ -219,7 +256,7 @@
     }
 
     for ( i = 0; i < num; i++ )
-        data[i] = (struct t_rec *)(meta[i]->rec_addr - tbufs_mach
+        data[i] = (struct t_rec *)(meta[i]->rec_addr - (tbufs_mfn<<XC_PAGE_SHIFT) /* XXX */
                                    + (unsigned long)tbufs_mapped);
 
     return data;
@@ -293,7 +330,7 @@
     struct t_rec **data;         /* pointers to the trace buffer data areas
                                   * where they are mapped into user space.   */
     unsigned long *cons;         /* store tail indexes for the trace buffers */
-    unsigned long tbufs_mach;    /* machine address of the tbufs             */
+    unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
     unsigned int  num;           /* number of trace buffers / logical CPUS   */
     unsigned long size;          /* size of a single trace buffer            */
 
@@ -303,14 +340,14 @@
     num = get_num_cpus();
 
     /* setup access to trace buffers */
-    get_tbufs(&tbufs_mach, &size);
-    tbufs_mapped = map_tbufs(tbufs_mach, num, size);
+    get_tbufs(&tbufs_mfn, &size);
+    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
 
     size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
 
     /* build arrays of convenience ptrs */
     meta  = init_bufs_ptrs (tbufs_mapped, num, size);
-    data  = init_rec_ptrs  (tbufs_mach, tbufs_mapped, meta, num);
+    data  = init_rec_ptrs  (tbufs_mfn, tbufs_mapped, meta, num);
     cons  = init_tail_idxs (meta, num);
 
     /* now, scan buffers for events */
@@ -341,6 +378,31 @@
  * Various declarations / definitions GNU argp needs to do its work
  *****************************************************************************/
 
+int parse_evtmask(char *arg, struct argp_state *state)
+{
+    settings_t *setup = (settings_t *)state->input;
+    char *inval;
+
+    /* search filtering class */
+    if (strcmp(arg, "gen") == 0){ 
+        setup->evt_mask |= TRC_GEN;
+    } else if(strcmp(arg, "sched") == 0){ 
+        setup->evt_mask |= TRC_SCHED;
+    } else if(strcmp(arg, "dom0op") == 0){ 
+        setup->evt_mask |= TRC_DOM0OP;
+    } else if(strcmp(arg, "vmx") == 0){ 
+        setup->evt_mask |= TRC_VMX;
+    } else if(strcmp(arg, "all") == 0){ 
+        setup->evt_mask |= TRC_ALL;
+    } else {
+        setup->evt_mask = strtol(arg, &inval, 0);
+        if ( inval == arg )
+            argp_usage(state);
+    }
+
+    return 0;
+
+}
 
 /* command parser for GNU argp - see GNU docs for more info */
 error_t cmd_parser(int key, char *arg, struct argp_state *state)
@@ -366,6 +428,21 @@
             argp_usage(state);
     }
     break;
+
+    case 'c': /* set new cpu mask for filtering*/
+    {
+        char *inval;
+        setup->cpu_mask = strtol(arg, &inval, 0);
+        if ( inval == arg )
+            argp_usage(state);
+    }
+    break;
+    
+    case 'e': /* set new event mask for filtering*/
+    {
+        parse_evtmask(arg, state);
+    }
+    break;
     
     case ARGP_KEY_ARG:
     {
@@ -397,6 +474,14 @@
       .doc = 
       "Set sleep time, p, in milliseconds between polling the trace buffer "
       "for new data (default " xstr(POLL_SLEEP_MILLIS) ")." },
+
+    { .name = "cpu-mask", .key='c', .arg="c",
+      .doc = 
+      "set cpu-mask " },
+
+    { .name = "evt-mask", .key='e', .arg="e",
+      .doc = 
+      "set evt-mask " },
 
     {0}
 };
@@ -430,8 +515,18 @@
     opts.outfile = 0;
     opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
     opts.new_data_thresh = NEW_DATA_THRESH;
+    opts.evt_mask = 0;
+    opts.cpu_mask = 0;
 
     argp_parse(&parser_def, argc, argv, 0, 0, &opts);
+
+    if (opts.evt_mask != 0) { 
+        set_mask(opts.evt_mask, 0);
+    }
+
+    if (opts.cpu_mask != 0) {
+        set_mask(opts.cpu_mask, 1); /* type 1 selects the cpu mask */
+    }
 
     if ( opts.outfile )
         outfd = open(opts.outfile, O_WRONLY | O_CREAT);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/Rules.mk
--- a/xen/Rules.mk      Wed Aug 24 02:43:18 2005
+++ b/xen/Rules.mk      Thu Aug 25 22:53:20 2005
@@ -2,7 +2,7 @@
 # If you change any of these configuration options then you must
 # 'make clean' before rebuilding.
 #
-verbose     ?= n
+verbose     ?= y
 debug       ?= n
 perfc       ?= n
 perfc_arrays?= n
@@ -10,14 +10,6 @@
 optimize    ?= y
 domu_debug  ?= n
 crash_debug ?= n
-
-# ACM_USE_SECURITY_POLICY is set to security policy of Xen
-# Supported models are:
-#      ACM_NULL_POLICY (ACM will not be built with this policy)
-#      ACM_CHINESE_WALL_POLICY
-#      ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
-#      ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
-ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
 
 include $(BASEDIR)/../Config.mk
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/domain.c
--- a/xen/arch/ia64/domain.c    Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/domain.c    Thu Aug 25 22:53:20 2005
@@ -1092,3 +1092,12 @@
 {
        vcpu_pend_interrupt(dom0->vcpu[0],irq);
 }
+
+void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
+{
+       if ( v->processor == newcpu )
+               return;
+
+       set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+       v->processor = newcpu;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/grant_table.c
--- a/xen/arch/ia64/grant_table.c       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/grant_table.c       Thu Aug 25 22:53:20 2005
@@ -355,7 +355,7 @@
     /* Bitwise-OR avoids short-circuiting which screws control flow. */
     if ( unlikely(__get_user(dom, &uop->dom) |
                   __get_user(ref, &uop->ref) |
-                  __get_user(host_virt_addr, &uop->host_virt_addr) |
+                  __get_user(host_virt_addr, &uop->host_addr) |
                   __get_user(dev_hst_ro_flags, &uop->flags)) )
     {
         DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
@@ -500,7 +500,7 @@
     ld = current->domain;
 
     /* Bitwise-OR avoids short-circuiting which screws control flow. */
-    if ( unlikely(__get_user(virt, &uop->host_virt_addr) |
+    if ( unlikely(__get_user(virt, &uop->host_addr) |
                   __get_user(frame, &uop->dev_bus_addr) |
                   __get_user(handle, &uop->handle)) )
     {
@@ -545,15 +545,6 @@
     if ( frame == 0 )
     {
         frame = act->frame;
-    }
-    else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
-    {
-        if ( !( flags & GNTMAP_device_map ) )
-            PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
-                     "Bad frame number: frame not mapped for dev access.\n");
-        frame = act->frame;
-
-        /* Frame will be unmapped for device access below if virt addr okay. */
     }
     else
     {
@@ -615,15 +606,6 @@
 
         act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
                                               : GNTPIN_hstw_inc;
-
-        if ( frame == GNTUNMAP_DEV_FROM_VIRT )
-        {
-            act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
-                                                  : GNTPIN_devw_inc;
-
-            map->ref_and_flags &= ~GNTMAP_device_map;
-            (void)__put_user(0, &uop->dev_bus_addr);
-        }
 
         rc = 0;
         *va = virt;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/vcpu.c
--- a/xen/arch/ia64/vcpu.c      Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/vcpu.c      Thu Aug 25 22:53:20 2005
@@ -585,6 +585,14 @@
        set_bit(vector,PSCBX(vcpu,irr));
        PSCB(vcpu,pending_interruption) = 1;
     }
+
+    /* Keir: I think you should unblock when an interrupt is pending. */
+    {
+        int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags);
+        vcpu_unblock(vcpu);
+        if ( running )
+            smp_send_event_check_cpu(vcpu->processor);
+    }
 }
 
 void early_tick(VCPU *vcpu)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c   Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/xenmisc.c   Thu Aug 25 22:53:20 2005
@@ -280,7 +280,6 @@
 
 unsigned long context_switch_count = 0;
 
-// context_switch
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
 //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@
 //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
 //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
 #ifdef CONFIG_VTI
-       unsigned long psr;
-       /* Interrupt is enabled after next task is chosen.
-        * So we have to disable it for stack switch.
-        */
-       local_irq_save(psr);
        vtm_domain_out(prev);
-       /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
        context_switch_count++;
        switch_to(prev,next,prev);
 #ifdef CONFIG_VTI
-       /* Post-setup for new domain */
         vtm_domain_in(current);
-       local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
 // leave this debug for now: it acts as a heartbeat when more than
 // one domain is active
 {
@@ -315,25 +306,27 @@
 if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
 if (!i--) { printk("+",id); i = 1000000; }
 }
-       clear_bit(_VCPUF_running, &prev->vcpu_flags);
-       //if (!is_idle_task(next->domain) )
-               //send_guest_virq(next, VIRQ_TIMER);
+
 #ifdef CONFIG_VTI
        if (VMX_DOMAIN(current))
                vmx_load_all_rr(current);
-       return;
-#else // CONFIG_VTI
+#else
        if (!is_idle_task(current->domain)) {
                load_region_regs(current);
                if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
        }
        if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+       /* nothing to do */
 }
 
 void continue_running(struct vcpu *same)
 {
-    /* nothing to do */
+       /* nothing to do */
 }
 
 void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/xensetup.c
--- a/xen/arch/ia64/xensetup.c  Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/xensetup.c  Thu Aug 25 22:53:20 2005
@@ -131,12 +131,14 @@
 }
 
 struct ns16550_defaults ns16550_com1 = {
+    .baud      = BAUD_AUTO,
     .data_bits = 8,
     .parity    = 'n',
     .stop_bits = 1
 };
 
 struct ns16550_defaults ns16550_com2 = {
+    .baud      = BAUD_AUTO,
     .data_bits = 8,
     .parity    = 'n',
     .stop_bits = 1
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/ia64/xentime.c
--- a/xen/arch/ia64/xentime.c   Wed Aug 24 02:43:18 2005
+++ b/xen/arch/ia64/xentime.c   Thu Aug 25 22:53:20 2005
@@ -48,7 +48,7 @@
 static s_time_t        stime_irq = 0x0;       /* System time at last 'time update' */
 unsigned long itc_scale;
 unsigned long itc_at_irq;
-static unsigned long   wc_sec, wc_usec; /* UTC time at last 'time update'.   */
+static unsigned long   wc_sec, wc_nsec; /* UTC time at last 'time update'.   */
 //static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
 static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs);
 
@@ -103,25 +103,22 @@
 }
 
 /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
+void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
 {
 #ifdef  CONFIG_VTI
-    s64 delta;
-    long _usecs = (long)usecs;
+    u64 _nsecs;
 
     write_lock_irq(&xtime_lock);
 
-    delta = (s64)(stime_irq - system_time_base);
-
-    _usecs += (long)(delta/1000);
-    while ( _usecs >= 1000000 ) 
+    _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base);
+    while ( _nsecs >= 1000000000 ) 
     {
-        _usecs -= 1000000;
+        _nsecs -= 1000000000;
         secs++;
     }
 
     wc_sec  = secs;
-    wc_usec = _usecs;
+    wc_nsec = (unsigned long)_nsecs;
 
     write_unlock_irq(&xtime_lock);
 
@@ -290,13 +287,13 @@
     /* Wallclock time starts as the initial RTC time. */
     efi_gettimeofday(&tm);
     wc_sec  = tm.tv_sec;
-    wc_usec = tm.tv_nsec/1000;
+    wc_nsec = tm.tv_nsec;
 
 
     printk("Time init:\n");
     printk(".... System Time: %ldns\n", NOW());
     printk(".... scale:       %16lX\n", itc_scale);
-    printk(".... Wall Clock:  %lds %ldus\n", wc_sec, wc_usec);
+    printk(".... Wall Clock:  %lds %ldus\n", wc_sec, wc_nsec/1000);
 
     return 0;
 }
@@ -338,10 +335,10 @@
             (*(unsigned long *)&jiffies_64)++;
 
             /* Update wall time. */
-            wc_usec += 1000000/HZ;
-            if ( wc_usec >= 1000000 )
+            wc_nsec += 1000000000/HZ;
+            if ( wc_nsec >= 1000000000 )
             {
-                wc_usec -= 1000000;
+                wc_nsec -= 1000000000;
                 wc_sec++;
             }
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/audit.c      Thu Aug 25 22:53:20 2005
@@ -73,7 +73,7 @@
             if ( tcount < 0 )
             {
                 APRINTK("Audit %d: type count went below zero "
-                        "mfn=%lx t=%x ot=%x",
+                        "mfn=%lx t=%" PRtype_info " ot=%x",
                         d->domain_id, page_to_pfn(page),
                         page->u.inuse.type_info,
                         page->tlbflush_timestamp);
@@ -82,7 +82,7 @@
             else if ( (tcount & ~PGT_count_mask) != 0 )
             {
                 APRINTK("Audit %d: type count overflowed "
-                        "mfn=%lx t=%x ot=%x",
+                        "mfn=%lx t=%" PRtype_info " ot=%x",
                         d->domain_id, page_to_pfn(page),
                         page->u.inuse.type_info,
                         page->tlbflush_timestamp);
@@ -101,7 +101,7 @@
         if ( count < 0 )
         {
             APRINTK("Audit %d: general count went below zero "
-                    "mfn=%lx t=%x ot=%x",
+                    "mfn=%lx t=%" PRtype_info " ot=%x",
                     d->domain_id, page_to_pfn(page),
                     page->u.inuse.type_info,
                     page->tlbflush_timestamp);
@@ -110,7 +110,7 @@
         else if ( (count & ~PGT_count_mask) != 0 )
         {
             APRINTK("Audit %d: general count overflowed "
-                    "mfn=%lx t=%x ot=%x",
+                    "mfn=%lx t=%" PRtype_info " ot=%x",
                     d->domain_id, page_to_pfn(page),
                     page->u.inuse.type_info,
                     page->tlbflush_timestamp);
@@ -152,7 +152,8 @@
                         if ( page_type != PGT_l1_shadow )
                         {
                             printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
-                                   "Expected Shadow L1 t=%x mfn=%lx\n",
+                                   "Expected Shadow L1 t=%" PRtype_info 
+                                  " mfn=%lx\n",
                                    d->domain_id, mfn, i,
                                    l1page->u.inuse.type_info, l1mfn);
                             errors++;
@@ -178,14 +179,14 @@
                         if ( page_type == PGT_l2_page_table )
                         {
                             printk("Audit %d: [%x] Found %s Linear PT "
-                                   "t=%x mfn=%lx\n",
+                                   "t=%" PRtype_info " mfn=%lx\n",
                                    d->domain_id, i, (l1mfn==mfn) ? "Self" : "Other",
                                    l1page->u.inuse.type_info, l1mfn);
                         }
                         else if ( page_type != PGT_l1_page_table )
                         {
                             printk("Audit %d: [L2 mfn=%lx i=%x] "
-                                   "Expected L1 t=%x mfn=%lx\n",
+                                   "Expected L1 t=%" PRtype_info " mfn=%lx\n",
                                    d->domain_id, mfn, i,
                                    l1page->u.inuse.type_info, l1mfn);
                             errors++;
@@ -237,7 +238,8 @@
                     if ( page_get_owner(gpage) != d )
                     {
                         printk("Audit %d: [hl2mfn=%lx,i=%x] Skip foreign page "
-                               "dom=%p (id=%d) mfn=%lx c=%08x t=%08x\n",
+                               "dom=%p (id=%d) mfn=%lx c=%08x t=%"
+                              PRtype_info "\n",
                                d->domain_id, hl2mfn, i,
                                page_get_owner(gpage),
                                page_get_owner(gpage)->domain_id,
@@ -288,7 +290,7 @@
                                PGT_writable_page) )
                         {
                             printk("Audit %d: [l1mfn=%lx, i=%x] Illegal RW "
-                                   "t=%x mfn=%lx\n",
+                                   "t=%" PRtype_info " mfn=%lx\n",
                                    d->domain_id, l1mfn, i,
                                    gpage->u.inuse.type_info, gmfn);
                             errors++;
@@ -308,7 +310,8 @@
                     if ( page_get_owner(gpage) != d )
                     {
                         printk("Audit %d: [l1mfn=%lx,i=%x] Skip foreign page "
-                               "dom=%p (id=%d) mfn=%lx c=%08x t=%08x\n",
+                               "dom=%p (id=%d) mfn=%lx c=%08x t=%" 
+                              PRtype_info "\n",
                                d->domain_id, l1mfn, i,
                                page_get_owner(gpage),
                                page_get_owner(gpage)->domain_id,
@@ -454,7 +457,7 @@
                     if ( shadow_refcounts )
                     {
                         printk("Audit %d: found an L2 guest page "
-                               "mfn=%lx t=%08x c=%08x while in shadow mode\n",
+                               "mfn=%lx t=%" PRtype_info " c=%08x while in shadow mode\n",
                                d->domain_id, mfn, page->u.inuse.type_info,
                                page->count_info);
                         errors++;
@@ -465,14 +468,16 @@
                         if ( (page->u.inuse.type_info & PGT_validated) !=
                              PGT_validated )
                         {
-                            printk("Audit %d: L2 mfn=%lx not validated %08x\n",
+                            printk("Audit %d: L2 mfn=%lx not validated %"
+                                  PRtype_info "\n",
                                    d->domain_id, mfn, page->u.inuse.type_info);
                             errors++;
                         }
 
                         if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
                         {
-                            printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
+                            printk("Audit %d: L2 mfn=%lx not pinned t=%"
+                                  PRtype_info "\n",
                                    d->domain_id, mfn, page->u.inuse.type_info);
                             errors++;
                         }
@@ -494,7 +499,8 @@
                 {
                     if ( shadow_refcounts )
                     {
-                        printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
+                        printk("found an L1 guest page mfn=%lx t=%" 
+                              PRtype_info " c=%08x "
                                "while in shadow mode\n",
                                mfn, page->u.inuse.type_info, page->count_info);
                         errors++;
@@ -505,7 +511,8 @@
                         if ( (page->u.inuse.type_info & PGT_validated) !=
                              PGT_validated )
                         {
-                            printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
+                            printk("Audit %d: L1 not validated mfn=%lx t=%"
+                                  PRtype_info "\n",
                                    d->domain_id, mfn, page->u.inuse.type_info);
                             errors++;
                         }
@@ -514,7 +521,8 @@
                         {
                             if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
                             {
-                                printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
+                                printk("Audit %d: L1 mfn=%lx not pinned t=%"
+                                      PRtype_info "\n",
                                        d->domain_id, mfn, page->u.inuse.type_info);
                             }
                         }
@@ -621,7 +629,7 @@
         for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
         {
             if ( (pt[i] & _PAGE_PRESENT) && ((pt[i] >> PAGE_SHIFT) == xmfn) )
-                printk("     found dom=%d mfn=%lx t=%08x c=%08x "
+                printk("     found dom=%d mfn=%lx t=%" PRtype_info " c=%08x "
                        "pt[i=%x]=%lx\n",
                        d->domain_id, mfn, page->u.inuse.type_info,
                        page->count_info, i, pt[i]);
@@ -754,7 +762,7 @@
         if ( (page->u.inuse.type_info & PGT_count_mask) >
              (page->count_info & PGC_count_mask) )
         {
-            printk("taf(%08x) > caf(%08x) mfn=%lx\n",
+            printk("taf(%" PRtype_info ") > caf(%08x) mfn=%lx\n",
                    page->u.inuse.type_info, page->count_info, mfn);
             errors++;
         }
@@ -763,8 +771,8 @@
              (page_type == PGT_writable_page) &&
              !(page->u.inuse.type_info & PGT_validated) )
         {
-            printk("shadow mode writable page not validated mfn=%lx "
-                   "t=%08x c=%08x\n",
+            printk("shadow mode writable page not validated mfn=%lx " 
+                  "t=%" PRtype_info  " c=%08x\n",
                    mfn, page->u.inuse.type_info, page->count_info);
             errors++;
         }
@@ -774,7 +782,7 @@
              (page->u.inuse.type_info & PGT_count_mask) > 1 )
         {
             printk("writeable page with type count >1: "
-                   "mfn=%lx t=%08x c=%08x\n",
+                   "mfn=%lx t=%" PRtype_info " c=%08x\n",
                   mfn,
                   page->u.inuse.type_info,
                   page->count_info );
@@ -786,7 +794,7 @@
         if ( page_type == PGT_none && 
              (page->u.inuse.type_info & PGT_count_mask) > 0 )
         {
-            printk("normal page with type count >0: mfn=%lx t=%08x c=%08x\n",
+            printk("normal page with type count >0: mfn=%lx t=%" PRtype_info " c=%08x\n",
                   mfn,
                   page->u.inuse.type_info,
                   page->count_info );
@@ -812,7 +820,7 @@
                  : !(page_type && (page_type <= PGT_l4_page_table)) )
             {
                 printk("out of sync page mfn=%lx has strange type "
-                       "t=%08x c=%08x\n",
+                       "t=%" PRtype_info  " c=%08x\n",
                        mfn, page->u.inuse.type_info, page->count_info);
                 errors++;
             }
@@ -850,7 +858,7 @@
         case PGT_l4_page_table:
             if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
             {
-                printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
+                printk("Audit %d: type count!=0 t=%" PRtype_info " ot=%x c=%x mfn=%lx\n",
                        d->domain_id, page->u.inuse.type_info, 
                        page->tlbflush_timestamp,
                        page->count_info, mfn);
@@ -864,7 +872,7 @@
         case PGT_ldt_page:
             if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
             {
-                printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
+                printk("Audit %d: type count!=0 t=%" PRtype_info " ot=%x c=%x mfn=%lx\n",
                        d->domain_id, page->u.inuse.type_info, 
                        page->tlbflush_timestamp,
                        page->count_info, mfn);
@@ -877,7 +885,7 @@
         
         if ( (page->count_info & PGC_count_mask) != 1 )
         {
-            printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x mfn=%lx\n",
+            printk("Audit %d: gen count!=1 (c=%x) t=%" PRtype_info " ot=%x mfn=%lx\n",
                    d->domain_id,
                    page->count_info,
                    page->u.inuse.type_info, 
@@ -913,7 +921,7 @@
                          (page->count_info != 0) )
                     {
                         printk("Audit %d: shadow page counts wrong "
-                               "mfn=%lx t=%08x c=%08x\n",
+                               "mfn=%lx t=%" PRtype_info " c=%08x\n",
                                d->domain_id, page_to_pfn(page),
                                page->u.inuse.type_info,
                                page->count_info);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/cpu/amd.c    Thu Aug 25 22:53:20 2005
@@ -8,6 +8,20 @@
 #include <asm/processor.h>
 
 #include "cpu.h"
+
+/*
+ * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush
+ * filter on AMD 64-bit processors.
+ */
+static int flush_filter_force;
+static void flush_filter(char *s)
+{
+       if (!strcmp(s, "off"))
+               flush_filter_force = -1;
+       if (!strcmp(s, "on"))
+               flush_filter_force = 1;
+}
+custom_param("amd_flush_filter", flush_filter);
 
 #define num_physpages 0
 
@@ -25,7 +39,7 @@
  */
  
 extern void vide(void);
-__asm__(".align 4\nvide: ret");
+__asm__(".text\n.align 4\nvide: ret");
 
 static void __init init_amd(struct cpuinfo_x86 *c)
 {
@@ -190,6 +204,21 @@
        case 6:
                set_bit(X86_FEATURE_K7, c->x86_capability); 
                break;
+       }
+
+       if (c->x86 == 15) {
+               rdmsr(MSR_K7_HWCR, l, h);
+               printk(KERN_INFO "CPU%d: AMD Flush Filter %sabled",
+                      smp_processor_id(), (l & (1<<6)) ? "dis" : "en");
+               if ((flush_filter_force > 0) && (l & (1<<6))) {
+                       l &= ~(1<<6);
+                       printk(" -> Forcibly enabled");
+               } else if ((flush_filter_force < 0) && !(l & (1<<6))) {
+                       l |= 1<<6;
+                       printk(" -> Forcibly disabled");
+               }
+               wrmsr(MSR_K7_HWCR, l, h);
+               printk("\n");
        }
 
        display_cacheinfo(c);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/dom0_ops.c   Thu Aug 25 22:53:20 2005
@@ -404,15 +404,17 @@
 
     memcpy(c, &v->arch.guest_context, sizeof(*c));
 
-    /* IOPL privileges are virtualised -- merge back into returned eflags. */
-    BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
-    c->user_regs.eflags |= v->arch.iopl << 12;
-
     if ( VMX_DOMAIN(v) )
     {
         save_vmx_cpu_user_regs(&c->user_regs);
         __vmread(CR0_READ_SHADOW, &c->ctrlreg[0]);
         __vmread(CR4_READ_SHADOW, &c->ctrlreg[4]);
+    }
+    else
+    {
+        /* IOPL privileges are virtualised: merge back into returned eflags. */
+        BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
+        c->user_regs.eflags |= v->arch.iopl << 12;
     }
 
     c->flags = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/domain.c     Thu Aug 25 22:53:20 2005
@@ -48,6 +48,8 @@
 
 struct percpu_ctxt {
     struct vcpu *curr_vcpu;
+    unsigned int context_not_finalised;
+    unsigned int dirty_segment_mask;
 } __cacheline_aligned;
 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
 
@@ -190,7 +192,7 @@
     {
         list_for_each_entry ( page, &d->page_list, list )
         {
-            printk("Page %p: caf=%08x, taf=%08x\n",
+            printk("Page %p: caf=%08x, taf=%" PRtype_info "\n",
                    _p(page_to_phys(page)), page->count_info,
                    page->u.inuse.type_info);
         }
@@ -198,14 +200,14 @@
 
     list_for_each_entry ( page, &d->xenpage_list, list )
     {
-        printk("XenPage %p: caf=%08x, taf=%08x\n",
+        printk("XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
                _p(page_to_phys(page)), page->count_info,
                page->u.inuse.type_info);
     }
 
     
     page = virt_to_page(d->shared_info);
-    printk("Shared_info@%p: caf=%08x, taf=%08x\n",
+    printk("Shared_info@%p: caf=%08x, taf=%" PRtype_info "\n",
            _p(page_to_phys(page)), page->count_info,
            page->u.inuse.type_info);
 }
@@ -215,8 +217,16 @@
     return xmalloc(struct vcpu);
 }
 
+/* We assume that vcpu 0 is always the last one to be freed in a
+   domain i.e. if v->vcpu_id == 0, the domain should be
+   single-processor. */
 void arch_free_vcpu_struct(struct vcpu *v)
 {
+    struct vcpu *p;
+    for_each_vcpu(v->domain, p) {
+        if (p->next_in_list == v)
+            p->next_in_list = v->next_in_list;
+    }
     xfree(v);
 }
 
@@ -295,26 +305,23 @@
         l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
 }
 
+void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
+{
+    if ( v->processor == newcpu )
+        return;
+
+    set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+    v->processor = newcpu;
+
+    if ( VMX_DOMAIN(v) )
+    {
+        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
+        v->arch.schedule_tail = arch_vmx_do_relaunch;
+    }
+}
+
 #ifdef CONFIG_VMX
 static int vmx_switch_on;
-
-void arch_vmx_do_resume(struct vcpu *v) 
-{
-    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
-    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
-    vmx_do_resume(v);
-    reset_stack_and_jump(vmx_asm_do_resume);
-}
-
-void arch_vmx_do_launch(struct vcpu *v) 
-{
-    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
-    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
-    vmx_do_launch(v);
-    reset_stack_and_jump(vmx_asm_do_launch);
-}
 
 static int vmx_final_setup_guest(
     struct vcpu *v, struct vcpu_guest_context *ctxt)
@@ -346,7 +353,7 @@
 
     v->arch.schedule_tail = arch_vmx_do_launch;
 
-#if defined (__i386)
+#if defined (__i386__)
     v->domain->arch.vmx_platform.real_mode_data = 
         (unsigned long *) regs->esi;
 #endif
@@ -404,7 +411,7 @@
     {
         if ( ((c->user_regs.cs & 3) == 0) ||
              ((c->user_regs.ss & 3) == 0) )
-                return -EINVAL;
+            return -EINVAL;
     }
 
     clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
@@ -458,7 +465,7 @@
         if ( !(c->flags & VGCF_VMX_GUEST) )
 #endif
             if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
-                  PGT_base_page_table) )
+                                    PGT_base_page_table) )
                 return -EINVAL;
     }
 
@@ -479,7 +486,10 @@
     }
 
     update_pagetables(v);
-    
+
+    if ( v->vcpu_id == 0 )
+        init_domain_time(d);
+
     /* Don't redo final setup */
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
@@ -541,51 +551,59 @@
     __r; })
 
 #if CONFIG_VMX
-#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n)     if (vmx_switch_on) vmx_load_msrs(n)
 #else
-#define load_msrs(_p, _n)     ((void)0)
+#define load_msrs(n)     ((void)0)
 #endif 
 
-static void load_segments(struct vcpu *p, struct vcpu *n)
-{
-    struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS           0x01
+#define DIRTY_ES           0x02
+#define DIRTY_FS           0x04
+#define DIRTY_GS           0x08
+#define DIRTY_FS_BASE      0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
+{
     struct vcpu_guest_context *nctxt = &n->arch.guest_context;
     int all_segs_okay = 1;
+    unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+    /* Load and clear the dirty segment mask. */
+    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+    percpu_ctxt[cpu].dirty_segment_mask = 0;
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
         all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
         all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
 
     /*
      * Either selector != 0 ==> reload.
      * Also reload to reset FS_BASE if it was non-zero.
      */
-    if ( unlikely(pctxt->user_regs.fs |
-                  pctxt->fs_base |
+    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
                   nctxt->user_regs.fs) )
-    {
         all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
-        if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
-            pctxt->fs_base = 0;
-    }
 
     /*
      * Either selector != 0 ==> reload.
      * Also reload to reset GS_BASE if it was non-zero.
      */
-    if ( unlikely(pctxt->user_regs.gs |
-                  pctxt->gs_base_user |
+    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
                   nctxt->user_regs.gs) )
     {
         /* Reset GS_BASE with user %gs? */
-        if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
             all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
-        if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
-            pctxt->gs_base_user = 0;
     }
 
     /* This can only be non-zero if selector is NULL. */
@@ -650,7 +668,9 @@
 
 static void save_segments(struct vcpu *v)
 {
-    struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+    struct cpu_user_regs      *regs = &ctxt->user_regs;
+    unsigned int dirty_segment_mask = 0;
 
     if ( VMX_DOMAIN(v) )
         rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +679,34 @@
     __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
     __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
     __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-
-static void clear_segments(void)
-{
-    __asm__ __volatile__ (
-        " movl %0,%%ds; "
-        " movl %0,%%es; "
-        " movl %0,%%fs; "
-        " movl %0,%%gs; "
-        ""safe_swapgs"  "
-        " movl %0,%%gs"
-        : : "r" (0) );
+
+    if ( regs->ds )
+        dirty_segment_mask |= DIRTY_DS;
+
+    if ( regs->es )
+        dirty_segment_mask |= DIRTY_ES;
+
+    if ( regs->fs )
+    {
+        dirty_segment_mask |= DIRTY_FS;
+        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+    }
+    else if ( ctxt->fs_base )
+    {
+        dirty_segment_mask |= DIRTY_FS_BASE;
+    }
+
+    if ( regs->gs )
+    {
+        dirty_segment_mask |= DIRTY_GS;
+        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+    }
+    else if ( ctxt->gs_base_user )
+    {
+        dirty_segment_mask |= DIRTY_GS_BASE_USER;
+    }
+
+    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 }
 
 long do_switch_to_user(void)
@@ -706,10 +742,9 @@
 
 #elif defined(__i386__)
 
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n)     ((void)0)
-#define save_segments(_p)     ((void)0)
-#define clear_segments()      ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n)     ((void)0)
+#define save_segments(p) ((void)0)
 
 static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
 {
@@ -726,9 +761,9 @@
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
-    unsigned int         cpu = smp_processor_id();
-    struct vcpu  *p = percpu_ctxt[cpu].curr_vcpu;
-    struct vcpu  *n = current;
+    unsigned int          cpu = smp_processor_id();
+    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
+    struct vcpu          *n = current;
 
     if ( !is_idle_task(p->domain) )
     {
@@ -786,23 +821,31 @@
 
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
-    struct vcpu *realprev;
-
-    local_irq_disable();
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(!local_irq_is_enabled());
 
     set_current(next);
 
-    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) || 
-         is_idle_task(next->domain) )
-    {
-        local_irq_enable();
-    }
-    else
+    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
     {
         __context_switch();
-
-        local_irq_enable();
-        
+        percpu_ctxt[cpu].context_not_finalised = 1;
+    }
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+
+    if ( percpu_ctxt[cpu].context_not_finalised )
+    {
+        percpu_ctxt[cpu].context_not_finalised = 0;
+
+        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+
         if ( VMX_DOMAIN(next) )
         {
             vmx_restore_msrs(next);
@@ -810,18 +853,10 @@
         else
         {
             load_LDT(next);
-            load_segments(realprev, next);
-            load_msrs(realprev, next);
-        }
-    }
-
-    /*
-     * We do this late on because it doesn't need to be protected by the
-     * schedule_lock, and because we want this to be the very last use of
-     * 'prev' (after this point, a dying domain's info structure may be freed
-     * without warning). 
-     */
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
+            load_segments(next);
+            load_msrs(next);
+        }
+    }
 
     schedule_tail(next);
     BUG();
@@ -835,12 +870,19 @@
 
 int __sync_lazy_execstate(void)
 {
-    if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
-        return 0;
-    __context_switch();
-    load_LDT(current);
-    clear_segments();
-    return 1;
+    unsigned long flags;
+    int switch_required;
+
+    local_irq_save(flags);
+
+    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+    if ( switch_required )
+        __context_switch();
+
+    local_irq_restore(flags);
+
+    return switch_required;
 }
 
 void sync_lazy_execstate_cpu(unsigned int cpu)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/domain_build.c       Thu Aug 25 22:53:20 2005
@@ -22,16 +22,28 @@
 #include <asm/i387.h>
 #include <asm/shadow.h>
 
-/* opt_dom0_mem: memory allocated to domain 0. */
-static unsigned int opt_dom0_mem;
+static long dom0_nrpages;
+
+/*
+ * dom0_mem:
+ *  If +ve:
+ *   * The specified amount of memory is allocated to domain 0.
+ *  If -ve:
+ *   * All of memory is allocated to domain 0, minus the specified amount.
+ *  If not specified: 
+ *   * All of memory is allocated to domain 0, minus 1/16th which is reserved
+ *     for uses such as DMA buffers (the reservation is clamped to 128MB).
+ */
 static void parse_dom0_mem(char *s)
 {
-    unsigned long long bytes = parse_size_and_unit(s);
-    /* If no unit is specified we default to kB units, not bytes. */
-    if ( isdigit(s[strlen(s)-1]) )
-        opt_dom0_mem = (unsigned int)bytes;
-    else
-        opt_dom0_mem = (unsigned int)(bytes >> 10);
+    unsigned long long bytes;
+    char *t = s;
+    if ( *s == '-' )
+        t++;
+    bytes = parse_size_and_unit(t);
+    dom0_nrpages = bytes >> PAGE_SHIFT;
+    if ( *s == '-' )
+        dom0_nrpages = -dom0_nrpages;
 }
 custom_param("dom0_mem", parse_dom0_mem);
 
@@ -57,11 +69,21 @@
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 
-static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+static struct pfn_info *alloc_chunk(struct domain *d, unsigned long max_pages)
 {
     struct pfn_info *page;
-    unsigned int order = get_order(max * PAGE_SIZE);
-    if ( (max & (max-1)) != 0 )
+    unsigned int order;
+    /*
+     * Allocate up to 2MB at a time:
+     *  1. This prevents overflow of get_order() when allocating more than
+     *     4GB to domain 0 on a PAE machine.
+     *  2. It prevents allocating very large chunks from DMA pools before
+     *     the >4GB pool is fully depleted.
+     */
+    if ( max_pages > (2UL << (20 - PAGE_SHIFT)) )
+        max_pages = 2UL << (20 - PAGE_SHIFT);
+    order = get_order(max_pages << PAGE_SHIFT);
+    if ( (max_pages & (max_pages-1)) != 0 )
         order--;
     while ( (page = alloc_domheap_pages(d, order, 0)) == NULL )
         if ( order-- == 0 )
@@ -74,12 +96,12 @@
                    unsigned long _initrd_start, unsigned long initrd_len,
                    char *cmdline)
 {
-    int i, rc, dom0_pae, xen_pae;
+    int i, rc, dom0_pae, xen_pae, order;
     unsigned long pfn, mfn;
     unsigned long nr_pages;
     unsigned long nr_pt_pages;
-    unsigned long alloc_start;
-    unsigned long alloc_end;
+    unsigned long alloc_spfn;
+    unsigned long alloc_epfn;
     unsigned long count;
     struct pfn_info *page = NULL;
     start_info_t *si;
@@ -137,16 +159,30 @@
 
     printk("*** LOADING DOMAIN 0 ***\n");
 
-    /* By default DOM0 is allocated all available memory. */
     d->max_pages = ~0U;
-    if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
+
+    /*
+     * If domain 0 allocation isn't specified, reserve 1/16th of available
+     * memory for things like DMA buffers. This reservation is clamped to 
+     * a maximum of 128MB.
+     */
+    if ( dom0_nrpages == 0 )
+    {
+        dom0_nrpages = avail_domheap_pages() +
+            ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
+        dom0_nrpages = min(dom0_nrpages / 16, 128L << (20 - PAGE_SHIFT));
+        dom0_nrpages = -dom0_nrpages;
+    }
+
+    /* Negative memory specification means "all memory - specified amount". */
+    if ( dom0_nrpages < 0 )
         nr_pages = avail_domheap_pages() +
             ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
-            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
-    if ( (page = alloc_largest(d, nr_pages)) == NULL )
-        panic("Not enough RAM for DOM0 reservation.\n");
-    alloc_start = page_to_phys(page);
-    alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
+            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+            dom0_nrpages;
+    else
+        nr_pages = dom0_nrpages;
 
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
@@ -166,7 +202,7 @@
         return -EINVAL;
     }
     if (strstr(dsi.xen_section_string, "SHADOW=translate"))
-       opt_dom0_translate = 1;
+        opt_dom0_translate = 1;
 
     /* Align load address to 4MB boundary. */
     dsi.v_start &= ~((1UL<<22)-1);
@@ -215,12 +251,19 @@
 #endif
     }
 
-    if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
-        panic("Insufficient contiguous RAM to build kernel image.\n");
+    order = get_order(v_end - dsi.v_start);
+    if ( (1UL << order) > nr_pages )
+        panic("Domain 0 allocation is too small for kernel image.\n");
+
+    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+    if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
+        panic("Not enough RAM for domain 0 allocation.\n");
+    alloc_spfn = page_to_pfn(page);
+    alloc_epfn = alloc_spfn + d->tot_pages;
 
     printk("PHYSICAL MEMORY ARRANGEMENT:\n"
-           " Dom0 alloc.:   %p->%p",
-           _p(alloc_start), _p(alloc_end));
+           " Dom0 alloc.:   %"PRIphysaddr"->%"PRIphysaddr,
+           pfn_to_phys(alloc_spfn), pfn_to_phys(alloc_epfn));
     if ( d->tot_pages < nr_pages )
         printk(" (%lu pages to be allocated)",
                nr_pages - d->tot_pages);
@@ -249,7 +292,8 @@
         return -ENOMEM;
     }
 
-    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+    mpt_alloc = (vpt_start - dsi.v_start) + 
+        (unsigned long)pfn_to_phys(alloc_spfn);
 
     /*
      * We're basically forcing default RPLs to 1, so that our "what privilege
@@ -306,7 +350,7 @@
 #endif
 
     l2tab += l2_linear_offset(dsi.v_start);
-    mfn = alloc_start >> PAGE_SHIFT;
+    mfn = alloc_spfn;
     for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
     {
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
@@ -428,7 +472,7 @@
     v->arch.guest_table = mk_pagetable(__pa(l4start));
 
     l4tab += l4_table_offset(dsi.v_start);
-    mfn = alloc_start >> PAGE_SHIFT;
+    mfn = alloc_spfn;
     for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
     {
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
@@ -563,24 +607,24 @@
     /* Write the phys->machine and machine->phys table entries. */
     for ( pfn = 0; pfn < d->tot_pages; pfn++ )
     {
-        mfn = pfn + (alloc_start>>PAGE_SHIFT);
+        mfn = pfn + alloc_spfn;
 #ifndef NDEBUG
 #define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
         if ( !opt_dom0_translate && (pfn > REVERSE_START) )
-            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+            mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
         ((u32 *)vphysmap_start)[pfn] = mfn;
         machine_to_phys_mapping[mfn] = pfn;
     }
     while ( pfn < nr_pages )
     {
-        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+        if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
             panic("Not enough RAM for DOM0 reservation.\n");
         while ( pfn < d->tot_pages )
         {
             mfn = page_to_pfn(page);
 #ifndef NDEBUG
-#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
+#define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
 #endif
             ((u32 *)vphysmap_start)[pfn] = mfn;
             machine_to_phys_mapping[mfn] = pfn;
@@ -614,19 +658,21 @@
     /* DOM0 gets access to everything. */
     physdev_init_dom0(d);
 
+    init_domain_time(d);
+
     set_bit(_DOMF_constructed, &d->domain_flags);
 
     new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
 
     if ( opt_dom0_shadow || opt_dom0_translate )
     {
-       printk("dom0: shadow enable\n");
+        printk("dom0: shadow enable\n");
         shadow_mode_enable(d, (opt_dom0_translate
                                ? SHM_enable | SHM_refcounts | SHM_translate
                                : SHM_enable));
         if ( opt_dom0_translate )
         {
-           printk("dom0: shadow translate\n");
+            printk("dom0: shadow translate\n");
 #if defined(__i386__) && defined(CONFIG_X86_PAE)
             printk("FIXME: PAE code needed here: %s:%d (%s)\n",
                    __FILE__, __LINE__, __FUNCTION__);
@@ -659,7 +705,7 @@
         }
 
         update_pagetables(v); /* XXX SMP */
-       printk("dom0: shadow setup done\n");
+        printk("dom0: shadow setup done\n");
     }
 
     return 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/io_apic.c    Thu Aug 25 22:53:20 2005
@@ -1751,8 +1751,30 @@
     
     pin = (address - 0x10) >> 1;
 
+    *(u32 *)&rte = val;
     rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-    *(int *)&rte = val;
+
+    /*
+     * What about weird destination types?
+     *  SMI:    Ignore? Ought to be set up by the BIOS.
+     *  NMI:    Ignore? Watchdog functionality is Xen's concern.
+     *  INIT:   Definitely ignore: probably a guest OS bug.
+     *  ExtINT: Ignore? Linux only asserts this at start of day.
+     * For now, print a message and return an error. We can fix up on demand.
+     */
+    if ( rte.delivery_mode > dest_LowestPrio )
+    {
+        printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
+        printk("       APIC=%d/%d, lo-reg=%x\n", apicid, pin, val);
+        return -EINVAL;
+    }
+
+    /*
+     * The guest does not know physical APIC arrangement (flat vs. cluster).
+     * Apply genapic conventions for this platform.
+     */
+    rte.delivery_mode = INT_DELIVERY_MODE;
+    rte.dest_mode     = INT_DEST_MODE;
 
     if ( rte.vector >= FIRST_DEVICE_VECTOR )
     {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/mm.c Thu Aug 25 22:53:20 2005
@@ -95,6 +95,7 @@
 #include <xen/irq.h>
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
+#include <xen/event.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
@@ -122,7 +123,7 @@
 static void free_l1_table(struct pfn_info *page);
 
 static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
-                        unsigned int type);
+                        unsigned long type);
 static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
 
 /* Used to defer flushing of memory structures. */
@@ -138,7 +139,7 @@
  * Returns the current foreign domain; defaults to the currently-executing
  * domain if a foreign override hasn't been specified.
  */
-#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current->domain)
+#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ?: current->domain)
 
 /* Private domain structs for DOMID_XEN and DOMID_IO. */
 static struct domain *dom_xen, *dom_io;
@@ -354,7 +355,7 @@
 
 
 static int get_page_and_type_from_pagenr(unsigned long page_nr, 
-                                         u32 type,
+                                         unsigned long type,
                                          struct domain *d)
 {
     struct pfn_info *page = &frame_table[page_nr];
@@ -365,7 +366,7 @@
     if ( unlikely(!get_page_type(page, type)) )
     {
         if ( (type & PGT_type_mask) != PGT_l1_page_table )
-            MEM_LOG("Bad page type for pfn %lx (%08x)", 
+            MEM_LOG("Bad page type for pfn %lx (%" PRtype_info ")", 
                     page_nr, page->u.inuse.type_info);
         put_page(page);
         return 0;
@@ -390,7 +391,7 @@
 get_linear_pagetable(
     root_pgentry_t re, unsigned long re_pfn, struct domain *d)
 {
-    u32 x, y;
+    unsigned long x, y;
     struct pfn_info *page;
     unsigned long pfn;
 
@@ -443,7 +444,7 @@
 
     if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
     {
-        MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+        MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
         return 0;
     }
 
@@ -489,7 +490,7 @@
 
     if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
     {
-        MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
+        MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return 0;
     }
 
@@ -522,7 +523,7 @@
 
     if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
     {
-        MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
+        MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
         return 0;
     }
 
@@ -544,7 +545,8 @@
 
 static int 
 get_page_from_l4e(
-    l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+    l4_pgentry_t l4e, unsigned long pfn, 
+    struct domain *d, unsigned long vaddr)
 {
     int rc;
 
@@ -555,12 +557,15 @@
 
     if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
     {
-        MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
+        MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
         return 0;
     }
 
+    vaddr >>= L4_PAGETABLE_SHIFT;
+    vaddr <<= PGT_va_shift;
     rc = get_page_and_type_from_pagenr(
-        l4e_get_pfn(l4e), PGT_l3_page_table, d);
+        l4e_get_pfn(l4e), 
+        PGT_l3_page_table | vaddr, d);
 
     if ( unlikely(!rc) )
         return get_linear_pagetable(l4e, pfn, d);
@@ -731,7 +736,7 @@
         pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
             (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ?
             l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR) :
-            l2e_empty();
+        l2e_empty();
     unmap_domain_page(pl2e);
 
     return 1;
@@ -750,13 +755,47 @@
     return 1;
 }
 
+#elif CONFIG_X86_64
+# define create_pae_xen_mappings(pl3e) (1)
+
+static inline int l1_backptr(
+    unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+    unsigned long l2_backptr = l2_type & PGT_va_mask;
+    BUG_ON(l2_backptr == PGT_va_unknown);
+
+    *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | 
+        (offset_in_l2 << L2_PAGETABLE_SHIFT);
+    return 1;
+}
+
+static inline int l2_backptr(
+    unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type)
+{
+    unsigned long l3_backptr = l3_type & PGT_va_mask;
+    BUG_ON(l3_backptr == PGT_va_unknown);
+
+    *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) | 
+        (offset_in_l3 << L3_PAGETABLE_SHIFT);
+    return 1;
+}
+
+static inline int l3_backptr(
+    unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type)
+{
+    unsigned long l4_backptr = l4_type & PGT_va_mask;
+    BUG_ON(l4_backptr == PGT_va_unknown);
+
+    *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT);
+    return 1;
+}
 #else
 # define create_pae_xen_mappings(pl3e) (1)
 # define l1_backptr(bp,l2o,l2t) \
     ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; })
 #endif
 
-static int alloc_l2_table(struct pfn_info *page, unsigned int type)
+static int alloc_l2_table(struct pfn_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_pfn(page);
@@ -808,7 +847,7 @@
 
 
 #if CONFIG_PAGING_LEVELS >= 3
-static int alloc_l3_table(struct pfn_info *page)
+static int alloc_l3_table(struct pfn_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_pfn(page);
@@ -818,10 +857,23 @@
 
     ASSERT(!shadow_mode_refcounts(d));
 
+#ifdef CONFIG_X86_PAE
+    if ( pfn >= 0x100000 )
+    {
+        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
+        return 0;
+    }
+#endif
+
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
+#if CONFIG_PAGING_LEVELS >= 4
+        if ( !l2_backptr(&vaddr, i, type) )
+            goto fail;
+#else
         vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT;
+#endif
         if ( is_guest_l3_slot(i) &&
              unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
             goto fail;
@@ -842,15 +894,16 @@
     return 0;
 }
 #else
-#define alloc_l3_table(page) (0)
+#define alloc_l3_table(page, type) (0)
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4
-static int alloc_l4_table(struct pfn_info *page)
+static int alloc_l4_table(struct pfn_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_pfn(page);
     l4_pgentry_t  *pl4e = page_to_virt(page);
+    unsigned long vaddr;
     int            i;
 
     /* See the code in shadow_promote() to understand why this is here. */
@@ -860,9 +913,14 @@
     ASSERT(!shadow_mode_refcounts(d));
 
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( !l3_backptr(&vaddr, i, type) )
+            goto fail;
+
         if ( is_guest_l4_slot(i) &&
-             unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+             unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
             goto fail;
+    }
 
     /* Xen private mappings. */
     memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
@@ -885,7 +943,7 @@
     return 0;
 }
 #else
-#define alloc_l4_table(page) (0)
+#define alloc_l4_table(page, type) (0)
 #endif
 
 
@@ -967,7 +1025,7 @@
          unlikely(o != l1e_get_intpte(ol1e)) )
     {
         MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
-                ": saw %" PRIpte "\n",
+                ": saw %" PRIpte,
                 l1e_get_intpte(ol1e),
                 l1e_get_intpte(nl1e),
                 o);
@@ -993,7 +1051,7 @@
     {
         if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L1 flags %x\n",
+            MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
             return 0;
         }
@@ -1037,10 +1095,10 @@
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
                         l2_pgentry_t nl2e, 
                         unsigned long pfn,
-                        unsigned int type)
+                        unsigned long type)
 {
     l2_pgentry_t ol2e;
-    unsigned long vaddr;
+    unsigned long vaddr = 0;
 
     if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
     {
@@ -1055,7 +1113,7 @@
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L2 flags %x\n",
+            MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
         }
@@ -1074,10 +1132,9 @@
             return 0;
         }
     }
-    else
-    {
-        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
-            return 0;
+    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+    {
+        return 0;
     }
 
     put_page_from_l2e(ol2e, pfn);
@@ -1090,7 +1147,8 @@
 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
 static int mod_l3_entry(l3_pgentry_t *pl3e, 
                         l3_pgentry_t nl3e, 
-                        unsigned long pfn)
+                        unsigned long pfn,
+                        unsigned long type)
 {
     l3_pgentry_t ol3e;
     unsigned long vaddr;
@@ -1117,7 +1175,7 @@
     {
         if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L3 flags %x\n",
+            MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
             return 0;
         }
@@ -1126,28 +1184,29 @@
         if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
             return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
 
+#if CONFIG_PAGING_LEVELS >= 4
+        if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) ||
+             unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
+            return 0; 
+#else
         vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
             << L3_PAGETABLE_SHIFT;
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
             return 0;
+#endif
 
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
         {
-            BUG_ON(!create_pae_xen_mappings(pl3e));
             put_page_from_l3e(nl3e, pfn);
             return 0;
         }
-
-        put_page_from_l3e(ol3e, pfn);
-        return 1;
-    }
-
-    if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
-    {
-        BUG_ON(!create_pae_xen_mappings(pl3e));
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+    {
         return 0;
     }
 
+    BUG_ON(!create_pae_xen_mappings(pl3e));
     put_page_from_l3e(ol3e, pfn);
     return 1;
 }
@@ -1159,9 +1218,11 @@
 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
 static int mod_l4_entry(l4_pgentry_t *pl4e, 
                         l4_pgentry_t nl4e, 
-                        unsigned long pfn)
+                        unsigned long pfn,
+                        unsigned long type)
 {
     l4_pgentry_t ol4e;
+    unsigned long vaddr;
 
     if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
     {
@@ -1176,7 +1237,7 @@
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L4 flags %x\n",
+            MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return 0;
         }
@@ -1185,7 +1246,8 @@
         if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
             return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e);
 
-        if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
+        if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) ||
+             unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) )
             return 0;
 
         if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
@@ -1193,13 +1255,11 @@
             put_page_from_l4e(nl4e, pfn);
             return 0;
         }
-        
-        put_page_from_l4e(ol4e, pfn);
-        return 1;
-    }
-
-    if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+    {
         return 0;
+    }
 
     put_page_from_l4e(ol4e, pfn);
     return 1;
@@ -1207,7 +1267,7 @@
 
 #endif
 
-int alloc_page_type(struct pfn_info *page, unsigned int type)
+int alloc_page_type(struct pfn_info *page, unsigned long type)
 {
     switch ( type & PGT_type_mask )
     {
@@ -1216,14 +1276,14 @@
     case PGT_l2_page_table:
         return alloc_l2_table(page, type);
     case PGT_l3_page_table:
-        return alloc_l3_table(page);
+        return alloc_l3_table(page, type);
     case PGT_l4_page_table:
-        return alloc_l4_table(page);
+        return alloc_l4_table(page, type);
     case PGT_gdt_page:
     case PGT_ldt_page:
         return alloc_segdesc_page(page);
     default:
-        printk("Bad type in alloc_page_type %x t=%x c=%x\n", 
+        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
                type, page->u.inuse.type_info,
                page->count_info);
         BUG();
@@ -1233,7 +1293,7 @@
 }
 
 
-void free_page_type(struct pfn_info *page, unsigned int type)
+void free_page_type(struct pfn_info *page, unsigned long type)
 {
     struct domain *owner = page_get_owner(page);
     unsigned long gpfn;
@@ -1273,7 +1333,7 @@
 #endif
 
     default:
-        printk("%s: type %x pfn %lx\n",__FUNCTION__,
+        printk("%s: type %lx pfn %lx\n",__FUNCTION__,
                type, page_to_pfn(page));
         BUG();
     }
@@ -1282,7 +1342,7 @@
 
 void put_page_type(struct pfn_info *page)
 {
-    u32 nx, x, y = page->u.inuse.type_info;
+    unsigned long nx, x, y = page->u.inuse.type_info;
 
  again:
     do {
@@ -1335,9 +1395,9 @@
 }
 
 
-int get_page_type(struct pfn_info *page, u32 type)
-{
-    u32 nx, x, y = page->u.inuse.type_info;
+int get_page_type(struct pfn_info *page, unsigned long type)
+{
+    unsigned long nx, x, y = page->u.inuse.type_info;
 
  again:
     do {
@@ -1388,8 +1448,11 @@
                 {
                     if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
                          ((type & PGT_type_mask) != PGT_l1_page_table) )
-                        MEM_LOG("Bad type (saw %08x != exp %08x) for pfn %lx",
-                                x, type, page_to_pfn(page));
+                        MEM_LOG("Bad type (saw %" PRtype_info
+                                "!= exp %" PRtype_info ") "
+                                "for mfn %lx (pfn %x)",
+                                x, type, page_to_pfn(page),
+                                machine_to_phys_mapping[page_to_pfn(page)]);
                     return 0;
                 }
                 else if ( (x & PGT_va_mask) == PGT_va_mutable )
@@ -1427,8 +1490,8 @@
         /* Try to validate page type; drop the new reference on failure. */
         if ( unlikely(!alloc_page_type(page, type)) )
         {
-            MEM_LOG("Error while validating pfn %lx for type %08x."
-                    " caf=%08x taf=%08x",
+            MEM_LOG("Error while validating pfn %lx for type %" PRtype_info "."
+                    " caf=%08x taf=%" PRtype_info,
                     page_to_pfn(page), type,
                     page->count_info,
                     page->u.inuse.type_info);
@@ -1537,7 +1600,7 @@
             percpu_info[cpu].foreign = dom_io;
             break;
         default:
-            MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id);
+            MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
             okay = 0;
             break;
         }
@@ -1596,7 +1659,7 @@
 {
     struct mmuext_op op;
     int rc = 0, i = 0, okay, cpu = smp_processor_id();
-    unsigned int type, done = 0;
+    unsigned long type, done = 0;
     struct pfn_info *page;
     struct vcpu *v = current;
     struct domain *d = v->domain, *e;
@@ -1674,16 +1737,16 @@
 
 #ifndef CONFIG_X86_PAE /* Unsafe on PAE because of Xen-private mappings. */
         case MMUEXT_PIN_L2_TABLE:
-            type = PGT_l2_page_table;
+            type = PGT_l2_page_table | PGT_va_mutable;
             goto pin_page;
 #endif
 
         case MMUEXT_PIN_L3_TABLE:
-            type = PGT_l3_page_table;
+            type = PGT_l3_page_table | PGT_va_mutable;
             goto pin_page;
 
         case MMUEXT_PIN_L4_TABLE:
-            type = PGT_l4_page_table;
+            type = PGT_l4_page_table | PGT_va_mutable;
             goto pin_page;
 
         case MMUEXT_UNPIN_TABLE:
@@ -1770,7 +1833,7 @@
         case MMUEXT_FLUSH_CACHE:
             if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
             {
-                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
+                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
                 okay = 0;
             }
             else
@@ -1784,7 +1847,7 @@
             if ( shadow_mode_external(d) )
             {
                 MEM_LOG("ignoring SET_LDT hypercall from external "
-                        "domain %u\n", d->domain_id);
+                        "domain %u", d->domain_id);
                 okay = 0;
                 break;
             }
@@ -1855,7 +1918,7 @@
                  unlikely(IS_XEN_HEAP_FRAME(page)) )
             {
                 MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
-                        "page is in Xen heap (%lx), or dom is dying (%ld).\n",
+                        "page is in Xen heap (%lx), or dom is dying (%ld).",
                         e->tot_pages, e->max_pages, op.mfn, e->domain_flags);
                 okay = 0;
                 goto reassign_fail;
@@ -1876,9 +1939,9 @@
                      unlikely(_nd != _d) )
                 {
                     MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p,"
-                            " caf=%08x, taf=%08x\n", page_to_pfn(page),
-                            d, d->domain_id, unpickle_domptr(_nd), x,
-                            page->u.inuse.type_info);
+                            " caf=%08x, taf=%" PRtype_info,
+                            page_to_pfn(page), d, d->domain_id,
+                            unpickle_domptr(_nd), x, page->u.inuse.type_info);
                     okay = 0;
                     goto reassign_fail;
                 }
@@ -1951,7 +2014,7 @@
     unsigned int cmd, done = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    u32 type_info;
+    unsigned long type_info;
     struct domain_mmap_cache mapcache, sh_mapcache;
 
     LOCK_BIGLOCK(d);
@@ -2041,7 +2104,8 @@
                     l1e = l1e_from_intpte(req.val);
                     okay = mod_l1_entry(va, l1e);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache);
+                        shadow_l1_normal_pt_update(
+                            d, req.ptr, l1e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2054,24 +2118,28 @@
 
                     /* FIXME: doesn't work with PAE */
                     l2e = l2e_from_intpte(req.val);
-                    okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info);
+                    okay = mod_l2_entry(
+                        (l2_pgentry_t *)va, l2e, mfn, type_info);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
+                        shadow_l2_normal_pt_update(
+                            d, req.ptr, l2e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
 #if CONFIG_PAGING_LEVELS >= 3
             case PGT_l3_page_table:
                 ASSERT( !shadow_mode_refcounts(d) );
-                if ( likely(get_page_type(page, PGT_l3_page_table)) )
+                if ( likely(get_page_type(
+                    page, type_info & (PGT_type_mask|PGT_va_mask))) )
                 {
                     l3_pgentry_t l3e;
 
                     /* FIXME: doesn't work with PAE */
                     l3e = l3e_from_intpte(req.val);
-                    okay = mod_l3_entry(va, l3e, mfn);
+                    okay = mod_l3_entry(va, l3e, mfn, type_info);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l3_normal_pt_update(d, req.ptr, l3e, &sh_mapcache);
+                        shadow_l3_normal_pt_update(
+                            d, req.ptr, l3e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2079,14 +2147,16 @@
 #if CONFIG_PAGING_LEVELS >= 4
             case PGT_l4_page_table:
                 ASSERT( !shadow_mode_refcounts(d) );
-                if ( likely(get_page_type(page, PGT_l4_page_table)) )
+                if ( likely(get_page_type(
+                    page, type_info & (PGT_type_mask|PGT_va_mask))) )
                 {
                     l4_pgentry_t l4e;
 
                     l4e = l4e_from_intpte(req.val);
-                    okay = mod_l4_entry(va, l4e, mfn);
+                    okay = mod_l4_entry(va, l4e, mfn, type_info);
                     if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l4_normal_pt_update(d, req.ptr, l4e, &sh_mapcache);
+                        shadow_l4_normal_pt_update(
+                            d, req.ptr, l4e, &sh_mapcache);
                     put_page_type(page);
                 }
                 break;
@@ -2108,7 +2178,7 @@
                         }
                     }
 
-                    *(unsigned long *)va = req.val;
+                    *(intpte_t *)va = req.val;
                     okay = 1;
 
                     if ( shadow_mode_enabled(d) )
@@ -2133,7 +2203,8 @@
             if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
             {
                 shadow_lock(FOREIGNDOM);
-                printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx for dom%d\n",
+                printk("privileged guest dom%d requests pfn=%lx to "
+                       "map mfn=%lx for dom%d\n",
                        d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id);
                 set_machinetophys(mfn, gpfn);
                 set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
@@ -2199,60 +2270,213 @@
     return rc;
 }
 
-/* This function assumes the caller is holding the domain's BIGLOCK
- * and is running in a shadow mode
- */
-int update_grant_va_mapping(unsigned long va,
-                            l1_pgentry_t _nl1e, 
-                            struct domain *d,
-                            struct vcpu *v)
-{
-    /* Caller must:
-     * . own d's BIGLOCK 
-     * . already have 'get_page' correctly on the to-be-installed nl1e
-     * . be responsible for flushing the TLB
-     * . check PTE being installed isn't DISALLOWED
+/* Install _nl1e in the L1 PTE at guest address pte_addr; GNTST_* result. */
+int update_grant_pte_mapping(
+    unsigned long pte_addr, l1_pgentry_t _nl1e, 
+    struct domain *d, struct vcpu *v)
+{
+    int rc = GNTST_okay;
+    void *va;
+    unsigned long gpfn, mfn;
+    struct pfn_info *page;
+    u32 type_info;
+    l1_pgentry_t ol1e;
+
+    ASSERT(spin_is_locked(&d->big_lock));
+    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT((l1e_get_flags(_nl1e) & L1_DISALLOW_MASK) == 0);
+
+    gpfn = pte_addr >> PAGE_SHIFT;
+    mfn = __gpfn_to_mfn(d, gpfn);
+
+    if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
+    {
+        MEM_LOG("Could not get page for normal update");
+        return GNTST_general_error;
+    }
+    /* Map the frame and point va at the PTE's offset within the page. */
+    va = map_domain_page(mfn);
+    va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK));
+    page = pfn_to_page(mfn);
+
+    type_info = page->u.inuse.type_info;
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+         !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
+    {
+        MEM_LOG("Grant map attempted to update a non-L1 page");
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) ||
+         !update_l1e(va, ol1e, _nl1e) )
+    {
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    } 
+
+    put_page_from_l1e(ol1e, d);
+    /* Report GNTST_flush_all when the replaced entry was present. */
+    rc = (l1e_get_flags(ol1e) & _PAGE_PRESENT) ? GNTST_flush_all : GNTST_okay;
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+    {
+        struct domain_mmap_cache sh_mapcache;
+        domain_mmap_cache_init(&sh_mapcache);
+        shadow_l1_normal_pt_update(d, pte_addr, _nl1e, &sh_mapcache);
+        domain_mmap_cache_destroy(&sh_mapcache);
+    }
+
+    put_page_type(page);
+    /* Common exit: the success path falls through to this label as well. */
+ failed:
+    unmap_domain_page(va);
+    put_page(page);
+    return rc;
+}
+
+/* Zap the L1 PTE located at address addr (frame number translated for
+ * domain d via __gpfn_to_mfn), provided it currently maps frame.
+ * Returns GNTST_okay on success, GNTST_general_error otherwise. */
+int clear_grant_pte_mapping(
+    unsigned long addr, unsigned long frame, struct domain *d)
+{
+    int rc = GNTST_okay;
+    void *va;
+    unsigned long gpfn, mfn;
+    struct pfn_info *page;
+    u32 type_info;
+    l1_pgentry_t ol1e;
+
+    ASSERT(!shadow_mode_refcounts(d));
+
+    gpfn = addr >> PAGE_SHIFT;
+    mfn = __gpfn_to_mfn(d, gpfn);
+    if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
+    {
+        MEM_LOG("Could not get page for normal update");
+        return GNTST_general_error;
+    }
+
+    va = map_domain_page(mfn);
+    va = (void *)((unsigned long)va + (addr & ~PAGE_MASK));
+    page = pfn_to_page(mfn);
+    type_info = page->u.inuse.type_info;
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+         !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
+    {
+        MEM_LOG("Grant map attempted to update a non-L1 page");
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
+    {
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    /* Check that the PTE at addr maps frame (same test as the va variant). */
+    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
+    {
+        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
+                (unsigned long)l1e_get_intpte(ol1e), addr, frame);
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    /* Delete pagetable entry. */
+    if ( unlikely(__put_user(0, (intpte_t *)va)))
+    {
+        MEM_LOG("Cannot delete PTE entry at %p", va);
+        put_page_type(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+    {
+        struct domain_mmap_cache sh_mapcache;
+        domain_mmap_cache_init(&sh_mapcache);
+        shadow_l1_normal_pt_update(d, addr, l1e_empty(), &sh_mapcache);
+        domain_mmap_cache_destroy(&sh_mapcache);
+    }
+    put_page_type(page);
+
+ failed:
+    unmap_domain_page(va);
+    put_page(page);
+    return rc;
+}
+
+
+int update_grant_va_mapping(
+    unsigned long va, l1_pgentry_t _nl1e, struct domain *d, struct vcpu *v)
+{
+    int rc = GNTST_okay;
+    l1_pgentry_t *pl1e, ol1e;
+    
+    ASSERT(spin_is_locked(&d->big_lock));
+    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT((l1e_get_flags(_nl1e) & L1_DISALLOW_MASK) == 0);
+
+    /*
+     * This is actually overkill - we don't need to sync the L1 itself,
+     * just everything involved in getting to this L1 (i.e. we need
+     * linear_pg_table[l1_linear_offset(va)] to be in sync)...
      */
-
-    int             rc = 0;
-    l1_pgentry_t   *pl1e;
-    l1_pgentry_t    ol1e;
-    
-    cleanup_writable_pagetable(d);
-
-    // This is actually overkill - we don't need to sync the L1 itself,
-    // just everything involved in getting to this L1 (i.e. we need
-    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
-    //
     __shadow_sync_va(v, va);
 
     pl1e = &linear_pg_table[l1_linear_offset(va)];
 
-    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        rc = -EINVAL;
-    else if ( !shadow_mode_refcounts(d) )
-    {
-        if ( update_l1e(pl1e, ol1e, _nl1e) )
-        {
-            put_page_from_l1e(ol1e, d);
-            if ( l1e_get_flags(ol1e) & _PAGE_PRESENT )
-                rc = 0; /* Caller needs to invalidate TLB entry */
-            else
-                rc = 1; /* Caller need not invalidate TLB entry */
-        }
-        else
-            rc = -EINVAL;
-    }
-    else
-    {
-        printk("grant tables and shadow mode currently don't work together\n");
-        BUG();
-    }
+    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
+         !update_l1e(pl1e, ol1e, _nl1e) )
+        return GNTST_general_error;
+
+    put_page_from_l1e(ol1e, d);
+
+    rc = (l1e_get_flags(ol1e) & _PAGE_PRESENT) ? GNTST_flush_one : GNTST_okay;
 
     if ( unlikely(shadow_mode_enabled(d)) )
         shadow_do_update_va_mapping(va, _nl1e, v);
 
     return rc;
+}
+
+/*
+ * Zap the linear-page-table L1 entry for virtual address addr, provided it
+ * currently maps frame. Returns GNTST_okay or GNTST_general_error.
+ */
+int clear_grant_va_mapping(unsigned long addr, unsigned long frame)
+{
+    l1_pgentry_t *pl1e, ol1e;
+
+    pl1e = &linear_pg_table[l1_linear_offset(addr)];
+    if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
+    {
+        MEM_LOG("Could not find PTE entry for address %lx", addr);
+        return GNTST_general_error;
+    }
+
+    /* Check that the virtual address supplied is actually mapped to frame. */
+    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
+    {
+        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
+                l1e_get_pfn(ol1e), addr, frame);
+        return GNTST_general_error;
+    }
+
+    /* Delete pagetable entry. */
+    if ( unlikely(__put_user(0, &pl1e->l1)) )
+    {
+        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
+        return GNTST_general_error;
+    }
+
+    return GNTST_okay;
 }
 
 
@@ -2289,10 +2513,11 @@
                       (shadow_mode_translate(d) ||
                        shadow_mode_translate(percpu_info[cpu].foreign))) )
         {
-            // The foreign domain's pfn's are in a different namespace.
-            // There's not enough information in just a gpte to figure out
-            // how to (re-)shadow this entry.
-            //
+            /*
+             * The foreign domain's pfn's are in a different namespace. There's
+             * not enough information in just a gpte to figure out how to
+             * (re-)shadow this entry.
+             */
             domain_crash();
         }
     
@@ -2409,14 +2634,16 @@
 
     if ( entries > FIRST_RESERVED_GDT_ENTRY )
         return -EINVAL;
-    
+
     shadow_sync_all(d);
 
     /* Check the pages in the new GDT. */
-    for ( i = 0; i < nr_pages; i++ )
-        if ( ((pfn = frames[i]) >= max_page) ||
-             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
+    for ( i = 0; i < nr_pages; i++ ) {
+        pfn = frames[i];
+        if ((pfn >= max_page) ||
+            !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
             goto fail;
+    }
 
     /* Tear down the old GDT. */
     destroy_gdt(v);
@@ -2463,22 +2690,24 @@
 }
 
 
-long do_update_descriptor(unsigned long pa, u64 desc)
+long do_update_descriptor(u64 pa, u64 desc)
 {
     struct domain *dom = current->domain;
     unsigned long gpfn = pa >> PAGE_SHIFT;
     unsigned long mfn;
-    unsigned int  offset = (pa & ~PAGE_MASK) / sizeof(struct desc_struct);
+    unsigned int  offset;
     struct desc_struct *gdt_pent, d;
     struct pfn_info *page;
     long ret = -EINVAL;
 
+    offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
+
     *(u64 *)&d = desc;
 
     LOCK_BIGLOCK(dom);
 
     if ( !VALID_MFN(mfn = __gpfn_to_mfn(dom, gpfn)) ||
-         ((pa % sizeof(struct desc_struct)) != 0) ||
+         (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
          (mfn >= max_page) ||
          !check_descriptor(&d) )
     {
@@ -2547,7 +2776,7 @@
  * Writable Pagetables
  */
 
-#ifdef VERBOSE
+#ifdef VVERBOSE
 int ptwr_debug = 0x0;
 #define PTWR_PRINTK(_f, _a...) \
  do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
@@ -2556,18 +2785,128 @@
 #define PTWR_PRINTK(_f, _a...) ((void)0)
 #endif
 
+
+#ifdef PERF_ARRAYS
+
+/**************** writeable pagetables profiling functions *****************/
+
+#define ptwr_eip_buckets        256
+
+int ptwr_eip_stat_threshold[] = {1, 10, 50, 100, L1_PAGETABLE_ENTRIES};
+
+#define ptwr_eip_stat_thresholdN (sizeof(ptwr_eip_stat_threshold)/sizeof(int))
+
+struct {
+    unsigned long eip;
+    domid_t       id;
+    u32           val[ptwr_eip_stat_thresholdN];
+} typedef ptwr_eip_stat_t;
+
+ptwr_eip_stat_t ptwr_eip_stats[ptwr_eip_buckets];
+
+/* Bucket index for (eip, id): XOR eip with itself shifted by 8/16/24 bits. */
+static inline unsigned int ptwr_eip_stat_hash( unsigned long eip, domid_t id )
+{
+    return (id ^ eip ^ (eip>>8) ^ (eip>>16) ^ (eip>>24)) % ptwr_eip_buckets;
+}
+
+/* Increment *n; on wrap-around pin it at ~0 and halve all table counters. */
+static void ptwr_eip_stat_inc(u32 *n)
+{
+    int i, j;
+
+    if ( ++(*n) != 0 )
+        return;
+
+    *n = ~0; /* wrapped to zero: pin at the maximum before re-scaling */
+    /* Re-scale all buckets. */
+    for ( i = 0; i <ptwr_eip_buckets; i++ )
+        for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
+            ptwr_eip_stats[i].val[j] >>= 1;
+}
+
+/* Record one sample for (eip, id) in the bin selected by 'modified'. */
+static void ptwr_eip_stat_update(unsigned long eip, domid_t id, int modified)
+{
+    int i, j, b;
+
+    i = b = ptwr_eip_stat_hash(eip, id);
+
+    do
+    {
+        if ( !ptwr_eip_stats[i].eip )
+        {
+            /* Free slot (eip == 0): claim it for this (eip, id) pair. */
+            ptwr_eip_stats[i].eip = eip;
+            ptwr_eip_stats[i].id = id;
+            memset(ptwr_eip_stats[i].val,0, sizeof(ptwr_eip_stats[i].val));
+        }
+        /* Match the domain too: probing can reach another domain's slot. */
+        if ( ptwr_eip_stats[i].eip == eip && ptwr_eip_stats[i].id == id )
+        {
+            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
+                if ( modified <= ptwr_eip_stat_threshold[j] )
+                    break;
+            BUG_ON(j >= ptwr_eip_stat_thresholdN);
+            ptwr_eip_stat_inc(&ptwr_eip_stats[i].val[j]);
+            return;
+        }
+
+        i = (i+1) % ptwr_eip_buckets;
+    }
+    while ( i != b );
+    /* Every slot is held by some other (eip, id): dump the table, restart. */
+    printk("ptwr_eip_stat: too many EIPs in use!\n");
+    ptwr_eip_stat_print();
+    ptwr_eip_stat_reset();
+}
+
+void ptwr_eip_stat_reset(void)
+{
+    memset(ptwr_eip_stats, 0, sizeof(ptwr_eip_stats));
+}
+
+/* Print every in-use statistics entry, grouped by owning domain. */
+void ptwr_eip_stat_print(void)
+{
+    struct domain *e;
+    domid_t d;
+    int i, j;
+
+    for_each_domain( e )
+    {
+        d = e->domain_id;
+        for ( i = 0; i < ptwr_eip_buckets; i++ )
+        {
+            /* Skip empty slots (eip == 0) and other domains' entries. */
+            if ( !ptwr_eip_stats[i].eip || ptwr_eip_stats[i].id != d )
+                continue;
+
+            printk("D %d  eip %08lx ",
+                   ptwr_eip_stats[i].id, ptwr_eip_stats[i].eip);
+            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
+                printk("<=%u %4u \t",
+                       ptwr_eip_stat_threshold[j],
+                       ptwr_eip_stats[i].val[j]);
+            printk("\n");
+        }
+    }
+}
+
+#else /* PERF_ARRAYS */
+
+#define ptwr_eip_stat_update(eip, id, modified) ((void)0)
+
+#endif
+
+/*******************************************************************/
+
 /* Re-validate a given p.t. page, given its prior snapshot */
-int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
+int revalidate_l1(
+    struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
 {
     l1_pgentry_t ol1e, nl1e;
     int modified = 0, i;
-
-#if 0
-    if ( d->domain_id )
-        printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
-               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)l1page)]),
-               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)snapshot)]));
-#endif
 
     for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
     {
@@ -2593,7 +2932,7 @@
 
         if ( unlikely(!get_page_from_l1e(nl1e, d)) )
         {
-            MEM_LOG("ptwr: Could not re-validate l1 page\n");
+            MEM_LOG("ptwr: Could not re-validate l1 page");
             /*
              * Make the remaining p.t's consistent before crashing, so the
              * reference counts are correct.
@@ -2614,24 +2953,34 @@
 /* Flush the given writable p.t. page and write-protect it again. */
 void ptwr_flush(struct domain *d, const int which)
 {
-    unsigned long  pte, *ptep, l1va;
-    l1_pgentry_t  *pl1e;
+    unsigned long l1va;
+    l1_pgentry_t  *pl1e, pte, *ptep;
     l2_pgentry_t  *pl2e;
     unsigned int   modified;
 
+#ifdef CONFIG_X86_64
+    struct vcpu *v = current;
+    extern void toggle_guest_mode(struct vcpu *);
+    int user_mode = !(v->arch.flags & TF_kernel_mode);
+#endif
+
     ASSERT(!shadow_mode_enabled(d));
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
-        write_ptbase(d->arch.ptwr[which].vcpu);
+        /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
+        write_cr3(pagetable_get_paddr(
+            d->arch.ptwr[which].vcpu->arch.guest_table));
+    else
+        TOGGLE_MODE();
 
     l1va = d->arch.ptwr[which].l1va;
-    ptep = (unsigned long *)&linear_pg_table[l1_linear_offset(l1va)];
+    ptep = (l1_pgentry_t *)&linear_pg_table[l1_linear_offset(l1va)];
 
     /*
      * STEP 1. Write-protect the p.t. page so no more updates can occur.
      */
 
-    if ( unlikely(__get_user(pte, ptep)) )
+    if ( unlikely(__get_user(pte.l1, &ptep->l1)) )
     {
         MEM_LOG("ptwr: Could not read pte at %p", ptep);
         /*
@@ -2640,9 +2989,9 @@
          */
         BUG();
     }
-    PTWR_PRINTK("[%c] disconnected_l1va at %p is %lx\n",
-                PTWR_PRINT_WHICH, ptep, pte);
-    pte &= ~_PAGE_RW;
+    PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
+                PTWR_PRINT_WHICH, ptep, pte.l1);
+    l1e_remove_flags(pte, _PAGE_RW);
 
     /* Write-protect the p.t. page in the guest page table. */
     if ( unlikely(__put_user(pte, ptep)) )
@@ -2658,8 +3007,8 @@
     /* Ensure that there are no stale writable mappings in any TLB. */
     /* NB. INVLPG is a serialising instruction: flushes pending updates. */
     flush_tlb_one_mask(d->cpumask, l1va);
-    PTWR_PRINTK("[%c] disconnected_l1va at %p now %lx\n",
-                PTWR_PRINT_WHICH, ptep, pte);
+    PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
+                PTWR_PRINT_WHICH, ptep, pte.l1);
 
     /*
      * STEP 2. Validate any modified PTEs.
@@ -2669,6 +3018,7 @@
     modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
     unmap_domain_page(pl1e);
     perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
+    ptwr_eip_stat_update(  d->arch.ptwr[which].eip, d->domain_id, modified);
     d->arch.ptwr[which].prev_nr_updates  = modified;
 
     /*
@@ -2689,6 +3039,8 @@
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
         write_ptbase(current);
+    else 
+        TOGGLE_MODE();
 }
 
 static int ptwr_emulated_update(
@@ -2706,13 +3058,13 @@
     /* Aligned access only, thank you. */
     if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
     {
-        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n",
+        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
                 bytes, addr);
         return X86EMUL_UNHANDLEABLE;
     }
 
     /* Turn a sub-word access into a full-word access. */
-    if (bytes != sizeof(physaddr_t))
+    if ( bytes != sizeof(physaddr_t) )
     {
         int           rc;
         physaddr_t    full;
@@ -2721,7 +3073,7 @@
         /* Align address; read full word. */
         addr &= ~(sizeof(physaddr_t)-1);
         if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
-                                       sizeof(physaddr_t))) )
+                                        sizeof(physaddr_t))) )
             return rc; 
         /* Mask out bits provided by caller. */
         full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
@@ -2729,13 +3081,17 @@
         val  &= (((physaddr_t)1 << (bytes*8)) - 1);
         val <<= (offset)*8;
         val  |= full;
+        /* Also fill in missing parts of the cmpxchg old value. */
+        old  &= (((physaddr_t)1 << (bytes*8)) - 1);
+        old <<= (offset)*8;
+        old  |= full;
     }
 
     /* Read the PTE that maps the page being updated. */
     if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                          sizeof(pte)))
     {
-        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
+        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
         return X86EMUL_UNHANDLEABLE;
     }
 
@@ -2747,7 +3103,8 @@
          ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
          (page_get_owner(page) != d) )
     {
-        MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%lx, %08x)\n",
+        MEM_LOG("ptwr_emulate: Page is mistyped or bad pte "
+                "(%lx, %" PRtype_info ")",
                 l1e_get_pfn(pte), page->u.inuse.type_info);
         return X86EMUL_UNHANDLEABLE;
     }
@@ -2763,7 +3120,7 @@
     if ( do_cmpxchg )
     {
         ol1e = l1e_from_intpte(old);
-        if ( cmpxchg((unsigned long *)pl1e, old, val) != old )
+        if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
         {
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
@@ -2821,14 +3178,15 @@
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
-int ptwr_do_page_fault(struct domain *d, unsigned long addr)
+int ptwr_do_page_fault(struct domain *d, unsigned long addr, 
+                       struct cpu_user_regs *regs)
 {
     unsigned long    pfn;
     struct pfn_info *page;
     l1_pgentry_t     pte;
-    l2_pgentry_t    *pl2e;
+    l2_pgentry_t    *pl2e, l2e;
     int              which;
-    u32              l2_idx;
+    unsigned long    l2_idx;
 
     if ( unlikely(shadow_mode_enabled(d)) )
         return 0;
@@ -2837,7 +3195,7 @@
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
      */
-    if ( !(l2e_get_flags(__linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
+    if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
            _PAGE_PRESENT) ||
          __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
                           sizeof(pte)) )
@@ -2857,9 +3215,8 @@
         return 0;
     }
 
-    /* x86/64: Writable pagetable code needs auditing. Use emulator for now. */
-#if defined(__x86_64__)
-    goto emulate;
+#if 0 /* Leave this in as useful for debugging */ 
+    goto emulate; 
 #endif
 
     /* Get the L2 index at which this L1 p.t. is always mapped. */
@@ -2868,7 +3225,7 @@
         goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
     l2_idx >>= PGT_va_shift;
 
-    if ( unlikely(l2_idx == (addr >> L2_PAGETABLE_SHIFT)) )
+    if ( unlikely(l2_idx == l2_linear_offset(addr)) )
         goto emulate; /* Urk! Pagetable maps itself! */
 
     /*
@@ -2877,7 +3234,8 @@
      */
     pl2e = &__linear_l2_table[l2_idx];
     which = PTWR_PT_INACTIVE;
-    if ( (l2e_get_pfn(*pl2e)) == pfn )
+
+    if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
     {
         /*
          * Check the PRESENT bit to set ACTIVE mode.
@@ -2885,7 +3243,7 @@
          * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
          * The ptwr_flush call below will restore the PRESENT bit.
          */
-        if ( likely(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
+        if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
              (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
               (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
             which = PTWR_PT_ACTIVE;
@@ -2905,7 +3263,7 @@
         goto emulate;
     }
 
-    PTWR_PRINTK("[%c] page_fault on l1 pt at va %lx, pt for %08x, "
+    PTWR_PRINTK("[%c] page_fault on l1 pt at va %lx, pt for %08lx, "
                 "pfn %lx\n", PTWR_PRINT_WHICH,
                 addr, l2_idx << L2_PAGETABLE_SHIFT, pfn);
     
@@ -2930,7 +3288,11 @@
     d->arch.ptwr[which].l1va   = addr | 1;
     d->arch.ptwr[which].l2_idx = l2_idx;
     d->arch.ptwr[which].vcpu   = current;
-    
+
+#ifdef PERF_ARRAYS
+    d->arch.ptwr[which].eip    = regs->eip;
+#endif
+
     /* For safety, disconnect the L1 p.t. page from current space. */
     if ( which == PTWR_PT_ACTIVE )
     {
@@ -2946,11 +3308,11 @@
     
     /* Finally, make the p.t. page writable by the guest OS. */
     l1e_add_flags(pte, _PAGE_RW);
-    if ( unlikely(__copy_to_user(&linear_pg_table[addr>>PAGE_SHIFT],
-                                 &pte, sizeof(pte))) )
+    if ( unlikely(__put_user(pte.l1,
+                             &linear_pg_table[l1_linear_offset(addr)].l1)) )
     {
         MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
-                &linear_pg_table[addr>>PAGE_SHIFT]);
+                &linear_pg_table[l1_linear_offset(addr)]);
         /* Toss the writable pagetable state and crash. */
         unmap_domain_page(d->arch.ptwr[which].pl1e);
         d->arch.ptwr[which].l1va = 0;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/physdev.c    Thu Aug 25 22:53:20 2005
@@ -106,7 +106,7 @@
              (op.u.set_iobitmap.nr_ports > 65536) )
             break;
         ret = 0;
-        current->arch.iobmp       = (u8 *)op.u.set_iobitmap.bitmap;
+        current->arch.iobmp       = op.u.set_iobitmap.bitmap;
         current->arch.iobmp_limit = op.u.set_iobitmap.nr_ports;
         break;
     default:
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/setup.c      Thu Aug 25 22:53:20 2005
@@ -244,15 +244,17 @@
 
 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
 
+static struct e820entry e820_raw[E820MAX];
+
 void __init __start_xen(multiboot_info_t *mbi)
 {
     char *cmdline;
     module_t *mod = (module_t *)__va(mbi->mods_addr);
-    unsigned long firsthole_start, nr_pages;
+    unsigned long nr_pages, modules_length;
     unsigned long initial_images_start, initial_images_end;
     unsigned long _initrd_start = 0, _initrd_len = 0;
     unsigned int initrdidx = 1;
-    struct e820entry e820_raw[E820MAX];
+    physaddr_t s, e;
     int i, e820_raw_nr = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
         .data_bits = 8,
@@ -330,22 +332,30 @@
 
     max_page = init_e820(e820_raw, &e820_raw_nr);
 
-    /* Find the first high-memory RAM hole. */
-    for ( i = 0; i < e820.nr_map; i++ )
+    modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
+
+    /* Find a large enough RAM extent to stash the DOM0 modules. */
+    for ( i = 0; ; i++ )
+    {
+        if ( i == e820.nr_map )
+        {
+            printk("Not enough memory to stash the DOM0 kernel image.\n");
+            for ( ; ; ) ;
+        }
+        
         if ( (e820.map[i].type == E820_RAM) &&
-             (e820.map[i].addr >= 0x100000) )
+             (e820.map[i].size >= modules_length) &&
+             ((e820.map[i].addr + e820.map[i].size) >=
+              (xenheap_phys_end + modules_length)) )
             break;
-    firsthole_start = e820.map[i].addr + e820.map[i].size;
-
-    /* Relocate the Multiboot modules. */
-    initial_images_start = xenheap_phys_end;
-    initial_images_end   = initial_images_start + 
-        (mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
-    if ( initial_images_end > firsthole_start )
-    {
-        printk("Not enough memory to stash the DOM0 kernel image.\n");
-        for ( ; ; ) ;
-    }
+    }
+
+    /* Stash as near as possible to the beginning of the RAM extent. */
+    initial_images_start = e820.map[i].addr;
+    if ( initial_images_start < xenheap_phys_end )
+        initial_images_start = xenheap_phys_end;
+    initial_images_end = initial_images_start + modules_length;
+
 #if defined(CONFIG_X86_32)
     memmove((void *)initial_images_start,  /* use low mapping */
             (void *)mod[0].mod_start,      /* use low mapping */
@@ -358,16 +368,23 @@
 
     /* Initialise boot-time allocator with all RAM situated after modules. */
     xenheap_phys_start = init_boot_allocator(__pa(&_end));
-    nr_pages   = 0;
+    nr_pages = 0;
     for ( i = 0; i < e820.nr_map; i++ )
     {
         if ( e820.map[i].type != E820_RAM )
             continue;
+
         nr_pages += e820.map[i].size >> PAGE_SHIFT;
-        if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end )
-            init_boot_pages((e820.map[i].addr < initial_images_end) ?
-                            initial_images_end : e820.map[i].addr,
-                            e820.map[i].addr + e820.map[i].size);
+
+        /* Initialise boot heap, skipping Xen heap and dom0 modules. */
+        s = e820.map[i].addr;
+        e = s + e820.map[i].size;
+        if ( s < xenheap_phys_end )
+            s = xenheap_phys_end;
+        if ( (s < initial_images_end) && (e > initial_images_start) )
+            s = initial_images_end;
+        init_boot_pages(s, e);
+
 #if defined (CONFIG_X86_64)
         /*
          * x86/64 maps all registered RAM. Points to note:
@@ -404,10 +421,30 @@
 
     end_boot_allocator();
 
-    init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
-    printk("Xen heap: %luMB (%lukB)\n",
-           (xenheap_phys_end-xenheap_phys_start) >> 20,
-           (xenheap_phys_end-xenheap_phys_start) >> 10);
+    /* Initialise the Xen heap, skipping RAM holes. */
+    nr_pages = 0;
+    for ( i = 0; i < e820.nr_map; i++ )
+    {
+        if ( e820.map[i].type != E820_RAM )
+            continue;
+
+        s = e820.map[i].addr;
+        e = s + e820.map[i].size;
+        if ( s < xenheap_phys_start )
+            s = xenheap_phys_start;
+        if ( e > xenheap_phys_end )
+            e = xenheap_phys_end;
+ 
+        if ( s < e )
+        {
+            nr_pages += (e - s) >> PAGE_SHIFT;
+            init_xenheap_pages(s, e);
+        }
+    }
+
+    printk("Xen heap: %luMB (%lukB)\n", 
+           nr_pages >> (20 - PAGE_SHIFT),
+           nr_pages << (PAGE_SHIFT - 10));
 
     early_boot = 0;
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/shadow.c     Thu Aug 25 22:53:20 2005
@@ -1578,7 +1578,7 @@
 
     if ( unlikely(!VALID_MFN(gmfn)) )
     {
-        SH_LOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
+        SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
         *spte_p = l1e_empty();
         return 0;
     }
@@ -1612,7 +1612,7 @@
 
     if ( unlikely(!VALID_MFN(mfn)) )
     {
-        SH_LOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
+        SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
         *spte_p = l1e_empty();
         return 0;
     }
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/shadow32.c   Thu Aug 25 22:53:20 2005
@@ -418,7 +418,7 @@
         break;
 
     default:
-        printk("Free shadow weird page type mfn=%lx type=%08x\n",
+        printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
                page_to_pfn(page), page->u.inuse.type_info);
         break;
     }
@@ -665,7 +665,7 @@
 
     shadow_audit(d, 0);
 
-    SH_LOG("Free shadow table.");
+    SH_VLOG("Free shadow table.");
 }
 
 void shadow_mode_init(void)
@@ -1137,7 +1137,7 @@
     d->arch.shadow_ht_free = NULL;
 
     ASSERT(d->arch.shadow_extras_count == 0);
-    SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
+    SH_VLOG("freed extras, now %d", d->arch.shadow_extras_count);
 
     if ( d->arch.shadow_dirty_bitmap != NULL )
     {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/shadow_public.c      Thu Aug 25 22:53:20 2005
@@ -571,7 +571,7 @@
         break;
 
     default:
-        printk("Free shadow weird page type mfn=%lx type=%08x\n",
+        printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
                page_to_pfn(page), page->u.inuse.type_info);
         break;
     }
@@ -1638,14 +1638,14 @@
     /* XXX This needs more thought... */
     printk("%s: needing to call __shadow_remove_all_access for mfn=%lx\n",
       __func__, page_to_pfn(page));
-    printk("Before: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+    printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
       page->count_info, page->u.inuse.type_info);
 
     shadow_lock(d);
     __shadow_remove_all_access(d, page_to_pfn(page));
     shadow_unlock(d);
 
-    printk("After:  mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+    printk("After:  mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
       page->count_info, page->u.inuse.type_info);
 }
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/smpboot.c    Thu Aug 25 22:53:20 2005
@@ -434,7 +434,6 @@
        unsigned int cpu = cpucount;
 
        extern void percpu_traps_init(void);
-       extern void cpu_init(void);
 
        set_current(idle_task[cpu]);
        set_processor_id(cpu);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/time.c       Thu Aug 25 22:53:20 2005
@@ -43,7 +43,8 @@
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
 int timer_ack = 0;
 unsigned long volatile jiffies;
-static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
+static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
+static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED;
 
 struct time_scale {
     int shift;
@@ -67,13 +68,6 @@
 static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
 static u64 (*read_platform_count)(void);
 
-static inline u32 down_shift(u64 time, int shift)
-{
-    if ( shift < 0 )
-        return (u32)(time >> -shift);
-    return (u32)((u32)time << shift);
-}
-
 /*
  * 32-bit division of integer dividend and integer divisor yielding
  * 32-bit fractional quotient.
@@ -83,7 +77,7 @@
     u32 quotient, remainder;
     ASSERT(dividend < divisor);
     __asm__ ( 
-        "div %4"
+        "divl %4"
         : "=a" (quotient), "=d" (remainder)
         : "0" (0), "1" (dividend), "r" (divisor) );
     return quotient;
@@ -101,6 +95,42 @@
         : "=a" (product_frac), "=d" (product_int)
         : "0" (multiplicand), "r" (multiplier) );
     return product_int;
+}
+
+/*
+ * Scale a 64-bit delta: shift it by scale->shift (right if negative), then
+ * multiply by the 32-bit fraction scale->mul_frac: (delta * mul_frac) >> 32.
+ */
+static inline u64 scale_delta(u64 delta, struct time_scale *scale)
+{
+    u64 product;
+#ifdef CONFIG_X86_32
+    u32 tmp1, tmp2;
+#endif
+
+    if ( scale->shift < 0 )
+        delta >>= -scale->shift;
+    else
+        delta <<= scale->shift;
+
+#ifdef CONFIG_X86_32
+    __asm__ (
+        "mul  %5       ; "  /* edx:eax = delta.lo * mul_frac              */
+        "mov  %4,%%eax ; "  /* eax = delta.hi                             */
+        "mov  %%edx,%4 ; "  /* %4  = high word of the low product         */
+        "mul  %5       ; "  /* edx:eax = delta.hi * mul_frac              */
+        "xor  %5,%5    ; "  /* %5 = 0; must precede add: xor clears CF    */
+        "add  %4,%%eax ; "  /* eax += saved high word, carry out in CF    */
+        "adc  %5,%%edx ; "  /* edx += CF (propagate the carry)            */
+        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
+#else
+    __asm__ (
+        "mul %%rdx ; shrd $32,%%rdx,%%rax"
+        : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
+#endif
+
+    return product;
 }
 
 void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
@@ -486,11 +516,9 @@
 
 static s_time_t __read_platform_stime(u64 platform_time)
 {
-    u64 diff64 = platform_time - platform_timer_stamp;
-    u32 diff   = down_shift(diff64, platform_timer_scale.shift);
+    u64 diff = platform_time - platform_timer_stamp;
     ASSERT(spin_is_locked(&platform_timer_lock));
-    return (stime_platform_stamp + 
-            (u64)mul_frac(diff, platform_timer_scale.mul_frac));
+    return (stime_platform_stamp + scale_delta(diff, &platform_timer_scale));
 }
 
 static s_time_t read_platform_stime(void)
@@ -619,15 +647,27 @@
 s_time_t get_s_time(void)
 {
     struct cpu_time *t = &cpu_time[smp_processor_id()];
-    u64 tsc;
-    u32 delta;
+    u64 tsc, delta;
     s_time_t now;
 
     rdtscll(tsc);
-    delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
-    now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
+    delta = tsc - t->local_tsc_stamp;
+    now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
 
     return now;
+}
+
+static inline void version_update_begin(u32 *version)
+{
+    /* Explicitly OR with 1 just in case version number gets out of sync. */
+    *version = (*version + 1) | 1;
+    wmb();
+}
+
+static inline void version_update_end(u32 *version)
+{
+    wmb();
+    (*version)++;
 }
 
 static inline void __update_dom_time(struct vcpu *v)
@@ -635,20 +675,14 @@
     struct cpu_time       *t = &cpu_time[smp_processor_id()];
     struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
 
-    u->time_version1++;
-    wmb();
+    version_update_begin(&u->version);
 
     u->tsc_timestamp     = t->local_tsc_stamp;
     u->system_time       = t->stime_local_stamp;
     u->tsc_to_system_mul = t->tsc_scale.mul_frac;
     u->tsc_shift         = (s8)t->tsc_scale.shift;
 
-    wmb();
-    u->time_version2++;
-
-    /* Should only do this during do_settime(). */
-    v->domain->shared_info->wc_sec  = wc_sec;
-    v->domain->shared_info->wc_usec = wc_usec;
+    version_update_end(&u->version);
 }
 
 void update_dom_time(struct vcpu *v)
@@ -659,21 +693,43 @@
 }
 
 /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
-{
-    u64 x, base_usecs;
-    u32 y;
-
-    base_usecs = system_time_base;
-    do_div(base_usecs, 1000);
-
-    x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
-    y = do_div(x, 1000000);
-
-    wc_sec  = (unsigned long)x;
-    wc_usec = (unsigned long)y;
-
-    __update_dom_time(current);
+void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
+{
+    u64 x;
+    u32 y, _wc_sec, _wc_nsec;
+    struct domain *d;
+    shared_info_t *s;
+
+    x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base;
+    y = do_div(x, 1000000000); /* x = whole seconds, y = leftover nanoseconds */
+
+    read_lock(&domlist_lock);
+    spin_lock(&wc_lock); /* held for the global update: init_domain_time reads it */
+
+    wc_sec  = _wc_sec  = (u32)x;
+    wc_nsec = _wc_nsec = (u32)y;
+
+    for_each_domain ( d )
+    {
+        s = d->shared_info;
+        version_update_begin(&s->wc_version);
+        s->wc_sec  = _wc_sec;
+        s->wc_nsec = _wc_nsec;
+        version_update_end(&s->wc_version);
+    }
+
+    spin_unlock(&wc_lock);
+    read_unlock(&domlist_lock);
+}
+
+void init_domain_time(struct domain *d)
+{
+    spin_lock(&wc_lock);
+    version_update_begin(&d->shared_info->wc_version);
+    d->shared_info->wc_sec  = wc_sec;
+    d->shared_info->wc_nsec = wc_nsec;
+    version_update_end(&d->shared_info->wc_version);
+    spin_unlock(&wc_lock);
 }
 
 static void local_time_calibration(void *unused)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/traps.c      Thu Aug 25 22:53:20 2005
@@ -159,10 +159,8 @@
         addr = *stack++;
         if ( is_kernel_text(addr) )
         {
-            if ( (i != 0) && ((i % 6) == 0) )
-                printk("\n   ");
             printk("[<%p>]", _p(addr));
-            print_symbol(" %s\n", addr);
+            print_symbol(" %s\n   ", addr);
             i++;
         }
     }
@@ -422,7 +420,7 @@
     {
         LOCK_BIGLOCK(d);
         if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
+             unlikely(l2_linear_offset(addr) ==
                       d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
         {
             ptwr_flush(d, PTWR_PT_ACTIVE);
@@ -430,10 +428,15 @@
             return EXCRET_fault_fixed;
         }
 
-        if ( (addr < HYPERVISOR_VIRT_START) &&
+        if ( ((addr < HYPERVISOR_VIRT_START) 
+#if defined(__x86_64__)
+              || (addr >= HYPERVISOR_VIRT_END)
+#endif        
+            )     
+             &&
              KERNEL_MODE(v, regs) &&
              ((regs->error_code & 3) == 3) && /* write-protection fault */
-             ptwr_do_page_fault(d, addr) )
+             ptwr_do_page_fault(d, addr, regs) )
         {
             UNLOCK_BIGLOCK(d);
             return EXCRET_fault_fixed;
@@ -459,15 +462,13 @@
         goto xen_fault;
 
     propagate_page_fault(addr, regs->error_code);
-    return 0; 
+    return 0;
 
  xen_fault:
 
     if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
     {
         perfc_incrc(copy_user_faults);
-        if ( !shadow_mode_enabled(d) )
-            DPRINTK("Page fault: %p -> %p\n", _p(regs->eip), _p(fixup));
         regs->eip = fixup;
         return 0;
     }
@@ -1155,7 +1156,6 @@
 void __init trap_init(void)
 {
     extern void percpu_traps_init(void);
-    extern void cpu_init(void);
 
     /*
      * Note that interrupt gates are always used, rather than trap gates. We 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx.c        Thu Aug 25 22:53:20 2005
@@ -65,7 +65,7 @@
  * are not modified once set for generic domains, we don't save them, 
  * but simply reset them to the values set at percpu_traps_init().
  */
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
 {
     struct msr_state *host_state;
     host_state = &percpu_msr[smp_processor_id()];
@@ -1712,9 +1712,6 @@
     default:
         __vmx_bug(&regs);       /* should not happen */
     }
-
-    vmx_intr_assist(v);
-    return;
 }
 
 asmlinkage void load_cr2(void)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx_intercept.c      Thu Aug 25 22:53:20 2005
@@ -74,10 +74,10 @@
 
 static void pit_cal_count(struct vmx_virpit_t *vpit)
 {
-    unsigned int usec_delta = (unsigned int)((NOW() - vpit->inject_point) / 1000);
-    if (usec_delta > vpit->period * 1000)
+    u64 nsec_delta = (unsigned int)((NOW() - vpit->inject_point));
+    if (nsec_delta > vpit->period)
         VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT:long time has passed from last injection!");
-    vpit->count = vpit->init_val - ((usec_delta * PIT_FREQ / 1000000) % vpit->init_val );
+    vpit->count = vpit->init_val - ((nsec_delta * PIT_FREQ / 1000000000ULL) % vpit->init_val );
 }
 
 static void pit_latch_io(struct vmx_virpit_t *vpit)
@@ -197,9 +197,10 @@
 static void pit_timer_fn(void *data)
 {
     struct vmx_virpit_t *vpit = data;
-    int missed_ticks;
-
-    missed_ticks = (NOW() - vpit->scheduled) / MILLISECS(vpit->period);
+    s_time_t   next;
+    int        missed_ticks;
+
+    missed_ticks = (NOW() - vpit->scheduled)/(s_time_t) vpit->period;
 
     /* Set the pending intr bit, and send evtchn notification to myself. */
     if (test_and_set_bit(vpit->vector, vpit->intr_bitmap))
@@ -208,12 +209,12 @@
     /* pick up missed timer tick */
     if ( missed_ticks > 0 ) {
         vpit->pending_intr_nr += missed_ticks;
-        vpit->scheduled += missed_ticks * MILLISECS(vpit->period);
-    }
-    vpit->scheduled += MILLISECS(vpit->period);
-    set_ac_timer(&vpit->pit_timer, vpit->scheduled);
-}
-
+        vpit->scheduled += missed_ticks * vpit->period;
+    }
+    next = vpit->scheduled + vpit->period;
+    set_ac_timer(&vpit->pit_timer, next);
+    vpit->scheduled = next;
+}
 
 /* Only some PIT operations such as load init counter need a hypervisor hook.
  * leave all other operations in user space DM
@@ -236,16 +237,17 @@
             reinit = 1;
         }
         else
-            init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, 0);
+            init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, d->processor);
 
         /* init count for this channel */
         vpit->init_val = (p->u.data & 0xFFFF) ; 
-        /* frequency(ms) of pit */
-        vpit->period = DIV_ROUND(((vpit->init_val) * 1000), PIT_FREQ); 
-        if (vpit->period < 1) {
+        /* frequency(ns) of pit */
+        vpit->period = DIV_ROUND(((vpit->init_val) * 1000000000ULL), PIT_FREQ); 
+        VMX_DBG_LOG(DBG_LEVEL_1,"VMX_PIT: guest set init pit freq:%u ns, initval:0x%x\n", vpit->period, vpit->init_val);
+        if (vpit->period < 900000) { /* < 0.9 ms */
             printk("VMX_PIT: guest programmed too small an init_val: %x\n",
                    vpit->init_val);
-            vpit->period = 1;
+            vpit->period = 1000000;
         }
         vpit->vector = ((p->u.data >> 16) & 0xFF);
         vpit->channel = ((p->u.data >> 24) & 0x3);
@@ -272,7 +274,7 @@
 
         vpit->intr_bitmap = intr;
 
-        vpit->scheduled = NOW() + MILLISECS(vpit->period);
+        vpit->scheduled = NOW() + vpit->period;
         set_ac_timer(&vpit->pit_timer, vpit->scheduled);
 
         /*restore the state*/
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx_io.c     Thu Aug 25 22:53:20 2005
@@ -631,12 +631,14 @@
     return ((eflags & X86_EFLAGS_IF) == 0);
 }
 
-void vmx_intr_assist(struct vcpu *v) 
+asmlinkage void vmx_intr_assist(void) 
 {
     int intr_type = 0;
-    int highest_vector = find_highest_pending_irq(v, &intr_type);
+    int highest_vector;
     unsigned long intr_fields, eflags, interruptibility, cpu_exec_control;
-
+    struct vcpu *v = current;
+
+    highest_vector = find_highest_pending_irq(v, &intr_type);
     __vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
 
     if (highest_vector == -1) {
@@ -712,9 +714,6 @@
 
     /* We can't resume the guest if we're waiting on I/O */
     ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags));
-
-    /* We always check for interrupts before resuming guest */
-    vmx_intr_assist(d);
 }
 
 #endif /* CONFIG_VMX */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Thu Aug 25 22:53:20 2005
@@ -187,46 +187,52 @@
     return 0;
 }
 
-void vmx_do_launch(struct vcpu *v) 
-{
-/* Update CR3, GDT, LDT, TR */
+void vmx_set_host_env(struct vcpu *v)
+{
     unsigned int tr, cpu, error = 0;
     struct host_execution_env host_env;
     struct Xgt_desc_struct desc;
-    unsigned long pfn = 0;
-    struct pfn_info *page;
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-
-    vmx_stts();
 
     cpu = smp_processor_id();
-
-    page = (struct pfn_info *) alloc_domheap_page(NULL);
-    pfn = (unsigned long) (page - frame_table);
-
-    vmx_setup_platform(v, regs);
-
     __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
     host_env.idtr_limit = desc.size;
     host_env.idtr_base = desc.address;
     error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);
- 
+
     __asm__ __volatile__ ("sgdt  (%0) \n" :: "a"(&desc) : "memory");
     host_env.gdtr_limit = desc.size;
     host_env.gdtr_base = desc.address;
     error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
 
+    __asm__ __volatile__ ("str  (%0) \n" :: "a"(&tr) : "memory");
+    host_env.tr_selector = tr;
+    host_env.tr_limit = sizeof(struct tss_struct);
+    host_env.tr_base = (unsigned long) &init_tss[cpu];
+    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
+    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
+}
+
+void vmx_do_launch(struct vcpu *v) 
+{
+/* Update CR3, GDT, LDT, TR */
+    unsigned int  error = 0;
+    unsigned long pfn = 0;
+    struct pfn_info *page;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+    vmx_stts();
+
+    page = (struct pfn_info *) alloc_domheap_page(NULL);
+    pfn = (unsigned long) (page - frame_table);
+
+    vmx_setup_platform(v, regs);
+
+    vmx_set_host_env(v);
+
     error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
     error |= __vmwrite(GUEST_LDTR_BASE, 0);
     error |= __vmwrite(GUEST_LDTR_LIMIT, 0);
         
-    __asm__ __volatile__ ("str  (%0) \n" :: "a"(&tr) : "memory");
-    host_env.tr_selector = tr;
-    host_env.tr_limit = sizeof(struct tss_struct);
-    host_env.tr_base = (unsigned long) &init_tss[cpu];
-
-    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
-    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
     error |= __vmwrite(GUEST_TR_BASE, 0);
     error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
 
@@ -523,12 +529,48 @@
 
 void vm_launch_fail(unsigned long eflags)
 {
+    unsigned long error;
+    __vmread(VM_INSTRUCTION_ERROR, &error);
+    printk("<vm_launch_fail> error code %lx\n", error);
     __vmx_bug(guest_cpu_user_regs());
 }
 
 void vm_resume_fail(unsigned long eflags)
 {
+    unsigned long error;
+    __vmread(VM_INSTRUCTION_ERROR, &error);
+    printk("<vm_resume_fail> error code %lx\n", error);
     __vmx_bug(guest_cpu_user_regs());
+}
+
+void arch_vmx_do_resume(struct vcpu *v) 
+{
+    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+    vmx_do_resume(v);
+    reset_stack_and_jump(vmx_asm_do_resume);
+}
+
+void arch_vmx_do_launch(struct vcpu *v) 
+{
+    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+    vmx_do_launch(v);
+    reset_stack_and_jump(vmx_asm_do_launch);
+}
+
+void arch_vmx_do_relaunch(struct vcpu *v)
+{
+    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+    vmx_do_resume(v);
+    vmx_set_host_env(v);
+    v->arch.schedule_tail = arch_vmx_do_resume;
+
+    reset_stack_and_jump(vmx_asm_do_relaunch);
 }
 
 #endif /* CONFIG_VMX */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_32/entry.S       Thu Aug 25 22:53:20 2005
@@ -108,31 +108,26 @@
         pushl %ecx; \
         pushl %ebx;
 
+#define VMX_RESTORE_ALL_NOSEGREGS   \
+        popl %ebx;  \
+        popl %ecx;  \
+        popl %edx;  \
+        popl %esi;  \
+        popl %edi;  \
+        popl %ebp;  \
+        popl %eax;  \
+        addl $(NR_SKIPPED_REGS*4), %esp
+
 ENTRY(vmx_asm_vmexit_handler)
         /* selectors are restored/saved by VMX */
         VMX_SAVE_ALL_NOSEGREGS
         call vmx_vmexit_handler
         jmp vmx_asm_do_resume
 
-ENTRY(vmx_asm_do_launch)
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-        popl %eax
-        addl $(NR_SKIPPED_REGS*4), %esp
-        /* VMLUANCH */
-        .byte 0x0f,0x01,0xc2
-        pushf
-        call vm_launch_fail
-        hlt
-        
-        ALIGN
-        
-ENTRY(vmx_asm_do_resume)
-vmx_test_all_events:
+.macro vmx_asm_common launch initialized
+1:
+/* vmx_test_all_events */
+        .if \initialized
         GET_CURRENT(%ebx)
 /*test_all_events:*/
         xorl %ecx,%ecx
@@ -142,34 +137,51 @@
         movl VCPU_processor(%ebx),%eax
         shl  $IRQSTAT_shift,%eax
         test %ecx,irq_stat(%eax,1)
-        jnz  vmx_process_softirqs
-
-vmx_restore_all_guest:
+        jnz 2f
+
+/* vmx_restore_all_guest */
+        call vmx_intr_assist
         call load_cr2
+        .endif
+        VMX_RESTORE_ALL_NOSEGREGS
         /* 
          * Check if we are going back to VMX-based VM
          * By this time, all the setups in the VMCS must be complete.
          */
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-        popl %eax
-        addl $(NR_SKIPPED_REGS*4), %esp
+        .if \launch
+        /* VMLUANCH */
+        .byte 0x0f,0x01,0xc2
+        pushf
+        call vm_launch_fail
+        .else
         /* VMRESUME */
         .byte 0x0f,0x01,0xc3
         pushf
         call vm_resume_fail
+        .endif
         /* Should never reach here */
         hlt
 
         ALIGN
-vmx_process_softirqs:
+        .if \initialized
+2:
+/* vmx_process_softirqs */
         sti       
         call do_softirq
-        jmp  vmx_test_all_events
+        jmp 1b
+        ALIGN
+        .endif
+.endm
+
+ENTRY(vmx_asm_do_launch)
+    vmx_asm_common 1 0
+
+ENTRY(vmx_asm_do_resume)
+    vmx_asm_common 0 1
+
+ENTRY(vmx_asm_do_relaunch)
+    vmx_asm_common 1 1
+
 #endif
 
         ALIGN
@@ -335,7 +347,8 @@
         movl VCPU_vcpu_info(%ebx),%eax
         pushl VCPUINFO_upcall_mask(%eax)
         testb $TBF_INTERRUPT,%cl
-        setnz VCPUINFO_upcall_mask(%eax) # TBF_INTERRUPT -> clear upcall mask
+        setnz %ch                        # TBF_INTERRUPT -> set upcall mask
+        orb  %ch,VCPUINFO_upcall_mask(%eax)
         popl %eax
         shll $16,%eax                    # Bits 16-23: saved_upcall_mask
         movw UREGS_cs+4(%esp),%ax        # Bits  0-15: CS
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_32/mm.c  Thu Aug 25 22:53:20 2005
@@ -93,13 +93,10 @@
 
     /*
      * Allocate and map the machine-to-phys table and create read-only mapping 
-     * of MPT for guest-OS use.  Without PAE we'll end up with one 4MB page, 
-     * with PAE we'll allocate 2MB pages depending on the amount of memory 
-     * installed, but at least 4MB to cover 4GB address space.  This is needed 
-     * to make PCI I/O memory address lookups work in guests.
+     * of MPT for guest-OS use.
      */
-    if ( (mpt_size = max_page * 4) < (4*1024*1024) )
-        mpt_size = 4*1024*1024;
+    mpt_size  = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
+    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
         if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
@@ -148,7 +145,7 @@
 void subarch_init_memory(struct domain *dom_xen)
 {
     unsigned long m2p_start_mfn;
-    int i;
+    unsigned int i, j;
 
     /*
      * We are rather picky about the layout of 'struct pfn_info'. The
@@ -172,12 +169,12 @@
     {
         m2p_start_mfn = l2e_get_pfn(
             idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
-        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+        for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
         {
-            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+            frame_table[m2p_start_mfn+j].count_info = PGC_allocated | 1;
             /* Ensure it's only mapped read-only by domains. */
-            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
-            page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
+            frame_table[m2p_start_mfn+j].u.inuse.type_info = PGT_gdt_page | 1;
+            page_set_owner(&frame_table[m2p_start_mfn+j], dom_xen);
         }
     }
 }
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_32/traps.c       Thu Aug 25 22:53:20 2005
@@ -1,5 +1,6 @@
 
 #include <xen/config.h>
+#include <xen/domain_page.h>
 #include <xen/init.h>
 #include <xen/sched.h>
 #include <xen/lib.h>
@@ -66,8 +67,9 @@
 
     printk("CPU:    %d\nEIP:    %04lx:[<%08lx>]",
            smp_processor_id(), (unsigned long)0xffff & regs->cs, eip);
-    print_symbol(" %s\n", eip);
-    printk("EFLAGS: %08lx   CONTEXT: %s\n", eflags, context);
+    if ( !GUEST_MODE(regs) )
+        print_symbol(" %s", eip);
+    printk("\nEFLAGS: %08lx   CONTEXT: %s\n", eflags, context);
     printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx);
     printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
@@ -85,24 +87,33 @@
 
 void show_page_walk(unsigned long addr)
 {
-    l2_pgentry_t pmd;
-    l1_pgentry_t *pte;
-
-    if ( addr < PAGE_OFFSET )
-        return;
+    unsigned long pfn = read_cr3() >> PAGE_SHIFT; /* start at the table CR3 names */
+    intpte_t *ptab, ent;
 
     printk("Pagetable walk from %08lx:\n", addr);
-    
-    pmd = idle_pg_table_l2[l2_linear_offset(addr)];
-    printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd),
-           (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
-    if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
-         (l2e_get_flags(pmd) & _PAGE_PSE) )
-        return;
-
-    pte  = __va(l2e_get_paddr(pmd));
-    pte += l1_table_offset(addr);
-    printk("  L1 = %"PRIpte"\n", l1e_get_intpte(*pte));
+
+#ifdef CONFIG_X86_PAE
+    ptab = map_domain_page(pfn);
+    ent = ptab[l3_table_offset(addr)];
+    printk(" L3 = %"PRIpte"\n", ent);
+    unmap_domain_page(ptab);
+    if ( !(ent & _PAGE_PRESENT) )
+        return;
+    pfn = ent >> PAGE_SHIFT;
+#endif
+
+    ptab = map_domain_page(pfn);
+    ent = ptab[l2_table_offset(addr)];
+    printk("  L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : "");
+    unmap_domain_page(ptab);
+    if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) ) /* no L1 table below */
+        return;
+    pfn = ent >> PAGE_SHIFT;
+
+    ptab = map_domain_page(ent >> PAGE_SHIFT);
+    ent = ptab[l1_table_offset(addr)]; /* index the L1 table with the L1 offset */
+    printk("   L1 = %"PRIpte"\n", ent);
+    unmap_domain_page(ptab);
 }
 
 #define DOUBLEFAULT_STACK_SIZE 1024
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_64/entry.S       Thu Aug 25 22:53:20 2005
@@ -194,39 +194,34 @@
         pushq %r14; \
         pushq %r15; \
 
+#define VMX_RESTORE_ALL_NOSEGREGS \
+        popq %r15; \
+        popq %r14; \
+        popq %r13; \
+        popq %r12; \
+        popq %rbp; \
+        popq %rbx; \
+        popq %r11; \
+        popq %r10; \
+        popq %r9;  \
+        popq %r8;  \
+        popq %rax; \
+        popq %rcx; \
+        popq %rdx; \
+        popq %rsi; \
+        popq %rdi; \
+        addq $(NR_SKIPPED_REGS*8), %rsp; \
+
 ENTRY(vmx_asm_vmexit_handler)
         /* selectors are restored/saved by VMX */
         VMX_SAVE_ALL_NOSEGREGS
         call vmx_vmexit_handler
         jmp vmx_asm_do_resume
 
-ENTRY(vmx_asm_do_launch)
-        popq %r15
-        popq %r14
-        popq %r13
-        popq %r12
-        popq %rbp
-        popq %rbx
-        popq %r11
-        popq %r10
-        popq %r9
-        popq %r8
-        popq %rax
-        popq %rcx
-        popq %rdx
-        popq %rsi
-        popq %rdi
-        addq $(NR_SKIPPED_REGS*8), %rsp
-        /* VMLUANCH */
-        .byte 0x0f,0x01,0xc2
-        pushfq
-        call vm_launch_fail
-        hlt
-        
-        ALIGN
-        
-ENTRY(vmx_asm_do_resume)
-vmx_test_all_events:
+.macro vmx_asm_common launch initialized 
+1:
+        .if \initialized
+/* vmx_test_all_events */
         GET_CURRENT(%rbx)
 /* test_all_events: */
         cli                             # tests must not race interrupts
@@ -235,42 +230,52 @@
         shl   $IRQSTAT_shift,%rax
         leaq  irq_stat(%rip), %rdx
         testl $~0,(%rdx,%rax,1)
-        jnz   vmx_process_softirqs
-
-vmx_restore_all_guest:
+        jnz  2f 
+
+/* vmx_restore_all_guest */
+        call vmx_intr_assist
         call load_cr2
+        .endif
         /* 
          * Check if we are going back to VMX-based VM
          * By this time, all the setups in the VMCS must be complete.
          */
-        popq %r15
-        popq %r14
-        popq %r13
-        popq %r12
-        popq %rbp
-        popq %rbx
-        popq %r11
-        popq %r10
-        popq %r9
-        popq %r8
-        popq %rax
-        popq %rcx
-        popq %rdx
-        popq %rsi
-        popq %rdi
-        addq $(NR_SKIPPED_REGS*8), %rsp
+        VMX_RESTORE_ALL_NOSEGREGS
+        .if \launch
+        /* VMLUANCH */
+        .byte 0x0f,0x01,0xc2
+        pushfq
+        call vm_launch_fail
+        .else
         /* VMRESUME */
         .byte 0x0f,0x01,0xc3
         pushfq
         call vm_resume_fail
+        .endif
         /* Should never reach here */
         hlt
 
         ALIGN
-vmx_process_softirqs:
+
+        .if \initialized
+2:
+/* vmx_process_softirqs */
         sti       
         call do_softirq
-        jmp  vmx_test_all_events
+        jmp 1b
+        ALIGN
+        .endif
+.endm
+
+ENTRY(vmx_asm_do_launch)
+      vmx_asm_common 1 0
+
+ENTRY(vmx_asm_do_resume)
+      vmx_asm_common 0 1
+
+ENTRY(vmx_asm_do_relaunch)
+      vmx_asm_common 1 1
+
 #endif
 
         ALIGN
@@ -314,7 +319,8 @@
         movq  VCPU_vcpu_info(%rbx),%rax
         pushq VCPUINFO_upcall_mask(%rax)
         testb $TBF_INTERRUPT,%cl
-        setnz VCPUINFO_upcall_mask(%rax)# TBF_INTERRUPT -> clear upcall mask
+        setnz %ch                       # TBF_INTERRUPT -> set upcall mask
+        orb   %ch,VCPUINFO_upcall_mask(%rax)
         popq  %rax
         shlq  $32,%rax                  # Bits 32-39: saved_upcall_mask
         movw  UREGS_cs+8(%rsp),%ax      # Bits  0-15: CS
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_64/mm.c  Thu Aug 25 22:53:20 2005
@@ -74,7 +74,7 @@
 
 void __init paging_init(void)
 {
-    unsigned long i;
+    unsigned long i, mpt_size;
     l3_pgentry_t *l3_ro_mpt;
     l2_pgentry_t *l2_ro_mpt;
     struct pfn_info *pg;
@@ -98,16 +98,17 @@
      * Allocate and map the machine-to-phys table.
      * This also ensures L3 is present for fixmaps.
      */
-    for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
-    {
-        pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0);
-        if ( pg == NULL )
+    mpt_size  = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
+    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+    {
+        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
             panic("Not enough memory for m2p table\n");
         map_pages_to_xen(
-            RDWR_MPT_VIRT_START + i*8, page_to_pfn(pg), 
+            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), page_to_pfn(pg), 
             1UL << PAGETABLE_ORDER,
             PAGE_HYPERVISOR);
-        memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
+        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
                1UL << L2_PAGETABLE_SHIFT);
         *l2_ro_mpt++ = l2e_from_page(
             pg, _PAGE_GLOBAL|_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Wed Aug 24 02:43:18 2005
+++ b/xen/arch/x86/x86_64/traps.c       Thu Aug 25 22:53:20 2005
@@ -17,8 +17,9 @@
 {
     printk("CPU:    %d\nEIP:    %04x:[<%016lx>]",
            smp_processor_id(), 0xffff & regs->cs, regs->rip);
-    print_symbol(" %s\n", regs->rip);
-    printk("EFLAGS: %016lx\n", regs->eflags);
+    if ( !GUEST_MODE(regs) )
+        print_symbol(" %s", regs->rip);
+    printk("\nEFLAGS: %016lx\n", regs->eflags);
     printk("rax: %016lx   rbx: %016lx   rcx: %016lx   rdx: %016lx\n",
            regs->rax, regs->rbx, regs->rcx, regs->rdx);
     printk("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Wed Aug 24 02:43:18 2005
+++ b/xen/common/dom0_ops.c     Thu Aug 25 22:53:20 2005
@@ -70,8 +70,7 @@
             flags &= ~DOMFLAGS_BLOCKED;
         if ( v->vcpu_flags & VCPUF_running )
             flags |= DOMFLAGS_RUNNING;
-        if ( v->cpu_time > cpu_time )
-            cpu_time += v->cpu_time;
+        cpu_time += v->cpu_time;
         vcpu_count++;
     }
     
@@ -294,17 +293,17 @@
         v->cpumap = cpumap;
 
         if ( cpumap == CPUMAP_RUNANYWHERE )
+        {
             clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
+        }
         else
         {
             /* pick a new cpu from the usable map */
             int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();
 
             vcpu_pause(v);
-            if ( v->processor != new_cpu )
-                set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+            vcpu_migrate_cpu(v, new_cpu);
             set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
-            v->processor = new_cpu;
             vcpu_unpause(v);
         }
 
@@ -475,7 +474,7 @@
     case DOM0_SETTIME:
     {
         do_settime(op->u.settime.secs, 
-                   op->u.settime.usecs, 
+                   op->u.settime.nsecs, 
                    op->u.settime.system_time);
         ret = 0;
     }
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Wed Aug 24 02:43:18 2005
+++ b/xen/common/event_channel.c        Thu Aug 25 22:53:20 2005
@@ -588,7 +588,6 @@
     long           rc = 0;
 
     if ( (vcpu >= MAX_VIRT_CPUS) || (d->vcpu[vcpu] == NULL) ) {
-        printf("vcpu %d bad.\n", vcpu);
         return -EINVAL;
     }
 
@@ -596,7 +595,6 @@
 
     if ( !port_is_valid(d, port) )
     {
-        printf("port %d bad.\n", port);
         rc = -EINVAL;
         goto out;
     }
@@ -610,7 +608,6 @@
         chn->notify_vcpu_id = vcpu;
         break;
     default:
-        printf("evtchn type %d can't be rebound.\n", chn->state);
         rc = -EINVAL;
         break;
     }
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Wed Aug 24 02:43:18 2005
+++ b/xen/common/grant_table.c  Thu Aug 25 22:53:20 2005
@@ -6,6 +6,8 @@
  * 
  * Copyright (c) 2005 Christopher Clark
  * Copyright (c) 2004 K A Fraser
+ * Copyright (c) 2005 Andrew Warfield
+ * Modifications by Geoffrey Lefebvre are (c) Intel Research Cambridge
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -50,7 +52,7 @@
     grant_table_t *t)
 {
     unsigned int h;
-    if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) )
+    if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) )
         return -1;
     t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
     t->map_count++;
@@ -68,13 +70,13 @@
 
 static int
 __gnttab_activate_grant_ref(
-    struct domain          *mapping_d,          /* IN */
+    struct domain   *mapping_d,          /* IN */
     struct vcpu     *mapping_ed,
-    struct domain          *granting_d,
-    grant_ref_t             ref,
-    u16                     dev_hst_ro_flags,
-    unsigned long           host_virt_addr,
-    unsigned long          *pframe )            /* OUT */
+    struct domain   *granting_d,
+    grant_ref_t      ref,
+    u16              dev_hst_ro_flags,
+    u64              addr,
+    unsigned long   *pframe )            /* OUT */
 {
     domid_t               sdom;
     u16                   sflags;
@@ -95,7 +97,7 @@
      * Returns:
      * .  -ve: error
      * .    1: ok
-     * .    0: ok and TLB invalidate of host_virt_addr needed.
+     * .    0: ok and TLB invalidate of host_addr needed.
      *
      * On success, *pframe contains mfn.
      */
@@ -121,6 +123,10 @@
         sflags = sha->flags;
         sdom   = sha->domid;
 
+        /* This loop attempts to set the access (reading/writing) flags
+         * in the grant table entry.  It tries a cmpxchg on the field
+         * up to five times, and then fails under the assumption that 
+         * the guest is misbehaving. */
         for ( ; ; )
         {
             u32 scombo, prev_scombo, new_scombo;
@@ -253,28 +259,32 @@
 
     /*
      * At this point:
-     * act->pin updated to reflect mapping.
+     * act->pin updated to reference count mappings.
      * sha->flags updated to indicate to granting domain mapping done.
      * frame contains the mfn.
      */
 
     spin_unlock(&granting_d->grant_table->lock);
 
-    if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
+    if ( (addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
     {
         /* Write update into the pagetable. */
         l1_pgentry_t pte;
         pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
+        
+        if ( (dev_hst_ro_flags & GNTMAP_application_map) )
+            l1e_add_flags(pte,_PAGE_USER);
         if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
             l1e_add_flags(pte,_PAGE_RW);
-        rc = update_grant_va_mapping( host_virt_addr, pte, 
-                       mapping_d, mapping_ed );
-
-        /*
-         * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
-         * This is done in the outer gnttab_map_grant_ref.
-         */
-
+
+        if ( dev_hst_ro_flags & GNTMAP_contains_pte )
+            rc = update_grant_pte_mapping(addr, pte, mapping_d, mapping_ed);
+        else
+            rc = update_grant_va_mapping(addr, pte, mapping_d, mapping_ed);
+
+        /* IMPORTANT: rc indicates the degree of TLB flush that is required.
+         * GNTST_flush_one (1) or GNTST_flush_all (2). This is done in the 
+         * outer gnttab_map_grant_ref. */
         if ( rc < 0 )
         {
             /* Failure: undo and abort. */
@@ -317,20 +327,24 @@
 /*
  * Returns 0 if TLB flush / invalidate required by caller.
  * va will indicate the address to be invalidated.
+ * 
+ * addr is _either_ a host virtual address, or the address of the pte to
+ * update, as indicated by the GNTMAP_contains_pte flag.
  */
 static int
 __gnttab_map_grant_ref(
     gnttab_map_grant_ref_t *uop,
     unsigned long *va)
 {
-    domid_t               dom;
-    grant_ref_t           ref;
-    struct domain        *ld, *rd;
+    domid_t        dom;
+    grant_ref_t    ref;
+    struct domain *ld, *rd;
     struct vcpu   *led;
-    u16                   dev_hst_ro_flags;
-    int                   handle;
-    unsigned long         frame = 0, host_virt_addr;
-    int                   rc;
+    u16            dev_hst_ro_flags;
+    int            handle;
+    u64            addr;
+    unsigned long  frame = 0;
+    int            rc;
 
     led = current;
     ld = led->domain;
@@ -338,19 +352,20 @@
     /* Bitwise-OR avoids short-circuiting which screws control flow. */
     if ( unlikely(__get_user(dom, &uop->dom) |
                   __get_user(ref, &uop->ref) |
-                  __get_user(host_virt_addr, &uop->host_virt_addr) |
+                  __get_user(addr, &uop->host_addr) |
                   __get_user(dev_hst_ro_flags, &uop->flags)) )
     {
         DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
         return -EFAULT; /* don't set status */
     }
 
-
-    if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) &&
-         unlikely(!__addr_ok(host_virt_addr)))
-    {
-        DPRINTK("Bad virtual address (%lx) or flags (%x).\n",
-                host_virt_addr, dev_hst_ro_flags);
+    if ( (dev_hst_ro_flags & GNTMAP_host_map) &&
+         ( (addr == 0) ||
+           (!(dev_hst_ro_flags & GNTMAP_contains_pte) && 
+            unlikely(!__addr_ok(addr))) ) )
+    {
+        DPRINTK("Bad virtual address (%"PRIx64") or flags (%"PRIx16").\n",
+                addr, dev_hst_ro_flags);
         (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
         return GNTST_bad_gntref;
     }
@@ -386,12 +401,20 @@
         grant_mapping_t *new_mt;
         grant_table_t   *lgt      = ld->grant_table;
 
+        if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES )
+        {
+            put_domain(rd);
+            DPRINTK("Maptrack table is at maximum size.\n");
+            (void)__put_user(GNTST_no_device_space, &uop->handle);
+            return GNTST_no_device_space;
+        }
+
         /* Grow the maptrack table. */
         new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
         if ( new_mt == NULL )
         {
             put_domain(rd);
-            DPRINTK("No more map handles available\n");
+            DPRINTK("No more map handles available.\n");
             (void)__put_user(GNTST_no_device_space, &uop->handle);
             return GNTST_no_device_space;
         }
@@ -405,7 +428,7 @@
         lgt->maptrack_order   += 1;
         lgt->maptrack_limit  <<= 1;
 
-        printk("Doubled maptrack size\n");
+        DPRINTK("Doubled maptrack size\n");
         handle = get_maptrack_handle(ld->grant_table);
     }
 
@@ -416,7 +439,7 @@
 
     if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
                                                   dev_hst_ro_flags,
-                                                  host_virt_addr, &frame)))
+                                                  addr, &frame)))
     {
         /*
          * Only make the maptrack live _after_ writing the pte, in case we 
@@ -428,10 +451,11 @@
             = (ref << MAPTRACK_REF_SHIFT) |
               (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
 
-        (void)__put_user(frame, &uop->dev_bus_addr);
-
-        if ( dev_hst_ro_flags & GNTMAP_host_map )
-            *va = host_virt_addr;
+        (void)__put_user((u64)frame << PAGE_SHIFT, &uop->dev_bus_addr);
+
+        if ( ( dev_hst_ro_flags & GNTMAP_host_map ) &&
+             !( dev_hst_ro_flags & GNTMAP_contains_pte) )
+            *va = addr;
 
         (void)__put_user(handle, &uop->handle);
     }
@@ -449,12 +473,12 @@
 gnttab_map_grant_ref(
     gnttab_map_grant_ref_t *uop, unsigned int count)
 {
-    int i, flush = 0;
+    int i, rc, flush = 0;
     unsigned long va = 0;
 
     for ( i = 0; i < count; i++ )
-        if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
-            flush++;
+        if ( (rc =__gnttab_map_grant_ref(&uop[i], &va)) >= 0 )
+            flush += rc;
 
     if ( flush == 1 )
         flush_tlb_one_mask(current->domain->cpumask, va);
@@ -469,28 +493,30 @@
     gnttab_unmap_grant_ref_t *uop,
     unsigned long *va)
 {
-    domid_t        dom;
-    grant_ref_t    ref;
-    u16            handle;
-    struct domain *ld, *rd;
-
+    domid_t          dom;
+    grant_ref_t      ref;
+    u16              handle;
+    struct domain   *ld, *rd;
     active_grant_entry_t *act;
-    grant_entry_t *sha;
+    grant_entry_t   *sha;
     grant_mapping_t *map;
-    u16            flags;
-    s16            rc = 1;
-    unsigned long  frame, virt;
+    u16              flags;
+    s16              rc = 1;
+    u64              addr, dev_bus_addr;
+    unsigned long    frame;
 
     ld = current->domain;
 
     /* Bitwise-OR avoids short-circuiting which screws control flow. */
-    if ( unlikely(__get_user(virt, &uop->host_virt_addr) |
-                  __get_user(frame, &uop->dev_bus_addr) |
+    if ( unlikely(__get_user(addr, &uop->host_addr) |
+                  __get_user(dev_bus_addr, &uop->dev_bus_addr) |
                   __get_user(handle, &uop->handle)) )
     {
         DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
         return -EFAULT; /* don't set status */
     }
+
+    frame = (unsigned long)(dev_bus_addr >> PAGE_SHIFT);
 
     map = &ld->grant_table->maptrack[handle];
 
@@ -529,15 +555,6 @@
     if ( frame == 0 )
     {
         frame = act->frame;
-    }
-    else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
-    {
-        if ( !( flags & GNTMAP_device_map ) )
-            PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
-                     "Bad frame number: frame not mapped for dev access.\n");
-        frame = act->frame;
-
-        /* Frame will be unmapped for device access below if virt addr okay. */
     }
     else
     {
@@ -554,41 +571,19 @@
         /* Frame is now unmapped for device access. */
     }
 
-    if ( (virt != 0) &&
+    if ( (addr != 0) &&
          (flags & GNTMAP_host_map) &&
          ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
     {
-        l1_pgentry_t   *pl1e;
-        unsigned long   _ol1e;
-
-        pl1e = &linear_pg_table[l1_linear_offset(virt)];
-
-        if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
-        {
-            DPRINTK("Could not find PTE entry for address %lx\n", virt);
-            rc = -EINVAL;
-            goto unmap_out;
-        }
-
-        /*
-         * Check that the virtual address supplied is actually mapped to 
-         * act->frame.
-         */
-        if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
-        {
-            DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
-                    _ol1e, virt, frame);
-            rc = -EINVAL;
-            goto unmap_out;
-        }
-
-        /* Delete pagetable entry. */
-        if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
-        {
-            DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n",
-                    pl1e, virt);
-            rc = -EINVAL;
-            goto unmap_out;
+        if ( flags & GNTMAP_contains_pte )
+        {
+            if ( (rc = clear_grant_pte_mapping(addr, frame, ld)) < 0 )
+                goto unmap_out;
+        }
+        else
+        {
+            if ( (rc = clear_grant_va_mapping(addr, frame)) < 0 )
+                goto unmap_out;
         }
 
         map->ref_and_flags &= ~GNTMAP_host_map;
@@ -596,17 +591,9 @@
         act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
                                               : GNTPIN_hstw_inc;
 
-        if ( frame == GNTUNMAP_DEV_FROM_VIRT )
-        {
-            act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
-                                                  : GNTPIN_devw_inc;
-
-            map->ref_and_flags &= ~GNTMAP_device_map;
-            (void)__put_user(0, &uop->dev_bus_addr);
-        }
-
         rc = 0;
-        *va = virt;
+        if ( !( flags & GNTMAP_contains_pte) )
+            *va = addr;
     }
 
     if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
@@ -630,6 +617,7 @@
 
     if ( act->pin == 0 )
     {
+        act->frame = 0xdeadbeef;
         clear_bit(_GTF_reading, &sha->flags);
         put_page(&frame_table[frame]);
     }
@@ -768,7 +756,7 @@
         if ( sha_copy.flags )
         {
             DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
-                    "dom:(%hu) frame:(%lx)\n",
+                    "dom:(%hu) frame:(%x)\n",
                     op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
         }
     }
@@ -822,18 +810,20 @@
     for (i = 0; i < count; i++) {
         gnttab_donate_t *gop = &uop[i];
 #if GRANT_DEBUG
-        printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+        printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
                i, gop->mfn, gop->domid, gop->handle);
 #endif
         page = &frame_table[gop->mfn];
-
+        
         if (unlikely(IS_XEN_HEAP_FRAME(page))) { 
-            printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long) gop->mfn);
+            printk("gnttab_donate: xen heap frame mfn=%lx\n", 
+                   (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
         }
         if (unlikely(!pfn_valid(page_to_pfn(page)))) {
-            printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long) gop->mfn);
+            printk("gnttab_donate: invalid pfn for mfn=%lx\n", 
+                   (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
         }
@@ -859,7 +849,8 @@
             if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
                          (1 | PGC_allocated)) || unlikely(_nd != _d)) {
                 printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
-                        " caf=%08x, taf=%08x\n", (void *) page_to_pfn(page),
+                       " caf=%08x, taf=%" PRtype_info "\n", 
+                       (void *) page_to_pfn(page),
                         d, d->domain_id, unpickle_domptr(_nd), x, 
                         page->u.inuse.type_info);
                 spin_unlock(&d->page_alloc_lock);
@@ -918,9 +909,9 @@
         if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
             unlikely(e->tot_pages == e->max_pages) ||
             unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
-            printk("gnttab_donate: Transferee has no reservation headroom (%d,%d), or "
-                    "provided a bad grant ref (%08x), or is dying (%p).\n",
-                    e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+            printk("gnttab_donate: Transferee has no reservation headroom (%d,"
+                   "%d) or provided a bad grant ref (%08x) or is dying (%p)\n",
+                   e->tot_pages, e->max_pages, gop->handle, e->d_flags);
             spin_unlock(&e->page_alloc_lock);
             put_domain(e);
             result = GNTST_general_error;
@@ -933,9 +924,9 @@
         }
         list_add_tail(&page->list, &e->page_list);
         page_set_owner(page, e);
-
+        
         spin_unlock(&e->page_alloc_lock);
-
+        
         /*
          * Transfer is all done: tell the guest about its new page
          * frame.
@@ -943,7 +934,7 @@
         gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
         
         put_domain(e);
-
+        
         gop->status = GNTST_okay;
     }
     return result;
@@ -954,48 +945,53 @@
     unsigned int cmd, void *uop, unsigned int count)
 {
     long rc;
-
+    struct domain *d = current->domain;
+    
     if ( count > 512 )
         return -EINVAL;
-
-    LOCK_BIGLOCK(current->domain);
-
+    
+    LOCK_BIGLOCK(d);
+    
+    sync_pagetable_state(d);
+    
     rc = -EFAULT;
     switch ( cmd )
-    {
-    case GNTTABOP_map_grant_ref:
-        if ( unlikely(!array_access_ok(
-            uop, count, sizeof(gnttab_map_grant_ref_t))) )
-            goto out;
-        rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
-        break;
-    case GNTTABOP_unmap_grant_ref:
-        if ( unlikely(!array_access_ok(
-            uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
-            goto out;
-        rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count);
-        break;
-    case GNTTABOP_setup_table:
-        rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
-        break;
+        {
+        case GNTTABOP_map_grant_ref:
+            if ( unlikely(!array_access_ok(
+                              uop, count, sizeof(gnttab_map_grant_ref_t))) )
+                goto out;
+            rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
+            break;
+        case GNTTABOP_unmap_grant_ref:
+            if ( unlikely(!array_access_ok(
+                              uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
+                goto out;
+            rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, 
+                                        count);
+            break;
+        case GNTTABOP_setup_table:
+            rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
+            break;
 #if GRANT_DEBUG
-    case GNTTABOP_dump_table:
-        rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
-        break;
+        case GNTTABOP_dump_table:
+            rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
+            break;
 #endif
-    case GNTTABOP_donate:
-        if (unlikely(!array_access_ok(uop, count, sizeof(gnttab_donate_t))))
-            goto out;
-        rc = gnttab_donate(uop, count);
-        break;
-    default:
-        rc = -ENOSYS;
-        break;
-    }
-
-out:
-    UNLOCK_BIGLOCK(current->domain);
-
+        case GNTTABOP_donate:
+            if (unlikely(!array_access_ok(uop, count, 
+                                          sizeof(gnttab_donate_t))))
+                goto out;
+            rc = gnttab_donate(uop, count);
+            break;
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    
+  out:
+    UNLOCK_BIGLOCK(d);
+    
     return rc;
 }
 
@@ -1009,106 +1005,101 @@
      * Called a _lot_ at domain creation because pages mapped by priv domains
      * also traverse this.
      */
-
+    
     /* Note: If the same frame is mapped multiple times, and then one of
      *       the ptes is overwritten, which maptrack handle gets invalidated?
      * Advice: Don't do it. Explicitly unmap.
      */
-
+    
     unsigned int handle, ref, refcount;
     grant_table_t        *lgt, *rgt;
     active_grant_entry_t *act;
     grant_mapping_t      *map;
     int found = 0;
-
+    
     lgt = ld->grant_table;
-
+    
 #if GRANT_DEBUG_VERBOSE
-    if ( ld->domain_id != 0 )
-    {
-        DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
-                rd->domain_id, ld->domain_id, frame, readonly);
-    }
+    if ( ld->domain_id != 0 ) {
+            DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
+                    rd->domain_id, ld->domain_id, frame, readonly);
+      }
 #endif
-
+    
     /* Fast exit if we're not mapping anything using grant tables */
     if ( lgt->map_count == 0 )
         return 0;
-
-    if ( get_domain(rd) == 0 )
-    {
+    
+    if ( get_domain(rd) == 0 ) {
         DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
                 rd->domain_id);
         return 0;
     }
-
+    
     rgt = rd->grant_table;
-
-    for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
-    {
+    
+    for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) {
+
         map = &lgt->maptrack[handle];
-
+            
         if ( map->domid != rd->domain_id )
             continue;
-
+        
         if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
-             ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
-        {
+             ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) {
+
             ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
             act = &rgt->active[ref];
-
+                    
             spin_lock(&rgt->lock);
-
-            if ( act->frame != frame )
-            {
+                    
+            if ( act->frame != frame ) {
                 spin_unlock(&rgt->lock);
                 continue;
             }
-
+                    
             refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
-                                             : GNTPIN_hstw_mask );
-            if ( refcount == 0 )
-            {
+                                    : GNTPIN_hstw_mask );
+
+            if ( refcount == 0 ) {
                 spin_unlock(&rgt->lock);
                 continue;
             }
-
+                    
             /* gotcha */
             DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
                     rd->domain_id, ld->domain_id, frame, readonly);
-
+                    
             if ( readonly )
                 act->pin -= GNTPIN_hstr_inc;
-            else
-            {
+            else {
                 act->pin -= GNTPIN_hstw_inc;
-
+                            
                 /* any more granted writable mappings? */
-                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
-                {
+                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) {
                     clear_bit(_GTF_writing, &rgt->shared[ref].flags);
                     put_page_type(&frame_table[frame]);
                 }
             }
-
-            if ( act->pin == 0 )
-            {
+                
+            if ( act->pin == 0 ) {
                 clear_bit(_GTF_reading, &rgt->shared[ref].flags);
                 put_page(&frame_table[frame]);
             }
+
             spin_unlock(&rgt->lock);
-
+                    
             clear_bit(GNTMAP_host_map, &map->ref_and_flags);
-
+                    
             if ( !(map->ref_and_flags & GNTMAP_device_map) )
                 put_maptrack_handle(lgt, handle);
-
+                    
             found = 1;
             break;
         }
     }
     put_domain(rd);
-
+    
     return found;
 }
 
@@ -1124,8 +1115,10 @@
     int            retries = 0;
     unsigned long  target_pfn;
 
+#if GRANT_DEBUG_VERBOSE
     DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
             rd->domain_id, ld->domain_id, ref);
+#endif
 
     if ( unlikely((rgt = rd->grant_table) == NULL) ||
          unlikely(ref >= NR_GRANT_ENTRIES) )
@@ -1203,8 +1196,10 @@
     grant_entry_t  *sha;
     unsigned long   pfn;
 
+#if GRANT_DEBUG_VERBOSE
     DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
             rd->domain_id, ld->domain_id, ref);
+#endif
 
     sha = &rd->grant_table->shared[ref];
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/lib.c
--- a/xen/common/lib.c  Wed Aug 24 02:43:18 2005
+++ b/xen/common/lib.c  Thu Aug 25 22:53:20 2005
@@ -450,8 +450,10 @@
                ret <<= 10;
        case 'M': case 'm':
                ret <<= 10;
-       case 'K': case 'k':
+       case 'K': case 'k': default:
                ret <<= 10;
+       case 'B': case 'b':
+               break;
        }
 
        return ret;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Aug 24 02:43:18 2005
+++ b/xen/common/page_alloc.c   Thu Aug 25 22:53:20 2005
@@ -52,7 +52,6 @@
  *  One bit per page of memory. Bit set => page is allocated.
  */
 
-static unsigned long  bitmap_size; /* in bytes */
 static unsigned long *alloc_bitmap;
 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
 
@@ -135,10 +134,16 @@
 /* Initialise allocator to handle up to @max_page pages. */
 physaddr_t init_boot_allocator(physaddr_t bitmap_start)
 {
+    unsigned long bitmap_size;
+
     bitmap_start = round_pgup(bitmap_start);
 
-    /* Allocate space for the allocation bitmap. */
+    /*
+     * Allocate space for the allocation bitmap. Include an extra longword
+     * of padding for possible overrun in map_alloc and map_free.
+     */
     bitmap_size  = max_page / 8;
+    bitmap_size += sizeof(unsigned long);
     bitmap_size  = round_pgup(bitmap_size);
     alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);
 
@@ -171,7 +176,7 @@
         else if ( *p != '\0' )
             break;
 
-        if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
+        if ( (bad_pfn < max_page) && !allocated_in_map(bad_pfn) )
         {
             printk("Marking page %lx as bad\n", bad_pfn);
             map_alloc(bad_pfn, 1);
@@ -183,7 +188,7 @@
 {
     unsigned long pg, i;
 
-    for ( pg = 0; (pg + nr_pfns) < (bitmap_size*8); pg += pfn_align )
+    for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
     {
         for ( i = 0; i < nr_pfns; i++ )
             if ( allocated_in_map(pg + i) )
@@ -362,7 +367,7 @@
 
     printk("Scrubbing Free RAM: ");
 
-    for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
+    for ( pfn = 0; pfn < max_page; pfn++ )
     {
         /* Every 100MB, print a progress dot. */
         if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
@@ -413,6 +418,8 @@
 
     ps = round_pgup(ps);
     pe = round_pgdown(pe);
+    if ( pe <= ps )
+        return;
 
     memguard_guard_range(phys_to_virt(ps), pe - ps);
 
@@ -482,19 +489,25 @@
 
     ps = round_pgup(ps) >> PAGE_SHIFT;
     pe = round_pgdown(pe) >> PAGE_SHIFT;
-
-    if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) {
-        init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
-        init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN),
-                        pe - MAX_DMADOM_PFN);
+    if ( pe <= ps )
+        return;
+
+    if ( (ps < MAX_DMADOM_PFN) && (pe > MAX_DMADOM_PFN) )
+    {
+        init_heap_pages(
+            MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
+        init_heap_pages(
+            MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN), pe - MAX_DMADOM_PFN);
     }
     else
+    {
         init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
-}
-
-
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order,
-                                     unsigned int flags)
+    }
+}
+
+
+struct pfn_info *alloc_domheap_pages(
+    struct domain *d, unsigned int order, unsigned int flags)
 {
     struct pfn_info *pg;
     cpumask_t mask;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/perfc.c
--- a/xen/common/perfc.c        Wed Aug 24 02:43:18 2005
+++ b/xen/common/perfc.c        Thu Aug 25 22:53:20 2005
@@ -7,6 +7,7 @@
 #include <xen/spinlock.h>
 #include <public/dom0_ops.h>
 #include <asm/uaccess.h>
+#include <xen/mm.h>
 
 #undef  PERFCOUNTER
 #undef  PERFCOUNTER_CPU
@@ -81,6 +82,10 @@
         }
         printk("\n");
     }
+
+#ifdef PERF_ARRAYS
+    ptwr_eip_stat_print();
+#endif
 }
 
 void perfc_reset(unsigned char key)
@@ -118,6 +123,10 @@
             break;
         }
     }
+
+#ifdef PERF_ARRAYS
+    ptwr_eip_stat_reset();
+#endif
 }
 
 static dom0_perfc_desc_t perfc_d[NR_PERFCTRS];
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/schedule.c
--- a/xen/common/schedule.c     Wed Aug 24 02:43:18 2005
+++ b/xen/common/schedule.c     Thu Aug 25 22:53:20 2005
@@ -38,6 +38,8 @@
 #include <xen/mm.h>
 #include <public/sched_ctl.h>
 
+extern void arch_getdomaininfo_ctxt(struct vcpu *,
+                                    struct vcpu_guest_context *);
 /* opt_sched: scheduler - default to SEDF */
 static char opt_sched[10] = "sedf";
 string_param("sched", opt_sched);
@@ -82,7 +84,8 @@
     int i;
 
     SCHED_OP(free_task, d);
-    for (i = 0; i < MAX_VIRT_CPUS; i++)
+    /* vcpu 0 has to be the last one destructed. */
+    for (i = MAX_VIRT_CPUS-1; i >= 0; i--)
         if ( d->vcpu[i] )
             arch_free_vcpu_struct(d->vcpu[i]);
 
@@ -295,10 +298,36 @@
     return 0;
 }
 
+/* Copy the arch_getdomaininfo_ctxt() state of a downed local VCPU to @arg. */
+static long do_vcpu_pickle(int vcpu, unsigned long arg)
+{
+    struct vcpu *v;
+    vcpu_guest_context_t *c;
+    int ret = 0;
+
+    /* vcpu is signed: a negative index must not reach the vcpu[] lookup. */
+    if ((vcpu < 0) || (vcpu >= MAX_VIRT_CPUS))
+        return -EINVAL;
+    v = current->domain->vcpu[vcpu];
+    if (!v)
+        return -ESRCH;
+    /* Don't pickle vcpus which are currently running */
+    if (!test_bit(_VCPUF_down, &v->vcpu_flags))
+        return -EBUSY;
+    c = xmalloc(vcpu_guest_context_t);
+    if (!c)
+        return -ENOMEM;
+    arch_getdomaininfo_ctxt(v, c);
+    if (copy_to_user((vcpu_guest_context_t *)arg, c, sizeof(*c)))
+        ret = -EFAULT;
+    xfree(c);
+    return ret;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
-long do_sched_op(unsigned long op)
+long do_sched_op(unsigned long op, unsigned long arg)
 {
     long ret = 0;
 
@@ -332,6 +361,11 @@
     case SCHEDOP_vcpu_up:
     {
         ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
+        break;
+    }
+    case SCHEDOP_vcpu_pickle:
+    {
+        ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg);
         break;
     }
 
@@ -474,13 +508,14 @@
 
     set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
 
-    /* Must be protected by the schedule_lock! */
+    if ( unlikely(prev == next) )
+    {
+        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+        return continue_running(prev);
+    }
+
+    clear_bit(_VCPUF_running, &prev->vcpu_flags);
     set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
-    if ( unlikely(prev == next) )
-        return continue_running(prev);
 
     perfc_incrc(sched_ctx);
 
@@ -517,6 +552,10 @@
              next->domain->domain_id, next->vcpu_id);
 
     context_switch(prev, next);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    context_switch_finalise(next);
 }
 
 /* No locking needed -- pointer comparison is safe :-) */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/common/trace.c
--- a/xen/common/trace.c        Wed Aug 24 02:43:18 2005
+++ b/xen/common/trace.c        Thu Aug 25 22:53:20 2005
@@ -113,10 +113,10 @@
     switch ( tbc->op)
     {
     case DOM0_TBUF_GET_INFO:
-        tbc->cpu_mask  = tb_cpu_mask;
-        tbc->evt_mask  = tb_event_mask;
-        tbc->mach_addr = __pa(t_bufs[0]);
-        tbc->size      = opt_tbuf_size * PAGE_SIZE;
+        tbc->cpu_mask   = tb_cpu_mask;
+        tbc->evt_mask   = tb_event_mask;
+        tbc->buffer_mfn = __pa(t_bufs[0]) >> PAGE_SHIFT;
+        tbc->size       = opt_tbuf_size * PAGE_SIZE;
         break;
     case DOM0_TBUF_SET_CPU_MASK:
         tb_cpu_mask = tbc->cpu_mask;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Aug 24 02:43:18 2005
+++ b/xen/drivers/char/console.c        Thu Aug 25 22:53:20 2005
@@ -652,8 +652,9 @@
 void panic(const char *fmt, ...)
 {
     va_list args;
-    char buf[128], cpustr[10];
+    char buf[128];
     unsigned long flags;
+    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
     extern void machine_restart(char *);
     
     debugtrace_dump();
@@ -665,16 +666,13 @@
     debugger_trap_immediate();
 
     /* Spit out multiline message in one go. */
-    spin_lock_irqsave(&console_lock, flags);
-    __putstr("\n****************************************\n");
-    __putstr("Panic on CPU");
-    sprintf(cpustr, "%d", smp_processor_id());
-    __putstr(cpustr);
-    __putstr(":\n");
-    __putstr(buf);
-    __putstr("****************************************\n\n");
-    __putstr("Reboot in five seconds...\n");
-    spin_unlock_irqrestore(&console_lock, flags);
+    spin_lock_irqsave(&lock, flags);
+    printk("\n****************************************\n");
+    printk("Panic on CPU %d:\n", smp_processor_id());
+    printk(buf);
+    printk("****************************************\n\n");
+    printk("Reboot in five seconds...\n");
+    spin_unlock_irqrestore(&lock, flags);
 
     watchdog_disable();
     mdelay(5000);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c        Wed Aug 24 02:43:18 2005
+++ b/xen/drivers/char/ns16550.c        Thu Aug 25 22:53:20 2005
@@ -15,7 +15,12 @@
 #include <xen/serial.h>
 #include <asm/io.h>
 
-/* Config serial port with a string <baud>,DPS,<io-base>,<irq>. */
+/*
+ * Configure serial port with a string <baud>,DPS,<io-base>,<irq>.
+ * The tail of the string can be omitted if platform defaults are sufficient.
+ * If the baud rate is pre-configured, perhaps by a bootloader, then 'auto'
+ * can be specified in place of a numeric baud rate.
+ */
 static char opt_com1[30] = "", opt_com2[30] = "";
 string_param("com1", opt_com1);
 string_param("com2", opt_com2);
@@ -154,7 +159,7 @@
     ns_write_reg(uart, IER, 0);
 
     /* Line control and baud-rate generator. */
-    if ( uart->baud != 0 )
+    if ( uart->baud != BAUD_AUTO )
     {
         ns_write_reg(uart, LCR, lcr | LCR_DLAB);
         ns_write_reg(uart, DLL, 115200/uart->baud); /* baud lo */
@@ -244,38 +249,50 @@
 {
     int baud;
 
+    /* No user-specified configuration? */
     if ( (conf == NULL) || (*conf == '\0') )
-        goto config_parsed;
-
-    if ( (baud = simple_strtol(conf, &conf, 10)) != 0 )
+    {
+        /* Some platforms may automatically probe the UART configuration. */
+        if ( uart->baud != 0 )
+            goto config_parsed;
+        return;
+    }
+
+    if ( strncmp(conf, "auto", 4) == 0 )
+    {
+        uart->baud = BAUD_AUTO;
+        conf += 4;
+    }
+    else if ( (baud = simple_strtoul(conf, &conf, 10)) != 0 )
         uart->baud = baud;
 
     if ( *conf != ',' )
         goto config_parsed;
     conf++;
 
-    uart->data_bits = simple_strtol(conf, &conf, 10);
+    uart->data_bits = simple_strtoul(conf, &conf, 10);
 
     uart->parity = parse_parity_char(*conf);
     conf++;
 
-    uart->stop_bits = simple_strtol(conf, &conf, 10);
+    uart->stop_bits = simple_strtoul(conf, &conf, 10);
 
     if ( *conf == ',' )
     {
         conf++;
-        uart->io_base = simple_strtol(conf, &conf, 0);
+        uart->io_base = simple_strtoul(conf, &conf, 0);
 
         if ( *conf == ',' )
         {
             conf++;
-            uart->irq = simple_strtol(conf, &conf, 10);
+            uart->irq = simple_strtoul(conf, &conf, 10);
         }
     }
 
  config_parsed:
     /* Sanity checks. */
-    if ( (uart->baud != 0) && ((uart->baud < 1200) || (uart->baud > 115200)) )
+    if ( (uart->baud != BAUD_AUTO) &&
+         ((uart->baud < 1200) || (uart->baud > 115200)) )
         PARSE_ERR("Baud rate %d outside supported range.", uart->baud);
     if ( (uart->data_bits < 5) || (uart->data_bits > 8) )
         PARSE_ERR("%d data bits are unsupported.", uart->data_bits);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/e820.h
--- a/xen/include/asm-x86/e820.h        Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/e820.h        Thu Aug 25 22:53:20 2005
@@ -3,7 +3,7 @@
 
 #include <asm/page.h>
 
-#define E820MAX        32
+#define E820MAX        128
 
 #define E820_RAM          1
 #define E820_RESERVED     2
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h       Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/event.h       Thu Aug 25 22:53:20 2005
@@ -11,6 +11,19 @@
 
 static inline void evtchn_notify(struct vcpu *v)
 {
+    /*
+     * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
+     * pending flag. These values may fluctuate (after all, we hold no
+     * locks) but the key insight is that each change will cause
+     * evtchn_upcall_pending to be polled.
+     * 
+     * NB2. We save VCPUF_running across the unblock to avoid a needless
+     * IPI for domains that we IPI'd to unblock.
+     */
+    int running = test_bit(_VCPUF_running, &v->vcpu_flags);
+    vcpu_unblock(v);
+    if ( running )
+        smp_send_event_check_cpu(v->processor);
 }
 
 #endif
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/io.h
--- a/xen/include/asm-x86/io.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/io.h  Thu Aug 25 22:53:20 2005
@@ -2,6 +2,7 @@
 #define _ASM_IO_H
 
 #include <xen/config.h>
+#include <xen/types.h>
 #include <asm/page.h>
 
 #define IO_SPACE_LIMIT 0xffff
@@ -45,11 +46,7 @@
 /*
  * Change "struct pfn_info" to physical address.
  */
-#ifdef CONFIG_HIGHMEM64G
-#define page_to_phys(page)  ((u64)(page - frame_table) << PAGE_SHIFT)
-#else
-#define page_to_phys(page)  ((page - frame_table) << PAGE_SHIFT)
-#endif
+#define page_to_phys(page)  ((physaddr_t)(page - frame_table) << PAGE_SHIFT)
 
 #define page_to_pfn(_page)  ((unsigned long)((_page) - frame_table))
 #define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/mm.h  Thu Aug 25 22:53:20 2005
@@ -36,7 +36,7 @@
             /* Owner of this page (NULL if page is anonymous). */
             u32 _domain; /* pickled format */
             /* Type reference count and various PGT_xxx flags and fields. */
-            u32 type_info;
+            unsigned long type_info;
         } inuse;
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
@@ -77,6 +77,7 @@
  /* Owning guest has pinned this page to its current type? */
 #define _PGT_pinned         27
 #define PGT_pinned          (1U<<_PGT_pinned)
+#if defined(__i386__)
  /* The 11 most significant bits of virt address if this is a page table. */
 #define PGT_va_shift        16
 #define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
@@ -84,6 +85,16 @@
 #define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
  /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
 #define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
+#elif defined(__x86_64__)
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        32
+#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+#endif
+
  /* 16-bit count of uses of this frame as its current type. */
 #define PGT_count_mask      ((1U<<16)-1)
 
@@ -114,11 +125,13 @@
 #if defined(__i386__)
 #define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
 #define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
+#define PRtype_info "08lx" /* should only be used for printk's */
 #elif defined(__x86_64__)
 static inline struct domain *unpickle_domptr(u32 _domain)
 { return (_domain == 0) ? NULL : __va(_domain); }
 static inline u32 pickle_domptr(struct domain *domain)
 { return (domain == NULL) ? 0 : (u32)__pa(domain); }
+#define PRtype_info "016lx"/* should only be used for printk's */
 #endif
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
@@ -144,8 +157,8 @@
 extern unsigned long max_page;
 void init_frametable(void);
 
-int alloc_page_type(struct pfn_info *page, unsigned int type);
-void free_page_type(struct pfn_info *page, unsigned int type);
+int alloc_page_type(struct pfn_info *page, unsigned long type);
+void free_page_type(struct pfn_info *page, unsigned long type);
 extern void invalidate_shadow_ldt(struct vcpu *d);
 extern int shadow_remove_all_write_access(
     struct domain *d, unsigned long gpfn, unsigned long gmfn);
@@ -183,7 +196,7 @@
              unlikely(d != _domain) )                /* Wrong owner? */
         {
             if ( !_shadow_mode_refcounts(domain) )
-                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%08x\n",
+                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
                         page_to_pfn(page), domain, unpickle_domptr(d),
                         x, page->u.inuse.type_info);
             return 0;
@@ -200,7 +213,7 @@
 }
 
 void put_page_type(struct pfn_info *page);
-int  get_page_type(struct pfn_info *page, u32 type);
+int  get_page_type(struct pfn_info *page, unsigned long type);
 int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
 
@@ -213,7 +226,7 @@
 
 static inline int get_page_and_type(struct pfn_info *page,
                                     struct domain *domain,
-                                    u32 type)
+                                    unsigned long type)
 {
     int rc = get_page(page, domain);
 
@@ -300,6 +313,9 @@
     unsigned int prev_nr_updates;
     /* Exec domain which created writable mapping. */
     struct vcpu *vcpu;
+    /* EIP of the address which took the original write fault
+       used for stats collection only */
+    unsigned long eip;
 };
 
 #define PTWR_PT_ACTIVE 0
@@ -311,7 +327,8 @@
 int  ptwr_init(struct domain *);
 void ptwr_destroy(struct domain *);
 void ptwr_flush(struct domain *, const int);
-int  ptwr_do_page_fault(struct domain *, unsigned long);
+int  ptwr_do_page_fault(struct domain *, unsigned long, 
+                       struct cpu_user_regs *);
 int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
 
 void cleanup_writable_pagetable(struct domain *d);
@@ -334,6 +351,18 @@
 #define _audit_domain(_d, _f) ((void)0)
 #define audit_domain(_d)      ((void)0)
 #define audit_domains()       ((void)0)
+
+#endif
+
+#ifdef PERF_ARRAYS
+
+void ptwr_eip_stat_reset();
+void ptwr_eip_stat_print();
+
+#else
+
+#define ptwr_eip_stat_reset() ((void)0)
+#define ptwr_eip_stat_print() ((void)0)
 
 #endif
 
@@ -345,8 +374,14 @@
  * Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must 
  * hold a reference to the page.
  */
-int update_grant_va_mapping(unsigned long va,
-                            l1_pgentry_t _nl1e, 
-                            struct domain *d,
-                            struct vcpu *v);
+int update_grant_va_mapping(
+    unsigned long va, l1_pgentry_t _nl1e, 
+    struct domain *d, struct vcpu *v);
+int update_grant_pte_mapping(
+    unsigned long pte_addr, l1_pgentry_t _nl1e, 
+    struct domain *d, struct vcpu *v);
+int clear_grant_va_mapping(unsigned long addr, unsigned long frame);
+int clear_grant_pte_mapping(
+    unsigned long addr, unsigned long frame, struct domain *d);
+
 #endif /* __ASM_X86_MM_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/page.h        Thu Aug 25 22:53:20 2005
@@ -189,6 +189,9 @@
 #define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
 #define pfn_valid(_pfn)     ((_pfn) < max_page)
 
+#define pfn_to_phys(pfn)    ((physaddr_t)(pfn) << PAGE_SHIFT)
+#define phys_to_pfn(pa)     ((unsigned long)((pa) >> PAGE_SHIFT))
+
 /* High table entries are reserved by the hypervisor. */
 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
 #define DOMAIN_ENTRIES_PER_L2_PAGETABLE     \
@@ -208,20 +211,21 @@
      + DOMAIN_ENTRIES_PER_L4_PAGETABLE)
 #endif
 
-#define linear_l1_table                                                 \
+#define LINEAR_PT_OFFSET (LINEAR_PT_VIRT_START & VADDR_MASK)
+#define linear_l1_table                                             \
     ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
-#define __linear_l2_table                                               \
-    ((l2_pgentry_t *)(LINEAR_PT_VIRT_START +                            \
-                     (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0))))
-#define __linear_l3_table                                               \
-    ((l3_pgentry_t *)(LINEAR_PT_VIRT_START +                            \
-                     (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) +   \
-                     (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1))))
-#define __linear_l4_table                                               \
-    ((l4_pgentry_t *)(LINEAR_PT_VIRT_START +                            \
-                     (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) +   \
-                     (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1)) +   \
-                     (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<2))))
+#define __linear_l2_table                                           \
+    ((l2_pgentry_t *)(LINEAR_PT_VIRT_START +                        \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0))))
+#define __linear_l3_table                                           \
+    ((l3_pgentry_t *)(LINEAR_PT_VIRT_START +                        \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) +   \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1))))
+#define __linear_l4_table                                           \
+    ((l4_pgentry_t *)(LINEAR_PT_VIRT_START +                        \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<0)) +   \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<1)) +   \
+                     (LINEAR_PT_OFFSET >> (PAGETABLE_ORDER<<2))))
 
 #define linear_pg_table linear_l1_table
 #define linear_l2_table(_ed) ((_ed)->arch.guest_vtable)
@@ -279,13 +283,9 @@
 static __inline__ int get_order(unsigned long size)
 {
     int order;
-    
-    size = (size-1) >> (PAGE_SHIFT-1);
-    order = -1;
-    do {
+    size = (size-1) >> PAGE_SHIFT;
+    for ( order = 0; size; order++ )
         size >>= 1;
-        order++;
-    } while (size);
     return order;
 }
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/shadow.h      Thu Aug 25 22:53:20 2005
@@ -483,9 +483,9 @@
 #ifndef NDEBUG
     else if ( mfn < max_page )
     {
-        SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
+        SH_VLOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
                mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
-        SH_LOG("dom=%p caf=%08x taf=%08x", 
+        SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info, 
                page_get_owner(&frame_table[mfn]),
                frame_table[mfn].count_info, 
                frame_table[mfn].u.inuse.type_info );
@@ -602,14 +602,14 @@
     /* XXX This needs more thought... */
     printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
            __func__, page_to_pfn(page));
-    printk("Before: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+    printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
            page->count_info, page->u.inuse.type_info);
 
     shadow_lock(d);
     shadow_remove_all_access(d, page_to_pfn(page));
     shadow_unlock(d);
 
-    printk("After:  mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+    printk("After:  mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
            page->count_info, page->u.inuse.type_info);
 }
 
@@ -648,7 +648,7 @@
 
     if ( unlikely(nx == 0) )
     {
-        printk("get_shadow_ref overflow, gmfn=%x smfn=%lx\n",
+        printk("get_shadow_ref overflow, gmfn=%" PRtype_info  " smfn=%lx\n",
                frame_table[smfn].u.inuse.type_info & PGT_mfn_mask,
                smfn);
         BUG();
@@ -678,7 +678,8 @@
 
     if ( unlikely(x == 0) )
     {
-        printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%08x\n",
+        printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%" 
+               PRtype_info "\n",
                smfn,
                frame_table[smfn].count_info,
                frame_table[smfn].u.inuse.type_info);
@@ -735,7 +736,7 @@
 
     if ( unlikely(!VALID_MFN(gmfn)) )
     {
-        SH_LOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
+        SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
         *spte_p = l1e_empty();
         return 0;
     }
@@ -769,7 +770,7 @@
 
     if ( unlikely(!VALID_MFN(mfn)) )
     {
-        SH_LOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
+        SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
         *spte_p = l1e_empty();
         return 0;
     }
@@ -1200,7 +1201,7 @@
 #ifndef NDEBUG
         if ( ___shadow_status(d, gpfn, stype) != 0 )
         {
-            printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
+            printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%" PRtype_info " "
                    "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
                    d->domain_id, gpfn, gmfn, stype,
                    frame_table[gmfn].count_info,
@@ -1471,7 +1472,7 @@
     /* We need to allocate a new node. Ensure the quicklist is non-empty. */
     if ( unlikely(d->arch.shadow_ht_free == NULL) )
     {
-        SH_LOG("Allocate more shadow hashtable blocks.");
+        SH_VLOG("Allocate more shadow hashtable blocks.");
 
         extra = xmalloc_bytes(
             sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/time.h        Thu Aug 25 22:53:20 2005
@@ -7,4 +7,7 @@
 extern void calibrate_tsc_bp(void);
 extern void calibrate_tsc_ap(void);
 
+struct domain;
+extern void init_domain_time(struct domain *d);
+
 #endif /* __X86_TIME_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/types.h
--- a/xen/include/asm-x86/types.h       Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/types.h       Thu Aug 25 22:53:20 2005
@@ -38,13 +38,16 @@
 typedef unsigned long long u64;
 #if defined(CONFIG_X86_PAE)
 typedef u64 physaddr_t;
+#define PRIphysaddr "016llx"
 #else
-typedef u32 physaddr_t;
+typedef unsigned long physaddr_t;
+#define PRIphysaddr "08lx"
 #endif
 #elif defined(__x86_64__)
 typedef signed long s64;
 typedef unsigned long u64;
-typedef u64 physaddr_t;
+typedef unsigned long physaddr_t;
+#define PRIphysaddr "016lx"
 #endif
 
 typedef unsigned long size_t;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/uaccess.h
--- a/xen/include/asm-x86/uaccess.h     Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/uaccess.h     Thu Aug 25 22:53:20 2005
@@ -125,22 +125,20 @@
        __pu_err;                                                       \
 })                                                     
 
-#define __get_user_nocheck(x,ptr,size)                         \
-({                                                             \
-       long __gu_err, __gu_val;                                \
-       __get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\
-       (x) = (__typeof__(*(ptr)))__gu_val;                     \
-       __gu_err;                                               \
+#define __get_user_nocheck(x,ptr,size)                          \
+({                                                              \
+       long __gu_err;                                          \
+       __get_user_size((x),(ptr),(size),__gu_err,-EFAULT);     \
+       __gu_err;                                               \
 })
 
-#define __get_user_check(x,ptr,size)                                   \
-({                                                                     \
-       long __gu_err, __gu_val;                                        \
-       __typeof__(*(ptr)) __user *__gu_addr = (ptr);                   \
-       __get_user_size(__gu_val,__gu_addr,(size),__gu_err,-EFAULT);    \
-       (x) = (__typeof__(*(ptr)))__gu_val;                             \
-       if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT;                  \
-       __gu_err;                                                       \
+#define __get_user_check(x,ptr,size)                            \
+({                                                              \
+       long __gu_err;                                          \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);           \
+       __get_user_size((x),__gu_addr,(size),__gu_err,-EFAULT); \
+       if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT;          \
+       __gu_err;                                               \
 })                                                     
 
 struct __large_struct { unsigned long buf[100]; };
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/vmx.h Thu Aug 25 22:53:20 2005
@@ -31,10 +31,11 @@
 extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
 extern void vmx_asm_do_resume(void);
 extern void vmx_asm_do_launch(void);
-extern void vmx_intr_assist(struct vcpu *d);
+extern void vmx_intr_assist(void);
 
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
+extern void arch_vmx_do_relaunch(struct vcpu *);
 
 extern int vmcs_size;
 extern unsigned int cpu_rev;
@@ -354,7 +355,7 @@
 }
 
 /* Make sure that xen intercepts any FP accesses from current */
-static inline void vmx_stts()
+static inline void vmx_stts(void)
 {
     unsigned long cr0;
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h    Thu Aug 25 22:53:20 2005
@@ -28,10 +28,10 @@
 extern void stop_vmx(void);
 
 #if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
 void vmx_restore_msrs(struct vcpu *d);
 #else
-#define vmx_load_msrs(_p, _n)      ((void)0)
+#define vmx_load_msrs(_n)          ((void)0)
 #define vmx_restore_msrs(_v)       ((void)0)
 #endif
 
@@ -93,6 +93,7 @@
 
 void vmx_do_launch(struct vcpu *); 
 void vmx_do_resume(struct vcpu *); 
+void vmx_set_host_env(struct vcpu *);
 
 struct vmcs_struct *alloc_vmcs(void);
 void free_vmcs(struct vmcs_struct *);
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Thu Aug 25 22:53:20 2005
@@ -63,7 +63,7 @@
 
 /* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. */
 #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
-#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF))
+#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
 
 #define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
 #define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/x86_32/uaccess.h
--- a/xen/include/asm-x86/x86_32/uaccess.h      Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/x86_32/uaccess.h      Thu Aug 25 22:53:20 2005
@@ -22,7 +22,11 @@
 #define array_access_ok(addr,count,size) \
     (likely(count < (~0UL/size)) && access_ok(addr,count*size))
 
+/* Undefined function to catch size mismatches on 64-bit get_user/put_user. */
+extern void __uaccess_var_not_u64(void);
+
 #define __put_user_u64(x, addr, retval, errret)                        \
+       if (sizeof(x) != 8) __uaccess_var_not_u64();            \
        __asm__ __volatile__(                                   \
                "1:     movl %%eax,0(%2)\n"                     \
                "2:     movl %%edx,4(%2)\n"                     \
@@ -52,6 +56,7 @@
 } while (0)
 
 #define __get_user_u64(x, addr, retval, errret)                        \
+       if (sizeof(x) != 8) __uaccess_var_not_u64();            \
        __asm__ __volatile__(                                   \
                "1:     movl 0(%2),%%eax\n"                     \
                "2:     movl 4(%2),%%edx\n"                     \
@@ -67,7 +72,7 @@
                "       .long 1b,4b\n"                          \
                "       .long 2b,4b\n"                          \
                ".previous"                                     \
-               : "=r" (retval), "=A" (x)                       \
+               : "=r" (retval), "=&A" (x)                      \
                : "r" (addr), "i"(errret), "0"(retval))
 
 #define __get_user_size(x,ptr,size,retval,errret)                      \
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/asm-x86/x86_64/page.h Thu Aug 25 22:53:20 2005
@@ -42,7 +42,8 @@
 #endif /* !__ASSEMBLY__ */
 
 /* Given a virtual address, get an entry offset into a linear page table. */
-#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> PAGE_SHIFT)
+#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
+#define l2_linear_offset(_a) (((_a) & VADDR_MASK) >> L2_PAGETABLE_SHIFT)
 
 #define is_guest_l1_slot(_s) (1)
 #define is_guest_l2_slot(_t, _s) (1)
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/arch-ia64.h    Thu Aug 25 22:53:20 2005
@@ -12,9 +12,6 @@
 #define MAX_VIRT_CPUS 1
 
 #ifndef __ASSEMBLY__
-
-/* NB. Both the following are 64 bits each. */
-typedef unsigned long memory_t;   /* Full-sized pointer/address/memory-size. */
 
 #define MAX_NR_SECTION  32  // at most 32 memory holes
 typedef struct {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/arch-x86_32.h  Thu Aug 25 22:53:20 2005
@@ -63,9 +63,6 @@
 
 #ifndef __ASSEMBLY__
 
-/* NB. Both the following are 32 bits each. */
-typedef unsigned long memory_t;   /* Full-sized pointer/address/memory-size. */
-
 /*
  * Send an array of these to HYPERVISOR_set_trap_table()
  */
@@ -74,10 +71,10 @@
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
 typedef struct trap_info {
-    u8       vector;  /* exception vector                              */
-    u8       flags;   /* 0-3: privilege level; 4: clear event enable?  */
-    u16      cs;      /* code selector                                 */
-    memory_t address; /* code address                                  */
+    u8       vector;       /* exception vector                              */
+    u8       flags;        /* 0-3: privilege level; 4: clear event enable?  */
+    u16      cs;           /* code selector                                 */
+    unsigned long address; /* code offset                                   */
 } trap_info_t;
 
 typedef struct cpu_user_regs {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/arch-x86_64.h  Thu Aug 25 22:53:20 2005
@@ -103,9 +103,6 @@
     /* Bottom of switch_to_user stack frame. */
 };
 
-/* NB. Both the following are 64 bits each. */
-typedef unsigned long memory_t;   /* Full-sized pointer/address/memory-size. */
-
 /*
  * Send an array of these to HYPERVISOR_set_trap_table().
  * N.B. As in x86/32 mode, the privilege level specifies which modes may enter
@@ -121,10 +118,10 @@
 #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
 #define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
 typedef struct trap_info {
-    u8       vector;  /* exception vector                              */
-    u8       flags;   /* 0-3: privilege level; 4: clear event enable?  */
-    u16      cs;      /* code selector                                 */
-    memory_t address; /* code address                                  */
+    u8       vector;       /* exception vector                              */
+    u8       flags;        /* 0-3: privilege level; 4: clear event enable?  */
+    u16      cs;           /* code selector                                 */
+    unsigned long address; /* code offset                                   */
 } trap_info_t;
 
 typedef struct cpu_user_regs {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/dom0_ops.h     Thu Aug 25 22:53:20 2005
@@ -19,7 +19,7 @@
  * This makes sure that old versions of dom0 tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define DOM0_INTERFACE_VERSION   0xAAAA100E
+#define DOM0_INTERFACE_VERSION   0xAAAA1010
 
 /************************************************************************/
 
@@ -27,10 +27,10 @@
 typedef struct {
     /* IN variables. */
     domid_t       domain;
-    memory_t      max_pfns;
+    unsigned long max_pfns;
     void         *buffer;
     /* OUT variables. */
-    memory_t      num_pfns;
+    unsigned long num_pfns;
 } dom0_getmemlist_t;
 
 #define DOM0_SCHEDCTL          6
@@ -83,9 +83,9 @@
 #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code.  */
 #define DOMFLAGS_SHUTDOWNSHIFT 16
     u32      flags;
-    memory_t tot_pages;
-    memory_t max_pages;
-    memory_t shared_info_frame;       /* MFN of shared_info struct */
+    unsigned long tot_pages;
+    unsigned long max_pages;
+    unsigned long shared_info_frame;       /* MFN of shared_info struct */
     u64      cpu_time;
     u32      n_vcpu;
     s32      vcpu_to_cpu[MAX_VIRT_CPUS];  /* current mapping   */
@@ -131,14 +131,14 @@
 } dom0_debug_t;
 
 /*
- * Set clock such that it would read <secs,usecs> after 00:00:00 UTC,
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
  * 1 January, 1970 if the current system time was <system_time>.
  */
 #define DOM0_SETTIME          17
 typedef struct {
     /* IN variables. */
     u32 secs;
-    u32 usecs;
+    u32 nsecs;
     u64 system_time;
 } dom0_settime_t;
 
@@ -155,7 +155,7 @@
 
 typedef struct {
     /* IN variables. */
-    memory_t pfn;          /* Machine page frame number to query.       */
+    unsigned long pfn;     /* Machine page frame number to query.       */
     domid_t domain;        /* To which domain does the frame belong?    */
     /* OUT variables. */
     /* Is the page PINNED to a type? */
@@ -197,7 +197,7 @@
     unsigned long cpu_mask;
     u32           evt_mask;
     /* OUT variables */
-    memory_t mach_addr;
+    unsigned long buffer_mfn;
     u32      size;
 } dom0_tbufcontrol_t;
 
@@ -211,8 +211,8 @@
     u32      sockets_per_node;
     u32      nr_nodes;
     u32      cpu_khz;
-    memory_t total_pages;
-    memory_t free_pages;
+    unsigned long total_pages;
+    unsigned long free_pages;
 } dom0_physinfo_t;
 
 /*
@@ -252,7 +252,7 @@
     u32            op;
     unsigned long *dirty_bitmap; /* pointer to locked buffer */
     /* IN/OUT variables. */
-    memory_t       pages;        /* size of buffer, updated with actual size */
+    unsigned long  pages;        /* size of buffer, updated with actual size */
     /* OUT variables. */
     dom0_shadow_control_stats_t stats;
 } dom0_shadow_control_t;
@@ -260,15 +260,15 @@
 #define DOM0_SETDOMAINMAXMEM   28
 typedef struct {
     /* IN variables. */
-    domid_t     domain;
-    memory_t    max_memkb;
+    domid_t       domain;
+    unsigned long max_memkb;
 } dom0_setdomainmaxmem_t;
 
 #define DOM0_GETPAGEFRAMEINFO2 29   /* batched interface */
 typedef struct {
     /* IN variables. */
-    domid_t  domain;
-    memory_t num;
+    domid_t        domain;
+    unsigned long  num;
     /* IN/OUT variables. */
     unsigned long *array;
 } dom0_getpageframeinfo2_t;
@@ -283,12 +283,12 @@
 #define DOM0_ADD_MEMTYPE         31
 typedef struct {
     /* IN variables. */
-    memory_t pfn;
-    memory_t nr_pfns;
-    u32      type;
-    /* OUT variables. */
-    u32      handle;
-    u32      reg;
+    unsigned long pfn;
+    unsigned long nr_pfns;
+    u32           type;
+    /* OUT variables. */
+    u32           handle;
+    u32           reg;
 } dom0_add_memtype_t;
 
 /*
@@ -311,8 +311,8 @@
     /* IN variables. */
     u32      reg;
     /* OUT variables. */
-    memory_t pfn;
-    memory_t nr_pfns;
+    unsigned long pfn;
+    unsigned long nr_pfns;
     u32      type;
 } dom0_read_memtype_t;
 
@@ -361,10 +361,10 @@
 typedef struct {
     /* IN variables. */
     domid_t               first_domain;
-    memory_t              max_domains;
+    unsigned int          max_domains;
     dom0_getdomaininfo_t *buffer;
     /* OUT variables. */
-    memory_t              num_domains;
+    unsigned int          num_domains;
 } dom0_getdomaininfolist_t;
 
 #define DOM0_PLATFORM_QUIRK      39  
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/grant_table.h  Thu Aug 25 22:53:20 2005
@@ -142,7 +142,10 @@
  *  1. If GNTPIN_map_for_dev is specified then <dev_bus_addr> is the address
  *     via which I/O devices may access the granted frame.
  *  2. If GNTPIN_map_for_host is specified then a mapping will be added at
- *     virtual address <host_virt_addr> in the current address space.
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address.  The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the 
+ *     address is specified in <host_addr>.
  *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
  *     host mapping is destroyed by other means then it is *NOT* guaranteed
  *     to be accounted to the correct grant reference!
@@ -150,18 +153,18 @@
 #define GNTTABOP_map_grant_ref        0
 typedef struct gnttab_map_grant_ref {
     /* IN parameters. */
-    memory_t    host_virt_addr;
+    u64         host_addr;
     domid_t     dom;
     grant_ref_t ref;
     u16         flags;                /* GNTMAP_* */
     /* OUT parameters. */
     s16         handle;               /* +ve: handle; -ve: GNTST_* */
-    memory_t    dev_bus_addr;
+    u64         dev_bus_addr;
 } gnttab_map_grant_ref_t;
 
 /*
  * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
- * tracked by <handle>. If <host_virt_addr> or <dev_bus_addr> is zero, that
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
  * field is ignored. If non-zero, they must refer to a device/host mapping
  * that is tracked by <handle>
  * NOTES:
@@ -173,14 +176,12 @@
 #define GNTTABOP_unmap_grant_ref      1
 typedef struct gnttab_unmap_grant_ref {
     /* IN parameters. */
-    memory_t    host_virt_addr;
-    memory_t    dev_bus_addr;
+    u64         host_addr;
+    u64         dev_bus_addr;
     u16         handle;
     /* OUT parameters. */
     s16         status;               /* GNTST_* */
 } gnttab_unmap_grant_ref_t;
-
-#define GNTUNMAP_DEV_FROM_VIRT (~0U)
 
 /*
  * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
@@ -220,7 +221,7 @@
  */
 #define GNTTABOP_donate                4
 typedef struct {
-    memory_t    mfn;                 /*  0 */
+    unsigned long mfn;               /*  0 */
     domid_t     domid;               /*  4 */
     u16         handle;               /*  8 */
     s16         status;               /*  10: GNTST_* */
@@ -247,10 +248,18 @@
 #define _GNTMAP_application_map (3)
 #define GNTMAP_application_map  (1<<_GNTMAP_application_map)
 
+ /*
+  * GNTMAP_contains_pte subflag:
+  *  0 => This map request contains a host virtual address.
+  *  1 => This map request contains the machine address of the PTE to update.
+  */ 
+#define _GNTMAP_contains_pte    (4)
+#define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
+
 /*
  * Values for error status returns. All errors are -ve.
  */
-#define GNTST_okay             (0)
+#define GNTST_okay             (0)  /* Normal return.                        */
 #define GNTST_general_error    (-1) /* General undefined error.              */
 #define GNTST_bad_domain       (-2) /* Unrecognsed domain id.                */
 #define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h     Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/io/blkif.h     Thu Aug 25 22:53:20 2005
@@ -18,7 +18,6 @@
 
 #define BLKIF_OP_READ      0
 #define BLKIF_OP_WRITE     1
-#define BLKIF_OP_PROBE     2
 
 /* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
 #define BLKIF_RING_SIZE        64
@@ -33,28 +32,22 @@
 typedef struct blkif_request {
     u8             operation;    /* BLKIF_OP_???                         */
     u8             nr_segments;  /* number of segments                   */
-    blkif_vdev_t   device;       /* only for read/write requests         */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
     unsigned long  id;           /* private guest value, echoed in resp  */
     blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
     /* @f_a_s[4:0]=last_sect ; @f_a_s[9:5]=first_sect                        */
-#ifdef CONFIG_XEN_BLKDEV_GRANT
     /* @f_a_s[:16]= grant reference (16 bits)                                */
-#else
-    /* @f_a_s[:12]=@frame: machine page frame number.                        */
-#endif
     /* @first_sect: first sector in frame to transfer (inclusive).           */
     /* @last_sect: last sector in frame to transfer (inclusive).             */
     unsigned long  frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } blkif_request_t;
 
-#define blkif_fas(_addr, _fs, _ls) ((addr)|((_fs)<<5)|(_ls))
+#define blkif_fas(_addr, _fs, _ls) ((_addr)|((_fs)<<5)|(_ls))
 #define blkif_first_sect(_fas) (((_fas)>>5)&31)
 #define blkif_last_sect(_fas)  ((_fas)&31)
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
 #define blkif_fas_from_gref(_gref, _fs, _ls) (((_gref)<<16)|((_fs)<<5)|(_ls))
 #define blkif_gref_from_fas(_fas) ((_fas)>>16)
-#endif
 
 typedef struct blkif_response {
     unsigned long   id;              /* copied from request */
@@ -65,37 +58,17 @@
 #define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
 #define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
 
+#define BLKIF_MAJOR(dev) ((dev)>>8)
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
+
 /*
  * Generate blkif ring structures and types.
  */
 
 DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
 
-/*
- * BLKIF_OP_PROBE:
- * The request format for a probe request is constrained as follows:
- *  @operation   == BLKIF_OP_PROBE
- *  @nr_segments == size of probe buffer in pages
- *  @device      == unused (zero)
- *  @id          == any value (echoed in response message)
- *  @sector_num  == unused (zero)
- *  @frame_and_sects == list of page-sized buffers.
- *                       (i.e., @first_sect == 0, @last_sect == 7).
- * 
- * The response is a list of vdisk_t elements copied into the out-of-band
- * probe buffer. On success the response status field contains the number
- * of vdisk_t elements.
- */
-
 #define VDISK_CDROM        0x1
 #define VDISK_REMOVABLE    0x2
 #define VDISK_READONLY     0x4
 
-typedef struct vdisk {
-    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
-    blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
-    u16            info;         /* Device type and flags (VDISK_*).     */
-    u16            sector_size;  /* Minimum alignment for requests.      */
-} vdisk_t; /* 16 bytes */
-
 #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/io/domain_controller.h
--- a/xen/include/public/io/domain_controller.h Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/io/domain_controller.h Thu Aug 25 22:53:20 2005
@@ -139,7 +139,7 @@
  */
 typedef struct blkif_fe_interface_connect {
     u32      handle;
-    memory_t shmem_frame;
+    unsigned long shmem_frame;
     int      shmem_ref;
 } blkif_fe_interface_connect_t;
 
@@ -249,7 +249,7 @@
     /* IN */
     domid_t    domid;         /* Domain attached to new interface.   */
     u32        blkif_handle;  /* Domain-specific interface handle.   */
-    memory_t   shmem_frame;   /* Page cont. shared comms window.     */
+    unsigned long shmem_frame;/* Page cont. shared comms window.     */
     int        shmem_ref;     /* Grant table reference.              */
     u32        evtchn;        /* Event channel for notifications.    */
     /* OUT */
@@ -364,9 +364,11 @@
  *  STATUS_CONNECTED message.
  */
 typedef struct netif_fe_interface_connect {
-    u32        handle;
-    memory_t   tx_shmem_frame;
-    memory_t   rx_shmem_frame;
+    u32           handle;
+    unsigned long tx_shmem_frame; 
+    int           tx_shmem_ref;
+    unsigned long rx_shmem_frame;
+    int           rx_shmem_ref;
 } netif_fe_interface_connect_t;
 
 /*
@@ -484,11 +486,13 @@
  */
 typedef struct netif_be_connect { 
     /* IN */
-    domid_t    domid;          /* Domain attached to new interface.   */
-    u32        netif_handle;   /* Domain-specific interface handle.   */
-    memory_t   tx_shmem_frame; /* Page cont. tx shared comms window.  */
-    memory_t   rx_shmem_frame; /* Page cont. rx shared comms window.  */
-    u16        evtchn;         /* Event channel for notifications.    */
+    domid_t    domid;            /* Domain attached to new interface.   */
+    u32        netif_handle;     /* Domain-specific interface handle.   */
+    unsigned long tx_shmem_frame;/* Page cont. tx shared comms window.  */
+    int        tx_shmem_ref;     /* Grant reference for above           */
+    unsigned long rx_shmem_frame;/* Page cont. rx shared comms window.  */
+    int        rx_shmem_ref;     /* Grant reference for above           */
+    u16        evtchn;           /* Event channel for notifications.    */
     /* OUT */
     u32        status;
 } netif_be_connect_t;
@@ -573,7 +577,7 @@
  *  STATUS_CONNECTED message.
  */
 typedef struct usbif_fe_interface_connect {
-    memory_t shmem_frame;
+    unsigned long shmem_frame;
 } usbif_fe_interface_connect_t;
 
 /*
@@ -656,7 +660,7 @@
 typedef struct usbif_be_connect { 
     /* IN */
     domid_t    domid;         /* Domain attached to new interface.   */
-    memory_t   shmem_frame;   /* Page cont. shared comms window.     */
+    unsigned long shmem_frame;/* Page cont. shared comms window.     */
     u32        evtchn;        /* Event channel for notifications.    */
     u32        bandwidth;     /* Bandwidth allocated for isoch / int - us
                                * per 1ms frame (ie between 0 and 900 or 800
@@ -776,7 +780,7 @@
 #define PDB_CONNECTION_STATUS_UP   1
 #define PDB_CONNECTION_STATUS_DOWN 2
     u32      status;
-    memory_t ring;       /* status: UP */
+    unsigned long ring;  /* status: UP */
     u32      evtchn;     /* status: UP */
 } pdb_connection_t, *pdb_connection_p;
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/io/netif.h     Thu Aug 25 22:53:20 2005
@@ -10,7 +10,7 @@
 #define __XEN_PUBLIC_IO_NETIF_H__
 
 typedef struct netif_tx_request {
-    memory_t addr;   /* Machine address of packet.  */
+    unsigned long addr;   /* Machine address of packet.  */
     u16      csum_blank:1; /* Proto csum field blank?   */
     u16      id:15;  /* Echoed in response message. */
     u16      size;   /* Packet size in bytes.       */
@@ -32,7 +32,7 @@
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
     u32      addr;   /*  0: Offset in page of start of received packet  */
 #else
-    memory_t addr;   /* Machine address of packet.              */
+    unsigned long addr; /* Machine address of packet.              */
 #endif
     u16      csum_valid:1; /* Protocol checksum is validated?       */
     u16      id:15;
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/physdev.h      Thu Aug 25 22:53:20 2005
@@ -27,8 +27,8 @@
 
 typedef struct physdevop_set_iobitmap {
     /* IN */
-    memory_t bitmap;
-    u32      nr_ports;
+    u8 *bitmap;
+    u32 nr_ports;
 } physdevop_set_iobitmap_t;
 
 typedef struct physdevop_apic {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/trace.h        Thu Aug 25 22:53:20 2005
@@ -9,11 +9,21 @@
 #define __XEN_PUBLIC_TRACE_H__
 
 /* Trace classes */
-#define TRC_GEN     0x00010000    /* General trace            */
-#define TRC_SCHED   0x00020000    /* Xen Scheduler trace      */
-#define TRC_DOM0OP  0x00040000    /* Xen DOM0 operation trace */
-#define TRC_VMX     0x00080000    /* Xen VMX trace            */
-#define TRC_ALL     0xffff0000
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN     0x0001f000    /* General trace            */
+#define TRC_SCHED   0x0002f000    /* Xen Scheduler trace      */
+#define TRC_DOM0OP  0x0004f000    /* Xen DOM0 operation trace */
+#define TRC_VMX     0x0008f000    /* Xen VMX trace            */
+#define TRC_ALL     0xfffff000
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+/* trace subclasses for VMX */
+#define TRC_VMXEXIT  0x00081000   /* VMX exit trace            */
+#define TRC_VMXTIMER 0x00082000   /* VMX timer trace           */
+#define TRC_VMXINT   0x00084000   /* VMX interrupt trace       */
+#define TRC_VMXIO    0x00088000   /* VMX io emulation trace  */
+
 
 /* Trace events per class */
 
@@ -31,9 +41,13 @@
 #define TRC_SCHED_T_TIMER_FN    (TRC_SCHED + 12)
 #define TRC_SCHED_DOM_TIMER_FN  (TRC_SCHED + 13)
 
-#define TRC_VMX_VMEXIT          (TRC_VMX + 1)
-#define TRC_VMX_VECTOR          (TRC_VMX + 2)
-#define TRC_VMX_INT             (TRC_VMX + 3)
+/* trace events per subclass */
+#define TRC_VMX_VMEXIT          (TRC_VMXEXIT + 1)
+#define TRC_VMX_VECTOR          (TRC_VMXEXIT + 2)
+
+#define TRC_VMX_TIMER_INTR      (TRC_VMXTIMER + 1)
+
+#define TRC_VMX_INT             (TRC_VMXINT + 1)
 
 /* This structure represents a single trace buffer record. */
 struct t_rec {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/public/xen.h  Thu Aug 25 22:53:20 2005
@@ -171,9 +171,9 @@
     unsigned int cmd;
     union {
         /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, REASSIGN_PAGE */
-        memory_t mfn;
+        unsigned long mfn;
         /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
-        memory_t linear_addr;
+        unsigned long linear_addr;
     };
     union {
         /* SET_LDT */
@@ -203,6 +203,7 @@
 #define SCHEDOP_shutdown        2   /* Stop executing this domain.        */
 #define SCHEDOP_vcpu_down       3   /* make target VCPU not-runnable.     */
 #define SCHEDOP_vcpu_up         4   /* make target VCPU runnable.         */
+#define SCHEDOP_vcpu_pickle     5   /* save a vcpu's context to memory.   */
 #define SCHEDOP_cmdmask       255   /* 8-bit command. */
 #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
 #define SCHEDOP_vcpushift       8   /* 8-bit VCPU target. (SCHEDOP_up|down) */
@@ -331,14 +332,15 @@
 
 typedef struct vcpu_time_info {
     /*
-     * The following values are updated periodically (and not necessarily
-     * atomically!). The guest OS detects this because 'time_version1' is
-     * incremented just before updating these values, and 'time_version2' is
-     * incremented immediately after. See the Xen-specific Linux code for an
-     * example of how to read these values safely (arch/xen/kernel/time.c).
+     * Updates to the following values are preceded and followed by an
+     * increment of 'version'. The guest can therefore detect updates by
+     * looking for changes to 'version'. If the least-significant bit of
+     * the version number is set then an update is in progress and the guest
+     * must wait to read a consistent set of values.
+     * The correct way to interact with the version number is similar to
+     * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
      */
-    u32 time_version1;
-    u32 time_version2;
+    u32 version;
     u64 tsc_timestamp;   /* TSC at last update of time vals.  */
     u64 system_time;     /* Time, in nanosecs, since boot.    */
     /*
@@ -400,8 +402,9 @@
      * Wallclock time: updated only by control software. Guests should base
      * their gettimeofday() syscall on this wallclock-base value.
      */
-    u32                wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
-    u32                wc_usec;         /* Usecs 00:00:00 UTC, Jan 1, 1970.  */
+    u32 wc_version;      /* Version counter: see vcpu_time_info_t. */
+    u32 wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
+    u32 wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
     arch_shared_info_t arch;
 
@@ -435,18 +438,18 @@
 #define MAX_GUEST_CMDLINE 1024
 typedef struct start_info {
     /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
-    memory_t nr_pages;        /* Total pages allocated to this domain.    */
-    memory_t shared_info;     /* MACHINE address of shared info struct.   */
+    unsigned long nr_pages;   /* Total pages allocated to this domain.    */
+    unsigned long shared_info;/* MACHINE address of shared info struct.   */
     u32      flags;           /* SIF_xxx flags.                           */
     u16      domain_controller_evtchn;
     /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
-    memory_t pt_base;         /* VIRTUAL address of page directory.       */
-    memory_t nr_pt_frames;    /* Number of bootstrap p.t. frames.         */
-    memory_t mfn_list;        /* VIRTUAL address of page-frame list.      */
-    memory_t mod_start;       /* VIRTUAL address of pre-loaded module.    */
-    memory_t mod_len;         /* Size (bytes) of pre-loaded module.       */
+    unsigned long pt_base;    /* VIRTUAL address of page directory.       */
+    unsigned long nr_pt_frames;/* Number of bootstrap p.t. frames.        */
+    unsigned long mfn_list;   /* VIRTUAL address of page-frame list.      */
+    unsigned long mod_start;  /* VIRTUAL address of pre-loaded module.    */
+    unsigned long mod_len;    /* Size (bytes) of pre-loaded module.       */
     s8 cmd_line[MAX_GUEST_CMDLINE];
-    memory_t store_mfn;       /* MACHINE page number of shared page.      */
+    unsigned long store_mfn;  /* MACHINE page number of shared page.      */
     u16      store_evtchn;    /* Event channel for store communication.   */
 } start_info_t;
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/ac_timer.h
--- a/xen/include/xen/ac_timer.h        Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/ac_timer.h        Thu Aug 25 22:53:20 2005
@@ -10,6 +10,7 @@
 
 #include <xen/spinlock.h>
 #include <xen/time.h>
+#include <xen/string.h>
 
 struct ac_timer {
     /* System time expiry value (nanoseconds since boot). */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/domain.h  Thu Aug 25 22:53:20 2005
@@ -15,7 +15,9 @@
 extern void arch_do_boot_vcpu(struct vcpu *v);
 
 extern int  arch_set_info_guest(
-    struct vcpu *d, struct vcpu_guest_context *c);
+    struct vcpu *v, struct vcpu_guest_context *c);
+
+extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
 
 extern void free_perdomain_pt(struct domain *d);
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/event.h
--- a/xen/include/xen/event.h   Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/event.h   Thu Aug 25 22:53:20 2005
@@ -26,30 +26,14 @@
 {
     struct domain *d = v->domain;
     shared_info_t *s = d->shared_info;
-    int            running;
 
-    /* These three operations must happen in strict order. */
+    /* These four operations must happen in strict order. */
     if ( !test_and_set_bit(port,    &s->evtchn_pending[0]) &&
          !test_bit        (port,    &s->evtchn_mask[0])    &&
-         !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) )
+         !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) &&
+         !test_and_set_bit(0,       &v->vcpu_info->evtchn_upcall_pending) )
     {
-        /* The VCPU pending flag must be set /after/ update to evtchn-pend. */
-        set_bit(0, &v->vcpu_info->evtchn_upcall_pending);
         evtchn_notify(v);
-
-        /*
-         * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
-         * pending flag. These values may fluctuate (after all, we hold no
-         * locks) but the key insight is that each change will cause
-         * evtchn_upcall_pending to be polled.
-         * 
-         * NB2. We save VCPUF_running across the unblock to avoid a needless
-         * IPI for domains that we IPI'd to unblock.
-         */
-        running = test_bit(_VCPUF_running, &v->vcpu_flags);
-        vcpu_unblock(v);
-        if ( running )
-            smp_send_event_check_cpu(v->processor);
     }
 }
 
@@ -73,8 +57,9 @@
  */
 extern void send_guest_pirq(struct domain *d, int pirq);
 
-#define event_pending(_d)                                     \
-    ((_d)->vcpu_info->evtchn_upcall_pending && \
-     !(_d)->vcpu_info->evtchn_upcall_mask)
+/* Note: Bitwise operations result in fast code with no branches. */
+#define event_pending(v)                        \
+    ((v)->vcpu_info->evtchn_upcall_pending &    \
+     ~(v)->vcpu_info->evtchn_upcall_mask)
 
 #endif /* __XEN_EVENT_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/grant_table.h
--- a/xen/include/xen/grant_table.h     Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/grant_table.h     Thu Aug 25 22:53:20 2005
@@ -53,19 +53,20 @@
 
 #define ORDER_GRANT_FRAMES   2
 #define NR_GRANT_FRAMES      (1U << ORDER_GRANT_FRAMES)
-#define NR_GRANT_ENTRIES     (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
-
+#define NR_GRANT_ENTRIES     \
+    ((NR_GRANT_FRAMES << PAGE_SHIFT) / sizeof(grant_entry_t))
 
 /*
  * Tracks a mapping of another domain's grant reference. Each domain has a
  * table of these, indexes into which are returned as a 'mapping handle'.
  */
 typedef struct {
-    u16      ref_and_flags; /* 0-2: GNTMAP_* ; 3-15: grant ref */
+    u16      ref_and_flags; /* 0-4: GNTMAP_* ; 5-15: grant ref */
     domid_t  domid;         /* granting domain */
 } grant_mapping_t;
-#define MAPTRACK_GNTMAP_MASK 7
-#define MAPTRACK_REF_SHIFT   3
+#define MAPTRACK_GNTMAP_MASK  0x1f
+#define MAPTRACK_REF_SHIFT    5
+#define MAPTRACK_MAX_ENTRIES  (1 << (16 - MAPTRACK_REF_SHIFT))
 
 /* Per-domain grant information. */
 typedef struct {
@@ -108,10 +109,15 @@
 /* Notify 'rd' of a completed transfer via an already-locked grant entry. */
 void 
 gnttab_notify_transfer(
-    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame);
+    struct domain *rd, struct domain *ld,
+    grant_ref_t ref, unsigned long frame);
 
-/* Pre-domain destruction release of granted device mappings of other domains.*/
+/* Domain death release of granted device mappings of other domains.*/
 void
 gnttab_release_dev_mappings(grant_table_t *gt);
 
+/* Extra GNTST_ values, for internal use only. */
+#define GNTST_flush_all        (2)  /* Success, need to flush entire TLB.    */
+#define GNTST_flush_one        (1)  /* Success, need to flush a vaddr.       */
+
 #endif /* __XEN_GRANT_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/perfc_defn.h
--- a/xen/include/xen/perfc_defn.h      Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/perfc_defn.h      Thu Aug 25 22:53:20 2005
@@ -1,5 +1,6 @@
-#ifndef __XEN_PERFC_DEFN_H__
-#define __XEN_PERFC_DEFN_H__
+/* This file is legitimately included multiple times. */
+/*#ifndef __XEN_PERFC_DEFN_H__*/
+/*#define __XEN_PERFC_DEFN_H__*/
 
 #define PERFC_MAX_PT_UPDATES 64
 #define PERFC_PT_UPDATES_BUCKET_SIZE 3
@@ -124,4 +125,4 @@
 PERFCOUNTER_CPU(remove_write_bad_prediction, "remove_write bad prediction")
 PERFCOUNTER_CPU(update_hl2e_invlpg,     "update_hl2e calls invlpg")
 
-#endif /* __XEN_PERFC_DEFN_H__ */
+/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/sched.h   Thu Aug 25 22:53:20 2005
@@ -258,12 +258,32 @@
 extern void sync_lazy_execstate_all(void);
 extern int __sync_lazy_execstate(void);
 
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ * 
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
 extern void context_switch(
     struct vcpu *prev, 
     struct vcpu *next);
 
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+    struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
 
@@ -297,10 +317,9 @@
         (unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3), \
         (unsigned long)(_a4), (unsigned long)(_a5), (unsigned long)(_a6))
 
-#define hypercall_preempt_check() (unlikely(            \
-        softirq_pending(smp_processor_id()) |           \
-        (!!current->vcpu_info->evtchn_upcall_pending &  \
-          !current->vcpu_info->evtchn_upcall_mask)      \
+#define hypercall_preempt_check() (unlikely(    \
+        softirq_pending(smp_processor_id()) |   \
+        event_pending(current)                  \
     ))
 
 /* This domain_hash and domain_list are protected by the domlist_lock. */
@@ -386,6 +405,7 @@
 void domain_unpause(struct domain *d);
 void domain_pause_by_systemcontroller(struct domain *d);
 void domain_unpause_by_systemcontroller(struct domain *d);
+void cpu_init(void);
 
 static inline void vcpu_unblock(struct vcpu *v)
 {
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/serial.h
--- a/xen/include/xen/serial.h  Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/serial.h  Thu Aug 25 22:53:20 2005
@@ -113,8 +113,9 @@
 /*
  * Initialisers for individual uart drivers.
  */
+/* NB. Any default value can be 0 if it is unknown and must be specified. */
 struct ns16550_defaults {
-    int baud;      /* default baud rate; 0 == pre-configured */
+    int baud;      /* default baud rate; BAUD_AUTO == pre-configured */
     int data_bits; /* default data bits (5, 6, 7 or 8) */
     int parity;    /* default parity (n, o, e, m or s) */
     int stop_bits; /* default stop bits (1 or 2) */
@@ -122,6 +123,9 @@
     unsigned long io_base; /* default io_base address */
 };
 void ns16550_init(int index, struct ns16550_defaults *defaults);
+
+/* Baud rate was pre-configured before invoking the UART driver. */
+#define BAUD_AUTO (-1)
 
 #endif /* __XEN_SERIAL_H__ */
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/time.h
--- a/xen/include/xen/time.h    Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/time.h    Thu Aug 25 22:53:20 2005
@@ -57,7 +57,7 @@
 
 extern void update_dom_time(struct vcpu *v);
 extern void do_settime(
-    unsigned long secs, unsigned long usecs, u64 system_time_base);
+    unsigned long secs, unsigned long nsecs, u64 system_time_base);
 
 #endif /* __XEN_TIME_H__ */
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/include/xen/trace.h
--- a/xen/include/xen/trace.h   Wed Aug 24 02:43:18 2005
+++ b/xen/include/xen/trace.h   Thu Aug 25 22:53:20 2005
@@ -67,6 +67,15 @@
     if ( (tb_event_mask & event) == 0 )
         return 0;
 
+    /* match class */
+    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
+        return 0;
+
+    /* then match subclass */
+    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
+                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
+        return 0;
+
     if ( (tb_cpu_mask & (1UL << smp_processor_id())) == 0 )
         return 0;
 
diff -r 5f1ed597f107 -r 8799d14bef77 xen/tools/Makefile
--- a/xen/tools/Makefile        Wed Aug 24 02:43:18 2005
+++ b/xen/tools/Makefile        Thu Aug 25 22:53:20 2005
@@ -10,4 +10,4 @@
        rm -f *.o symbols
 
 symbols: symbols.c
-       $(HOSTCC) -o $@ $<
+       $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
diff -r 5f1ed597f107 -r 8799d14bef77 xen/tools/symbols.c
--- a/xen/tools/symbols.c       Wed Aug 24 02:43:18 2005
+++ b/xen/tools/symbols.c       Thu Aug 25 22:53:20 2005
@@ -152,8 +152,8 @@
        /* include the type field in the symbol name, so that it gets
         * compressed together */
        s->len = strlen(str) + 1;
-       s->sym = (char *) malloc(s->len + 1);
-       strcpy(s->sym + 1, str);
+       s->sym = (unsigned char *) malloc(s->len + 1);
+       strcpy((char *)s->sym + 1, str);
        s->sym[0] = s->type;
 
        return 0;
@@ -197,16 +197,16 @@
                 * move then they may get dropped in pass 2, which breaks the
                 * symbols rules.
                 */
-               if (s->addr == _etext && strcmp(s->sym + offset, "_etext"))
+               if (s->addr == _etext && strcmp((char *)s->sym + offset, "_etext"))
                        return 0;
        }
 
        /* Exclude symbols which vary between passes. */
-       if (strstr(s->sym + offset, "_compiled."))
+       if (strstr((char *)s->sym + offset, "_compiled."))
                return 0;
 
        for (i = 0; special_symbols[i]; i++)
-               if( strcmp(s->sym + offset, special_symbols[i]) == 0 )
+               if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 )
                        return 0;
 
        return 1;
@@ -311,7 +311,7 @@
        off = 0;
        for (i = 0; i < cnt; i++) {
 
-               if (!table[i].flags & SYM_FLAG_VALID)
+               if (!(table[i].flags & SYM_FLAG_VALID))
                        continue;
 
                if ((valid & 0xFF) == 0)
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/init_task.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/init_task.c     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,49 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+#define swapper_pg_dir ((pgd_t *)NULL)
+struct mm_struct init_mm = INIT_MM(init_mm);
+#undef swapper_pg_dir
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union 
+       __attribute__((__section__(".data.init_task"))) =
+               { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's.
+ */ 
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,660 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is a fallback for platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@xxxxxxxxx>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@xxxxxxxxx>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2005 Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#include <linux/cache.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/pci.h>
+#include <asm/dma.h>
+
+#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
+
+#define SG_ENT_PHYS_ADDRESS(sg)        (page_to_phys((sg)->page) + (sg)->offset)
+
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2.  What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE 128
+
+/*
+ * log of the size of each IO TLB slab.  The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
+int swiotlb_force;
+static char *iotlb_virt_start;
+static unsigned long iotlb_nslabs;
+
+/*
+ * Used to do a quick range check in swiotlb_unmap_single and
+ * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static dma_addr_t iotlb_bus_start, iotlb_bus_mask;
+
+/* Does the given dma address reside within the swiotlb aperture? */
+#define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask))
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+void *io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+static struct phys_addr {
+       struct page *page;
+       unsigned int offset;
+} *io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+static int __init
+setup_io_tlb_npages(char *str) /* parses "swiotlb=[<megabytes>][,][force]" */
+{
+       /* Unlike ia64, the number is the aperture size in MB, not 'slabs'! */
+       if (isdigit(*str)) {
+               iotlb_nslabs = simple_strtoul(str, &str, 0) <<
+                       (20 - IO_TLB_SHIFT); /* megabytes -> slab count */
+               iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
+               /* Round up to power of two (xen_create_contiguous_region). */
+               while (iotlb_nslabs & (iotlb_nslabs-1)) /* more than one bit set */
+                       iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); /* add lowest set bit */
+       }
+       if (*str == ',')
+               ++str;
+       /*
+         * NB. 'force' enables the swiotlb, but doesn't force its use for
+         * every DMA like it does on native Linux.
+         */
+       if (!strcmp(str, "force"))
+               swiotlb_force = 1;
+       return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
+
+/*
+ * Reserve the bounce-buffer aperture (default_size bytes, unless "swiotlb="
+ * already chose a slab count) and initialize the software IO TLB bookkeeping.
+ */
+void
+swiotlb_init_with_default_size (size_t default_size)
+{
+       unsigned long i, bytes;
+
+       if (!iotlb_nslabs) {
+               iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
+               iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
+               /* Round up to power of two (xen_create_contiguous_region). */
+               while (iotlb_nslabs & (iotlb_nslabs-1))
+                       iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
+       }
+
+       bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT); /* aperture size in bytes */
+
+       /*
+        * Get IO TLB memory from the low pages
+        */
+       iotlb_virt_start = alloc_bootmem_low_pages(bytes);
+       if (!iotlb_virt_start)
+               panic("Cannot allocate SWIOTLB buffer!\n"
+                     "Use dom0_mem Xen boot parameter to reserve\n"
+                     "some DMA memory (e.g., dom0_mem=-128M).\n");
+
+       xen_create_contiguous_region(
+               (unsigned long)iotlb_virt_start, get_order(bytes));
+
+       /*
+        * Allocate and initialize the free list array.  This array is used
+        * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
+        */
+       io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int));
+       for (i = 0; i < iotlb_nslabs; i++) /* free slabs from i to the end of its segment */
+               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+       io_tlb_index = 0;
+       io_tlb_orig_addr = alloc_bootmem(
+               iotlb_nslabs * sizeof(*io_tlb_orig_addr));
+
+       /*
+        * Get the overflow emergency buffer
+        */
+       io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+
+       iotlb_bus_start = virt_to_bus(iotlb_virt_start);
+       iotlb_bus_mask  = ~(dma_addr_t)(bytes - 1); /* bytes is a power of two; NOTE(review): in_swiotlb_aperture() also assumes iotlb_bus_start is 'bytes'-aligned - confirm */
+
+       printk(KERN_INFO "Software IO TLB enabled: \n"
+              " Aperture:     %lu megabytes\n"
+              " Bus range:    0x%016lx - 0x%016lx\n"
+              " Kernel range: 0x%016lx - 0x%016lx\n",
+              bytes >> 20,
+              (unsigned long)iotlb_bus_start,
+              (unsigned long)iotlb_bus_start + bytes,
+              (unsigned long)iotlb_virt_start,
+              (unsigned long)iotlb_virt_start + bytes);
+}
+
+void
+swiotlb_init(void) /* decide whether to enable the swiotlb and, if so, set it up */
+{
+       /* The user can forcibly enable swiotlb. */
+       if (swiotlb_force)
+               swiotlb = 1;
+
+       /*
+         * Otherwise, enable for domain 0 if the machine has 'lots of memory',
+         * which we take to mean more than 2GB.
+         */
+       if (xen_start_info.flags & SIF_INITDOMAIN) {
+               dom0_op_t op;
+               op.cmd = DOM0_PHYSINFO;
+               if ((HYPERVISOR_dom0_op(&op) == 0) &&
+                   (op.u.physinfo.total_pages > 0x7ffff)) /* at least 0x80000 pages: 2GB assuming 4kB pages */
+                       swiotlb = 1;
+       }
+
+       if (swiotlb)
+               swiotlb_init_with_default_size(64 * (1<<20)); /* default size argument: 64MB */
+}
+
+static void /* copy 'size' bytes between the original buffer and the bounce buffer at dma_addr */
+__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
+{
+       if (PageHighMem(buffer.page)) { /* highmem: copy one page at a time through kmap_atomic() */
+               size_t len, bytes;
+               char *dev, *host, *kmp;
+               len = size; /* bytes still to copy */
+               while (len != 0) {
+                       if (((bytes = len) + buffer.offset) > PAGE_SIZE)
+                               bytes = PAGE_SIZE - buffer.offset; /* clamp to the end of this page */
+                       kmp  = kmap_atomic(buffer.page, KM_SWIOTLB);
+                       dev  = dma_addr + size - len; /* bounce-buffer position reached so far */
+                       host = kmp + buffer.offset;
+                       memcpy((dir == DMA_FROM_DEVICE) ? host : dev, /* any other dir copies host -> bounce */
+                              (dir == DMA_FROM_DEVICE) ? dev : host,
+                              bytes);
+                       kunmap_atomic(kmp, KM_SWIOTLB);
+                       len -= bytes;
+                       buffer.page++; /* 'buffer' is a by-value copy, so the caller's struct is untouched */
+                       buffer.offset = 0;
+               }
+       } else {
+               char *host = (char *)phys_to_virt(
+                       page_to_pseudophys(buffer.page)) + buffer.offset;
+               if (dir == DMA_FROM_DEVICE)
+                       memcpy(host, dma_addr, size); /* bounce buffer -> original buffer */
+               else if (dir == DMA_TO_DEVICE)
+                       memcpy(dma_addr, host, size); /* original buffer -> bounce buffer */
+       }
+}
+
+/*
+ * Allocates a bounce buffer and returns its kernel virtual address (or NULL).
+ */
+static void *
+map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
+{
+       unsigned long flags;
+       char *dma_addr;
+       unsigned int nslots, stride, index, wrap;
+       int i;
+
+       /*
+        * For mappings greater than a page, we limit the stride (and
+        * hence alignment) to a page size.
+        */
+       nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* slabs needed, rounded up */
+       if (size > PAGE_SIZE)
+               stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+       else
+               stride = 1;
+
+       BUG_ON(!nslots); /* fires for size == 0 */
+
+       /*
+        * Find a run of free IO TLB entries that fits this request and
+        * allocate from it; the search starts at io_tlb_index and wraps once.
+        */
+       spin_lock_irqsave(&io_tlb_lock, flags);
+       {
+               wrap = index = ALIGN(io_tlb_index, stride);
+
+               if (index >= iotlb_nslabs)
+                       wrap = index = 0;
+
+               do {
+                       /*
+                        * If we find a slot that indicates we have 'nslots'
+                        * number of contiguous buffers, we allocate the
+                        * buffers from that slot and mark the entries as '0'
+                        * indicating unavailable.
+                        */
+                       if (io_tlb_list[index] >= nslots) {
+                               int count = 0;
+
+                               for (i = index; i < (int)(index + nslots); i++)
+                                       io_tlb_list[i] = 0;
+                               for (i = index - 1; /* renumber the free slots just below, within this segment */
+                                    (OFFSET(i, IO_TLB_SEGSIZE) !=
+                                     IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+                                    i--)
+                                       io_tlb_list[i] = ++count;
+                               dma_addr = iotlb_virt_start +
+                                       (index << IO_TLB_SHIFT);
+
+                               /*
+                                * Update the indices to avoid searching in
+                                * the next round.
+                                */
+                               io_tlb_index = 
+                                       ((index + nslots) < iotlb_nslabs
+                                        ? (index + nslots) : 0);
+
+                               goto found;
+                       }
+                       index += stride;
+                       if (index >= iotlb_nslabs)
+                               index = 0;
+               } while (index != wrap);
+
+               spin_unlock_irqrestore(&io_tlb_lock, flags);
+               return NULL; /* no suitable run of free slots */
+       }
+  found:
+       spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+       /*
+        * Record the original buffer against the first slot of the allocation.
+        * This is needed when we sync the memory.  Then we sync the buffer if
+        * needed.
+        */
+       io_tlb_orig_addr[index] = buffer; /* note: stored after io_tlb_lock has been dropped */
+       if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+               __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
+
+       return dma_addr;
+}
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ */
+static void
+unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+       unsigned long flags;
+       int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+       int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT; /* first slab of the mapping */
+       struct phys_addr buffer = io_tlb_orig_addr[index]; /* original buffer saved by map_single() */
+
+       /*
+        * First, sync the memory before unmapping the entry
+        */
+       if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+               __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
+
+       /*
+        * Return the buffer to the free list by setting the corresponding
+        * entries to indicate the number of contiguous entries available.
+        * While returning the entries to the free list, we merge the entries
+        * with slots below and above the pool being returned.
+        */
+       spin_lock_irqsave(&io_tlb_lock, flags);
+       {
+               count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? /* next slot still in this segment? */
+                        io_tlb_list[index + nslots] : 0);
+               /*
+                * Step 1: return the slots to the free list, merging the
+                * slots with the free slots that follow them
+                */
+               for (i = index + nslots - 1; i >= index; i--)
+                       io_tlb_list[i] = ++count;
+               /*
+                * Step 2: merge the returned slots with the preceding slots,
+                * if available (non zero)
+                */
+               for (i = index - 1;
+                    (OFFSET(i, IO_TLB_SEGSIZE) !=
+                     IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+                    i--)
+                       io_tlb_list[i] = ++count;
+       }
+       spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+static void
+sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+       int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
+       struct phys_addr buffer = io_tlb_orig_addr[index];
+       BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE));
+       __sync_single(buffer, dma_addr, size, dir);
+}
+
+static void
+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+{
+       /*
+        * Ran out of IOMMU space for this operation. This is very bad:
+        * drivers cannot handle it properly unless they check for
+        * pci_dma_mapping_error (most don't).  This function only logs (and
+        * may panic); swiotlb_map_single/swiotlb_map_page then substitute
+        * io_tlb_overflow_buffer.  Panic if do_panic and size exceeds it.
+        */
+       printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
+              "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");
+
+       if (size > io_tlb_overflow && do_panic) {
+               if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+                       panic("PCI-DMA: Memory would be corrupted\n");
+               if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) /* NOTE(review): BIDIRECTIONAL is presumably never reached here, the check above panics first */
+                       panic("PCI-DMA: Random memory would be DMAed\n");
+       }
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode.  The
+ * PCI address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_sync_single_for_cpu is performed.
+ */
+dma_addr_t
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+{
+       dma_addr_t dev_addr = virt_to_bus(ptr);
+       void *map;
+       struct phys_addr buffer;
+
+       BUG_ON(dir == DMA_NONE);
+
+       /*
+        * If the pointer passed in happens to be in the device's DMA window,
+        * we can safely return the device addr and not worry about bounce
+        * buffering it.
+        */
+       if (!range_straddles_page_boundary(ptr, size) &&
+           !address_needs_mapping(hwdev, dev_addr))
+               return dev_addr;
+
+       /*
+        * Oh well, have to allocate and map a bounce buffer.
+        */
+       buffer.page   = virt_to_page(ptr);
+       buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
+       map = map_single(hwdev, buffer, size, dir);
+       if (!map) {
+               swiotlb_full(hwdev, size, dir, 1);
+               map = io_tlb_overflow_buffer;
+       }
+
+       dev_addr = virt_to_bus(map);
+
+       /*
+        * Ensure that the address returned is DMA'ble
+        */
+       if (address_needs_mapping(hwdev, dev_addr))
+               panic("map_single: bounce buffer is not DMA'ble");
+
+       return dev_addr;
+}
+
+/*
+ * Unmap a single streaming mode DMA translation.  The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+void
+swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+                    int dir)
+{
+       BUG_ON(dir == DMA_NONE);
+       if (in_swiotlb_aperture(dev_addr))
+               unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
+ * call this function before doing so.  At the next point you give the PCI dma
+ * address back to the card, you must first perform a
+ * swiotlb_sync_single_for_device, and then the device again owns the buffer.
+ */
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+                           size_t size, int dir)
+{
+       BUG_ON(dir == DMA_NONE);
+       if (in_swiotlb_aperture(dev_addr))
+               sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
+}
+
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+                              size_t size, int dir)
+{
+       BUG_ON(dir == DMA_NONE);
+       if (in_swiotlb_aperture(dev_addr))
+               sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_single
+ * interface.  Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length.  They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * Entries for which address_needs_mapping() is true are bounced through
+ * map_single(); all others keep their own address.  Returns nelems on
+ * success.  If a bounce buffer cannot be allocated, the entries mapped so
+ * far are unmapped again, dma_length of the first list entry is zeroed and
+ * 0 is returned.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * same here.
+ */
+int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+              int dir)
+{
+       struct phys_addr buffer;
+       dma_addr_t dev_addr;
+       char *map;
+       int i;
+
+       BUG_ON(dir == DMA_NONE);
+
+       for (i = 0; i < nelems; i++, sg++) {
+               dev_addr = SG_ENT_PHYS_ADDRESS(sg);
+               if (address_needs_mapping(hwdev, dev_addr)) {
+                       buffer.page   = sg->page;
+                       buffer.offset = sg->offset;
+                       map = map_single(hwdev, buffer, sg->length, dir);
+                       if (!map) {
+                               /* Don't panic here, we expect map_sg users
+                                  to do proper error handling. */
+                               swiotlb_full(hwdev, sg->length, dir, 0);
+                               swiotlb_unmap_sg(hwdev, sg - i, i, dir);
+                               (sg - i)->dma_length = 0; /* sg has advanced i entries: zero the list head */
+                               return 0;
+                       }
+                       sg->dma_address = (dma_addr_t)virt_to_bus(map);
+               } else
+                       sg->dma_address = dev_addr;
+               sg->dma_length = sg->length;
+       }
+       return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_single() above.
+ */
+void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+                int dir)
+{
+       int i;
+
+       BUG_ON(dir == DMA_NONE);
+
+       for (i = 0; i < nelems; i++, sg++)
+               if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+                       unmap_single(hwdev, 
+                                    (void *)bus_to_virt(sg->dma_address),
+                                    sg->dma_length, dir);
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.
+ */
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+                       int nelems, int dir)
+{
+       int i;
+
+       BUG_ON(dir == DMA_NONE);
+
+       for (i = 0; i < nelems; i++, sg++)
+               if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+                       sync_single(hwdev,
+                                   (void *)bus_to_virt(sg->dma_address),
+                                   sg->dma_length, dir);
+}
+
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+                          int nelems, int dir)
+{
+       int i;
+
+       BUG_ON(dir == DMA_NONE);
+
+       for (i = 0; i < nelems; i++, sg++)
+               if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+                       sync_single(hwdev,
+                                   (void *)bus_to_virt(sg->dma_address),
+                                   sg->dma_length, dir);
+}
+
+dma_addr_t
+swiotlb_map_page(struct device *hwdev, struct page *page,
+                unsigned long offset, size_t size,
+                enum dma_data_direction direction)
+{
+       struct phys_addr buffer;
+       dma_addr_t dev_addr;
+       char *map;
+
+       dev_addr = page_to_phys(page) + offset;
+       if (address_needs_mapping(hwdev, dev_addr)) {
+               buffer.page   = page;
+               buffer.offset = offset;
+               map = map_single(hwdev, buffer, size, direction);
+               if (!map) {
+                       swiotlb_full(hwdev, size, direction, 1);
+                       map = io_tlb_overflow_buffer;
+               }
+               dev_addr = (dma_addr_t)virt_to_bus(map);
+       }
+
+       return dev_addr;
+}
+
+void
+swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+                  size_t size, enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       if (in_swiotlb_aperture(dma_address))
+               unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
+}
+
+int
+swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+{
+       return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+}
+
+/*
+ * Return whether the given PCI device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ */
+int
+swiotlb_dma_supported (struct device *hwdev, u64 mask)
+{
+       return (mask >= 0xffffffffUL);
+}
+
+EXPORT_SYMBOL(swiotlb_init);
+EXPORT_SYMBOL(swiotlb_map_single);
+EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL(swiotlb_map_sg);
+EXPORT_SYMBOL(swiotlb_unmap_sg);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
+EXPORT_SYMBOL(swiotlb_map_page);
+EXPORT_SYMBOL(swiotlb_unmap_page);
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+EXPORT_SYMBOL(swiotlb_dma_supported);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+
+#ifndef CONFIG_XEN
+struct genapic *genapic = &apic_flat;
+#else
+extern struct genapic apic_xen;
+struct genapic *genapic = &apic_xen;
+#endif
+
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init clustered_apic_check(void)
+{
+#ifndef CONFIG_XEN
+       long i;
+       u8 clusters, max_cluster;
+       u8 id;
+       u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+               /* AMD always uses flat mode right now */
+               genapic = &apic_flat;
+               goto print;
+       }
+
+#if defined(CONFIG_ACPI_BUS)
+       /*
+        * Some x86_64 machines use physical APIC mode regardless of how many
+        * procs/clusters are present (x86_64 ES7000 is an example).
+        */
+       if (acpi_fadt.revision > FADT2_REVISION_ID)
+               if (acpi_fadt.force_apic_physical_destination_mode) {
+                       genapic = &apic_cluster;
+                       goto print;
+               }
+#endif
+
+       memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+       for (i = 0; i < NR_CPUS; i++) {
+               id = bios_cpu_apicid[i];
+               if (id != BAD_APICID)
+                       cluster_cnt[APIC_CLUSTERID(id)]++;
+       }
+
+       clusters = 0;
+       max_cluster = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (cluster_cnt[i] > 0) {
+                       ++clusters;
+                       if (cluster_cnt[i] > max_cluster)
+                               max_cluster = cluster_cnt[i];
+               }
+       }
+
+       /*
+        * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+        * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+        * else physical mode.
+        * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+        * can ignore the clustered logical case and go straight to physical.)
+        */
+       if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+               genapic = &apic_flat;
+       else
+               genapic = &apic_cluster;
+
+print:
+#else
+       /* hardcode to xen apic functions */
+       genapic = &apic_xen;
+#endif
+       printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+/* Same for both flat and clustered. */
+
+#ifdef CONFIG_XEN
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
+#endif
+
+void send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+       __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#else
+       xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#endif
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code.  Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#else
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/genapic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector) /* notify 'cpu' through its event channel for 'vector' */
+{
+       unsigned int evtchn;
+       Dprintk("%s\n", __FUNCTION__);
+
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       if (evtchn) /* zero is treated as "no channel bound" */
+               notify_via_evtchn(evtchn);
+       else
+               printk("send_IPI to unbound port %u/%d\n", cpu, vector); /* newline-terminated so later log output is not appended */
+}
+
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+       int cpu; /* note: 'dest' is not used by this implementation */
+
+       switch (shortcut) {
+       case APIC_DEST_SELF: /* only the calling CPU */
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT: /* every online CPU except the caller */
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       case APIC_DEST_ALLINC: /* every online CPU, caller included */
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       default: /* unknown shortcut: log it, send nothing */
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
+}
+
+static cpumask_t xen_target_cpus(void)
+{
+       return cpu_online_map;
+}
+
+/*
+ * Set up the logical destination ID.
+ * Do nothing, not called now.
+ */
+static void xen_init_apic_ldr(void)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return;
+}
+
+static void xen_send_IPI_allbutself(int vector)
+{
+       /*
+        * if there are no other CPUs in the system then
+        * we get an APIC send error if we try to broadcast.
+        * thus we have to avoid sending IPIs in this case.
+        */
+       Dprintk("%s\n", __FUNCTION__);
+       if (num_online_cpus() > 1)
+               xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_all(int vector)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+       unsigned long mask = cpus_addr(cpumask)[0];
+       unsigned int cpu;
+       unsigned long flags;
+
+       Dprintk("%s\n", __FUNCTION__);
+       local_irq_save(flags);
+       WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+
+       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               if (cpu_isset(cpu, cpumask)) {
+                       __send_IPI_one(cpu, vector);
+               }
+       }
+       local_irq_restore(flags);
+}
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static int xen_apic_id_registered(void)
+{
+       /* better be set */
+       Dprintk("%s\n", __FUNCTION__);
+       return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+#endif
+
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+       u32 ebx;
+
+       Dprintk("%s\n", __FUNCTION__);
+       ebx = cpuid_ebx(1);
+       return ((ebx >> 24) & 0xFF) >> index_msb;
+}
+
+struct genapic apic_xen =  {
+       .name = "xen",
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       .int_delivery_mode = dest_LowestPrio,
+#endif
+       .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+       .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+       .target_cpus = xen_target_cpus,
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       .apic_id_registered = xen_apic_id_registered,
+#endif
+       .init_apic_ldr = xen_init_apic_ldr,
+       .send_IPI_all = xen_send_IPI_all,
+       .send_IPI_allbutself = xen_send_IPI_allbutself,
+       .send_IPI_mask = xen_send_IPI_mask,
+       .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+};
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,280 @@
+/*  Xenbus code for blkif backend
+    Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdarg.h>
+#include <linux/module.h>
+#include <asm-xen/xenbus.h>
+#include "common.h"
+
+/* Per-device state of one block backend; stored in xenbus_device->data. */
+struct backend_info
+{
+       /* the xenbus device this state belongs to */
+       struct xenbus_device *dev;
+
+       /* our communications channel */
+       blkif_t *blkif;
+
+       /* read from the backend node: "frontend-id", "physical-device", */
+       /* and whether a "read-only" node exists */
+       long int frontend_id;
+       long int pdev;
+       long int readonly;
+
+       /* watch back end for changes */
+       struct xenbus_watch backend_watch;
+
+       /* watch front end for changes */
+       struct xenbus_watch watch;
+       /* frontend path read from our "frontend" node; freed on remove */
+       char *frontpath;
+};
+
+/*
+ * Tear down a backend: drop both xenbus watches, release the block
+ * interface if one was created, and free the per-device state.
+ * Always returns 0.
+ */
+static int blkback_remove(struct xenbus_device *dev)
+{
+       struct backend_info *info = dev->data;
+
+       /* The frontend watch is only unregistered when its node is set. */
+       if (info->watch.node != NULL)
+               unregister_xenbus_watch(&info->watch);
+       unregister_xenbus_watch(&info->backend_watch);
+
+       if (info->blkif != NULL)
+               blkif_put(info->blkif);
+
+       /* kfree() accepts NULL, so no separate check is needed. */
+       kfree(info->frontpath);
+       kfree(info);
+
+       return 0;
+}
+
+/*
+ * Front end tells us frame.
+ *
+ * Watch callback for the frontend's directory.  Reads "ring-ref" and
+ * "event-channel" from the frontend, writes "sectors", "info" and
+ * "sector-size" under our own node inside a transaction, then maps the
+ * ring.  A NULL 'node' (as passed by backend_changed()) skips the check
+ * for a vanished frontend.
+ */
+static void frontend_changed(struct xenbus_watch *watch, const char *node)
+{
+       unsigned long ring_ref;
+       unsigned int evtchn;
+       int err;
+       struct backend_info *be
+               = container_of(watch, struct backend_info, watch);
+
+       /* If other end is gone, delete ourself. */
+       if (node && !xenbus_exists(be->frontpath, "")) {
+               xenbus_rm(be->dev->nodename, "");
+               device_unregister(&be->dev->dev);
+               return;
+       }
+       /* No interface created yet, or already connected: nothing to do. */
+       if (be->blkif == NULL || be->blkif->status == CONNECTED)
+               return;
+
+       /* Both values are read from the frontend's path. */
+       err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref,
+                           "event-channel", "%u", &evtchn, NULL);
+       if (err) {
+               xenbus_dev_error(be->dev, err,
+                                "reading %s/ring-ref and event-channel",
+                                be->frontpath);
+               return;
+       }
+
+       /* Supply the information about the device the frontend needs */
+       err = xenbus_transaction_start(be->dev->nodename);
+       if (err) {
+               xenbus_dev_error(be->dev, err, "starting transaction");
+               return;
+       }
+
+       err = xenbus_printf(be->dev->nodename, "sectors", "%lu",
+                           vbd_size(&be->blkif->vbd));
+       if (err) {
+               xenbus_dev_error(be->dev, err, "writing %s/sectors",
+                                be->dev->nodename);
+               goto abort;
+       }
+
+       /* FIXME: use a typename instead */
+       err = xenbus_printf(be->dev->nodename, "info", "%u",
+                           vbd_info(&be->blkif->vbd));
+       if (err) {
+               xenbus_dev_error(be->dev, err, "writing %s/info",
+                                be->dev->nodename);
+               goto abort;
+       }
+       err = xenbus_printf(be->dev->nodename, "sector-size", "%lu",
+                           vbd_secsize(&be->blkif->vbd));
+       if (err) {
+               xenbus_dev_error(be->dev, err, "writing %s/sector-size",
+                                be->dev->nodename);
+               goto abort;
+       }
+
+       /* Map the shared frame, irq etc. */
+       err = blkif_map(be->blkif, ring_ref, evtchn);
+       if (err) {
+               xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u",
+                                ring_ref, evtchn);
+               goto abort;
+       }
+
+       /* Success: end the transaction with 0, then flag the device ok. */
+       xenbus_transaction_end(0);
+       xenbus_dev_ok(be->dev);
+
+       return;
+
+       /*
+        * Error path: end the transaction with 1 instead of 0
+        * (presumably "abort" -- confirm against the xenbus API).
+        */
+abort:
+       xenbus_transaction_end(1);
+}
+
+/*
+ * Watch callback for the backend node.
+ *
+ * Setup supplies physical device.
+ * We provide event channel and device details to front end.
+ * Frontend supplies shared frame and event channel.
+ *
+ * Reads "physical-device" and the optional "read-only" marker from the
+ * backend node.  While no block interface exists yet, one is allocated,
+ * its vbd is created, and frontend_changed() is called directly to try
+ * to connect.  'node' is not used.
+ */
+static void backend_changed(struct xenbus_watch *watch, const char *node)
+{
+       int err;
+       char *p;
+       long int handle, pdev;
+       struct backend_info *be
+               = container_of(watch, struct backend_info, backend_watch);
+       struct xenbus_device *dev = be->dev;
+
+       err = xenbus_scanf(dev->nodename, "physical-device", "%li", &pdev);
+       if (XENBUS_EXIST_ERR(err))
+               return;
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading physical-device");
+               return;
+       }
+       if (be->pdev && be->pdev != pdev) {
+               printk(KERN_WARNING
+                      "changing physical-device not supported\n");
+               return;
+       }
+       be->pdev = pdev;
+
+       /* If there's a read-only node, we're read only. */
+       p = xenbus_read(dev->nodename, "read-only", NULL);
+       if (!IS_ERR(p)) {
+               be->readonly = 1;
+               kfree(p);
+       }
+
+       if (be->blkif == NULL) {
+               /* Front end dir is a number, which is used as the handle. */
+               p = strrchr(be->frontpath, '/');
+               if (p == NULL) {
+                       /* No '/' at all: p + 1 would be a wild pointer. */
+                       xenbus_dev_error(dev, -EINVAL,
+                                        "parsing frontend path %s",
+                                        be->frontpath);
+                       return;
+               }
+               handle = simple_strtoul(p + 1, NULL, 0);
+
+               be->blkif = alloc_blkif(be->frontend_id);
+               if (IS_ERR(be->blkif)) {
+                       err = PTR_ERR(be->blkif);
+                       be->blkif = NULL;
+                       xenbus_dev_error(dev, err, "creating block interface");
+                       return;
+               }
+
+               err = vbd_create(be->blkif, handle, be->pdev, be->readonly);
+               if (err) {
+                       /*
+                        * Drop the interface again.  Leaving it in
+                        * be->blkif would leak it, stop any later event
+                        * from retrying the setup, and let
+                        * frontend_changed() connect a blkif that has
+                        * no vbd.
+                        */
+                       blkif_put(be->blkif);
+                       be->blkif = NULL;
+                       xenbus_dev_error(dev, err, "creating vbd structure");
+                       return;
+               }
+
+               /* Pass in NULL node to skip exist test. */
+               frontend_changed(&be->watch, NULL);
+       }
+}
+
+/*
+ * Probe a new backend device: allocate its backend_info, read
+ * "frontend-id" and "frontend" from the backend node, register watches
+ * on the backend node and on the frontend path, then run
+ * backend_changed() once by hand.
+ *
+ * Returns 0 on success.  On failure the error code is returned after
+ * the backend watch (if registered), the frontend string and the
+ * backend_info have been released.  'id' is not used.
+ */
+static int blkback_probe(struct xenbus_device *dev,
+                        const struct xenbus_device_id *id)
+{
+       struct backend_info *be;
+       char *frontend;
+       int err;
+
+       be = kmalloc(sizeof(*be), GFP_KERNEL);
+       if (!be) {
+               xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
+               return -ENOMEM;
+       }
+       /* Zeroed so the cleanup path can test watch nodes and pointers. */
+       memset(be, 0, sizeof(*be));
+
+       /* NULL so the cleanup path can tell whether anything was read. */
+       frontend = NULL;
+       /*
+        * NOTE(review): a NULL format presumably hands back the raw string
+        * in newly allocated memory (it is kfree()d below) -- confirm.
+        */
+       err = xenbus_gather(dev->nodename,
+                           "frontend-id", "%li", &be->frontend_id,
+                           "frontend", NULL, &frontend,
+                           NULL);
+       /* An "exist" error is returned without being reported on the device. */
+       if (XENBUS_EXIST_ERR(err))
+               goto free_be;
+       if (err < 0) {
+               xenbus_dev_error(dev, err,
+                                "reading %s/frontend or frontend-id",
+                                dev->nodename);
+               goto free_be;
+       }
+       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+               /* If we can't get a frontend path and a frontend-id,
+                * then our bus-id is no longer valid and we need to
+                * destroy the backend device.
+                */
+               err = -ENOENT;
+               goto free_be;
+       }
+
+       be->dev = dev;
+       be->backend_watch.node = dev->nodename;
+       be->backend_watch.callback = backend_changed;
+       err = register_xenbus_watch(&be->backend_watch);
+       if (err) {
+               /* Cleared so free_be does not unregister an unregistered watch. */
+               be->backend_watch.node = NULL;
+               xenbus_dev_error(dev, err, "adding backend watch on %s",
+                                dev->nodename);
+               goto free_be;
+       }
+
+       be->frontpath = frontend;
+       be->watch.node = be->frontpath;
+       be->watch.callback = frontend_changed;
+       err = register_xenbus_watch(&be->watch);
+       if (err) {
+               be->watch.node = NULL;
+               xenbus_dev_error(dev, err,
+                                "adding frontend watch on %s",
+                                be->frontpath);
+               goto free_be;
+       }
+
+       dev->data = be;
+
+       /* Run the backend handler once now instead of waiting for an event. */
+       backend_changed(&be->backend_watch, dev->nodename);
+       return 0;
+
+       /* Common failure exit: undo whatever was set up above. */
+ free_be:
+       if (be->backend_watch.node)
+               unregister_xenbus_watch(&be->backend_watch);
+       if (frontend)
+               kfree(frontend);
+       kfree(be);
+       return err;
+}
+
+/* Device types handled by this driver; an empty string ends the list. */
+static struct xenbus_device_id blkback_ids[] = {
+       { "vbd" },
+       { "" }
+};
+
+/* Driver description passed to xenbus_register_backend(). */
+static struct xenbus_driver blkback = {
+       .name = "vbd",
+       .owner = THIS_MODULE,
+       .ids = blkback_ids,
+       .probe = blkback_probe,
+       .remove = blkback_remove,
+};
+
+/*
+ * Register the block backend driver with xenbus.  The return value of
+ * xenbus_register_backend() is not checked.
+ */
+void blkif_xenbus_init(void)
+{
+       xenbus_register_backend(&blkback);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,257 @@
+/*  Xenbus code for netif backend
+    Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdarg.h>
+#include <linux/module.h>
+#include <asm-xen/xenbus.h>
+#include "common.h"
+
+/* Per-device state of one network backend; stored in xenbus_device->data. */
+struct backend_info
+{
+       /* the xenbus device this state belongs to */
+       struct xenbus_device *dev;
+
+       /* our communications channel */
+       netif_t *netif;
+
+       /* read from the backend node's "frontend-id" entry */
+       long int frontend_id;
+       /* compiled out: these two fields are not part of the structure */
+#if 0
+       long int pdev;
+       long int readonly;
+#endif
+
+       /* watch back end for changes */
+       struct xenbus_watch backend_watch;
+
+       /* watch front end for changes */
+       struct xenbus_watch watch;
+       /* frontend path read from our "frontend" node; freed on remove */
+       char *frontpath;
+};
+
+/*
+ * Tear down a backend: drop both xenbus watches, disconnect the network
+ * interface if one was created, and free the per-device state.
+ * Always returns 0.
+ */
+static int netback_remove(struct xenbus_device *dev)
+{
+       struct backend_info *info = dev->data;
+
+       /* The frontend watch is only unregistered when its node is set. */
+       if (info->watch.node != NULL)
+               unregister_xenbus_watch(&info->watch);
+       unregister_xenbus_watch(&info->backend_watch);
+
+       if (info->netif != NULL)
+               netif_disconnect(info->netif);
+
+       /* kfree() accepts NULL, so no separate check is needed. */
+       kfree(info->frontpath);
+       kfree(info);
+
+       return 0;
+}
+
+/*
+ * Front end tells us frame.
+ *
+ * Watch callback for the frontend's directory.  Parses the frontend's
+ * "mac" node into netif->fe_dev_addr, reads "tx-ring-ref", "rx-ring-ref"
+ * and "event-channel" from the frontend, and maps the rings.  A NULL
+ * 'node' (as passed by backend_changed()) skips the check for a
+ * vanished frontend.
+ */
+static void frontend_changed(struct xenbus_watch *watch, const char *node)
+{
+       unsigned long tx_ring_ref, rx_ring_ref;
+       unsigned int evtchn;
+       int err;
+       struct backend_info *be
+               = container_of(watch, struct backend_info, watch);
+       char *mac, *e, *s;
+       int i;
+
+       /* If other end is gone, delete ourself. */
+       if (node && !xenbus_exists(be->frontpath, "")) {
+               xenbus_rm(be->dev->nodename, "");
+               device_unregister(&be->dev->dev);
+               return;
+       }
+       /* No interface created yet, or already connected: nothing to do. */
+       if (be->netif == NULL || be->netif->status == CONNECTED)
+               return;
+
+       mac = xenbus_read(be->frontpath, "mac", NULL);
+       if (IS_ERR(mac)) {
+               err = PTR_ERR(mac);
+               /* The node is read from the frontend, so report that path. */
+               xenbus_dev_error(be->dev, err, "reading %s/mac",
+                                be->frontpath);
+               return;
+       }
+
+       /*
+        * Expect exactly ETH_ALEN hex octets separated by ':'.  Every
+        * octet but the last must be followed by ':' and the last one by
+        * the terminating NUL.  Accepting a NUL any earlier would make the
+        * next iteration start parsing one byte past the end of the string.
+        */
+       s = mac;
+       for (i = 0; i < ETH_ALEN; i++) {
+               be->netif->fe_dev_addr[i] = simple_strtoul(s, &e, 16);
+               if (s == e || e[0] != ((i == ETH_ALEN - 1) ? 0 : ':')) {
+                       kfree(mac);
+                       err = -ENOENT;
+                       xenbus_dev_error(be->dev, err, "parsing %s/mac",
+                                        be->frontpath);
+                       return;
+               }
+               s = &e[1];
+       }
+       kfree(mac);
+
+       err = xenbus_gather(be->frontpath, "tx-ring-ref", "%lu", &tx_ring_ref,
+                           "rx-ring-ref", "%lu", &rx_ring_ref,
+                           "event-channel", "%u", &evtchn, NULL);
+       if (err) {
+               xenbus_dev_error(be->dev, err,
+                                "reading %s/ring-ref and event-channel",
+                                be->frontpath);
+               return;
+       }
+
+       /* Map the shared frame, irq etc. */
+       err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
+       if (err) {
+               xenbus_dev_error(be->dev, err,
+                                "mapping shared-frames %lu/%lu port %u",
+                                tx_ring_ref, rx_ring_ref, evtchn);
+               return;
+       }
+
+       xenbus_dev_ok(be->dev);
+}
+
+/*
+ * Watch callback for the backend node.  Once "handle" can be read and
+ * no interface exists yet, allocate the netif (with an all-zero backend
+ * MAC) and call frontend_changed() directly to try to connect.
+ * 'node' is not used.
+ */
+static void backend_changed(struct xenbus_watch *watch, const char *node)
+{
+       struct backend_info *be;
+       struct xenbus_device *dev;
+       u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
+       long int handle;
+       int err;
+
+       be = container_of(watch, struct backend_info, backend_watch);
+       dev = be->dev;
+
+       err = xenbus_scanf(dev->nodename, "handle", "%li", &handle);
+       if (XENBUS_EXIST_ERR(err))
+               return;
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading handle");
+               return;
+       }
+
+       /* An interface that already exists is left alone. */
+       if (be->netif != NULL)
+               return;
+
+       be->netif = alloc_netif(be->frontend_id, handle, be_mac);
+       if (IS_ERR(be->netif)) {
+               err = PTR_ERR(be->netif);
+               be->netif = NULL;
+               xenbus_dev_error(dev, err, "creating interface");
+               return;
+       }
+
+#if 0
+       err = vbd_create(be->netif, handle, be->pdev, be->readonly);
+       if (err) {
+               xenbus_dev_error(dev, err, "creating vbd structure");
+               return;
+       }
+#endif
+
+       /* Pass in NULL node to skip exist test. */
+       frontend_changed(&be->watch, NULL);
+}
+
+/*
+ * Probe a new backend device: allocate its backend_info, read
+ * "frontend-id" and "frontend" from the backend node, register watches
+ * on the backend node and on the frontend path, then run
+ * backend_changed() once by hand.
+ *
+ * Returns 0 on success.  On failure the error code is returned after
+ * the backend watch (if registered), the frontend string and the
+ * backend_info have been released.  'id' is not used.
+ */
+static int netback_probe(struct xenbus_device *dev,
+                        const struct xenbus_device_id *id)
+{
+       struct backend_info *be;
+       char *frontend;
+       int err;
+
+       be = kmalloc(sizeof(*be), GFP_KERNEL);
+       if (!be) {
+               xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
+               return -ENOMEM;
+       }
+       /* Zeroed so the cleanup path can test watch nodes and pointers. */
+       memset(be, 0, sizeof(*be));
+
+       /* NULL so the cleanup path can tell whether anything was read. */
+       frontend = NULL;
+       /*
+        * NOTE(review): a NULL format presumably hands back the raw string
+        * in newly allocated memory (it is kfree()d below) -- confirm.
+        */
+       err = xenbus_gather(dev->nodename,
+                           "frontend-id", "%li", &be->frontend_id,
+                           "frontend", NULL, &frontend,
+                           NULL);
+       /* An "exist" error is returned without being reported on the device. */
+       if (XENBUS_EXIST_ERR(err))
+               goto free_be;
+       if (err < 0) {
+               xenbus_dev_error(dev, err,
+                                "reading %s/frontend or frontend-id",
+                                dev->nodename);
+               goto free_be;
+       }
+       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+               /* If we can't get a frontend path and a frontend-id,
+                * then our bus-id is no longer valid and we need to
+                * destroy the backend device.
+                */
+               err = -ENOENT;
+               goto free_be;
+       }
+
+       be->dev = dev;
+       be->backend_watch.node = dev->nodename;
+       be->backend_watch.callback = backend_changed;
+       err = register_xenbus_watch(&be->backend_watch);
+       if (err) {
+               /* Cleared so free_be does not unregister an unregistered watch. */
+               be->backend_watch.node = NULL;
+               xenbus_dev_error(dev, err, "adding backend watch on %s",
+                                dev->nodename);
+               goto free_be;
+       }
+
+       be->frontpath = frontend;
+       be->watch.node = be->frontpath;
+       be->watch.callback = frontend_changed;
+       err = register_xenbus_watch(&be->watch);
+       if (err) {
+               be->watch.node = NULL;
+               xenbus_dev_error(dev, err,
+                                "adding frontend watch on %s",
+                                be->frontpath);
+               goto free_be;
+       }
+
+       dev->data = be;
+
+       /* Run the backend handler once now instead of waiting for an event. */
+       backend_changed(&be->backend_watch, dev->nodename);
+       return 0;
+
+       /* Common failure exit: undo whatever was set up above. */
+ free_be:
+       if (be->backend_watch.node)
+               unregister_xenbus_watch(&be->backend_watch);
+       if (frontend)
+               kfree(frontend);
+       kfree(be);
+       return err;
+}
+
+/* Device types handled by this driver; an empty string ends the list. */
+static struct xenbus_device_id netback_ids[] = {
+       { "vif" },
+       { "" }
+};
+
+/* Driver description passed to xenbus_register_backend(). */
+static struct xenbus_driver netback = {
+       .name = "vif",
+       .owner = THIS_MODULE,
+       .ids = netback_ids,
+       .probe = netback_probe,
+       .remove = netback_remove,
+};
+
+/*
+ * Register the network backend driver with xenbus.  The return value of
+ * xenbus_register_backend() is not checked.
+ */
+void netif_xenbus_init(void)
+{
+       xenbus_register_backend(&netback);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hw_irq.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hw_irq.h    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,71 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ *     linux/include/asm/hw_irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     moved some of the old arch/i386/kernel/irq.h to here. VY
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/config.h>
+#include <linux/profile.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern u8 irq_vector[NR_IRQ_VECTORS];
+#define IO_APIC_VECTOR(irq)    (irq_vector[irq])
+#define AUTO_ASSIGN            -1
+
+extern void (*interrupt[NR_IRQS])(void);
+
+#ifdef CONFIG_SMP
+fastcall void reschedule_interrupt(void);
+fastcall void invalidate_interrupt(void);
+fastcall void call_function_interrupt(void);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+fastcall void apic_timer_interrupt(void);
+fastcall void error_interrupt(void);
+fastcall void spurious_interrupt(void);
+fastcall void thermal_interrupt(struct pt_regs *);
+#define platform_legacy_irq(irq)       ((irq) < 16)
+#endif
+
+void disable_8259A_irq(unsigned int irq);
+void enable_8259A_irq(unsigned int irq);
+int i8259A_irq_pending(unsigned int irq);
+void make_8259A_irq(unsigned int irq);
+void init_8259A(int aeoi);
+void FASTCALL(send_IPI_self(int vector));
+void init_VISWS_APIC_irqs(void);
+void setup_IO_APIC(void);
+void disable_IO_APIC(void);
+void print_IO_APIC(void);
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+void send_IPI(int dest, int vector);
+void setup_ioapic_dest(void);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+extern void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i);
+
+#endif /* _ASM_HW_IRQ_H */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,32 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+#include <linux/config.h>
+
+/*
+ * D(n) is a helper used only while defining enum km_type.  With
+ * CONFIG_DEBUG_HIGHMEM it expands to an extra enumerator
+ * __KM_FENCE_<n> placed in front of each real entry; otherwise it
+ * expands to nothing.
+ */
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+/* KM_TYPE_NR is the last enumerator, following every KM_* entry. */
+enum km_type {
+D(0)   KM_BOUNCE_READ,
+D(1)   KM_SKB_SUNRPC_DATA,
+D(2)   KM_SKB_DATA_SOFTIRQ,
+D(3)   KM_USER0,
+D(4)   KM_USER1,
+D(5)   KM_BIO_SRC_IRQ,
+D(6)   KM_BIO_DST_IRQ,
+D(7)   KM_PTE0,
+D(8)   KM_PTE1,
+D(9)   KM_IRQ0,
+D(10)  KM_IRQ1,
+D(11)  KM_SOFTIRQ0,
+D(12)  KM_SOFTIRQ1,
+D(13)  KM_SWIOTLB,
+D(14)  KM_TYPE_NR
+};
+
+/* The helper is not needed past this point. */
+#undef D
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,22 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+/*
+ * One scatter/gather entry.  dma_address and dma_length are the fields
+ * read through the sg_dma_address() and sg_dma_len() accessors.
+ * NOTE(review): page/offset/length presumably describe the CPU-side
+ * buffer -- confirm against the users of this structure.
+ */
+struct scatterlist {
+    struct page                *page;
+    unsigned int       offset;
+    unsigned int       length;
+    dma_addr_t         dma_address;
+    unsigned int       dma_length;
+};
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)     ((sg)->dma_address)
+#define sg_dma_len(sg)         ((sg)->dma_length)
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h   Thu Aug 25 22:53:20 2005
@@ -0,0 +1,42 @@
+#ifndef _ASM_SWIOTLB_H
+#define _ASM_SWIOTLB_H 1
+
+#include <linux/config.h>
+
+/* SWIOTLB interface */
+
+extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
+                                     int dir);
+extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+                                 size_t size, int dir);
+extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
+                                        dma_addr_t dev_addr,
+                                        size_t size, int dir);
+extern void swiotlb_sync_single_for_device(struct device *hwdev,
+                                           dma_addr_t dev_addr,
+                                           size_t size, int dir);
+extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
+                                    struct scatterlist *sg, int nelems,
+                                    int dir);
+extern void swiotlb_sync_sg_for_device(struct device *hwdev,
+                                       struct scatterlist *sg, int nelems,
+                                       int dir);
+extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
+                     int nents, int direction);
+extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+                        int nents, int direction);
+extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
+                                   unsigned long offset, size_t size,
+                                   enum dma_data_direction direction);
+extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+                               size_t size, enum dma_data_direction direction);
+extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
+
+#ifdef CONFIG_SWIOTLB
+extern int swiotlb;
+#else
+#define swiotlb 0
+#endif
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hw_irq.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hw_irq.h  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,138 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ *     linux/include/asm/hw_irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     moved some of the old arch/i386/kernel/irq.h to here. VY
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
+ *
+ *     hacked by Andi Kleen for x86-64.
+ * 
+ *  $Id: hw_irq.h,v 1.24 2001/09/14 20:55:03 vojtech Exp $
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <linux/profile.h>
+#include <linux/smp.h>
+
+struct hw_interrupt_type;
+#endif
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR  0x20
+
+#define IA32_SYSCALL_VECTOR    0x80
+
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ *  some of the following vectors are 'rare', they are merged
+ *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ *  TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ *  Vectors 0xf0-0xf9 are free (reserved for future Linux use).
+ */
+#ifndef CONFIG_XEN
+#define SPURIOUS_APIC_VECTOR   0xff
+#define ERROR_APIC_VECTOR      0xfe
+#define INVALIDATE_TLB_VECTOR  0xfd
+#define RESCHEDULE_VECTOR      0xfc
+#define TASK_MIGRATION_VECTOR  0xfb
+#define CALL_FUNCTION_VECTOR   0xfa
+#define KDB_VECTOR     0xf9
+
+#define THERMAL_APIC_VECTOR    0xf0
+#endif
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR     0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee)
+ * we start at 0x31 to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR    0x31
+#define FIRST_SYSTEM_VECTOR    0xef   /* duplicated in irq.h */
+
+
+#ifndef __ASSEMBLY__
+extern u8 irq_vector[NR_IRQ_VECTORS];
+#define IO_APIC_VECTOR(irq)    (irq_vector[irq])
+#define AUTO_ASSIGN            -1
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+extern void setup_ioapic_dest(void);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#include <asm/ptrace.h>
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+/*
+ *     SMP has a few special interrupts for IPI messages
+ */
+
+/*
+ * BUILD_IRQ(nr) declares IRQ<nr>_interrupt() and emits an assembly stub
+ * with that label which pushes (nr - 256) and jumps to
+ * common_interrupt.
+ */
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n.p2align\n" \
+"IRQ" #nr "_interrupt:\n\t" \
+       "push $" #nr "-256 ; " \
+       "jmp common_interrupt");
+
+extern void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i);
+
+#define platform_legacy_irq(irq)       ((irq) < 16)
+
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
diff -r 5f1ed597f107 -r 8799d14bef77 patches/linux-2.6.12/patch-2.6.12.5
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/patches/linux-2.6.12/patch-2.6.12.5       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,1614 @@
+diff --git a/Makefile b/Makefile
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 12
+-EXTRAVERSION =
++EXTRAVERSION = .5
+ NAME=Woozy Numbat
+ 
+ # *DOCUMENTATION*
+@@ -1149,7 +1149,7 @@ endif # KBUILD_EXTMOD
+ #(which is the most common case IMHO) to avoid unneeded clutter in the big tags file.
+ #Adding $(srctree) adds about 20M on i386 to the size of the output file!
+ 
+-ifeq ($(KBUILD_OUTPUT),)
++ifeq ($(src),$(obj))
+ __srctree =
+ else
+ __srctree = $(srctree)/
+diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
++++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+@@ -44,7 +44,7 @@
+ 
+ #define PFX "powernow-k8: "
+ #define BFX PFX "BIOS error: "
+-#define VERSION "version 1.40.2"
++#define VERSION "version 1.40.4"
+ #include "powernow-k8.h"
+ 
+ /* serialize freq changes  */
+@@ -978,7 +978,7 @@ static int __init powernowk8_cpu_init(st
+ {
+       struct powernow_k8_data *data;
+       cpumask_t oldmask = CPU_MASK_ALL;
+-      int rc;
++      int rc, i;
+ 
+       if (!check_supported_cpu(pol->cpu))
+               return -ENODEV;
+@@ -1064,7 +1064,9 @@ static int __init powernowk8_cpu_init(st
+       printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+              data->currfid, data->currvid);
+ 
+-      powernow_data[pol->cpu] = data;
++      for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
++              powernow_data[i] = data;
++      }
+ 
+       return 0;
+ 
+diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
+--- a/arch/i386/kernel/process.c
++++ b/arch/i386/kernel/process.c
+@@ -827,6 +827,8 @@ asmlinkage int sys_get_thread_area(struc
+       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+               return -EINVAL;
+ 
++      memset(&info, 0, sizeof(info));
++
+       desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+ 
+       info.entry_number = idx;
+diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
+--- a/arch/ia64/kernel/ptrace.c
++++ b/arch/ia64/kernel/ptrace.c
+@@ -945,6 +945,13 @@ access_uarea (struct task_struct *child,
+                               *data = (pt->cr_ipsr & IPSR_MASK);
+                       return 0;
+ 
++                    case PT_AR_RSC:
++                      if (write_access)
++                              pt->ar_rsc = *data | (3 << 2); /* force PL3 */
++                      else
++                              *data = pt->ar_rsc;
++                      return 0;
++
+                     case PT_AR_RNAT:
+                       urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
+                       rnat_addr = (long) ia64_rse_rnat_addr((long *)
+@@ -996,9 +1003,6 @@ access_uarea (struct task_struct *child,
+                     case PT_AR_BSPSTORE:
+                       ptr = pt_reg_addr(pt, ar_bspstore);
+                       break;
+-                    case PT_AR_RSC:
+-                      ptr = pt_reg_addr(pt, ar_rsc);
+-                      break;
+                     case PT_AR_UNAT:
+                       ptr = pt_reg_addr(pt, ar_unat);
+                       break;
+@@ -1234,7 +1238,7 @@ ptrace_getregs (struct task_struct *chil
+ static long
+ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user 
*ppr)
+ {
+-      unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
++      unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
+       struct unw_frame_info info;
+       struct switch_stack *sw;
+       struct ia64_fpreg fpval;
+@@ -1267,7 +1271,7 @@ ptrace_setregs (struct task_struct *chil
+       /* app regs */
+ 
+       retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+-      retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
++      retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]);
+       retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+       retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+       retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+@@ -1365,6 +1369,7 @@ ptrace_setregs (struct task_struct *chil
+       retval |= __get_user(nat_bits, &ppr->nat);
+ 
+       retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
++      retval |= access_uarea(child, PT_AR_RSC, &rsc, 1);
+       retval |= access_uarea(child, PT_AR_EC, &ec, 1);
+       retval |= access_uarea(child, PT_AR_LC, &lc, 1);
+       retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
+diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
+--- a/arch/ia64/kernel/signal.c
++++ b/arch/ia64/kernel/signal.c
+@@ -94,7 +94,7 @@ sys_sigaltstack (const stack_t __user *u
+ static long
+ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
+ {
+-      unsigned long ip, flags, nat, um, cfm;
++      unsigned long ip, flags, nat, um, cfm, rsc;
+       long err;
+ 
+       /* Always make any pending restarted system calls return -EINTR */
+@@ -106,7 +106,7 @@ restore_sigcontext (struct sigcontext __
+       err |= __get_user(ip, &sc->sc_ip);                      /* instruction 
pointer */
+       err |= __get_user(cfm, &sc->sc_cfm);
+       err |= __get_user(um, &sc->sc_um);                      /* user mask */
+-      err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
++      err |= __get_user(rsc, &sc->sc_ar_rsc);
+       err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
+       err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
+       err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+@@ -119,6 +119,7 @@ restore_sigcontext (struct sigcontext __
+       err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8);       /* r15 
*/
+ 
+       scr->pt.cr_ifs = cfm | (1UL << 63);
++      scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
+ 
+       /* establish new instruction pointer: */
+       scr->pt.cr_iip = ip & ~0x3UL;
+diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c
+--- a/arch/ppc/kernel/time.c
++++ b/arch/ppc/kernel/time.c
+@@ -89,6 +89,9 @@ unsigned long tb_to_ns_scale;
+ 
+ extern unsigned long wall_jiffies;
+ 
++/* used for timezone offset */
++static long timezone_offset;
++
+ DEFINE_SPINLOCK(rtc_lock);
+ 
+ EXPORT_SYMBOL(rtc_lock);
+@@ -170,7 +173,7 @@ void timer_interrupt(struct pt_regs * re
+                    xtime.tv_sec - last_rtc_update >= 659 &&
+                    abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 
500000/HZ &&
+                    jiffies - wall_jiffies == 1) {
+-                      if (ppc_md.set_rtc_time(xtime.tv_sec+1 + time_offset) 
== 0)
++                      if (ppc_md.set_rtc_time(xtime.tv_sec+1 + 
timezone_offset) == 0)
+                               last_rtc_update = xtime.tv_sec+1;
+                       else
+                               /* Try again one minute later */
+@@ -286,7 +289,7 @@ void __init time_init(void)
+       unsigned old_stamp, stamp, elapsed;
+ 
+         if (ppc_md.time_init != NULL)
+-                time_offset = ppc_md.time_init();
++                timezone_offset = ppc_md.time_init();
+ 
+       if (__USE_RTC()) {
+               /* 601 processor: dec counts down by 128 every 128ns */
+@@ -331,10 +334,10 @@ void __init time_init(void)
+       set_dec(tb_ticks_per_jiffy);
+ 
+       /* If platform provided a timezone (pmac), we correct the time */
+-        if (time_offset) {
+-              sys_tz.tz_minuteswest = -time_offset / 60;
++        if (timezone_offset) {
++              sys_tz.tz_minuteswest = -timezone_offset / 60;
+               sys_tz.tz_dsttime = 0;
+-              xtime.tv_sec -= time_offset;
++              xtime.tv_sec -= timezone_offset;
+         }
+         set_normalized_timespec(&wall_to_monotonic,
+                                 -xtime.tv_sec, -xtime.tv_nsec);
+diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c
+--- a/arch/ppc64/boot/zlib.c
++++ b/arch/ppc64/boot/zlib.c
+@@ -1307,7 +1307,7 @@ local int huft_build(
+   {
+     *t = (inflate_huft *)Z_NULL;
+     *m = 0;
+-    return Z_OK;
++    return Z_DATA_ERROR;
+   }
+ 
+ 
+@@ -1351,6 +1351,7 @@ local int huft_build(
+     if ((j = *p++) != 0)
+       v[x[j]++] = i;
+   } while (++i < n);
++  n = x[g];                   /* set n to length of v */
+ 
+ 
+   /* Generate the Huffman codes and for each, make the table entries */
+diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
+--- a/arch/um/kernel/process.c
++++ b/arch/um/kernel/process.c
+@@ -130,7 +130,7 @@ int start_fork_tramp(void *thread_arg, u
+       return(arg.pid);
+ }
+ 
+-static int ptrace_child(void)
++static int ptrace_child(void *arg)
+ {
+       int ret;
+       int pid = os_getpid(), ppid = getppid();
+@@ -159,16 +159,20 @@ static int ptrace_child(void)
+       _exit(ret);
+ }
+ 
+-static int start_ptraced_child(void)
++static int start_ptraced_child(void **stack_out)
+ {
++      void *stack;
++      unsigned long sp;
+       int pid, n, status;
+       
+-      pid = fork();
+-      if(pid == 0)
+-              ptrace_child();
+-
++      stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
++                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
++      if(stack == MAP_FAILED)
++              panic("check_ptrace : mmap failed, errno = %d", errno);
++      sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *);
++      pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL);
+       if(pid < 0)
+-              panic("check_ptrace : fork failed, errno = %d", errno);
++              panic("check_ptrace : clone failed, errno = %d", errno);
+       CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
+       if(n < 0)
+               panic("check_ptrace : wait failed, errno = %d", errno);
+@@ -176,6 +180,7 @@ static int start_ptraced_child(void)
+               panic("check_ptrace : expected SIGSTOP, got status = %d",
+                     status);
+ 
++      *stack_out = stack;
+       return(pid);
+ }
+ 
+@@ -183,12 +188,12 @@ static int start_ptraced_child(void)
+  * just avoid using sysemu, not panic, but only if SYSEMU features are broken.
+  * So only for SYSEMU features we test mustpanic, while normal host features
+  * must work anyway!*/
+-static int stop_ptraced_child(int pid, int exitcode, int mustexit)
++static int stop_ptraced_child(int pid, void *stack, int exitcode, int 
mustpanic)
+ {
+       int status, n, ret = 0;
+ 
+       if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+-              panic("stop_ptraced_child : ptrace failed, errno = %d", errno);
++              panic("check_ptrace : ptrace failed, errno = %d", errno);
+       CATCH_EINTR(n = waitpid(pid, &status, 0));
+       if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
+               int exit_with = WEXITSTATUS(status);
+@@ -199,13 +204,15 @@ static int stop_ptraced_child(int pid, i
+               printk("check_ptrace : child exited with exitcode %d, while "
+                     "expecting %d; status 0x%x", exit_with,
+                     exitcode, status);
+-              if (mustexit)
++              if (mustpanic)
+                       panic("\n");
+               else
+                       printk("\n");
+               ret = -1;
+       }
+ 
++      if(munmap(stack, PAGE_SIZE) < 0)
++              panic("check_ptrace : munmap failed, errno = %d", errno);
+       return ret;
+ }
+ 
+@@ -227,11 +234,12 @@ __uml_setup("nosysemu", nosysemu_cmd_par
+ 
+ static void __init check_sysemu(void)
+ {
++      void *stack;
+       int pid, syscall, n, status, count=0;
+ 
+       printk("Checking syscall emulation patch for ptrace...");
+       sysemu_supported = 0;
+-      pid = start_ptraced_child();
++      pid = start_ptraced_child(&stack);
+ 
+       if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
+               goto fail;
+@@ -249,7 +257,7 @@ static void __init check_sysemu(void)
+               panic("check_sysemu : failed to modify system "
+                     "call return, errno = %d", errno);
+ 
+-      if (stop_ptraced_child(pid, 0, 0) < 0)
++      if (stop_ptraced_child(pid, stack, 0, 0) < 0)
+               goto fail_stopped;
+ 
+       sysemu_supported = 1;
+@@ -257,7 +265,7 @@ static void __init check_sysemu(void)
+       set_using_sysemu(!force_sysemu_disabled);
+ 
+       printk("Checking advanced syscall emulation patch for ptrace...");
+-      pid = start_ptraced_child();
++      pid = start_ptraced_child(&stack);
+       while(1){
+               count++;
+               if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
+@@ -282,7 +290,7 @@ static void __init check_sysemu(void)
+                       break;
+               }
+       }
+-      if (stop_ptraced_child(pid, 0, 0) < 0)
++      if (stop_ptraced_child(pid, stack, 0, 0) < 0)
+               goto fail_stopped;
+ 
+       sysemu_supported = 2;
+@@ -293,17 +301,18 @@ static void __init check_sysemu(void)
+       return;
+ 
+ fail:
+-      stop_ptraced_child(pid, 1, 0);
++      stop_ptraced_child(pid, stack, 1, 0);
+ fail_stopped:
+       printk("missing\n");
+ }
+ 
+ void __init check_ptrace(void)
+ {
++      void *stack;
+       int pid, syscall, n, status;
+ 
+       printk("Checking that ptrace can change system call numbers...");
+-      pid = start_ptraced_child();
++      pid = start_ptraced_child(&stack);
+ 
+       if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) 
< 0)
+               panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", 
errno);
+@@ -330,7 +339,7 @@ void __init check_ptrace(void)
+                       break;
+               }
+       }
+-      stop_ptraced_child(pid, 0, 1);
++      stop_ptraced_child(pid, stack, 0, 1);
+       printk("OK\n");
+       check_sysemu();
+ }
+@@ -362,10 +371,11 @@ void forward_pending_sigio(int target)
+ static inline int check_skas3_ptrace_support(void)
+ {
+       struct ptrace_faultinfo fi;
++      void *stack;
+       int pid, n, ret = 1;
+ 
+       printf("Checking for the skas3 patch in the host...");
+-      pid = start_ptraced_child();
++      pid = start_ptraced_child(&stack);
+ 
+       n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
+       if (n < 0) {
+@@ -380,7 +390,7 @@ static inline int check_skas3_ptrace_sup
+       }
+ 
+       init_registers(pid);
+-      stop_ptraced_child(pid, 1, 1);
++      stop_ptraced_child(pid, stack, 1, 1);
+ 
+       return(ret);
+ }
+diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
+--- a/arch/x86_64/ia32/syscall32.c
++++ b/arch/x86_64/ia32/syscall32.c
+@@ -57,6 +57,7 @@ int syscall32_setup_pages(struct linux_b
+       int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
+       struct vm_area_struct *vma;
+       struct mm_struct *mm = current->mm;
++      int ret;
+ 
+       vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       if (!vma)
+@@ -78,7 +79,11 @@ int syscall32_setup_pages(struct linux_b
+       vma->vm_mm = mm;
+ 
+       down_write(&mm->mmap_sem);
+-      insert_vm_struct(mm, vma);
++      if ((ret = insert_vm_struct(mm, vma))) {
++              up_write(&mm->mmap_sem);
++              kmem_cache_free(vm_area_cachep, vma);
++              return ret;
++      }
+       mm->total_vm += npages;
+       up_write(&mm->mmap_sem);
+       return 0;
+diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
+--- a/arch/x86_64/kernel/setup.c
++++ b/arch/x86_64/kernel/setup.c
+@@ -729,8 +729,6 @@ static void __init amd_detect_cmp(struct
+       int cpu = smp_processor_id();
+       int node = 0;
+       unsigned bits;
+-      if (c->x86_num_cores == 1)
+-              return;
+ 
+       bits = 0;
+       while ((1 << bits) < c->x86_num_cores)
+diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
+--- a/arch/x86_64/kernel/smp.c
++++ b/arch/x86_64/kernel/smp.c
+@@ -284,6 +284,71 @@ struct call_data_struct {
+ static struct call_data_struct * call_data;
+ 
+ /*
++ * Send a 'generic call function' IPI to exactly one other CPU (<cpu>) and
++ * spin until it responds; <nonatomic> is not used by this function.
++ */
++static void __smp_call_function_single (int cpu, void (*func) (void *info), 
void *info,
++                              int nonatomic, int wait)
++{
++      struct call_data_struct data;
++      int cpus = 1;   /* number of CPUs expected to respond: the one target */
++
++      data.func = func;
++      data.info = info;
++      atomic_set(&data.started, 0);
++      data.wait = wait;
++      if (wait)
++              atomic_set(&data.finished, 0);  /* only polled when wait != 0 */
++
++      call_data = &data;
++      wmb();  /* order the call_data store ahead of sending the IPI */
++      /* Send the IPI to the target CPU only, not to all other CPUs */
++      send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
++
++      /* Wait for response: spin until data.started reaches cpus (1) */
++      while (atomic_read(&data.started) != cpus)
++              cpu_relax();
++
++      if (!wait)
++              return;
++
++      while (atomic_read(&data.finished) != cpus)
++              cpu_relax();
++}
++
++/*
++ * Run a function on another CPU
++ *  <cpu>     The CPU to run <func> on. Must not be the calling CPU.
++ *  <func>    The function to run. This must be fast and non-blocking.
++ *  <info>    An arbitrary pointer to pass to the function.
++ *  <nonatomic>       Currently unused.
++ *  <wait>    If true, wait until function has completed on the other CPU.
++ *  [RETURNS]   0 on success, -EBUSY if <cpu> is the calling CPU.
++ * Does not return until the remote CPU is nearly ready to execute <func>,
++ * is executing it, or has executed it.
++ */
++
++int smp_call_function_single (int cpu, void (*func) (void *info), void *info, 
++      int nonatomic, int wait)
++{
++      
++      int me = get_cpu(); /* prevent preemption and reschedule on another processor */
++
++      if (cpu == me) {
++              printk("%s: trying to call self\n", __func__);
++              put_cpu();
++              return -EBUSY;
++      }
++      spin_lock_bh(&call_lock);
++
++      __smp_call_function_single(cpu, func,info,nonatomic,wait);      
++
++      spin_unlock_bh(&call_lock);
++      put_cpu();
++      return 0;
++}
++
++/*
+  * this function sends a 'generic call function' IPI to all other CPUs
+  * in the system.
+  */
+diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
+--- a/arch/x86_64/kernel/smpboot.c
++++ b/arch/x86_64/kernel/smpboot.c
+@@ -202,9 +202,6 @@ static __cpuinit void sync_master(void *
+ {
+       unsigned long flags, i;
+ 
+-      if (smp_processor_id() != boot_cpu_id)
+-              return;
+-
+       go[MASTER] = 0;
+ 
+       local_irq_save(flags);
+@@ -253,7 +250,7 @@ get_delta(long *rt, long *master)
+       return tcenter - best_tm;
+ }
+ 
+-static __cpuinit void sync_tsc(void)
++static __cpuinit void sync_tsc(unsigned int master)
+ {
+       int i, done = 0;
+       long delta, adj, adjust_latency = 0;
+@@ -267,9 +264,17 @@ static __cpuinit void sync_tsc(void)
+       } t[NUM_ROUNDS] __cpuinitdata;
+ #endif
+ 
++      printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
++              smp_processor_id(), master);
++
+       go[MASTER] = 1;
+ 
+-      smp_call_function(sync_master, NULL, 1, 0);
++      /* It is dangerous to broadcast an IPI while cpus are coming up,
++       * as they may not be ready to accept it.  Since we only
++       * need to send the ipi to the boot cpu, direct the message
++       * to that cpu alone and avoid the race.
++       */
++      smp_call_function_single(master, sync_master, NULL, 1, 0);
+ 
+       while (go[MASTER])      /* wait for master to be ready */
+               no_cpu_relax();
+@@ -313,16 +318,14 @@ static __cpuinit void sync_tsc(void)
+       printk(KERN_INFO
+              "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
+              "maxerr %lu cycles)\n",
+-             smp_processor_id(), boot_cpu_id, delta, rt);
++             smp_processor_id(), master, delta, rt);
+ }
+ 
+ static void __cpuinit tsc_sync_wait(void)
+ {
+       if (notscsync || !cpu_has_tsc)
+               return;
+-      printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
+-                      boot_cpu_id);
+-      sync_tsc();
++      sync_tsc(0);
+ }
+ 
+ static __init int notscsync_setup(char *s)
+diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
+--- a/drivers/acpi/pci_irq.c
++++ b/drivers/acpi/pci_irq.c
+@@ -433,8 +433,9 @@ acpi_pci_irq_enable (
+               printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: no GSI",
+                       pci_name(dev), ('A' + pin));
+               /* Interrupt Line values above 0xF are forbidden */
+-              if (dev->irq >= 0 && (dev->irq <= 0xF)) {
++              if (dev->irq > 0 && (dev->irq <= 0xF)) {
+                       printk(" - using IRQ %d\n", dev->irq);
++                      acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE, 
ACPI_ACTIVE_LOW);
+                       return_VALUE(0);
+               }
+               else {
+diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
+--- a/drivers/char/rocket.c
++++ b/drivers/char/rocket.c
+@@ -277,7 +277,7 @@ static void rp_do_receive(struct r_port 
+               ToRecv = space;
+ 
+       if (ToRecv <= 0)
+-              return;
++              goto done;
+ 
+       /*
+        * if status indicates there are errored characters in the
+@@ -359,6 +359,7 @@ static void rp_do_receive(struct r_port 
+       }
+       /*  Push the data up to the tty layer */
+       ld->receive_buf(tty, tty->flip.char_buf, tty->flip.flag_buf, count);
++done:
+       tty_ldisc_deref(ld);
+ }
+ 
+diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
+--- a/drivers/char/tpm/tpm.c
++++ b/drivers/char/tpm/tpm.c
+@@ -32,12 +32,6 @@
+ 
+ #define       TPM_BUFSIZE                     2048
+ 
+-/* PCI configuration addresses */
+-#define       PCI_GEN_PMCON_1                 0xA0
+-#define       PCI_GEN1_DEC                    0xE4
+-#define       PCI_LPC_EN                      0xE6
+-#define       PCI_GEN2_DEC                    0xEC
+-
+ static LIST_HEAD(tpm_chip_list);
+ static DEFINE_SPINLOCK(driver_lock);
+ static int dev_mask[32];
+@@ -61,72 +55,6 @@ void tpm_time_expired(unsigned long ptr)
+ EXPORT_SYMBOL_GPL(tpm_time_expired);
+ 
+ /*
+- * Initialize the LPC bus and enable the TPM ports
+- */
+-int tpm_lpc_bus_init(struct pci_dev *pci_dev, u16 base)
+-{
+-      u32 lpcenable, tmp;
+-      int is_lpcm = 0;
+-
+-      switch (pci_dev->vendor) {
+-      case PCI_VENDOR_ID_INTEL:
+-              switch (pci_dev->device) {
+-              case PCI_DEVICE_ID_INTEL_82801CA_12:
+-              case PCI_DEVICE_ID_INTEL_82801DB_12:
+-                      is_lpcm = 1;
+-                      break;
+-              }
+-              /* init ICH (enable LPC) */
+-              pci_read_config_dword(pci_dev, PCI_GEN1_DEC, &lpcenable);
+-              lpcenable |= 0x20000000;
+-              pci_write_config_dword(pci_dev, PCI_GEN1_DEC, lpcenable);
+-
+-              if (is_lpcm) {
+-                      pci_read_config_dword(pci_dev, PCI_GEN1_DEC,
+-                                            &lpcenable);
+-                      if ((lpcenable & 0x20000000) == 0) {
+-                              dev_err(&pci_dev->dev,
+-                                      "cannot enable LPC\n");
+-                              return -ENODEV;
+-                      }
+-              }
+-
+-              /* initialize TPM registers */
+-              pci_read_config_dword(pci_dev, PCI_GEN2_DEC, &tmp);
+-
+-              if (!is_lpcm)
+-                      tmp = (tmp & 0xFFFF0000) | (base & 0xFFF0);
+-              else
+-                      tmp =
+-                          (tmp & 0xFFFF0000) | (base & 0xFFF0) |
+-                          0x00000001;
+-
+-              pci_write_config_dword(pci_dev, PCI_GEN2_DEC, tmp);
+-
+-              if (is_lpcm) {
+-                      pci_read_config_dword(pci_dev, PCI_GEN_PMCON_1,
+-                                            &tmp);
+-                      tmp |= 0x00000004;      /* enable CLKRUN */
+-                      pci_write_config_dword(pci_dev, PCI_GEN_PMCON_1,
+-                                             tmp);
+-              }
+-              tpm_write_index(0x0D, 0x55);    /* unlock 4F */
+-              tpm_write_index(0x0A, 0x00);    /* int disable */
+-              tpm_write_index(0x08, base);    /* base addr lo */
+-              tpm_write_index(0x09, (base & 0xFF00) >> 8);    /* base addr hi 
*/
+-              tpm_write_index(0x0D, 0xAA);    /* lock 4F */
+-              break;
+-      case PCI_VENDOR_ID_AMD:
+-              /* nothing yet */
+-              break;
+-      }
+-
+-      return 0;
+-}
+-
+-EXPORT_SYMBOL_GPL(tpm_lpc_bus_init);
+-
+-/*
+  * Internal kernel interface to transmit TPM commands
+  */
+ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+@@ -590,10 +518,6 @@ int tpm_pm_resume(struct pci_dev *pci_de
+       if (chip == NULL)
+               return -ENODEV;
+ 
+-      spin_lock(&driver_lock);
+-      tpm_lpc_bus_init(pci_dev, chip->vendor->base);
+-      spin_unlock(&driver_lock);
+-
+       return 0;
+ }
+ 
+diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
+--- a/drivers/char/tpm/tpm.h
++++ b/drivers/char/tpm/tpm.h
+@@ -79,8 +79,6 @@ static inline void tpm_write_index(int i
+ }
+ 
+ extern void tpm_time_expired(unsigned long);
+-extern int tpm_lpc_bus_init(struct pci_dev *, u16);
+-
+ extern int tpm_register_hardware(struct pci_dev *,
+                                struct tpm_vendor_specific *);
+ extern int tpm_open(struct inode *, struct file *);
+diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
+--- a/drivers/char/tpm/tpm_atmel.c
++++ b/drivers/char/tpm/tpm_atmel.c
+@@ -22,7 +22,10 @@
+ #include "tpm.h"
+ 
+ /* Atmel definitions */
+-#define       TPM_ATML_BASE                   0x400
++enum tpm_atmel_addr {
++      TPM_ATMEL_BASE_ADDR_LO = 0x08,
++      TPM_ATMEL_BASE_ADDR_HI = 0x09
++};
+ 
+ /* write status bits */
+ #define       ATML_STATUS_ABORT               0x01
+@@ -127,7 +130,6 @@ static struct tpm_vendor_specific tpm_at
+       .cancel = tpm_atml_cancel,
+       .req_complete_mask = ATML_STATUS_BUSY | ATML_STATUS_DATA_AVAIL,
+       .req_complete_val = ATML_STATUS_DATA_AVAIL,
+-      .base = TPM_ATML_BASE,
+       .miscdev = { .fops = &atmel_ops, },
+ };
+ 
+@@ -136,14 +138,16 @@ static int __devinit tpm_atml_init(struc
+ {
+       u8 version[4];
+       int rc = 0;
++      int lo, hi;
+ 
+       if (pci_enable_device(pci_dev))
+               return -EIO;
+ 
+-      if (tpm_lpc_bus_init(pci_dev, TPM_ATML_BASE)) {
+-              rc = -ENODEV;
+-              goto out_err;
+-      }
++      lo = tpm_read_index( TPM_ATMEL_BASE_ADDR_LO );
++      hi = tpm_read_index( TPM_ATMEL_BASE_ADDR_HI );
++
++      tpm_atmel.base = (hi<<8)|lo;
++      dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
+ 
+       /* verify that it is an Atmel part */
+       if (tpm_read_index(4) != 'A' || tpm_read_index(5) != 'T'
+diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
+--- a/drivers/char/tpm/tpm_nsc.c
++++ b/drivers/char/tpm/tpm_nsc.c
+@@ -24,6 +24,10 @@
+ /* National definitions */
+ #define       TPM_NSC_BASE                    0x360
+ #define       TPM_NSC_IRQ                     0x07
++#define       TPM_NSC_BASE0_HI                0x60
++#define       TPM_NSC_BASE0_LO                0x61
++#define       TPM_NSC_BASE1_HI                0x62
++#define       TPM_NSC_BASE1_LO                0x63
+ 
+ #define       NSC_LDN_INDEX                   0x07
+ #define       NSC_SID_INDEX                   0x20
+@@ -234,7 +238,6 @@ static struct tpm_vendor_specific tpm_ns
+       .cancel = tpm_nsc_cancel,
+       .req_complete_mask = NSC_STATUS_OBF,
+       .req_complete_val = NSC_STATUS_OBF,
+-      .base = TPM_NSC_BASE,
+       .miscdev = { .fops = &nsc_ops, },
+       
+ };
+@@ -243,15 +246,16 @@ static int __devinit tpm_nsc_init(struct
+                                 const struct pci_device_id *pci_id)
+ {
+       int rc = 0;
++      int lo, hi;
++
++      hi = tpm_read_index(TPM_NSC_BASE0_HI);
++      lo = tpm_read_index(TPM_NSC_BASE0_LO);
++
++      tpm_nsc.base = (hi<<8) | lo;
+ 
+       if (pci_enable_device(pci_dev))
+               return -EIO;
+ 
+-      if (tpm_lpc_bus_init(pci_dev, TPM_NSC_BASE)) {
+-              rc = -ENODEV;
+-              goto out_err;
+-      }
+-
+       /* verify that it is a National part (SID) */
+       if (tpm_read_index(NSC_SID_INDEX) != 0xEF) {
+               rc = -ENODEV;
+diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
+--- a/drivers/char/tty_ioctl.c
++++ b/drivers/char/tty_ioctl.c
+@@ -476,11 +476,11 @@ int n_tty_ioctl(struct tty_struct * tty,
+                       ld = tty_ldisc_ref(tty);
+                       switch (arg) {
+                       case TCIFLUSH:
+-                              if (ld->flush_buffer)
++                              if (ld && ld->flush_buffer)
+                                       ld->flush_buffer(tty);
+                               break;
+                       case TCIOFLUSH:
+-                              if (ld->flush_buffer)
++                              if (ld && ld->flush_buffer)
+                                       ld->flush_buffer(tty);
+                               /* fall through */
+                       case TCOFLUSH:
+diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
+--- a/drivers/media/video/cx88/cx88-video.c
++++ b/drivers/media/video/cx88/cx88-video.c
+@@ -261,7 +261,7 @@ static struct cx88_ctrl cx8800_ctls[] = 
+                       .default_value = 0,
+                       .type          = V4L2_CTRL_TYPE_INTEGER,
+               },
+-              .off                   = 0,
++              .off                   = 128,
+               .reg                   = MO_HUE,
+               .mask                  = 0x00ff,
+               .shift                 = 0,
+diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
+--- a/drivers/net/e1000/e1000_main.c
++++ b/drivers/net/e1000/e1000_main.c
+@@ -2307,6 +2307,7 @@ e1000_xmit_frame(struct sk_buff *skb, st
+       tso = e1000_tso(adapter, skb);
+       if (tso < 0) {
+               dev_kfree_skb_any(skb);
++              spin_unlock_irqrestore(&adapter->tx_lock, flags);
+               return NETDEV_TX_OK;
+       }
+ 
+diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
+--- a/drivers/net/hamradio/Kconfig
++++ b/drivers/net/hamradio/Kconfig
+@@ -17,7 +17,7 @@ config MKISS
+ 
+ config 6PACK
+       tristate "Serial port 6PACK driver"
+-      depends on AX25 && BROKEN_ON_SMP
++      depends on AX25
+       ---help---
+         6pack is a transmission protocol for the data exchange between your
+         PC and your TNC (the Terminal Node Controller acts as a kind of
+diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c
+--- a/drivers/net/shaper.c
++++ b/drivers/net/shaper.c
+@@ -135,10 +135,8 @@ static int shaper_start_xmit(struct sk_b
+ {
+       struct shaper *shaper = dev->priv;
+       struct sk_buff *ptr;
+-   
+-      if (down_trylock(&shaper->sem))
+-              return -1;
+ 
++      spin_lock(&shaper->lock);
+       ptr=shaper->sendq.prev;
+       
+       /*
+@@ -232,7 +230,7 @@ static int shaper_start_xmit(struct sk_b
+                 shaper->stats.collisions++;
+       }
+       shaper_kick(shaper);
+-      up(&shaper->sem);
++      spin_unlock(&shaper->lock);
+       return 0;
+ }
+ 
+@@ -271,11 +269,9 @@ static void shaper_timer(unsigned long d
+ {
+       struct shaper *shaper = (struct shaper *)data;
+ 
+-      if (!down_trylock(&shaper->sem)) {
+-              shaper_kick(shaper);
+-              up(&shaper->sem);
+-      } else
+-              mod_timer(&shaper->timer, jiffies);
++      spin_lock(&shaper->lock);
++      shaper_kick(shaper);
++      spin_unlock(&shaper->lock);
+ }
+ 
+ /*
+@@ -332,21 +328,6 @@ static void shaper_kick(struct shaper *s
+ 
+ 
+ /*
+- *    Flush the shaper queues on a closedown
+- */
+- 
+-static void shaper_flush(struct shaper *shaper)
+-{
+-      struct sk_buff *skb;
+-
+-      down(&shaper->sem);
+-      while((skb=skb_dequeue(&shaper->sendq))!=NULL)
+-              dev_kfree_skb(skb);
+-      shaper_kick(shaper);
+-      up(&shaper->sem);
+-}
+-
+-/*
+  *    Bring the interface up. We just disallow this until a 
+  *    bind.
+  */
+@@ -375,7 +356,15 @@ static int shaper_open(struct net_device
+ static int shaper_close(struct net_device *dev)
+ {
+       struct shaper *shaper=dev->priv;
+-      shaper_flush(shaper);
++      struct sk_buff *skb;
++
++      while ((skb = skb_dequeue(&shaper->sendq)) != NULL)
++              dev_kfree_skb(skb);
++
++      spin_lock_bh(&shaper->lock);
++      shaper_kick(shaper);
++      spin_unlock_bh(&shaper->lock);
++
+       del_timer_sync(&shaper->timer);
+       return 0;
+ }
+@@ -576,6 +565,7 @@ static void shaper_init_priv(struct net_
+       init_timer(&sh->timer);
+       sh->timer.function=shaper_timer;
+       sh->timer.data=(unsigned long)sh;
++      spin_lock_init(&sh->lock);
+ }
+ 
+ /*
+diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -396,7 +396,7 @@ int pci_register_driver(struct pci_drive
+       /* FIXME, once all of the existing PCI drivers have been fixed to set
+        * the pci shutdown function, this test can go away. */
+       if (!drv->driver.shutdown)
+-              drv->driver.shutdown = pci_device_shutdown,
++              drv->driver.shutdown = pci_device_shutdown;
+       drv->driver.owner = drv->owner;
+       drv->driver.kobj.ktype = &pci_driver_kobj_type;
+       pci_init_dynids(&drv->dynids);
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -1914,9 +1914,11 @@ qla2x00_reg_remote_port(scsi_qla_host_t 
+               rport_ids.roles |= FC_RPORT_ROLE_FCP_TARGET;
+ 
+       fcport->rport = rport = fc_remote_port_add(ha->host, 0, &rport_ids);
+-      if (!rport)
++      if (!rport) {
+               qla_printk(KERN_WARNING, ha,
+                   "Unable to allocate fc remote port!\n");
++              return;
++      }
+ 
+       if (rport->scsi_target_id != -1 && rport->scsi_target_id < MAX_TARGETS)
+               fcport->os_target_id = rport->scsi_target_id;
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -1150,7 +1150,7 @@ iospace_error_exit:
+  */
+ int qla2x00_probe_one(struct pci_dev *pdev, struct qla_board_info *brd_info)
+ {
+-      int     ret;
++      int     ret = -ENODEV;
+       device_reg_t __iomem *reg;
+       struct Scsi_Host *host;
+       scsi_qla_host_t *ha;
+@@ -1161,7 +1161,7 @@ int qla2x00_probe_one(struct pci_dev *pd
+       fc_port_t *fcport;
+ 
+       if (pci_enable_device(pdev))
+-              return -1;
++              goto probe_out;
+ 
+       host = scsi_host_alloc(&qla2x00_driver_template,
+           sizeof(scsi_qla_host_t));
+@@ -1183,9 +1183,8 @@ int qla2x00_probe_one(struct pci_dev *pd
+ 
+       /* Configure PCI I/O space */
+       ret = qla2x00_iospace_config(ha);
+-      if (ret != 0) {
+-              goto probe_alloc_failed;
+-      }
++      if (ret)
++              goto probe_failed;
+ 
+       /* Sanitize the information from PCI BIOS. */
+       host->irq = pdev->irq;
+@@ -1258,23 +1257,10 @@ int qla2x00_probe_one(struct pci_dev *pd
+               qla_printk(KERN_WARNING, ha,
+                   "[ERROR] Failed to allocate memory for adapter\n");
+ 
+-              goto probe_alloc_failed;
++              ret = -ENOMEM;
++              goto probe_failed;
+       }
+ 
+-      pci_set_drvdata(pdev, ha);
+-      host->this_id = 255;
+-      host->cmd_per_lun = 3;
+-      host->unique_id = ha->instance;
+-      host->max_cmd_len = MAX_CMDSZ;
+-      host->max_channel = ha->ports - 1;
+-      host->max_id = ha->max_targets;
+-      host->max_lun = ha->max_luns;
+-      host->transportt = qla2xxx_transport_template;
+-      if (scsi_add_host(host, &pdev->dev))
+-              goto probe_alloc_failed;
+-
+-      qla2x00_alloc_sysfs_attr(ha);
+-
+       if (qla2x00_initialize_adapter(ha) &&
+           !(ha->device_flags & DFLG_NO_CABLE)) {
+ 
+@@ -1285,11 +1271,10 @@ int qla2x00_probe_one(struct pci_dev *pd
+                   "Adapter flags %x.\n",
+                   ha->host_no, ha->device_flags));
+ 
++              ret = -ENODEV;
+               goto probe_failed;
+       }
+ 
+-      qla2x00_init_host_attr(ha);
+-
+       /*
+        * Startup the kernel thread for this host adapter
+        */
+@@ -1299,17 +1284,26 @@ int qla2x00_probe_one(struct pci_dev *pd
+               qla_printk(KERN_WARNING, ha,
+                   "Unable to start DPC thread!\n");
+ 
++              ret = -ENODEV;
+               goto probe_failed;
+       }
+       wait_for_completion(&ha->dpc_inited);
+ 
++      host->this_id = 255;
++      host->cmd_per_lun = 3;
++      host->unique_id = ha->instance;
++      host->max_cmd_len = MAX_CMDSZ;
++      host->max_channel = ha->ports - 1;
++      host->max_lun = MAX_LUNS;
++      host->transportt = qla2xxx_transport_template;
++
+       if (IS_QLA2100(ha) || IS_QLA2200(ha))
+               ret = request_irq(host->irq, qla2100_intr_handler,
+                   SA_INTERRUPT|SA_SHIRQ, ha->brd_info->drv_name, ha);
+       else
+               ret = request_irq(host->irq, qla2300_intr_handler,
+                   SA_INTERRUPT|SA_SHIRQ, ha->brd_info->drv_name, ha);
+-      if (ret != 0) {
++      if (ret) {
+               qla_printk(KERN_WARNING, ha,
+                   "Failed to reserve interrupt %d already in use.\n",
+                   host->irq);
+@@ -1363,9 +1357,18 @@ int qla2x00_probe_one(struct pci_dev *pd
+               msleep(10);
+       }
+ 
++      pci_set_drvdata(pdev, ha);
+       ha->flags.init_done = 1;
+       num_hosts++;
+ 
++      ret = scsi_add_host(host, &pdev->dev);
++      if (ret)
++              goto probe_failed;
++
++      qla2x00_alloc_sysfs_attr(ha);
++
++      qla2x00_init_host_attr(ha);
++
+       qla_printk(KERN_INFO, ha, "\n"
+           " QLogic Fibre Channel HBA Driver: %s\n"
+           "  QLogic %s - %s\n"
+@@ -1384,9 +1387,6 @@ int qla2x00_probe_one(struct pci_dev *pd
+ probe_failed:
+       fc_remove_host(ha->host);
+ 
+-      scsi_remove_host(host);
+-
+-probe_alloc_failed:
+       qla2x00_free_device(ha);
+ 
+       scsi_host_put(host);
+@@ -1394,7 +1394,8 @@ probe_alloc_failed:
+ probe_disable_device:
+       pci_disable_device(pdev);
+ 
+-      return -1;
++probe_out:
++      return ret;
+ }
+ EXPORT_SYMBOL_GPL(qla2x00_probe_one);
+ 
+diff --git a/fs/bio.c b/fs/bio.c
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -261,6 +261,7 @@ inline void __bio_clone(struct bio *bio,
+        */
+       bio->bi_vcnt = bio_src->bi_vcnt;
+       bio->bi_size = bio_src->bi_size;
++      bio->bi_idx = bio_src->bi_idx;
+       bio_phys_segments(q, bio);
+       bio_hw_segments(q, bio);
+ }
+diff --git a/fs/char_dev.c b/fs/char_dev.c
+--- a/fs/char_dev.c
++++ b/fs/char_dev.c
+@@ -139,7 +139,7 @@ __unregister_chrdev_region(unsigned majo
+       struct char_device_struct *cd = NULL, **cp;
+       int i = major_to_index(major);
+ 
+-      up(&chrdevs_lock);
++      down(&chrdevs_lock);
+       for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
+               if ((*cp)->major == major &&
+                   (*cp)->baseminor == baseminor &&
+diff --git a/fs/exec.c b/fs/exec.c
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -649,6 +649,7 @@ static inline int de_thread(struct task_
+       }
+       sig->group_exit_task = NULL;
+       sig->notify_count = 0;
++      sig->real_timer.data = (unsigned long)current;
+       spin_unlock_irq(lock);
+ 
+       /*
+diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
+--- a/fs/isofs/compress.c
++++ b/fs/isofs/compress.c
+@@ -129,8 +129,14 @@ static int zisofs_readpage(struct file *
+       cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
+       brelse(bh);
+ 
++      if (cstart > cend)
++              goto eio;
++              
+       csize = cend-cstart;
+ 
++      if (csize > deflateBound(1UL << zisofs_block_shift))
++              goto eio;
++
+       /* Now page[] contains an array of pages, any of which can be NULL,
+          and the locks on which we hold.  We should now read the data and
+          release the pages.  If the pages are NULL the decompressed data
+diff --git a/include/asm-i386/string.h b/include/asm-i386/string.h
+--- a/include/asm-i386/string.h
++++ b/include/asm-i386/string.h
+@@ -116,7 +116,8 @@ __asm__ __volatile__(
+       "orb $1,%%al\n"
+       "3:"
+       :"=a" (__res), "=&S" (d0), "=&D" (d1)
+-                   :"1" (cs),"2" (ct));
++      :"1" (cs),"2" (ct)
++      :"memory");
+ return __res;
+ }
+ 
+@@ -138,8 +139,9 @@ __asm__ __volatile__(
+       "3:\tsbbl %%eax,%%eax\n\t"
+       "orb $1,%%al\n"
+       "4:"
+-                   :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+-                   :"1" (cs),"2" (ct),"3" (count));
++      :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
++      :"1" (cs),"2" (ct),"3" (count)
++      :"memory");
+ return __res;
+ }
+ 
+@@ -158,7 +160,9 @@ __asm__ __volatile__(
+       "movl $1,%1\n"
+       "2:\tmovl %1,%0\n\t"
+       "decl %0"
+-      :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
++      :"=a" (__res), "=&S" (d0)
++      :"1" (s),"0" (c)
++      :"memory");
+ return __res;
+ }
+ 
+@@ -175,7 +179,9 @@ __asm__ __volatile__(
+       "leal -1(%%esi),%0\n"
+       "2:\ttestb %%al,%%al\n\t"
+       "jne 1b"
+-      :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
++      :"=g" (__res), "=&S" (d0), "=&a" (d1)
++      :"0" (0),"1" (s),"2" (c)
++      :"memory");
+ return __res;
+ }
+ 
+@@ -189,7 +195,9 @@ __asm__ __volatile__(
+       "scasb\n\t"
+       "notl %0\n\t"
+       "decl %0"
+-      :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu));
++      :"=c" (__res), "=&D" (d0)
++      :"1" (s),"a" (0), "0" (0xffffffffu)
++      :"memory");
+ return __res;
+ }
+ 
+@@ -333,7 +341,9 @@ __asm__ __volatile__(
+       "je 1f\n\t"
+       "movl $1,%0\n"
+       "1:\tdecl %0"
+-      :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
++      :"=D" (__res), "=&c" (d0)
++      :"a" (c),"0" (cs),"1" (count)
++      :"memory");
+ return __res;
+ }
+ 
+@@ -369,7 +379,7 @@ __asm__ __volatile__(
+       "je 2f\n\t"
+       "stosb\n"
+       "2:"
+-      : "=&c" (d0), "=&D" (d1)
++      :"=&c" (d0), "=&D" (d1)
+       :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
+       :"memory");
+ return (s);   
+@@ -392,7 +402,8 @@ __asm__ __volatile__(
+       "jne 1b\n"
+       "3:\tsubl %2,%0"
+       :"=a" (__res), "=&d" (d0)
+-      :"c" (s),"1" (count));
++      :"c" (s),"1" (count)
++      :"memory");
+ return __res;
+ }
+ /* end of additional stuff */
+@@ -473,7 +484,8 @@ static inline void * memscan(void * addr
+               "dec %%edi\n"
+               "1:"
+               : "=D" (addr), "=c" (size)
+-              : "0" (addr), "1" (size), "a" (c));
++              : "0" (addr), "1" (size), "a" (c)
++              : "memory");
+       return addr;
+ }
+ 
+diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
+--- a/include/asm-x86_64/smp.h
++++ b/include/asm-x86_64/smp.h
+@@ -46,6 +46,8 @@ extern int pic_mode;
+ extern int smp_num_siblings;
+ extern void smp_flush_tlb(void);
+ extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
++extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info,
++                                   int retry, int wait);
+ extern void smp_send_reschedule(int cpu);
+ extern void smp_invalidate_rcv(void);         /* Process an NMI */
+ extern void zap_low_mappings(void);
+diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h
+--- a/include/linux/if_shaper.h
++++ b/include/linux/if_shaper.h
+@@ -23,7 +23,7 @@ struct shaper
+       __u32 shapeclock;
+       unsigned long recovery; /* Time we can next clock a packet out on
+                                  an empty queue */
+-      struct semaphore sem;
++      spinlock_t lock;
+         struct net_device_stats stats;
+       struct net_device *dev;
+       int  (*hard_start_xmit) (struct sk_buff *skb,
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1192,7 +1192,7 @@ static inline void *skb_header_pointer(c
+ {
+       int hlen = skb_headlen(skb);
+ 
+-      if (offset + len <= hlen)
++      if (hlen - offset >= len)
+               return skb->data + offset;
+ 
+       if (skb_copy_bits(skb, offset, buffer, len) < 0)
+diff --git a/include/linux/zlib.h b/include/linux/zlib.h
+--- a/include/linux/zlib.h
++++ b/include/linux/zlib.h
+@@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp 
+    stream state was inconsistent (such as zalloc or state being NULL).
+ */
+ 
++static inline unsigned long deflateBound(unsigned long s)
++{
++      return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
++}
++
+ extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
+ /*
+      Dynamically update the compression level and compression strategy.  The
+diff --git a/kernel/module.c b/kernel/module.c
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -249,13 +249,18 @@ static inline unsigned int block_size(in
+ /* Created by linker magic */
+ extern char __per_cpu_start[], __per_cpu_end[];
+ 
+-static void *percpu_modalloc(unsigned long size, unsigned long align)
++static void *percpu_modalloc(unsigned long size, unsigned long align,
++                           const char *name)
+ {
+       unsigned long extra;
+       unsigned int i;
+       void *ptr;
+ 
+-      BUG_ON(align > SMP_CACHE_BYTES);
++      if (align > SMP_CACHE_BYTES) {
++              printk(KERN_WARNING "%s: per-cpu alignment %li > %i\n",
++                     name, align, SMP_CACHE_BYTES);
++              align = SMP_CACHE_BYTES;
++      }
+ 
+       ptr = __per_cpu_start;
+       for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
+@@ -347,7 +352,8 @@ static int percpu_modinit(void)
+ }     
+ __initcall(percpu_modinit);
+ #else /* ... !CONFIG_SMP */
+-static inline void *percpu_modalloc(unsigned long size, unsigned long align)
++static inline void *percpu_modalloc(unsigned long size, unsigned long align,
++                                  const char *name)
+ {
+       return NULL;
+ }
+@@ -1554,7 +1560,8 @@ static struct module *load_module(void _
+       if (pcpuindex) {
+               /* We have a special allocation for this section. */
+               percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
+-                                       sechdrs[pcpuindex].sh_addralign);
++                                       sechdrs[pcpuindex].sh_addralign,
++                                       mod->name);
+               if (!percpu) {
+                       err = -ENOMEM;
+                       goto free_mod;
+diff --git a/lib/inflate.c b/lib/inflate.c
+--- a/lib/inflate.c
++++ b/lib/inflate.c
+@@ -326,7 +326,7 @@ DEBG("huft1 ");
+   {
+     *t = (struct huft *)NULL;
+     *m = 0;
+-    return 0;
++    return 2;
+   }
+ 
+ DEBG("huft2 ");
+@@ -374,6 +374,7 @@ DEBG("huft5 ");
+     if ((j = *p++) != 0)
+       v[x[j]++] = i;
+   } while (++i < n);
++  n = x[g];                   /* set n to length of v */
+ 
+ DEBG("h6 ");
+ 
+@@ -410,12 +411,13 @@ DEBG1("1 ");
+ DEBG1("2 ");
+           f -= a + 1;           /* deduct codes from patterns left */
+           xp = c + k;
+-          while (++j < z)       /* try smaller tables up to z bits */
+-          {
+-            if ((f <<= 1) <= *++xp)
+-              break;            /* enough codes to use up j bits */
+-            f -= *xp;           /* else deduct codes from patterns */
+-          }
++          if (j < z)
++            while (++j < z)       /* try smaller tables up to z bits */
++            {
++              if ((f <<= 1) <= *++xp)
++                break;            /* enough codes to use up j bits */
++              f -= *xp;           /* else deduct codes from patterns */
++            }
+         }
+ DEBG1("3 ");
+         z = 1 << j;             /* table entries for j-bit table */
+diff --git a/lib/zlib_inflate/inftrees.c b/lib/zlib_inflate/inftrees.c
+--- a/lib/zlib_inflate/inftrees.c
++++ b/lib/zlib_inflate/inftrees.c
+@@ -141,7 +141,7 @@ static int huft_build(
+   {
+     *t = NULL;
+     *m = 0;
+-    return Z_OK;
++    return Z_DATA_ERROR;
+   }
+ 
+ 
+diff --git a/mm/memory.c b/mm/memory.c
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1164,7 +1164,7 @@ int remap_pfn_range(struct vm_area_struc
+ {
+       pgd_t *pgd;
+       unsigned long next;
+-      unsigned long end = addr + size;
++      unsigned long end = addr + PAGE_ALIGN(size);
+       struct mm_struct *mm = vma->vm_mm;
+       int err;
+ 
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -409,7 +409,7 @@ asmlinkage long sys_set_mempolicy(int mo
+       struct mempolicy *new;
+       DECLARE_BITMAP(nodes, MAX_NUMNODES);
+ 
+-      if (mode > MPOL_MAX)
++      if (mode < 0 || mode > MPOL_MAX)
+               return -EINVAL;
+       err = get_nodes(nodes, nmask, maxnode, mode);
+       if (err)
+diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
+--- a/net/8021q/vlan.c
++++ b/net/8021q/vlan.c
+@@ -578,6 +578,14 @@ static int vlan_device_event(struct noti
+                       if (!vlandev)
+                               continue;
+ 
++                      if (netif_carrier_ok(dev)) {
++                              if (!netif_carrier_ok(vlandev))
++                                      netif_carrier_on(vlandev);
++                      } else {
++                              if (netif_carrier_ok(vlandev))
++                                      netif_carrier_off(vlandev);
++                      }
++
+                       if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) {
+                              vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK) 
+                                       | flgs;
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -111,7 +111,6 @@ static int ip_dev_loopback_xmit(struct s
+ #ifdef CONFIG_NETFILTER_DEBUG
+       nf_debug_ip_loopback_xmit(newskb);
+ #endif
+-      nf_reset(newskb);
+       netif_rx(newskb);
+       return 0;
+ }
+@@ -196,8 +195,6 @@ static inline int ip_finish_output2(stru
+       nf_debug_ip_finish_output2(skb);
+ #endif /*CONFIG_NETFILTER_DEBUG*/
+ 
+-      nf_reset(skb);
+-
+       if (hh) {
+               int hh_alen;
+ 
+diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
+--- a/net/ipv4/netfilter/ip_conntrack_core.c
++++ b/net/ipv4/netfilter/ip_conntrack_core.c
+@@ -1124,6 +1124,9 @@ void ip_conntrack_cleanup(void)
+               schedule();
+               goto i_see_dead_people;
+       }
++      /* wait until all references to ip_conntrack_untracked are dropped */
++      while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
++              schedule();
+ 
+       kmem_cache_destroy(ip_conntrack_cachep);
+       kmem_cache_destroy(ip_conntrack_expect_cachep);
+diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
+--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
++++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
+@@ -432,6 +432,13 @@ static unsigned int ip_conntrack_defrag(
+                                       const struct net_device *out,
+                                       int (*okfn)(struct sk_buff *))
+ {
++#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
++      /* Previously seen (loopback)?  Ignore.  Do this before
++           fragment check. */
++      if ((*pskb)->nfct)
++              return NF_ACCEPT;
++#endif
++
+       /* Gather fragments. */
+       if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+               *pskb = ip_ct_gather_frags(*pskb,
+diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
+--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
++++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
+@@ -40,7 +40,8 @@ tcp_unique_tuple(struct ip_conntrack_tup
+                enum ip_nat_manip_type maniptype,
+                const struct ip_conntrack *conntrack)
+ {
+-      static u_int16_t port, *portptr;
++      static u_int16_t port;
++      u_int16_t *portptr;
+       unsigned int range_size, min, i;
+ 
+       if (maniptype == IP_NAT_MANIP_SRC)
+diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
+--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
++++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
+@@ -41,7 +41,8 @@ udp_unique_tuple(struct ip_conntrack_tup
+                enum ip_nat_manip_type maniptype,
+                const struct ip_conntrack *conntrack)
+ {
+-      static u_int16_t port, *portptr;
++      static u_int16_t port;
++      u_int16_t *portptr;
+       unsigned int range_size, min, i;
+ 
+       if (maniptype == IP_NAT_MANIP_SRC)
+diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
+--- a/net/ipv6/netfilter/ip6_queue.c
++++ b/net/ipv6/netfilter/ip6_queue.c
+@@ -76,7 +76,9 @@ static DECLARE_MUTEX(ipqnl_sem);
+ static void
+ ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+ {
++      local_bh_disable();
+       nf_reinject(entry->skb, entry->info, verdict);
++      local_bh_enable();
+       kfree(entry);
+ }
+ 
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -315,8 +315,8 @@ err:
+ static void netlink_remove(struct sock *sk)
+ {
+       netlink_table_grab();
+-      nl_table[sk->sk_protocol].hash.entries--;
+-      sk_del_node_init(sk);
++      if (sk_del_node_init(sk))
++              nl_table[sk->sk_protocol].hash.entries--;
+       if (nlk_sk(sk)->groups)
+               __sk_del_bind_node(sk);
+       netlink_table_ungrab();
+@@ -429,7 +429,12 @@ retry:
+       err = netlink_insert(sk, pid);
+       if (err == -EADDRINUSE)
+               goto retry;
+-      return 0;
++
++      /* If 2 threads race to autobind, that is fine.  */
++      if (err == -EBUSY)
++              err = 0;
++
++      return err;
+ }
+ 
+ static inline int netlink_capable(struct socket *sock, unsigned int flag) 
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -274,6 +274,9 @@ static int packet_rcv_spkt(struct sk_buf
+       dst_release(skb->dst);
+       skb->dst = NULL;
+ 
++      /* drop conntrack reference */
++      nf_reset(skb);
++
+       spkt = (struct sockaddr_pkt*)skb->cb;
+ 
+       skb_push(skb, skb->data-skb->mac.raw);
+@@ -517,6 +520,9 @@ static int packet_rcv(struct sk_buff *sk
+       dst_release(skb->dst);
+       skb->dst = NULL;
+ 
++      /* drop conntrack reference */
++      nf_reset(skb);
++
+       spin_lock(&sk->sk_receive_queue.lock);
+       po->stats.tp_packets++;
+       __skb_queue_tail(&sk->sk_receive_queue, skb);
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -1180,6 +1180,9 @@ static struct xfrm_policy *xfrm_compile_
+       if (nr > XFRM_MAX_DEPTH)
+               return NULL;
+ 
++      if (p->dir > XFRM_POLICY_OUT)
++              return NULL;
++
+       xp = xfrm_policy_alloc(GFP_KERNEL);
+       if (xp == NULL) {
+               *dir = -ENOBUFS;
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -188,7 +188,11 @@ static void keyring_destroy(struct key *
+ 
+       if (keyring->description) {
+               write_lock(&keyring_name_lock);
+-              list_del(&keyring->type_data.link);
++
++              if (keyring->type_data.link.next != NULL &&
++                  !list_empty(&keyring->type_data.link))
++                      list_del(&keyring->type_data.link);
++
+               write_unlock(&keyring_name_lock);
+       }
+ 
+diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -641,7 +641,7 @@ long join_session_keyring(const char *na
+               keyring = keyring_alloc(name, tsk->uid, tsk->gid, 0, NULL);
+               if (IS_ERR(keyring)) {
+                       ret = PTR_ERR(keyring);
+-                      goto error;
++                      goto error2;
+               }
+       }
+       else if (IS_ERR(keyring)) {
diff -r 5f1ed597f107 -r 8799d14bef77 tools/blktap/parallax/Makefile
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/blktap/parallax/Makefile    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,71 @@
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Destination for the parallax daemon and the vdi_* tools.
+PARALLAX_INSTALL_DIR   = /usr/sbin
+
+INSTALL         = install
+INSTALL_PROG    = $(INSTALL) -m0755
+INSTALL_DIR     = $(INSTALL) -d -m0755
+
+INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
+
+# The libraries are listed here as well, so every link line below places
+# $(LDFLAGS) after the sources whose symbols it has to resolve.
+LDFLAGS = -L.. -lpthread -lz -lblktap
+
+# Sources shared by the daemon and by every vdi_* tool ...
+PLX_SRCS := vdi.c
+PLX_SRCS += radix.c
+PLX_SRCS += snaplog.c
+PLX_SRCS += blockstore.c
+PLX_SRCS += block-async.c
+PLX_SRCS += requests-async.c
+VDI_SRCS := $(PLX_SRCS)
+# ... plus the daemon's own entry point.
+PLX_SRCS += parallax.c
+
+VDI_TOOLS := vdi_create
+VDI_TOOLS += vdi_list
+VDI_TOOLS += vdi_snap
+VDI_TOOLS += vdi_snap_list
+VDI_TOOLS += vdi_snap_delete
+VDI_TOOLS += vdi_fill
+VDI_TOOLS += vdi_tree
+VDI_TOOLS += vdi_validate
+
+CFLAGS   += -Wall
+CFLAGS   += -Werror
+CFLAGS   += -Wno-unused
+#CFLAGS   += -O3
+CFLAGS   += -g3
+CFLAGS   += -fno-strict-aliasing
+CFLAGS   += $(INCLUDES)
+CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# Get gcc to generate the dependencies for us.
+CFLAGS   += -Wp,-MD,.$(@F).d
+DEPS     = .*.d
+
+OBJS     = $(patsubst %.c,%.o,$(SRCS))
+IBINS    = parallax $(VDI_TOOLS)
+
+# blockstored has no explicit rule in this file; presumably make's
+# built-in '%: %.c' rule builds it -- TODO confirm.
+all: $(VDI_TOOLS) parallax blockstored
+
+install: all
+	$(INSTALL_DIR) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
+	$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
+
+clean:
+	rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax blockstored vdi_unittest
+
+parallax: $(PLX_SRCS)
+	$(CC) $(CFLAGS) -o $@ $(PLX_SRCS) $(LDFLAGS)
+
+# Static pattern rule: each tool is its own .c plus the shared sources.
+$(VDI_TOOLS): %: %.c $(VDI_SRCS)
+	$(CC) $(CFLAGS) -o $@ $< $(VDI_SRCS) $(LDFLAGS)
+
+.PHONY: all TAGS clean install rpm
+-include $(DEPS)
\ No newline at end of file
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/Makefile
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/Makefile    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,46 @@
+
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# xenconsoled (daemon) and xenconsole (client) install to different places.
+DAEMON_INSTALL_DIR = /usr/sbin
+CLIENT_INSTALL_DIR = /usr/libexec/xen
+
+INSTALL         = install
+INSTALL_PROG    = $(INSTALL) -m0755
+INSTALL_DIR     = $(INSTALL) -d -m0755
+
+CC       = gcc
+CFLAGS   = -Wall -Werror -g3
+
+CFLAGS  += -I $(XEN_XCS)
+CFLAGS  += -I $(XEN_LIBXC)
+CFLAGS  += -I $(XEN_XENSTORE)
+
+BIN      = xenconsoled xenconsole
+
+all: $(BIN)
+
+clean:
+	$(RM) *.a *.so *.o *.rpm $(BIN)
+	$(RM) client/*.o daemon/*.o
+
+# Each binary links every object built from its own subdirectory.
+xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
+	$(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
+	      -lxenctrl -lxenstore
+
+xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c))
+	$(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
+	      -lxenctrl -lxenstore
+
+# The install directories start with '/', so they are appended to
+# $(DESTDIR) directly; an extra '/' would give '//usr/...' whenever
+# DESTDIR is empty.
+install: $(BIN)
+	$(INSTALL_DIR) -p $(DESTDIR)$(DAEMON_INSTALL_DIR)
+	$(INSTALL_PROG) xenconsoled $(DESTDIR)$(DAEMON_INSTALL_DIR)
+	$(INSTALL_DIR) -p $(DESTDIR)$(CLIENT_INSTALL_DIR)
+	$(INSTALL_PROG) xenconsole $(DESTDIR)$(CLIENT_INSTALL_DIR)
+
+.PHONY: all clean install
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/client/main.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/client/main.c       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,269 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  Xen Console Client
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/select.h>
+#include <err.h>
+#include <pty.h>
+
+#include "xenctrl.h"
+#include "xs.h"
+
+/* 0x1d is Ctrl-]; typed on its own it detaches from the console. */
+#define ESCAPE_CHARACTER 0x1d
+
+/* Set by the SIGTERM handler and polled by console_loop(). */
+static volatile sig_atomic_t received_signal = 0;
+
+static void sighandler(int signum)
+{
+       received_signal = 1;
+}
+
+/*
+ * Write all size bytes of data to fd, resuming after short writes.
+ * Returns false as soon as write() fails or writes nothing.
+ */
+static bool write_sync(int fd, const void *data, size_t size)
+{
+       const char *buf = data; /* byte pointer: no void * arithmetic */
+       size_t offset = 0;
+       ssize_t len;
+
+       while (offset < size) {
+               len = write(fd, buf + offset, size - offset);
+               if (len < 1) {
+                       return false;
+               }
+               offset += len;
+       }
+
+       return true;
+}
+
+static void usage(const char *program) {
+       printf("Usage: %s [OPTION] DOMID\n"
+              "Attaches to a virtual domain console\n"
+              "\n"
+              "  -h, --help       display this help and exit\n"
+              , program);
+}
+
+/*
+ * Save the terminal settings of fd in *old and switch fd to raw mode.
+ * Returns false, with *old not filled in, if the settings cannot be read.
+ * Don't worry too much if setting the new attributes fails.
+ */
+static bool init_term(int fd, struct termios *old)
+{
+       struct termios new_term;
+
+       if (tcgetattr(fd, old) == -1) {
+               return false;
+       }
+
+       new_term = *old;
+       cfmakeraw(&new_term);
+
+       tcsetattr(fd, TCSAFLUSH, &new_term);
+
+       return true;
+}
+
+/* Put back the settings saved by a successful init_term(). */
+static void restore_term(int fd, struct termios *old)
+{
+       tcsetattr(fd, TCSAFLUSH, old);
+}
+
+/*
+ * Shuttle bytes between stdin/stdout and the console tty fd until the
+ * escape character is typed, SIGTERM arrives, or an I/O error occurs.
+ * Returns 0 on escape or signal and -1 on error or end of file.
+ * xc_handle and domid are not used here.
+ */
+static int console_loop(int xc_handle, domid_t domid, int fd)
+{
+       int ret;
+
+       do {
+               fd_set fds;
+
+               FD_ZERO(&fds);
+               FD_SET(STDIN_FILENO, &fds);
+               FD_SET(fd, &fds);
+
+               ret = select(fd + 1, &fds, NULL, NULL, NULL);
+               if (ret == -1) {
+                       /* continue re-tests received_signal below */
+                       if (errno == EINTR || errno == EAGAIN) {
+                               continue;
+                       }
+                       return -1;
+               }
+
+               if (FD_ISSET(STDIN_FILENO, &fds)) {
+                       ssize_t len;
+                       char msg[60];
+
+                       len = read(STDIN_FILENO, msg, sizeof(msg));
+                       if (len == 1 && msg[0] == ESCAPE_CHARACTER) {
+                               return 0;
+                       }
+
+                       if (len == 0 || len == -1) {
+                               if (len == -1 &&
+                                   (errno == EINTR || errno == EAGAIN)) {
+                                       continue;
+                               }
+                               return -1;
+                       }
+
+                       if (!write_sync(fd, msg, len)) {
+                               perror("write() failed");
+                               return -1;
+                       }
+               }
+
+               if (FD_ISSET(fd, &fds)) {
+                       ssize_t len;
+                       char msg[512];
+
+                       len = read(fd, msg, sizeof(msg));
+                       if (len == 0 || len == -1) {
+                               if (len == -1 &&
+                                   (errno == EINTR || errno == EAGAIN)) {
+                                       continue;
+                               }
+                               return -1;
+                       }
+
+                       if (!write_sync(STDOUT_FILENO, msg, len)) {
+                               perror("write() failed");
+                               return -1;
+                       }
+               }
+       } while (received_signal == 0);
+
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       struct termios attr;
+       bool saved_attr;
+       int domid;
+       long val;
+       int xc_handle;
+       char *sopt = "h";
+       int ch;
+       int opt_ind=0;
+       struct option lopt[] = {
+               { "help",    0, 0, 'h' },
+               { 0 },
+       };
+       char *str_pty;
+       char path[1024];
+       int spty;
+       unsigned int len = 0;
+       struct xs_handle *xs;
+       char *end;
+
+       while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+               switch(ch) {
+               case 'h':
+                       usage(argv[0]);
+                       exit(0);
+                       break;
+               }
+       }
+
+       if ((argc - optind) != 1) {
+               fprintf(stderr, "Invalid number of arguments\n");
+               fprintf(stderr, "Try `%s --help' for more information.\n",
+                       argv[0]);
+               exit(EINVAL);
+       }
+
+       /* Reject empty, non-numeric, negative and out-of-range DOMIDs. */
+       errno = 0;
+       val = strtol(argv[optind], &end, 10);
+       if (end == argv[optind] || *end || errno == ERANGE ||
+           val < 0 || val > INT_MAX) {
+               fprintf(stderr, "Invalid DOMID `%s'\n", argv[optind]);
+               fprintf(stderr, "Try `%s --help' for more information.\n",
+                       argv[0]);
+               exit(EINVAL);
+       }
+       domid = val;
+
+       xs = xs_daemon_open();
+       if (xs == NULL) {
+               err(errno, "Could not contact XenStore");
+       }
+
+       xc_handle = xc_interface_open();
+       if (xc_handle == -1) {
+               err(errno, "xc_interface_open()");
+       }
+
+       signal(SIGTERM, sighandler);
+
+       snprintf(path, sizeof(path), "/console/%d/tty", domid);
+       str_pty = xs_read(xs, path, &len);
+       /* FIXME consoled currently does not assume domain-0 doesn't have a
+          console which is good when we break domain-0 up.  To keep us
+          user friendly, we'll bail out here since no data will ever show
+          up on domain-0. */
+       if (domid == 0) {
+               /* errno says nothing about this case, so do not report it. */
+               errx(EINVAL, "Could not read tty from store");
+       }
+       if (str_pty == NULL) {
+               err(errno, "Could not read tty from store");
+       }
+       spty = open(str_pty, O_RDWR | O_NOCTTY);
+       if (spty == -1) {
+               err(errno, "Could not open tty `%s'", str_pty);
+       }
+       free(str_pty);
+
+       saved_attr = init_term(STDIN_FILENO, &attr);
+       console_loop(xc_handle, domid, spty);
+       /* attr is only meaningful if init_term() managed to fill it in. */
+       if (saved_attr) {
+               restore_term(STDIN_FILENO, &attr);
+       }
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/io.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/io.c Thu Aug 25 22:53:20 2005
@@ -0,0 +1,362 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  Xen Console Daemon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#define _GNU_SOURCE
+
+#include "utils.h"
+#include "io.h"
+
+#include "xenctrl.h"
+#include "xs.h"
+#include "xen/io/domain_controller.h"
+#include "xcs_proto.h"
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/select.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <termios.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+struct buffer
+{
+       char *data;
+       size_t size;
+       size_t capacity;
+       size_t max_capacity;
+};
+
+/* Append size bytes; when max_capacity is set, keep only the newest bytes. */
+static void buffer_append(struct buffer *buffer, const void *data, size_t size)
+{
+       if ((buffer->capacity - buffer->size) < size) {
+               buffer->capacity += (size + 1024);
+               buffer->data = realloc(buffer->data, buffer->capacity);
+               if (buffer->data == NULL) {
+                       dolog(LOG_ERR, "Memory allocation failed");
+                       exit(ENOMEM);
+               }
+       }
+
+       memcpy(buffer->data + buffer->size, data, size);
+       buffer->size += size;
+       if (buffer->max_capacity && buffer->size > buffer->max_capacity) {
+               /* Over the limit: slide the newest bytes to the front. */
+               memmove(buffer->data,
+                       buffer->data + (buffer->size - buffer->max_capacity),
+                       buffer->max_capacity);
+               buffer->size = buffer->max_capacity;
+       }
+}
+
+static bool buffer_empty(struct buffer *buffer)
+{
+       return buffer->size == 0;
+}
+
+static void buffer_advance(struct buffer *buffer, size_t size)
+{
+       size = MIN(size, buffer->size); /* cannot consume more than is held */
+       memmove(buffer->data, buffer->data + size, buffer->size - size);
+       buffer->size -= size;           /* remaining bytes now start at data[0] */
+}
+
+struct domain
+{
+       int domid;
+       int tty_fd;
+       bool is_dead;
+       struct buffer buffer;
+       struct domain *next;
+};
+
+static struct domain *dom_head;
+
+static bool domain_is_valid(int domid)
+{
+       bool ret;
+       xc_dominfo_t info;
+
+       ret = (xc_domain_getinfo(xc, domid, 1, &info) == 1 &&
+              info.domid == domid);
+               
+       return ret;
+}
+
+/* Allocate a pty for dom and write its slave name to the store; returns master fd or -1. */
+static int domain_create_tty(struct domain *dom)
+{
+       char path[1024];
+       int master;
+
+       if ((master = getpt()) == -1 ||
+           grantpt(master) == -1 || unlockpt(master) == -1) {
+               dolog(LOG_ERR, "Failed to create tty for domain-%d",
+                     dom->domid);
+               if (master != -1) close(master); /* grantpt/unlockpt failed */
+               master = -1;
+       } else {
+               const char *slave = ptsname(master);
+               struct termios term;
+               char *data;
+               unsigned int len;
+
+               if (tcgetattr(master, &term) != -1) {
+                       cfmakeraw(&term);
+                       tcsetattr(master, TCSAFLUSH, &term);
+               }
+
+               xs_mkdir(xs, "/console");
+               snprintf(path, sizeof(path), "/console/%d", dom->domid);
+               xs_mkdir(xs, path);
+               strcat(path, "/tty");
+               xs_write(xs, path, slave, strlen(slave), O_CREAT);
+               snprintf(path, sizeof(path), "/console/%d/limit", dom->domid);
+               data = xs_read(xs, path, &len);
+               if (data) {
+                       dom->buffer.max_capacity = strtoul(data, 0, 0);
+                       free(data);
+               }
+       }
+
+       return master;
+}
+
+static struct domain *create_domain(int domid)
+{
+       struct domain *dom;
+
+       dom = (struct domain *)malloc(sizeof(struct domain));
+       if (dom == NULL) {
+               dolog(LOG_ERR, "Out of memory %s:%s():L%d",
+                     __FILE__, __FUNCTION__, __LINE__);
+               exit(ENOMEM);
+       }
+
+       dom->domid = domid;
+       dom->is_dead = false;
+       dom->buffer.data = 0;
+       dom->buffer.size = 0;
+       dom->buffer.capacity = 0;
+       dom->buffer.max_capacity = 0;
+       dom->next = 0;
+       /* Last: domain_create_tty() may set buffer.max_capacity from the store. */
+       dom->tty_fd = domain_create_tty(dom);
+       dolog(LOG_DEBUG, "New domain %d", domid);
+
+       return dom;
+}
+
+static struct domain *lookup_domain(int domid)
+{
+       struct domain **pp;
+
+       for (pp = &dom_head; *pp; pp = &(*pp)->next) {
+               struct domain *dom = *pp;
+
+               if (dom->domid == domid) {
+                       return dom;
+               } else if (dom->domid > domid) {
+                       *pp = create_domain(domid);
+                       (*pp)->next = dom;
+                       return *pp;
+               }
+       }
+
+       *pp = create_domain(domid);
+       return *pp;
+}
+
+static void remove_domain(struct domain *dom)
+{
+       struct domain **pp;
+
+       dolog(LOG_DEBUG, "Removing domain-%d", dom->domid);
+
+       for (pp = &dom_head; *pp; pp = &(*pp)->next) {
+               struct domain *d = *pp;
+
+               if (dom->domid == d->domid) {
+                       *pp = d->next;
+                       if (d->buffer.data) {
+                               free(d->buffer.data);
+                       }
+                       free(d);
+                       break;
+               }
+       }
+}
+
+static void remove_dead_domains(struct domain *dom)
+{
+       if (dom == NULL) return;
+       remove_dead_domains(dom->next);
+
+       if (dom->is_dead) {
+               remove_domain(dom);
+       }
+}
+
+static void handle_tty_read(struct domain *dom)
+{
+       ssize_t len;
+       xcs_msg_t msg;
+
+       msg.type = XCS_REQUEST;
+       msg.u.control.remote_dom = dom->domid;
+       msg.u.control.msg.type = CMSG_CONSOLE;
+       msg.u.control.msg.subtype = CMSG_CONSOLE_DATA;
+       msg.u.control.msg.id = 1;
+
+       len = read(dom->tty_fd, msg.u.control.msg.msg, 60);
+       if (len < 1) {
+               close(dom->tty_fd);
+
+               if (domain_is_valid(dom->domid)) {
+                       dom->tty_fd = domain_create_tty(dom);
+               } else {
+                       dom->is_dead = true;
+               }
+       } else if (domain_is_valid(dom->domid)) {
+               msg.u.control.msg.length = len;
+
+               if (!write_sync(xcs_data_fd, &msg, sizeof(msg))) {
+                       dolog(LOG_ERR, "Write to xcs failed: %m");
+                       exit(1);
+               }
+       } else {
+               close(dom->tty_fd);
+               dom->is_dead = true;
+       }
+}
+
+static void handle_tty_write(struct domain *dom)
+{
+       ssize_t len;
+
+       len = write(dom->tty_fd, dom->buffer.data, dom->buffer.size);
+       if (len < 1) {
+               close(dom->tty_fd);
+
+               if (domain_is_valid(dom->domid)) {
+                       dom->tty_fd = domain_create_tty(dom);
+               } else {
+                       dom->is_dead = true;
+               }
+       } else {
+               buffer_advance(&dom->buffer, len);
+       }
+}
+
+static void handle_xcs_msg(int fd)
+{
+       xcs_msg_t msg;
+
+       if (!read_sync(fd, &msg, sizeof(msg))) {
+               dolog(LOG_ERR, "read from xcs failed! %m");
+               exit(1);
+       } else if (msg.type == XCS_REQUEST) {
+               struct domain *dom;
+
+               dom = lookup_domain(msg.u.control.remote_dom);
+               buffer_append(&dom->buffer,
+                             msg.u.control.msg.msg,
+                             msg.u.control.msg.length);
+       }
+}
+
+static void enum_domains(void)
+{
+       int domid = 0;
+       xc_dominfo_t dominfo;
+
+       while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) {
+               lookup_domain(dominfo.domid);
+               domid = dominfo.domid + 1;
+       }
+}
+
+/* Main loop: select() over the xcs data socket and every domain's tty. */
+void handle_io(void)
+{
+       fd_set readfds, writefds;
+       int ret;
+       int max_fd = -1;
+       int num_of_writes = 0;
+
+       do {
+               struct domain *d;
+               struct timeval tv = { 1, 0 };
+
+               FD_ZERO(&readfds);
+               FD_ZERO(&writefds);
+
+               FD_SET(xcs_data_fd, &readfds);
+               max_fd = MAX(xcs_data_fd, max_fd);
+
+               for (d = dom_head; d; d = d->next) {
+                       if (d->tty_fd != -1) {
+                               FD_SET(d->tty_fd, &readfds);
+                       }
+
+                       if (d->tty_fd != -1 && !buffer_empty(&d->buffer)) {
+                               FD_SET(d->tty_fd, &writefds);
+                       }
+                       max_fd = MAX(d->tty_fd, max_fd);
+               }
+
+               ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
+               if (ret == -1) break;   /* fd sets are meaningless after an error */
+               if (tv.tv_sec == 1 && (++num_of_writes % 100) == 0) {
+#if 0
+                       /* FIXME */
+                       /* This is a nasty hack.  xcs does not handle the
+                          control channels filling up well at all.  We'll
+                          throttle ourselves here since we do proper
+                          queueing to give the domains a shot at pulling out
+                          the data.  Fixing xcs is not worth it as it's
+                          going away */
+                       tv.tv_usec = 1000;
+                       select(0, 0, 0, 0, &tv);
+#endif
+               }
+               enum_domains();
+               if (FD_ISSET(xcs_data_fd, &readfds)) {
+                       handle_xcs_msg(xcs_data_fd);
+               }
+               /* tty_fd is -1 when no pty could be created: never FD_ISSET(-1) */
+               for (d = dom_head; d; d = d->next) {
+                       if (!d->is_dead && d->tty_fd != -1 &&
+                           FD_ISSET(d->tty_fd, &readfds)) {
+                               handle_tty_read(d);
+                       }
+                       if (!d->is_dead && d->tty_fd != -1 &&
+                           FD_ISSET(d->tty_fd, &writefds)) {
+                               handle_tty_write(d);
+                       }
+               }
+               remove_dead_domains(dom_head);
+       } while (ret > -1);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/io.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/io.h Thu Aug 25 22:53:20 2005
@@ -0,0 +1,26 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  Xen Console Daemon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#ifndef CONSOLED_IO_H
+#define CONSOLED_IO_H
+
+void handle_io(void);
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/main.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/main.c       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,93 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  Xen Console Daemon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "xenctrl.h"
+#include "xen/io/domain_controller.h"
+#include "xcs_proto.h"
+
+#include "utils.h"
+#include "io.h"
+
+int main(int argc, char **argv)
+{
+       const char *sopts = "hVvi";
+       struct option lopts[] = {
+               { "help", 0, 0, 'h' },
+               { "version", 0, 0, 'V' },
+               { "verbose", 0, 0, 'v' },
+               { "interactive", 0, 0, 'i' },
+               { 0 },
+       };
+       bool is_interactive = false;
+       int ch;
+       int syslog_option = LOG_CONS;
+       int syslog_mask = LOG_WARNING;  /* least severe level that is logged */
+       int opt_ind = 0;
+
+       while ((ch = getopt_long(argc, argv, sopts, lopts, &opt_ind)) != -1) {
+               switch (ch) {
+               case 'h':
+                       //usage(argv[0]);
+                       exit(0);
+               case 'V':
+                       //version(argv[0]);
+                       exit(0);
+               case 'v':
+                       syslog_option |= LOG_PERROR;
+                       syslog_mask = LOG_DEBUG;
+                       break;
+               case 'i':
+                       is_interactive = true;
+                       break;
+               case '?':
+                       fprintf(stderr,
+                               "Try `%s --help' for more information\n",
+                               argv[0]);
+                       exit(EINVAL);
+               }
+       }
+
+       if (geteuid() != 0) {
+               fprintf(stderr, "%s requires root to run.\n", argv[0]);
+               exit(EPERM);
+       }
+
+       openlog("xenconsoled", syslog_option, LOG_DAEMON);
+       setlogmask(LOG_UPTO(syslog_mask)); /* setlogmask() takes a bitmask */
+
+       if (!is_interactive) {
+               daemonize("/var/run/xenconsoled.pid");
+       }
+
+       if (!xen_setup()) {     /* xen_setup() has already logged the reason */
+               exit(1);
+       }
+       handle_io();
+       closelog();
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/utils.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/utils.c      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,253 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  Xen Console Daemon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <string.h>
+
+#include "xenctrl.h"
+#include "xen/io/domain_controller.h"
+#include "xcs_proto.h"
+
+#include "utils.h"
+
+struct xs_handle *xs;
+int xc;
+
+int xcs_ctrl_fd = -1;
+int xcs_data_fd = -1;
+
+bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
+{
+       size_t offset = 0;
+       ssize_t len;
+
+       while (offset < size) {
+               if (do_read) {
+                       len = read(fd, data + offset, size - offset);
+               } else {
+                       len = write(fd, data + offset, size - offset);
+               }
+
+               if (len < 1) {
+                       if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
+                               continue;
+                       } else {
+                               return false;
+                       }
+               } else {
+                       offset += len;
+               }
+       }
+
+       return true;
+}
+
+/* Connect a stream socket to the UNIX-domain socket at path; -1 on failure. */
+static int open_domain_socket(const char *path)
+{
+       struct sockaddr_un addr;
+       int sock;
+
+       if (strlen(path) >= sizeof(addr.sun_path)) {
+               errno = ENAMETOOLONG;   /* path would overflow sun_path */
+               goto out;
+       }
+       if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
+               goto out;
+       }
+       memset(&addr, 0, sizeof(addr)); /* sun_path stays NUL-terminated */
+       addr.sun_family = AF_UNIX;
+       strcpy(addr.sun_path, path);
+       if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+               goto out_close_sock;
+       }
+       return sock;
+ out_close_sock:
+       close(sock);
+ out:
+       return -1;
+}
+
+static void child_exit(int sig)
+{
+       while (waitpid(-1, NULL, WNOHANG) > 0);
+}
+
+/* Fork into the background, start a new session and write a locked pidfile. */
+void daemonize(const char *pidfile)
+{
+       pid_t pid;
+       int fd;
+       int len;
+       int i;
+       char buf[100];
+
+       if (getppid() == 1) {
+               return;
+       }
+
+       if ((pid = fork()) > 0) {
+               exit(0);
+       } else if (pid == -1) {
+               err(errno, "fork() failed");
+       }
+
+       setsid();
+
+       /* redirect fd 0,1,2 to /dev/null */
+       if ((fd = open("/dev/null",O_RDWR)) == -1) {
+               exit(1);
+       }
+
+       for (i = 0; i <= 2; i++) {
+               close(i);
+               dup2(fd, i);
+       }
+       close(fd);
+
+       umask(027);
+       chdir("/");
+       /* O_CREAT requires an explicit mode; the umask above trims it to 0640 */
+       fd = open(pidfile, O_RDWR | O_CREAT, 0644);
+       if (fd == -1) {
+               exit(1);
+       }
+
+       if (lockf(fd, F_TLOCK, 0) == -1) {
+               exit(1);
+       }
+
+       len = sprintf(buf, "%d\n", getpid());
+       write(fd, buf, len);
+
+       signal(SIGCHLD, child_exit);
+       signal(SIGTSTP, SIG_IGN);
+       signal(SIGTTOU, SIG_IGN);
+       signal(SIGTTIN, SIG_IGN);
+}
+
+/* synchronized send/recv strictly for setting up xcs */
+/* always use asynchronous callbacks any other time */
+static bool xcs_send_recv(int fd, xcs_msg_t *msg)
+{
+       bool ret = false;
+
+       if (!write_sync(fd, msg, sizeof(*msg))) {
+               dolog(LOG_ERR, "Write failed at %s:%s():L%d?  Possible bug.",
+                      __FILE__, __FUNCTION__, __LINE__);
+               goto out;
+       }
+
+       if (!read_sync(fd, msg, sizeof(*msg))) {
+               dolog(LOG_ERR, "Read failed at %s:%s():L%d?  Possible bug.",
+                      __FILE__, __FUNCTION__, __LINE__);
+               goto out;
+       }
+
+       ret = true;
+
+ out:
+       return ret;
+}
+
+/* Connect to xenstore, the hypervisor and xcs; returns false on any failure. */
+bool xen_setup(void)
+{
+       int sock;
+       xcs_msg_t msg;
+
+       xs = xs_daemon_open();
+       if (xs == NULL) {
+               dolog(LOG_ERR,
+                     "Failed to contact xenstore (%m).  Is it running?");
+               goto out;
+       }
+
+       xc = xc_interface_open();
+       if (xc == -1) {
+               dolog(LOG_ERR, "Failed to contact hypervisor (%m)");
+               goto out_close_store;
+       }
+
+       sock = open_domain_socket(XCS_SUN_PATH);
+       if (sock == -1) {
+               dolog(LOG_ERR, "Failed to contact xcs (%m).  Is it running?");
+               goto out_close_store;
+       }
+
+       xcs_ctrl_fd = sock;
+       sock = open_domain_socket(XCS_SUN_PATH);
+       if (sock == -1) {
+               dolog(LOG_ERR, "Failed to contact xcs (%m).  Is it running?");
+               goto out_close_ctrl;
+       }
+
+       xcs_data_fd = sock;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.type = XCS_CONNECT_CTRL;
+       if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
+               dolog(LOG_ERR, "xcs control connect failed.  Possible bug.");
+               goto out_close_data;
+       }
+
+       msg.type = XCS_CONNECT_DATA;
+       if (!xcs_send_recv(xcs_data_fd, &msg) || msg.result != XCS_RSLT_OK) {
+               dolog(LOG_ERR, "xcs data connect failed.  Possible bug.");
+               goto out_close_data;
+       }
+
+       /* Since the vast majority of control messages are console messages
+          it's just easier to ignore other messages that try to bind to 
+          a specific type. */
+       msg.type = XCS_MSG_BIND;
+       msg.u.bind.port = PORT_WILDCARD;
+       msg.u.bind.type = TYPE_WILDCARD;
+       if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
+               dolog(LOG_ERR, "xcs bind failed.  Possible bug.");
+               goto out_close_data;
+       }
+
+       return true;
+
+ out_close_data:
+       close(xcs_data_fd);     /* each label closes only its own descriptor */
+       xcs_data_fd = -1;
+ out_close_ctrl:
+       close(xcs_ctrl_fd);
+       xcs_ctrl_fd = -1;
+ out_close_store:
+       xs_daemon_close(xs);
+ out:
+       return false;
+}
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/daemon/utils.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/daemon/utils.h      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,47 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  Xen Console Daemon
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ * 
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#ifndef CONSOLED_UTILS_H
+#define CONSOLED_UTILS_H
+
+#include <stdbool.h>
+#include <syslog.h>
+#include <stdio.h>
+
+#include "xs.h"
+
+void daemonize(const char *pidfile);
+bool xen_setup(void);
+#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
+#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
+bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
+
+extern int xcs_ctrl_fd;
+extern int xcs_data_fd;
+extern struct xs_handle *xs;
+extern int xc;
+
+#if 1
+#define dolog(val, fmt, ...) syslog(val, fmt, ## __VA_ARGS__)
+#else
+#define dolog(val, fmt, ...) fprintf(stderr, fmt "\n", ## __VA_ARGS__)
+#endif
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/Makefile
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/Makefile  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,11 @@
+CFLAGS=-g -Wall
+CC=gcc
+LDFLAGS=-static
+
+all: console-dom0 console-domU procpipe
+
+console-dom0: console-dom0.o
+console-domU: console-domU.o
+procpipe: procpipe.o
+.PHONY: all clean
+clean:; $(RM) *.o console-domU console-dom0 procpipe
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/README
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/README    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,29 @@
+ABOUT
+
+This tool uses two programs, one that lives in dom0 and one that lives in domU
+to verify that no data is lost.  dom0 and domU share a handshake with each
+other that they use to exchange a random seed.
+
+Both programs then generate a series of random numbers and then write and
+read the numbers via the console.  Because each side starts with the same seed
+they know what data the other side is generating and therefore what should be
+expected.
+
+RUNNING
+
+console-domU should be installed within the guest image.  It must be launched
+from the client automatically.  I use a custom initrd image and put it in the
+/linuxrc.
+
+console-dom0 and console-domU will communicate with each other and stress the
+console code.  You can verify it at various levels by invoking it in different
+ways.  procpipe is used to connect the two.  I use the following command for
+testing:
+
+./procpipe ./console-dom0 'xm create -c /etc/xen/xmexample1'
+
+xmexample1 has no devices and no root set (this is what triggers /linuxrc).
+
+If it freezes, it probably means that console-domU is expecting more data from
+console-dom0 (which means that some data got dropped).  I'd like to add
+timeouts in the future to handle this more gracefully.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/console-dom0.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/console-dom0.c    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,117 @@
+/* Written by Anthony Liguori <aliguori@xxxxxxxxxx> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <termios.h>
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+static void generate_random_buffer(char *buffer, size_t size)
+{
+       int i;
+
+       for (i = 0; i < size; i++) {
+               buffer[i] = random() & 0xFF;
+       }
+}
+
+static void canonicalize(char *buffer)
+{
+       char *reader, *writer;
+
+       reader = writer = buffer;
+
+       while (*reader) {
+               *writer = *reader;
+               if (*reader != '\r') writer++;
+               reader++;
+       }
+       *writer = *reader;
+}
+
+/* dom0 side of the console test: hand out a seed, then stream random data. */
+int main(int argc, char **argv)
+{
+       char buffer[4096];
+       char *line;
+       unsigned int seed;
+       size_t size;
+       int runs;
+       unsigned long long total_bytes = 0;
+       struct termios term;
+
+       tcgetattr(STDIN_FILENO, &term);
+       cfmakeraw(&term);
+       tcsetattr(STDIN_FILENO, TCSAFLUSH, &term);
+
+       tcgetattr(STDOUT_FILENO, &term);
+       cfmakeraw(&term);
+       tcsetattr(STDOUT_FILENO, TCSAFLUSH, &term);
+
+       while ((line = fgets(buffer, sizeof(buffer), stdin))) {
+               canonicalize(line);
+
+               if (strcmp(line, "!!!XEN Test Begin!!!\n") == 0) {
+                       break;
+               } else {
+                       fprintf(stderr, "%s", line);
+               }
+       }
+
+       if (line == NULL) {
+               fprintf(stderr, "Client never sent start string.\n");
+               return 1;
+       }
+
+       seed = time(0);
+
+       printf("%u\n", seed); fflush(stdout);
+
+       fprintf(stderr, "Waiting for seed acknowledgement\n");
+       line = fgets(buffer, sizeof(buffer), stdin);
+       if (line == NULL) {
+               fprintf(stderr, "Client never acknowledge seed.\n");
+               return 1;
+       }
+
+       canonicalize(line);
+       if (strcmp(line, "Seed Okay.\n") != 0) {
+               fprintf(stderr, "Incorrect seed acknowledgement.\n");
+               fprintf(stderr, "[%s]", line);
+               return 1;
+       } else {
+               fprintf(stderr, "Processed seed.\n");
+       }
+
+       srandom(seed);
+
+       for (runs = (random() % 100000) + 4096; runs > 0; runs--) {
+               size = random() % 4096;
+
+               fprintf(stderr, "Writing %zu bytes.\n", size); /* size is size_t */
+
+               generate_random_buffer(buffer, size);
+               fwrite(buffer, size, 1, stdout);
+               fflush(stdout);
+
+               do {
+                       line = fgets(buffer, sizeof(buffer), stdin);
+                       if (line == NULL) {
+                               fprintf(stderr, "Premature EOF from client.\n");
+                               return 1;
+                       }
+
+                       canonicalize(line);
+                       fprintf(stderr, "%s", line);
+               } while (strcmp(line, "Okay.\n") != 0);
+
+               total_bytes += size;
+       }
+
+       fprintf(stderr, "PASS: processed %llu byte(s).\n", total_bytes);
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/console-domU.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/console-domU.c    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,76 @@
+/* Written by Anthony Liguori <aliguori@xxxxxxxxxx> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <termios.h>
+#include <unistd.h>
+
+static void canonicalize(char *buffer)
+{
+       char *reader, *writer;
+
+       reader = writer = buffer;
+
+       while (*reader) {
+               *writer = *reader;
+               if (*reader != '\r') writer++;
+               reader++;
+       }
+       *writer = *reader;
+}
+
+/* domU side of the console test: read the seed, then verify each byte. */
+int main(int argc, char **argv)
+{
+       char buffer[4096];
+       char *line;
+       unsigned int seed;
+       size_t size;
+       int i;
+       int runs;
+       struct termios term;
+
+       tcgetattr(STDIN_FILENO, &term);
+       cfmakeraw(&term);
+       tcsetattr(STDIN_FILENO, TCSAFLUSH, &term);
+
+       tcgetattr(STDOUT_FILENO, &term);
+       cfmakeraw(&term);
+       tcsetattr(STDOUT_FILENO, TCSAFLUSH, &term);
+
+       printf("!!!XEN Test Begin!!!\n"); fflush(stdout);
+       line = fgets(buffer, sizeof(buffer), stdin);
+       if (line == NULL) {
+               printf("Failure\n"); fflush(stdout);
+               return 1;
+       }
+
+       canonicalize(line);
+       seed = strtoul(line, 0, 0);
+
+       printf("Seed Okay.\n"); fflush(stdout);
+       srandom(seed);
+
+       for (runs = (random() % 100000) + 4096; runs > 0; runs--) {
+               size = random() % 4096;
+
+               for (i = 0; i < size; i++) {
+                       int ch;
+                       int exp;
+
+                       ch = fgetc(stdin);
+                       exp = random() & 0xFF;
+                       if (ch != exp) {
+                               printf("Expected %d got %d\n",
+                                      exp, ch);
+                               fflush(stdout);
+                       }
+                       printf("Got %d/%zu good bytes\n", i, size); /* size is size_t */
+               }
+
+               printf("Okay.\n"); fflush(stdout);
+       }
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/console/testsuite/procpipe.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/console/testsuite/procpipe.c        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,133 @@
+/* Written by Anthony Liguori <aliguori@xxxxxxxxxx> */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <err.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define PACKAGE_NAME "procpipe"
+#define PACKAGE_VERSION "0.0.1"
+
+#define GPL_SHORT \
+"This is free software; see the source for copying conditions.  There is NO\n"\
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+#define PACKAGE_BUGS "aliguori@xxxxxxxxxx"
+#define PACKAGE_AUTHOR "Anthony Liguori"
+#define PACKAGE_OWNER "IBM, Corp."
+#define PACKAGE_LICENSE GPL_SHORT
+
+/* Print the command-line synopsis for the program invoked as `name`. */
+static void usage(const char *name)
+{
+       printf("Usage: %s [OPTIONS] COMMAND1 COMMAND2\n"
+              "\n"
+              "  -h, --help      display this help and exit\n"
+              "  -V, --version   output version information and exit\n"
+              "\n"
+              "Report bugs to <%s>.\n", name, PACKAGE_BUGS);
+}
+
+/* Print the version banner for `name`: package name and version, author,
+ * copyright owner and the short license text, one printf per output line. */
+static void version(const char *name)
+{
+       printf("%s (%s) %s\n", name, PACKAGE_NAME, PACKAGE_VERSION);
+       printf("Written by %s.\n", PACKAGE_AUTHOR);
+       printf("\n");
+       printf("Copyright (C) 2005 %s.\n", PACKAGE_OWNER);
+       printf("%s\n", PACKAGE_LICENSE);
+}
+
+/* Fork a child that runs `cmd` via /bin/sh with the given stdout/stdin. */
+static pid_t exec(int out_fd, int in_fd, const char *cmd)
+{
+       pid_t pid = fork();
+
+       if (pid == 0) {
+               /* dup2() replaces the target fd itself; no close() needed. */
+               if (dup2(out_fd, STDOUT_FILENO) != -1 &&
+                   dup2(in_fd, STDIN_FILENO) != -1)
+                       execlp("/bin/sh", "sh", "-c", cmd, NULL);
+               /* Setup or exec failed: never resume main() in the child. */
+               _exit(127);
+       }
+
+       return pid;     /* child's pid, or -1 if fork() failed */
+}
+
+/* Run two shell commands with each one's stdout piped to the other's stdin. */
+int main(int argc, char **argv)
+{
+       int ch, opt_ind = 0;
+       const char *sopt = "hV";
+       struct option lopt[] = {
+               { "help", 0, 0, 'h' },
+               { "version", 0, 0, 'V' },
+               { 0 }
+       };
+       int host_stdout[2];
+       int host_stdin[2];
+       pid_t pid1, pid2;
+       int status;
+
+       while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+               switch (ch) {
+               case 'h':
+                       usage(argv[0]);
+                       exit(0);
+               case 'V':
+                       version(argv[0]);
+                       exit(0);
+               case '?':
+                       errx(EINVAL, "Try `%s --help' for more information.",
+                            argv[0]);
+               }
+       }
+
+       if ((argc - optind) != 2)
+               errx(EINVAL, "Two commands are required.\n"
+                    "Try `%s --help' for more information.", argv[0]);
+
+       if (pipe(host_stdout) == -1 || pipe(host_stdin) == -1)
+               err(errno, "pipe() failed");
+
+       /*
+        * Close-on-exec keeps the raw pipe ends out of the exec'd commands;
+        * only the dup2() copies on fd 0/1 survive, so EOF can propagate.
+        */
+       fcntl(host_stdout[0], F_SETFD, FD_CLOEXEC);
+       fcntl(host_stdout[1], F_SETFD, FD_CLOEXEC);
+       fcntl(host_stdin[0], F_SETFD, FD_CLOEXEC);
+       fcntl(host_stdin[1], F_SETFD, FD_CLOEXEC);
+
+       pid1 = exec(host_stdout[1], host_stdin[0], argv[optind]);
+       if (pid1 == -1)
+               err(errno, "exec(%s)", argv[optind]);
+       pid2 = exec(host_stdin[1], host_stdout[0], argv[optind + 1]);
+       if (pid2 == -1)
+               err(errno, "exec(%s)", argv[optind + 1]);
+
+       /* The parent must drop its copies too, or no reader ever sees EOF. */
+       close(host_stdout[0]);
+       close(host_stdout[1]);
+       close(host_stdin[0]);
+       close(host_stdin[1]);
+
+       waitpid(pid1, &status, 0);
+       if (WIFEXITED(status)) status = WEXITSTATUS(status);
+       if (status != 0)
+               printf("Child exited with status %d\n", status);
+
+       waitpid(pid2, &status, 0);
+       if (WIFEXITED(status)) status = WEXITSTATUS(status);
+       if (status != 0)
+               printf("Child2 exited with status %d\n", status);
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/backend.hotplug
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/examples/backend.hotplug    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,21 @@
+#! /bin/sh
+# Hotplug handler for Xen backend devices.  Example environment:
+#DEVPATH=/devices/xen-backend/vif-1-0
+#ACTION=add
+
+PATH=/etc/xen/scripts:$PATH
+
+DEV=$(basename "$DEVPATH")
+case "$ACTION" in
+  add)
+    case "$DEV" in
+      vif-*)
+        vif=$(echo "$DEV" | sed 's/-\([0-9]*\)-\([0-9]*\)/\1.\2/')
+        vif-bridge up domain=unknown vif="$vif" mac=fe:ff:ff:ff:ff:ff bridge=xen-br0 >/dev/null 2>&1
+        ;;
+    esac
+    ;;
+  remove)
+    ;;
+esac
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/network-bridge
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/examples/network-bridge     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,261 @@
+#!/bin/sh -x
+#============================================================================
+# Default Xen network start/stop script.
+# Xend calls a network script when it starts.
+# The script name to use is defined in /etc/xen/xend-config.sxp
+# in the network-script field.
+#
+# This script creates a bridge (default xen-br0), adds a device
+# (default eth0) to it, copies the IP addresses from the device
+# to the bridge and adjusts the routes accordingly.
+#
+# If all goes well, this should ensure that networking stays up.
+# However, some configurations are upset by this, especially
+# NFS roots. If the bridged setup does not meet your needs,
+# configure a different script, for example using routing instead.
+#
+# Usage:
+#
+# network (start|stop|status) {VAR=VAL}*
+#
+# Vars:
+#
+# bridge     The bridge to use (default xen-br0).
+# netdev     The interface to add to the bridge (default eth0).
+# antispoof  Whether to use iptables to prevent spoofing (default no).
+#
+# start:
+# Creates the bridge and enslaves netdev to it.
+# Copies the IP addresses from netdev to the bridge.
+# Deletes the routes to netdev and adds them on bridge.
+#
+# stop:
+# Removes netdev from the bridge.
+# Deletes the routes to bridge and adds them to netdev.
+#
+# status:
+# Print ifconfig for netdev and bridge.
+# Print routes.
+#
+#============================================================================
+
+# Exit if anything goes wrong.
+set -e 
+
+# First arg is the operation.
+OP=$1
+shift
+
+# Pull variables in args in to environment.
+for arg ; do export "${arg}" ; done
+
+bridge=${bridge:-xen-br0}
+netdev=${netdev:-eth0}
+antispoof=${antispoof:-no}
+
+echo "*network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" >&2
+
+# Usage: transfer_addrs src dst
+# Copy all IP addresses (including aliases) from device $src to device $dst.
+transfer_addrs () {
+    local src=$1
+    local dst=$2
+    # Don't bother if $dst already has IP addresses.
+    if ip addr show dev ${dst} | egrep -q '^ *inet ' ; then
+        return
+    fi
+    # Address lines start with 'inet' and have the device in them.
+    # Replace 'inet' with 'ip addr add' and change the device name $src
+    # to 'dev $dst'; the resulting command lines are piped to 'sh -e'.
+    ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
+s/inet/ip addr add/
+s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+/[0-9]\+\)@\1@
+s/${src}/dev ${dst}/
+" | sh -e
+    # Remove automatic routes on destination device
+    ip route list | sed -ne "
+/dev ${dst}\( \|$\)/ {
+  s/^/ip route del /
+  p
+}" | sh -e
+}
+
+# Usage: del_addrs src -- run 'ip addr del' for each 'inet' address on $src.
+del_addrs () {
+    local src=$1
+    ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
+s/inet/ip addr del/
+s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
+s/${src}/dev ${src}/
+" | sh -e
+}
+
+# Usage: transfer_routes src dst
+# Get all IP routes to device $src, delete them, and
+# add the same routes to device $dst.
+# The original routes have to be deleted, otherwise adding them
+# for $dst fails (duplicate routes).
+transfer_routes () {
+    local src=$1
+    local dst=$2
+    # For each route line mentioning 'dev $src': save it in the hold space,
+    # print it prefixed with 'ip route del', restore it, change $src to $dst
+    # and print it prefixed with 'ip route add'.  'sh -e' runs the output.
+    ip route list | sed -ne "
+/dev ${src}\( \|$\)/ {
+  h
+  s/^/ip route del /
+  P
+  g
+  s/${src}/${dst}/
+  s/^/ip route add /
+  P
+  d
+}" | sh -e
+}
+
+# Usage: create_bridge bridge
+# Make sure the named bridge exists, then bring its interface up.
+create_bridge () {
+    local br=$1
+    # Add and configure the bridge only if 'brctl show' does not list it yet.
+    if ! brctl show | grep -q ${br} ; then
+        brctl addbr ${br}
+        brctl stp ${br} off
+        brctl setfd ${br} 0
+    fi
+    ifconfig ${br} up
+}
+
+# Usage: add_to_bridge bridge dev
+add_to_bridge () {
+    local br=$1 port=$2
+    # Leave things alone when 'brctl show' already mentions $port anywhere.
+    if brctl show | grep -q ${port} ; then
+        return 0
+    fi
+    brctl addif ${br} ${port}
+}
+
+# Usage: antispoofing dev bridge
+# Set the FORWARD chain policy to DROP, then accept forwarded packets that
+# match '--physdev-in $dev'.  The bridge argument is accepted but not used.
+antispoofing () {
+    local dev=$1
+    local bridge=$2
+
+    iptables -P FORWARD DROP
+    iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT
+}
+
+# Usage: show_status dev bridge
+# Print ifconfig and routes.
+show_status () {
+    local dev=$1
+    local bridge=$2
+    
+    echo '============================================================'
+    ifconfig ${dev}
+    ifconfig ${bridge}
+    echo ' '
+    ip route list
+    echo ' '
+    route -n
+    echo '============================================================'
+}
+
+# Usage: op_start -- set up ${bridge} and attach ${netdev}; no-op if bridge=null.
+op_start () {
+    if [ "${bridge}" = "null" ] ; then
+        return
+    fi
+    create_bridge ${bridge}
+
+    if ifconfig 2>/dev/null | grep -q veth0 ; then
+        return
+    fi
+
+    if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
+        mac=`ifconfig ${netdev} | grep HWadd | sed -e 's/.*\(..:..:..:..:..:..\).*/\1/'`
+        if ! ifdown ${netdev} ; then
+            # if ifdown didn't work, see if we have an ip= on cmd line
+            if egrep 'ip=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:' /proc/cmdline ; then
+                kip=`sed -e 's!.*ip=\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\):.*!\1!' /proc/cmdline`
+                kmask=`sed -e 's!.*ip=[^:]*:[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
+                kgate=`sed -e 's!.*ip=[^:]*:[^:]*:\([^:]*\):.*!\1!' /proc/cmdline`
+                ifconfig ${netdev} 0.0.0.0 down
+            fi
+        fi
+        ip link set ${netdev} name p${netdev}
+        ip link set veth0 name ${netdev}
+        ifconfig p${netdev} 0.0.0.0 -arp down
+        ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
+        ifconfig ${netdev} hw ether ${mac}
+        add_to_bridge ${bridge} vif0.0
+        add_to_bridge ${bridge} p${netdev}
+        ip link set ${bridge} up
+        ip link set vif0.0 up
+        ip link set p${netdev} up
+        if ! ifup ${netdev} ; then
+            # kip/kmask/kgate are only set when the ifdown fallback above ran.
+            if [ -n "${kip}" ] ; then
+                # use the addresses we grokked from /proc/cmdline
+                ifconfig ${netdev} ${kip}
+                [ -n "${kmask}" ] && ifconfig ${netdev} netmask ${kmask}
+                ifconfig ${netdev} up
+                [ -n "${kgate}" ] && ip route add default via ${kgate}
+            fi
+        fi
+    else
+        # old style without veth0
+        transfer_addrs ${netdev} ${bridge}
+        transfer_routes ${netdev} ${bridge}
+    fi
+    # POSIX test(1) compares strings with '=', not '=='.
+    if [ "${antispoof}" = 'yes' ] ; then
+        antispoofing ${netdev} ${bridge}
+    fi
+}
+
+# Usage: op_stop -- detach ${netdev} from ${bridge}; no-op if bridge=null.
+op_stop () {
+    if [ "${bridge}" = "null" ] ; then
+        return
+    fi
+    # NOTE(review): with 'set -e' a failing delif aborts the script -- confirm.
+    brctl delif ${bridge} ${netdev}
+    if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
+        brctl delif ${bridge} vif0.0
+        ifconfig vif0.0 down
+        mac=`ifconfig veth0 | grep HWadd | sed -e 's/.*\(..:..:..:..:..:..\).*/\1/'`
+        ifconfig ${netdev} down
+        ifconfig ${netdev} hw ether ${mac}
+        ifconfig ${netdev} arp up
+        transfer_addrs veth0 ${netdev}
+        transfer_routes veth0 ${netdev}
+        del_addrs veth0
+        ifconfig veth0 -arp down
+        ifconfig veth0 hw ether 00:00:00:00:00:00
+    else
+        transfer_routes ${bridge} ${netdev}
+    fi
+}
+
+# Dispatch on the requested operation.  Anything other than start, stop
+# or status prints the list of valid commands and exits with status 1.
+case ${OP} in
+    start)
+        op_start
+        ;;
+    stop)
+        op_stop
+        ;;
+    status)
+        show_status ${netdev} ${bridge}
+        ;;
+    *)
+        echo 'Unknown command: ' ${OP} >&2
+        echo 'Valid commands are: start, stop, status' >&2
+        exit 1
+        ;;
+esac
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xenctrl.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xenctrl.h     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,526 @@
+/******************************************************************************
+ * xenctrl.h
+ * 
+ * A library for low-level access to the Xen control interfaces.
+ * 
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef XENCTRL_H
+#define XENCTRL_H
+
+#include <stdint.h>
+
+typedef uint8_t            u8;
+typedef uint16_t           u16;
+typedef uint32_t           u32;
+typedef uint64_t           u64;
+typedef int8_t             s8;
+typedef int16_t            s16;
+typedef int32_t            s32;
+typedef int64_t            s64;
+
+#include <sys/ptrace.h>
+#include <xen/xen.h>
+#include <xen/dom0_ops.h>
+#include <xen/event_channel.h>
+#include <xen/sched_ctl.h>
+#include <xen/acm.h>
+
+#ifdef __ia64__
+#define XC_PAGE_SHIFT           14  /* 1UL << 14 = 16384 on ia64 */
+#else
+#define XC_PAGE_SHIFT           12  /* 1UL << 12 = 4096 elsewhere */
+#endif
+#define XC_PAGE_SIZE            (1UL << XC_PAGE_SHIFT)  /* page size derived from the shift */
+#define XC_PAGE_MASK            (~(XC_PAGE_SIZE-1))  /* all bits set except the low XC_PAGE_SHIFT bits */
+
+/*
+ *  DEFINITIONS FOR CPU BARRIERS
+ */ 
+
+#if defined(__i386__)
+#define mb()  __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__x86_64__)
+#define mb()  __asm__ __volatile__ ( "mfence" : : : "memory")
+#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory")
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__ia64__)
+/* Empty macros gave no ordering at all; ia64 uses the "mf" memory fence. */
+#define mb()  __asm__ __volatile__ ( "mf" : : : "memory")
+#define rmb() __asm__ __volatile__ ( "mf" : : : "memory")
+#define wmb() __asm__ __volatile__ ( "mf" : : : "memory")
+#else
+#error "Define barriers"
+#endif
+
+/*
+ *  INITIALIZATION FUNCTIONS
+ */ 
+
+/**
+ * This function opens a handle to the hypervisor interface.  This function can
+ * be called multiple times within a single process.  Multiple processes can
+ * have an open hypervisor interface at the same time.
+ *
+ * Each call to this function should have a corresponding call to
+ * xc_interface_close().
+ *
+ * This function can fail if the caller does not have superuser permission or
+ * if a Xen-enabled kernel is not currently running.
+ *
+ * @return a handle to the hypervisor interface or -1 on failure
+ */
+int xc_interface_open(void);
+
+/**
+ * This function closes an open hypervisor interface.
+ *
+ * This function can fail if the handle does not represent an open interface or
+ * if there were problems closing the interface.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @return 0 on success, -1 otherwise.
+ */
+int xc_interface_close(int xc_handle);
+
+/*
+ * DOMAIN DEBUGGING FUNCTIONS
+ */
+
+typedef struct xc_core_header {
+    unsigned int xch_magic;
+    unsigned int xch_nr_vcpus;
+    unsigned int xch_nr_pages;
+    unsigned int xch_ctxt_offset;
+    unsigned int xch_index_offset;
+    unsigned int xch_pages_offset;
+} xc_core_header_t;
+
+
+long xc_ptrace(enum __ptrace_request request, 
+               u32  domid,
+               long addr, 
+               long data);
+
+long xc_ptrace_core(enum __ptrace_request request, 
+                    u32 domid, 
+                    long addr, 
+                    long data);
+
+int xc_waitdomain(int domain, 
+                  int *status, 
+                  int options);
+
+int xc_waitdomain_core(int domain, 
+                       int *status, 
+                       int options);
+
+/*
+ * DOMAIN MANAGEMENT FUNCTIONS
+ */
+
+typedef struct {
+    u32           domid;
+    u32           ssidref;
+    unsigned int  dying:1, crashed:1, shutdown:1, 
+                  paused:1, blocked:1, running:1; /* six one-bit state flags */
+    unsigned int  shutdown_reason; /* only meaningful if shutdown==1 */
+    unsigned long nr_pages;
+    unsigned long shared_info_frame;
+    u64           cpu_time;
+    unsigned long max_memkb; /* presumably in KiB, going by the name -- TODO confirm */
+    unsigned int  vcpus;
+    s32           vcpu_to_cpu[MAX_VIRT_CPUS]; /* MAX_VIRT_CPUS entries */
+    cpumap_t      cpumap[MAX_VIRT_CPUS];      /* MAX_VIRT_CPUS entries */
+} xc_dominfo_t;
+
+typedef dom0_getdomaininfo_t xc_domaininfo_t;
+int xc_domain_create(int xc_handle, 
+                     u32 ssidref,
+                     u32 *pdomid);
+
+
+int xc_domain_dumpcore(int xc_handle, 
+                       u32 domid,
+                       const char *corename);
+
+
+/**
+ * This function pauses a domain. A paused domain still exists in memory
+ * however it does not receive any timeslices from the hypervisor.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain id to pause
+ * @return 0 on success, -1 on failure.
+ */
+int xc_domain_pause(int xc_handle, 
+                    u32 domid);
+/**
+ * This function unpauses a domain.  The domain should have been previously
+ * paused.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain id to unpause
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_unpause(int xc_handle, 
+                      u32 domid);
+
+/**
+ * This function will destroy a domain.  Destroying a domain removes the domain
+ * completely from memory.  This function should be called after sending the
+ * domain a SHUTDOWN control message to free up the domain resources.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain id to destroy
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_destroy(int xc_handle, 
+                      u32 domid);
+int xc_domain_pincpu(int xc_handle,
+                     u32 domid,
+                     int vcpu,
+                     cpumap_t *cpumap);
+/**
+ * This function will return information about one or more domains. It is
+ * designed to iterate over the list of domains. If a single domain is
+ * requested, this function will return the next domain in the list - if
+ * one exists. It is, therefore, important in this case to make sure the
+ * domain requested was the one returned.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm first_domid the first domain to enumerate information from.  Domains
+ *                   are currently enumerated in order of creation.
+ * @parm max_doms the number of elements in info
+ * @parm info an array of max_doms size that will contain the information for
+ *            the enumerated domains.
+ * @return the number of domains enumerated or -1 on error
+ */
+int xc_domain_getinfo(int xc_handle,
+                      u32 first_domid, 
+                      unsigned int max_doms,
+                      xc_dominfo_t *info);
+
+/**
+ * This function will return information about one or more domains, using a
+ * single hypercall.  The domain information will be stored into the supplied
+ * array of xc_domaininfo_t structures.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm first_domain the first domain to enumerate information from.
+ *                    Domains are currently enumerated in order of creation.
+ * @parm max_domains the number of elements in info
+ * @parm info an array of max_domains size that will contain the information for
+ *            the enumerated domains.
+ * @return the number of domains enumerated or -1 on error
+ */
+int xc_domain_getinfolist(int xc_handle,
+                          u32 first_domain,
+                          unsigned int max_domains,
+                          xc_domaininfo_t *info);
+
+/**
+ * This function returns information about one domain.  This information is
+ * more detailed than the information from xc_domain_getinfo().
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm vcpu the virtual CPU of the domain whose context is requested
+ * @parm ctxt a pointer to a structure to store the execution context of the
+ *            domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_get_vcpu_context(int xc_handle,
+                               u32 domid,
+                               u32 vcpu,
+                               vcpu_guest_context_t *ctxt);
+
+int xc_domain_setcpuweight(int xc_handle,
+                           u32 domid,
+                           float weight);
+long long xc_domain_get_cpu_usage(int xc_handle,
+                                  domid_t domid,
+                                  int vcpu);
+
+
+typedef dom0_shadow_control_stats_t xc_shadow_control_stats_t;
+int xc_shadow_control(int xc_handle,
+                      u32 domid, 
+                      unsigned int sop,
+                      unsigned long *dirty_bitmap,
+                      unsigned long pages,
+                      xc_shadow_control_stats_t *stats);
+
+int xc_bvtsched_global_set(int xc_handle,
+                           unsigned long ctx_allow);
+
+int xc_bvtsched_domain_set(int xc_handle,
+                           u32 domid,
+                           u32 mcuadv,
+                           int warpback,
+                           s32 warpvalue,
+                           long long warpl,
+                           long long warpu);
+
+int xc_bvtsched_global_get(int xc_handle,
+                           unsigned long *ctx_allow);
+
+int xc_bvtsched_domain_get(int xc_handle,
+                           u32 domid,
+                           u32 *mcuadv,
+                           int *warpback,
+                           s32 *warpvalue,
+                           long long *warpl,
+                           long long *warpu);
+
+/* Set, or read back through the pointers, a domain's sedf parameters. */
+int xc_sedf_domain_set(int xc_handle, u32 domid,
+                       u64 period, u64 slice, u64 latency,
+                       u16 extratime, u16 weight);
+int xc_sedf_domain_get(int xc_handle, u32 domid,
+                       u64 *period, u64 *slice, u64 *latency,
+                       u16 *extratime, u16 *weight);
+
+typedef evtchn_status_t xc_evtchn_status_t;
+
+/*
+ * EVENT CHANNEL FUNCTIONS
+ */
+
+/**
+ * This function allocates an unbound port.  Ports are named endpoints used for
+ * interdomain communication.  This function is most useful in opening a
+ * well-known port within a domain to receive events on.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm dom the ID of the domain.  This may be DOMID_SELF
+ * @parm port a pointer to a port.  This is an in/out parameter.  If *port is
+ *            0, then a new port will be assigned, if *port is > 0 then that
+ *            port is allocated if the port is unallocated.
+ * @return 0 on success, -1 on failure
+ */
+int xc_evtchn_alloc_unbound(int xc_handle,
+                            u32 dom,
+                            int *port);
+
+/**
+ * This function creates a pair of ports between two domains.  A port can only
+ * be bound once within a domain.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm dom1 one of the two domains to connect.  Can be DOMID_SELF.
+ * @parm dom2 the other domain to connect.  Can be DOMID_SELF.
+ * @parm port1 an in/out parameter.  If > 0, then try to connect *port1.  If
+ *             0, then allocate a new port and store the port in *port1.
+ * @parm port2 the port connected on dom2.  This parameter behaves the same
+ *             way as port1.
+ * @return 0 on success, -1 on error.
+ */
+int xc_evtchn_bind_interdomain(int xc_handle,
+                               u32 dom1,
+                               u32 dom2,
+                               int *port1,
+                               int *port2);
+int xc_evtchn_bind_virq(int xc_handle,
+                        int virq,
+                        int *port);
+
+/**
+ * This function will close a single port on an event channel.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm dom the domain that the port exists on.  May be DOMID_SELF.
+ * @parm port the port to close
+ * @return 0 on success, -1 on error
+ */
+int xc_evtchn_close(int xc_handle,
+                    u32 dom,   /* may be DOMID_SELF */
+                    int port);
+
+/**
+ * This function generates a notify event on a bound port.
+ *
+ * Notifies can be read within Linux by opening /dev/xen/evtchn and reading
+ * a 16 bit value.  The result will be the port the event occurred on.  When
+ * events occur, the port is masked until the 16 bit port value is written back
+ * to the file.  When /dev/xen/evtchn is opened, it has to be bound via an
+ * ioctl to each port to listen on.  The ioctl for binding is _IO('E', 2).  The
+ * parameter is the port to listen on.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm local_port the port to generate the notify on
+ * @return 0 on success, -1 on error
+ */
+int xc_evtchn_send(int xc_handle,
+                   int local_port);
+int xc_evtchn_status(int xc_handle,
+                     u32 dom, /* may be DOMID_SELF */
+                     int port,
+                     xc_evtchn_status_t *status);
+
+int xc_physdev_pci_access_modify(int xc_handle,
+                                 u32 domid,
+                                 int bus,
+                                 int dev,
+                                 int func,
+                                 int enable);
+
+int xc_readconsolering(int xc_handle,
+                       char **pbuffer,
+                       unsigned int *pnr_chars, 
+                       int clear);
+
+typedef dom0_physinfo_t xc_physinfo_t;
+int xc_physinfo(int xc_handle,
+                xc_physinfo_t *info);
+
+int xc_sched_id(int xc_handle,
+                int *sched_id);
+
+int xc_domain_setmaxmem(int xc_handle,
+                        u32 domid, 
+                        unsigned int max_memkb);
+
+int xc_domain_memory_increase_reservation(int xc_handle,
+                                          u32 domid, 
+                                          unsigned int mem_kb);
+
+typedef dom0_perfc_desc_t xc_perfc_desc_t;
+/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
+int xc_perfc_control(int xc_handle,
+                     u32 op,
+                     xc_perfc_desc_t *desc);
+
+/* read/write msr */
+long long xc_msr_read(int xc_handle, int cpu_mask, int msr);
+int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
+                  unsigned int high);
+
+/**
+ * Memory maps a range within one domain to a local address range.  Mappings
+ * should be unmapped with munmap and should follow the same rules as mmap
+ * regarding page alignment.  Returns NULL on failure.
+ *
+ * In Linux, the ring queue for the control channel is accessible by mapping
+ * the shared_info_frame (from xc_domain_getinfo()) + 2048.  The structure
+ * stored there is of type control_if_t.
+ *
+ * @parm xc_handle a handle on an open hypervisor interface
+ * @parm dom the domain to map memory from
+ * @parm size the amount of memory to map (in multiples of page size)
+ * @parm prot same flag as in mmap().
+ * @parm mfn the frame address to map.
+ */
+void *xc_map_foreign_range(int xc_handle, u32 dom,
+                            int size, int prot,
+                            unsigned long mfn );
+
+void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
+                           unsigned long *arr, int num );
+
+int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, 
+                    unsigned long max_pfns);
+
+int xc_ia64_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, 
+                    unsigned int start_page, unsigned int nr_pages);
+
+int xc_mmuext_op(int xc_handle, struct mmuext_op *op, unsigned int nr_ops,
+                domid_t dom);
+
+int xc_dom_mem_op(int xc_handle, unsigned int memop, unsigned int *extent_list,
+                 unsigned int nr_extents, unsigned int extent_order,
+                 domid_t domid);
+
+int xc_get_pfn_type_batch(int xc_handle, u32 dom, int num, unsigned long *arr);
+
+
+/*\
+ *  GRANT TABLE FUNCTIONS
+\*/ 
+
+/**
+ * This function opens a handle to the more restricted grant table hypervisor
+ * interface. This may be used where the standard interface is not
+ * available because the domain is not privileged.
+ * This function can  be called multiple times within a single process.
+ * Multiple processes can have an open hypervisor interface at the same time.
+ *
+ * Each call to this function should have a corresponding call to
+ * xc_grant_interface_close().
+ *
+ * This function can fail if a Xen-enabled kernel is not currently running.
+ *
+ * @return a handle to the hypervisor grant table interface or -1 on failure
+ */
+int xc_grant_interface_open(void);
+
+/**
+ * This function closes an open grant table hypervisor interface.
+ *
+ * This function can fail if the handle does not represent an open interface or
+ * if there were problems closing the interface.
+ *
+ * @parm xc_handle a handle to an open grant table hypervisor interface
+ * @return 0 on success, -1 otherwise.
+ */
+int xc_grant_interface_close(int xc_handle);
+
+int xc_gnttab_map_grant_ref(int  xc_handle,
+                            u64  host_virt_addr,
+                            u32  dom,
+                            u16  ref,
+                            u16  flags,
+                            s16 *handle,
+                            u64 *dev_bus_addr);
+
+int xc_gnttab_unmap_grant_ref(int  xc_handle,
+                              u64  host_virt_addr,
+                              u64  dev_bus_addr,
+                              u16  handle,
+                              s16 *status);
+
+int xc_gnttab_setup_table(int        xc_handle,
+                          u32        dom,
+                          u16        nr_frames,
+                          s16       *status,
+                          unsigned long **frame_list);
+
+/* Grant debug builds only: */
+int xc_gnttab_dump_table(int        xc_handle,
+                         u32        dom,
+                         s16       *status);
+
+/* Get current total pages allocated to a domain. */
+long xc_get_tot_pages(int xc_handle, u32 domid);
+
+/* Execute a privileged dom0 operation. */
+int xc_dom0_op(int xc_handle, dom0_op_t *op);
+
+/* Initializes the store (for dom0)
+   remote_port should be the remote end of a bound interdomain channel between
+   the store and dom0.
+
+   This function returns a shared frame that should be passed to
+   xs_introduce_domain
+ */
+long xc_init_store(int xc_handle, int remote_port);
+
+/*
+ * MMU updates.
+ */
+#define MAX_MMU_UPDATES 1024  /* number of elements in xc_mmu.updates[] */
+struct xc_mmu {
+    mmu_update_t updates[MAX_MMU_UPDATES];  /* fixed-size array of update entries */
+    int          idx;      /* presumably the next free slot in updates[] -- TODO confirm */
+    domid_t      subject;  /* presumably the domain the updates apply to -- TODO confirm */
+};
+typedef struct xc_mmu xc_mmu_t;
+xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom);
+int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, 
+                   unsigned long ptr, unsigned long val);
+int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu);
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xenguest.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xenguest.h    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,66 @@
+/******************************************************************************
+ * xenguest.h
+ * 
+ * A library for guest domain management in Xen.
+ * 
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef XENBUILD_H
+#define XENBUILD_H
+
+#define XCFLAGS_VERBOSE   1
+#define XCFLAGS_LIVE      2
+#define XCFLAGS_DEBUG     4
+#define XCFLAGS_CONFIGURE 8
+
+/**
+ * This function will save a domain running Linux.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm fd the file descriptor to save a domain to
+ * @parm dom the id of the domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_linux_save(int xc_handle, int fd, uint32_t dom);
+
+/**
+ * This function will restore a saved domain running Linux.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm io_fd the file descriptor to restore a domain from
+ * @parm dom the id of the domain
+ * @parm nr_pfns the number of pages
+ * @parm store_evtchn the store event channel for this domain to use
+ * @parm store_mfn returned with the mfn of the store page
+ * @return 0 on success, -1 on failure
+ */
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns,
+                    unsigned int store_evtchn, unsigned long *store_mfn);
+
+int xc_linux_build(int xc_handle,
+                   uint32_t domid,
+                   const char *image_name,
+                   const char *ramdisk_name,
+                   const char *cmdline,
+                   unsigned int control_evtchn,
+                   unsigned long flags,
+                   unsigned int vcpus,
+                   unsigned int store_evtchn,
+                   unsigned long *store_mfn);
+
+struct mem_map;
+int xc_vmx_build(int xc_handle,
+                 uint32_t domid,
+                 int memsize,
+                 const char *image_name,
+                 struct mem_map *memmap,
+                 const char *ramdisk_name,
+                 const char *cmdline,
+                 unsigned int control_evtchn,
+                 unsigned long flags,
+                 unsigned int vcpus,
+                 unsigned int store_evtchn,
+                 unsigned long *store_mfn);
+
+#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xg_private.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xg_private.c  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,86 @@
+/******************************************************************************
+ * xg_private.c
+ * 
+ * Helper functions for the rest of the library.
+ */
+
+#include <stdlib.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+
+/* Read 'filename' via zlib into a malloc()ed buffer; returns NULL on error. */
+char *xc_read_kernel_image(const char *filename, unsigned long *size)
+{
+    int kernel_fd = -1, bytes;      /* gzread() returns a signed int */
+    gzFile kernel_gfd = NULL;
+    char *image = NULL;
+
+    if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
+    {
+        PERROR("Could not open kernel image");
+        goto out;
+    }
+
+    if ( (*size = xc_get_filesz(kernel_fd)) == 0 )
+    {
+        PERROR("Could not read kernel image");
+        goto out;
+    }
+
+    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
+    {
+        PERROR("Could not allocate decompression state for kernel image");
+        goto out;
+    }
+
+    if ( (image = malloc(*size)) == NULL )
+    {
+        PERROR("Could not allocate memory for kernel image");
+        goto out;
+    }
+
+    if ( (unsigned long)(bytes = gzread(kernel_gfd, image, *size)) != *size )
+    {
+        PERROR("Error reading kernel image, could not"
+               " read the whole image (%d != %lu).", bytes, *size);
+        free(image);
+        image = NULL;
+    }
+
+ out:
+    if ( kernel_gfd != NULL )
+        gzclose(kernel_gfd);
+    else if ( kernel_fd >= 0 )
+        close(kernel_fd);
+    return image;
+}
+
+/*******************/
+
+/* Issue mmuext op 'type' on 'mfn' for 'dom'; returns 0, or 1 on failure. */
+int pin_table(
+    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
+{
+    struct mmuext_op request;
+    int rc;
+
+    request.cmd = type;
+    request.mfn = mfn;
+    rc = xc_mmuext_op(xc_handle, &request, 1, dom);
+
+    return (rc < 0) ? 1 : 0;
+}
+
+/* Shared by save and restore: sum a page's words, then XOR in (sum >> 32). */
+unsigned long csum_page (void * page)
+{
+    const unsigned long *word = page;
+    const unsigned long *end = word + (PAGE_SIZE/sizeof(unsigned long));
+    unsigned long long total = 0;
+
+    while ( word < end )
+        total += *word++;
+
+    return total ^ (total>>32);
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/libxc/xg_private.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/libxc/xg_private.h  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,170 @@
+#ifndef XG_PRIVATE_H
+#define XG_PRIVATE_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "xenctrl.h"
+
+#include <xen/linux/privcmd.h>
+
+char *xc_read_kernel_image(const char *filename, unsigned long *size);
+unsigned long csum_page (void * page);
+
+#define _PAGE_PRESENT   0x001
+#define _PAGE_RW        0x002
+#define _PAGE_USER      0x004
+#define _PAGE_PWT       0x008
+#define _PAGE_PCD       0x010
+#define _PAGE_ACCESSED  0x020
+#define _PAGE_DIRTY     0x040
+#define _PAGE_PAT       0x080
+#define _PAGE_PSE       0x080
+#define _PAGE_GLOBAL    0x100
+
+#if defined(__i386__)
+#define L1_PAGETABLE_SHIFT       12
+#define L2_PAGETABLE_SHIFT       22
+#define L1_PAGETABLE_SHIFT_PAE   12
+#define L2_PAGETABLE_SHIFT_PAE   21
+#define L3_PAGETABLE_SHIFT_PAE   30
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_SHIFT      12
+#define L2_PAGETABLE_SHIFT      21
+#define L3_PAGETABLE_SHIFT      30
+#define L4_PAGETABLE_SHIFT      39
+#endif
+
+#if defined(__i386__) 
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+#define L1_PAGETABLE_ENTRIES_PAE  512
+#define L2_PAGETABLE_ENTRIES_PAE  512
+#define L3_PAGETABLE_ENTRIES_PAE    4
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_ENTRIES    512
+#define L2_PAGETABLE_ENTRIES    512
+#define L3_PAGETABLE_ENTRIES    512
+#define L4_PAGETABLE_ENTRIES    512
+#endif
+ 
+#define PAGE_SHIFT              XC_PAGE_SHIFT
+#define PAGE_SIZE               (1UL << PAGE_SHIFT)
+#define PAGE_MASK               (~(PAGE_SIZE-1))
+
+typedef u32 l1_pgentry_32_t;
+typedef u32 l2_pgentry_32_t;
+typedef u64 l1_pgentry_64_t;
+typedef u64 l2_pgentry_64_t;
+typedef u64 l3_pgentry_64_t;
+typedef unsigned long l1_pgentry_t;
+typedef unsigned long l2_pgentry_t;
+#if defined(__x86_64__)
+typedef unsigned long l3_pgentry_t;
+typedef unsigned long l4_pgentry_t;
+#endif
+
+#if defined(__i386__)
+#define l1_table_offset(_a) \
+          (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+          ((_a) >> L2_PAGETABLE_SHIFT)
+#define l1_table_offset_pae(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT_PAE) & (L1_PAGETABLE_ENTRIES_PAE - 1))
+#define l2_table_offset_pae(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT_PAE) & (L2_PAGETABLE_ENTRIES_PAE - 1))
+#define l3_table_offset_pae(_a) \
+       (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
+#elif defined(__x86_64__)
+#define l1_table_offset(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
+#define l2_table_offset(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(_a) \
+       (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#define l4_table_offset(_a) \
+       (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+#endif
+
+#define ERROR(_m, _a...)                                \
+do {                                                    \
+    int __saved_errno = errno;                          \
+    fprintf(stderr, "ERROR: " _m "\n" , ## _a );        \
+    errno = __saved_errno;                              \
+} while (0)
+
+
+#define PERROR(_m, _a...)                                       \
+do {                                                            \
+    int __saved_errno = errno;                                  \
+    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,       \
+            __saved_errno, strerror(__saved_errno));            \
+    errno = __saved_errno;                                      \
+} while (0)
+
+
+struct domain_setup_info
+{
+    unsigned long v_start;
+    unsigned long v_end;
+    unsigned long v_kernstart;
+    unsigned long v_kernend;
+    unsigned long v_kernentry;
+
+    unsigned int  load_symtab;
+    unsigned int  pae_kernel;
+    unsigned long symtab_addr;
+    unsigned long symtab_len;
+};
+
+typedef int (*parseimagefunc)(char *image, unsigned long image_size,
+                             struct domain_setup_info *dsi);
+typedef int (*loadimagefunc)(char *image, unsigned long image_size, int xch,
+                            u32 dom, unsigned long *parray,
+                            struct domain_setup_info *dsi);
+
+struct load_funcs
+{
+    parseimagefunc parseimage;
+    loadimagefunc loadimage;
+};
+
+#define mfn_mapper_queue_size 128
+
+typedef struct mfn_mapper {
+    int xc_handle;
+    int size;
+    int prot;
+    int error;
+    int max_queue_size;
+    void * addr;
+    privcmd_mmap_t ioctl; 
+    
+} mfn_mapper_t;
+
+unsigned long xc_get_m2p_start_mfn (int xc_handle);
+
+int xc_copy_to_domain_page(int xc_handle, u32 domid,
+                            unsigned long dst_pfn, void *src_page);
+
+unsigned long xc_get_filesz(int fd);
+
+void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
+                   int xch, u32 dom, unsigned long *parray,
+                   unsigned long vstart);
+
+int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
+             domid_t dom);
+
+/* image loading */
+int probe_elf(char *image, unsigned long image_size, struct load_funcs *funcs);
+int probe_bin(char *image, unsigned long image_size, struct load_funcs *funcs);
+int probe_aout9(char *image, unsigned long image_size, struct load_funcs *funcs);
+
+#endif
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/example.txt
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/example.txt        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,269 @@
+##
+# example.txt <description to the xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file introduces the tools to manage policies
+# and to label domains and resources.
+##
+
+We will show how to install and use the chwall_ste policy.
+Other policies work similarly. Feedback welcome!
+
+
+
+1. Using secpol_xml2bin to translate the chwall_ste policy:
+===========================================================
+
+#tools/security/secpol_xml2bin chwall_ste
+
+Successful execution should print:
+
+    [root@laptopxn security]# ./secpol_xml2bin chwall_ste
+    Validating label file policies/chwall_ste/chwall_ste-security_label_template.xml...
+    XML Schema policies/security_policy.xsd valid.
+    Validating policy file policies/chwall_ste/chwall_ste-security_policy.xml...
+    XML Schema policies/security_policy.xsd valid.
+    Creating ssid mappings ...
+    Creating label mappings ...
+    Max chwall labels:  7
+    Max chwall-types:   4
+    Max chwall-ssids:   5
+    Max ste labels:     14
+    Max ste-types:      6
+    Max ste-ssids:      10
+
+The tool looks in directory policies/chwall_ste for
+the label and policy files.
+
+The default policy directory structure under tools/security looks like:
+
+policies
+|-- security_policy.xsd
+|-- chwall
+|   |-- chwall-security_label_template.xml
+|   `-- chwall-security_policy.xml
+|-- chwall_ste
+|   |-- chwall_ste-security_label_template.xml
+|   `-- chwall_ste-security_policy.xml
+|-- null
+|   |-- null-security_label_template.xml
+|   `-- null-security_policy.xml
+`-- ste
+    |-- ste-security_label_template.xml
+    `-- ste-security_policy.xml
+
+policies/security_policy.xsd contains the schema against which both the
+label-template and the policy files must validate during translation.
+
+policies/chwall_ste/chwall_ste-security_policy.xml defines the
+policies and the types known to the policies.
+
+policies/chwall_ste/chwall_ste-security_label_template.xml contains
+label definitions that group chwall and ste types together and make
+them easier to use for users.
+
+After executing the above secpol_xml2bin command, you will find 2 new
+files in the policies/chwall_ste sub-directory:
+
+policies/chwall_ste/chwall_ste.map ... this file includes the mapping
+of names from the xml files into their binary code representation.
+
+policies/chwall_ste/chwall_ste.bin ... this is the binary policy file,
+the result of parsing the xml files and using the mapping to extract a
+binary version that can be loaded into the hypervisor.
+
+
+
+2. Loading and activating the policy:
+=====================================
+
+We assume that xen is already configured to use the chwall_ste policy;
+please refer to install.txt for instructions.
+
+To activate the policy from the command line (assuming that the
+currently established policy is the minimal boot-policy that is
+hard-coded into the hypervisor):
+
+# ./secpol_tool loadpolicy policies/chwall_ste/chwall_ste.bin
+
+To activate the policy at next reboot:
+
+# cp policies/chwall_ste/chwall_ste.bin /boot
+
+Add a module line to your /boot/grub/grub.conf Xen entry.
+My boot entry with chwall_ste enabled looks like this:
+
+    title Xen (2.6.12)
+        root (hd0,5)
+        kernel /boot/xen.gz dom0_mem=1200000 console=vga
+        module /boot/vmlinuz-2.6.12-xen0 ro root=/dev/hda6 rhgb
+        module /boot/initrd-2.6.12-xen0.img
+        module /boot/chwall_ste.bin
+
+This tells the grub boot-loader to load the binary policy, which
+the hypervisor will recognize. The hypervisor will then establish
+this binary policy during boot instead of the minimal policy that
+is hardcoded as default.
+
+If you have any trouble here, make sure you have the access control
+framework enabled (see: install.txt).
+
+
+
+3. Labeling domains:
+====================
+
+a) Labeling Domain0:
+
+The chwall_ste-security_label_template.xml file includes an attribute
+"bootstrap", which is set to the label name that will be assigned to
+Dom0 (this label will be mapped to ssidref 1/1, the default for Dom0).
+
+b) Labeling User Domains:
+
+Use the script tools/security/setlabel.sh to choose a label and to
+assign labels to user domains.
+
+To show available labels for the chwall_ste policy:
+
+#tools/security/setlabel.sh -l
+
+lists all available labels. For the default chwall_ste it should print
+the following:
+
+    [root@laptopxn security]# ./setlabel.sh -l chwall_ste
+    The following labels are available:
+    dom_SystemManagement
+    dom_HomeBanking
+    dom_Fun
+    dom_BoincClient
+    dom_StorageDomain
+    dom_NetworkDomain
+
+You need to have compiled the policy beforehand so that a .map file
+exists. Setlabel.sh uses the mapping file created throughout the
+policy translation to translate a user-friendly label string into a
+ssidref-number that is eventually used by the Xen hypervisor.
+
+We distinguish two kinds of labels: a) VM labels (for domains) and b) RES
+Labels (for resources). We are currently working on support for
+resource labeling but will focus here on VM labels.
+
+Setlabel.sh only prints VM labels (which we have prefixed with "dom_")
+since only those are used at this time.
+
+If you would like to assign the dom_HomeBanking label to one of your
+user domains (which you hopefully keep clean), look at an example
+domain configuration homebanking.xm:
+
+    #------HOMEBANKING---------
+    kernel = "/boot/vmlinuz-2.6.12-xenU"
+    ramdisk="/boot/U1_ramdisk.img"
+    memory = 65
+    name = "test34"
+    cpu = -1   # leave to Xen to pick
+    # Number of network interfaces. Default is 1.
+    nics=1
+    dhcp="dhcp"
+    #-------------------------
+
+Now we label this domain
+
+[root@laptopxn security]# ./setlabel.sh homebanking.xm dom_HomeBanking chwall_ste
+Mapped label 'dom_HomeBanking' to ssidref '0x00020002'.
+
+The domain configuration may now look like:
+
+    [root@laptopxn security]# cat homebanking.xm
+    #------HOMEBANKING---------
+    kernel = "/boot/vmlinuz-2.6.12-xenU"
+    ramdisk="/boot/U1_ramdisk.img"
+    memory = 65
+    name = "test34"
+    cpu = -1   # leave to Xen to pick
+    # Number of network interfaces. Default is 1.
+    nics=1
+    dhcp="dhcp"
+    #-------------------------
+    #ACM_POLICY=chwall_ste-security_policy.xml
+    #ACM_LABEL=dom_HomeBanking
+    ssidref = 0x00020002
+
+You can see 3 new entries, two of which are comments.  The only value
+that the hypervisor cares about is the ssidref that will reference
+those types assigned to this label. You can look them up in the
+xml label-template file for the chwall_ste policy.
+
+This script will eventually move into the domain management and will
+be called when the domain is instantiated. For now, the setlabel
+script must be run on domains whenever the policy files change since
+the mapping between label names and ssidrefs can change in this case.
+
+
+4. Starting a labeled domain
+============================
+
+Now, start the domain:
+    #xm create -c homebanking.xm
+
+
+If you label another domain configuration as dom_Fun and try to start
+it afterwards, its start will fail. Why?
+
+Because the running homebanking domain has the chinese wall type
+"cw_Sensitive". The new domain dom_Fun has the chinese wall label
+"cw_Distrusted". This domain is not allowed to run simultaneously
+because of the defined conflict set
+
+                       <conflictset name="Protection1">
+                               <type>cw_Sensitive</type>
+                               <type>cw_Distrusted</type>
+                       </conflictset>
+
+(in policies/chwall_ste/chwall_ste-security_policy.xml), which says
+that only one of the types cw_Sensitive and cw_Distrusted can run at a
+time.
+
+If you save or shutdown the HomeBanking domain, you will be able to
+start the "Fun" domain. You can look into the Xen log to see if a
+domain was denied to start because of the access control framework
+with the command 'xm dmesg'.
+
+It is important (and usually non-trivial) to define the labels in a
+way that the semantics of the labels are enforced and supported by the
+types and the conflict sets.
+
+Note: While the chinese wall policy enforcement is complete, the type
+enforcement is currently enforced in the Xen hypervisor
+only. Therefore, only point-to-point sharing with regard to the type
+enforcement is currently controlled. We are working on enhancements to
+Dom0 that enforce types also for network traffic that is routed
+through Dom0 and on the enforcement of resource labeling when binding
+resources to domains (e.g., enforcing types between domains and
+hardware resources, such as disk partitions).
+
+
+5. Adding your own policies
+===========================
+
+Writing your own policy (e.g. "mypolicy") requires the following:
+
+a) the policy definition (types etc.) file
+b) the label template definition (labels etc.) file
+
+If your policy name is "mypolicy", you need to create a
+subdirectory mypolicy in tools/security/policies.
+
+Then you create
+tools/security/policies/mypolicy/mypolicy-security_policy.xml and
+tools/security/policies/mypolicy/mypolicy-security_label_template.xml.
+
+You need to keep to the schema as defined in
+tools/security/policies/security_policy.xsd since the translation tool
+secpol_xml2bin is written against this schema.
+
+If you keep to the security policy schema, then you can use all the
+tools described above. Refer to install.txt to install it.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/install.txt
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/install.txt        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,67 @@
+##
+# install.txt <description to the xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file shows how to activate and install the access control
+# framework.
+##
+
+
+INSTALLING A SECURITY POLICY IN XEN
+===================================
+
+By default, the access control architecture is disabled in Xen. To
+enable the access control architecture in Xen follow the steps below.
+This description assumes that you want to install the Chinese Wall and
+Simple Type Enforcement policy. Some file names need to be replaced
+below to activate the Chinese Wall OR the Type Enforcement policy
+exclusively (chwall_ste --> {chwall, ste}).
+
+1. enable access control in Xen
+       # cd "xen_root"
+       # edit/xemacs/vi Config.mk
+
+       change the line:
+       ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
+
+       to:
+       ACM_USE_SECURITY_POLICY ?= ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
+
+       # make all
+       # ./install.sh
+
+2. compile the policy from xml to a binary format that can be loaded
+   into the hypervisor for enforcement
+       # cd tools/security
+       # make
+
+       manual steps (alternative to make boot_install):
+       #./secpol_xml2bin chwall_ste
+       #cp policies/chwall_ste/chwall_ste.bin /boot
+       #edit /boot/grub/grub.conf
+        add the following line to your xen boot entry:
+       "module chwall_ste.bin"
+
+       alternatively, you can try our automatic translation and
+       installation of the policy:
+       # make boot_install
+
+       [we try hard to do the right thing to the right boot entry but
+        please verify boot entry in /boot/grub/grub.conf afterwards;
+        your xen boot entry should have an additional module line
+        specifying a chwall_ste.bin file with the correct directory
+        (e.g. "/" or "/boot").]
+
+
+3. reboot into the newly compiled hypervisor
+
+        after boot
+       #xm dmesg should show an entry about the policy being loaded
+            during the boot process
+
+        #tools/security/secpol_tool getpolicy
+            should print the new chwall_ste binary policy representation
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall/chwall-security_label_template.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall/chwall-security_label_template.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,76 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--              This file defines the security labels, which can  -->
+<!--              be attached to Domains and resources. Based on    -->
+<!--              these labels, the access control module decides   -->
+<!--              about sharing between Domains and about access    -->
+<!--              of Domains to real resources.                     -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+   <LabelHeader>
+      <Name>chwall-security_label_template</Name>
+      <Date>2005-08-10</Date>
+      <PolicyName>
+         <Url>chwall-security_policy.xml</Url>
+         <Reference>abcdef123456abcdef</Reference>
+      </PolicyName>
+   </LabelHeader>
+
+   <SubjectLabels bootstrap="dom_SystemManagement">
+      <!-- single ste typed domains            -->
+      <!-- ACM enforces that only domains with -->
+      <!-- the same type can share information -->
+      <!--                                     -->
+      <!-- Bootstrap label is assigned to Dom0 -->
+      <VirtualMachineLabel>
+       <Name>dom_HomeBanking</Name>
+         <ChineseWallTypes>
+            <Type>cw_Sensitive</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+       <Name>dom_Fun</Name>
+         <ChineseWallTypes>
+            <Type>cw_Distrusted</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- donating some cycles to seti@home -->
+       <Name>dom_BoincClient</Name>
+         <ChineseWallTypes>
+            <Type>cw_Isolated</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <!-- Domains with multiple ste types services; such domains   -->
+      <!-- must keep the types inside their domain safely confined. -->
+      <VirtualMachineLabel>
+       <Name>dom_SystemManagement</Name>
+         <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- serves persistent storage to other domains -->
+       <Name>dom_StorageDomain</Name>
+         <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- serves network access to other domains -->
+       <Name>dom_NetworkDomain</Name>
+         <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+   </SubjectLabels>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall/chwall-security_policy.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall/chwall-security_policy.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--             This file defines the security policies, which     -->
+<!--             can be enforced by the Xen Access Control Module.  -->
+<!--             Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+               <Name>chwall-security_policy</Name>
+               <Date>2005-08-10</Date>
+</PolicyHeader>
+<!--                                             -->
+<!-- example of a chinese wall type definition   -->
+<!-- along with its conflict sets                -->
+<!-- (types in a conflict set are exclusive, i.e. -->
+<!--  once a Domain with one type of a set is    -->
+<!--  running, no other Domain with another type -->
+<!--  of the same conflict set can start.)       -->
+       <ChineseWall priority="PrimaryPolicyComponent">
+        <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+            <Type>cw_Sensitive</Type>
+            <Type>cw_Isolated</Type>
+            <Type>cw_Distrusted</Type>
+        </ChineseWallTypes>
+
+        <ConflictSets>
+        <Conflict name="Protection1">
+            <Type>cw_Sensitive</Type>
+            <Type>cw_Distrusted</Type>
+        </Conflict>
+        </ConflictSets>
+       </ChineseWall>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall_ste/chwall_ste-security_label_template.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall_ste/chwall_ste-security_label_template.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,167 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--              This file defines the security labels, which can  -->
+<!--              be attached to Domains and resources. Based on    -->
+<!--              these labels, the access control module decides   -->
+<!--              about sharing between Domains and about access    -->
+<!--              of Domains to real resources.                     -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+   <LabelHeader>
+      <Name>chwall_ste-security_label_template</Name>
+      <Date>2005-08-10</Date>
+      <PolicyName>
+         <Url>chwall_ste-security_policy.xml</Url>
+         <Reference>abcdef123456abcdef</Reference>
+      </PolicyName>
+   </LabelHeader>
+
+   <SubjectLabels bootstrap="dom_SystemManagement">
+      <!-- single ste typed domains            -->
+      <!-- ACM enforces that only domains with -->
+      <!-- the same type can share information -->
+      <!--                                     -->
+      <!-- Bootstrap label is assigned to Dom0 -->
+      <VirtualMachineLabel>
+       <Name>dom_HomeBanking</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersonalFinances</Type>
+         </SimpleTypeEnforcementTypes>
+
+         <ChineseWallTypes>
+            <Type>cw_Sensitive</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+       <Name>dom_Fun</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_InternetInsecure</Type>
+         </SimpleTypeEnforcementTypes>
+
+         <ChineseWallTypes>
+            <Type>cw_Distrusted</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- donating some cycles to seti@home -->
+       <Name>dom_BoincClient</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_DonatedCycles</Type>
+         </SimpleTypeEnforcementTypes>
+
+         <ChineseWallTypes>
+            <Type>cw_Isolated</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <!-- Domains with multiple ste types services; such domains   -->
+      <!-- must keep the types inside their domain safely confined. -->
+      <VirtualMachineLabel>
+       <Name>dom_SystemManagement</Name>
+         <SimpleTypeEnforcementTypes>
+            <!-- since dom0 needs access to every domain and -->
+            <!-- resource right now ... -->
+            <Type>ste_SystemManagement</Type>
+            <Type>ste_PersonalFinances</Type>
+            <Type>ste_InternetInsecure</Type>
+            <Type>ste_DonatedCycles</Type>
+            <Type>ste_PersistentStorageA</Type>
+            <Type>ste_NetworkAdapter0</Type>
+         </SimpleTypeEnforcementTypes>
+
+         <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- serves persistent storage to other domains -->
+       <Name>dom_StorageDomain</Name>
+         <SimpleTypeEnforcementTypes>
+            <!-- access right to the resource (hard drive a) -->
+            <Type>ste_PersistentStorageA</Type>
+            <!-- can serve following types -->
+            <Type>ste_PersonalFinances</Type>
+            <Type>ste_InternetInsecure</Type>
+         </SimpleTypeEnforcementTypes>
+
+         <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- serves network access to other domains -->
+       <Name>dom_NetworkDomain</Name>
+         <SimpleTypeEnforcementTypes>
+            <!-- access right to the resource (ethernet card) -->
+            <Type>ste_NetworkAdapter0</Type>
+            <!-- can serve following types -->
+            <Type>ste_PersonalFinances</Type>
+            <Type>ste_InternetInsecure</Type>
+            <Type>ste_DonatedCycles</Type>
+         </SimpleTypeEnforcementTypes>
+
+         <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+         </ChineseWallTypes>
+      </VirtualMachineLabel>
+   </SubjectLabels>
+
+   <ObjectLabels>
+      <ResourceLabel>
+       <Name>res_ManagementResource</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_SystemManagement</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_HardDrive (hda)</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersistentStorageA</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_LogicalDiskPartition1 (hda1)</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersonalFinances</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_LogicalDiskPartition2 (hda2)</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_InternetInsecure</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_EthernetCard</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_NetworkAdapter0</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_SecurityToken</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersonalFinances</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_GraphicsAdapter</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_SystemManagement</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+   </ObjectLabels>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/chwall_ste/chwall_ste-security_policy.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/chwall_ste/chwall_ste-security_policy.xml Thu Aug 25 22:53:20 2005
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--             This file defines the security policies, which     -->
+<!--             can be enforced by the Xen Access Control Module.  -->
+<!--             Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+               <Name>chwall_ste-security_policy</Name>
+               <Date>2005-08-10</Date>
+</PolicyHeader>
+<!--                                                        -->
+<!-- example of a simple type enforcement policy definition -->
+<!--                                                        -->
+       <SimpleTypeEnforcement>
+        <SimpleTypeEnforcementTypes>
+            <Type>ste_SystemManagement</Type>   <!-- machine/security 
management -->
+            <Type>ste_PersonalFinances</Type>   <!-- personal finances -->
+            <Type>ste_InternetInsecure</Type>   <!-- games, active X, etc. -->
+            <Type>ste_DonatedCycles</Type>      <!-- donation to 
BOINC/seti@home -->
+            <Type>ste_PersistentStorageA</Type> <!-- domain managing the 
harddrive A-->
+            <Type>ste_NetworkAdapter0</Type>    <!-- type of the domain 
managing ethernet adapter 0-->
+        </SimpleTypeEnforcementTypes>
+       </SimpleTypeEnforcement>
+<!--                                             -->
+<!-- example of a chinese wall type definition   -->
+<!-- along with its conflict sets                -->
+<!-- (types in a conflict set are exclusive, i.e. -->
+<!--  once a Domain with one type of a set is    -->
+<!--  running, no other Domain with another type -->
+<!--  of the same conflict set can start.)       -->
+       <ChineseWall priority="PrimaryPolicyComponent">
+        <ChineseWallTypes>
+            <Type>cw_SystemManagement</Type>
+            <Type>cw_Sensitive</Type>
+            <Type>cw_Isolated</Type>
+            <Type>cw_Distrusted</Type>
+        </ChineseWallTypes>
+
+        <ConflictSets>
+        <Conflict name="Protection1">
+            <Type>cw_Sensitive</Type>
+            <Type>cw_Distrusted</Type>
+        </Conflict>
+        </ConflictSets>
+       </ChineseWall>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 
tools/security/policies/null/null-security_label_template.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/null/null-security_label_template.xml     Thu Aug 
25 22:53:20 2005
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--              This file defines the security labels, which can  -->
+<!--              be attached to Domains and resources. Based on    -->
+<!--              these labels, the access control module decides   -->
+<!--              about sharing between Domains and about access    -->
+<!--              of Domains to real resources.                     -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+   <LabelHeader>
+      <Name>null-security_label_template</Name>
+
+      <Date>2005-08-10</Date>
+      <PolicyName>
+         <Url>null-security_policy.xml</Url>
+
+         <Reference>abcdef123456abcdef</Reference>
+      </PolicyName>
+   </LabelHeader>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 
tools/security/policies/null/null-security_policy.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/null/null-security_policy.xml     Thu Aug 25 
22:53:20 2005
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--             This file defines the security policies, which     -->
+<!--             can be enforced by the Xen Access Control Module.  -->
+<!--             Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+               <Name>null-security_policy</Name>
+               <Date>2005-08-10</Date>
+</PolicyHeader>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policies/security_policy.xsd
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/security_policy.xsd       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Ray Valdez, Reiner Sailer {rvaldez,sailer}@us.ibm.com -->
+<!--         This file defines the schema, which is used to define -->
+<!--         the security policy and the security labels in Xen.   -->
+
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" 
targetNamespace="http://www.ibm.com" xmlns="http://www.ibm.com" 
elementFormDefault="qualified">
+       <xsd:element name="SecurityPolicyDefinition">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="PolicyHeader" minOccurs="0" 
maxOccurs="1"></xsd:element>
+                               <xsd:element ref="SimpleTypeEnforcement" 
minOccurs="0" maxOccurs="1"></xsd:element>
+                               <xsd:element ref="ChineseWall" minOccurs="0" 
maxOccurs="1"></xsd:element>
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="SecurityLabelTemplate">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="LabelHeader" minOccurs="1" 
maxOccurs="1"></xsd:element>
+                               <xsd:element name="SubjectLabels" minOccurs="0" 
maxOccurs="1">
+                                       <xsd:complexType>
+                                               <xsd:sequence>
+                                                       <xsd:element 
ref="VirtualMachineLabel" minOccurs="1" maxOccurs="unbounded"></xsd:element>
+                                               </xsd:sequence>
+                                               <xsd:attribute name="bootstrap" 
type="xsd:string" use="required"></xsd:attribute>
+                                       </xsd:complexType>
+                               </xsd:element>
+                               <xsd:element name="ObjectLabels" minOccurs="0" 
maxOccurs="1">
+                                       <xsd:complexType>
+                                               <xsd:sequence>
+                                                       <xsd:element 
ref="ResourceLabel" minOccurs="1" maxOccurs="unbounded"></xsd:element>
+                                               </xsd:sequence>
+                                       </xsd:complexType>
+                               </xsd:element>
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="PolicyHeader">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="Name" minOccurs="1" 
maxOccurs="1" />
+                               <xsd:element ref="Date" minOccurs="1" 
maxOccurs="1" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="LabelHeader">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="Name"></xsd:element>
+                               <xsd:element ref="Date" minOccurs="1" 
maxOccurs="1"></xsd:element>
+                               <xsd:element ref="PolicyName" minOccurs="1" 
maxOccurs="1"></xsd:element>
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="SimpleTypeEnforcement">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="SimpleTypeEnforcementTypes" />
+                       </xsd:sequence>
+                       <xsd:attribute name="priority" type="PolicyOrder" 
use="optional"></xsd:attribute>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="ChineseWall">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="ChineseWallTypes" />
+                               <xsd:element ref="ConflictSets" />
+                       </xsd:sequence>
+                       <xsd:attribute name="priority" type="PolicyOrder" 
use="optional"></xsd:attribute>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="ChineseWallTypes">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element maxOccurs="unbounded" 
minOccurs="1" ref="Type" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="ConflictSets">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element maxOccurs="unbounded" 
minOccurs="1" ref="Conflict" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="SimpleTypeEnforcementTypes">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element maxOccurs="unbounded" 
minOccurs="1" ref="Type" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="Conflict">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element maxOccurs="unbounded" 
minOccurs="1" ref="Type" />
+                       </xsd:sequence>
+                       <xsd:attribute name="name" type="xsd:string" 
use="optional"></xsd:attribute>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="VirtualMachineLabel">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="Name"></xsd:element>
+                               <xsd:element ref="SimpleTypeEnforcementTypes" 
minOccurs="0" maxOccurs="unbounded" />
+                               <xsd:element ref="ChineseWallTypes" 
minOccurs="0" maxOccurs="unbounded" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="ResourceLabel">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="Name"></xsd:element>
+                               <xsd:element ref="SimpleTypeEnforcementTypes" 
minOccurs="0" maxOccurs="unbounded" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="PolicyName">
+               <xsd:complexType>
+                       <xsd:sequence>
+                               <xsd:element ref="Url" />
+                               <xsd:element ref="Reference" />
+                       </xsd:sequence>
+               </xsd:complexType>
+       </xsd:element>
+       <xsd:element name="Date" type="xsd:string" />
+       <xsd:element name="Name" type="xsd:string" />
+       <xsd:element name="Type" type="xsd:string" />
+       <xsd:element name="Reference" type="xsd:string" />
+       <xsd:element name="Url"></xsd:element>
+
+       <xsd:simpleType name="PolicyOrder">
+               <xsd:restriction base="xsd:string">
+                       <xsd:enumeration 
value="PrimaryPolicyComponent"></xsd:enumeration>
+               </xsd:restriction>
+       </xsd:simpleType>
+
+</xsd:schema>
diff -r 5f1ed597f107 -r 8799d14bef77 
tools/security/policies/ste/ste-security_label_template.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/ste/ste-security_label_template.xml       Thu Aug 
25 22:53:20 2005
@@ -0,0 +1,143 @@
+<?xml version="1.0"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--              This file defines the security labels, which can  -->
+<!--              be attached to Domains and resources. Based on    -->
+<!--              these labels, the access control module decides   -->
+<!--              about sharing between Domains and about access    -->
+<!--              of Domains to real resources.                     -->
+
+<SecurityLabelTemplate
+ xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+   <LabelHeader>
+      <Name>ste-security_label_template</Name>
+      <Date>2005-08-10</Date>
+      <PolicyName>
+         <Url>ste-security_policy.xml</Url>
+         <Reference>abcdef123456abcdef</Reference>
+      </PolicyName>
+   </LabelHeader>
+
+   <SubjectLabels bootstrap="dom_SystemManagement">
+      <!-- single ste typed domains            -->
+      <!-- ACM enforces that only domains with -->
+      <!-- the same type can share information -->
+      <!--                                     -->
+      <!-- Bootstrap label is assigned to Dom0 -->
+      <VirtualMachineLabel>
+       <Name>dom_HomeBanking</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersonalFinances</Type>
+         </SimpleTypeEnforcementTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+       <Name>dom_Fun</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_InternetInsecure</Type>
+         </SimpleTypeEnforcementTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- donating some cycles to seti@home -->
+       <Name>dom_BoincClient</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_DonatedCycles</Type>
+         </SimpleTypeEnforcementTypes>
+      </VirtualMachineLabel>
+
+      <!-- Domains with multiple ste types services; such domains   -->
+      <!-- must keep the types inside their domain safely confined. -->
+      <VirtualMachineLabel>
+       <Name>dom_SystemManagement</Name>
+         <SimpleTypeEnforcementTypes>
+            <!-- since dom0 needs access to every domain and -->
+            <!-- resource right now ... -->
+            <Type>ste_SystemManagement</Type>
+            <Type>ste_PersonalFinances</Type>
+            <Type>ste_InternetInsecure</Type>
+            <Type>ste_DonatedCycles</Type>
+            <Type>ste_PersistentStorageA</Type>
+            <Type>ste_NetworkAdapter0</Type>
+         </SimpleTypeEnforcementTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- serves persistent storage to other domains -->
+       <Name>dom_StorageDomain</Name>
+         <SimpleTypeEnforcementTypes>
+            <!-- access right to the resource (hard drive a) -->
+            <Type>ste_PersistentStorageA</Type>
+            <!-- can serve following types -->
+            <Type>ste_PersonalFinances</Type>
+            <Type>ste_InternetInsecure</Type>
+         </SimpleTypeEnforcementTypes>
+      </VirtualMachineLabel>
+
+      <VirtualMachineLabel>
+        <!-- serves network access to other domains -->
+       <Name>dom_NetworkDomain</Name>
+         <SimpleTypeEnforcementTypes>
+            <!-- access right to the resource (ethernet card) -->
+            <Type>ste_NetworkAdapter0</Type>
+            <!-- can serve following types -->
+            <Type>ste_PersonalFinances</Type>
+            <Type>ste_InternetInsecure</Type>
+            <Type>ste_DonatedCycles</Type>
+         </SimpleTypeEnforcementTypes>
+      </VirtualMachineLabel>
+   </SubjectLabels>
+
+   <ObjectLabels>
+      <ResourceLabel>
+       <Name>res_ManagementResource</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_SystemManagement</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_HardDrive (hda)</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersistentStorageA</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_LogicalDiskPartition1 (hda1)</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersonalFinances</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_LogicalDiskPartition2 (hda2)</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_InternetInsecure</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_EthernetCard</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_NetworkAdapter0</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_SecurityToken</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_PersonalFinances</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+
+      <ResourceLabel>
+       <Name>res_GraphicsAdapter</Name>
+         <SimpleTypeEnforcementTypes>
+            <Type>ste_SystemManagement</Type>
+         </SimpleTypeEnforcementTypes>
+      </ResourceLabel>
+   </ObjectLabels>
+</SecurityLabelTemplate>
+
diff -r 5f1ed597f107 -r 8799d14bef77 
tools/security/policies/ste/ste-security_policy.xml
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policies/ste/ste-security_policy.xml       Thu Aug 25 
22:53:20 2005
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Author: Reiner Sailer, Ray Valdez {sailer,rvaldez}@us.ibm.com  -->
+<!--             This file defines the security policies, which     -->
+<!--             can be enforced by the Xen Access Control Module.  -->
+<!--             Currently: Chinese Wall and Simple Type Enforcement-->
+<SecurityPolicyDefinition xmlns="http://www.ibm.com"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.ibm.com security_policy.xsd">
+<PolicyHeader>
+               <Name>ste-security_policy</Name>
+               <Date>2005-08-10</Date>
+</PolicyHeader>
+<!--                                                        -->
+<!-- example of a simple type enforcement policy definition -->
+<!--                                                        -->
+       <SimpleTypeEnforcement>
+        <SimpleTypeEnforcementTypes>
+            <Type>ste_SystemManagement</Type>   <!-- machine/security 
management -->
+            <Type>ste_PersonalFinances</Type>   <!-- personal finances -->
+            <Type>ste_InternetInsecure</Type>   <!-- games, active X, etc. -->
+            <Type>ste_DonatedCycles</Type>      <!-- donation to 
BOINC/seti@home -->
+            <Type>ste_PersistentStorageA</Type> <!-- domain managing the 
harddrive A-->
+            <Type>ste_NetworkAdapter0</Type>    <!-- type of the domain 
managing ethernet adapter 0-->
+        </SimpleTypeEnforcementTypes>
+       </SimpleTypeEnforcement>
+</SecurityPolicyDefinition>
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/policy.txt
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/policy.txt Thu Aug 25 22:53:20 2005
@@ -0,0 +1,405 @@
+##
+# policy.txt <description of the Xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file gives an overview of the security policies currently
+# provided and also gives some reasoning about how to assign
+# labels to domains.
+##
+
+Xen access control policies
+
+
+General explanation of supported security policies:
+=====================================================
+
+We have implemented the mandatory access control architecture of our
+hypervisor security architecture (sHype) for the Xen hypervisor. It
+controls communication (in Xen: event channels, grant tables) between
+Virtual Machines (from here on called domains) and, through this, the
+virtual block devices, networking, and shared memory that are implemented
+on top of these communication means. While we have implemented the
+described policies and access control architecture for other
+hypervisor systems, we will describe below specifically its
+implementation and use in the Xen hypervisor. The policy enforcement
+is called mandatory regarding user domains since the policy is
+given by the security administration and enforced independently of the
+user domains by the Xen hypervisor in cooperation with the domain
+management.
+
+The access control architecture consists of three parts:
+
+i) The access control policy determines the "command set" of the ACM
+and the hooks with which they can be configured to constrain the
+sharing of virtual resources. The current access control architecture
+implemented for Xen supports two policies: Chinese Wall and Simple
+Type Enforcement, which we describe in turn below.
+
+
+ii) The actually enforced policy instantiation uses the policy
+language (i) to configure the Xen access control in a way that suits
+the specific application (home desktop environment, company desktop,
+Web server system, etc.). We have defined an exemplary policy
+instantiation for Chinese Wall (chwall policy) and Simple Type
+Enforcement (ste policy) for a desktop system. We offer these policies
+in combination since they are controlling orthogonal events.
+
+
+iii) The access control module (ACM) and related hooks are part of the
+core hypervisor and their controls cannot be bypassed by domains. The
+ACM and hooks are the active security components. We refer to
+publications that describe how access control is enforced in the Xen
+hypervisor using the ACM (access decision) and the hooks (decision
+enforcement) inserted into the setup of event channels and grant
+tables, and into domain operations (create, destroy, save, restore,
+migrate). These controls decide based on the active policy
+configuration (see i. and ii.) if the operation proceeds or if the
+operation is aborted (denied).
+
+
+In general, security policy instantiations in the Xen access control
+framework are defined by two files:
+
+a) a single "policy-name"-security_policy.xml file that defines the
+types known to the ACM and policy rules based on these types
+
+b) a single "policy-name"-security_label_template.xml file that
+defines labels based on known types
+
+Every security policy has its own sub-directory under
+"Xen-root"/tools/security/policies in order to simplify their
+management and the security policy tools. We will describe those files
+for our example policy (Chinese Wall and Simple Type Enforcement) in
+more detail as we go along. Eventually, we will move towards a system
+installation where the policies will reside under /etc.
+
+
+CHINESE WALL
+============
+
+The Chinese Wall policy enables the user to define "which workloads
+(domain payloads) cannot run on a single physical system at the same
+time". Why would we want to prevent workloads from running at the same
+time on the same system? This supports requirements that can (but
+don't have to) be rooted in the measure of trust in the isolation of
+different domains that share the same hardware. Since the access
+control architecture aims at high performance and non-intrusive
+implementation, it currently does not address covert (timing) channels
+and aims at medium assurance. Users can apply the Chinese Wall policy
+to guarantee an air-gap between very sensitive payloads both regarding
+covert information channels and regarding resource starvation.
+
+To enable the CW control, each domain is labeled with a set of Chinese
+Wall types and CW Conflict Sets are defined which include those CW
+types that cannot run simultaneously on the same hardware. This
+interpretation of conflict sets is the only policy rule for the Chinese
+Wall policy.
+
+This is enforced by controlling the start of domains according to
+their assigned CW workload types. Domains with Chinese Wall types that
+appear in a common conflict set are running mutually exclusive on a
+platform, i.e., once a domain with one of the cw-types of a conflict
+set is running, no domain with another cw-type of the same conflict
+set can start until the first domain is destroyed, paused, or migrated
+away from the physical system (this assumes that such a partition can
+no longer be observed). The idea is to assign cw-types according to
+the type of payload that a domain runs and to use the Chinese Wall
+policy to ensure that payload types can be differentiated by the
+hypervisor and can be prevented from being executed on the same system
+at the same time. Using the flexible CW policy maintains system
+consolidation and workload-balancing while introducing guaranteed
+constraints where necessary.
+
+
+Example of a Chinese Wall Policy Instantiation
+----------------------------------------------
+
+The file chwall-security_policy.xml defines the Chinese Wall types as
+well as the conflict sets for our example policy (you find it in the
+directory "xen_root"/tools/security/policies/chwall).
+
+It defines four Chinese Wall types (prefixed with cw_) with the
+following meaning:
+
+* cw_SystemManagement is a type identifying workloads for systems
+management, e.g., domain management, device management, or hypervisor
+management.
+
+* cw_Sensitive is identifying workloads that are critical to the user
+for one reason or another.
+
+* cw_Distrusted is identifying workloads a user does not have much
+confidence in. E.g., a domain used for surfing the Internet without
+protection (i.e., active-X, java, java-script, executing web content)
+or for (Internet) Games should be typed this way.
+
+* cw_Isolated is identifying workloads that are supposedly isolated by
+use of the type enforcement policy (described below). For example, if
+a user wants to donate cycles to seti@home, she can set up a separate
+domain for a Boinc (http://boinc.ssl.berkeley.edu/) client, disable
+this domain from accessing the hard drive and from communicating to
+other local domains, and type it as cw_Isolated. We will look at a
+specific example later.
+
+The example policy uses the defined types to define one conflict set:
+Protection1 = {cw_Sensitive, cw_Distrusted}. This conflict set tells
+the hypervisor that once a domain typed as cw_Sensitive is running, a
+domain typed as cw_Distrusted cannot run concurrently (and the other
+way round). With this policy, a domain typed as cw_Isolated is allowed
+to run simultaneously with domains tagged as cw_Sensitive.
+
+Consequently, the access control module in the Xen hypervisor
+distinguishes 4 different workload types in
+this example policy. It is the user's responsibility to type the
+domains in a way that reflects the workloads of these domains and, in
+the case of cw_Isolated, its properties, e.g. by configuring the
+sharing capabilities of the domain accordingly by using the simple
+type enforcement policy.
+
+Users can define their own or change the existing example policy
+according to their working environment and security requirements. To
+do so, replace the file chwall-security_policy.xml with the new
+policy.
+
+
+SIMPLE TYPE ENFORCEMENT
+=======================
+
+The file ste-security_policy.xml defines the simple type enforcement
+types for our example policy (you find it in the directory
+"xen_root"/tools/security/policies/ste). The Simple Type Enforcement
+policy defines which domains can share information with which other
+domains. To this end, it controls
+
+i) inter-domain communication channels (e.g., network traffic, events,
+and shared memory).
+
+ii) access of domains to physical resources (e.g., hard drive, network
+cards, graphics adapter, keyboard).
+
+In order to enable the hypervisor to distinguish different domains and
+the user to express access rules, the simple type enforcement defines
+a set of types (ste_types).
+
+The policy defines that communication between domains is allowed if
+the domains share a common STE type. As with the chwall types, STE
+types should enable the differentiation of workloads. The simple type
+enforcement access control implementation in the hypervisor enforces
+that domains can only communicate (set up event channels, grant tables)
+if they share a common type, i.e., both domains are assigned at least
+one type in common. A domain can access a resource if the domain and
+the resource share a common type. Hence, assigning STE types to
+domains and resources allows users to define constraints on sharing
+between domains and to keep sensitive data confined from distrusted
+domains.
+
+Domain <--> Domain Sharing
+''''''''''''''''''''''''''
+(implemented but its effective use requires factorization of Dom0)
+
+a) Domains with a single STE type (general user domains): Sharing
+between such domains is enforced entirely by the hypervisor access
+control. It is independent of the domains and does not require their
+co-operation.
+
+b) Domains with multiple STE types: One example is a domain that
+virtualizes a physical resource (e.g., hard drive) and serves it as
+multiple virtual resources (virtual block drives) to other domains of
+different types. The idea is that only a specific device domain is
+assigned the type required to access the physical hard-drive. Logical
+drives are then assigned the types of domains that have access to this
+logical drive. Since the Xen hypervisor cannot distinguish between the
+logical drives, the access control (type enforcement) is delegated to
+the device domain, which has access to the types of domains requesting
+to mount a logical drive as well as the types assigned to the
+different available logical drives.
+
+Currently in Xen, Dom0 controls all hardware, needs to communicate
+with all domains during their setup, and intercepts all communication
+between domains. Consequently, Dom0 needs to be assigned all types
+used and must be completely trusted to maintain the separation of
+information coming from domains with different STE types. Thus a
+refactoring of Dom0 is recommended for stronger confinement
+guarantees.
+
+Domain --> RESOURCES Access
+'''''''''''''''''''''''''''
+(current work)
+
+We define for each resource that we want to distinguish a separate STE
+type. Each STE type is assigned to the respective resource and to
+those domains that are allowed to access this resource. Type
+enforcement will guarantee that other domains cannot access this
+resource since they don't share the resource's STE type.
+
+Since in the current implementation of Xen, Dom0 controls access to
+all hardware (e.g., disk drives, network), Domain-->Resource access
+control enforcement must be implemented in Dom0. This is possible
+since Dom0 has access to both the domain configuration (including the
+domain STE types) and the resource configuration (including the
+resource STE types).
+
+For purposes of gaining higher assurance in the resulting system, it
+may be desirable to reduce the size of dom0 by adding one or more
+"device domains" (DDs). These DDs, e.g. providing storage or network
+access, can support one or more physical devices, and manage
+enforcement of MAC policy relevant for said devices. Security benefits
+come from the smaller size of these DDs, as they can be more easily
+audited than monolithic device driver domains. DDs can help to obtain
+maximum security benefit from sHype.
+
+
+Example of a Simple Type Enforcement Policy Instantiation
+---------------------------------------------------------
+
+We define the following types:
+
+* ste_SystemManagement identifies workloads (and domains that run
+them) that must share information to accomplish the management of the
+system
+
+* ste_PersonalFinances identifies workloads that are related to
+sensitive programs such as HomeBanking applications or safely
+configured web browsers for InternetBanking
+
+* ste_InternetInsecure identifies workloads that are very
+function-rich and unrestricted to offer for example an environment
+where internet games can run efficiently
+
+* ste_DonatedCycles identifies workloads that run on behalf of others,
+e.g. a Boinc client
+
+* ste_PersistentStorage identifies workloads that have direct access
+to persistent storage (e.g., hard drive)
+
+* ste_NetworkAccess identifies workloads that have direct access to
+network cards and related networks
+
+
+
+SECURITY LABEL TEMPLATES
+========================
+
+We introduce security label templates because it is difficult for
+users to ensure tagging of domains consistently and since there are
+--as we have seen in the case of isolation-- useful dependencies
+between the policies. Security Label Templates define type sets that
+can be addressed by more user-friendly label names,
+e.g. dom_HomeBanking describes a typical typeset tagged to domains
+used for sensitive Homebanking work-loads. Labels are defined in the
+security label template file.
+
+Using Security Label Templates has multiple advantages:
+a) easy reference of typical sets of type assignments
+b) consistent interpretation of type combinations
+c) meaningful application-level label names
+
+The definition of label templates depends on the combination of
+policies that are used. We will describe some of the labels defined
+for the Chinese Wall and Simple Type Enforcement combination.
+
+In the BoincClient example, the label_template file specifies that
+this Label is assigned the Chinese Wall type cw_Isolated. We do this
+assuming that this BoincClient is isolated against the rest of the
+system infrastructure (no persistent memory, no sharing with local
+domains). Since cw_Isolated is not included in any conflict set, it
+can run at any time concurrently with any other domain. The
+ste_DonatedCycles type assigned to the BoincClient reflects the
+isolation assumption: it is only assigned to the dom_NetworkDomain
+giving the BoincClient domain access to the network to communicate
+with its BoincServer.
+
+The strategy for combining types into Labels is the following: First
+we define a label for each type of general user domain
+(workload-oriented). Then we define a new label for each physical
+resource that shall be shared using a DD domain (e.g., disk) and for
+each logical resource offered through this physical resource (logical
+disk partition). We define then device domain labels (here:
+dom_SystemManagement, dom_StorageDomain, dom_NetworkDomain) which
+include the types of the physical resources (e.g. hda) their domains
+need to connect to. Such physical resources can only be accessed
+directly by device domains with the respective device's STE
+type. Additionally we assign to such a device domain Label the STE
+types of those user domains that are allowed to access one of the
+logical resources (e.g., hda1, hda2) built on top of this physical
+resource through the device domain.
+
+
+Label Construction Example:
+---------------------------
+
+We define here a storage domain label for a domain that owns a real
+disk drive and creates the logical disk partitions hda1 and hda2 which
+it serves to domains labeled dom_HomeBanking and dom_Fun
+respectively. The labels we refer to are defined in the label template
+file policies/chwall_ste/chwall_ste-security-label-template.xml.
+
+step1: To distinguish different shared disk drives, we create a
+separate Label and STE type for each of them. Here: we create a type
+ste_PersistentStorageA for disk drive hda. If you have another disk
+drive, you may define another persistent storage type
+ste_PersistentStorageB in the chwall_ste-security_policy.xml.
+
+step2: To distinguish different domains, we create multiple domain
+labels including different types. Here: label dom_HomeBanking includes
+STE type ste_PersonalFinances, label dom_Fun includes STE type
+ste_InternetInsecure.
+
+step3: The storage domain in charge of the hard drive A needs access
+to this hard drive. Therefore the storage domain label
+dom_StorageDomain must include the type assigned to the hard drive
+(ste_PersistentStorageA).
+
+step4: In order to serve hda1 to domains labeled dom_HomeBanking
+and hda2 to domains labeled dom_Fun, the storage domain label must
+include the types of those domains as well (ste_PersonalFinances,
+ste_InternetInsecure).
+
+step5: In order to keep the data for different types safely apart, the
+different logical disk partitions must be assigned unique labels and
+types, which are used inside the storage domain to extend the ACM
+access enforcement to logical resources served from inside the storage
+domain. We define labels "res_LogicalDiskPartition1 (hda1)" and assign
+it to hda1 and "res_LogicalDiskPartition2 (hda2)" and assign it to
+hda2. These labels must include the STE types of those domains that
+are allowed to use them (e.g., ste_PersonalFinances for hda1).
+
+The overall mandatory access control is then enforced in 3 different
+Xen components and these components use a single consistent policy to
+co-operatively enforce the policy. In the storage domain example, we
+have three components that co-operate:
+
+1. The ACM module inside the hypervisor enforces: communication between
+user domains and the storage domain (only domains including types
+ste_PersonalFinances or ste_InternetInsecure can communicate with the
+storage domain and request access to logical resource). This confines
+the sharing to the types assigned to the storage domain.
+
+2. The domain management will enforce (work in progress): assignment of
+real resources (hda) to domains (storage domain) that share a
+type with the resource.
+
+3. If the storage domain serves multiple STE types (as in our example),
+it enforces (work in progress): that domains can access (mount)
+logical resources only if they share an STE type with the respective
+resource. In our example, domains with the STE type
+ste_PersonalFinances can request access (mount) to logical resource
+hda1 from the storage domain.
+
+If you look at the virtual machine label dom_StorageDomain, you will
+see the minimal set of types assigned to our domain managing disk
+drive hda for serving logical disk partitions exclusively to
+dom_HomeBanking and dom_Fun.
+
+Similarly, network domains can confine access to the network or
+network communication between user domains.
+
+As a result, device domains (e.g., storage domain, network domain)
+must be simple and small to ensure their correct co-operation in the
+type enforcement model. If such trust is not possible, then hardware
+should be assigned exclusively to a single type (or to a single
+partition) in which case the hypervisor ACM enforcement enforces the
+types independently.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/readme.txt
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/readme.txt Thu Aug 25 22:53:20 2005
@@ -0,0 +1,29 @@
+
+##
+# readme.txt <description of the Xen access control architecture>
+#
+# Author:
+# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx>
+#
+#
+# This file is a toc for information regarding
+# the access control policy and tools in Xen.
+##
+
+1. policy.txt:
+
+   describes the general reasoning and examples for access
+   control policies in Xen
+
+
+2. install.txt
+
+   describes the activation of the access control framework
+   in Xen
+
+3. example.txt
+
+   describes the available tools for managing security policies
+   in Xen and the tools to label domains
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_compat.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_compat.h    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,19 @@
+/* secpol_compat.h
+ *     'translates' data types necessary to
+ *     include <xen/acm.h>
+ *
+ *     Maps the short fixed-width names (u8 ... s64) onto the
+ *     <stdint.h> types. Guarded so that including it more than
+ *     once does not repeat the typedefs.
+ */
+#ifndef SECPOL_COMPAT_H
+#define SECPOL_COMPAT_H
+
+#include <stdint.h>
+
+typedef uint8_t  u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t   s8;
+typedef int16_t  s16;
+typedef int32_t  s32;
+typedef int64_t  s64;
+
+#endif /* SECPOL_COMPAT_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_xml2bin.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_xml2bin.c   Thu Aug 25 22:53:20 2005
@@ -0,0 +1,1396 @@
+/****************************************************************
+ * secpol_xml2bin.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ *
+ * Author: Reiner Sailer <sailer@xxxxxxxxxx>
+ *
+ * Maintained:
+ * Reiner Sailer <sailer@xxxxxxxxxx>
+ * Ray Valdez <rvaldez@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * sHype policy translation tool. This tool takes an XML
+ * policy specification as input and produces a binary
+ * policy file that can be loaded into Xen through the
+ * ACM operations (secpol_tool loadpolicy) interface or at
+ * boot time (grub module parameter)
+ *
+ * indent -i4 -kr -nut
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <libxml/xmlschemas.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xmlreader.h>
+#include "secpol_compat.h"
+#include <xen/acm.h>
+
+#include "secpol_xml2bin.h"
+
+/* when non-zero, the parsed type, conflict set and ssid queues are
+ * printed after they have been built */
+#define DEBUG    0
+
+/* primary / secondary policy component setting */
+enum policycomponent { CHWALL, STE, NULLPOLICY }
+    primary = NULLPOLICY, secondary = NULLPOLICY;
+
+/* general list element for ste and chwall type queues */
+struct type_entry {
+    TAILQ_ENTRY(type_entry) entries;
+    char *name;                 /* name of type from xml file */
+    type_t mapping;             /* type mapping into 16bit */
+};
+
+/* queues of all STE types and all CHWALL types read from the policy */
+TAILQ_HEAD(tailhead, type_entry) ste_head, chwall_head;
+
+/* general list element for all label queues */
+enum label_type { VM, RES, ANY };
+struct ssid_entry {
+    TAILQ_ENTRY(ssid_entry) entries;
+    char *name;                 /* label name */
+    enum label_type type;       /* type: VM / RESOURCE LABEL */
+    u_int32_t num;              /* ssid or referenced ssid */
+    /* if this entry references earlier ssid number */
+    int is_ref;
+    unsigned char *row;         /* index of types (if not a reference) */
+};
+
+/* ssid queues per policy component; conflict sets reuse ssid_entry */
+TAILQ_HEAD(tailhead_ssid, ssid_entry) ste_ssid_head, chwall_ssid_head,
+    conflictsets_head;
+/* entries that are currently being filled while walking the xml tree */
+struct ssid_entry *current_chwall_ssid_p = NULL;
+struct ssid_entry *current_ste_ssid_p = NULL;
+struct ssid_entry *current_conflictset_p = NULL;
+
+/* which label to assign to dom0 during boot */
+char *bootstrap_label;
+
+/* counters; each is advanced when an entry is added to its queue */
+u_int32_t max_ste_ssids = 0;
+u_int32_t max_chwall_ssids = 0;
+u_int32_t max_chwall_labels = 0;
+u_int32_t max_ste_labels = 0;
+u_int32_t max_conflictsets = 0;
+
+char *current_ssid_name;        /* store name until structure is allocated */
+char *current_conflictset_name; /* store name until structure is allocated */
+
+/* dynamic list of type mappings for STE */
+u_int32_t max_ste_types = 0;
+
+/* dynamic list of type mappings for CHWALL */
+u_int32_t max_chwall_types = 0;
+
+/* dynamic list of conflict sets */
+int max_conflict_set = 0;
+
+/* which policies are defined */
+int have_ste = 0;
+int have_chwall = 0;
+
+/* input/output file names */
+char *policy_filename = NULL,
+    *label_filename = NULL,
+    *binary_filename = NULL, *mapping_filename = NULL;
+
+/* Prints the command line synopsis for program name >prg< to stdout
+ * and terminates the process with EXIT_FAILURE. */
+void usage(char *prg)
+{
+    fprintf(stdout,
+            "usage:\n%s policyname[-policy.xml/-security_label_template.xml]\n",
+            prg);
+    exit(EXIT_FAILURE);
+}
+
+
+/***************** policy-related parsing *********************/
+
+/* Returns the name of the first type in >head< whose mapping equals
+ * >mapping<, or NULL if the queue holds no such type. */
+char *type_by_mapping(struct tailhead *head, u_int32_t mapping)
+{
+    struct type_entry *cur = head->tqh_first;
+
+    while (cur != NULL) {
+        if (cur->mapping == mapping)
+            return cur->name;
+        cur = cur->entries.tqe_next;
+    }
+    return NULL;
+}
+
+
+/* Returns the first entry in >head< whose name is equal to >name<
+ * (strcmp), or NULL if there is none. */
+struct type_entry *lookup(struct tailhead *head, char *name)
+{
+    struct type_entry *cur = head->tqh_first;
+
+    while (cur != NULL) {
+        if (strcmp(cur->name, name) == 0)
+            return cur;
+        cur = cur->entries.tqe_next;
+    }
+    return NULL;
+}
+
+/* enforces single-entry lists:
+ * appends a new element (name, mapping) to the tail of >head< unless
+ * an element with the same name is already queued.
+ * The >name< pointer is stored as is; the string is not copied.
+ * Returns 0 on success, -EFAULT if the name is already in the list
+ * and -ENOMEM if the element cannot be allocated.
+ */
+int add_entry(struct tailhead *head, char *name, type_t mapping)
+{
+    struct type_entry *entry;
+
+    if (lookup(head, name) != NULL) {
+        printf("Error: Type >%s< defined more than once.\n", name);
+        return -EFAULT;         /* already in the list */
+    }
+
+    entry = malloc(sizeof(*entry));
+    if (entry == NULL)
+        return -ENOMEM;
+
+    entry->mapping = mapping;
+    entry->name = name;
+    TAILQ_INSERT_TAIL(head, entry, entries);
+    return 0;
+}
+
+/* Maps the xml element name >tok< to its index in the NULL-terminated
+ * token[] table; returns -EFAULT if the name is not in the table. */
+int totoken(char *tok)
+{
+    int idx = 0;
+
+    while (token[idx] != NULL) {
+        if (strcmp(token[idx], tok) == 0)
+            return idx;
+        idx++;
+    }
+    return -EFAULT;
+}
+
+/* conflictsets use the same data structure as ssids; since
+ * they are similar in structure (set of types)
+ *
+ * Allocates a new, empty conflict set named current_conflictset_name,
+ * appends it to conflictsets_head and makes it the current conflict
+ * set. Returns 0 on success and -ENOMEM if an allocation fails; in the
+ * failure case nothing is queued, counted or leaked.
+ */
+int init_next_conflictset(void)
+{
+    struct ssid_entry *conflictset = malloc(sizeof(struct ssid_entry));
+
+    if (!conflictset)
+        return -ENOMEM;
+
+        /**
+         *  row: allocate one zeroed byte per type;
+         *  [i] != 0 --> mapped type >i< is part of the conflictset
+         *  (at least one byte is requested because an allocation of
+         *  size 0 is allowed to return NULL without being an error)
+         */
+    conflictset->row = calloc(max_chwall_types ? max_chwall_types : 1, 1);
+    if (!conflictset->row)
+    {
+        free(conflictset);
+        return -ENOMEM;
+    }
+
+    conflictset->name = current_conflictset_name;
+    conflictset->num = max_conflictsets++;
+    conflictset->is_ref = 0;    /* n/a for conflictsets */
+
+    TAILQ_INSERT_TAIL(&conflictsets_head, conflictset, entries);
+    current_conflictset_p = conflictset;
+    return 0;
+}
+
+/* Handles one >Type< leaf of the policy file.
+ *
+ * Depending on >state< the type name is registered as a new STE type,
+ * registered as a new CHWALL type, or marked as a member of the
+ * current conflict set.
+ *
+ * Ownership of the name string: when a type is registered, the string
+ * is stored in the type queue (add_entry keeps the pointer) and must
+ * not be freed here. In all other cases the string is only needed
+ * temporarily and is released before returning.
+ *
+ * Returns 0 on success and -EFAULT on any error.
+ */
+int register_type(xmlNode * cur_node, xmlDocPtr doc, unsigned long state)
+{
+    xmlChar *text;
+    struct type_entry *e;
+
+
+    text = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+    if (!text)
+    {
+        printf("Error reading type name!\n");
+        return -EFAULT;
+    }
+
+    switch (state) {
+    case XML2BIN_stetype_S:
+        if (add_entry(&ste_head, (char *) text, max_ste_types))
+        {
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* text is now owned by the ste type queue */
+        max_ste_types++;
+        break;
+
+    case XML2BIN_chwalltype_S:
+        if (add_entry(&chwall_head, (char *) text, max_chwall_types))
+        {
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* text is now owned by the chwall type queue */
+        max_chwall_types++;
+        break;
+
+    case XML2BIN_conflictsettype_S:
+        /* a conflict set must have been opened before types are added */
+        if (!current_conflictset_p)
+        {
+            printf("CS type >%s< outside of a conflict set.\n", text);
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* a) search the type in the chwall_type list */
+        e = lookup(&chwall_head, (char *) text);
+        if (e == NULL)
+        {
+            printf("CS type >%s< not a CHWALL type.\n", text);
+            xmlFree(text);
+            return -EFAULT;
+        }
+        /* b) add type entry to the current cs set */
+        if (current_conflictset_p->row[e->mapping])
+        {
+            printf("ERROR: Double entry of type >%s< in conflict set %d.\n",
+                 text, current_conflictset_p->num);
+            xmlFree(text);
+            return -EFAULT;
+        }
+        current_conflictset_p->row[e->mapping] = 1;
+        /* only the mapping is recorded; the name copy is not needed */
+        xmlFree(text);
+        break;
+
+    default:
+        printf("Incorrect type environment (state = %lx, text = %s).\n",
+               state, text);
+        xmlFree(text);
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/* Checks whether the policy component node >cur_node< carries the
+ * primary-component attribute (PRIMARY_COMPONENT_ATTR_NAME). If the
+ * attribute is absent nothing happens. If it is present, its value
+ * must equal PRIMARY_COMPONENT and no primary component may have been
+ * chosen before; >pc< then becomes the primary policy component.
+ * Any violation prints an error and terminates the program.
+ */
+void set_component_type(xmlNode * cur_node, enum policycomponent pc)
+{
+    xmlChar *order;
+
+    order = xmlGetProp(cur_node, (xmlChar *) PRIMARY_COMPONENT_ATTR_NAME);
+    if (!order)
+        return;                 /* attribute not set on this component */
+
+    if (strcmp((char *) order, PRIMARY_COMPONENT))
+    {
+        printf("ERROR: Illegal attribut value >order=%s<.\n",
+               (char *) order);
+        xmlFree(order);
+        exit(EXIT_FAILURE);
+    }
+    /* the attribute value is not needed beyond this point */
+    xmlFree(order);
+
+    if (primary != NULLPOLICY)
+    {
+        printf("ERROR: Primary Policy Component set twice!\n");
+        exit(EXIT_FAILURE);
+    }
+    primary = pc;
+}
+
+/* Recursively walks the policy DOM tree, starting at >start< and
+ * visiting all of its following siblings.
+ *
+ * >state< is a bit mask that records the enclosing elements: before
+ * descending into a container element, the bit (1 << token code) of
+ * that element is added. Type leaves are handed to register_type()
+ * together with the accumulated state.
+ *
+ * Any unknown element, version mismatch or registration error prints
+ * a message and terminates the program.
+ */
+void walk_policy(xmlNode * start, xmlDocPtr doc, unsigned long state)
+{
+    xmlNode *cur_node = NULL;
+    int code;
+
+    for (cur_node = start; cur_node; cur_node = cur_node->next)
+    {
+        if ((code = totoken((char *) cur_node->name)) < 0)
+        {
+            printf("Unknown token: >%s<. Aborting.\n", cur_node->name);
+            exit(EXIT_FAILURE);
+        }
+        switch (code) {         /* adjust state to new state */
+        case XML2BIN_SECPOL:
+        case XML2BIN_STETYPES:
+        case XML2BIN_CHWALLTYPES:
+        case XML2BIN_CONFLICTSETS:
+            walk_policy(cur_node->children, doc, state | (1 << code));
+            break;
+
+        case XML2BIN_STE:
+            if (WRITTEN_AGAINST_ACM_STE_VERSION != ACM_STE_VERSION)
+            {
+                printf("ERROR: This program was written against another STE version.\n");
+                exit(EXIT_FAILURE);
+            }
+            have_ste = 1;
+            set_component_type(cur_node, STE);
+            walk_policy(cur_node->children, doc, state | (1 << code));
+            break;
+
+        case XML2BIN_CHWALL:
+            if (WRITTEN_AGAINST_ACM_CHWALL_VERSION != ACM_CHWALL_VERSION)
+            {
+                printf("ERROR: This program was written against another CHWALL version.\n");
+                exit(EXIT_FAILURE);
+            }
+            have_chwall = 1;
+            set_component_type(cur_node, CHWALL);
+            walk_policy(cur_node->children, doc, state | (1 << code));
+            break;
+
+        case XML2BIN_CSTYPE:
+            /* remember the name until the conflict set is allocated */
+            current_conflictset_name =
+                (char *) xmlGetProp(cur_node, (xmlChar *) "name");
+            if (!current_conflictset_name)
+                current_conflictset_name = "";
+
+            if (init_next_conflictset())
+            {
+                printf
+                    ("ERROR: creating new conflictset structure failed.\n");
+                exit(EXIT_FAILURE);
+            }
+            walk_policy(cur_node->children, doc, state | (1 << code));
+            break;
+
+        case XML2BIN_TYPE:
+            if (register_type(cur_node, doc, state))
+                exit(EXIT_FAILURE);
+            /* type leaf */
+            break;
+
+        case XML2BIN_TEXT:
+        case XML2BIN_COMMENT:
+        case XML2BIN_POLICYHEADER:
+            /* leaf - nothing to do */
+            break;
+
+        default:
+            /* token is known to totoken() but not valid in a policy */
+            printf("Unkonwn token Error (%d)\n", code);
+            exit(EXIT_FAILURE);
+        }
+
+    }
+    return;
+}
+
+/* Builds the type queues and conflict sets from the policy document
+ * >doc< and decides which policy component is primary and which is
+ * secondary. If the policy did not name a primary component, CHWALL
+ * is preferred over STE. With DEBUG set, the resulting queues are
+ * printed. Always returns 0 (errors terminate inside walk_policy).
+ */
+int create_type_mapping(xmlDocPtr doc)
+{
+    xmlNode *root_element = xmlDocGetRootElement(doc);
+    struct type_entry *te;
+    struct ssid_entry *se;
+    int i;
+
+    printf("Creating ssid mappings ...\n");
+
+    /* start with empty type and conflict set queues */
+    TAILQ_INIT(&ste_head);
+    TAILQ_INIT(&chwall_head);
+    TAILQ_INIT(&conflictsets_head);
+
+    walk_policy(root_element, doc, XML2BIN_NULL);
+
+    /* default primary component if the policy did not set one */
+    if (primary == NULLPOLICY) {
+        if (have_chwall)
+            primary = CHWALL;
+        else if (have_ste)
+            primary = STE;
+    }
+
+    /* the other defined component (if any) becomes secondary;
+     * otherwise secondary keeps its NULLPOLICY default */
+    if ((primary == CHWALL) && have_ste)
+        secondary = STE;
+    else if ((primary == STE) && have_chwall)
+        secondary = CHWALL;
+
+    if (!DEBUG)
+        return 0;
+
+    /* debug only: dump the queues */
+    if (have_ste) {
+        printf("STE-Type queue (%s):\n",
+               (primary == STE) ? "PRIMARY" : "SECONDARY");
+        te = ste_head.tqh_first;
+        while (te != NULL) {
+            printf("name=%22s, map=%x\n", te->name, te->mapping);
+            te = te->entries.tqe_next;
+        }
+    }
+    if (have_chwall) {
+        printf("CHWALL-Type queue (%s):\n",
+               (primary == CHWALL) ? "PRIMARY" : "SECONDARY");
+        te = chwall_head.tqh_first;
+        while (te != NULL) {
+            printf("name=%s, map=%x\n", te->name, te->mapping);
+            te = te->entries.tqe_next;
+        }
+
+        printf("Conflictset queue (max=%d):\n", max_conflictsets);
+        se = conflictsets_head.tqh_first;
+        while (se != NULL) {
+            printf("conflictset name >%s<\n",
+                   se->name ? se->name : "NONAME");
+            for (i = 0; i < max_chwall_types; i++) {
+                if (se->row[i])
+                    printf("#%x ", i);
+            }
+            printf("\n");
+            se = se->entries.tqe_next;
+        }
+    }
+    return 0;
+}
+
+
+/***************** template-related parsing *********************/
+
+/* add default ssid at head of ssid queues
+ *
+ * Creates one "DEFAULT" entry (no types set, label type ANY) for the
+ * chwall ssid queue and one for the ste ssid queue and makes them the
+ * current entries. All allocations are done up front so that on
+ * failure nothing is queued, counted or leaked.
+ * Returns 0 on success, -ENOMEM if an allocation fails.
+ */
+int init_ssid_queues(void)
+{
+    struct ssid_entry *default_ssid_chwall, *default_ssid_ste;
+
+    default_ssid_chwall = malloc(sizeof(struct ssid_entry));
+    default_ssid_ste = malloc(sizeof(struct ssid_entry));
+
+    if ((!default_ssid_chwall) || (!default_ssid_ste))
+        goto out_nomem;
+
+    /* one zeroed byte per type; request at least one byte because a
+     * policy component may define no types at all and an allocation
+     * of size 0 is allowed to return NULL without being an error */
+    default_ssid_chwall->row =
+        calloc(max_chwall_types ? max_chwall_types : 1, 1);
+    default_ssid_ste->row = calloc(max_ste_types ? max_ste_types : 1, 1);
+
+    if ((!default_ssid_chwall->row) || (!default_ssid_ste->row))
+    {
+        free(default_ssid_chwall->row);
+        free(default_ssid_ste->row);
+        goto out_nomem;
+    }
+
+    /* default chwall ssid */
+    default_ssid_chwall->name = "DEFAULT";
+    default_ssid_chwall->num = max_chwall_ssids++;
+    default_ssid_chwall->is_ref = 0;
+    default_ssid_chwall->type = ANY;
+
+    TAILQ_INSERT_TAIL(&chwall_ssid_head, default_ssid_chwall, entries);
+    current_chwall_ssid_p = default_ssid_chwall;
+    max_chwall_labels++;
+
+    /* default ste ssid */
+    default_ssid_ste->name = "DEFAULT";
+    default_ssid_ste->num = max_ste_ssids++;
+    default_ssid_ste->is_ref = 0;
+    default_ssid_ste->type = ANY;
+
+    TAILQ_INSERT_TAIL(&ste_ssid_head, default_ssid_ste, entries);
+    current_ste_ssid_p = default_ssid_ste;
+    max_ste_labels++;
+    return 0;
+
+out_nomem:
+    /* free(NULL) is a no-op, so partially allocated state is fine */
+    free(default_ssid_chwall);
+    free(default_ssid_ste);
+    return -ENOMEM;
+}
+
+/* Allocates a new, empty chwall ssid entry named current_ssid_name,
+ * appends it to the chwall ssid queue and makes it the current chwall
+ * ssid. The entry is typed VM if the XML2BIN_VM bit is set in >state<,
+ * RES otherwise.
+ * Returns 0 on success and -ENOMEM if an allocation fails; in the
+ * failure case nothing is queued, counted or leaked.
+ */
+int init_next_chwall_ssid(unsigned long state)
+{
+    struct ssid_entry *ssid = malloc(sizeof(struct ssid_entry));
+
+    if (!ssid)
+        return -ENOMEM;
+
+        /**
+         *  row: allocate one zeroed byte per type;
+         *  [i] != 0 --> mapped type >i< is part of the ssid
+         *  (at least one byte is requested because an allocation of
+         *  size 0 is allowed to return NULL without being an error)
+         */
+    ssid->row = calloc(max_chwall_types ? max_chwall_types : 1, 1);
+    if (!ssid->row)
+    {
+        free(ssid);
+        return -ENOMEM;
+    }
+
+    ssid->name = current_ssid_name;
+    ssid->num = max_chwall_ssids++;
+    ssid->is_ref = 0;
+
+    if (state & (1 << XML2BIN_VM))
+        ssid->type = VM;
+    else
+        ssid->type = RES;
+
+    TAILQ_INSERT_TAIL(&chwall_ssid_head, ssid, entries);
+    current_chwall_ssid_p = ssid;
+    max_chwall_labels++;
+    return 0;
+}
+
+/* Allocates a new, empty ste ssid entry named current_ssid_name,
+ * appends it to the ste ssid queue and makes it the current ste ssid.
+ * The entry is typed VM if the XML2BIN_VM bit is set in >state<,
+ * RES otherwise.
+ * Returns 0 on success and -ENOMEM if an allocation fails; in the
+ * failure case nothing is queued, counted or leaked.
+ */
+int init_next_ste_ssid(unsigned long state)
+{
+    struct ssid_entry *ssid = malloc(sizeof(struct ssid_entry));
+
+    if (!ssid)
+        return -ENOMEM;
+
+        /**
+         *  row: allocate one zeroed byte per type;
+         *  [i] != 0 --> mapped type >i< is part of the ssid
+         *  (at least one byte is requested because an allocation of
+         *  size 0 is allowed to return NULL without being an error)
+         */
+    ssid->row = calloc(max_ste_types ? max_ste_types : 1, 1);
+    if (!ssid->row)
+    {
+        free(ssid);
+        return -ENOMEM;
+    }
+
+    ssid->name = current_ssid_name;
+    ssid->num = max_ste_ssids++;
+    ssid->is_ref = 0;
+
+    if (state & (1 << XML2BIN_VM))
+        ssid->type = VM;
+    else
+        ssid->type = RES;
+
+    TAILQ_INSERT_TAIL(&ste_ssid_head, ssid, entries);
+    current_ste_ssid_p = ssid;
+    max_ste_labels++;
+
+    return 0;
+}
+
+
+/* adds a type to the current ssid
+ *
+ * Reads the type name from the >Type< leaf >cur_node<, looks up its
+ * mapping and marks that mapping in the row of the current ste or
+ * chwall ssid, depending on >state<. A type that is already marked
+ * only produces a warning. An unknown type name terminates the
+ * program. The name string is only needed for the lookup and is
+ * released before returning.
+ * Returns 0 on success, -EFAULT if the name cannot be read or the
+ * leaf appears in an unexpected context.
+ */
+int add_type(xmlNode * cur_node, xmlDocPtr doc, unsigned long state)
+{
+    xmlChar *text;
+    struct type_entry *e;
+
+    text = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);
+    if (!text)
+    {
+        printf("Error reading type name!\n");
+        return -EFAULT;
+    }
+    /* same for all: 1. lookup type mapping, 2. mark type in ssid */
+    switch (state) {
+    case XML2BIN_VM_STE_S:
+    case XML2BIN_RES_STE_S:
+        /* lookup the type mapping and include the type mapping
+         * into the array */
+        if (!(e = lookup(&ste_head, (char *) text)))
+        {
+            printf("ERROR: unknown VM STE type >%s<.\n", text);
+            exit(EXIT_FAILURE);
+        }
+        if (current_ste_ssid_p->row[e->mapping])
+            printf("Warning: double entry of VM STE type >%s<.\n", text);
+
+        current_ste_ssid_p->row[e->mapping] = 1;
+        break;
+
+    case XML2BIN_VM_CHWALL_S:
+        /* lookup the type mapping and include the type mapping
+         * into the array */
+        if (!(e = lookup(&chwall_head, (char *) text)))
+        {
+            printf("ERROR: unknown VM CHWALL type >%s<.\n", text);
+            exit(EXIT_FAILURE);
+        }
+        if (current_chwall_ssid_p->row[e->mapping])
+            printf("Warning: double entry of VM CHWALL type >%s<.\n",
+                   text);
+
+        current_chwall_ssid_p->row[e->mapping] = 1;
+        break;
+
+    default:
+        printf("Incorrect type environment (state = %lx, text = %s).\n",
+               state, text);
+        xmlFree(text);
+        return -EFAULT;
+    }
+    /* only the mapping was recorded; the name copy is not kept */
+    xmlFree(text);
+    return 0;
+}
+
+/* Stores the value of the BOOTSTRAP_LABEL_ATTR_NAME attribute of
+ * >cur_node< in bootstrap_label. Terminates the program with an error
+ * message if the attribute is missing. */
+void set_bootstrap_label(xmlNode * cur_node)
+{
+    xmlChar *label =
+        xmlGetProp(cur_node, (xmlChar *) BOOTSTRAP_LABEL_ATTR_NAME);
+
+    if (label == NULL) {
+        printf("ERROR: No bootstrap label defined!\n");
+        exit(EXIT_FAILURE);
+    }
+    bootstrap_label = (char *) label;
+}
+
+/* Recursively walks the label template DOM tree, starting at >start<
+ * and visiting all of its following siblings.
+ *
+ * >state< is a bit mask of the enclosing elements; the bit
+ * (1 << token code) is added before descending into a container.
+ * Type lists open a new ssid entry, type leaves are added to the
+ * current ssid and name elements set the name used for the next
+ * ssid entries. Any error prints a message and terminates the program.
+ */
+void walk_labels(xmlNode * start, xmlDocPtr doc, unsigned long state)
+{
+    xmlNode *node;
+    int tok;
+
+    for (node = start; node != NULL; node = node->next) {
+        tok = totoken((char *) node->name);
+        if (tok < 0) {
+            printf("Unkonwn token: >%s<. Aborting.\n", node->name);
+            exit(EXIT_FAILURE);
+        }
+
+        switch (tok) {          /* adjust state to new state */
+
+        case XML2BIN_SUBJECTS:
+        case XML2BIN_VM:
+        case XML2BIN_RES:
+        case XML2BIN_SECTEMPLATE:
+        case XML2BIN_OBJECTS:
+            /* only the subjects element carries the bootstrap label */
+            if (tok == XML2BIN_SUBJECTS)
+                set_bootstrap_label(node);
+            walk_labels(node->children, doc, state | (1 << tok));
+            break;
+
+        case XML2BIN_STETYPES:
+            /* create new ssid entry to use and point current to it */
+            if (init_next_ste_ssid(state) != 0) {
+                printf("ERROR: creating new ste ssid structure failed.\n");
+                exit(EXIT_FAILURE);
+            }
+            walk_labels(node->children, doc, state | (1 << tok));
+            break;
+
+        case XML2BIN_CHWALLTYPES:
+            /* create new ssid entry to use and point current to it */
+            if (init_next_chwall_ssid(state) != 0) {
+                printf("ERROR: creating new chwall ssid structure failed.\n");
+                exit(EXIT_FAILURE);
+            }
+            walk_labels(node->children, doc, state | (1 << tok));
+            break;
+
+        case XML2BIN_TYPE:
+            /* add type to current ssid */
+            if (add_type(node, doc, state) != 0)
+                exit(EXIT_FAILURE);
+            break;
+
+        case XML2BIN_NAME:
+            /* a name is only valid directly inside a VM or RES label */
+            if (!((state == XML2BIN_VM_S) || (state == XML2BIN_RES_S))) {
+                printf("ERROR: >name< out of VM/RES context.\n");
+                exit(EXIT_FAILURE);
+            }
+            current_ssid_name = (char *)
+                xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
+            if (current_ssid_name == NULL) {
+                printf("ERROR: empty >name<!\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+
+        case XML2BIN_TEXT:
+        case XML2BIN_COMMENT:
+        case XML2BIN_LABELHEADER:
+            /* leaf - nothing to do */
+            break;
+
+        default:
+            printf("Unkonwn token Error (%d)\n", tok);
+            exit(EXIT_FAILURE);
+        }
+    }
+    return;
+}
+
+/* this function walks through a ssid queue
+ * and transforms double entries into references
+ * of the first definition (we need to keep the
+ * entry to map labels but we don't want double
+ * ssids in the binary policy)
+ *
+ * head:      ssid queue to process
+ * max_types: number of bytes compared per row
+ * max_ssids: out; set to the number of distinct (non-reference)
+ *            ssids that remain after the doubles were folded
+ */
+void
+remove_doubles(struct tailhead_ssid *head,
+                        u_int32_t max_types, u_int32_t * max_ssids)
+{
+    struct ssid_entry *np, *ni;
+
+    /* walk once through the list */
+    for (np = head->tqh_first; np != NULL; np = np->entries.tqe_next)
+    {
+        /* now search from the start until np for the same entry */
+        for (ni = head->tqh_first; ni != np; ni = ni->entries.tqe_next)
+        {
+            if (ni->is_ref)
+                continue;
+            if (memcmp(np->row, ni->row, max_types))
+                continue;
+            /* found one, set np reference to ni */
+            np->is_ref = 1;
+            np->num = ni->num;
+            /* every earlier double of ni is already a reference, so
+             * no other entry before np can match: stop searching */
+            break;
+        }
+    }
+
+    /* now minimize the ssid numbers used (doubles introduce holes);
+     * the count is rebuilt from scratch while renumbering */
+    (*max_ssids) = 0; /* reset */
+
+    for (np = head->tqh_first; np != NULL; np = np->entries.tqe_next)
+    {
+        if (np->is_ref)
+            continue;
+
+        if (np->num != (*max_ssids)) {
+                /* first reset all later references to the new max_ssid */
+                for (ni = np->entries.tqe_next; ni != NULL;
+                     ni = ni->entries.tqe_next)
+                {
+                    if (ni->num == np->num)
+                        ni->num = (*max_ssids);
+                }
+                /* now reset num */
+                np->num = (*max_ssids)++;
+        }
+        else
+            (*max_ssids)++;
+    }
+}
+
+/*
+ * will go away as soon as we have non-static bootstrap ssidref for dom0
+ *
+ * Moves the entry whose name equals bootstrap_label directly behind
+ * the first entry of >head< (the default ssid that init_ssid_queues()
+ * puts at the head) and renumbers all entries in list order, so the
+ * bootstrap label receives number 1.
+ *
+ * The head entry itself is never a candidate: it has to stay in front,
+ * and moving it would dereference a NULL list element if it were the
+ * only entry. Entries without a name are skipped.
+ *
+ * Terminates the program if no bootstrap label is defined or found.
+ * >max_types< and >max_ssids< are not used.
+ */
+void fixup_bootstrap_label(struct tailhead_ssid *head,
+                         u_int32_t max_types, u_int32_t * max_ssids)
+{
+    struct ssid_entry *first, *np;
+    int i;
+
+    /* parameters are kept for interface compatibility only */
+    (void) max_types;
+    (void) max_ssids;
+
+    /* should not happen if xml / xsd checks work */
+    if (!bootstrap_label)
+    {
+        printf("ERROR: No bootstrap label defined.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* search bootstrap_label behind the default entry */
+    first = head->tqh_first;
+    for (np = first ? first->entries.tqe_next : NULL; np != NULL;
+         np = np->entries.tqe_next)
+    {
+        if (np->name && !strcmp(np->name, bootstrap_label))
+        {
+            break;
+        }
+    }
+
+    if (!np) {
+        /* bootstrap label not found */
+        printf("ERROR: Bootstrap label >%s< not found.\n", bootstrap_label);
+        exit(EXIT_FAILURE);
+    }
+
+    /* move this entry ahead in the list right after the default entry so it
+     * receives ssidref 1/1 */
+    TAILQ_REMOVE(head, np, entries);
+    TAILQ_INSERT_AFTER(head, first, np, entries);
+
+    /* renumber the ssids (we could also just switch places with
+     * 1st element) */
+    for (np = head->tqh_first, i=0; np != NULL; np = np->entries.tqe_next, i++)
+        np->num   = i;
+
+}
+
+/* Builds the chwall and ste ssid queues from the label template
+ * document >doc<: seeds both queues with the default ssid, walks the
+ * template, moves the bootstrap label behind the default entry and
+ * folds identical ssids into references. With DEBUG set, the
+ * resulting queues are printed. Always returns 0; errors terminate
+ * the program.
+ */
+int create_ssid_mapping(xmlDocPtr doc)
+{
+    xmlNode *root_element = xmlDocGetRootElement(doc);
+    struct ssid_entry *entry;
+    int i;
+
+    printf("Creating label mappings ...\n");
+
+    /* start with empty ssid queues ... */
+    TAILQ_INIT(&chwall_ssid_head);
+    TAILQ_INIT(&ste_ssid_head);
+
+    /* ... and seed them with the default ssids */
+    if (init_ssid_queues() != 0) {
+        printf("ERROR adding default ssids.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* now walk the template DOM tree and fill in ssids */
+    walk_labels(root_element, doc, XML2BIN_NULL);
+
+    /*
+     * now sort bootstrap label to the head of the list
+     * (for now), dom0 assumes its label in the first
+     * defined ssidref (1/1). 0/0 is the default non-Label
+     */
+    if (have_chwall)
+        fixup_bootstrap_label(&chwall_ssid_head, max_chwall_types,
+                              &max_chwall_ssids);
+    if (have_ste)
+        fixup_bootstrap_label(&ste_ssid_head, max_ste_types,
+                              &max_ste_ssids);
+
+    /* remove any double entries (insert reference instead) */
+    if (have_chwall)
+        remove_doubles(&chwall_ssid_head, max_chwall_types,
+                       &max_chwall_ssids);
+    if (have_ste)
+        remove_doubles(&ste_ssid_head, max_ste_types, &max_ste_ssids);
+
+    if (!DEBUG)
+        return 0;
+
+    /* debug only: dump the queues */
+    if (have_chwall) {
+        printf("CHWALL SSID queue (max ssidrefs=%d):\n", max_chwall_ssids);
+        entry = chwall_ssid_head.tqh_first;
+        while (entry != NULL) {
+            printf("SSID #%02u (Label=%s)\n", entry->num, entry->name);
+            if (entry->is_ref) {
+                printf("REFERENCE");
+            } else {
+                for (i = 0; i < max_chwall_types; i++) {
+                    if (entry->row[i])
+                        printf("#%02d ", i);
+                }
+            }
+            printf("\n\n");
+            entry = entry->entries.tqe_next;
+        }
+    }
+    if (have_ste) {
+        printf("STE SSID queue (max ssidrefs=%d):\n", max_ste_ssids);
+        entry = ste_ssid_head.tqh_first;
+        while (entry != NULL) {
+            printf("SSID #%02u (Label=%s)\n", entry->num, entry->name);
+            if (entry->is_ref) {
+                printf("REFERENCE");
+            } else {
+                for (i = 0; i < max_ste_types; i++) {
+                    if (entry->row[i])
+                        printf("#%02d ", i);
+                }
+            }
+            printf("\n\n");
+            entry = entry->entries.tqe_next;
+        }
+    }
+    return 0;
+}
+
+/***************** writing the binary policy *********************/
+
+/*
+ * Write the label / ssid / type mapping file.
+ *
+ * the mapping file is ascii-based since it will likely be used from
+ * within scripts (using awk, grep, etc.);
+ *
+ * We print from high-level to low-level information so that with one
+ * pass, any symbol can be resolved (e.g. Label -> types)
+ *
+ * filename - path of the mapping file to create (truncated if present)
+ *
+ * Returns 0 on success and -EIO if the file cannot be opened or if
+ * writing to it fails.
+ */
+int write_mapping(char *filename)
+{
+
+    struct ssid_entry *e;
+    struct type_entry *t;
+    int i;
+    int err = 0;
+    FILE *file;
+
+    if ((file = fopen(filename, "w")) == NULL)
+        return -EIO;
+
+    fprintf(file, "MAGIC                  %08x\n", ACM_MAGIC);
+    fprintf(file, "POLICY                 %s\n",
+            basename(policy_filename));
+    fprintf(file, "BINARY                 %s\n",
+            basename(binary_filename));
+    if (have_chwall)
+    {
+        fprintf(file, "MAX-CHWALL-TYPES       %08x\n", max_chwall_types);
+        fprintf(file, "MAX-CHWALL-SSIDS       %08x\n", max_chwall_ssids);
+        fprintf(file, "MAX-CHWALL-LABELS      %08x\n", max_chwall_labels);
+    }
+    if (have_ste)
+    {
+        fprintf(file, "MAX-STE-TYPES          %08x\n", max_ste_types);
+        fprintf(file, "MAX-STE-SSIDS          %08x\n", max_ste_ssids);
+        fprintf(file, "MAX-STE-LABELS         %08x\n", max_ste_labels);
+    }
+    fprintf(file, "\n");
+
+    /* primary / secondary order for combined ssid synthesis/analysis
+     * if no primary is named, then chwall is primary */
+    switch (primary) {
+    case CHWALL:
+        fprintf(file, "PRIMARY                CHWALL\n");
+        break;
+
+    case STE:
+        fprintf(file, "PRIMARY                STE\n");
+        break;
+
+    default:
+        fprintf(file, "PRIMARY                NULL\n");
+        break;
+    }
+
+    switch (secondary) {
+    case CHWALL:
+        fprintf(file, "SECONDARY              CHWALL\n");
+        break;
+
+    case STE:
+        fprintf(file, "SECONDARY              STE\n");
+        break;
+
+    default:
+        fprintf(file, "SECONDARY              NULL\n");
+        break;
+    }
+    fprintf(file, "\n");
+
+    /* first labels to ssid mappings */
+    if (have_chwall)
+    {
+        for (e = chwall_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            fprintf(file, "LABEL->SSID %s CHWALL %-25s %8x\n",
+                    (e->type ==
+                     VM) ? "VM " : ((e->type == RES) ? "RES" : "ANY"),
+                    e->name, e->num);
+        }
+        fprintf(file, "\n");
+    }
+    if (have_ste)
+    {
+        for (e = ste_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            fprintf(file, "LABEL->SSID %s STE    %-25s %8x\n",
+                    (e->type ==
+                     VM) ? "VM " : ((e->type == RES) ? "RES" : "ANY"),
+                    e->name, e->num);
+        }
+        fprintf(file, "\n");
+    }
+
+    /* second ssid to type mappings; reference entries carry no row of
+     * their own and are skipped */
+    if (have_chwall)
+    {
+        for (e = chwall_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            if (e->is_ref)
+                continue;
+
+            fprintf(file, "SSID->TYPE CHWALL      %08x", e->num);
+
+            for (i = 0; i < max_chwall_types; i++)
+                if (e->row[i])
+                    fprintf(file, " %s", type_by_mapping(&chwall_head, i));
+
+            fprintf(file, "\n");
+        }
+        fprintf(file, "\n");
+    }
+    if (have_ste) {
+        for (e = ste_ssid_head.tqh_first; e != NULL;
+             e = e->entries.tqe_next)
+        {
+            if (e->is_ref)
+                continue;
+
+            fprintf(file, "SSID->TYPE STE         %08x", e->num);
+
+            for (i = 0; i < max_ste_types; i++)
+                if (e->row[i])
+                    fprintf(file, " %s", type_by_mapping(&ste_head, i));
+
+            fprintf(file, "\n");
+        }
+        fprintf(file, "\n");
+    }
+    /* third type mappings */
+    if (have_chwall)
+    {
+        for (t = chwall_head.tqh_first; t != NULL; t = t->entries.tqe_next)
+        {
+            fprintf(file, "TYPE CHWALL            %-25s %8x\n",
+                    t->name, t->mapping);
+        }
+        fprintf(file, "\n");
+    }
+    if (have_ste) {
+        for (t = ste_head.tqh_first; t != NULL; t = t->entries.tqe_next)
+        {
+            fprintf(file, "TYPE STE               %-25s %8x\n",
+                    t->name, t->mapping);
+        }
+        fprintf(file, "\n");
+    }
+
+    /* stdio buffers the output, so a failed write may only show up in the
+     * stream's error flag or when fclose() flushes the buffer */
+    if (ferror(file))
+        err = -EIO;
+    if (fclose(file) != 0)
+        err = -EIO;
+    return err;
+}
+
+/*
+ * Serialize the Chinese Wall policy component into a newly allocated buffer.
+ *
+ * Layout: struct acm_chwall_policy_buffer header, then one row of
+ * max_chwall_types type_t values for every non-reference entry of the
+ * chwall ssid queue, then one such row per conflict set. Header fields and
+ * rows are stored in network byte order.
+ *
+ * len_chwall - receives the size of the returned buffer in bytes
+ *
+ * Returns the malloc'ed buffer, or NULL (leaving *len_chwall untouched) if
+ * no chwall policy is present. Terminates the program if memory cannot be
+ * allocated or if the queues do not add up to the computed length.
+ */
+unsigned char *write_chwall_binary(u_int32_t * len_chwall)
+{
+    unsigned char *buf, *ptr;
+    struct acm_chwall_policy_buffer *chwall_header;
+    u_int32_t len;
+    size_t row_len;
+    struct ssid_entry *e;
+    int i;
+
+    if (!have_chwall)
+        return NULL;
+
+    /* every row (ssid or conflict set) holds one type_t per chwall type */
+    row_len = sizeof(type_t) * max_chwall_types;
+
+    len = sizeof(struct acm_chwall_policy_buffer) +
+        sizeof(type_t) * max_chwall_types * max_chwall_ssids +
+        sizeof(type_t) * max_chwall_types * max_conflictsets;
+
+    buf = malloc(len);
+    ptr = buf;
+
+    if (!buf)
+    {
+        printf("ERROR: out of memory allocating chwall buffer.\n");
+        exit(EXIT_FAILURE);
+    }
+    /* chwall has 3 parts : header, types, conflictsets */
+
+    chwall_header = (struct acm_chwall_policy_buffer *) buf;
+    chwall_header->chwall_max_types = htonl(max_chwall_types);
+    chwall_header->chwall_max_ssidrefs = htonl(max_chwall_ssids);
+    chwall_header->policy_code = htonl(ACM_CHINESE_WALL_POLICY);
+    chwall_header->policy_version = htonl(ACM_CHWALL_VERSION);
+    chwall_header->chwall_ssid_offset =
+        htonl(sizeof(struct acm_chwall_policy_buffer));
+    chwall_header->chwall_max_conflictsets = htonl(max_conflictsets);
+    /* the conflict sets follow the ssid rows, which are laid out in units
+     * of type_t below, so the offset is computed with the same unit */
+    chwall_header->chwall_conflict_sets_offset =
+        htonl(ntohl(chwall_header->chwall_ssid_offset) +
+              sizeof(type_t) * max_chwall_ssids * max_chwall_types);
+    /* not set, only retrieved */
+    chwall_header->chwall_running_types_offset = 0;
+    /* not set, only retrieved */
+    chwall_header->chwall_conflict_aggregate_offset = 0;
+    ptr += sizeof(struct acm_chwall_policy_buffer);
+
+    /* types */
+    for (e = chwall_ssid_head.tqh_first; e != NULL;
+         e = e->entries.tqe_next)
+    {
+        if (e->is_ref)
+            continue;
+
+        /* refuse to write past the buffer when the queue holds more rows
+         * than max_chwall_ssids accounts for */
+        if ((size_t) (ptr - buf) + row_len > len)
+            goto bad_len;
+
+        for (i = 0; i < max_chwall_types; i++)
+            ((type_t *) ptr)[i] = htons((type_t) e->row[i]);
+
+        ptr += row_len;
+    }
+
+    /* conflictsets */
+    for (e = conflictsets_head.tqh_first; e != NULL;
+         e = e->entries.tqe_next)
+    {
+        if ((size_t) (ptr - buf) + row_len > len)
+            goto bad_len;
+
+        for (i = 0; i < max_chwall_types; i++)
+            ((type_t *) ptr)[i] = htons((type_t) e->row[i]);
+
+        ptr += row_len;
+    }
+
+    /* too few rows are as much a mismatch as too many */
+    if ((size_t) (ptr - buf) != len)
+        goto bad_len;
+
+    (*len_chwall) = len;
+    return buf;
+
+  bad_len:
+    printf("ERROR: wrong lengths in %s.\n", __func__);
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Serialize the Simple Type Enforcement policy component into a newly
+ * allocated buffer.
+ *
+ * Layout: struct acm_ste_policy_buffer header followed by one row of
+ * max_ste_types type_t values for every non-reference entry of the ste
+ * ssid queue. Header fields and rows are stored in network byte order.
+ *
+ * len_ste - receives the size of the returned buffer in bytes
+ *
+ * Returns the malloc'ed buffer, or NULL (leaving *len_ste untouched) if no
+ * ste policy is present. Terminates the program if memory cannot be
+ * allocated or if the queue does not add up to the computed length.
+ */
+unsigned char *write_ste_binary(u_int32_t * len_ste)
+{
+    unsigned char *buf, *ptr;
+    struct acm_ste_policy_buffer *ste_header;
+    struct ssid_entry *e;
+    u_int32_t len;
+    size_t row_len;
+    int i;
+
+    if (!have_ste)
+        return NULL;
+
+    /* every ssid row holds one type_t per ste type */
+    row_len = sizeof(type_t) * max_ste_types;
+
+    len = sizeof(struct acm_ste_policy_buffer) +
+        sizeof(type_t) * max_ste_types * max_ste_ssids;
+
+    buf = malloc(len);
+    ptr = buf;
+
+    if (!buf)
+    {
+        printf("ERROR: out of memory allocating ste buffer.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* fill buffer */
+    ste_header = (struct acm_ste_policy_buffer *) buf;
+    ste_header->policy_version = htonl(ACM_STE_VERSION);
+    ste_header->policy_code = htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
+    ste_header->ste_max_types = htonl(max_ste_types);
+    ste_header->ste_max_ssidrefs = htonl(max_ste_ssids);
+    ste_header->ste_ssid_offset =
+        htonl(sizeof(struct acm_ste_policy_buffer));
+
+    ptr += sizeof(struct acm_ste_policy_buffer);
+
+    /* types */
+    for (e = ste_ssid_head.tqh_first; e != NULL; e = e->entries.tqe_next)
+    {
+        if (e->is_ref)
+            continue;
+
+        /* refuse to write past the buffer when the queue holds more rows
+         * than max_ste_ssids accounts for */
+        if ((size_t) (ptr - buf) + row_len > len)
+            goto bad_len;
+
+        for (i = 0; i < max_ste_types; i++)
+            ((type_t *) ptr)[i] = htons((type_t) e->row[i]);
+
+        ptr += row_len;
+    }
+
+    /* too few rows are as much a mismatch as too many */
+    if ((size_t) (ptr - buf) != len)
+        goto bad_len;
+
+    (*len_ste) = len;
+    return buf;
+
+  bad_len:
+    printf("ERROR: wrong lengths in %s.\n", __func__);
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Write the binary policy file: a struct acm_policy_buffer header followed
+ * by the primary and then the secondary policy component.
+ *
+ * filename - path of the binary policy file (created or truncated,
+ *            readable and writable by the owner only)
+ *
+ * Returns 0 on success and -EIO if the file cannot be opened, written
+ * completely, or closed.
+ */
+int write_binary(char *filename)
+{
+    struct acm_policy_buffer header;
+    unsigned char *ste_buffer = NULL, *chwall_buffer = NULL;
+    u_int32_t len;
+    int fd;
+    int ret = -EIO;
+
+    /* length of policy components */
+    u_int32_t len_ste = 0, len_chwall = 0;
+
+    /* open binary file; only a negative value signals failure, 0 is a
+     * perfectly valid descriptor */
+    if ((fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC,
+                   S_IRUSR | S_IWUSR)) < 0)
+        return -EIO;
+
+    ste_buffer = write_ste_binary(&len_ste);
+    chwall_buffer = write_chwall_binary(&len_chwall);
+
+    /* make sure no uninitialised bytes of the header reach the file */
+    memset(&header, 0, sizeof(header));
+
+    /* determine primary component (default chwall) */
+    header.policy_version = htonl(ACM_POLICY_VERSION);
+    header.magic = htonl(ACM_MAGIC);
+
+    len = sizeof(struct acm_policy_buffer);
+    if (have_chwall)
+        len += len_chwall;
+    if (have_ste)
+        len += len_ste;
+    header.len = htonl(len);
+
+    header.primary_buffer_offset = htonl(sizeof(struct acm_policy_buffer));
+    if (primary == CHWALL)
+    {
+        header.primary_policy_code = htonl(ACM_CHINESE_WALL_POLICY);
+        header.secondary_buffer_offset =
+            htonl((sizeof(struct acm_policy_buffer)) + len_chwall);
+    }
+    else if (primary == STE)
+    {
+        header.primary_policy_code =
+            htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
+        header.secondary_buffer_offset =
+            htonl((sizeof(struct acm_policy_buffer)) + len_ste);
+    }
+    else
+    {
+        /* null policy: primary_buffer_offset is already in network byte
+         * order, so it is copied as is instead of being converted again */
+        header.primary_policy_code = htonl(ACM_NULL_POLICY);
+        header.secondary_buffer_offset = header.primary_buffer_offset;
+    }
+
+    if (secondary == CHWALL)
+        header.secondary_policy_code = htonl(ACM_CHINESE_WALL_POLICY);
+    else if (secondary == STE)
+        header.secondary_policy_code =
+            htonl(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY);
+    else
+        header.secondary_policy_code = htonl(ACM_NULL_POLICY);
+
+    if (write(fd, (void *) &header, sizeof(struct acm_policy_buffer))
+        != sizeof(struct acm_policy_buffer))
+        goto out;
+
+    /* write primary policy component */
+    if (primary == CHWALL)
+    {
+        if (write(fd, chwall_buffer, len_chwall) != len_chwall)
+            goto out;
+    }
+    else if (primary == STE)
+    {
+        if (write(fd, ste_buffer, len_ste) != len_ste)
+            goto out;
+    }
+    /* NULL POLICY has no policy data */
+
+    /* write secondary policy component */
+    if (secondary == CHWALL)
+    {
+        if (write(fd, chwall_buffer, len_chwall) != len_chwall)
+            goto out;
+    }
+    else if (secondary == STE)
+    {
+        if (write(fd, ste_buffer, len_ste) != len_ste)
+            goto out;
+    }
+    /* NULL POLICY has no policy data */
+
+    ret = 0;
+
+  out:
+    /* release the descriptor and the component buffers on every path */
+    if (close(fd) != 0)
+        ret = -EIO;
+    free(ste_buffer);
+    free(chwall_buffer);
+    return ret;
+}
+
+/*
+ * Validate a parsed XML document against the schema in SCHEMA_FILENAME.
+ *
+ * doc - document to validate
+ *
+ * Returns 1 if the document validates and 0 otherwise, which includes the
+ * case that the schema itself cannot be loaded.
+ */
+int is_valid(xmlDocPtr doc)
+{
+    int err = 0;
+    xmlSchemaPtr schema_ctxt = NULL;
+    xmlSchemaParserCtxtPtr schemaparser_ctxt = NULL;
+    xmlSchemaValidCtxtPtr schemavalid_ctxt = NULL;
+
+    /* each step needs the result of the previous one, so stop at the first
+     * failure instead of handing a NULL object to the next call */
+    schemaparser_ctxt = xmlSchemaNewParserCtxt(SCHEMA_FILENAME);
+    if (schemaparser_ctxt == NULL)
+    {
+        printf("ERROR: Cannot create parser for schema file %s\n",
+               SCHEMA_FILENAME);
+        err = -EIO;
+        goto out;
+    }
+
+    schema_ctxt = xmlSchemaParse(schemaparser_ctxt);
+    if (schema_ctxt == NULL)
+    {
+        printf("ERROR: Cannot parse schema file %s\n", SCHEMA_FILENAME);
+        err = -EIO;
+        goto out;
+    }
+
+    schemavalid_ctxt = xmlSchemaNewValidCtxt(schema_ctxt);
+    if (schemavalid_ctxt == NULL)
+    {
+        printf("ERROR: Cannot create validation context for schema %s\n",
+               SCHEMA_FILENAME);
+        err = -EIO;
+        goto out;
+    }
+
+#ifdef VALIDATE_SCHEMA
+    /* only tested to be available from libxml2-2.6.20 upwards */
+    if ((err = xmlSchemaIsValid(schemavalid_ctxt)) != 1)
+    {
+        printf("ERROR: Invalid schema file %s (err=%d)\n",
+               SCHEMA_FILENAME, err);
+        err = -EIO;
+        goto out;
+    }
+    else
+        printf("XML Schema %s valid.\n", SCHEMA_FILENAME);
+#endif
+    /* any non-zero result counts as a failed validation */
+    if ((err = xmlSchemaValidateDoc(schemavalid_ctxt, doc)))
+    {
+        err = -EIO;
+        goto out;
+    }
+  out:
+    /* objects that were never created are still NULL here; the original
+     * code already passed possibly-NULL pointers to these free functions */
+    xmlSchemaFreeValidCtxt(schemavalid_ctxt);
+    xmlSchemaFreeParserCtxt(schemaparser_ctxt);
+    xmlSchemaFree(schema_ctxt);
+    return (err != 0) ? 0 : 1;
+}
+
+/*
+ * Translate the XML policy named by argv[1] into its binary form.
+ *
+ * The input files <POLICY_SUBDIR><name>/<name><POLICY_EXTENSION> and
+ * ...<LABEL_EXTENSION> are parsed and schema-validated; the results are
+ * written to ...<MAPPING_EXTENSION> and ...<BINARY_EXTENSION>.
+ *
+ * Returns EXIT_SUCCESS only if every step succeeded, EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+    xmlDocPtr labeldoc = NULL;
+    xmlDocPtr policydoc = NULL;
+
+    /* stays EXIT_FAILURE unless the end of the translation is reached, so
+     * every early exit through the cleanup labels reports the failure */
+    int err = EXIT_FAILURE;
+
+    char *file_prefix = NULL;
+    int prefix_len;
+
+    if (ACM_POLICY_VERSION != WRITTEN_AGAINST_ACM_POLICY_VERSION)
+    {
+        printf("ERROR: This program was written against an older ACM "
+               "version.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* NOTE(review): argv[1] is used unconditionally below, so this relies
+     * on usage() terminating the program - confirm */
+    if (argc != 2)
+        usage(basename(argv[0]));
+
+    /* <subdir> <name> "/" <name> plus the terminating NUL */
+    prefix_len = strlen(POLICY_SUBDIR) +
+        strlen(argv[1]) + 1 /* "/" */  +
+        strlen(argv[1]) + 1 /* NUL */ ;
+
+    file_prefix = malloc(prefix_len);
+    policy_filename = malloc(prefix_len + strlen(POLICY_EXTENSION));
+    label_filename = malloc(prefix_len + strlen(LABEL_EXTENSION));
+    binary_filename = malloc(prefix_len + strlen(BINARY_EXTENSION));
+    mapping_filename = malloc(prefix_len + strlen(MAPPING_EXTENSION));
+
+    if (!file_prefix || !policy_filename || !label_filename ||
+        !binary_filename || !mapping_filename)
+    {
+        printf("ERROR allocating file name memory.\n");
+        goto out2;
+    }
+
+    /* create input/output filenames out of prefix; the first piece must be
+     * copied, not appended, because malloc'ed memory is not NUL-terminated */
+    strcpy(file_prefix, POLICY_SUBDIR);
+    strcat(file_prefix, argv[1]);
+    strcat(file_prefix, "/");
+    strcat(file_prefix, argv[1]);
+
+    strcpy(policy_filename, file_prefix);
+    strcpy(label_filename, file_prefix);
+    strcpy(binary_filename, file_prefix);
+    strcpy(mapping_filename, file_prefix);
+
+    strcat(policy_filename, POLICY_EXTENSION);
+    strcat(label_filename, LABEL_EXTENSION);
+    strcat(binary_filename, BINARY_EXTENSION);
+    strcat(mapping_filename, MAPPING_EXTENSION);
+
+    labeldoc = xmlParseFile(label_filename);
+
+    if (labeldoc == NULL)
+    {
+        printf("Error: could not parse file %s.\n", label_filename);
+        goto out2;
+    }
+
+    printf("Validating label file %s...\n", label_filename);
+    if (!is_valid(labeldoc))
+    {
+        printf("ERROR: Failed schema-validation for file %s (err=%d)\n",
+               label_filename, err);
+        goto out1;
+    }
+
+    policydoc = xmlParseFile(policy_filename);
+
+    if (policydoc == NULL)
+    {
+        printf("Error: could not parse file %s.\n", policy_filename);
+        goto out1;
+    }
+
+    printf("Validating policy file %s...\n", policy_filename);
+
+    if (!is_valid(policydoc))
+    {
+        printf("ERROR: Failed schema-validation for file %s (err=%d)\n",
+               policy_filename, err);
+        goto out;
+    }
+
+    /* Init queues and parse policy */
+    create_type_mapping(policydoc);
+
+    /* create ssids */
+    create_ssid_mapping(labeldoc);
+
+    /* write label mapping file */
+    if (write_mapping(mapping_filename))
+    {
+        printf("ERROR: writing mapping file %s.\n", mapping_filename);
+        goto out;
+    }
+
+    /* write binary file */
+    if (write_binary(binary_filename))
+    {
+        printf("ERROR: writing binary file %s.\n", binary_filename);
+        goto out;
+    }
+
+    /* write stats */
+    if (have_chwall)
+    {
+        printf("Max chwall labels:  %u\n", max_chwall_labels);
+        printf("Max chwall-types:   %u\n", max_chwall_types);
+        printf("Max chwall-ssids:   %u\n", max_chwall_ssids);
+    }
+
+    if (have_ste)
+    {
+        printf("Max ste labels:     %u\n", max_ste_labels);
+        printf("Max ste-types:      %u\n", max_ste_types);
+        printf("Max ste-ssids:      %u\n", max_ste_ssids);
+    }
+
+    err = EXIT_SUCCESS;
+
+    /* cleanup */
+  out:
+    xmlFreeDoc(policydoc);
+  out1:
+    xmlFreeDoc(labeldoc);
+  out2:
+    free(file_prefix);
+    xmlCleanupParser();
+    return err;
+}
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/secpol_xml2bin.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/secpol_xml2bin.h   Thu Aug 25 22:53:20 2005
@@ -0,0 +1,139 @@
+/****************************************************************
+ * secpol_xml2bin.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ *
+ * Authors:
+ * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ */
+#define POLICY_SUBDIR       "policies/"
+#define POLICY_EXTENSION    "-security_policy.xml"
+#define LABEL_EXTENSION     "-security_label_template.xml"
+#define BINARY_EXTENSION    ".bin"
+#define MAPPING_EXTENSION   ".map"
+#define PRIMARY_COMPONENT_ATTR_NAME "order"
+#define BOOTSTRAP_LABEL_ATTR_NAME   "bootstrap"
+#define PRIMARY_COMPONENT   "PrimaryPolicyComponent"
+#define SCHEMA_FILENAME     "policies/security_policy.xsd"
+
+/* basic states (used as 1 << X) */
+#define XML2BIN_SECPOL             0   /* policy tokens */
+#define XML2BIN_STE                    1
+#define XML2BIN_CHWALL          2
+#define XML2BIN_CONFLICTSETS           3
+#define XML2BIN_CSTYPE         4
+
+#define XML2BIN_SECTEMPLATE        5   /* label tokens */
+#define XML2BIN_POLICYHEADER           6
+#define XML2BIN_LABELHEADER     7
+#define XML2BIN_SUBJECTS        8
+#define XML2BIN_OBJECTS            9
+#define XML2BIN_VM                 10
+#define XML2BIN_RES            11
+
+#define XML2BIN_STETYPES           12  /* shared tokens */
+#define XML2BIN_CHWALLTYPES        13
+#define XML2BIN_TYPE               14
+#define XML2BIN_NAME            15
+#define XML2BIN_TEXT               16
+#define XML2BIN_COMMENT                17
+
+/* type "data type" (currently 16bit) */
+typedef u_int16_t type_t;
+
+/* list of known elements and token equivalent  *
+ * state constants and token positions must be  *
+ * in sync for correct state recognition:       *
+ * token[XML2BIN_x] is the element name that    *
+ * belongs to state bit (1 << XML2BIN_x)        */
+
+char *token[20] =                       /* parser triggers */
+{
+    [0] = "SecurityPolicyDefinition",   /* policy xml */
+    [1] = "SimpleTypeEnforcement",
+    [2] = "ChineseWall",
+    [3] = "ConflictSets",
+    [4] = "Conflict",                   /* XML2BIN_CSTYPE */
+    [5] = "SecurityLabelTemplate",      /* label-template xml */
+    [6] = "PolicyHeader",
+    [7] = "LabelHeader",
+    [8] = "SubjectLabels",
+    [9] = "ObjectLabels",
+    [10] = "VirtualMachineLabel",
+    [11] = "ResourceLabel",
+    [12] = "SimpleTypeEnforcementTypes",                  /* common tags */
+    [13] = "ChineseWallTypes",
+    [14] = "Type",
+    [15] = "Name",
+    [16] = "text",
+    [17] = "comment",
+    [18] = NULL,                        /* presumably the end marker - confirm against the parser */
+};
+
+/* important combined states */
+#define XML2BIN_NULL           0
+
+/* policy xml parsing states _S */
+
+/* e.g., here we are in a <secpol,ste,stetypes> environment,  *
+ * so when finding a type element, we know where to put it    */
+#define XML2BIN_stetype_S ((1 << XML2BIN_SECPOL) | \
+                                (1 << XML2BIN_STE) |    \
+                                (1 << XML2BIN_STETYPES))
+
+#define XML2BIN_chwalltype_S ((1 << XML2BIN_SECPOL) | \
+                                (1 << XML2BIN_CHWALL) | \
+                                (1 << XML2BIN_CHWALLTYPES))
+
+#define XML2BIN_conflictset_S ((1 << XML2BIN_SECPOL) | \
+                                (1 << XML2BIN_CHWALL) | \
+                                (1 << XML2BIN_CONFLICTSETS))
+
+#define XML2BIN_conflictsettype_S ((1 << XML2BIN_SECPOL) | \
+                                (1 << XML2BIN_CHWALL) | \
+                                (1 << XML2BIN_CONFLICTSETS) | \
+                                (1 << XML2BIN_CSTYPE))
+
+
+/* label xml states */
+#define XML2BIN_VM_S ((1 << XML2BIN_SECTEMPLATE) | \
+                      (1 << XML2BIN_SUBJECTS) |    \
+                      (1 << XML2BIN_VM))
+
+#define XML2BIN_RES_S ((1 << XML2BIN_SECTEMPLATE) | \
+                       (1 << XML2BIN_OBJECTS) |     \
+                       (1 << XML2BIN_RES))
+
+#define XML2BIN_VM_STE_S ((1 << XML2BIN_SECTEMPLATE) | \
+                        (1 << XML2BIN_SUBJECTS) | \
+                        (1 << XML2BIN_VM) | \
+                        (1 << XML2BIN_STETYPES))
+
+#define XML2BIN_VM_CHWALL_S ((1 << XML2BIN_SECTEMPLATE) | \
+                           (1 << XML2BIN_SUBJECTS) | \
+                           (1 << XML2BIN_VM) | \
+                           (1 << XML2BIN_CHWALLTYPES))
+
+#define XML2BIN_RES_STE_S ((1 << XML2BIN_SECTEMPLATE) | \
+                         (1 << XML2BIN_OBJECTS) | \
+                         (1 << XML2BIN_RES) | \
+                         (1 << XML2BIN_STETYPES))
+
+
+
+/* check versions of headers against which the
+ * xml2bin translation tool was written
+ */
+
+/* protects from unnoticed changes in struct acm_policy_buffer */
+#define WRITTEN_AGAINST_ACM_POLICY_VERSION  1
+
+/* protects from unnoticed changes in struct acm_chwall_policy_buffer */
+#define WRITTEN_AGAINST_ACM_CHWALL_VERSION  1
+
+/* protects from unnoticed changes in struct acm_ste_policy_buffer */
+#define WRITTEN_AGAINST_ACM_STE_VERSION     1
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/setlabel.sh
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/setlabel.sh        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,345 @@
+#!/bin/sh
+# *
+# * setlabel
+# *
+# * Copyright (C) 2005 IBM Corporation
+# *
+# * Authors:
+# * Stefan Berger <stefanb@xxxxxxxxxx>
+# *
+# * This program is free software; you can redistribute it and/or
+# * modify it under the terms of the GNU General Public License as
+# * published by the Free Software Foundation, version 2 of the
+# * License.
+# *
+# * 'setlabel' labels virtual machine (domain) configuration files with
+# * security identifiers that can be enforced in Xen.
+# *
+# * 'setlabel -?' shows the usage of the program
+# *
+# * 'setlabel -l vmconfig-file' lists all available labels (only VM
+# *            labels are used right now)
+# *
+# * 'setlabel vmconfig-file security-label map-file' inserts the 'ssidref'
+# *                       that corresponds to the security-label under the
+# *                       current policy (if policy changes, 'label'
+# *                       must be re-run over the configuration files;
+# *                       map-file is created during policy translation and
+# *                       is found in the policy's directory
+#
+
+# Re-execute this script under bash, since it uses bash-only syntax such as
+# 'let' and '==' inside [ ]. $runbash marks the re-executed instance so the
+# script does not loop. "$0" and "$@" are handed over as separate words:
+# building a command string for 'sh -c' would re-split arguments that
+# contain blanks and glob-expand arguments such as '-?'.
+if [ -z "$runbash" ]; then
+       runbash="1"
+       export runbash
+       exec bash "$0" "$@"
+fi
+
+
+# Print the command line synopsis of this script to stdout.
+usage ()
+{
+       printf '%s\n' \
+               "Usage: $0 [Option] <vmfile> <label> <policy name> " \
+               "    or $0 -l <policy name>" \
+               "" \
+               "Valid Options are:" \
+               "-r          : to relabel a file without being prompted" \
+               "" \
+               "vmfile      : XEN vm configuration file" \
+               "label       : the label to map" \
+               "policy name : the name of the policy, i.e. 'chwall'" \
+               "" \
+               "-l <policy name> is used to show valid labels in the map file" \
+               ""
+}
+
+
+findMapFile ()
+{
+       mapfile="./$1.map"
+       if [ -r "$mapfile" ]; then
+               return 1
+       fi
+
+       mapfile="./policies/$1/$1.map"
+       if [ -r "$mapfile" ]; then
+               return 1
+       fi
+
+       return 0
+}
+
+# List the VM labels of map file $1 that can be used with 'setlabel'.
+# A label is listed if it is defined for the primary policy and, unless the
+# secondary policy is NULL, for the secondary policy as well. That is the
+# same condition relabel applies before it accepts a label.
+# Prints "No labels found." if nothing qualifies.
+showLabels ()
+{
+       mapfile=$1
+       if [ "$mapfile" == "" -o ! -r "$mapfile" ]; then
+               echo "Cannot read from map file $mapfile."
+               return -1
+       fi
+
+       getPrimaryPolicy $mapfile
+       getSecondaryPolicy $mapfile
+
+       echo "The following labels are available:"
+       # One pass over the map file: remember the primary policy's VM labels
+       # in file order and which labels the secondary policy knows, then
+       # print the primary labels that pass the secondary check.
+       LABELS=`cat $mapfile |
+               awk -vprimary="$primary" -vsecondary="$secondary" '
+                 $1 == "LABEL->SSID" && $2 == "VM" {
+                   if ($3 == primary)
+                     order[++n] = $4
+                   if ($3 == secondary)
+                     known[$4] = 1
+                 }
+                 END {
+                   for (i = 1; i <= n; i++)
+                     if (secondary == "NULL" || (order[i] in known))
+                       print order[i]
+                 }'`
+
+       if [ "$LABELS" != "" ]; then
+               echo "$LABELS"
+       else
+               echo "No labels found."
+       fi
+}
+
+getPrimaryPolicy ()
+{
+       mapfile=$1
+       primary=`cat $mapfile  |   \
+                awk '             \
+                 {                \
+                   if ( $1 == "PRIMARY" ) { \
+                     res=$2;                \
+                   }                        \
+                 } END {                    \
+                   print res;               \
+                 } '`
+}
+
+getSecondaryPolicy ()
+{
+       mapfile=$1
+       secondary=`cat $mapfile  |   \
+                awk '             \
+                 {                \
+                   if ( $1 == "SECONDARY" ) { \
+                     res=$2;                \
+                   }                        \
+                 } END {                    \
+                   print res;               \
+                 } '`
+}
+
+
+getDefaultSsid ()
+{
+       mapfile=$1
+       pol=$2
+       RES=`cat $mapfile    \
+            awk -vpol=$pol  \
+             {              \
+               if ($1 == "LABEL->SSID" && \
+                   $2 == "ANY"         && \
+                   $3 == pol           && \
+                   $4 == "DEFAULT"       ) {\
+                     res=$5;                \
+               }                            \
+             } END {                        \
+               printf "%04x", strtonum(res) \
+            }'`
+       echo "default NULL mapping is $RES"
+       defaultssid=$RES
+}
+
+relabel ()
+{
+       vmfile=$1
+       label=$2
+       mapfile=$3
+       mode=$4
+
+       if [ ! -r "$vmfile" ]; then
+               echo "Cannot read from vm configuration file $vmfile."
+               return -1
+       fi
+
+       if [ ! -w "$vmfile" ]; then
+               echo "Cannot write to vm configuration file $vmfile."
+               return -1
+       fi
+
+       if [ ! -r "$mapfile" ] ; then
+               echo "Cannot read mapping file $mapfile."
+               return -1
+       fi
+
+       # Determine which policy is primary, which sec.
+       getPrimaryPolicy $mapfile
+       getSecondaryPolicy $mapfile
+
+       # Calculate the primary policy's SSIDREF
+       if [ "$primary" == "NULL" ]; then
+               SSIDLO="0000"
+       else
+               SSIDLO=`cat $mapfile |                    \
+                       awk -vlabel=$label                \
+                           -vprimary=$primary            \
+                          '{                             \
+                             if ( $1 == "LABEL->SSID" && \
+                                  $2 == "VM" &&          \
+                                  $3 == primary  &&      \
+                                  $4 == label ) {        \
+                               result=$5                 \
+                             }                           \
+                          } END {                        \
+                            if (result != "" )           \
+                              {printf "%04x", strtonum(result)}\
+                          }'`
+       fi
+
+       # Calculate the secondary policy's SSIDREF
+       if [ "$secondary" == "NULL" ]; then
+               SSIDHI="0000"
+       else
+               SSIDHI=`cat $mapfile |                    \
+                       awk -vlabel=$label                \
+                           -vsecondary=$secondary        \
+                          '{                             \
+                             if ( $1 == "LABEL->SSID" && \
+                                  $2 == "VM"          && \
+                                  $3 == secondary     && \
+                                  $4 == label ) {        \
+                               result=$5                 \
+                             }                           \
+                           }  END {                      \
+                             if (result != "" )          \
+                               {printf "%04x", strtonum(result)}\
+                           }'`
+       fi
+
+       if [ "$SSIDLO" == "" -o \
+            "$SSIDHI" == "" ]; then
+               echo "Could not map the given label '$label'."
+               return -1
+       fi
+
+       ACM_POLICY=`cat $mapfile |             \
+           awk ' { if ( $1 == "POLICY" ) {    \
+                     result=$2                \
+                   }                          \
+                 }                            \
+                 END {                        \
+                   if (result != "") {        \
+                     printf result            \
+                   }                          \
+                 }'`
+
+       if [ "$ACM_POLICY" == "" ]; then
+               echo "Could not find 'POLICY' entry in map file."
+               return -1
+       fi
+
+       SSIDREF="0x$SSIDHI$SSIDLO"
+
+       if [ "$mode" != "relabel" ]; then
+               RES=`cat $vmfile |  \
+                    awk '{         \
+                      if ( substr($1,0,7) == "ssidref" ) {\
+                        print $0;             \
+                      }                       \
+                    }'`
+               if [ "$RES" != "" ]; then
+                       echo "Do you want to overwrite the existing mapping 
($RES)? (y/N)"
+                       read user
+                       if [ "$user" != "y" -a "$user" != "Y" ]; then
+                               echo "Aborted."
+                               return 0
+                       fi
+               fi
+       fi
+
+       #Write the output
+       vmtmp1="/tmp/__setlabel.tmp1"
+       vmtmp2="/tmp/__setlabel.tmp2"
+       touch $vmtmp1
+       touch $vmtmp2
+       if [ ! -w "$vmtmp1" -o ! -w "$vmtmp2" ]; then
+               echo "Cannot create temporary files. Aborting."
+               return -1
+       fi
+       RES=`sed -e '/^#ACM_POLICY/d' $vmfile > $vmtmp1`
+       RES=`sed -e '/^#ACM_LABEL/d' $vmtmp1 > $vmtmp2`
+       RES=`sed -e '/^ssidref/d' $vmtmp2 > $vmtmp1`
+       echo "#ACM_POLICY=$ACM_POLICY" >> $vmtmp1
+       echo "#ACM_LABEL=$label" >> $vmtmp1
+       echo "ssidref = $SSIDREF" >> $vmtmp1
+       mv -f $vmtmp1 $vmfile
+       rm -rf $vmtmp1 $vmtmp2
+       echo "Mapped label '$label' to ssidref '$SSIDREF'."
+}
+
+
+
+if [ "$1" == "-r" ]; then
+       mode="relabel"
+       shift
+elif [ "$1" == "-l" ]; then
+       mode="show"
+       shift
+elif [ "$1" == "-?" ]; then
+       mode="usage"
+fi
+
+if [ "$mode" == "show" ]; then
+       if [ "$1" == "" ]; then
+               usage
+               exit -1;
+       fi
+       findMapFile $1
+       res=$?
+       if [ "$res" != "0" ]; then
+               showLabels $mapfile
+       else
+               echo "Could not find map file for policy '$1'."
+       fi
+elif [ "$mode" == "usage" ]; then
+       usage
+else
+       if [ "$3" == "" ]; then
+               usage
+               exit -1;
+       fi
+       findMapFile $3
+       res=$?
+       if [ "$res" != "0" ]; then
+               relabel $1 $2 $mapfile $mode
+       else
+               echo "Could not find map file for policy '$3'."
+       fi
+
+fi
diff -r 5f1ed597f107 -r 8799d14bef77 tools/security/updategrub.sh
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/security/updategrub.sh      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,171 @@
+#!/bin/sh
+# *
+# * updategrub
+# *
+# * Copyright (C) 2005 IBM Corporation
+# *
+# * Authors:
+# * Stefan Berger <stefanb@xxxxxxxxxx>
+# *
+# * This program is free software; you can redistribute it and/or
+# * modify it under the terms of the GNU General Public License as
+# * published by the Free Software Foundation, version 2 of the
+# * License.
+# *
+# *
+#
+
+if [ -z "$runbash" ]; then
+       runbash="1"
+       export runbash
+       exec sh -c "bash $0 $*"
+       exit
+fi
+
+
+# Print the command-line synopsis and a description of both arguments.
+usage ()
+{
+       echo "Usage: $0 <policy name> <root of xen repository>"
+       echo ""
+       echo "<policy name>             : The name of the policy, e.g. xen_null"
+       echo "<root of xen repository>  : The root of the XEN repository."
+       echo ""
+}
+
+# Set the global 'linux' to a regular expression matching the dom0 kernel name.
+# $1: directory holding the linux-*-xen0 build trees.  The release string is
+# taken from UTS_RELEASE in include/linux/version.h; if no tree supplies one,
+# a generic vmlinuz-2.x pattern ending in -xen0 is used instead.
+getLinuxVersion ()
+{
+       path=$1
+       linux=""
+       lnx=""     # never start from a value inherited from the environment
+       for f in "$path"/linux-*-xen0 ; do
+               versionfile=$f/include/linux/version.h
+               if [ -r "$versionfile" ]; then
+                       # Field 3 is the quoted release; drop the two quotes.
+                       lnx=`awk '/UTS_RELEASE/ {
+                              print substr($3, 2, length($3) - 2) }' "$versionfile"`
+               fi
+               if [ "$lnx" != "" ]; then
+                       linux="[./0-9a-zA-Z]*$lnx"
+                       return;
+               fi
+       done
+
+       #Last resort.
+       linux="vmlinuz-2.[45678].[0-9]*[.0-9]*-xen0$"
+}
+
+#Return where the grub.conf file is.
+#I only know of one place it can be.
+findGrubConf()
+{
+       grubconf="/boot/grub/grub.conf"
+       if [ -w $grubconf ]; then
+               return 1
+       fi
+       return 0
+}
+
+
+#Update the grub configuration file.
+#Search for existing entries and replace the current
+#policy entry with the policy passed to this script
+#
+#Arguments passed to this function
+# 1st : the grub configuration file
+# 2nd : the binary policy file name
+# 3rd : the name or pattern of the linux kernel name to match
+#
+# The algorithm here is based on pattern matching
+# and is working correctly if
+# - under a title a line beginning with 'kernel' is found
+#   whose following item ends with "xen.gz"
+#   Example:  kernel /xen.gz dom0_mem=....
+# - a module line matching the 3rd parameter is found
+#
+updateGrub ()
+{
+       grubconf=$1
+       policyfile=$2
+       linux=$3
+
+       tmpfile="/tmp/new_grub.conf"
+
+       cat $grubconf |                                \
+                awk -vpolicy=$policyfile              \
+                    -vlinux=$linux '{                 \
+                  if ( $1 == "title" ) {              \
+                    kernelfound = 0;                  \
+                    if ( policymaycome == 1 ){        \
+                      printf ("\tmodule %s%s\n", path, policy);      \
+                    }                                 \
+                    policymaycome = 0;                \
+                  }                                   \
+                  else if ( $1 == "kernel" ) {        \
+                    if ( match($2,"xen.gz$") ) {      \
+                      path=substr($2,1,RSTART-1);     \
+                      kernelfound = 1;                \
+                    }                                 \
+                  }                                   \
+                  else if ( $1 == "module" &&         \
+                            kernelfound == 1 &&       \
+                            match($2,linux) ) {       \
+                     policymaycome = 1;               \
+                  }                                   \
+                  else if ( $1 == "module" &&         \
+                            kernelfound == 1 &&       \
+                            policymaycome == 1 &&     \
+                            match($2,"[0-9a-zA-Z]*.bin$") ) { \
+                     printf ("\tmodule %s%s\n", path, policy); \
+                     policymaycome = 0;               \
+                     kernelfound = 0;                 \
+                     dontprint = 1;                   \
+                  }                                   \
+                  else if ( $1 == "" &&               \
+                            kernelfound == 1 &&       \
+                            policymaycome == 1) {     \
+                     dontprint = 1;                   \
+                  }                                   \
+                  if (dontprint == 0) {               \
+                    printf ("%s\n", $0);              \
+                  }                                   \
+                  dontprint = 0;                      \
+                } END {                               \
+                  if ( policymaycome == 1 ) {         \
+                    printf ("\tmodule %s%s\n", path, policy);  \
+                  }                                   \
+                }' > $tmpfile
+       if [ ! -r $tmpfile ]; then
+               echo "Could not create temporary file! Aborting."
+               exit -1
+       fi
+       mv -f $tmpfile $grubconf
+}
+# Main: handle the help request, validate arguments, then patch grub.conf.
+if [ "$1" == "-?" ]; then
+       usage
+       exit 0
+fi
+# The help test comes first so that a lone "-?" is not rejected below.
+if [ "$1" == "" -o "$2" == "" ]; then
+       usage
+       exit -1
+fi
+
+policy=$1
+policyfile=$policy.bin
+
+getLinuxVersion "$2"
+# findGrubConf sets 'grubconf' and returns 0 when it is not writable.
+findGrubConf
+ERR=$?
+if [ $ERR -eq 0 ]; then
+       echo "Could not find grub.conf. Aborting."
+       exit -1
+fi
+# $linux is a regular expression: quote it so the shell cannot glob-expand it.
+updateGrub "$grubconf" "$policyfile" "$linux"
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/Makefile
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/Makefile    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,13 @@
+# Runs the requested goal (all, install or clean) in every directory listed
+# in SUBDIRS, in list order; 'set -e' stops at the first failing sub-make.
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+SUBDIRS := libxenstat xentop
+
+.PHONY: all install clean
+
+all install clean:
+	@set -e; for subdir in $(SUBDIRS); do \
+		$(MAKE) -C $$subdir $@; \
+	done
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/COPYING
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/COPYING  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,510 @@
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard.  To achieve this, non-free programs must
+be allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at least
+    three years, to give the same user the materials specified in
+    Subsection 6a, above, for a charge no more than the cost of
+    performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James
+  Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/Makefile
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/Makefile Thu Aug 25 22:53:20 2005
@@ -0,0 +1,142 @@
+# libxenstat: statistics-collection library for Xen
+# Copyright (C) International Business Machines Corp., 2005
+# Author: Josh Triplett <josht@xxxxxxxxxx>
+# 
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse
+
+INSTALL         = install
+INSTALL_PROG    = $(INSTALL) -m0755 -D
+INSTALL_DATA    = $(INSTALL) -m0644 -D
+
+prefix=/usr
+includedir=$(prefix)/include
+libdir=$(prefix)/lib
+
+LDCONFIG=ldconfig
+MAKE_LINK=ln -sf
+
+MAJOR=0
+MINOR=0
+
+LIB=src/libxenstat.a
+SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR)
+SHLIB_LINKS=src/libxenstat.so.$(MAJOR) src/libxenstat.so
+OBJECTS=src/xenstat.o src/xen-interface.o
+SONAME_FLAGS=-Wl,-soname -Wl,libxenstat.so.$(MAJOR)
+
+WARN_FLAGS=-Wall -Werror
+
+CFLAGS+=-Isrc
+CFLAGS+=-I$(XEN_ROOT)/xen/include/public
+CFLAGS+=-I$(LINUX_ROOT)/include/asm-xen/linux-public/
+LDFLAGS+=-Lsrc
+
+# These targets name actions, not files.  Declare them phony so that a stray
+# file called e.g. "clean" or "install" cannot make them look up to date.
+.PHONY: all install clean all-bindings install-bindings
+.PHONY: python-bindings install-python-bindings
+.PHONY: perl-bindings install-perl-bindings
+
+# Default goal: build the static library.
+all: $(LIB)
+
+$(LIB): $(OBJECTS)
+       $(AR) rc $@ $^
+       $(RANLIB) $@
+
+$(SHLIB): $(OBJECTS)
+       $(CC) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS)
+
+src/xenstat.o: src/xenstat.c src/xenstat.h src/xen-interface.h
+       $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $<
+
+src/xen-interface.o: src/xen-interface.c src/xen-interface.h
+       $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $<
+
+# The major-version symlink must point at the real shared object.  $(<F) is
+# the file part of the first prerequisite, so that prerequisite has to be
+# $(SHLIB); with $(LIB) the link would resolve to libxenstat.a.
+src/libxenstat.so.$(MAJOR): $(SHLIB)
+       $(MAKE_LINK) $(<F) $@
+
+src/libxenstat.so: src/libxenstat.so.$(MAJOR)
+       $(MAKE_LINK) $(<F) $@
+
+install: all
+#install: all
+#      $(INSTALL_DATA) src/xenstat.h $(DESTDIR)$(includedir)/xenstat.h
+#      $(INSTALL_PROG) $(LIB) $(DESTDIR)$(libdir)/libxenstat.a
+#      $(INSTALL_PROG) $(SHLIB) \
+#                      $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR).$(MINOR)
+#      $(MAKE_LINK) libxenstat.so.$(MAJOR).$(MINOR) \
+#                   $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR)
+#      $(MAKE_LINK) libxenstat.so.$(MAJOR) \
+#                   $(DESTDIR)$(libdir)/libxenstat.so
+#      -$(LDCONFIG)
+
+PYLIB=bindings/swig/python/_xenstat.so
+PYMOD=bindings/swig/python/xenstat.py
+PYSRC=bindings/swig/python/_xenstat.c
+PERLLIB=bindings/swig/perl/xenstat.so
+PERLMOD=bindings/swig/perl/xenstat.pm
+PERLSRC=bindings/swig/perl/xenstat.c
+BINDINGS=$(PYLIB) $(PYMOD) $(PERLLIB) $(PERLMOD)
+BINDINGSRC=$(PYSRC) $(PERLSRC)
+
+# The all-bindings target builds all the language bindings
+all-bindings: perl-bindings python-bindings
+
+# The install-bindings target installs all the language bindings
+install-bindings: install-perl-bindings install-python-bindings
+
+$(BINDINGS): $(SHLIB) $(SHLIB_LINKS) src/xenstat.h
+
+SWIG_FLAGS=-module xenstat -Isrc
+
+# Python bindings
+PYTHON_VERSION=2.3
+PYTHON_FLAGS=-I/usr/include/python$(PYTHON_VERSION) -lpython$(PYTHON_VERSION)
+$(PYSRC) $(PYMOD): bindings/swig/xenstat.i
+       swig -python $(SWIG_FLAGS) -outdir $(@D) -o $(PYSRC) $<
+
+$(PYLIB): $(PYSRC)
+       $(CC) $(CFLAGS) $(LDFLAGS) $(PYTHON_FLAGS) -shared -lxenstat -o $@ $<
+
+python-bindings: $(PYLIB) $(PYMOD)
+
+pythonlibdir=$(prefix)/lib/python$(PYTHON_VERSION)/site-packages
+install-python-bindings: $(PYLIB) $(PYMOD)
+       $(INSTALL_PROG) $(PYLIB) $(DESTDIR)$(pythonlibdir)/_xenstat.so
+       $(INSTALL_PROG) $(PYMOD) $(DESTDIR)$(pythonlibdir)/xenstat.py
+
+ifeq ($(XENSTAT_PYTHON_BINDINGS),y)
+all: python-bindings
+install: install-python-bindings
+endif
+
+# Perl bindings
+# Compiler flags for building against perl.  The backquoted command is left
+# for the shell to run when a recipe uses $(PERL_FLAGS); $$ yields a literal $.
+PERL_FLAGS=`perl -MConfig -e 'print "$$Config{ccflags} -I$$Config{archlib}/CORE";'`
+$(PERLSRC) $(PERLMOD): bindings/swig/xenstat.i
+       swig -perl $(SWIG_FLAGS) -outdir $(@D) -o $(PERLSRC) $<
+
+$(PERLLIB): $(PERLSRC)
+       $(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) -shared -lxenstat -o $@ $<
+
+perl-bindings: $(PERLLIB) $(PERLMOD)
+
+perllibdir=$(prefix)/lib/perl5
+perlmoddir=$(prefix)/share/perl5
+install-perl-bindings: $(PERLLIB) $(PERLMOD)
+       $(INSTALL_PROG) $(PERLLIB) $(DESTDIR)$(perllibdir)/xenstat.so
+       $(INSTALL_PROG) $(PERLMOD) $(DESTDIR)$(perlmoddir)/xenstat.pm
+
+ifeq ($(XENSTAT_PERL_BINDINGS),y)
+all: perl-bindings
+install: install-perl-bindings
+endif
+
+clean:
+       rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS) \
+             $(BINDINGS) $(BINDINGSRC)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/bindings/swig/perl/.empty
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/perl/.empty        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,1 @@
+This directory is empty; this file is included to prevent version control systems from removing the directory.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/bindings/swig/python/.empty
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/python/.empty      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,1 @@
+This directory is empty; this file is included to prevent version control systems from removing the directory.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/bindings/swig/xenstat.i
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/xenstat.i  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,8 @@
+%module xenstat_swig
+%{
+/* Includes the header in the wrapper code */
+#include "xenstat.h"
+%}
+
+/* Parse the header file to generate wrappers */
+%include "xenstat.h"
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/src/xen-interface.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.c      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,204 @@
+/* xen-interface.c
+ *
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "xen-interface.h"
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "version.h"
+#include "privcmd.h"
+#include "xen.h"
+
+struct xi_handle {
+       int fd;
+};
+
+/* Set up the xen-interface layer: allocate a handle and open the privcmd
+ * device.  Returns the new handle for use with the other xi_* calls, or NULL
+ * if the allocation or the open fails. */
+xi_handle *xi_init()
+{
+       xi_handle *xi = (xi_handle *)calloc(1, sizeof(*xi));
+
+       if (xi != NULL) {
+               xi->fd = open("/proc/xen/privcmd", O_RDWR);
+               if (xi->fd >= 0)
+                       return xi;
+
+               /* Report why the open failed, then give the handle back. */
+               perror("Couldn't open /proc/xen/privcmd");
+               free(xi);
+       }
+
+       return NULL;
+}
+
+/* Release a handle obtained from xi_init(): close the privcmd descriptor and
+ * free the handle.  A NULL handle is ignored, so cleanup paths can call this
+ * unconditionally. */
+void xi_uninit(xi_handle *handle)
+{
+       if (handle == NULL)
+               return;
+
+       close(handle->fd);
+       free(handle);
+}
+
+/* Fetch the Xen version number and extraversion string with one multicall.
+ *
+ * Two __HYPERVISOR_xen_version entries (XENVER_version and
+ * XENVER_extraversion) are batched into a single __HYPERVISOR_multicall
+ * issued through the privcmd ioctl.  On success *vnum receives the result of
+ * the XENVER_version entry; ver is handed to the hypervisor as the argument
+ * of XENVER_extraversion.  Returns 0 on success and -1 on failure, in which
+ * case *vnum is left untouched. */
+static int xi_make_xen_version_hypercall(xi_handle *handle, long *vnum,
+                                         xen_extraversion_t *ver)
+{
+       privcmd_hypercall_t privcmd;
+       multicall_entry_t multicall[2];
+       int ret = 0;
+
+       /* set up for doing hypercall */
+       privcmd.op = __HYPERVISOR_multicall;
+       privcmd.arg[0] = (unsigned long)multicall;
+       privcmd.arg[1] = 2;
+
+       /* first one to get xen version number */
+       multicall[0].op = __HYPERVISOR_xen_version;
+       multicall[0].args[0] = (unsigned long)XENVER_version;
+
+       /* second to get xen version flag */
+       multicall[1].op = __HYPERVISOR_xen_version;
+       multicall[1].args[0] = (unsigned long)XENVER_extraversion;
+       multicall[1].args[1] = (unsigned long)ver;
+
+       /* NOTE(review): the buffer behind ver is passed to the hypervisor
+        * without being mlock()ed here - confirm whether it needs locking
+        * like the other arguments. */
+
+       if (mlock(&privcmd, sizeof(privcmd)) < 0) {
+               perror("Failed to mlock privcmd structure");
+               return -1;
+       }
+
+       /* Lock both entries: sizeof(multicall) covers the whole array. */
+       if (mlock(multicall, sizeof(multicall)) < 0) {
+               perror("Failed to mlock multicall_entry structure");
+               /* Undo the one lock that did succeed. */
+               munlock(&privcmd, sizeof(privcmd));
+               return -1;
+       }
+
+       if (ioctl(handle->fd, IOCTL_PRIVCMD_HYPERCALL, &privcmd) < 0) {
+               perror("Hypercall failed");
+               ret = -1;
+       } else {
+               /* Only read the result slot when the ioctl went through. */
+               *vnum = multicall[0].result;
+       }
+
+       munlock(&privcmd, sizeof(privcmd));
+       munlock(multicall, sizeof(multicall));
+
+       return ret;
+}
+
+/* Issue a dom0_op hypercall through the privcmd device.
+ *
+ * Stores dom_opcode and DOM0_INTERFACE_VERSION into *dom_op, locks the
+ * request structures in memory around the ioctl and unlocks them afterwards.
+ * Returns 0 on success, or -1 if locking or the ioctl fails. */
+static int xi_make_dom0_op(xi_handle *handle, dom0_op_t *dom_op,
+                           int dom_opcode)
+{
+       privcmd_hypercall_t call;
+       int status;
+
+       /* Describe the hypercall to privcmd and stamp the request. */
+       call.op = __HYPERVISOR_dom0_op;
+       call.arg[0] = (unsigned long)dom_op;
+       dom_op->cmd = dom_opcode;
+       dom_op->interface_version = DOM0_INTERFACE_VERSION;
+
+       if (mlock(&call, sizeof(privcmd_hypercall_t)) < 0) {
+               perror("Failed to mlock privcmd structure");
+               return -1;
+       }
+
+       if (mlock(dom_op, sizeof(dom0_op_t)) < 0) {
+               perror("Failed to mlock dom0_op structure");
+               munlock(&call, sizeof(privcmd_hypercall_t));
+               return -1;
+       }
+
+       status = 0;
+       if (ioctl(handle->fd, IOCTL_PRIVCMD_HYPERCALL, &call) < 0) {
+               perror("Hypercall failed");
+               status = -1;
+       }
+
+       munlock(&call, sizeof(privcmd_hypercall_t));
+       munlock(dom_op, sizeof(dom0_op_t));
+
+       return status;
+}
+
+/* Obtain physinfo data from dom0.  On success the physinfo member of the
+ * DOM0_PHYSINFO reply is copied into *physinfo and 0 is returned; on failure
+ * -1 is returned and *physinfo is not written. */
+int xi_get_physinfo(xi_handle *handle, dom0_physinfo_t *physinfo)
+{
+       dom0_op_t request;
+       int rc = xi_make_dom0_op(handle, &request, DOM0_PHYSINFO);
+
+       if (rc < 0) {
+               perror("DOM0_PHYSINFO Hypercall failed");
+               return -1;
+       }
+
+       *physinfo = request.u.physinfo;
+       return 0;
+}
+
+/* Obtain domain data from dom0.
+ *
+ * Issues DOM0_GETDOMAININFOLIST with the given first_domain and max_domains,
+ * using info as the result buffer (locked in memory for max_domains entries
+ * while the hypercall runs).  Returns the num_domains value reported by the
+ * hypercall, or -1 on failure. */
+int xi_get_domaininfolist(xi_handle *handle, dom0_getdomaininfo_t *info,
+                          unsigned int first_domain, unsigned int max_domains)
+{
+       dom0_op_t op;
+       size_t info_len = max_domains * sizeof(dom0_getdomaininfo_t);
+       int ret;
+
+       op.u.getdomaininfolist.first_domain = first_domain;
+       op.u.getdomaininfolist.max_domains = max_domains;
+       op.u.getdomaininfolist.buffer = info;
+
+       if (mlock(info, info_len) < 0) {
+               perror("Failed to mlock domaininfo array");
+               return -1;
+       }
+
+       if (xi_make_dom0_op(handle, &op, DOM0_GETDOMAININFOLIST) < 0) {
+               perror("DOM0_GETDOMAININFOLIST Hypercall failed");
+               ret = -1;
+       } else {
+               ret = op.u.getdomaininfolist.num_domains;
+       }
+
+       /* The buffer only has to stay locked for the hypercall itself;
+        * release it on success and failure alike. */
+       munlock(info, info_len);
+
+       return ret;
+}
+
+/* Returns cpu usage data from dom0: the cpu_time field of the
+ * DOM0_GETVCPUCONTEXT reply for the given domain and vcpu, or -1 if the
+ * hypercall fails. */
+long long xi_get_vcpu_usage(xi_handle *handle, unsigned int domain,
+                            unsigned int vcpu)
+{
+       dom0_op_t request;
+
+       /* No context buffer is supplied; only cpu_time is read back. */
+       request.u.getvcpucontext.ctxt = NULL;
+       request.u.getvcpucontext.vcpu = vcpu;
+       request.u.getvcpucontext.domain = domain;
+
+       if (xi_make_dom0_op(handle, &request, DOM0_GETVCPUCONTEXT) >= 0)
+               return request.u.getvcpucontext.cpu_time;
+
+       perror("DOM0_GETVCPUCONTEXT Hypercall failed");
+       return -1;
+}
+
+/* Gets xen version information from the hypervisor: the version number goes
+ * to *vnum and ver is passed on as the extraversion buffer.  Returns 0 on
+ * success, -1 on failure. */
+int xi_get_xen_version(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
+{
+       /* A single call covers XENVER_version and XENVER_extraversion. */
+       int rc = xi_make_xen_version_hypercall(handle, vnum, ver);
+
+       if (rc < 0) {
+               perror("XEN VERSION Hypercall failed");
+               return -1;
+       }
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/src/xen-interface.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.h      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,53 @@
+/* xen-interface.h
+ *
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+/* Include guard: this header defines typedefs, so it must be safe to pull
+ * in more than once per translation unit. */
+#ifndef XEN_INTERFACE_H
+#define XEN_INTERFACE_H
+
+#include <stdint.h>
+
+/* Short fixed-width aliases, defined before the Xen headers below are
+ * included (presumably because those headers use them - confirm). */
+typedef int8_t   s8;
+typedef int16_t  s16;
+typedef int32_t  s32;
+typedef int64_t  s64;
+typedef uint8_t  u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+#include "dom0_ops.h"
+#include "version.h"
+
+/* Opaque handles */
+typedef struct xi_handle xi_handle;
+
+/* Initialize for xen-interface.  Returns a handle to be used with subsequent
+ * calls to the xen-interface functions or NULL if an error occurs. */
+xi_handle *xi_init(void);
+
+/* Release the handle to libxc, free resources, etc. */
+void xi_uninit(xi_handle *handle);
+
+/* Obtain xen version information from hypervisor */
+int xi_get_xen_version(xi_handle *, long *vnum, xen_extraversion_t *ver);
+
+/* Obtain physinfo data from dom0 */
+int xi_get_physinfo(xi_handle *, dom0_physinfo_t *);
+
+/* Obtain domain data from dom0 */
+int xi_get_domaininfolist(xi_handle *, dom0_getdomaininfo_t *, unsigned int,
+                          unsigned int);
+
+/* Returns cpu usage data from dom0 */
+long long xi_get_vcpu_usage(xi_handle *, unsigned int, unsigned int);
+
+#endif /* XEN_INTERFACE_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/src/xenstat.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,640 @@
+/* libxenstat: statistics-collection library for Xen
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *          David Hendricks <dhendrix@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <xen-interface.h>
+#include "xenstat.h"
+#include "version.h"
+
+/*
+ * Types
+ */
+struct xenstat_handle {
+       xi_handle *xihandle;
+       int page_size;
+       FILE *procnetdev;
+};
+
+#define SHORT_ASC_LEN 5                /* length of 65535 */
+#define VERSION_SIZE (2 * SHORT_ASC_LEN + 1 + sizeof(xen_extraversion_t) + 1)
+
+struct xenstat_node {
+       unsigned int flags;
+       unsigned long long cpu_hz;
+       unsigned int num_cpus;
+       unsigned long long tot_mem;
+       unsigned long long free_mem;
+       unsigned int num_domains;
+       char xen_version[VERSION_SIZE]; /* xen version running on this node */
+       xenstat_domain *domains;        /* Array of length num_domains */
+};
+
+struct xenstat_domain {
+       unsigned int id;
+       unsigned int state;
+       unsigned long long cpu_ns;
+       unsigned int num_vcpus;
+       xenstat_vcpu *vcpus;            /* Array of length num_vcpus */
+       unsigned long long cur_mem;     /* Current memory reservation */
+       unsigned long long max_mem;     /* Total memory allowed */
+       unsigned int ssid;
+       unsigned int num_networks;
+       xenstat_network *networks;      /* Array of length num_networks */
+};
+
+struct xenstat_vcpu {
+       unsigned long long ns;
+};
+
+struct xenstat_network {
+       unsigned int id;
+       /* Received */
+       unsigned long long rbytes;
+       unsigned long long rpackets;
+       unsigned long long rerrs;
+       unsigned long long rdrop;
+       /* Transmitted */
+       unsigned long long tbytes;
+       unsigned long long tpackets;
+       unsigned long long terrs;
+       unsigned long long tdrop;
+};
+
+/*
+ * Data-collection types
+ */
+/* Called to collect the information for the node and all the domains on
+ * it. When called, the domain information has already been collected. */
+typedef int (*xenstat_collect_func)(xenstat_handle * handle,
+                                   xenstat_node * node);
+/* Called to free the information collected by the collect function.  The free
+ * function will only be called on a xenstat_node if that node includes
+ * information collected by the corresponding collector. */
+typedef void (*xenstat_free_func)(xenstat_node * node);
+/* Called to free any information stored in the handle.  Note the lack of a
+ * matching init function; the collect functions should initialize on first
+ * use.  Also, the uninit function must handle the case that the collector has
+ * never been initialized. */
+typedef void (*xenstat_uninit_func)(xenstat_handle * handle);
+typedef struct xenstat_collector {
+       unsigned int flag;
+       xenstat_collect_func collect;
+       xenstat_free_func free;
+       xenstat_uninit_func uninit;
+} xenstat_collector;
+
+static int  xenstat_collect_vcpus(xenstat_handle * handle,
+                                 xenstat_node * node);
+static int  xenstat_collect_networks(xenstat_handle * handle,
+                                   xenstat_node * node);
+static void xenstat_free_vcpus(xenstat_node * node);
+static void xenstat_free_networks(xenstat_node * node);
+static void xenstat_uninit_vcpus(xenstat_handle * handle);
+static void xenstat_uninit_networks(xenstat_handle * handle);
+
+static xenstat_collector collectors[] = {
+       { XENSTAT_VCPU, xenstat_collect_vcpus,
+         xenstat_free_vcpus, xenstat_uninit_vcpus },
+       { XENSTAT_NETWORK, xenstat_collect_networks,
+         xenstat_free_networks, xenstat_uninit_networks }
+};
+
+#define NUM_COLLECTORS (sizeof(collectors)/sizeof(xenstat_collector))
+
+/*
+ * libxenstat API
+ */
+/* Create a libxenstat handle: allocate it, record the page size (from
+ * PAGESIZE or PAGE_SIZE when defined, otherwise from sysconf) and open the
+ * xen-interface layer.  Returns NULL if any of those steps fails. */
+xenstat_handle *xenstat_init()
+{
+       xenstat_handle *xh = (xenstat_handle *) calloc(1, sizeof(*xh));
+
+       if (xh == NULL)
+               return NULL;
+
+#if defined(PAGESIZE)
+       xh->page_size = PAGESIZE;
+#elif defined(PAGE_SIZE)
+       xh->page_size = PAGE_SIZE;
+#else
+       xh->page_size = sysconf(_SC_PAGE_SIZE);
+       if (xh->page_size < 0) {
+               perror("Failed to retrieve page size.");
+               free(xh);
+               return NULL;
+       }
+#endif
+
+       xh->xihandle = xi_init();
+       if (xh->xihandle != NULL)
+               return xh;
+
+       perror("xi_init");
+       free(xh);
+       return NULL;
+}
+
+/* Tear down a handle from xenstat_init().  Every collector's uninit hook is
+ * run, then the xen-interface handle is released and the handle itself is
+ * freed.  A NULL handle is ignored. */
+void xenstat_uninit(xenstat_handle * handle)
+{
+       unsigned int c;
+
+       if (handle == NULL)
+               return;
+
+       for (c = 0; c < NUM_COLLECTORS; c++)
+               collectors[c].uninit(handle);
+
+       xi_uninit(handle->xihandle);
+       free(handle);
+}
+
+/* Build a snapshot of the node and of every domain on it.
+ *
+ * Physical-machine data, the Xen version string and the basic per-domain
+ * fields are always gathered; each collector whose flag is fully set in
+ * flags is then run to attach its extra data.  Returns a node that the
+ * caller releases with xenstat_free_node(), or NULL if any step fails (all
+ * memory allocated so far is released first). */
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags)
+{
+#define DOMAIN_CHUNK_SIZE 256
+       xenstat_node *node;
+       dom0_physinfo_t physinfo;
+       xen_extraversion_t version;
+       long vnum = 0;
+       dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
+       unsigned int num_domains, new_domains;
+       unsigned int i;
+
+       /* Create the node */
+       node = (xenstat_node *) calloc(1, sizeof(xenstat_node));
+       if (node == NULL)
+               return NULL;
+
+       /* Get information about the physical system */
+       if (xi_get_physinfo(handle->xihandle, &physinfo) < 0) {
+               free(node);
+               return NULL;
+       }
+
+       /* Get the xen version number and xen version tag */
+       if (xi_get_xen_version(handle->xihandle, &vnum, &version) < 0) {
+               free(node);
+               return NULL;
+       }
+       snprintf(node->xen_version, VERSION_SIZE,
+               "%ld.%ld%s\n", ((vnum >> 16) & 0xFFFF), vnum & 0xFFFF,
+               (char *)version);
+
+       node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
+       node->num_cpus =
+           (physinfo.threads_per_core * physinfo.cores_per_socket *
+            physinfo.sockets_per_node * physinfo.nr_nodes);
+       node->tot_mem = ((unsigned long long)physinfo.total_pages)
+           * handle->page_size;
+       node->free_mem = ((unsigned long long)physinfo.free_pages)
+           * handle->page_size;
+
+       /* malloc(0) is not portable, so allocate a single domain.  This will
+        * be resized below. */
+       node->domains = malloc(sizeof(xenstat_domain));
+       if (node->domains == NULL) {
+               free(node);
+               return NULL;
+       }
+
+       num_domains = 0;
+       do {
+               xenstat_domain *domain, *resized;
+               size_t wanted;
+               int fetched;
+
+               /* NOTE(review): the running count is passed as first_domain;
+                * if that parameter is a domain ID rather than an index,
+                * sparse IDs could make successive chunks overlap - confirm
+                * against the DOM0_GETDOMAININFOLIST definition. */
+               fetched = xi_get_domaininfolist(handle->xihandle,
+                                               domaininfo, num_domains,
+                                               DOMAIN_CHUNK_SIZE);
+               /* A negative return is an error, not a domain count; it
+                * must not reach the unsigned arithmetic below. */
+               if (fetched < 0) {
+                       free(node->domains);
+                       free(node);
+                       return NULL;
+               }
+               new_domains = fetched;
+
+               /* Never ask realloc for zero bytes, and keep the old array
+                * reachable until the new one is known to be valid. */
+               wanted = num_domains + new_domains;
+               if (wanted == 0)
+                       wanted = 1;
+               resized = realloc(node->domains,
+                                 wanted * sizeof(xenstat_domain));
+               if (resized == NULL) {
+                       free(node->domains);
+                       free(node);
+                       return NULL;
+               }
+               node->domains = resized;
+
+               domain = node->domains + num_domains;
+
+               for (i = 0; i < new_domains; i++) {
+                       /* Fill in domain using domaininfo[i] */
+                       domain->id = domaininfo[i].domain;
+                       domain->state = domaininfo[i].flags;
+                       domain->cpu_ns = domaininfo[i].cpu_time;
+                       domain->num_vcpus = domaininfo[i].n_vcpu;
+                       domain->vcpus = NULL;
+                       domain->cur_mem =
+                           ((unsigned long long)domaininfo[i].tot_pages)
+                           * handle->page_size;
+                       /* Widen before multiplying so the product cannot
+                        * wrap in the narrower type. */
+                       domain->max_mem =
+                           domaininfo[i].max_pages == UINT_MAX
+                           ? (unsigned long long)-1
+                           : ((unsigned long long)domaininfo[i].max_pages)
+                             * handle->page_size;
+                       domain->ssid = domaininfo[i].ssidref;
+                       domain->num_networks = 0;
+                       domain->networks = NULL;
+
+                       domain++;
+               }
+               num_domains += new_domains;
+       } while (new_domains == DOMAIN_CHUNK_SIZE);
+       node->num_domains = num_domains;
+
+       /* Run all the extra data collectors requested.  The flag is recorded
+        * before collecting so that xenstat_free_node() also cleans up after
+        * a collector that fails part-way (a 0 return means failure). */
+       node->flags = 0;
+       for (i = 0; i < NUM_COLLECTORS; i++) {
+               if ((flags & collectors[i].flag) == collectors[i].flag) {
+                       node->flags |= collectors[i].flag;
+                       if(collectors[i].collect(handle, node) == 0) {
+                               xenstat_free_node(node);
+                               return NULL;
+                       }
+               }
+       }
+
+       return node;
+}
+
+/* Release a node returned by xenstat_get_node().  For every collector whose
+ * flag is recorded in node->flags the matching free hook runs first; then
+ * the domain array and the node itself are freed.  NULL is ignored. */
+void xenstat_free_node(xenstat_node * node)
+{
+       unsigned int c;
+
+       if (node == NULL)
+               return;
+
+       if (node->domains != NULL) {
+               for (c = 0; c < NUM_COLLECTORS; c++) {
+                       unsigned int wanted = collectors[c].flag;
+
+                       if ((node->flags & wanted) == wanted)
+                               collectors[c].free(node);
+               }
+               free(node->domains);
+       }
+
+       free(node);
+}
+
+/* Look up a domain by its ID.  Returns the first entry of node->domains
+ * whose id equals domid, or NULL when there is none. */
+xenstat_domain *xenstat_node_domain(xenstat_node * node, unsigned int domid)
+{
+       unsigned int remaining = node->num_domains;
+       xenstat_domain *cur = node->domains;
+
+       /* FIXME: binary search */
+       /* Walk the array front to back until the ID matches. */
+       while (remaining-- > 0) {
+               if (cur->id == domid)
+                       return cur;
+               cur++;
+       }
+
+       return NULL;
+}
+
+/* Get the domain stored at position index in node->domains, or NULL when
+ * index is not below node->num_domains. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+                                            unsigned int index)
+{
+       /* index is unsigned, so only the upper bound can be violated. */
+       if (index >= node->num_domains)
+               return NULL;
+
+       return &(node->domains[index]);
+}
+
+const char *xenstat_node_xen_ver(xenstat_node * node)
+{
+       return node->xen_version;
+}
+
+unsigned long long xenstat_node_tot_mem(xenstat_node * node)
+{
+       return node->tot_mem;
+}
+
+unsigned long long xenstat_node_free_mem(xenstat_node * node)
+{
+       return node->free_mem;
+}
+
+unsigned int xenstat_node_num_domains(xenstat_node * node)
+{
+       return node->num_domains;
+}
+
+unsigned int xenstat_node_num_cpus(xenstat_node * node)
+{
+       return node->num_cpus;
+}
+
+/* Get information about the CPU speed */
+unsigned long long xenstat_node_cpu_hz(xenstat_node * node)
+{
+       return node->cpu_hz;
+}
+
+/* Get the domain ID for this domain */
+unsigned xenstat_domain_id(xenstat_domain * domain)
+{
+       return domain->id;
+}
+
+/* Get information about how much CPU time has been used */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain)
+{
+       return domain->cpu_ns;
+}
+
+/* Find the number of VCPUs allocated to a domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain * domain)
+{
+       return domain->num_vcpus;
+}
+
+/* Get the handle for one VCPU of a domain.  Returns NULL when vcpu is out
+ * of range or when the domain has no VCPU array attached (domain->vcpus is
+ * NULL), mirroring the check done in xenstat_domain_network(). */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain, unsigned int vcpu)
+{
+       if (domain->vcpus && vcpu < domain->num_vcpus)
+               return &(domain->vcpus[vcpu]);
+       return NULL;
+}
+
+/* Find the current memory reservation for this domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain)
+{
+       return domain->cur_mem;
+}
+
+/* Find the maximum memory reservation for this domain */
+unsigned long long xenstat_domain_max_mem(xenstat_domain * domain)
+{
+       return domain->max_mem;
+}
+
+/* Find the domain's SSID */
+unsigned int xenstat_domain_ssid(xenstat_domain * domain)
+{
+       return domain->ssid;
+}
+
+/* Get domain states */
+unsigned int xenstat_domain_dying(xenstat_domain * domain)
+{
+       return (domain->state & DOMFLAGS_DYING) == DOMFLAGS_DYING;
+}
+
+/* Non-zero when the domain has DOMFLAGS_SHUTDOWN set and the shutdown code
+ * extracted from its state equals SHUTDOWN_crash. */
+unsigned int xenstat_domain_crashed(xenstat_domain * domain)
+{
+       unsigned int code = (domain->state >> DOMFLAGS_SHUTDOWNSHIFT)
+           & DOMFLAGS_SHUTDOWNMASK;
+
+       if ((domain->state & DOMFLAGS_SHUTDOWN) != DOMFLAGS_SHUTDOWN)
+               return 0;
+
+       return code == SHUTDOWN_crash;
+}
+
+/* Non-zero when the domain has DOMFLAGS_SHUTDOWN set and the shutdown code
+ * extracted from its state is anything other than SHUTDOWN_crash. */
+unsigned int xenstat_domain_shutdown(xenstat_domain * domain)
+{
+       unsigned int code = (domain->state >> DOMFLAGS_SHUTDOWNSHIFT)
+           & DOMFLAGS_SHUTDOWNMASK;
+
+       if ((domain->state & DOMFLAGS_SHUTDOWN) != DOMFLAGS_SHUTDOWN)
+               return 0;
+
+       return code != SHUTDOWN_crash;
+}
+
+unsigned int xenstat_domain_paused(xenstat_domain * domain)
+{
+       return (domain->state & DOMFLAGS_PAUSED) == DOMFLAGS_PAUSED;
+}
+
+unsigned int xenstat_domain_blocked(xenstat_domain * domain)
+{
+       return (domain->state & DOMFLAGS_BLOCKED) == DOMFLAGS_BLOCKED;
+}
+
+unsigned int xenstat_domain_running(xenstat_domain * domain)
+{
+       return (domain->state & DOMFLAGS_RUNNING) == DOMFLAGS_RUNNING;
+}
+
+/* Get the number of networks for a given domain */
+unsigned int xenstat_domain_num_networks(xenstat_domain * domain)
+{
+       return domain->num_networks;
+}
+
+/* Get the network handle to obtain network stats.  Returns NULL when the
+ * domain has no network array or network is not below num_networks. */
+xenstat_network *xenstat_domain_network(xenstat_domain * domain,
+                                       unsigned int network)
+{
+       if (domain->networks == NULL || network >= domain->num_networks)
+               return NULL;
+
+       return &(domain->networks[network]);
+}
+
+/*
+ * VCPU functions
+ */
+/* Collect information about VCPUs: allocate a vcpus array for every domain
+ * in node and fill in each entry's ns from xi_get_vcpu_usage().  Returns 1
+ * on success and 0 on failure (allocation failure or a negative usage
+ * value); arrays allocated before a failure stay attached to their domains. */
+static int xenstat_collect_vcpus(xenstat_handle * handle, xenstat_node * node)
+{
+       unsigned int i, vcpu;
+       /* Fill in VCPU information */
+       for (i = 0; i < node->num_domains; i++) {
+               xenstat_domain *domain = &node->domains[i];
+
+               /* malloc(0) is not portable and may return NULL, which would
+                * be mistaken for an allocation failure; a domain without
+                * VCPUs has nothing to collect, so leave vcpus untouched. */
+               if (domain->num_vcpus == 0)
+                       continue;
+
+               domain->vcpus = malloc(domain->num_vcpus
+                                      * sizeof(xenstat_vcpu));
+               if (domain->vcpus == NULL)
+                       return 0;
+
+               for (vcpu = 0; vcpu < domain->num_vcpus; vcpu++) {
+                       /* FIXME: need to be using a more efficient mechanism*/
+                       long long vcpu_time;
+                       vcpu_time = xi_get_vcpu_usage(handle->xihandle,
+                                                     domain->id, vcpu);
+                       if (vcpu_time < 0)
+                               return 0;
+                       domain->vcpus[vcpu].ns = vcpu_time;
+               }
+       }
+       return 1;
+}
+
+/* Free VCPU information: release the vcpus array of every domain in node. */
+static void xenstat_free_vcpus(xenstat_node * node)
+{
+       unsigned int remaining = node->num_domains;
+       xenstat_domain *cur = node->domains;
+
+       while (remaining-- > 0) {
+               free(cur->vcpus);
+               cur++;
+       }
+}
+
+/* Free VCPU information in handle - nothing to do */
+static void xenstat_uninit_vcpus(xenstat_handle * handle)
+{
+}
+
+/* Get VCPU usage */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu)
+{
+       return vcpu->ns;
+}
+
+/*
+ * Network functions
+ */
+
+/* Expected format of /proc/net/dev */
+static const char PROCNETDEV_HEADER[] =
+    "Inter-|   Receive                                                |"
+    "  Transmit\n"
+    " face |bytes    packets errs drop fifo frame compressed multicast|"
+    "bytes    packets errs drop fifo colls carrier compressed\n";
+
+/* Collect information about networks.  Returns 1 on success, 0 on failure. */
+static int xenstat_collect_networks(xenstat_handle * handle,
+                                   xenstat_node * node)
+{
+       /* Open and validate /proc/net/dev if we haven't already */
+       if (handle->procnetdev == NULL) {
+               /* Zero-filled, so the bytes read below stay terminated */
+               char header[sizeof(PROCNETDEV_HEADER)] = "";
+               handle->procnetdev = fopen("/proc/net/dev", "r");
+               if (handle->procnetdev == NULL) {
+                       perror("Error opening /proc/net/dev");
+                       return 0;
+               }
+
+               /* Validate the format of /proc/net/dev.  On a mismatch, close
+                * the file so that the next call validates it again rather
+                * than parsing a file whose layout was never accepted. */
+               if (fread(header, sizeof(PROCNETDEV_HEADER) - 1, 1,
+                         handle->procnetdev) != 1
+                   || strcmp(header, PROCNETDEV_HEADER) != 0) {
+                       fprintf(stderr,
+                               "Unexpected /proc/net/dev format\n");
+                       fclose(handle->procnetdev);
+                       handle->procnetdev = NULL;
+                       return 0;
+               }
+       }
+
+       /* Fill in networks */
+       /* FIXME: optimize this */
+       fseek(handle->procnetdev, sizeof(PROCNETDEV_HEADER) - 1, SEEK_SET);
+       while (1) {
+               xenstat_domain *domain;
+               xenstat_network net, *networks;
+               unsigned int domid, count;
+               int ret = fscanf(handle->procnetdev,
+                                "vif%u.%u:%llu%llu%llu%llu%*u%*u%*u%*u"
+                                "%llu%llu%llu%llu%*u%*u%*u%*u\n",
+                                &domid, &net.id,
+                                &net.tbytes, &net.tpackets, &net.terrs,
+                                &net.tdrop,
+                                &net.rbytes, &net.rpackets, &net.rerrs,
+                                &net.rdrop);
+               if (ret == EOF)
+                       break;
+               if (ret != 10) {
+                       /* Not a complete vif line: skip to the next line */
+                       int c;
+                       do {
+                               c = fgetc(handle->procnetdev);
+                       } while (c != '\n' && c != EOF);
+                       if (c == EOF)
+                               break;
+                       continue;
+               }
+
+               /* FIXME: this does a search for the domid */
+               domain = xenstat_node_domain(node, domid);
+               if (domain == NULL) {
+                       fprintf(stderr,
+                               "Found interface vif%u.%u but domain %u"
+                               " does not exist.\n", domid, net.id,
+                               domid);
+                       continue;
+               }
+
+               /* Append net.  realloc(NULL, n) acts as malloc; the temporary
+                * keeps the old array and count valid if allocation fails. */
+               count = domain->networks ? domain->num_networks : 0;
+               networks = realloc(domain->networks,
+                                  (count + 1) * sizeof(xenstat_network));
+               if (networks == NULL)
+                       return 0;
+               networks[count] = net;
+               domain->networks = networks;
+               domain->num_networks = count + 1;
+       }
+
+       return 1;
+}
+
+/* Free network information */
+static void xenstat_free_networks(xenstat_node * node)
+{
+       unsigned int i;
+       for (i = 0; i < node->num_domains; i++)
+               free(node->domains[i].networks);
+}
+
+/* Free network information in handle */
+static void xenstat_uninit_networks(xenstat_handle * handle)
+{
+       if(handle->procnetdev)
+               fclose(handle->procnetdev);
+}
+
+/* Get the network ID */
+unsigned int xenstat_network_id(xenstat_network * network)
+{
+       return network->id;
+}
+
+/* Get the number of receive bytes */
+unsigned long long xenstat_network_rbytes(xenstat_network * network)
+{
+       return network->rbytes;
+}
+
+/* Get the number of receive packets */
+unsigned long long xenstat_network_rpackets(xenstat_network * network)
+{
+       return network->rpackets;
+}
+
+/* Get the number of receive errors */
+unsigned long long xenstat_network_rerrs(xenstat_network * network)
+{
+       return network->rerrs;
+}
+
+/* Get the number of receive drops */
+unsigned long long xenstat_network_rdrop(xenstat_network * network)
+{
+       return network->rdrop;
+}
+
+/* Get the number of transmit bytes */
+unsigned long long xenstat_network_tbytes(xenstat_network * network)
+{
+       return network->tbytes;
+}
+
+/* Get the number of transmit packets */
+unsigned long long xenstat_network_tpackets(xenstat_network * network)
+{
+       return network->tpackets;
+}
+
+/* Get the number of transmit errors */
+unsigned long long xenstat_network_terrs(xenstat_network * network)
+{
+       return network->terrs;
+}
+
+/* Get the number of transmit dropped packets */
+unsigned long long xenstat_network_tdrop(xenstat_network * network)
+{
+       return network->tdrop;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/libxenstat/src/xenstat.h
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.h    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,150 @@
+/* libxenstat: statistics-collection library for Xen
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *          David Hendricks <dhendrix@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+/* libxenstat API */
+#ifndef XENSTAT_H
+#define XENSTAT_H
+
+/* Opaque handles */
+typedef struct xenstat_handle xenstat_handle;
+typedef struct xenstat_domain xenstat_domain;
+typedef struct xenstat_node xenstat_node;
+typedef struct xenstat_vcpu xenstat_vcpu;
+typedef struct xenstat_network xenstat_network;
+
+/* Initialize the xenstat library.  Returns a handle to be used with
+ * subsequent calls to the xenstat library, or NULL if an error occurs. */
+xenstat_handle *xenstat_init(void);
+
+/* Release the handle to libxc, free resources, etc. */
+void xenstat_uninit(xenstat_handle * handle);
+
+/* Get all available information about a node */
+#define XENSTAT_VCPU 0x1
+#define XENSTAT_NETWORK 0x2
+#define XENSTAT_ALL (XENSTAT_VCPU|XENSTAT_NETWORK)
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags);
+
+/* Free the information */
+void xenstat_free_node(xenstat_node * node);
+
+/*
+ * Node functions - extract information from a xenstat_node
+ */
+
+/* Get information about the domain with the given domain ID */
+xenstat_domain *xenstat_node_domain(xenstat_node * node, unsigned int domid);
+
+/* Get the domain with the given index; used to loop over all domains. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+                                            unsigned int index);
+/* Get xen version of the node */
+const char *xenstat_node_xen_ver(xenstat_node * node);
+
+/* Get amount of total memory on a node */
+unsigned long long xenstat_node_tot_mem(xenstat_node * node);
+
+/* Get amount of free memory on a node */
+unsigned long long xenstat_node_free_mem(xenstat_node * node);
+
+/* Find the number of domains existing on a node */
+unsigned int xenstat_node_num_domains(xenstat_node * node);
+
+/* Find the number of CPUs existing on a node */
+unsigned int xenstat_node_num_cpus(xenstat_node * node);
+
+/* Get information about the CPU speed */
+unsigned long long xenstat_node_cpu_hz(xenstat_node * node);
+
+/*
+ * Domain functions - extract information from a xenstat_domain
+ */
+
+/* Get the domain ID for this domain */
+unsigned int xenstat_domain_id(xenstat_domain * domain);
+
+/* Get information about how much CPU time has been used */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain);
+
+/* Find the number of VCPUs allocated to a domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain * domain);
+
+/* Get the VCPU handle to obtain VCPU stats */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain, unsigned int vcpu);
+
+/* Find the current memory reservation for this domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain);
+
+/* Find the maximum memory reservation for this domain */
+unsigned long long xenstat_domain_max_mem(xenstat_domain * domain);
+
+/* Find the domain's SSID */
+unsigned int xenstat_domain_ssid(xenstat_domain * domain);
+
+/* Get domain states */
+unsigned int xenstat_domain_dying(xenstat_domain * domain);
+unsigned int xenstat_domain_crashed(xenstat_domain * domain);
+unsigned int xenstat_domain_shutdown(xenstat_domain * domain);
+unsigned int xenstat_domain_paused(xenstat_domain * domain);
+unsigned int xenstat_domain_blocked(xenstat_domain * domain);
+unsigned int xenstat_domain_running(xenstat_domain * domain);
+
+/* Get the number of networks for a given domain */
+unsigned int xenstat_domain_num_networks(xenstat_domain * domain);
+
+/* Get the network handle to obtain network stats */
+xenstat_network *xenstat_domain_network(xenstat_domain * domain,
+                                       unsigned int network);
+
+/*
+ * VCPU functions - extract information from a xenstat_vcpu
+ */
+
+/* Get VCPU usage */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu);
+
+/*
+ * Network functions - extract information from a xenstat_network
+ */
+
+/* Get the ID for this network */
+unsigned int xenstat_network_id(xenstat_network * network);
+
+/* Get the number of receive bytes for this network */
+unsigned long long xenstat_network_rbytes(xenstat_network * network);
+
+/* Get the number of receive packets for this network */
+unsigned long long xenstat_network_rpackets(xenstat_network * network);
+
+/* Get the number of receive errors for this network */
+unsigned long long xenstat_network_rerrs(xenstat_network * network);
+
+/* Get the number of receive drops for this network */
+unsigned long long xenstat_network_rdrop(xenstat_network * network);
+
+/* Get the number of transmit bytes for this network */
+unsigned long long xenstat_network_tbytes(xenstat_network * network);
+
+/* Get the number of transmit packets for this network */
+unsigned long long xenstat_network_tpackets(xenstat_network * network);
+
+/* Get the number of transmit errors for this network */
+unsigned long long xenstat_network_terrs(xenstat_network * network);
+
+/* Get the number of transmit drops for this network */
+unsigned long long xenstat_network_tdrop(xenstat_network * network);
+#endif /* XENSTAT_H */
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/Makefile
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/Makefile     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,44 @@
+# Copyright (C) International Business Machines Corp., 2005
+# Author: Josh Triplett <josht@xxxxxxxxxx>
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; under version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+.PHONY: all install clean   # command names, never files on disk
+ifneq ($(XENSTAT_XENTOP),y)
+all install xentop:
+else
+# xentop enabled: build the binary and install it with its man page
+INSTALL         = install
+INSTALL_PROG    = $(INSTALL) -m0755 -D
+INSTALL_DATA    = $(INSTALL) -m0644 -D
+
+prefix=/usr
+mandir=$(prefix)/share/man
+man1dir=$(mandir)/man1
+sbindir=$(prefix)/sbin
+
+CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT)
+LDFLAGS += -L$(XEN_LIBXENSTAT)
+LDLIBS += -lxenstat -lncurses
+
+all: xentop
+
+xentop: xentop.o
+
+install: xentop xentop.1
+	$(INSTALL_PROG) xentop $(DESTDIR)$(sbindir)/xentop
+	$(INSTALL_DATA) xentop.1 $(DESTDIR)$(man1dir)/xentop.1
+
+endif
+
+clean:
+	rm -f xentop xentop.o
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/TODO
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/TODO Thu Aug 25 22:53:20 2005
@@ -0,0 +1,34 @@
+Display error messages on the help line after bad input at a prompt.
+Fractional delay times
+Use prompting to search for domains
+Better line editing?
+
+* Make CPU in % more accurate
+* Domain total network TX % and RX %
+
+Like Top, f feature, field select of domain columns, toggle the display of
+field by typing the letter associated with field, if displayed it shows in
+bold and the letter is Capitalized along with a leading asterisk for the
+field, if not selected for display letter is lowercase, no leading asterisk
+and field is not bolded.
+
+Like Top, ordering of domain columns, o feature Capital letter shifts left,
+lowercase letter shifts right?
+
+Color
+Full management: pause, destroy, create domains
+
+Add support for Virtual Block Devices (vbd)
+
+To think about:
+Support for more than one node display (distributed monitoring
+from any node of all other nodes in a cluster)
+Bottom line option (Switch node, Search node [tab completion?])
+
+Capture/Logging of resource information generated during a time interval.
+-b batch mode dump snapshots to standard output (used with -n)
+-n number of iterations to dump to standard output (unlimited if not specified)
+-d monitor DomIDs as -dD1,-dD2 or -dD1,D2...
+   Monitor only domains with specified domain IDs
+-m monitor nodeIDs as -mN1,-mN2 or -mN1,N2...
+   Monitor only domains with specified node IDs
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/xentop.1
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/xentop.1     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,88 @@
+.\" Copyright (C) International Business Machines  Corp., 2005
+.\" Author: Josh Triplett <josht@xxxxxxxxxx>
+.\"
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; under version 2 of the License.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+.TH xentop 1 "August 2005"
+.SH NAME
+\fBxentop\fR \- displays real-time information about a Xen system and domains
+
+.SH SYNOPSIS
+.B xentop
+[\fB\-h\fR]
+[\fB\-V\fR]
+[\fB\-d\fRSECONDS]
+[\fB\-n\fR]
+[\fB\-r\fR]
+[\fB\-v\fR]
+
+.SH DESCRIPTION
+\fBxentop\fR displays information about the Xen system and domains, in a
+continually-updating manner.  Command-line options and interactive commands
+can change the detail and format of the information displayed by \fBxentop\fR.
+
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+output version information and exit
+.TP
+\fB\-d\fR, \fB\-\-delay\fR=\fISECONDS\fR
+seconds between updates (default 3)
+.TP
+\fB\-n\fR, \fB\-\-networks\fR
+output network information
+.TP
+\fB\-r\fR, \fB\-\-repeat\-header\fR
+repeat table header before each domain
+.TP
+\fB\-v\fR, \fB\-\-vcpus\fR
+output VCPU data
+
+.SH "INTERACTIVE COMMANDS"
+All interactive commands are case-insensitive.
+.TP
+.B D
+set delay between updates
+.TP
+.B N
+toggle display of network information
+.TP
+.B Q, Esc
+quit
+.TP
+.B R
+toggle table header before each domain
+.TP
+.B S
+cycle sort order
+.TP
+.B V
+toggle display of VCPU information
+.TP
+.B Arrows
+scroll domain display
+
+.SH AUTHORS
+Written by Judy Fischbach, David Hendricks, and Josh Triplett
+
+.SH "REPORTING BUGS"
+Report bugs to <dsteklof@xxxxxxxxxx>.
+
+.SH COPYRIGHT
+Copyright \(co 2005  International Business Machines  Corp
+.br
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstat/xentop/xentop.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstat/xentop/xentop.c     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,876 @@
+/*
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Judy Fischbach <jfisch@xxxxxxxxxx>
+ *             David Hendricks <dhendrix@xxxxxxxxxx>
+ *             Josh Triplett <josht@xxxxxxxxxx>
+ *    based on code from Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#define _GNU_SOURCE /* must precede every system header to take effect */
+#include <curses.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <xenstat.h>
+
+#define XENTOP_VERSION "1.0"
+
+#define XENTOP_DISCLAIMER \
+"Copyright (C) 2005  International Business Machines  Corp\n"\
+"This is free software; see the source for copying conditions.  There is NO\n"\
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
+#define XENTOP_BUGSTO "Report bugs to <dsteklof@xxxxxxxxxx>.\n"
+
+#if !defined(__GNUC__) && !defined(__GNUG__)
+#define __attribute__(arg) /* empty */
+#endif
+
+#define KEY_ESCAPE '\x1B'
+
+/*
+ * Function prototypes
+ */
+/* Utility functions */
+static void usage(const char *);
+static void version(void);
+static void cleanup(void);
+static void fail(const char *);
+static int current_row(void);
+static int lines(void);
+static void print(const char *, ...) __attribute__((format(printf,1,2)));
+static void attr_addstr(int attr, const char *str);
+static void set_delay(char *value);
+static void set_prompt(char *new_prompt, void (*func)(char *));
+static int handle_key(int);
+static int compare(unsigned long long, unsigned long long);
+static int compare_domains(xenstat_domain **, xenstat_domain **);
+static unsigned long long tot_net_bytes( xenstat_domain *, int);
+
+/* Field functions */
+static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_domid(xenstat_domain *domain);
+static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_state(xenstat_domain *domain);
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_cpu(xenstat_domain *domain);
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_cpu_pct(xenstat_domain *domain);
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_mem(xenstat_domain *domain);
+static void print_mem_pct(xenstat_domain *domain);
+static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_maxmem(xenstat_domain *domain);
+static void print_max_pct(xenstat_domain *domain);
+static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_vcpus(xenstat_domain *domain);
+static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_nets(xenstat_domain *domain);
+static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_net_tx(xenstat_domain *domain);
+static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_net_rx(xenstat_domain *domain);
+static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_ssid(xenstat_domain *domain);
+
+/* Section printing functions */
+static void do_summary(void);
+static void do_header(void);
+static void do_bottom_line(void);
+static void do_domain(xenstat_domain *);
+static void do_vcpu(xenstat_domain *);
+static void do_network(xenstat_domain *);
+static void top(void);
+
+/* Field types */
+typedef enum field_id {
+       FIELD_DOMID,
+       FIELD_STATE,
+       FIELD_CPU,
+       FIELD_CPU_PCT,
+       FIELD_MEM,
+       FIELD_MEM_PCT,
+       FIELD_MAXMEM,
+       FIELD_MAX_PCT,
+       FIELD_VCPUS,
+       FIELD_NETS,
+       FIELD_NET_TX,
+       FIELD_NET_RX,
+       FIELD_SSID
+} field_id;
+
+typedef struct field {
+       field_id num;
+       const char *header;
+       unsigned int default_width;
+       int (*compare)(xenstat_domain *domain1, xenstat_domain *domain2);
+       void (*print)(xenstat_domain *domain);
+} field;
+
+field fields[] = {
+       { FIELD_DOMID,   "DOMID",      5, compare_domid,   print_domid   },
+       { FIELD_STATE,   "STATE",      6, compare_state,   print_state   },
+       { FIELD_CPU,     "CPU(sec)",  10, compare_cpu,     print_cpu     },
+       { FIELD_CPU_PCT, "CPU(%)",     6, compare_cpu_pct, print_cpu_pct },
+       { FIELD_MEM,     "MEM(k)",    10, compare_mem,     print_mem     },
+       { FIELD_MEM_PCT, "MEM(%)",     6, compare_mem,     print_mem_pct },
+       { FIELD_MAXMEM,  "MAXMEM(k)", 10, compare_maxmem,  print_maxmem  },
+       { FIELD_MAX_PCT, "MAXMEM(%)",  9, compare_maxmem,  print_max_pct },
+       { FIELD_VCPUS,   "VCPUS",      5, compare_vcpus,   print_vcpus   },
+       { FIELD_NETS,    "NETS",       4, compare_nets,    print_nets    },
+       { FIELD_NET_TX,  "NETTX(k)",   8, compare_net_tx,  print_net_tx  },
+       { FIELD_NET_RX,  "NETRX(k)",   8, compare_net_rx,  print_net_rx  },
+       { FIELD_SSID,    "SSID",       4, compare_ssid,    print_ssid    }
+};
+
+const unsigned int NUM_FIELDS = sizeof(fields)/sizeof(field);
+
+/* Globals */
+struct timeval curtime, oldtime;
+xenstat_handle *xhandle = NULL;
+xenstat_node *prev_node = NULL;
+xenstat_node *cur_node = NULL;
+field_id sort_field = FIELD_DOMID;
+unsigned int first_domain_index = 0;
+unsigned int delay = 3;
+int show_vcpus = 0;
+int show_networks = 0;
+int repeat_header = 0;
+#define PROMPT_VAL_LEN 80
+char *prompt = NULL;
+char prompt_val[PROMPT_VAL_LEN];
+int prompt_val_len = 0;
+void (*prompt_complete_func)(char *);
+
+/*
+ * Function definitions
+ */
+
+/* Utility functions */
+
+/* Print usage message, using given program name */
+static void usage(const char *program)
+{
+       printf("Usage: %s [OPTION]\n"
+              "Displays ongoing information about xen vm resources \n\n"
+              "-h, --help           display this help and exit\n"
+              "-V, --version        output version information and exit\n"
+              "-d, --delay=SECONDS  seconds between updates (default 3)\n"
+              "-n, --networks       output vif network data\n"
+              "-r, --repeat-header  repeat table header before each domain\n"
+              "-v, --vcpus          output vcpu data\n"
+              "\n" XENTOP_BUGSTO,
+              program);
+       return;
+}
+
+/* Print program version information */
+static void version(void)
+{
+       printf("xentop " XENTOP_VERSION "\n"
+              "Written by Judy Fischbach, David Hendricks, Josh Triplett\n"
+              "\n" XENTOP_DISCLAIMER);
+}
+
+/* Clean up any open resources */
+static void cleanup(void)
+{
+       if(!isendwin())
+               endwin();
+       if(prev_node != NULL)
+               xenstat_free_node(prev_node);
+       if(cur_node != NULL)
+               xenstat_free_node(cur_node);
+       if(xhandle != NULL)
+               xenstat_uninit(xhandle);
+}
+
+/* Display the given message verbatim (not as a format string) and exit */
+static void fail(const char *str)
+{
+       if(!isendwin())
+               endwin();
+       fputs(str, stderr);
+       exit(1);
+}
+
+/* Return the row containing the cursor. */
+static int current_row(void)
+{
+       int y, x;
+       getyx(stdscr, y, x);
+       return y;
+}
+
+/* Return the number of lines on the screen. */
+static int lines(void)
+{
+       int y, x;
+       getmaxyx(stdscr, y, x);
+       return y;
+}
+
+/* printf-style print function which calls printw, but only if the cursor is
+ * not on the last line. */
+static void print(const char *fmt, ...)
+{
+       va_list args;
+
+       if(current_row() < lines()-1) {
+               va_start(args, fmt);
+               vw_printw(stdscr, fmt, args);
+               va_end(args);
+       }
+}
+
+/* Print a string with the given attributes set. */
+static void attr_addstr(int attr, const char *str)
+{
+       attron(attr);
+       addstr(str);
+       attroff(attr);
+}
+
+/* Handle setting the delay from the user-supplied value in prompt_val */
+static void set_delay(char *value)
+{
+       int new_delay;
+       new_delay = atoi(value);
+       if(new_delay > 0)
+               delay = new_delay;
+}
+
+/* Enable prompting mode with the given prompt string; call the given function
+ * when a value is available. */
+static void set_prompt(char *new_prompt, void (*func)(char *))
+{
+       prompt = new_prompt;
+       prompt_val[0] = '\0';
+       prompt_val_len = 0;
+       prompt_complete_func = func;
+}
+
+/* Handle user input, return 0 if the program should quit, or 1 if not */
+static int handle_key(int ch)
+{
+       if(prompt == NULL) {
+               /* Not prompting for input; handle interactive commands */
+               switch(ch) {
+               case 'n': case 'N':
+                       show_networks ^= 1;
+                       break;
+               case 'r': case 'R':
+                       repeat_header ^= 1;
+                       break;
+               case 's': case 'S':
+                       sort_field = (sort_field + 1) % NUM_FIELDS;
+                       break;
+               case 'v': case 'V':
+                       show_vcpus ^= 1;
+                       break;
+               case KEY_DOWN:
+                       first_domain_index++;
+                       break;
+               case KEY_UP:
+                       if(first_domain_index > 0)
+                               first_domain_index--;
+                       break;
+               case 'd': case 'D':
+                       set_prompt("Delay(sec)", set_delay);
+                       break;
+               case 'q': case 'Q': case KEY_ESCAPE:
+                       return 0;
+               }
+       } else {
+               /* Prompting for input; handle line editing */
+               switch(ch) {
+               case '\r':
+                       prompt_complete_func(prompt_val);
+                       set_prompt(NULL, NULL);
+                       break;
+               case KEY_ESCAPE:
+                       set_prompt(NULL, NULL);
+                       break;
+               case KEY_BACKSPACE:
+                       if(prompt_val_len > 0)
+                               prompt_val[--prompt_val_len] = '\0';
+                       break; /* must not fall through to default */
+               default: /* isprint() is only defined for unsigned char values */
+                       if((prompt_val_len+1) < PROMPT_VAL_LEN
+                          && ch >= 0 && ch <= 0xFF && isprint(ch)) {
+                               prompt_val[prompt_val_len++] = (char)ch;
+                               prompt_val[prompt_val_len] = '\0';
+                       }
+               }
+       }
+       return 1;
+}
+
+/* Compares two integers, returning -1,0,1 for <,=,> */
+static int compare(unsigned long long i1, unsigned long long i2)
+{
+       if(i1 < i2)
+               return -1;
+       if(i1 > i2)
+               return 1;
+       return 0;
+}
+
+/* Comparison function for use with qsort.  Compares two domains using the
+ * current sort field. */
+static int compare_domains(xenstat_domain **domain1, xenstat_domain **domain2)
+{
+       return fields[sort_field].compare(*domain1, *domain2);
+}
+
+/* Field functions */
+
+/* Compares domain ids of two domains, returning -1,0,1 for <,=,> */
+static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       return compare(xenstat_domain_id(domain1), xenstat_domain_id(domain2));
+}
+
+/* Prints domain identification number */
+static void print_domid(xenstat_domain *domain)
+{
+       print("%5u", xenstat_domain_id(domain));
+}
+
+struct {
+       unsigned int (*get)(xenstat_domain *);
+       char ch;
+} state_funcs[] = {
+       { xenstat_domain_dying,    'd' },
+       { xenstat_domain_shutdown, 's' },
+       { xenstat_domain_blocked,  'b' },
+       { xenstat_domain_crashed,  'c' },
+       { xenstat_domain_paused,   'p' },
+       { xenstat_domain_running,  'r' }
+};
+const unsigned int NUM_STATES = sizeof(state_funcs)/sizeof(*state_funcs);
+
+/* Compare states of two domains, returning -1,0,1 for <,=,> */
+static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       unsigned int i, d1s, d2s;
+       for(i = 0; i < NUM_STATES; i++) {
+               d1s = state_funcs[i].get(domain1);
+               d2s = state_funcs[i].get(domain2);
+               if(d1s && !d2s)
+                       return -1;
+               if(d2s && !d1s)
+                       return 1;
+       }
+       return 0;
+}
+
+/* Prints domain state in abbreviated letter format */
+static void print_state(xenstat_domain *domain)
+{
+       unsigned int i;
+       for(i = 0; i < NUM_STATES; i++)
+               print("%c", state_funcs[i].get(domain) ? state_funcs[i].ch
+                                                      : '-');
+}
+
+/* Compares cpu usage of two domains, returning -1,0,1 for <,=,> */
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       return -compare(xenstat_domain_cpu_ns(domain1),
+                       xenstat_domain_cpu_ns(domain2));
+}
+
+/* Prints domain cpu usage in seconds */
+static void print_cpu(xenstat_domain *domain)
+{
+       print("%10llu", xenstat_domain_cpu_ns(domain)/1000000000);
+}
+
+/* Computes the CPU percentage used for a specified domain */
+static double get_cpu_pct(xenstat_domain *domain)
+{
+       xenstat_domain *old_domain;
+       double us_elapsed;
+       /* Can't calculate CPU percentage without a previous sample. */
+       if(prev_node == NULL)
+               return 0.0;
+       old_domain = xenstat_node_domain(prev_node, xenstat_domain_id(domain));
+       if(old_domain == NULL)
+               return 0.0;
+
+       /* Elapsed time in microseconds; unusable if zero or negative */
+       us_elapsed = ((curtime.tv_sec-oldtime.tv_sec)*1000000.0
+                     +(curtime.tv_usec - oldtime.tv_usec));
+       if(us_elapsed <= 0.0)
+               return 0.0;
+
+       /* ns/1000.0 = us of CPU; x100.0 for percent => divide by 10.0 */
+       return ((xenstat_domain_cpu_ns(domain)
+                -xenstat_domain_cpu_ns(old_domain))/10.0)/us_elapsed;
+}
+
+/* Compares CPU percentage as doubles, so fractions are not truncated */
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       double pct1 = get_cpu_pct(domain1), pct2 = get_cpu_pct(domain2);
+       return (pct1 < pct2) - (pct1 > pct2);
+}
+
+/* Prints cpu percentage statistic */
+static void print_cpu_pct(xenstat_domain *domain)
+{
+       print("%6.1f", get_cpu_pct(domain));
+}
+
+/* Compares current memory of two domains, returning -1,0,1 for <,=,> */
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       return -compare(xenstat_domain_cur_mem(domain1),
+                       xenstat_domain_cur_mem(domain2));
+}
+
+/* Prints current memory statistic */
+static void print_mem(xenstat_domain *domain)
+{
+       print("%10llu", xenstat_domain_cur_mem(domain)/1024);
+}
+
+/* Prints memory percentage statistic, ratio of current domain memory to total
+ * node memory */
+static void print_mem_pct(xenstat_domain *domain)
+{
+       print("%6.1f", (double)xenstat_domain_cur_mem(domain) /
+                      (double)xenstat_node_tot_mem(cur_node) * 100);
+}
+
+/* Compares maximum memory of two domains, returning -1,0,1 for <,=,> */
+static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       return -compare(xenstat_domain_max_mem(domain1),
+                       xenstat_domain_max_mem(domain2));
+}
+
+/* Emits the domain's maximum memory in KB, or the text "no limit" when the
+ * reported maximum is the all-ones value (unsigned long long)-1 */
+static void print_maxmem(xenstat_domain *domain)
+{
+       const unsigned long long unlimited = (unsigned long long)-1;
+       unsigned long long max_mem = xenstat_domain_max_mem(domain);
+
+       if(max_mem != unlimited) {
+               print("%10llu", max_mem/1024);
+               return;
+       }
+
+       print("%10s", "no limit");
+}
+
+/* Emits the domain's maximum memory as a percentage of the total memory of
+ * cur_node, or "n/a" when the maximum is (unsigned long long)-1 */
+static void print_max_pct(xenstat_domain *domain)
+{
+       unsigned long long max_mem = xenstat_domain_max_mem(domain);
+
+       if (max_mem == (unsigned long long)-1) {
+               print("%9s", "n/a");
+               return;
+       }
+
+       print("%9.1f", (double)max_mem /
+                      (double)xenstat_node_tot_mem(cur_node) * 100);
+}
+
+/* Orders two domains by their number of virtual CPUs.  The value produced by
+ * compare() is negated before it is returned, so the resulting order is the
+ * reverse of the one compare() reports. */
+static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       int order = compare(xenstat_domain_num_vcpus(domain1),
+                           xenstat_domain_num_vcpus(domain2));
+
+       return -order;
+}
+
+/* Emits the domain's virtual CPU count in a 5-character column */
+static void print_vcpus(xenstat_domain *domain)
+{
+       unsigned int num_vcpus = xenstat_domain_num_vcpus(domain);
+
+       print("%5u", num_vcpus);
+}
+
+/* Orders two domains by their number of virtual networks.  The value produced
+ * by compare() is negated before it is returned, so the resulting order is
+ * the reverse of the one compare() reports. */
+static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       int order = compare(xenstat_domain_num_networks(domain1),
+                           xenstat_domain_num_networks(domain2));
+
+       return -order;
+}
+
+/* Emits the domain's virtual network count in a 4-character column */
+static void print_nets(xenstat_domain *domain)
+{
+       unsigned int num_networks = xenstat_domain_num_networks(domain);
+
+       print("%4u", num_networks);
+}
+
+/* Compares number of total network tx bytes of two domains, returning -1,0,1 for
+ * <,=,> */
+static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       /* FALSE makes tot_net_bytes() total the transmitted (tx) bytes */
+       unsigned long long tx1 = tot_net_bytes(domain1, FALSE);
+       unsigned long long tx2 = tot_net_bytes(domain2, FALSE);
+
+       /* Negated: the order is the reverse of the one compare() reports */
+       return -compare(tx1, tx2);
+}
+
+/* Emits the domain's total transmitted network bytes divided by 1024, in an
+ * 8-character column */
+static void print_net_tx(xenstat_domain *domain)
+{
+       unsigned long long tx_bytes = tot_net_bytes(domain, FALSE);
+
+       print("%8llu", tx_bytes/1024);
+}
+
+/* Compares number of total network rx bytes of two domains, returning -1,0,1 for
+ * <,=,> */
+static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       /* TRUE makes tot_net_bytes() total the received (rx) bytes */
+       unsigned long long rx1 = tot_net_bytes(domain1, TRUE);
+       unsigned long long rx2 = tot_net_bytes(domain2, TRUE);
+
+       /* Negated: the order is the reverse of the one compare() reports */
+       return -compare(rx1, rx2);
+}
+
+/* Emits the domain's total received network bytes divided by 1024, in an
+ * 8-character column */
+static void print_net_rx(xenstat_domain *domain)
+{
+       unsigned long long rx_bytes = tot_net_bytes(domain, TRUE);
+
+       print("%8llu", rx_bytes/1024);
+}
+
+/* Sums one byte counter over all of a domain's virtual networks.
+ *
+ * domain:  domain whose networks are totalled
+ * rx_flag: non-zero to add up received bytes (xenstat_network_rbytes),
+ *          zero to add up transmitted bytes (xenstat_network_tbytes)
+ *
+ * Returns the total, which is 0 when the domain has no networks.
+ */
+static unsigned long long tot_net_bytes(xenstat_domain *domain, int rx_flag)
+{
+       /* Unsigned index: it is compared against the unsigned network count */
+       unsigned i;
+       unsigned num_networks = xenstat_domain_num_networks(domain);
+       unsigned long long total = 0;
+       xenstat_network *network;
+
+       /* Accumulate the selected counter of every network */
+       for (i = 0; i < num_networks; i++) {
+               network = xenstat_domain_network(domain, i);
+               if (rx_flag)
+                       total += xenstat_network_rbytes(network);
+               else
+                       total += xenstat_network_tbytes(network);
+       }
+
+       return total;
+}
+
+/* Orders two domains by security id (ssid).  The value produced by compare()
+ * is returned as is (it is not negated here). */
+static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       int order = compare(xenstat_domain_ssid(domain1),
+                           xenstat_domain_ssid(domain2));
+
+       return order;
+}
+
+/* Emits the domain's security id (ssid) in a 4-character column */
+static void print_ssid(xenstat_domain *domain)
+{
+       unsigned int ssid = xenstat_domain_ssid(domain);
+
+       print("%4u", ssid);
+}
+
+/* Section printing functions */
+/* Prints the top summary, above the domain table: the program name and the
+ * time of the current sample, the number of domains in each state, and the
+ * node's memory and CPU figures */
+void do_summary(void)
+{
+#define TIME_STR_LEN 9
+       const char *TIME_STR_FORMAT = "%H:%M:%S";
+       char time_str[TIME_STR_LEN];
+       /* Per-state counters; named so they do not shadow libc's pause() and
+        * shutdown() */
+       unsigned n_run = 0, n_block = 0, n_pause = 0;
+       unsigned n_crash = 0, n_dying = 0, n_shutdown = 0;
+       unsigned idx, num_domains = 0;
+       unsigned long long tot_mem, free_mem;
+       xenstat_domain *dom;
+
+       /* Program name and the time held in curtime, as HH:MM:SS */
+       strftime(time_str, TIME_STR_LEN, TIME_STR_FORMAT,
+                localtime(&curtime.tv_sec));
+       num_domains = xenstat_node_num_domains(cur_node);
+       print("xentop - %s\n", time_str);
+
+       /* Each domain is counted once, under the first state test it passes */
+       for (idx = 0; idx < num_domains; idx++) {
+               dom = xenstat_node_domain_by_index(cur_node, idx);
+
+               if (xenstat_domain_running(dom))
+                       n_run++;
+               else if (xenstat_domain_blocked(dom))
+                       n_block++;
+               else if (xenstat_domain_paused(dom))
+                       n_pause++;
+               else if (xenstat_domain_shutdown(dom))
+                       n_shutdown++;
+               else if (xenstat_domain_crashed(dom))
+                       n_crash++;
+               else if (xenstat_domain_dying(dom))
+                       n_dying++;
+       }
+
+       print("%u domains: %u running, %u blocked, %u paused, "
+             "%u crashed, %u dying, %u shutdown \n",
+             num_domains, n_run, n_block, n_pause, n_crash, n_dying,
+             n_shutdown);
+
+       tot_mem = xenstat_node_tot_mem(cur_node);
+       free_mem = xenstat_node_free_mem(cur_node);
+
+       /* Node memory (total, used = total - free, free; each divided by 1024)
+        * followed by the CPU count and the CPU clock divided by 10^6 */
+       print("Mem: %lluk total, %lluk used, %lluk free    "
+             "CPUs: %u @ %lluMHz\n",
+             tot_mem/1024, (tot_mem - free_mem)/1024, free_mem/1024,
+             xenstat_node_num_cpus(cur_node),
+             xenstat_node_cpu_hz(cur_node)/1000000);
+}
+
+/* Prints the column headings of the domain table in reverse video; the
+ * heading of the current sort column is additionally shown in bold */
+void do_header(void)
+{
+       field_id col;
+
+       attron(A_REVERSE);
+       for(col = 0; col < NUM_FIELDS; col++) {
+               int is_sort_col = (col == sort_field);
+
+               /* One blank separates adjacent columns */
+               if(col != 0)
+                       print(" ");
+
+               if(is_sort_col)
+                       attron(A_BOLD);
+               /* Heading right-aligned to the field's default width */
+               print("%*s", fields[col].default_width, fields[col].header);
+               if(is_sort_col)
+                       attroff(A_BOLD);
+       }
+       attroff(A_REVERSE);
+       print("\n");
+}
+
+/* Draws the status line at row lines()-1: the active prompt and its current
+ * value when a prompt is set, otherwise the key legend.  Every legend entry
+ * shows its hot-key letter in reverse video, and the label of a toggle that
+ * is switched on is drawn with colour pair 1. */
+void do_bottom_line(void)
+{
+       move(lines()-1, 2);
+
+       /* A pending prompt replaces the key legend entirely */
+       if (prompt != NULL) {
+               printw("%s: %s", prompt, prompt_val);
+               return;
+       }
+
+       /* delay */
+       addch(A_REVERSE | 'D');
+       addstr("elay  ");
+
+       /* network */
+       addch(A_REVERSE | 'N');
+       attr_addstr(show_networks ? COLOR_PAIR(1) : 0, "etworks");
+       addstr("  ");
+
+       /* vcpus */
+       addch(A_REVERSE | 'V');
+       attr_addstr(show_vcpus ? COLOR_PAIR(1) : 0, "CPUs");
+       addstr("  ");
+
+       /* repeat */
+       addch(A_REVERSE | 'R');
+       attr_addstr(repeat_header ? COLOR_PAIR(1) : 0, "epeat header");
+       addstr("  ");
+
+       /* sort order */
+       addch(A_REVERSE | 'S');
+       addstr("ort order  ");
+
+       /* quit */
+       addch(A_REVERSE | 'Q');
+       addstr("uit  ");
+}
+
+/* Prints one row of the domain table: every field's print callback is invoked
+ * in turn, separated by single blanks, with the sort column shown in bold */
+void do_domain(xenstat_domain *domain)
+{
+       unsigned int col;
+
+       for(col = 0; col < NUM_FIELDS; col++) {
+               int is_sort_col = (col == sort_field);
+
+               if(col != 0)
+                       print(" ");
+
+               if(is_sort_col)
+                       attron(A_BOLD);
+               fields[col].print(domain);
+               if(is_sort_col)
+                       attroff(A_BOLD);
+       }
+       print("\n");
+}
+
+/* Prints the accumulated time of each of the domain's VCPUs in whole seconds
+ * (xenstat_vcpu_ns() divided by 10^9), at most five VCPUs per output line */
+void do_vcpu(xenstat_domain *domain)
+{
+       /* Unsigned so that it matches both the unsigned VCPU count it is
+        * compared against and the "%2u" conversion it is printed with */
+       unsigned i;
+       unsigned num_vcpus = 0;
+       xenstat_vcpu *vcpu;
+
+       print("VCPUs(sec): ");
+
+       num_vcpus = xenstat_domain_num_vcpus(domain);
+
+       /* for all vcpus dump out values */
+       for (i = 0; i < num_vcpus; i++) {
+               vcpu = xenstat_domain_vcpu(domain, i);
+
+               /* Start a new, indented line after every fifth VCPU */
+               if (i != 0 && (i%5)==0)
+                       print("\n        ");
+               print(" %2u: %10llus", i, xenstat_vcpu_ns(vcpu)/1000000000);
+       }
+       print("\n");
+}
+
+/* Prints one line per virtual network of the domain, listing the received
+ * (RX) and transmitted (TX) byte, packet, error and drop counters */
+void do_network(xenstat_domain *domain)
+{
+       /* Unsigned index: it is compared against the unsigned network count
+        * and printed with "%u" (same output as before for every index) */
+       unsigned i;
+       unsigned num_networks = xenstat_domain_num_networks(domain);
+       xenstat_network *network;
+
+       /* Dump information for each network */
+       for (i = 0; i < num_networks; i++) {
+               /* Next get the network information */
+               network = xenstat_domain_network(domain, i);
+
+               print("Net%u RX: %8llubytes %8llupkts %8lluerr %8lludrop  ",
+                     i,
+                     xenstat_network_rbytes(network),
+                     xenstat_network_rpackets(network),
+                     xenstat_network_rerrs(network),
+                     xenstat_network_rdrop(network));
+
+               print("TX: %8llubytes %8llupkts %8lluerr %8lludrop\n",
+                     xenstat_network_tbytes(network),
+                     xenstat_network_tpackets(network),
+                     xenstat_network_terrs(network),
+                     xenstat_network_tdrop(network));
+       }
+}
+
+/* Takes a fresh statistics sample and draws one full screen: the summary, the
+ * sorted domain table starting at first_domain_index (with the optional
+ * per-domain VCPU and network lines) and the bottom status line.
+ *
+ * The previous sample is kept in prev_node and the one before it is released.
+ * The temporary array of domain pointers built for sorting is freed before
+ * returning, so repeated refreshes do not leak memory.
+ */
+static void top(void)
+{
+       xenstat_domain **domains;
+       unsigned int i, num_domains = 0;
+
+       /* Now get the node information */
+       if (prev_node != NULL)
+               xenstat_free_node(prev_node);
+       prev_node = cur_node;
+       cur_node = xenstat_get_node(xhandle, XENSTAT_ALL);
+       if (cur_node == NULL)
+               fail("Failed to retrieve statistics from libxenstat\n");
+
+       /* dump summary top information */
+       do_summary();
+
+       /* Count the number of domains for which to report data */
+       num_domains = xenstat_node_num_domains(cur_node);
+
+       domains = malloc(num_domains*sizeof(xenstat_domain *));
+       if(domains == NULL)
+               fail("Failed to allocate memory\n");
+
+       for (i=0; i < num_domains; i++)
+               domains[i] = xenstat_node_domain_by_index(cur_node, i);
+
+       /* Sort */
+       qsort(domains, num_domains, sizeof(xenstat_domain *),
+             (int(*)(const void *, const void *))compare_domains);
+
+       /* Clamp the first displayed row to the last domain; with no domains
+        * at all use 0 rather than letting num_domains-1 wrap around */
+       if(num_domains == 0)
+               first_domain_index = 0;
+       else if(first_domain_index >= num_domains)
+               first_domain_index = num_domains-1;
+
+       for (i = first_domain_index; i < num_domains; i++) {
+               /* Stop once only the row reserved for the bottom line is left */
+               if(current_row() == lines()-1)
+                       break;
+               if (i == first_domain_index || repeat_header)
+                       do_header();
+               do_domain(domains[i]);
+               if (show_vcpus)
+                       do_vcpu(domains[i]);
+               if (show_networks)
+                       do_network(domains[i]);
+       }
+
+       do_bottom_line();
+
+       /* Only the pointer array is ours; the domains belong to cur_node */
+       free(domains);
+}
+
+/* Program entry point: parses the command line, initialises libxenstat and
+ * curses, then redraws the display until handle_key() returns zero.
+ *
+ * Options: -h/--help, -V/--version, -n/--networks, -r/--repeat-header,
+ * -v/--vcpus and -d/--delay <seconds>.  Any unrecognised option prints the
+ * usage text and exits.
+ *
+ * Returns 0 on normal termination.
+ */
+int main(int argc, char **argv)
+{
+       /* longind receives the index of a matched long option; it is named so
+        * that it does not shadow getopt's global optind */
+       int opt, longind = 0;
+       int ch = ERR;
+
+       struct option lopts[] = {
+               { "help",          no_argument,       NULL, 'h' },
+               { "version",       no_argument,       NULL, 'V' },
+               { "networks",      no_argument,       NULL, 'n' },
+               { "repeat-header", no_argument,       NULL, 'r' },
+               { "vcpus",         no_argument,       NULL, 'v' },
+               { "delay",         required_argument, NULL, 'd' },
+               { 0, 0, 0, 0 },
+       };
+       /* One short letter per long option above.  'r' was missing, so -r was
+        * rejected; the stray 'b' had no handler and is dropped. */
+       const char *sopts = "hVnrvd:";
+
+       if (atexit(cleanup) != 0)
+               fail("Failed to install cleanup handler.\n");
+
+       while ((opt = getopt_long(argc, argv, sopts, lopts, &longind)) != -1) {
+               switch (opt) {
+               case 'h':
+               case '?':
+               default:
+                       usage(argv[0]);
+                       exit(0);
+               case 'V':
+                       version();
+                       exit(0);
+               case 'n':
+                       show_networks = 1;
+                       break;
+               case 'r':
+                       repeat_header = 1;
+                       break;
+               case 'v':
+                       show_vcpus = 1;
+                       break;
+               case 'd':
+                       delay = atoi(optarg);
+                       break;
+               }
+       }
+
+       /* Get xenstat handle */
+       xhandle = xenstat_init();
+       if (xhandle == NULL)
+               fail("Failed to initialize xenstat library\n");
+
+       /* Begin curses stuff */
+       initscr();
+       start_color();
+       cbreak();
+       noecho();
+       nonl();
+       keypad(stdscr, TRUE);
+       /* getch() gives up and returns ERR after 5 tenths of a second */
+       halfdelay(5);
+       use_default_colors();
+       /* Pair 1: default foreground on yellow, used for enabled toggles */
+       init_pair(1, -1, COLOR_YELLOW);
+
+       do {
+               gettimeofday(&curtime, NULL);
+               /* Redraw after a key press, or once at least `delay` seconds
+                * have passed since the last redraw */
+               if(ch != ERR || (curtime.tv_sec - oldtime.tv_sec) >= delay) {
+                       clear();
+                       top();
+                       oldtime = curtime;
+                       refresh();
+               }
+               ch = getch();
+       } while (handle_key(ch));
+
+       /* Cleanup occurs in cleanup(), so no work to do here. */
+
+       return 0;
+}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/COPYING
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/COPYING    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,515 @@
+This license (LGPL) applies to the xenstore library which interfaces
+with the xenstore daemon (as stated in xs.c, xs.h, xs_lib.c and
+xs_lib.h).  The remaining files in the directory are licensed as
+stated in the comments (as of this writing, GPL, see ../../COPYING).
+
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard.  To achieve this, non-free programs must
+be allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at least
+    three years, to give the same user the materials specified in
+    Subsection 6a, above, for a charge no more than the cost of
+    performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James
+  Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/01simple.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/01simple.test    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,4 @@
+# Create an entry, read it.
+write /test create contents
+expect contents
+read /test
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/02directory.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/02directory.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,47 @@
+# Root directory has only tool dir in it.
+expect tool
+dir /
+
+# Create a file.
+write /test create contents
+
+# Directory shows it.
+expect test
+expect tool
+dir /
+
+# Make a new directory, check it's there
+mkdir /dir
+expect dir
+expect test
+expect tool
+dir /
+
+# Check it's empty.
+dir /dir
+
+# Create a file, check it exists.
+write /dir/test2 create contents2
+expect test2
+dir /dir
+expect contents2
+read /dir/test2
+
+# Creating dir over the top should fail.
+expect mkdir failed: File exists
+mkdir /dir
+expect mkdir failed: File exists
+mkdir /dir/test2
+
+# Mkdir implicitly creates directories.
+mkdir /dir/1/2/3/4
+expect test2
+expect 1
+dir /dir
+expect 2
+dir /dir/1
+expect 3
+dir /dir/1/2
+expect 4
+dir /dir/1/2/3
+dir /dir/1/2/3/4
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/03write.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/03write.test     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,39 @@
+# Write without create fails.
+expect write failed: No such file or directory
+write /test none contents
+
+# Exclusive write succeeds
+write /test excl contents
+expect contents
+read /test
+
+# Exclusive write fails to overwrite.
+expect write failed: File exists
+write /test excl contents
+
+# Non-exclusive overwrite succeeds.
+write /test none contents2
+expect contents2
+read /test
+write /test create contents3
+expect contents3
+read /test
+
+# Write should implicitly create directories
+write /dir/test create contents
+expect test
+dir /dir
+expect contents
+read /dir/test
+write /dir/1/2/3/4 excl contents4
+expect test
+expect 1
+dir /dir
+expect 2
+dir /dir/1
+expect 3
+dir /dir/1/2
+expect 4
+dir /dir/1/2/3
+expect contents4
+read /dir/1/2/3/4
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/04rm.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/04rm.test        Thu Aug 25 22:53:20 2005
@@ -0,0 +1,18 @@
+# Removing a non-existent node fails.
+expect rm failed: No such file or directory
+rm /test
+expect rm failed: No such file or directory
+rm /dir/test
+
+# Create file and remove it
+write /test excl contents
+rm /test
+
+# Create directory and remove it.
+mkdir /dir
+rm /dir
+
+# Create directory, create file, remove all.
+mkdir /dir
+write /dir/test excl contents
+rm /dir
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/05filepermissions.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/05filepermissions.test   Thu Aug 25 22:53:20 2005
@@ -0,0 +1,81 @@
+# Fail to get perms on non-existent file.
+expect getperm failed: No such file or directory
+getperm /test
+expect getperm failed: No such file or directory
+getperm /dir/test
+
+# Create file: inherits from root (0 READ)
+write /test excl contents
+expect 0 READ
+getperm /test
+setid 1
+expect 0 READ
+getperm /test
+expect contents
+read /test
+expect write failed: Permission denied
+write /test none contents
+
+# Take away read access to file.
+setid 0
+setperm /test 0 NONE
+setid 1
+expect getperm failed: Permission denied
+getperm /test
+expect read failed: Permission denied
+read /test
+expect write failed: Permission denied
+write /test none contents
+
+# Grant everyone write access to file.
+setid 0
+setperm /test 0 WRITE
+setid 1
+expect getperm failed: Permission denied
+getperm /test
+expect read failed: Permission denied
+read /test
+write /test none contents2
+setid 0
+expect contents2
+read /test
+
+# Grant everyone both read and write access.
+setperm /test 0 READ/WRITE
+setid 1
+expect 0 READ/WRITE
+getperm /test
+expect contents2
+read /test
+write /test none contents3
+expect contents3
+read /test
+
+# Change so that user 1 owns it, no one else can do anything.
+setid 0
+setperm /test 1 NONE
+setid 1
+expect 1 NONE
+getperm /test
+expect contents3
+read /test
+write /test none contents4
+
+# User 2 can do nothing.
+setid 2
+expect setperm failed: Permission denied
+setperm /test 2 NONE
+expect getperm failed: Permission denied
+getperm /test
+expect read failed: Permission denied
+read /test
+expect write failed: Permission denied
+write /test none contents4
+
+# Tools can always access things.
+setid 0
+expect 1 NONE
+getperm /test
+expect contents4
+read /test
+write /test none contents5
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/06dirpermissions.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/06dirpermissions.test    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,127 @@
+# Root directory: owned by tool, everyone has read access.
+expect 0 READ
+getperm /
+
+# Create directory: inherits from root.
+mkdir /dir
+expect 0 READ
+getperm /dir
+setid 1
+expect 0 READ
+getperm /dir
+dir /dir
+expect write failed: Permission denied
+write /dir/test create contents2
+
+# Remove everyone's read access to directory: dir, read and write must all fail.
+setid 0
+setperm /dir 0 NONE
+setid 1
+expect dir failed: Permission denied
+dir /dir
+expect read failed: Permission denied
+read /dir/test
+expect write failed: Permission denied
+write /dir/test create contents2
+
+# Grant everyone write access to directory.
+setid 0
+setperm /dir 0 WRITE
+setid 1
+expect getperm failed: Permission denied
+getperm /dir
+expect dir failed: Permission denied
+dir /dir
+write /dir/test create contents
+setid 0
+expect 1 WRITE
+getperm /dir/test
+setperm /dir/test 0 NONE
+expect contents
+read /dir/test
+
+# Grant everyone both read and write access.
+setperm /dir 0 READ/WRITE
+setid 1
+expect 0 READ/WRITE
+getperm /dir
+expect test
+dir /dir
+write /dir/test2 create contents
+expect contents
+read /dir/test2
+setperm /dir/test2 1 NONE
+
+# Change so that user 1 owns it, no one else can do anything.
+setid 0
+setperm /dir 1 NONE
+expect 1 NONE
+getperm /dir
+expect test
+expect test2
+dir /dir
+write /dir/test3 create contents
+
+# User 2 can do nothing.  Can't even tell if file exists.
+setid 2
+expect setperm failed: Permission denied
+setperm /dir 2 NONE
+expect getperm failed: Permission denied
+getperm /dir
+expect dir failed: Permission denied
+dir /dir
+expect read failed: Permission denied
+read /dir/test
+expect read failed: Permission denied
+read /dir/test2
+expect read failed: Permission denied
+read /dir/test3
+expect read failed: Permission denied
+read /dir/test4
+expect write failed: Permission denied
+write /dir/test none contents
+expect write failed: Permission denied
+write /dir/test create contents
+expect write failed: Permission denied
+write /dir/test excl contents
+expect write failed: Permission denied
+write /dir/test4 none contents
+expect write failed: Permission denied
+write /dir/test4 create contents
+expect write failed: Permission denied
+write /dir/test4 excl contents
+
+# Tools can always access things.
+setid 0
+expect 1 NONE
+getperm /dir
+expect test
+expect test2
+expect test3
+dir /dir
+write /dir/test4 create contents
+
+# Inherited by child.
+mkdir /dir/subdir
+expect 1 NONE
+getperm /dir/subdir
+write /dir/subfile excl contents
+expect 1 NONE
+getperm /dir/subfile
+
+# But for domains, they own it.
+setperm /dir/subdir 2 READ/WRITE
+expect 2 READ/WRITE
+getperm /dir/subdir
+setid 3
+write /dir/subdir/subfile excl contents
+expect 3 READ/WRITE
+getperm /dir/subdir/subfile
+
+# Inheritance works through multiple directories, too.
+write /dir/subdir/1/2/3/4 excl contents
+expect 3 READ/WRITE
+getperm /dir/subdir/1/2/3/4
+mkdir /dir/subdir/a/b/c/d
+expect 3 READ/WRITE
+getperm /dir/subdir/a/b/c/d
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/07watch.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/07watch.test     Thu Aug 25 22:53:20 2005
@@ -0,0 +1,194 @@
+# Watch something, write to it, check watch has fired.
+write /test create contents
+
+1 watch /test token
+2 write /test create contents2
+expect 1:/test:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Check that reads don't set it off.
+1 watch /test token
+expect 2:contents2
+2 read /test
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# mkdir, setperm and rm should set it off (also tests watching dirs)
+mkdir /dir
+1 watch /dir token
+2 mkdir /dir/newdir
+expect 1:/dir/newdir:token
+1 waitwatch
+1 ackwatch token
+2 setperm /dir/newdir 0 READ
+expect 1:/dir/newdir:token
+1 waitwatch
+1 ackwatch token
+2 rm /dir/newdir
+expect 1:/dir/newdir:token
+1 waitwatch
+1 ackwatch token
+1 close
+2 close
+
+# We don't get a watch from our own commands.
+watch /dir token
+mkdir /dir/newdir
+expect waitwatch failed: Connection timed out
+waitwatch
+close
+
+# ignore watches while doing commands, should work.
+watch /dir token
+1 write /dir/test create contents
+expect contents
+read /dir/test
+expect /dir/test:token
+waitwatch
+ackwatch token
+close
+
+# watch priority test: all simultaneous
+1 watch /dir token1
+3 watch /dir token3
+2 watch /dir token2
+write /dir/test create contents
+expect 3:/dir/test:token3
+3 waitwatch
+3 ackwatch token3
+expect 2:/dir/test:token2
+2 waitwatch
+2 ackwatch token2
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+2 close
+3 close
+
+# If one dies (without acking), the other should still get ack.
+1 watch /dir token1
+2 watch /dir token2
+write /dir/test create contents
+expect 2:/dir/test:token2
+2 waitwatch
+2 close
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+
+# If one dies (without reading at all), the other should still get ack.
+1 watch /dir token1
+2 watch /dir token2
+write /dir/test create contents
+2 close
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+2 close
+
+# unwatch
+1 watch /dir token1
+1 unwatch /dir token1
+1 watch /dir token2
+2 write /dir/test2 create contents
+expect 1:/dir/test2:token2
+1 waitwatch
+1 unwatch /dir token2
+1 close
+2 close
+
+# unwatch while watch pending.  Other watcher still gets the event.
+1 watch /dir token1
+2 watch /dir token2
+write /dir/test create contents
+2 unwatch /dir token2
+expect 1:/dir/test:token1
+1 waitwatch
+1 ackwatch token1
+1 close
+2 close
+
+# unwatch while watch pending.  Should clear this so we get next event.
+1 watch /dir token1
+write /dir/test create contents
+1 unwatch /dir token1
+1 watch /dir/test token2
+write /dir/test none contents2
+expect 1:/dir/test:token2
+1 waitwatch
+1 ackwatch token2
+
+# check we only get notified once.
+1 watch /test token
+2 write /test create contents2
+expect 1:/test:token
+1 waitwatch
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# watches are queued in order.
+1 watch / token
+2 write /test1 create contents
+2 write /test2 create contents
+2 write /test3 create contents
+expect 1:/test1:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test2:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test3:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Creation of subpaths should be covered correctly.
+1 watch / token
+2 write /test/subnode create contents2
+2 write /test/subnode/subnode create contents2
+expect 1:/test/subnode:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test/subnode/subnode:token
+1 waitwatch
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# Watch event must have happened before we registered interest.
+1 watch / token
+2 write /test/subnode create contents2
+1 watch / token2 0
+expect 1:/test/subnode:token
+1 waitwatch
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+# Rm fires notification on child.
+1 watch /test/subnode token
+2 rm /test
+expect 1:/test/subnode:token
+1 waitwatch
+1 ackwatch token
+
+# Watch should not double-send after we ack, even if we did something in between.
+1 watch /test2 token
+2 write /test2/foo create contents2
+expect 1:/test2/foo:token
+1 waitwatch
+expect 1:contents2
+1 read /test2/foo
+1 ackwatch token
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/08transaction.slowtest
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/08transaction.slowtest   Thu Aug 25 22:53:20 2005
@@ -0,0 +1,21 @@
+# Test transaction timeouts.  Take a second each.
+
+mkdir /test
+write /test/entry1 create contents
+
+# Transactions can take as long as they want...
+start /test
+sleep 1100
+rm /test/entry1
+commit
+dir /test
+
+# ... as long as no one is waiting.
+1 start /test
+notimeout
+2 mkdir /test/dir
+1 mkdir /test/dir
+expect 1:dir
+1 dir /test
+expect 1: commit failed: Connection timed out
+1 commit
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/08transaction.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/08transaction.test       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,96 @@
+# Test transactions.
+
+mkdir /test
+
+# Simple transaction: create a file inside transaction.
+1 start /test
+1 write /test/entry1 create contents
+2 dir /test
+expect 1:entry1
+1 dir /test
+1 commit
+expect 2:contents
+2 read /test/entry1
+
+rm /test/entry1
+
+# Create a file and abort transaction.
+1 start /test
+1 write /test/entry1 create contents
+2 dir /test
+expect 1:entry1
+1 dir /test
+1 abort
+2 dir /test
+
+write /test/entry1 create contents
+# Delete in transaction, commit
+1 start /test
+1 rm /test/entry1
+expect 2:entry1
+2 dir /test
+1 dir /test
+1 commit
+2 dir /test
+
+# Delete in transaction, abort.
+write /test/entry1 create contents
+1 start /test
+1 rm /test/entry1
+expect 2:entry1
+2 dir /test
+1 dir /test
+1 abort
+expect 2:entry1
+2 dir /test
+
+# Events inside transactions don't trigger watches until (successful) commit.
+mkdir /test/dir
+1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+2 close
+1 close
+
+1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+2 abort
+expect 1: waitwatch failed: Connection timed out
+1 waitwatch
+1 close
+
+1 watch /test token
+2 start /test
+2 mkdir /test/dir/sub
+2 commit
+expect 1:/test/dir/sub:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Rm inside transaction works like rm outside: children get notified.
+1 watch /test/dir/sub token
+2 start /test
+2 rm /test/dir
+2 commit
+expect 1:/test/dir/sub:token
+1 waitwatch
+1 ackwatch token
+1 close
+
+# Multiple events from single transaction don't trigger assert
+1 watch /test token
+2 start /test
+2 write /test/1 create contents
+2 write /test/2 create contents
+2 commit
+expect 1:/test/1:token
+1 waitwatch
+1 ackwatch token
+expect 1:/test/2:token
+1 waitwatch
+1 ackwatch token
+1 close
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/09domain.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/09domain.test    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,19 @@
+# Test domain communication.
+
+# Create a domain, write an entry.
+expect handle is 1
+introduce 1 100 7 /my/home
+1 write /entry1 create contents
+expect entry1
+expect tool
+dir /
+close
+
+# Release that domain.
+release 1
+close
+
+# Introduce and release by same connection.
+expect handle is 2
+introduce 1 100 7 /my/home
+release 1
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/10domain-homedir.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.test    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,19 @@
+# Test domain "implicit" paths.
+
+# Create a domain, write an entry using implicit path, read it back via absolute path.
+mkdir /home
+expect handle is 1
+introduce 1 100 7 /home
+1 write entry1 create contents
+expect contents
+read /home/entry1
+expect entry1
+dir /home
+
+# Place a watch using a relative path: expect relative answer.
+1 mkdir foo
+1 watch foo token
+write /home/foo/bar create contents
+expect 1:foo/bar:token
+1 waitwatch
+1 ackwatch token
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/11domain-watch.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/11domain-watch.test      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,52 @@
+# Test watching from a domain.
+
+# Watch something, write to it, check watch has fired.
+write /test create contents
+mkdir /dir
+
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /test token
+write /test create contents2
+expect 1:/test:token
+1 waitwatch
+1 ackwatch token
+1 unwatch /test token
+release 1
+1 close
+
+# ignore watches while doing commands, should work.
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /dir token
+write /dir/test create contents
+1 write /dir/test2 create contents2
+1 write /dir/test3 create contents3
+1 write /dir/test4 create contents4
+expect 1:/dir/test:token
+1 waitwatch
+1 ackwatch token
+release 1
+1 close
+
+# unwatch
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /dir token1
+1 unwatch /dir token1
+1 watch /dir token2
+write /dir/test2 create contents
+expect 1:/dir/test2:token2
+1 waitwatch
+1 unwatch /dir token2
+release 1
+1 close
+
+# unwatch while watch pending.
+expect handle is 1
+introduce 1 100 7 /my/home
+1 watch /dir token1
+write /dir/test2 create contents
+1 unwatch /dir token1
+release 1
+1 close
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/12readonly.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/12readonly.test  Thu Aug 25 22:53:20 2005
@@ -0,0 +1,41 @@
+# Test that read only connection can't alter store.
+
+write /test create contents
+
+readonly
+expect test
+expect tool
+dir /
+
+expect contents
+read /test
+expect 0 READ
+getperm /test
+watch /test token
+unwatch /test token 
+start /
+commit
+start /
+abort
+
+# These don't work
+expect write failed: Read-only file system
+write /test2 create contents
+expect write failed: Read-only file system
+write /test create contents
+expect setperm failed: Read-only file system
+setperm /test 100 NONE
+expect setperm failed: Read-only file system
+setperm /test 100 NONE
+expect shutdown failed: Read-only file system
+shutdown
+expect introduce failed: Read-only file system
+introduce 1 100 7 /home
+
+# Check that watches work like normal.
+watch / token
+1 readwrite
+1 write /test create contents
+expect /test:token
+waitwatch
+ackwatch token
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/13watch-ack.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/13watch-ack.test Thu Aug 25 22:53:20 2005
@@ -0,0 +1,22 @@
+# This demonstrates a bug where an xs_acknowledge_watch returns
+# EINVAL, because the daemon doesn't track what watch event it sent
+# and relies on it being the "first" watch which has an event.
+# Watches firing after the first event is sent out will change this.
+
+# Create three things to watch.
+mkdir /test
+mkdir /test/1
+mkdir /test/2
+mkdir /test/3
+
+# Watch all three, fire event on 2, read watch, fire event on 1 and 3, ack 2.
+1 watch /test/1 token1
+1 watch /test/2 token2
+1 watch /test/3 token3
+2 write /test/2 create contents2
+expect 1:/test/2:token2
+1 waitwatch
+3 write /test/1 create contents1
+4 write /test/3 create contents3
+1 ackwatch token2
+1 close
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/14complexperms.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/14complexperms.test      Thu Aug 25 22:53:20 2005
@@ -0,0 +1,99 @@
+# We should not be able to tell the difference between a node which
+# doesn't exist, and a node we don't have permission on, if we don't
+# have permission on its directory.
+
+mkdir /dir
+setperm /dir 0 NONE
+
+# First when it doesn't exist
+setid 1
+expect *Permission denied
+dir /dir/file
+expect *Permission denied
+read /dir/file 
+expect *Permission denied
+write /dir/file none value 
+expect *Permission denied
+write /dir/file create value 
+expect *Permission denied
+write /dir/file excl value 
+expect write failed: Invalid argument
+write /dir/file crap value 
+expect *Permission denied
+mkdir /dir/file 
+expect *Permission denied
+rm /dir/file 
+expect *Permission denied
+rm /dir 
+expect *Permission denied
+getperm /dir/file 
+expect *Permission denied
+setperm /dir/file 0 NONE 
+watch /dir/file token 
+setid 0
+write /dir/file create contents
+rm /dir/file
+setid 1
+expect waitwatch failed: Connection timed out
+waitwatch
+unwatch /dir/file token 
+expect *No such file or directory
+unwatch /dir/file token 
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+abort
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+commit
+expect *Permission denied
+introduce 2 100 7 /dir/file
+
+# Now it exists
+setid 0
+write /dir/file create contents
+
+setid 1
+expect *Permission denied
+dir /dir/file
+expect *Permission denied
+read /dir/file 
+expect *Permission denied
+write /dir/file none value 
+expect *Permission denied
+write /dir/file create value 
+expect *Permission denied
+write /dir/file excl value 
+expect write failed: Invalid argument
+write /dir/file crap value 
+expect *Permission denied
+mkdir /dir/file 
+expect *Permission denied
+rm /dir/file 
+expect *Permission denied
+rm /dir 
+expect *Permission denied
+getperm /dir/file 
+expect *Permission denied
+setperm /dir/file 0 NONE 
+watch /dir/file token 
+setid 0
+write /dir/file create contents
+rm /dir/file
+setid 1
+expect waitwatch failed: Connection timed out
+waitwatch
+unwatch /dir/file token 
+expect *No such file or directory
+unwatch /dir/file token 
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+abort
+expect *Permission denied
+start /dir/file
+expect *No such file or directory
+commit
+expect *Permission denied
+introduce 2 100 7 /dir/file
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/testsuite/15nowait.test
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/testsuite/15nowait.test    Thu Aug 25 22:53:20 2005
@@ -0,0 +1,25 @@
+# If we don't wait for an ack, we can crash daemon as it never expects to be
+# sending out two replies on top of each other.
+noackwrite /1 create 1
+noackwrite /2 create 2
+noackwrite /3 create 3
+noackwrite /4 create 4
+noackwrite /5 create 5
+readack
+readack
+readack
+readack
+readack
+
+expect handle is 1
+introduce 1 100 7 /my/home
+1 noackwrite /1 create 1
+1 noackwrite /2 create 2
+1 noackwrite /3 create 3
+1 noackwrite /4 create 4
+1 noackwrite /5 create 5
+1 readack
+1 readack
+1 readack
+1 readack
+1 readack
diff -r 5f1ed597f107 -r 8799d14bef77 tools/xenstore/xs_crashme.c
--- /dev/null   Wed Aug 24 02:43:18 2005
+++ b/tools/xenstore/xs_crashme.c       Thu Aug 25 22:53:20 2005
@@ -0,0 +1,413 @@
+/* Code which randomly corrupts bits going to the daemon.
+    Copyright (C) 2005 Rusty Russell IBM Corporation
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/time.h>
+#include "xs.h"
+#include "talloc.h"
+#include <errno.h>
+#include "xenstored.h"
+
+#define XSTEST
+#define RAND_FREQ 128          /* Gap between corruptions is random in [0, RAND_FREQ) bits. */
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@xxxxxxxxxxxxxxxx)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose.  It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@xxxxxxxxxx)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault.  -DaveM
+ */
+
+/* Scramble the three accumulators a, b and c together using only
+ * subtract, xor and shift steps.
+ * NOTE: Arguments are modified in place, so each must be an lvalue;
+ * the macro expands to a brace-enclosed block, not an expression. */
+#define __jhash_mix(a, b, c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO     0x9e3779b9
+
+/* The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * key:     start of the bytes to hash
+ * length:  number of bytes at key
+ * initval: caller-chosen value used to initialise the c accumulator
+ *
+ * Returns the final value of the c accumulator.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+       u32 a, b, c, len;
+       const u8 *k = key;
+
+       len = length;
+       a = b = JHASH_GOLDEN_RATIO;
+       c = initval;
+
+       /* Consume the key 12 bytes at a time; each group of four bytes is
+        * assembled lowest byte first, so no aligned loads are needed. */
+       while (len >= 12) {
+               a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
+               b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
+               c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
+
+               __jhash_mix(a,b,c);
+
+               k += 12;
+               len -= 12;
+       }
+
+       /* Fold in the 0..11 leftover bytes.  Every case deliberately falls
+        * through to the ones below it.  Leftover bytes never land in the
+        * low byte of c (k[8] is shifted by 8); c gets the length instead. */
+       c += length;
+       switch (len) {
+       case 11: c += ((u32)k[10]<<24);
+       case 10: c += ((u32)k[9]<<16);
+       case 9 : c += ((u32)k[8]<<8);
+       case 8 : b += ((u32)k[7]<<24);
+       case 7 : b += ((u32)k[6]<<16);
+       case 6 : b += ((u32)k[5]<<8);
+       case 5 : b += k[4];
+       case 4 : a += ((u32)k[3]<<24);
+       case 3 : a += ((u32)k[2]<<16);
+       case 2 : a += ((u32)k[1]<<8);
+       case 1 : a += k[0];
+       };
+
+       __jhash_mix(a,b,c);
+
+       return c;
+}
+
+/* A special optimized version that handles 1 or more of u32s.
+ * The length parameter here is the number of u32s in the key.
+ *
+ * k:       the words to hash
+ * length:  number of u32s at k
+ * initval: caller-chosen value used to initialise the c accumulator
+ *
+ * Returns the final value of the c accumulator.
+ */
+static inline u32 jhash2(u32 *k, u32 length, u32 initval)
+{
+       u32 a, b, c, len;
+
+       a = b = JHASH_GOLDEN_RATIO;
+       c = initval;
+       len = length;
+
+       /* Consume the key three words at a time. */
+       while (len >= 3) {
+               a += k[0];
+               b += k[1];
+               c += k[2];
+               __jhash_mix(a, b, c);
+               k += 3; len -= 3;
+       }
+
+       /* Word count times four, i.e. the key size in bytes if a u32 is
+        * four bytes wide. */
+       c += length * 4;
+
+       /* Fold in the 0..2 leftover words; case 2 deliberately falls
+        * through to case 1. */
+       switch (len) {
+       case 2 : b += k[1];
+       case 1 : a += k[0];
+       };
+
+       __jhash_mix(a,b,c);
+
+       return c;
+}
+
+
+/* Special ultra-optimized versions that know they are hashing exactly
+ * 3, 2 or 1 word(s).
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ *       done at the end is not done here.
+ *
+ * a, b, c: the three words to hash
+ * initval: caller-chosen value added to c before the single mixing pass
+ *
+ * Returns the value of c after that pass.
+ */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+       a += JHASH_GOLDEN_RATIO;
+       b += JHASH_GOLDEN_RATIO;
+       c += initval;
+
+       __jhash_mix(a, b, c);
+
+       return c;
+}
+
+/* Hash two words: jhash_3words() with the third word fixed at 0. */
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+       return jhash_3words(a, b, 0, initval);
+}
+
+/* Hash one word: jhash_3words() with the second and third words fixed at 0. */
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+       return jhash_3words(a, 0, 0, initval);
+}
+
+/* Return the next pseudo-random value in the sequence and advance *state.
+ *
+ * state: counter driving the sequence; incremented by one per call.
+ *
+ * The value returned depends only on the counter, so a run can be
+ * replayed from its seed.  The counter is read once, before it is bumped:
+ * reading *state and post-incrementing it inside one argument list is
+ * unsequenced (undefined behaviour), so the result used to depend on the
+ * compiler.  The multiply is done unsigned so that it wraps rather than
+ * overflowing a signed int. */
+static unsigned int get_randomness(int *state)
+{
+       unsigned int cur = (unsigned int)*state;
+
+       *state = (int)(cur + 1);
+       return jhash_1word(cur, cur * 1103515243u);
+}
+
+static int state;
+
+/* Lengthening headers is pointless: other end will just wait for more
+ * data and timeout.  We merely shorten the length.
+ *
+ * output:   receives sizeof(*msg) bytes: a copy of *msg, possibly with a
+ *           smaller len field
+ * msg:      the genuine header
+ * next_bit: bit offset of the next scheduled corruption, relative to the
+ *           start of this header; on return it is at or beyond the end
+ *           of the header
+ *
+ * next_bit counts bits, so the extent of the header has to be measured
+ * in bits as well.  Comparing against the byte count let the loop stop
+ * while next_bit was still inside the header; the caller's
+ * "next_bit -= len * CHAR_BIT" then wrapped the unsigned counter. */
+static void corrupt_header(char *output, const struct xsd_sockmsg *msg,
+                          unsigned int *next_bit)
+{
+       struct xsd_sockmsg newmsg = *msg;
+
+       while (*next_bit < sizeof(*msg) * CHAR_BIT) {
+               /* A zero length cannot be shortened; the modulo keeps
+                * the new length strictly below the old one. */
+               if (newmsg.len)
+                       newmsg.len = get_randomness(&state) % newmsg.len;
+               *next_bit += get_randomness(&state) % RAND_FREQ;
+       }
+       memcpy(output, &newmsg, sizeof(newmsg));
+}
+
+/* Reads are not tampered with: read_all_choice is plain read_all. */
+#define read_all_choice read_all
+/* Write len bytes of data to fd through xs_write_all(), after corrupting
+ * a private copy of them.  Returns whatever xs_write_all() returned.
+ *
+ * next_bit is the bit offset, relative to the start of the current
+ * buffer, of the next bit to corrupt.  It is static, so the schedule
+ * carries over from one call to the next. */
+static bool write_all_choice(int fd, const void *data, unsigned int len)
+{
+       char corrupt_data[len];
+       bool ret;
+       static unsigned int next_bit;
+
+       /* A write of exactly header size from a suitably aligned pointer
+        * is taken to be a struct xsd_sockmsg and gets the header
+        * treatment; anything else is treated as raw bytes. */
+       if (len == sizeof(struct xsd_sockmsg)
+           && ((unsigned long)data % __alignof__(struct xsd_sockmsg)) == 0)
+               corrupt_header(corrupt_data, data, &next_bit);
+       else {
+               memcpy(corrupt_data, data, len);
+               /* Flip every scheduled bit that lands inside this buffer. */
+               while (next_bit < len * CHAR_BIT) {
+                       corrupt_data[next_bit/CHAR_BIT]
+                               ^= (1 << (next_bit%CHAR_BIT));
+                       next_bit += get_randomness(&state) % RAND_FREQ;
+               }
+       }
+
+       ret = xs_write_all(fd, corrupt_data, len);
+       /* Rebase the offset so that it is relative to the next buffer. */
+       next_bit -= len * CHAR_BIT;
+       return ret;
+}
+
+#include "xs.c"
+
+/* Build a random store path: "/" when the first draw is a multiple of 20,
+ * otherwise one or more "/<n>" components with n in 0..14.  After the
+ * first component, each further one is added only while a fresh draw is
+ * odd.  The result comes from talloc. */
+static char *random_path(void)
+{
+       char *path = NULL;
+
+       if (get_randomness(&state) % 20 == 0)
+               return talloc_strdup(NULL, "/");
+
+       /* The first component is unconditional; only the later ones are
+        * gated on a draw. */
+       do {
+               path = talloc_asprintf_append(path, "/%i",
+                                             get_randomness(&state) % 15);
+       } while (get_randomness(&state) % 2);
+
+       return path;
+}
+
+/* Pick flags for a write: 0, O_CREAT, O_CREAT|O_EXCL, or otherwise an
+ * arbitrary random value (which costs a second draw from *state). */
+static int random_flags(int *state)
+{
+       unsigned int pick = get_randomness(state) % 4;
+
+       if (pick == 0)
+               return 0;
+       if (pick == 1)
+               return O_CREAT;
+       if (pick == 2)
+               return O_CREAT|O_EXCL;
+       return get_randomness(state);
+}
+
+/* Perform one randomly chosen store operation on a random path.
+ *
+ * h:       connection to issue the request on
+ * verbose: if true, print the current state and the operation to stdout
+ *
+ * Nothing is returned: replies are freed unread and failures are ignored.
+ * The path (and, for a write, the contents) are talloc allocations owned
+ * by this function; they are released before returning instead of being
+ * leaked on every call. */
+static void do_next_op(struct xs_handle *h, bool verbose)
+{
+       char *name;
+       unsigned int num;
+
+       if (verbose)
+               printf("State %i: ", state);
+
+       name = random_path();
+       switch (get_randomness(&state) % 9) {
+       case 0:
+               if (verbose)
+                       printf("DIR %s\n", name);
+               free(xs_directory(h, name, &num));
+               break;
+       case 1:
+               if (verbose)
+                       printf("READ %s\n", name);
+               free(xs_read(h, name, &num));
+               break;
+       case 2: {
+               int flags = random_flags(&state);
+               char *contents = talloc_asprintf(NULL, "%i",
+                                                get_randomness(&state));
+               /* Send a random-length prefix of the contents. */
+               unsigned int len = get_randomness(&state)%(strlen(contents)+1);
+               if (verbose)
+                       printf("WRITE %s %s %.*s\n", name,
+                              flags == O_CREAT ? "O_CREAT"
+                              : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
+                              : flags == 0 ? "0" : "CRAPFLAGS",
+                              len, contents);
+               xs_write(h, name, contents, len, flags);
+               talloc_free(contents);
+               break;
+       }
+       case 3:
+               if (verbose)
+                       printf("MKDIR %s\n", name);
+               xs_mkdir(h, name);
+               break;
+       case 4:
+               if (verbose)
+                       printf("RM %s\n", name);
+               xs_rm(h, name);
+               break;
+       case 5:
+               if (verbose)
+                       printf("GETPERMS %s\n", name);
+               free(xs_get_permissions(h, name, &num));
+               break;
+       case 6: {
+               unsigned int i, nperms = get_randomness(&state)%8;
+               /* Fixed size rather than perms[nperms]: nperms can be 0
+                * and a variable-length array may not have zero
+                * elements.  nperms is always below 8. */
+               struct xs_permissions perms[8];
+
+               if (verbose)
+                       printf("SETPERMS %s: ", name);
+               for (i = 0; i < nperms; i++) {
+                       perms[i].id = get_randomness(&state)%8;
+                       perms[i].perms = get_randomness(&state)%4;
+                       if (verbose)
+                               printf("%i%c ", perms[i].id,
+                                      perms[i].perms == XS_PERM_WRITE ? 'W'
+                                      : perms[i].perms == XS_PERM_READ ? 'R'
+                                      : perms[i].perms ==
+                                      (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
+                                      : 'N');
+               }
+               if (verbose)
+                       printf("\n");
+               xs_set_permissions(h, name, perms, nperms);
+               break;
+       }
+       case 7: {
+               if (verbose)
+                       printf("START %s\n", name);
+               xs_transaction_start(h, name);
+               break;
+       }
+       case 8: {
+               bool abort = (get_randomness(&state) % 2);
+
+               if (verbose)
+                       printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
+               xs_transaction_end(h, abort);
+               break;
+       }
+       default:
+               barf("Impossible randomness");
+       }
+
+       talloc_free(name);
+}
+
+/* The connection in use; file-scope so the signal handler can reach it. */
+static struct xs_handle *h;
+/* Signal handler: main() installs it for SIGALRM and calls alarm(1)
+ * before each operation. */
+static void alarmed(int sig __attribute__((unused)))
+{
+       /* We force close on timeout. */
+       close(h->fd);
+}
+
+/* Launch the test daemon as a child process and wait until it is ready.
+ *
+ * The child's stdout is redirected into a pipe and the daemon is started
+ * with --output-pid; the parent blocks until something arrives on that
+ * pipe.
+ *
+ * Returns the child's pid.  Does not return on failure: a failed pipe()
+ * or fork(), or a child that goes away without writing anything, is
+ * fatal.  Handing fork()'s -1 back to the caller would be dangerous,
+ * because main() passes this value to kill(), and a pid of -1 addresses
+ * every process the caller is allowed to signal. */
+static int start_daemon(void)
+{
+       int fds[2];
+       int daemon_pid;
+
+       /* Start daemon. */
+       if (pipe(fds) != 0)
+               barf_perror("Failed to create pipe to daemon");
+       daemon_pid = fork();
+       if (daemon_pid < 0)
+               barf_perror("Failed to fork daemon");
+       if (daemon_pid) {
+               /* Child writes PID when its ready: we wait for that. */
+               char buffer[20];
+               close(fds[1]);
+               /* 0 is EOF: the child exited (e.g. exec failed) without
+                * announcing itself. */
+               if (read(fds[0], buffer, sizeof(buffer)) <= 0)
+                       barf("Failed to summon daemon");
+               close(fds[0]);
+               return daemon_pid;
+       } else {
+               dup2(fds[1], STDOUT_FILENO);
+               close(fds[0]);
+#if 1
+               execlp("valgrind", "valgrind",
+                      "--log-file=/tmp/xs_crashme.vglog", "-q",
+                      "./xenstored_test", "--output-pid",
+                      "--no-fork", "--trace-file=/tmp/trace", NULL);
+#else
+               execlp("./xenstored_test", "xenstored_test", "--output-pid",
+                      "--no-fork", NULL);
+#endif
+               /* Only reached if exec failed. */
+               exit(1);
+       }
+}
+
+
+/* Usage: xs_crashme <iterations> <seed> [pid]
+ *
+ * Runs <iterations> random operations against the daemon with the
+ * generator seeded from <seed>.  Without [pid] a daemon is started here;
+ * otherwise [pid] is taken as the pid of an existing one.  Either way
+ * that pid is sent SIGTERM at the end. */
+int main(int argc, char **argv)
+{
+       unsigned int i;
+       int pid, iterations, dot_every;
+
+       if (argc != 3 && argc != 4)
+               barf("Usage: xs_crashme <iterations> <seed> [pid]");
+
+       pid = (argc == 3) ? start_daemon() : atoi(argv[3]);
+
+       state = atoi(argv[2]);
+       iterations = atoi(argv[1]);
+       /* Print a progress dot every iterations/72 operations (at least
+        * every operation when that quotient is zero). */
+       dot_every = iterations / 72;
+       if (dot_every == 0)
+               dot_every = 1;
+
+       h = xs_daemon_open();
+       if (!h)
+               barf_perror("Opening connection to daemon");
+       signal(SIGALRM, alarmed);
+
+       for (i = 0; i < (unsigned)iterations; i++) {
+               /* Re-arm the one-second alarm for this operation. */
+               alarm(1);
+               do_next_op(h, false);
+               if (i % dot_every == 0) {
+                       printf(".");
+                       fflush(stdout);
+               }
+               /* Signal 0 only checks that the daemon still exists. */
+               if (kill(pid, 0) != 0)
+                       barf_perror("Pinging daemon on iteration %i", i);
+               if (h->fd >= 0)
+                       continue;
+               /* The connection has gone: open a fresh one. */
+               xs_daemon_close(h);
+               h = xs_daemon_open();
+               if (!h)
+                       barf_perror("Connecting on iteration %i", i);
+       }
+       kill(pid, SIGTERM);
+       return 0;
+}
+
diff -r 5f1ed597f107 -r 8799d14bef77 docs/misc/shype4xen_readme.txt
--- a/docs/misc/shype4xen_readme.txt    Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,588 +0,0 @@
-Copyright: IBM Corporation (C)
-20 June 2005
-Author: Reiner Sailer
-
-This document is a very short introduction into the sHype access control 
-security architecture implementation and how it is perceived by users. It 
-is a very preliminary draft  for the courageous ones to get "their feet wet" 
-and to be able to give feedback (via the xen-devel/xense-devel mailing lists).
-
-Install:
-
-cd into xeno-unstable.bk 
-(use --dry-run option if you want to test the patch only)
-patch -p1 -g0 < *tools.diff
-patch -p1 -g0 < *xen.diff
-
-(no rejects, probably some line offsets)
-
-make uninstall; make mrproper; make; ./install.sh should install the default 
-sHype into Xen (rebuild your initrd images if necessary). Reboot.
-
-Debug output: there are two triggers for debug output:
-a) General sHype debug:
-    xeno-unstable.bk/xen/include/public/acm.h
-    undefine ACM_DEBUG to switch this debug off
-
-b) sHype enforcement hook trace: This prints a small trace for each 
enforcement 
-hook that is executed. The trigger is in
-    xeno-unstable.bk/xen/include/acm/acm_hooks.h
-    undefine ACM_TRACE_MODE to switch this debug off
-
-1. The default NULL policy
-***************************
-When you apply the patches and startup xen, you should at first not notice any 
-difference because the default policy is the "NULL" policy, which as the name 
-implies does not enforce anything.
-
-To display the currently enforced policy, use the policy tool under xeno-
-unstable.bk/tools/policy: policy_tool getpolicy. You should see output like 
the 
-one below.
-
-[root@laptop policy]#./policy_tool getpolicy
-
-Policy dump:
-============
-Magic     = 1debc.
-PolVer    = aaaa0000.
-Len       = 14.
-Primary   = NULL policy (c=0, off=14).
-Secondary = NULL policy (c=0, off=14).
-No primary policy (NULL).
-No secondary policy (NULL).
-
-Policy dump End.
-
-Since this is a dump of a binary policy, it's not pretty. The important parts 
-are the "Primary" and "Secondary" policy fields set to "NULL policy". sHype 
-currently allows to set two independent policies; thus the two SSID-REF parts 
-shown in 'xm list'. Right here: primary policy only means this policy is 
-checked first, the secondary policy is checked if the primary results in 
-"permitted access". The result of the combined policy is "permitted" if both 
-policies return permitted (NULL policy always returns permitted). The result 
is 
-"denied" if at least one of the policies returns "denied". Look into xeno-
-unstable.bk/xen/include/acm/acm_hooks.h for the general hook structure 
-integrating the policy decisions (if you like, you won't need it for the rest 
-of the Readme file).
-
-2. Setting Chinese Wall and Simple Type Enforcement policies:
-*************************************************************
-
-We'll get fast to the point. However, in order to understand what we are 
doing, 
-we must at least understand the purpose of the policies that we are going to 
-enforce. The two policies presented here are just examples and the 
-implementation encourages adding new policies easily.
-
-2.1. Chinese Wall policy: "decides whether a domain can be started based on 
-this domain's ssidref and the ssidrefs of the currently running domains". 
-Generally, the Chinese wall policy allows specifying certain types (or classes 
-or categories, whatever the preferred word) that conflict; we usually assign a 
-type to a workload and the set of types of those workloads running in a domain 
-make up the type set for this domain.  Each domain is assigned a set of types 
-through its SSID-REF (we register Chinese Wall as primary policy, so the 
-ssidref used for determining the Chinese Wall types is the one annotated with 
-"p:" in xm list) since each SSID-REF points at a set of types. We'll see how 
-SSIDREFs are represented in Xen later when we will look at the policy. (A good 
-read for Chinese Wall is: Brewer/Nash The Chinese Wall Security Policy 1989.)
-
-So let's assume the Chinese Wall policy we are running distinguishes 10 types: 
-t0 ... t9. Let us assume further that each SSID-REF points to a set that 
-includes exactly one type (attached to domains that run workloads of a single 
-type). SSID-REF 0 points to {t0}, ssidref 1 points to {t1} ... 9 points to 
-{t9}. [This is actually the example policy we are going to push into xen later]
-
-Now the Chinese Wall policy allows you to define "Conflict type sets" and it 
-guarantees that of any conflict set at most one type is "running" at any time. 
-As an example, we have defined 2 conflict set: {t2, t3} and {t0, t5, t6}. 
-Specifying these conflict sets, sHype ensures that at most one type of each 
set 
-is running (either t2 or t3 but not both; either t0 or t5 or t6 but not 
-multiple of them).
-
-The effect is that administrators can define which workload types cannot run 
-simultaneously on a single Xen system. This is useful to limit the covert 
-timing channels between such payloads or to ensure that payloads don't 
-interfere with each other through existing resource dependencies.
-
-2.2. Simple Type Enforcement (ste) policy: "decides whether two domains can 
-share data, e.g., setup event channels or grant tables to each other, based on 
-the two domains' ssidref. This, as the name says, is a simple policy. Think of 
-each type as of a single color. Each domain has one or more colors, i.e., the 
-domains ssid for the ste policy points to a set that has set one or multiple 
-types. Let us assume in our example policy we differentiate 5 colors (types) 
-and define 5 different ssids referenced by ssidref=0..4. Each ssid shall have 
-exactly one type set, i.e., describes a uni-color. Only ssid(0) has all types 
-set, i.e., has all defined colors.
-
-Sharing is enforced by the ste policy by requiring that two domains that want 
-to establish an event channel or grant pages to each other must have a common 
-color. Currently all domains communicate through DOM0 by default; i.e., 
Domain0 
-will necessarily have all colors to be able to create domains (thus, we will 
-assign ssidref(0) to Domain0 in our example below.
-
-More complex mandatory access control policies governing sharing will follow; 
-such policies are more sophisticated than the "color" scheme above by allowing 
-more flexible (and complex :_) access control decisions than "share a color" 
or 
-"don't share a color" and will be able to express finer-grained policies.
-
-
-2.3 Binary Policy:
-In the future, we will have a policy tool that takes as input a more humane 
-policy description, using types such as development, home-banking, donated-
-Grid, CorpA-Payload ... and translates the respective policy into what we see 
-today as the binary policy using 1s and 0s and sets of them. For now, we must 
-live with the binary policy when working with sHype.
-
-    
-2.4 Exemplary use of a real sHype policy on Xen. To activate a real policy, 
-edit the file (yes, this will soon be a compile option):
-  xeno-unstable.bk/xen/include/public/acm.h
-  Change: #define ACM_USE_SECURITY_POLICY ACM_NULL_POLICY
-   To : #define ACM_USE_SECURITY_POLICY 
ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
-   cd xeno-unstable.bk
-   make mrproper
-   make uninstall (manually remove /etc/xen.old if necessary)
-   make
-   ./install.sh      (recreate your kernel initrd's if necessary)
-   Reboot into new xen.gz
-     
-After booting, check out 'xm dmesg'; should show somewhere in the middle:
-
-(XEN) acm_init: Enforcing Primary CHINESE WALL policy, Secondary SIMPLE TYPE 
-ENFORCEMENT policy.
-
-Even though you can activate those policies in any combination and also 
-independently, the policy tool currently only supports setting the policy for 
-the above combination.
-
-Now look at the minimal startup policy with:
-                xeno-unstable.bk/tools/policytool getpolicy
-
-You should see something like:
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic     = 1debc.
-PolVer    = aaaa0000.
-Len       = 36.
-Primary   = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=2c).
-
-
-Chinese Wall policy:
-====================
-Max Types     = 1.
-Max Ssidrefs  = 1.
-Max ConfSets  = 1.
-Ssidrefs Off  = 10.
-Conflicts Off = 12.
-Runing T. Off = 14.
-C. Agg. Off   = 16.
-
-SSID To CHWALL-Type matrix:
-
-   ssidref 0:  00 
-
-Confict Sets:
-
-   c-set 0:    00 
-
-Running
-Types:         00 
-
-Conflict
-Aggregate Set: 00 
-
-
-Simple Type Enforcement policy:
-===============================
-Max Types     = 1.
-Max Ssidrefs  = 1.
-Ssidrefs Off  = 8.
-
-SSID To STE-Type matrix:
-
-   ssidref 0: 01 
-
-
-Policy dump End.
-
-This is a minimal policy (of little use), except it will disable starting any 
-domain that does not have ssidref set to 0x0. The Chinese Wall policy has 
-nothing to enforce and the ste policy only knows one type, which is set for 
the 
-only defined ssidref.
-
-The item that defines the ssidref in a domain configuration is:
-
-ssidref = 0x12345678
-
-Where ssidref is interpreted as a 32bit number, where the lower 16bits become 
-the ssidref for the primary policy and the higher 16bits become the ssidref 
for 
-the secondary policy. sHype currently supports two policies but this is an 
-implementation decision and can be extended if necessary.
-
-This reference defines the security information of a domain. The meaning of 
the 
-SSID-REF depends on the policy, so we explain it when we explain the real 
-policies.
-
-
-Setting a new Security Policy:
-******************************
-The policy tool with all its current limitations has one usable example policy 
-compiled-in. Please try at this time to use the setpolicy command:
-       xeno-unstable.bk/tools/policy/policy_tool setpolicy
-
-You should see a dump of the policy you are setting. It should say at the very 
-end: 
-
-Policy successfully set.
-
-Now try to dump the currently enforced policy, which is the policy we have 
just 
-set and the dynamic security state information of this policy 
-(<<< ... some additional explanations)
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic     = 1debc.
-PolVer    = aaaa0000.
-Len       = 112.
-Primary   = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=d8).
-
-
-Chinese Wall policy:
-====================
-Max Types     = a.
-Max Ssidrefs  = 5.
-Max ConfSets  = 2.
-Ssidrefs Off  = 10.
-Conflicts Off = 74.
-Runing T. Off = 9c.
-C. Agg. Off   = b0.
-
-SSID To CHWALL-Type matrix:
-
-   ssidref 0:  01 00 00 00 00 00 00 00 00 00  <<< type0 is set for ssidref0
-   ssidref 1:  00 01 00 00 00 00 00 00 00 00 
-   ssidref 2:  00 00 01 00 00 00 00 00 00 00 
-   ssidref 3:  00 00 00 01 00 00 00 00 00 00 
-   ssidref 4:  00 00 00 00 01 00 00 00 00 00  <<< type4 is set for ssidref4
-                                              <<< types 5-9 are unused
-Confict Sets:
-
-   c-set 0:    00 00 01 01 00 00 00 00 00 00  <<< type2 and type3 never run 
together
-   c-set 1:    01 00 00 00 00 01 01 00 00 00  <<< only one of types 0, 5 or 6 
-                                              <<<   can run simultaneously
-Running
-Types:         01 00 00 00 00 00 00 00 00 00  <<< ref-count for types of 
running domains
-
-Conflict
-Aggregate Set: 00 00 00 00 00 01 01 00 00 00  <<< aggregated set of types that 
                 
-                                              <<< cannot run because they 
-                                              <<< are in conflict set 1 and
-                                              <<< (domain 0 is running w t0)
-                                             
-
-Simple Type Enforcement policy:
-===============================
-Max Types     = 5.
-Max Ssidrefs  = 5.
-Ssidrefs Off  = 8.
-
-SSID To STE-Type matrix:
-
-   ssidref 0: 01 01 01 01 01                  <<< ssidref0 points to a set 
that                  
-                                              <<< has all types set (colors)
-   ssidref 1: 00 01 00 00 00                  <<< ssidref1 has color1 set
-   ssidref 2: 00 00 01 00 00                  <<< ...
-   ssidref 3: 00 00 00 01 00 
-   ssidref 4: 00 00 00 00 01 
-
-
-Policy dump End.
-
-
-This is a small example policy with which we will demonstrate the enforcement.
-
-Starting Domains with policy enforcement
-========================================
-Now let us play with this policy. 
-
-Define 3 or 4 domain configurations. I use the following config using a 
ramdisk 
-only and about 8MBytes of memory for each DomU (test purposes):
-
-#-------configuration xmsec1-------------------------
-kernel = "/boot/vmlinuz-2.6.11-xenU"
-ramdisk="/boot/U1_ramdisk.img"
-#security reference identifier
-ssidref= 0x00010001
-memory = 10
-name = "xmsec1"
-cpu = -1   # leave to Xen to pick
-# Number of network interfaces. Default is 1.
-nics=1
-dhcp="dhcp"
-#-----------------------------------------------------
-
-xmsec2 and xmsec3 look the same except for the name and the ssidref line. Use 
-your domain config file and add "ssidref = 0x00010001" to the first (xmsec1),  
-"ssidref= 0x00020002" to the second (call it xmsec2), and "ssidref=0x00030003" 
 
-to the third (we will call this one xmsec3).
-
-First start xmsec1: xm create -c xmsec1 (succeeds)
-
-Then
-[root@laptop policy]# xm list 
-Name              Id  Mem(MB)  CPU  State  Time(s)  Console  
-Domain-0           0      620   0  r----     42.3            s:00/p:00
-xmnosec            1        9   0  -b---      0.3    9601    s:00/p:05
-xmsec1             2        9   0  -b---      0.2    9602    s:01/p:01
-
-Shows a new domain xmsec1 running with primary (here: chinese wall) ssidref 1 
-and secondary (here: simple type enforcement) ssidref 1. The ssidrefs are  
-independent and can differ for a domain.
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic     = 1debc.
-PolVer    = aaaa0000.
-Len       = 112.
-Primary   = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=d8).
-
-
-Chinese Wall policy:
-====================
-Max Types     = a.
-Max Ssidrefs  = 5.
-Max ConfSets  = 2.
-Ssidrefs Off  = 10.
-Conflicts Off = 74.
-Runing T. Off = 9c.
-C. Agg. Off   = b0.
-
-SSID To CHWALL-Type matrix:
-
-   ssidref 0:  01 00 00 00 00 00 00 00 00 00
-   ssidref 1:  00 01 00 00 00 00 00 00 00 00
-   ssidref 2:  00 00 01 00 00 00 00 00 00 00
-   ssidref 3:  00 00 00 01 00 00 00 00 00 00
-   ssidref 4:  00 00 00 00 01 00 00 00 00 00
-
-Confict Sets:
-
-   c-set 0:    00 00 01 01 00 00 00 00 00 00
-   c-set 1:    01 00 00 00 00 01 01 00 00 00   <<< t1 is not part of any c-set
-
-Running
-Types:         01 01 00 00 00 00 00 00 00 00   <<< xmsec1 has ssidref 1->type1
-                  ^^                           <<< ref-count at position 1 incr
-Conflict
-Aggregate Set: 00 00 00 00 00 01 01 00 00 00   <<< domain 1 was allowed to     
  
-                                               <<< start since type 1 was not
-                                               <<< in conflict with running 
-                                               <<< types
-                                            
-Simple Type Enforcement policy:
-===============================
-Max Types     = 5.
-Max Ssidrefs  = 5.
-Ssidrefs Off  = 8.
-
-SSID To STE-Type matrix:
-
-   ssidref 0: 01 01 01 01 01           <<< the ste policy does not maintain; we
-   ssidref 1: 00 01 00 00 00   <--     <<< see that domain xmsec1 has ste 
-   ssidref 2: 00 00 01 00 00           <<< ssidref1->type1 and has this type in
-   ssidref 3: 00 00 00 01 00           <<< common with dom0
-   ssidref 4: 00 00 00 00 01
-
-
-Policy dump End.
-
-Look at sHype output in xen dmesg:
-
-[root@laptop xen]# xm dmesg
-.
-.
-[somewhere near the very end]
-(XEN) chwall_init_domain_ssid: determined chwall_ssidref to 1.
-(XEN) ste_init_domain_ssid.
-(XEN) ste_init_domain_ssid: determined ste_ssidref to 1.
-(XEN) acm_init_domain_ssid: Instantiated individual ssid for domain 0x01.
-(XEN) chwall_post_domain_create.
-(XEN) ste_pre_eventchannel_interdomain.
-(XEN) ste_pre_eventchannel_interdomain: (evtchn 0 --> 1) common type #01.
-(XEN) shype_authorize_domops.
-(XEN) ste_pre_eventchannel_interdomain.
-(XEN) ste_pre_eventchannel_interdomain: (evtchn 0 --> 1) common type #01.
-(XEN) ste_pre_eventchannel_interdomain.
-(XEN) ste_pre_eventchannel_interdomain: (evtchn 0 --> 1) common type #01.
-
-
-You can see that the chinese wall policy does not complain and that the ste 
-policy makes three access control decisions for three event-channels setup 
-between domain 0 and the new domain 1. Each time, the two domains share the 
-type1 and setting up the eventchannel is permitted.
-
-
-Starting up a second domain xmsec2:
-
-[root@laptop xen]# xm create -c xmsec2
-Using config file "xmsec2".
-Started domain xmsec2, console on port 9602
-************ REMOTE CONSOLE: CTRL-] TO QUIT ********
-Linux version 2.6.11-xenU (root@xxxxxxxxxxxxxxx) (gcc version 3.4.2 20041017 
-(Red Hat 3.4.2-6.fc3)) #1 Wed Mar 30 13:14:31 EST 2005
-.
-.
-.
-[root@laptop policy]# xm list
-Name              Id  Mem(MB)  CPU  State  Time(s)  Console  
-Domain-0           0      620   0  r----     71.7            s:00/p:00
-xmsec1             1        9   0  -b---      0.3    9601    s:01/p:01
-xmsec2             2        7   0  -b---      0.3    9602    s:02/p:02   << 
our domain runs both policies with ssidref 2
-
-
-[root@laptop policy]# ./policy_tool getpolicy
-
-Policy dump:
-============
-Magic     = 1debc.
-PolVer    = aaaa0000.
-Len       = 112.
-Primary   = CHINESE WALL policy (c=1, off=14).
-Secondary = SIMPLE TYPE ENFORCEMENT policy (c=2, off=d8).
-
-
-Chinese Wall policy:
-====================
-Max Types     = a.
-Max Ssidrefs  = 5.
-Max ConfSets  = 2.
-Ssidrefs Off  = 10.
-Conflicts Off = 74.
-Runing T. Off = 9c.
-C. Agg. Off   = b0.
-
-SSID To CHWALL-Type matrix:
-
-   ssidref 0:  01 00 00 00 00 00 00 00 00 00
-   ssidref 1:  00 01 00 00 00 00 00 00 00 00
-   ssidref 2:  00 00 01 00 00 00 00 00 00 00   <<< our domain has type 2 set
-   ssidref 3:  00 00 00 01 00 00 00 00 00 00
-   ssidref 4:  00 00 00 00 01 00 00 00 00 00
-
-Confict Sets:
-
-   c-set 0:    00 00 01 01 00 00 00 00 00 00   <<< t2 is in c-set0 with type 3
-   c-set 1:    01 00 00 00 00 01 01 00 00 00
-
-Running
-Types:         01 01 01 00 00 00 00 00 00 00   <<< t2 is running since the 
-                     ^^                        <<< current aggregate conflict
-                                               <<< set (see above) does not 
-                                               <<< include type 2
-Conflict
-Aggregate Set: 00 00 00 01 00 01 01 00 00 00   <<< type 3 is added to the 
-                                               <<< conflict aggregate
-
-
-Simple Type Enforcement policy:
-===============================
-Max Types     = 5.
-Max Ssidrefs  = 5.
-Ssidrefs Off  = 8.
-
-SSID To STE-Type matrix:
-
-   ssidref 0: 01 01 01 01 01
-   ssidref 1: 00 01 00 00 00
-   ssidref 2: 00 00 01 00 00
-   ssidref 3: 00 00 00 01 00
-   ssidref 4: 00 00 00 00 01
-
-
-Policy dump End.
-
-
-The sHype xen dmesg output looks similar to the one above when starting the 
-first domain.
-
-Now we start xmsec3 and it has ssidref3. Thus, it tries to run as type3 which 
-conflicts with running type2 (from xmsec2). As expected, creating this domain 
-fails for security policy enforcement reasons.
-
-[root@laptop xen]# xm create -c xmsec3
-Using config file "xmsec3".
-Error: Error creating domain: (22, 'Invalid argument')
-[root@laptop xen]#
-
-[root@laptop xen]# xm dmesg
-.
-.
-[somewhere near the very end]
-(XEN) chwall_pre_domain_create.
-(XEN) chwall_pre_domain_create: CHINESE WALL CONFLICT in type 03.
-
-xmsec3 ssidref3 points to type3, which is in the current conflict aggregate 
-set. This domain cannot start until domain xmsec2 is destroyed, at which time 
-the aggregate conflict set is reduced and type3 is excluded from it. Then, 
-xmsec3 can start. Of course, afterwards, xmsec2 cannot be restarted. Try it.
-
-3. Policy tool
-**************
-toos/policy/policy_tool.c
-
-a) ./policy_tool getpolicy
-      prints the currently enforced policy
-      (see for example section 1.)
-
-b) ./policy_tool setpolicy
-      sets a predefined and hardcoded security
-      policy (the one described in section 2.)
-
-c) ./policy_tool dumpstats
-      prints some status information about the caching
-      of access control decisions (number of cache hits
-      and number of policy evaluations for grant_table
-      and event channels).
-
-d) ./policy_tool loadpolicy <binary_policy_file>
-      sets the policy defined in the <binary_policy_file>
-      please use the policy_processor that is posted to this
-      mailing list to create such a binary policy from an XML
-      policy description
-
-4. Policy interface:
-********************
-The Policy interface is working in "network-byte-order" (big endian). The 
reason for this
-is that policy files/management should be portable and independent of the 
platforms.
-
-Our policy interface enables managers to create a single binary policy file in 
a trusted
-environment and distributed it to multiple systems for enforcement.
-
-5. Booting with a binary policy:
-********************************
-The grub configuration file can be adapted to boot the hypervisor with an
-already active policy. To do this, a binary policy file - this can be
-the same file as used by the policy_tool - should be placed into the boot
-partition. The following entry from the grub configuration file shows how
-a binary policy can be added to the system during boot time. Note that the 
-binary policy must be of the same type that the hypervisor was compiled 
-for. The policy module line should also only be added as the last module
-line if XEN was compiled with the access control module (ACM).
-
-title XEN0 3.0 Devel
-       kernel /xen.gz dom0_mem=400000
-       module /vmlinuz-2.6.12-xen0 root=/dev/hda2 ro console=tty0
-       module /initrd-2.6.12-xen0.img
-       module /xen_sample_policy.bin
-
-
-====================end-of file=======================================
diff -r 5f1ed597f107 -r 8799d14bef77 
linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/Makefile Wed Aug 24 
02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,17 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-XENARCH        := $(subst ",,$(CONFIG_XENARCH))
-
-obj-y :=       timer_tsc.o
-c-obj-y :=
-
-c-link :=
-
-$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
-       @ln -fsn $(srctree)/arch/i386/kernel/timers/$(notdir $@) $@
-
-obj-y  += $(c-obj-y)
-
-clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c      Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,379 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __initdata = 0;
-
-extern spinlock_t i8253_lock;
-
-static int use_tsc;
-
-static unsigned long long monotonic_base;
-static u32 monotonic_offset;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *             ns = cycles / (freq / ns_per_sec)
- *             ns = cycles * (ns_per_sec / freq)
- *             ns = cycles * (10^9 / (cpu_mhz * 10^6))
- *             ns = cycles * (10^3 / cpu_mhz)
- *
- *     Then we use scaling math (suggested by george@xxxxxxxxxx) to get:
- *             ns = cycles * (10^3 * SC / cpu_mhz) / SC
- *             ns = cycles * cyc2ns_scale / SC
- *
- *     And since SC is a constant power of two, we can convert the div
- *  into a shift.   
- *                     -johnstul@xxxxxxxxxx "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale; 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
-       cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-       return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-extern u32 shadow_tsc_stamp;
-extern u64 shadow_system_time;
-
-static unsigned long get_offset_tsc(void)
-{
-       register unsigned long eax, edx;
-
-       /* Read the Time Stamp Counter */
-
-       rdtsc(eax,edx);
-
-       /* .. relative to previous jiffy (32 bits is enough) */
-       eax -= shadow_tsc_stamp;
-
-       /*
-         * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
-         *             = (tsc_low delta) * (usecs_per_clock)
-         *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
-        *
-        * Using a mull instead of a divl saves up to 31 clock cycles
-        * in the critical path.
-         */
-
-       __asm__("mull %2"
-               :"=a" (eax), "=d" (edx)
-               :"rm" (fast_gettimeoffset_quotient),
-                "0" (eax));
-
-       /* our adjusted time offset in microseconds */
-       return edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
-       unsigned long long last_offset, this_offset, base;
-       unsigned seq;
-       
-       /* atomically read monotonic base & last_offset */
-       do {
-               seq = read_seqbegin(&monotonic_lock);
-               last_offset = monotonic_offset;
-               base = monotonic_base;
-       } while (read_seqretry(&monotonic_lock, seq));
-
-       /* Read the Time Stamp Counter */
-       rdtscll(this_offset);
-
-       /* return the value in ns */
-       return base + cycles_2_ns(this_offset - last_offset);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-       unsigned long long this_offset;
-
-       /*
-        * In the NUMA case we dont use the TSC as they are not
-        * synchronized across all CPUs.
-        */
-#ifndef CONFIG_NUMA
-       if (!use_tsc)
-#endif
-               /* no locking but a rare wrong value is not a big deal */
-               return jiffies_64 * (1000000000 / HZ);
-
-       /* Read the Time Stamp Counter */
-       rdtscll(this_offset);
-
-       /* return the value in ns */
-       return cycles_2_ns(this_offset);
-}
-
-
-static void mark_offset_tsc(void)
-{
-
-       /* update the monotonic base value */
-       write_seqlock(&monotonic_lock);
-       monotonic_base = shadow_system_time;
-       monotonic_offset = shadow_tsc_stamp;
-       write_sequnlock(&monotonic_lock);
-}
-
-static void delay_tsc(unsigned long loops)
-{
-       unsigned long bclock, now;
-       
-       rdtscl(bclock);
-       do
-       {
-               rep_nop();
-               rdtscl(now);
-       } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
-       unsigned long long this_offset, last_offset;
-       unsigned long offset, temp, hpet_current;
-
-       write_seqlock(&monotonic_lock);
-       last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
-       /*
-        * It is important that these two operations happen almost at
-        * the same time. We do the RDTSC stuff first, since it's
-        * faster. To avoid any inconsistencies, we need interrupts
-        * disabled locally.
-        */
-       /*
-        * Interrupts are just disabled locally since the timer irq
-        * has the SA_INTERRUPT flag set. -arca
-        */
-       /* read Pentium cycle counter */
-
-       hpet_current = hpet_readl(HPET_COUNTER);
-       rdtsc(last_tsc_low, last_tsc_high);
-
-       /* lost tick compensation */
-       offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
-       if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
-               int lost_ticks = (offset - hpet_last) / hpet_tick;
-               jiffies_64 += lost_ticks;
-       }
-       hpet_last = hpet_current;
-
-       /* update the monotonic base value */
-       this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
-       monotonic_base += cycles_2_ns(this_offset - last_offset);
-       write_sequnlock(&monotonic_lock);
-
-       /* calculate delay_at_last_interrupt */
-       /*
-        * Time offset = (hpet delta) * ( usecs per HPET clock )
-        *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
-        *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
-        * Where,
-        * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
-        */
-       delay_at_last_interrupt = hpet_current - offset;
-       ASM_MUL64_REG(temp, delay_at_last_interrupt,
-                       hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
-       unsigned int cpu;
-       for_each_online_cpu(cpu) {
-               cpufreq_get(cpu);
-       }
-       cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void) 
-{
-       if (cpufreq_init && !cpufreq_delayed_issched) {
-               cpufreq_delayed_issched = 1;
-               printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
-               schedule_work(&cpufreq_delayed_get_work);
-       }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int  ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned long cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-                      void *data)
-{
-       struct cpufreq_freqs *freq = data;
-
-       if (val != CPUFREQ_RESUMECHANGE)
-               write_seqlock_irq(&xtime_lock);
-       if (!ref_freq) {
-               ref_freq = freq->old;
-               loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
-               fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
-               cpu_khz_ref = cpu_khz;
-#endif
-       }
-
-       if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
-           (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
-           (val == CPUFREQ_RESUMECHANGE)) {
-               if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-                       cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
-               if (cpu_khz)
-                       cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
-               if (use_tsc) {
-                       if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
-                               fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
-                               set_cyc2ns_scale(cpu_khz/1000);
-                       }
-               }
-#endif
-       }
-
-       if (val != CPUFREQ_RESUMECHANGE)
-               write_sequnlock_irq(&xtime_lock);
-
-       return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
-       .notifier_call  = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
-       int ret;
-       INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
-       ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
-                                       CPUFREQ_TRANSITION_NOTIFIER);
-       if (!ret)
-               cpufreq_init = 1;
-       return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif 
-
-
-static int init_tsc(char* override)
-{
-       u64 __cpu_khz;
-
-       __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
-       do_div(__cpu_khz, 1000);
-       cpu_khz = (u32)__cpu_khz;
-       printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n", 
-              cpu_khz / 1000, cpu_khz % 1000);
-
-       /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
-          (2^32 * 1 / (clocks/us)) */
-       {
-               unsigned long eax=0, edx=1000;
-               __asm__("divl %2"
-                   :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
-                   :"r" (cpu_khz),
-                   "0" (eax), "1" (edx));
-       }
-
-       set_cyc2ns_scale(cpu_khz/1000);
-
-       use_tsc = 1;
-
-       return 0;
-}
-
-static int __init tsc_setup(char *str)
-{
-       printk(KERN_WARNING "notsc: cannot disable TSC in Xen/Linux.\n");
-       return 1;
-}
-__setup("notsc", tsc_setup);
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-struct timer_opts timer_tsc = {
-       .name = "tsc",
-       .mark_offset = mark_offset_tsc, 
-       .get_offset = get_offset_tsc,
-       .monotonic_clock = monotonic_clock_tsc,
-       .delay = delay_tsc,
-};
-
-struct init_timer_opts timer_tsc_init = {
-       .init = init_tsc,
-       .opts = &timer_tsc,
-};
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,70 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-
-#include <linux/sched.h> 
-#include <linux/stddef.h>
-#include <linux/errno.h> 
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <asm/pda.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
-#include <asm/ia32.h>
-
-#define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-int main(void)
-{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
-       ENTRY(state);
-       ENTRY(flags); 
-       ENTRY(thread); 
-       ENTRY(pid);
-       BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
-       ENTRY(flags);
-       ENTRY(addr_limit);
-       ENTRY(preempt_count);
-       BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-       ENTRY(kernelstack); 
-       ENTRY(oldrsp); 
-       ENTRY(pcurrent); 
-       ENTRY(irqrsp);
-       ENTRY(irqcount);
-       ENTRY(cpunumber);
-       ENTRY(irqstackptr);
-       ENTRY(kernel_mode);
-       BLANK();
-#undef ENTRY
-#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
-       ENTRY(eax);
-       ENTRY(ebx);
-       ENTRY(ecx);
-       ENTRY(edx);
-       ENTRY(esi);
-       ENTRY(edi);
-       ENTRY(ebp);
-       ENTRY(esp);
-       ENTRY(eip);
-       BLANK();
-#undef ENTRY
-       DEFINE(IA32_RT_SIGFRAME_sigcontext,
-              offsetof (struct rt_sigframe32, uc.uc_mcontext));
-       BLANK();
-#endif
-       DEFINE(pbe_address, offsetof(struct pbe, address));
-       DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
-       DEFINE(pbe_next, offsetof(struct pbe, next));
-       return 0;
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/init_task.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/init_task.c   Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,49 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union 
-       __attribute__((__section__(".data.init_task"))) =
-               { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */ 
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c     Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,336 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm-xen/balloon.h>
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA.  This is the scatter-gather version of the
- * above pci_map_single interface.  Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length.  They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
-              int nents, int direction)
-{
-       int i;
-
-       BUG_ON(direction == DMA_NONE);
-       for (i = 0; i < nents; i++ ) {
-               struct scatterlist *s = &sg[i];
-               BUG_ON(!s->page); 
-               s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
-               s->dma_length = s->length;
-       }
-       return nents;
-}
-
-EXPORT_SYMBOL(dma_map_sg);
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-                 int nents, int dir)
-{
-       int i;
-       for (i = 0; i < nents; i++) { 
-               struct scatterlist *s = &sg[i];
-               BUG_ON(s->page == NULL); 
-               BUG_ON(s->dma_address == 0); 
-               dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
-       } 
-}
-
-EXPORT_SYMBOL(dma_unmap_sg);
-
-struct dma_coherent_mem {
-       void            *virt_base;
-       u32             device_base;
-       int             size;
-       int             flags;
-       unsigned long   *bitmap;
-};
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-                          dma_addr_t *dma_handle, unsigned gfp)
-{
-       void *ret;
-       unsigned int order = get_order(size);
-       unsigned long vstart;
-
-       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
-       /* ignore region specifiers */
-       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-
-       if (mem) {
-               int page = bitmap_find_free_region(mem->bitmap, mem->size,
-                                                    order);
-               if (page >= 0) {
-                       *dma_handle = mem->device_base + (page << PAGE_SHIFT);
-                       ret = mem->virt_base + (page << PAGE_SHIFT);
-                       memset(ret, 0, size);
-                       return ret;
-               }
-               if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-                       return NULL;
-       }
-
-       if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
-               gfp |= GFP_DMA;
-
-       vstart = __get_free_pages(gfp, order);
-       ret = (void *)vstart;
-       if (ret == NULL)
-               return ret;
-
-       xen_contig_memory(vstart, order);
-
-       memset(ret, 0, size);
-       *dma_handle = virt_to_bus(ret);
-
-       return ret;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size,
-                        void *vaddr, dma_addr_t dma_handle)
-{
-       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-       int order = get_order(size);
-       
-       if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-               int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
-               bitmap_release_region(mem->bitmap, page, order);
-       } else
-               free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-#if 0
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-                               dma_addr_t device_addr, size_t size, int flags)
-{
-       void __iomem *mem_base;
-       int pages = size >> PAGE_SHIFT;
-       int bitmap_size = (pages + 31)/32;
-
-       if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
-               goto out;
-       if (!size)
-               goto out;
-       if (dev->dma_mem)
-               goto out;
-
-       /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
-       mem_base = ioremap(bus_addr, size);
-       if (!mem_base)
-               goto out;
-
-       dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-       if (!dev->dma_mem)
-               goto out;
-       memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
-       dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
-       if (!dev->dma_mem->bitmap)
-               goto free1_out;
-       memset(dev->dma_mem->bitmap, 0, bitmap_size);
-
-       dev->dma_mem->virt_base = mem_base;
-       dev->dma_mem->device_base = device_addr;
-       dev->dma_mem->size = pages;
-       dev->dma_mem->flags = flags;
-
-       if (flags & DMA_MEMORY_MAP)
-               return DMA_MEMORY_MAP;
-
-       return DMA_MEMORY_IO;
-
- free1_out:
-       kfree(dev->dma_mem->bitmap);
- out:
-       return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
-       struct dma_coherent_mem *mem = dev->dma_mem;
-       
-       if(!mem)
-               return;
-       dev->dma_mem = NULL;
-       iounmap(mem->virt_base);
-       kfree(mem->bitmap);
-       kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
-                                       dma_addr_t device_addr, size_t size)
-{
-       struct dma_coherent_mem *mem = dev->dma_mem;
-       int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       int pos, err;
-
-       if (!mem)
-               return ERR_PTR(-EINVAL);
-
-       pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
-       err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
-       if (err != 0)
-               return ERR_PTR(err);
-       return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-#endif
-
-static LIST_HEAD(dma_map_head);
-static DEFINE_SPINLOCK(dma_map_lock);
-struct dma_map_entry {
-       struct list_head list;
-       dma_addr_t dma;
-       char *bounce, *host;
-       size_t size;
-};
-#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
-
-dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
-              enum dma_data_direction direction)
-{
-       struct dma_map_entry *ent;
-       void *bnc;
-       dma_addr_t dma;
-       unsigned long flags;
-
-       if (direction == DMA_NONE)
-               out_of_line_bug();
-
-       /*
-        * Even if size is sub-page, the buffer may still straddle a page
-        * boundary. Take into account buffer start offset. All other calls are
-        * conservative and always search the dma_map list if it's non-empty.
-        */
-       if (((((unsigned long)ptr) & ~PAGE_MASK) + size) <= PAGE_SIZE) {
-               dma = virt_to_bus(ptr);
-       } else {
-               BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, 0)) == NULL);
-               BUG_ON((ent = kmalloc(sizeof(*ent), GFP_KERNEL)) == NULL);
-               if (direction != DMA_FROM_DEVICE)
-                       memcpy(bnc, ptr, size);
-               ent->dma    = dma;
-               ent->bounce = bnc;
-               ent->host   = ptr;
-               ent->size   = size;
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_add(&ent->list, &dma_map_head);
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-       }
-
-       if ((dma+size) & ~*dev->dma_mask)
-               out_of_line_bug();
-       return dma;
-}
-EXPORT_SYMBOL(dma_map_single);
-
-void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-                enum dma_data_direction direction)
-{
-       struct dma_map_entry *ent;
-       unsigned long flags;
-
-       if (direction == DMA_NONE)
-               out_of_line_bug();
-
-       /* Fast-path check: are there any multi-page DMA mappings? */
-       if (!list_empty(&dma_map_head)) {
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_for_each_entry ( ent, &dma_map_head, list ) {
-                       if (DMA_MAP_MATCHES(ent, dma_addr)) {
-                               list_del(&ent->list);
-                               break;
-                       }
-               }
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-               if (&ent->list != &dma_map_head) {
-                       BUG_ON(dma_addr != ent->dma);
-                       BUG_ON(size != ent->size);
-                       if (direction != DMA_TO_DEVICE)
-                               memcpy(ent->host, ent->bounce, size);
-                       dma_free_coherent(dev, size, ent->bounce, ent->dma);
-                       kfree(ent);
-               }
-       }
-}
-EXPORT_SYMBOL(dma_unmap_single);
-
-void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-                       enum dma_data_direction direction)
-{
-       struct dma_map_entry *ent;
-       unsigned long flags, off;
-
-       /* Fast-path check: are there any multi-page DMA mappings? */
-       if (!list_empty(&dma_map_head)) {
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_for_each_entry ( ent, &dma_map_head, list )
-                       if (DMA_MAP_MATCHES(ent, dma_handle))
-                               break;
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-               if (&ent->list != &dma_map_head) {
-                       off = dma_handle - ent->dma;
-                       BUG_ON((off + size) > ent->size);
-                       /*if (direction != DMA_TO_DEVICE)*/
-                               memcpy(ent->host+off, ent->bounce+off, size);
-               }
-       }
-}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
-
-void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
-                           enum dma_data_direction direction)
-{
-       struct dma_map_entry *ent;
-       unsigned long flags, off;
-
-       /* Fast-path check: are there any multi-page DMA mappings? */
-       if (!list_empty(&dma_map_head)) {
-               spin_lock_irqsave(&dma_map_lock, flags);
-               list_for_each_entry ( ent, &dma_map_head, list )
-                       if (DMA_MAP_MATCHES(ent, dma_handle))
-                               break;
-               spin_unlock_irqrestore(&dma_map_lock, flags);
-               if (&ent->list != &dma_map_head) {
-                       off = dma_handle - ent->dma;
-                       BUG_ON((off + size) > ent->size);
-                       /*if (direction != DMA_FROM_DEVICE)*/
-                               memcpy(ent->bounce+off, ent->host+off, size);
-               }
-       }
-
-       flush_write_buffers();
-}
-EXPORT_SYMBOL(dma_sync_single_for_device);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,466 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-
-/*
- * Reuse arch/xen/i396/mm/ioremap.c. Need to merge later
- */
-#ifndef CONFIG_XEN_PHYSDEV_ACCESS
-
-void * __ioremap(unsigned long phys_addr, unsigned long size,
-                unsigned long flags)
-{
-       return NULL;
-}
-
-void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
-       return NULL;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-}
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
-       return NULL;
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-}
-
-#else
-
-#if defined(__i386__)
-/*
- * Does @address reside within a non-highmem page that is local to this virtual
- * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
- * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
- * why this works.
- */
-static inline int is_local_lowmem(unsigned long address)
-{
-       extern unsigned long max_low_pfn;
-       unsigned long mfn = address >> PAGE_SHIFT;
-       unsigned long pfn = mfn_to_pfn(mfn);
-       return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
-}
-#elif defined(__x86_64__)
-/*
- * 
- */
-static inline int is_local_lowmem(unsigned long address)
-{
-        return 0;
-}
-#endif
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
-       void __iomem * addr;
-       struct vm_struct * area;
-       unsigned long offset, last_addr;
-       domid_t domid = DOMID_IO;
-
-       /* Don't allow wraparound or zero size */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-       if (phys_addr >= 0x0 && last_addr < 0x100000)
-               return isa_bus_to_virt(phys_addr);
-#endif
-
-       /*
-        * Don't allow anybody to remap normal RAM that we're using..
-        */
-       if (is_local_lowmem(phys_addr)) {
-               char *t_addr, *t_end;
-               struct page *page;
-
-               t_addr = bus_to_virt(phys_addr);
-               t_end = t_addr + (size - 1);
-          
-               for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
-                       if(!PageReserved(page))
-                               return NULL;
-
-               domid = DOMID_LOCAL;
-       }
-
-       /*
-        * Mappings have to be page-aligned
-        */
-       offset = phys_addr & ~PAGE_MASK;
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
-       /*
-        * Ok, go for it..
-        */
-       area = get_vm_area(size, VM_IOREMAP | (flags << 20));
-       if (!area)
-               return NULL;
-       area->phys_addr = phys_addr;
-       addr = (void __iomem *) area->addr;
-       if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
-                                   size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
-                                                  _PAGE_DIRTY | _PAGE_ACCESSED
-#if defined(__x86_64__)
-                                                   | _PAGE_USER
-#endif
-                                                  | flags), domid)) {
-               vunmap((void __force *) addr);
-               return NULL;
-       }
-       return (void __iomem *) (offset + (char __iomem *)addr);
-}
-
-
-/**
- * ioremap_nocache     -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address. 
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many 
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- * 
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
-       unsigned long last_addr;
-       void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
-       if (!p) 
-               return p; 
-
-       /* Guaranteed to be > phys_addr, as per __ioremap() */
-       last_addr = phys_addr + size - 1;
-
-       if (is_local_lowmem(last_addr)) { 
-               struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
-               unsigned long npages;
-
-               phys_addr &= PAGE_MASK;
-
-               /* This might overflow and become zero.. */
-               last_addr = PAGE_ALIGN(last_addr);
-
-               /* .. but that's ok, because modulo-2**n arithmetic will make
-               * the page-aligned "last - first" come out right.
-               */
-               npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
-               if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
-                       iounmap(p); 
-                       p = NULL;
-               }
-               global_flush_tlb();
-       }
-
-       return p;                                       
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-       struct vm_struct *p;
-       if ((void __force *) addr <= high_memory) 
-               return; 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
-               return;
-#endif
-       p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
-       if (!p) { 
-               printk("__iounmap: bad address %p\n", addr);
-               return;
-       }
-
-       if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
-               /* p->size includes the guard page, but cpa doesn't like that */
-               change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
-                                (p->size - PAGE_SIZE) >> PAGE_SHIFT,
-                                PAGE_KERNEL);                           
-               global_flush_tlb();
-       } 
-       kfree(p); 
-}
-
-#if defined(__i386__)
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
-       unsigned long offset, last_addr;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
-
-       /* Don't allow wraparound or zero size */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-       if (phys_addr >= 0x0 && last_addr < 0x100000)
-               return isa_bus_to_virt(phys_addr);
-#endif
-
-       /*
-        * Mappings have to be page-aligned
-        */
-       offset = phys_addr & ~PAGE_MASK;
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(last_addr) - phys_addr;
-
-       /*
-        * Mappings have to fit in the FIX_BTMAP area.
-        */
-       nrpages = size >> PAGE_SHIFT;
-       if (nrpages > NR_FIX_BTMAPS)
-               return NULL;
-
-       /*
-        * Ok, go for it..
-        */
-       idx = FIX_BTMAP_BEGIN;
-       while (nrpages > 0) {
-               set_fixmap(idx, phys_addr);
-               phys_addr += PAGE_SIZE;
-               --idx;
-               --nrpages;
-       }
-       return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-       unsigned long virt_addr;
-       unsigned long offset;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
-
-       virt_addr = (unsigned long)addr;
-       if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
-               return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
-               return;
-#endif
-       offset = virt_addr & ~PAGE_MASK;
-       nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
-       idx = FIX_BTMAP_BEGIN;
-       while (nrpages > 0) {
-               clear_fixmap(idx);
-               --idx;
-               --nrpages;
-       }
-}
-#endif /* defined(__i386__) */
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
-
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
-  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
-  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-
-static inline void direct_remap_area_pte(pte_t *pte, 
-                                        unsigned long address, 
-                                        unsigned long size,
-                                        mmu_update_t **v)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
-       if (address >= end)
-               BUG();
-
-       do {
-               (*v)->ptr = virt_to_machine(pte);
-               (*v)++;
-               address += PAGE_SIZE;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int direct_remap_area_pmd(struct mm_struct *mm,
-                                       pmd_t *pmd, 
-                                       unsigned long address, 
-                                       unsigned long size,
-                                       mmu_update_t **v)
-{
-       unsigned long end;
-
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
-       if (address >= end)
-               BUG();
-       do {
-               pte_t *pte = (mm == &init_mm) ? 
-                       pte_alloc_kernel(mm, pmd, address) :
-                       pte_alloc_map(mm, pmd, address);
-               if (!pte)
-                       return -ENOMEM;
-               direct_remap_area_pte(pte, address, end - address, v);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
-       return 0;
-}
- 
-int __direct_remap_area_pages(struct mm_struct *mm,
-                             unsigned long address, 
-                             unsigned long size, 
-                             mmu_update_t *v)
-{
-       pgd_t * dir;
-       unsigned long end = address + size;
-       int error;
-
-#if defined(__i386__)
-       dir = pgd_offset(mm, address);
-#elif defined (__x86_64)
-        dir = (mm == &init_mm) ?
-               pgd_offset_k(address):
-               pgd_offset(mm, address);
-#endif
-       if (address >= end)
-               BUG();
-       spin_lock(&mm->page_table_lock);
-       do {
-               pud_t *pud;
-               pmd_t *pmd;
-
-               error = -ENOMEM;
-               pud = pud_alloc(mm, dir, address);
-               if (!pud)
-                       break;
-               pmd = pmd_alloc(mm, pud, address);
-               if (!pmd)
-                       break;
-               error = 0;
-               direct_remap_area_pmd(mm, pmd, address, end - address, &v);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               dir++;
-
-       } while (address && (address < end));
-       spin_unlock(&mm->page_table_lock);
-       return error;
-}
-
-
-int direct_remap_area_pages(struct mm_struct *mm,
-                           unsigned long address, 
-                           unsigned long machine_addr,
-                           unsigned long size, 
-                           pgprot_t prot,
-                           domid_t  domid)
-{
-       int i;
-       unsigned long start_address;
-#define MAX_DIRECTMAP_MMU_QUEUE 130
-       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
-
-       start_address = address;
-
-       flush_cache_all();
-
-       for (i = 0; i < size; i += PAGE_SIZE) {
-               if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
-                       /* Fill in the PTE pointers. */
-                       __direct_remap_area_pages(mm,
-                                                 start_address, 
-                                                 address-start_address, 
-                                                 u);
- 
-                       if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
-                               return -EFAULT;
-                       v = u;
-                       start_address = address;
-               }
-
-               /*
-                * Fill in the machine address: PTE ptr is done later by
-                * __direct_remap_area_pages(). 
-                */
-               v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
-
-               machine_addr += PAGE_SIZE;
-               address += PAGE_SIZE; 
-               v++;
-       }
-
-       if (v != u) {
-               /* get the ptep's filled in */
-               __direct_remap_area_pages(mm,
-                                         start_address, 
-                                         address-start_address, 
-                                         u);
-               if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
-                       return -EFAULT;
-       }
-
-       flush_tlb_all();
-
-       return 0;
-}
-
-EXPORT_SYMBOL(direct_remap_area_pages);
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/blkback/control.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/control.c        Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,61 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/control.c
- * 
- * Routines for interfacing with the control plane.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
-    
-    switch ( msg->subtype )
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        blkif_create((blkif_be_create_t *)&msg->msg[0]);
-        break;        
-    case CMSG_BLKIF_BE_DESTROY:
-        blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
-        break;        
-    case CMSG_BLKIF_BE_CONNECT:
-        blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
-        break;        
-    case CMSG_BLKIF_BE_DISCONNECT:
-        if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
-            return; /* Sending the response is deferred until later. */
-        break;        
-    case CMSG_BLKIF_BE_VBD_CREATE:
-        vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
-        break;
-    case CMSG_BLKIF_BE_VBD_DESTROY:
-        vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
-        break;
-    default:
-        DPRINTK("Parse error while reading message subtype %d, len %d\n",
-                msg->subtype, msg->length);
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-void blkif_ctrlif_init(void)
-{
-    ctrl_msg_t cmsg;
-    blkif_be_driver_status_t st;
-
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_BE;
-    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_be_driver_status_t);
-    st.status      = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &st, sizeof(st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/drivers/xen/netback/control.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/control.c        Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,58 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/netif/backend/control.c
- * 
- * Routines for interfacing with the control plane.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    DPRINTK("Received netif backend message, subtype=%d\n", msg->subtype);
-    
-    switch ( msg->subtype )
-    {
-    case CMSG_NETIF_BE_CREATE:
-        netif_create((netif_be_create_t *)&msg->msg[0]);
-        break;        
-    case CMSG_NETIF_BE_DESTROY:
-        netif_destroy((netif_be_destroy_t *)&msg->msg[0]);
-        break;  
-    case CMSG_NETIF_BE_CREDITLIMIT:
-        netif_creditlimit((netif_be_creditlimit_t *)&msg->msg[0]);
-        break;       
-    case CMSG_NETIF_BE_CONNECT:
-        netif_connect((netif_be_connect_t *)&msg->msg[0]);
-        break; 
-    case CMSG_NETIF_BE_DISCONNECT:
-        if ( !netif_disconnect((netif_be_disconnect_t *)&msg->msg[0],msg->id) )
-            return; /* Sending the response is deferred until later. */
-        break;        
-    default:
-        DPRINTK("Parse error while reading message subtype %d, len %d\n",
-                msg->subtype, msg->length);
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-void netif_ctrlif_init(void)
-{
-    ctrl_msg_t cmsg;
-    netif_be_driver_status_t st;
-
-    (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_NETIF_BE;
-    cmsg.subtype   = CMSG_NETIF_BE_DRIVER_STATUS;
-    cmsg.length    = sizeof(netif_be_driver_status_t);
-    st.status      = NETIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &st, sizeof(st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff -r 5f1ed597f107 -r 8799d14bef77 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pda.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pda.h     Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,85 +0,0 @@
-#ifndef X86_64_PDA_H
-#define X86_64_PDA_H
-
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/cache.h>
-
-/* Per processor datastructure. %gs points to it while the kernel runs */ 
-struct x8664_pda {
-       struct task_struct *pcurrent;   /* Current process */
-       unsigned long data_offset;      /* Per cpu data offset from linker address */
-       struct x8664_pda *me;       /* Pointer to itself */  
-       unsigned long kernelstack;  /* top of kernel stack for current */ 
-       unsigned long oldrsp;       /* user rsp for system call */
-       unsigned long irqrsp;       /* Old rsp for interrupts. */ 
-        int irqcount;              /* Irq nesting counter. Starts with -1 */   
-       int cpunumber;              /* Logical CPU number */
-       char *irqstackptr;      /* top of irqstack */
-       unsigned int __softirq_pending;
-       unsigned int __nmi_count;       /* number of NMI on this CPUs */
-        unsigned long idle_timestamp;
-       struct mm_struct *active_mm;
-       int mmu_state;     
-       unsigned apic_timer_irqs;
-        int kernel_mode;          /* kernel or user mode */
-} ____cacheline_aligned;
-
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) 
-
-extern struct x8664_pda cpu_pda[];
-
-/* 
- * There is no fast way to get the base address of the PDA, all the accesses
- * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
- */ 
-#define sizeof_field(type,field)  (sizeof(((type *)0)->field))
-#define typeof_field(type,field)  typeof(((type *)0)->field)
-
-extern void __bad_pda_field(void);
-
-#define pda_offset(field) offsetof(struct x8664_pda, field)
-
-#define pda_to_op(op,field,val) do { \
-       switch (sizeof_field(struct x8664_pda, field)) {                \
-case 2: \
-asm volatile(op "w %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
-case 4: \
-asm volatile(op "l %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
-case 8: \
-asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
-       default: __bad_pda_field();                                     \
-       } \
-       } while (0)
-
-/* 
- * AK: PDA read accesses should be neither volatile nor have an memory clobber.
- * Unfortunately removing them causes all hell to break lose currently.
- */
-#define pda_from_op(op,field) ({ \
-       typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
-       switch (sizeof_field(struct x8664_pda, field)) {                \
-case 2: \
-asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
-case 4: \
-asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
-case 8: \
-asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
-       default: __bad_pda_field();                                     \
-       } \
-       ret__; })
-
-
-#define read_pda(field) pda_from_op("mov",field)
-#define write_pda(field,val) pda_to_op("mov",field,val)
-#define add_pda(field,val) pda_to_op("add",field,val)
-#define sub_pda(field,val) pda_to_op("sub",field,val)
-
-#endif
-
-#define PDA_STACKOFFSET (5*8)
-
-#endif
diff -r 5f1ed597f107 -r 8799d14bef77 patches/linux-2.6.12/x86_64-linux.patch
--- a/patches/linux-2.6.12/x86_64-linux.patch   Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,68 +0,0 @@
-diff -urN linux-2.6.10-orig/include/asm-x86_64/hw_irq.h linux-2.6.10/include/asm-x86_64/hw_irq.h
---- linux-2.6.10-orig/include/asm-x86_64/hw_irq.h      2005-01-06 00:34:38.000000000 -0500
-+++ linux-2.6.10/include/asm-x86_64/hw_irq.h   2005-02-25 17:45:37.181518088 -0500
-@@ -48,6 +48,7 @@
-  *
-  *  Vectors 0xf0-0xf9 are free (reserved for future Linux use).
-  */
-+#ifndef CONFIG_XEN
- #define SPURIOUS_APIC_VECTOR  0xff
- #define ERROR_APIC_VECTOR     0xfe
- #define INVALIDATE_TLB_VECTOR 0xfd
-@@ -57,7 +58,7 @@
- #define KDB_VECTOR    0xf9
- 
- #define THERMAL_APIC_VECTOR   0xf0
--
-+#endif
- 
- /*
-  * Local APIC timer IRQ vector is on a different priority level,
-diff -urN linux-2.6.10-orig/include/asm-x86_64/irq.h linux-2.6.10/include/asm-x86_64/irq.h
---- linux-2.6.10-orig/include/asm-x86_64/irq.h 2005-01-06 00:34:38.000000000 -0500
-+++ linux-2.6.10/include/asm-x86_64/irq.h      2005-02-25 17:45:37.181518088 -0500
-@@ -10,6 +10,9 @@
-  *    <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx>
-  */
- 
-+#ifdef CONFIG_XEN
-+#include "irq_vectors.h"
-+#endif
- #define TIMER_IRQ 0
- 
- /*
-@@ -22,6 +25,7 @@
-  * the usable vector space is 0x20-0xff (224 vectors)
-  */
- 
-+#ifndef CONFIG_XEN
- /*
-  * The maximum number of vectors supported by x86_64 processors
-  * is limited to 256. For processors other than x86_64, NR_VECTORS
-@@ -38,6 +42,7 @@
- #define NR_IRQS 224
- #define NR_IRQ_VECTORS 1024
- #endif
-+#endif
- 
- static __inline__ int irq_canonicalize(int irq)
- {
-diff -urN linux-2.6.10-orig/include/asm-x86_64/posix_types.h linux-2.6.10/include/asm-x86_64/posix_types.h
---- linux-2.6.10-orig/include/asm-x86_64/posix_types.h 2004-10-18 17:55:29.000000000 -0400
-+++ linux-2.6.10/include/asm-x86_64/posix_types.h      2005-02-25 17:45:37.183517784 -0500
-@@ -6,7 +6,7 @@
-  * be a little careful about namespace pollution etc.  Also, we cannot
-  * assume GCC is being used.
-  */
--
-+#ifndef __ASSEMBLY__
- typedef unsigned long __kernel_ino_t;
- typedef unsigned int  __kernel_mode_t;
- typedef unsigned long __kernel_nlink_t;
-@@ -115,5 +115,5 @@
- }
- 
- #endif /* defined(__KERNEL__) */
--
-+#endif
- #endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/Makefile
--- a/tools/consoled/Makefile   Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,48 +0,0 @@
-# Makefile for consoled
-# based on xcs Makefile
-# Anthony Liguori 2005
-
-XEN_ROOT=../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-CONSOLED_INSTALL_DIR = /usr/sbin
-XC_CONSOLE_INSTALL_DIR = /usr/libexec/xen
-
-INSTALL         = install
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
-
-CC       = gcc
-CFLAGS   = -Wall -Werror -g3
-
-CFLAGS  += -I $(XEN_XCS)
-CFLAGS  += -I $(XEN_LIBXC)
-CFLAGS  += -I $(XEN_XENSTORE)
-
-SRCS    :=
-SRCS    += main.c utils.c io.c
-
-HDRS     = $(wildcard *.h)
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-BIN      = consoled xc_console
-
-all: $(BIN)
-
-clean:
-       $(RM) *.a *.so *.o *.rpm $(BIN)
-
-consoled: $(OBJS)
-       $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
-              -lxc -lxenstore
-
-xc_console: xc_console.o
-       $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \
-             -lxc -lxenstore
-
-$(OBJS): $(HDRS)
-
-install: $(BIN)
-       $(INSTALL_DIR) -p $(DESTDIR)/$(CONSOLED_INSTALL_DIR)
-       $(INSTALL_PROG) consoled $(DESTDIR)/$(CONSOLED_INSTALL_DIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/$(XC_CONSOLE_INSTALL_DIR)
-       $(INSTALL_PROG) xc_console $(DESTDIR)/$(XC_CONSOLE_INSTALL_DIR)
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/io.c
--- a/tools/consoled/io.c       Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,328 +0,0 @@
-/*\
- *  Copyright (C) International Business Machines  Corp., 2005
- *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- *  Xen Console Daemon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- * 
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-\*/
-
-#define _GNU_SOURCE
-
-#include "utils.h"
-#include "io.h"
-
-#include "xc.h"
-#include "xs.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
-
-#include <malloc.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/select.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <termios.h>
-
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-
-struct buffer
-{
-       char *data;
-       size_t size;
-       size_t capacity;
-       size_t max_capacity;
-};
-
-void buffer_append(struct buffer *buffer, const void *data, size_t size)
-{
-       if ((buffer->capacity - buffer->size) < size) {
-               buffer->capacity += (size + 1024);
-               buffer->data = realloc(buffer->data, buffer->capacity);
-               if (buffer->data == NULL) {
-                       dolog(LOG_ERR, "Memory allocation failed");
-                       exit(ENOMEM);
-               }
-       }
-
-       memcpy(buffer->data + buffer->size, data, size);
-       buffer->size += size;
-
-       if (buffer->max_capacity &&
-           buffer->size > buffer->max_capacity) {
-               memmove(buffer->data + (buffer->size - buffer->max_capacity),
-                       buffer->data, buffer->max_capacity);
-               buffer->data = realloc(buffer->data, buffer->max_capacity);
-               buffer->capacity = buffer->max_capacity;
-       }
-}
-
-bool buffer_empty(struct buffer *buffer)
-{
-       return buffer->size == 0;
-}
-
-void buffer_advance(struct buffer *buffer, size_t size)
-{
-       size = MIN(size, buffer->size);
-       memmove(buffer->data, buffer + size, buffer->size - size);
-       buffer->size -= size;
-}
-
-struct domain
-{
-       int domid;
-       int tty_fd;
-       struct buffer buffer;
-       struct domain *next;
-};
-
-static struct domain *dom_head;
-
-bool domain_is_valid(int domid)
-{
-       bool ret;
-       xc_dominfo_t info;
-
-       ret = (xc_domain_getinfo(xc, domid, 1, &info) == 1 &&
-              info.domid == domid);
-               
-       return ret;
-}
-
-int domain_create_tty(int domid)
-{
-       char path[1024];
-       int master;
-
-       if ((master = getpt()) == -1 ||
-           grantpt(master) == -1 || unlockpt(master) == -1) {
-               dolog(LOG_ERR, "Failed to create tty for domain-%d", domid);
-               master = -1;
-       } else {
-               const char *slave = ptsname(master);
-               struct termios term;
-
-               if (tcgetattr(master, &term) != -1) {
-                       cfmakeraw(&term);
-                       tcsetattr(master, TCSAFLUSH, &term);
-               }
-
-               xs_mkdir(xs, "/console");
-               snprintf(path, sizeof(path), "/console/%d", domid);
-               xs_mkdir(xs, path);
-               strcat(path, "/tty");
-
-               xs_write(xs, path, slave, strlen(slave), O_CREAT);
-       }
-
-       return master;
-}
-
-struct domain *create_domain(int domid)
-{
-       struct domain *dom;
-       char *data;
-       unsigned int len;
-       char path[1024];
-
-       dom = (struct domain *)malloc(sizeof(struct domain));
-       if (dom == NULL) {
-               dolog(LOG_ERR, "Out of memory %s:%s():L%d",
-                     __FILE__, __FUNCTION__, __LINE__);
-               exit(ENOMEM);
-       }
-
-       dom->domid = domid;
-       dom->tty_fd = domain_create_tty(domid);
-       dom->buffer.data = 0;
-       dom->buffer.size = 0;
-       dom->buffer.capacity = 0;
-       dom->buffer.max_capacity = 0;
-
-       snprintf(path, sizeof(path), "/console/%d/limit", domid);
-       data = xs_read(xs, path, &len);
-       if (data) {
-               dom->buffer.max_capacity = strtoul(data, 0, 0);
-               free(data);
-       }
-
-       dolog(LOG_DEBUG, "New domain %d", domid);
-
-       return dom;
-}
-
-struct domain *lookup_domain(int domid)
-{
-       struct domain **pp;
-
-       for (pp = &dom_head; *pp; pp = &(*pp)->next) {
-               struct domain *dom = *pp;
-
-               if (dom->domid == domid) {
-                       return dom;
-               } else if (dom->domid > domid) {
-                       *pp = create_domain(domid);
-                       (*pp)->next = dom;
-                       return *pp;
-               }
-       }
-
-       *pp = create_domain(domid);
-       return *pp;
-}
-
-void remove_domain(struct domain *dom)
-{
-       struct domain **pp;
-
-       dolog(LOG_DEBUG, "Removing domain-%d", dom->domid);
-
-       for (pp = &dom_head; *pp; pp = &(*pp)->next) {
-               struct domain *d = *pp;
-
-               if (dom->domid == d->domid) {
-                       *pp = d->next;
-                       free(d);
-                       break;
-               }
-       }
-}
-
-void handle_tty_read(struct domain *dom)
-{
-       ssize_t len;
-       xcs_msg_t msg;
-
-       msg.type = XCS_REQUEST;
-       msg.u.control.remote_dom = dom->domid;
-       msg.u.control.msg.type = CMSG_CONSOLE;
-       msg.u.control.msg.subtype = CMSG_CONSOLE_DATA;
-       msg.u.control.msg.id = 1;
-
-       len = read(dom->tty_fd, msg.u.control.msg.msg, 60);
-       if (len < 1) {
-               close(dom->tty_fd);
-
-               if (domain_is_valid(dom->domid)) {
-                       dom->tty_fd = domain_create_tty(dom->domid);
-               } else {
-                       remove_domain(dom);
-               }
-       } else if (domain_is_valid(dom->domid)) {
-               msg.u.control.msg.length = len;
-
-               if (!write_sync(xcs_data_fd, &msg, sizeof(msg))) {
-                       dolog(LOG_ERR, "Write to xcs failed: %m");
-               }
-       } else {
-               close(dom->tty_fd);
-               remove_domain(dom);
-       }
-}
-
-void handle_tty_write(struct domain *dom)
-{
-       ssize_t len;
-
-       len = write(dom->tty_fd, dom->buffer.data, dom->buffer.size);
-       if (len < 1) {
-               close(dom->tty_fd);
-
-               if (domain_is_valid(dom->domid)) {
-                       dom->tty_fd = domain_create_tty(dom->domid);
-               } else {
-                       remove_domain(dom);
-               }
-       } else {
-               buffer_advance(&dom->buffer, len);
-       }
-}
-
-void handle_xcs_msg(int fd)
-{
-       xcs_msg_t msg;
-
-       if (!read_sync(fd, &msg, sizeof(msg))) {
-               dolog(LOG_ERR, "read from xcs failed! %m");
-       } else if (msg.type == XCS_REQUEST) {
-               struct domain *dom;
-
-               dom = lookup_domain(msg.u.control.remote_dom);
-               buffer_append(&dom->buffer,
-                             msg.u.control.msg.msg,
-                             msg.u.control.msg.length);
-       }
-}
-
-static void enum_domains(void)
-{
-       int domid = 0;
-       xc_dominfo_t dominfo;
-
-       while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) {
-               lookup_domain(dominfo.domid);
-               domid = dominfo.domid + 1;
-       }
-}
-
-void handle_io(void)
-{
-       fd_set readfds, writefds;
-       int ret;
-       int max_fd = -1;
-
-       do {
-               struct domain *d;
-               struct timeval tv = { 1, 0 };
-
-               FD_ZERO(&readfds);
-               FD_ZERO(&writefds);
-
-               FD_SET(xcs_data_fd, &readfds);
-               max_fd = MAX(xcs_data_fd, max_fd);
-
-               for (d = dom_head; d; d = d->next) {
-                       if (d->tty_fd != -1) {
-                               FD_SET(d->tty_fd, &readfds);
-                       }
-
-                       if (d->tty_fd != -1 && !buffer_empty(&d->buffer)) {
-                               FD_SET(d->tty_fd, &writefds);
-                       }
-
-                       max_fd = MAX(d->tty_fd, max_fd);
-               }
-
-               ret = select(max_fd + 1, &readfds, &writefds, 0, &tv);
-               enum_domains();
-
-               if (FD_ISSET(xcs_data_fd, &readfds)) {
-                       handle_xcs_msg(xcs_data_fd);
-               }
-
-               for (d = dom_head; d; d = d->next) {
-                       if (FD_ISSET(d->tty_fd, &readfds)) {
-                               handle_tty_read(d);
-                       }
-
-                       if (FD_ISSET(d->tty_fd, &writefds)) {
-                               handle_tty_write(d);
-                       }
-               }
-       } while (ret > -1);
-}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/io.h
--- a/tools/consoled/io.h       Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,26 +0,0 @@
-/*\
- *  Copyright (C) International Business Machines  Corp., 2005
- *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- *  Xen Console Daemon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- * 
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-\*/
-
-#ifndef CONSOLED_IO_H
-#define CONSOLED_IO_H
-
-void handle_io(void);
-
-#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/main.c
--- a/tools/consoled/main.c     Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,93 +0,0 @@
-/*\
- *  Copyright (C) International Business Machines  Corp., 2005
- *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- *  Xen Console Daemon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- * 
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-\*/
-
-#include <getopt.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/types.h>
-
-#include "xc.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
-
-#include "utils.h"
-#include "io.h"
-
-int main(int argc, char **argv)
-{
-       const char *sopts = "hVvi";
-       struct option lopts[] = {
-               { "help", 0, 0, 'h' },
-               { "version", 0, 0, 'V' },
-               { "verbose", 0, 0, 'v' },
-               { "interactive", 0, 0, 'i' },
-               { 0 },
-       };
-       bool is_interactive = false;
-       int ch;
-       int syslog_option = LOG_CONS;
-       int syslog_mask = LOG_WARNING;
-       int opt_ind = 0;
-
-       while ((ch = getopt_long(argc, argv, sopts, lopts, &opt_ind)) != -1) {
-               switch (ch) {
-               case 'h':
-                       //usage(argv[0]);
-                       exit(0);
-               case 'V':
-                       //version(argv[0]);
-                       exit(0);
-               case 'v':
-                       syslog_option |= LOG_PERROR;
-                       syslog_mask = LOG_DEBUG;
-                       break;
-               case 'i':
-                       is_interactive = true;
-                       break;
-               case '?':
-                       fprintf(stderr,
-                               "Try `%s --help' for more information\n",
-                               argv[0]);
-                       exit(EINVAL);
-               }
-       }
-
-       if (geteuid() != 0) {
-               fprintf(stderr, "%s requires root to run.\n", argv[0]);
-               exit(EPERM);
-       }
-
-       openlog("consoled", syslog_option, LOG_DAEMON);
-       setlogmask(syslog_mask);
-
-       if (!is_interactive) {
-               daemonize("/var/run/consoled.pid");
-       }
-
-       xen_setup();
-
-       handle_io();
-
-       closelog();
-
-       return 0;
-}
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/utils.c
--- a/tools/consoled/utils.c    Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,251 +0,0 @@
-/*\
- *  Copyright (C) International Business Machines  Corp., 2005
- *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- *  Xen Console Daemon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- * 
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-\*/
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <err.h>
-#include <errno.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <stdbool.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <string.h>
-
-#include "xc.h"
-#include "xen/io/domain_controller.h"
-#include "xcs_proto.h"
-
-#include "utils.h"
-
-struct xs_handle *xs;
-int xc;
-
-int xcs_ctrl_fd = -1;
-int xcs_data_fd = -1;
-
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
-{
-       size_t offset = 0;
-       ssize_t len;
-
-       while (offset < size) {
-               if (do_read) {
-                       len = read(fd, data + offset, size - offset);
-               } else {
-                       len = write(fd, data + offset, size - offset);
-               }
-
-               if (len < 1) {
-                       if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
-                               return false;
-                       }
-               } else {
-                       offset += len;
-               }
-       }
-
-       return true;
-}
-
-static int open_domain_socket(const char *path)
-{
-       struct sockaddr_un addr;
-       int sock;
-       size_t addr_len;
-
-       if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
-               goto out;
-       }
-
-       addr.sun_family = AF_UNIX;
-       strcpy(addr.sun_path, path);
-       addr_len = sizeof(addr.sun_family) + strlen(XCS_SUN_PATH) + 1;
-
-       if (connect(sock, (struct sockaddr *)&addr, addr_len) == -1) {
-               goto out_close_sock;
-       }
-
-       return sock;
-
- out_close_sock:
-       close(sock);
- out:
-       return -1;
-}
-
-static void child_exit(int sig)
-{
-       while (waitpid(-1, NULL, WNOHANG) > 0);
-}
-
-void daemonize(const char *pidfile)
-{
-       pid_t pid;
-       int fd;
-       int len;
-       int i;
-       char buf[100];
-
-       if (getppid() == 1) {
-               return;
-       }
-
-       if ((pid = fork()) > 0) {
-               exit(0);
-       } else if (pid == -1) {
-               err(errno, "fork() failed");
-       }
-
-       setsid();
-
-       /* redirect fd 0,1,2 to /dev/null */
-       if ((fd = open("/dev/null",O_RDWR)) == -1) {
-               exit(1);
-       }
-
-       for (i = 0; i <= 2; i++) {
-               close(i);
-               dup2(fd, i);
-       }
-
-       close(fd);
-
-       umask(027);
-       chdir("/");
-
-       fd = open(pidfile, O_RDWR | O_CREAT);
-       if (fd == -1) {
-               exit(1);
-       }
-
-       if (lockf(fd, F_TLOCK, 0) == -1) {
-               exit(1);
-       }
-
-       len = sprintf(buf, "%d\n", getpid());
-       write(fd, buf, len);
-
-       signal(SIGCHLD, child_exit);
-       signal(SIGTSTP, SIG_IGN);
-       signal(SIGTTOU, SIG_IGN);
-       signal(SIGTTIN, SIG_IGN);
-}
-
-/* synchronized send/recv strictly for setting up xcs */
-/* always use asychronize callbacks any other time */
-static bool xcs_send_recv(int fd, xcs_msg_t *msg)
-{
-       bool ret = false;
-
-       if (!write_sync(fd, msg, sizeof(*msg))) {
-               dolog(LOG_ERR, "Write failed at %s:%s():L%d?  Possible bug.",
-                      __FILE__, __FUNCTION__, __LINE__);
-               goto out;
-       }
-
-       if (!read_sync(fd, msg, sizeof(*msg))) {
-               dolog(LOG_ERR, "Read failed at %s:%s():L%d?  Possible bug.",
-                      __FILE__, __FUNCTION__, __LINE__);
-               goto out;
-       }
-
-       ret = true;
-
- out:
-       return ret;
-}
-
-bool xen_setup(void)
-{
-       int sock;
-       xcs_msg_t msg;
-       
-       xs = xs_daemon_open();
-       if (xs == NULL) {
-               dolog(LOG_ERR,
-                     "Failed to contact xenstore (%m).  Is it running?");
-               goto out;
-       }
-
-       xc = xc_interface_open();
-       if (xc == -1) {
-               dolog(LOG_ERR, "Failed to contact hypervisor (%m)");
-               goto out;
-       }
-
-       sock = open_domain_socket(XCS_SUN_PATH);
-       if (sock == -1) {
-               dolog(LOG_ERR, "Failed to contact xcs (%m).  Is it running?");
-               goto out_close_store;
-       }
-
-       xcs_ctrl_fd = sock;
-
-       sock = open_domain_socket(XCS_SUN_PATH);
-       if (sock == -1) {
-               dolog(LOG_ERR, "Failed to contact xcs (%m).  Is it running?");
-               goto out_close_ctrl;
-       }
-       
-       xcs_data_fd = sock;
-
-       memset(&msg, 0, sizeof(msg));
-       msg.type = XCS_CONNECT_CTRL;
-       if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
-               dolog(LOG_ERR, "xcs control connect failed.  Possible bug.");
-               goto out_close_data;
-       }
-
-       msg.type = XCS_CONNECT_DATA;
-       if (!xcs_send_recv(xcs_data_fd, &msg) || msg.result != XCS_RSLT_OK) {
-               dolog(LOG_ERR, "xcs data connect failed.  Possible bug.");
-               goto out_close_data;
-       }
-
-       /* Since the vast majority of control messages are console messages
-          it's just easier to ignore other messages that try to bind to 
-          a specific type. */
-       msg.type = XCS_MSG_BIND;
-       msg.u.bind.port = PORT_WILDCARD;
-       msg.u.bind.type = TYPE_WILDCARD;
-       if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) {
-               dolog(LOG_ERR, "xcs vind failed.  Possible bug.");
-               goto out_close_data;
-       }
-       
-       return true;
-
- out_close_data:
-       close(xcs_ctrl_fd);
-       xcs_data_fd = -1;
- out_close_ctrl:
-       close(xcs_ctrl_fd);
-       xcs_ctrl_fd = -1;
- out_close_store:
-       xs_daemon_close(xs);
- out:
-       return false;
-}
-
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/utils.h
--- a/tools/consoled/utils.h    Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,47 +0,0 @@
-/*\
- *  Copyright (C) International Business Machines  Corp., 2005
- *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- *  Xen Console Daemon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- * 
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-\*/
-
-#ifndef CONSOLED_UTILS_H
-#define CONSOLED_UTILS_H
-
-#include <stdbool.h>
-#include <syslog.h>
-#include <stdio.h>
-
-#include "xs.h"
-
-void daemonize(const char *pidfile);
-bool xen_setup(void);
-#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
-#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
-
-extern int xcs_ctrl_fd;
-extern int xcs_data_fd;
-extern struct xs_handle *xs;
-extern int xc;
-
-#if 1
-#define dolog(val, fmt, ...) syslog(val, fmt, ## __VA_ARGS__)
-#else
-#define dolog(val, fmt, ...) fprintf(stderr, fmt "\n", ## __VA_ARGS__)
-#endif
-
-#endif
diff -r 5f1ed597f107 -r 8799d14bef77 tools/consoled/xc_console.c
--- a/tools/consoled/xc_console.c       Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,236 +0,0 @@
-/*\
- *  Copyright (C) International Business Machines  Corp., 2005
- *  Author(s): Anthony Liguori <aliguori@xxxxxxxxxx>
- *
- *  Xen Console Daemon
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- * 
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-\*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <time.h>
-#include <fcntl.h>
-#include <sys/wait.h>
-#include <termios.h>
-#include <signal.h>
-#include <getopt.h>
-#include <sys/select.h>
-#include <err.h>
-#include <errno.h>
-#include <pty.h>
-
-#include "xc.h"
-#include "xs.h"
-
-#define ESCAPE_CHARACTER 0x1d
-
-static volatile sig_atomic_t received_signal = 0;
-
-static void sighandler(int signum)
-{
-       received_signal = 1;
-}
-
-static bool write_sync(int fd, const void *data, size_t size)
-{
-       size_t offset = 0;
-       ssize_t len;
-
-       while (offset < size) {
-               len = write(fd, data + offset, size - offset);
-               if (len < 1) {
-                       return false;
-               }
-               offset += len;
-       }
-
-       return true;
-}
-
-static void usage(const char *program) {
-       printf("Usage: %s [OPTION] DOMID\n"
-              "Attaches to a virtual domain console\n"
-              "\n"
-              "  -h, --help       display this help and exit\n"
-              , program);
-}
-
-/* don't worry too much if setting terminal attributes fail */
-static void init_term(int fd, struct termios *old)
-{
-       struct termios new_term;
-
-       if (tcgetattr(fd, old) == -1) {
-               perror("tcgetattr() failed");
-               return;
-       }
-
-       new_term = *old;
-       cfmakeraw(&new_term);
-
-       if (tcsetattr(fd, TCSAFLUSH, &new_term) == -1) {
-               perror("tcsetattr() failed");
-       }
-}
-
-static void restore_term(int fd, struct termios *old)
-{
-       if (tcsetattr(fd, TCSAFLUSH, old) == -1) {
-               perror("tcsetattr() failed");
-       }
-}
-
-static int console_loop(int xc_handle, domid_t domid, int fd)
-{
-       int ret;
-
-       do {
-               fd_set fds;
-
-               FD_ZERO(&fds);
-               FD_SET(STDIN_FILENO, &fds);
-               FD_SET(fd, &fds);
-
-               ret = select(fd + 1, &fds, NULL, NULL, NULL);
-               if (ret == -1) {
-                       if (errno == EINTR || errno == EAGAIN) {
-                               continue;
-                       }
-                       perror("select() failed");
-                       return -1;
-               }
-
-               if (FD_ISSET(STDIN_FILENO, &fds)) {
-                       ssize_t len;
-                       char msg[60];
-
-                       len = read(STDIN_FILENO, msg, sizeof(msg));
-                       if (len == 1 && msg[0] == ESCAPE_CHARACTER) {
-                               return 0;
-                       } 
-
-                       if (len == 0 && len == -1) {
-                               if (len == -1 &&
-                                   (errno == EINTR || errno == EAGAIN)) {
-                                       continue;
-                               }
-                               perror("select() failed");
-                               return -1;
-                       }
-
-                       if (!write_sync(fd, msg, len)) {
-                               perror("write() failed");
-                               return -1;
-                       }
-               }
-
-               if (FD_ISSET(fd, &fds)) {
-                       ssize_t len;
-                       char msg[512];
-
-                       len = read(fd, msg, sizeof(msg));
-                       if (len == 0 || len == -1) {
-                               if (len == -1 &&
-                                   (errno == EINTR || errno == EAGAIN)) {
-                                       continue;
-                               }
-                               perror("select() failed");
-                               return -1;
-                       }
-
-                       if (!write_sync(STDOUT_FILENO, msg, len)) {
-                               perror("write() failed");
-                               return -1;
-                       }
-               }
-       } while (received_signal == 0);
-
-       return 0;
-}
-
-int main(int argc, char **argv)
-{
-       struct termios attr;
-       int domid;
-       int xc_handle;
-       char *sopt = "hf:pc";
-       int ch;
-       int opt_ind=0;
-       struct option lopt[] = {
-               { "help",    0, 0, 'h' },
-               { "file",    1, 0, 'f' },
-               { "pty",     0, 0, 'p' },
-               { "ctty",    0, 0, 'c' },
-               { 0 },
-
-       };
-       char *str_pty;
-       char path[1024];
-       int spty;
-       unsigned int len = 0;
-       struct xs_handle *xs;
-
-       while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
-               switch(ch) {
-               case 'h':
-                       usage(argv[0]);
-                       exit(0);
-                       break;
-               }
-       }
-       
-       if ((argc - optind) != 1) {
-               fprintf(stderr, "Invalid number of arguments\n");
-               fprintf(stderr, "Try `%s --help' for more information.\n", 
-                       argv[0]);
-               exit(EINVAL);
-       }
-       
-       domid = atoi(argv[optind]);
-
-       xs = xs_daemon_open();
-       if (xs == NULL) {
-               err(errno, "Could not contact XenStore");
-       }
-
-       xc_handle = xc_interface_open();
-       if (xc_handle == -1) {
-               err(errno, "xc_interface_open()");
-       }
-       
-       signal(SIGTERM, sighandler);
-
-       snprintf(path, sizeof(path), "/console/%d/tty", domid);
-       str_pty = xs_read(xs, path, &len);
-       if (str_pty == NULL) {
-               err(errno, "Could not read tty from store");
-       }
-       spty = open(str_pty, O_RDWR | O_NOCTTY);
-       if (spty == -1) {
-               err(errno, "Could not open tty `%s'", str_pty);
-       }
-       free(str_pty);
-
-       init_term(STDIN_FILENO, &attr);
-       console_loop(xc_handle, domid, spty);
-       restore_term(STDIN_FILENO, &attr);
-
-       return 0;
- }
diff -r 5f1ed597f107 -r 8799d14bef77 tools/examples/network
--- a/tools/examples/network    Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,246 +0,0 @@
-#!/bin/sh
-#============================================================================
-# Default Xen network start/stop script.
-# Xend calls a network script when it starts.
-# The script name to use is defined in /etc/xen/xend-config.sxp
-# in the network-script field.
-#
-# This script creates a bridge (default xen-br0), adds a device
-# (default eth0) to it, copies the IP addresses from the device
-# to the bridge and adjusts the routes accordingly.
-#
-# If all goes well, this should ensure that networking stays up.
-# However, some configurations are upset by this, especially
-# NFS roots. If the bridged setup does not meet your needs,
-# configure a different script, for example using routing instead.
-#
-# Usage:
-#
-# network (start|stop|status) {VAR=VAL}*
-#
-# Vars:
-#
-# bridge     The bridge to use (default xen-br0).
-# netdev     The interface to add to the bridge (default eth0).
-# antispoof  Whether to use iptables to prevent spoofing (default yes).
-#
-# start:
-# Creates the bridge and enslaves netdev to it.
-# Copies the IP addresses from netdev to the bridge.
-# Deletes the routes to netdev and adds them on bridge.
-#
-# stop:
-# Removes netdev from the bridge.
-# Deletes the routes to bridge and adds them to netdev.
-#
-# status:
-# Print ifconfig for netdev and bridge.
-# Print routes.
-#
-#============================================================================
-
-# Exit if anything goes wrong.
-set -e 
-
-# First arg is the operation.
-OP=$1
-shift
-
-# Pull variables in args in to environment.
-for arg ; do export "${arg}" ; done
-
-bridge=${bridge:-xen-br0}
-netdev=${netdev:-eth0}
-antispoof=${antispoof:-yes}
-
-echo "*network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" >&2
-
-# Usage: transfer_addrs src dst
-# Copy all IP addresses (including aliases) from device $src to device $dst.
-transfer_addrs () {
-    local src=$1
-    local dst=$2
-    # Don't bother if $dst already has IP addresses.
-    if ip addr show dev ${dst} | egrep -q '^ *inet ' ; then
-        return
-    fi
-    # Address lines start with 'inet' and have the device in them.
-    # Replace 'inet' with 'ip addr add' and change the device name $src
-    # to 'dev $src'.
-    ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
-s/inet/ip addr add/
-s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+/[0-9]\+\)@\1@
-s/${src}/dev ${dst}/
-" | sh -e
-    # Remove automatic routes on destionation device
-    ip route list | sed -ne "
-/dev ${dst}\( \|$\)/ {
-  s/^/ip route del /
-  p
-}" | sh -e
-}
-
-# Usage: del_addrs src
-del_addrs () {
-    local src=$1
-    ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
-s/inet/ip addr del/
-s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
-s/${src}/dev ${src}/
-" | sh -e
-}
-
-# Usage: transfer_routes src dst
-# Get all IP routes to device $src, delete them, and
-# add the same routes to device $dst.
-# The original routes have to be deleted, otherwise adding them
-# for $dst fails (duplicate routes).
-transfer_routes () {
-    local src=$1
-    local dst=$2
-    # List all routes and grep the ones with $src in.
-    # Stick 'ip route del' on the front to delete.
-    # Change $src to $dst and use 'ip route add' to add.
-    ip route list | sed -ne "
-/dev ${src}\( \|$\)/ {
-  h
-  s/^/ip route del /
-  P
-  g
-  s/${src}/${dst}/
-  s/^/ip route add /
-  P
-  d
-}" | sh -e
-}
-
-# Usage: create_bridge bridge
-create_bridge () {
-    local bridge=$1
-
-    # Don't create the bridge if it already exists.
-    if ! brctl show | grep -q ${bridge} ; then
-        brctl addbr ${bridge}
-        brctl stp ${bridge} off
-        brctl setfd ${bridge} 0
-    fi
-    ifconfig ${bridge} up
-}
-
-# Usage: add_to_bridge bridge dev
-add_to_bridge () {
-    local bridge=$1
-    local dev=$2
-    # Don't add $dev to $bridge if it's already on a bridge.
-    if ! brctl show | grep -q ${dev} ; then
-        brctl addif ${bridge} ${dev}
-    fi
-}
-
-# Usage: antispoofing dev bridge
-# Set the default forwarding policy for $dev to drop.
-# Allow forwarding to the bridge.
-antispoofing () {
-    local dev=$1
-    local bridge=$2
-
-    iptables -P FORWARD DROP
-    iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT
-}
-
-# Usage: show_status dev bridge
-# Print ifconfig and routes.
-show_status () {
-    local dev=$1
-    local bridge=$2
-    
-    echo '============================================================'
-    ifconfig ${dev}
-    ifconfig ${bridge}
-    echo ' '
-    ip route list
-    echo ' '
-    route -n
-    echo '============================================================'
-}
-
-op_start () {
-    if [ "${bridge}" == "null" ] ; then
-        return
-    fi
-
-    create_bridge ${bridge}
-
-    if ifconfig 2>/dev/null | grep -q veth0 ; then
-        return
-    fi
-
-    if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
-        # Propagate MAC address and ARP responsibilities to virtual interface.
-        mac=`ifconfig ${netdev} | grep HWadd | sed -e 
's/.*\(..:..:..:..:..:..\).*/\1/'`
-        ifconfig veth0 down
-        ifconfig veth0 hw ether ${mac}
-        ifconfig veth0 arp up
-        transfer_addrs ${netdev} veth0
-        transfer_routes ${netdev} veth0
-        del_addrs ${netdev}
-        ifconfig ${netdev} -arp down
-        ifconfig ${netdev} hw ether fe:ff:ff:ff:ff:ff up
-        # Bring up second half of virtual device and attach it to the bridge.
-        ifconfig vif0.0 up
-        add_to_bridge ${bridge} vif0.0
-    else
-        transfer_addrs ${netdev} ${bridge}
-        transfer_routes ${netdev} ${bridge}
-    fi
-
-    # Attach the real interface to the bridge.
-    add_to_bridge ${bridge} ${netdev}
-    
-    if [ ${antispoof} == 'yes' ] ; then
-        antispoofing ${netdev} ${bridge}
-    fi
-}
-
-op_stop () {
-    if [ "${bridge}" == "null" ] ; then
-        return
-    fi
-
-    brctl delif ${bridge} ${netdev}
-
-    if ifconfig veth0 2>/dev/null | grep -q veth0 ; then
-        brctl delif ${bridge} vif0.0
-        ifconfig vif0.0 down
-        mac=`ifconfig veth0 | grep HWadd | sed -e 
's/.*\(..:..:..:..:..:..\).*/\1/'`
-        ifconfig ${netdev} down
-        ifconfig ${netdev} hw ether ${mac}
-        ifconfig ${netdev} arp up
-        transfer_addrs veth0 ${netdev}
-        transfer_routes veth0 ${netdev}
-        del_addrs veth0
-        ifconfig veth0 -arp down
-        ifconfig veth0 hw ether 00:00:00:00:00:00
-    else
-        transfer_routes ${bridge} ${netdev}
-    fi
-}
-
-case ${OP} in
-  start)
-        op_start
-        ;;
-    
-    stop)
-        op_stop
-        ;;
-
-    status)
-        show_status ${netdev} ${bridge}
-       ;;
-
-    *)
-       echo 'Unknown command: ' ${OP} >&2
-       echo 'Valid commands are: start, stop, status' >&2
-       exit 1
-esac
diff -r 5f1ed597f107 -r 8799d14bef77 
tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz
--- a/tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz     Wed Aug 24 02:43:18 2005
+++ /dev/null   Thu Aug 25 22:53:20 2005
@@ -1,7 +0,0 @@
-?ËmB qemu-vgaram-bin íÝÍk\×ÇñCòÂÁMw¢    I
-^´?XÊ?#\c?Ô¡
\ No newline at end of file
-M7okɲe?L,a9?J?oò'B×Ýô(¨1?.L?Å?´x?lJ1/?ÜçÑ}Ï?3wf?h?s7ßü¸3wFsæ?{î¹ç¾?}öpJ=ÉÉ?dAr\2'yI²&Y?\?lH??¼#ù?äE{Ϫ½Ï_×÷¿/¹bÑϾ,¹*yÊ^ßÌÞ?/yáðuy|#½vø¯òY7äõòüïé?Ã?Èë7öò?¾çP
-?~ö}Éç6ÕÜ?|?=.ÿ-«?Õõ3gϵ?^¶×Ûê¬ÿ÷=©³]©³]Y/Ítéð5ymA??ÎÛûß·:=oõ{Åæ_µïsÁ?ë÷xMò¶=Õæ?µÇÃõð°dArM²kÓ?U÷?YÉIËWYf³×j?ïmÁ×yòGÕÅìÈ÷?Ͷß%kC¾î7³vµjmCç?Xï÷GDëýfÞÊD,».ó?ä%ËJ\ݾ??ItzIru/7Óºô;'¤ÿ?ÛË'é?Ì[?×?í}öw¶{·lõ¿bóuÞE?w̶پ.¾Ëc?#??I??lInÙ
-lo^çy¼íåÛ@YÍr=aýÎU˺µ³9ËY¼ÖlzqozZêðYyí´Ì??z??ùóò·óÒ?×åñ]ißwd=}çÔ?ÿCÉr6Ís15ÛÀ??íÀuàuüyê÷?'íù?ú?J>²åº#¹+¹??¾ÿ?=¿c¯Tþ?$3©iÿº?²òO¥þv1cïü[Ý·½?íWG?Wòýï?¶½®[?ítVÚÔµ?1?äñ¦D§«ò¼'óõñ¹¬¯ýùTÛ?¦Üöò¤âqD>´væù?ä??D?c?ÉbjÆ={¾c?uÞCöüZ`´e·Èàë/IÖ$Ë?K??lþ+?×%oJ?´ùOÛTß¿.yÿÁk³é¨lëoH_{AoJV%ò|Uæ¯ÉãËòøDö9Çlª?·ùG³éÉ?äªä?Äÿö{¿æ9ÉÜÞãÙô¼?3'e<+??¶þF§úü?=~Ú>G?ã?¢Ì?evLtÿ3~4ÝÌZ{Ûµir=ó©Ùvm:?ò½¾~?²6ð?=ßÈÖý?lýÿAò²ä÷?·mÞo%/JÞ°vsFúûW$oJ~gí^_˦OYð¿]²2ÎÛkú«öÞµ÷ýBò|ö=µ|ÝNþ?}O}ÿ/MM_wߦGÃêz¸þ?ã?
\ No newline at end of file
-IOú?%ɦ,ÿ?×?ÖÙe[.?êöÿ?=~Âê@?eÓê`Ù?¿þæZê{Õ:þ:XýyßöCÚ߯e;#ÛøS{}àIé7gåõfûoÚ?NgÓÙCM¿ªmì¹Û´7?÷?ýs6ÿ?¯÷?#ßׯµµÉõ§ûºÞÞúß?iO²$iÚ_Oê¯Ðþ¼?õ}ð§?m{>cù8õ÷½?ãÐdåkYÞïz_¸cß#ª\wøxcSòE<þÚ
*{>Ëbà2ÇÇ/kRßW÷ÆóÒ?äØË¾Ï?ä?DÇïí?áþ^ç?ýý±,û¯ßï¶ÅÛB?zñcßû©\Üþ^ß?õû?u«ß?mXÝèû.Úß½?ÍÓ1àÖ@?5ÛØë2îyW¦OÚövîÁºjú?ò3uÞ¥CýmýÝCý¾÷²=î}¯uâÉÇbµÖ?fx¼Ý]¢û=??ýßv?;þÖx;÷s~~èÏÏÁ=¨_íw·Òøã?¨ò}ÿ7ªîõµÈö???cu=?%úØW?ùQ+W÷?·RÿøÇóX`ù»VÏ?ï*/ÿB¶®½ü?"?åïë?o?y¢Û_¾¬5?¿,ïz±þµ>"Ï{ÚúùZ}ÿ¤ò£÷½m}_>.?©T×G<?NÛò?õUvÞþ½Ì¼-Ôh?ùwøO=ÚN±Û¿_wÎÏýî¦á>(ª|íïóó |