WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Merge.

# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID cc5f88b719d038555e62460bbdf9d38e13b953ac
# Parent  cd984b3478f6403ad37ae2b72a246169de337b5c
# Parent  522bc50588eda1c0bba0562a16fe8edd1a715f09
Merge.

diff -r cd984b3478f6 -r cc5f88b719d0 .hgignore
--- a/.hgignore Mon Aug 22 18:37:48 2005
+++ b/.hgignore Tue Aug 23 19:03:21 2005
@@ -147,6 +147,7 @@
 ^tools/xcs/xcsdump$
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
+^tools/xenstat/xentop/xentop$
 ^tools/xenstore/testsuite/tmp/.*$
 ^tools/xenstore/xen$
 ^tools/xenstore/xenstored$
diff -r cd984b3478f6 -r cc5f88b719d0 Config.mk
--- a/Config.mk Mon Aug 22 18:37:48 2005
+++ b/Config.mk Tue Aug 23 19:03:21 2005
@@ -14,6 +14,7 @@
 CC         = $(CROSS_COMPILE)gcc
 CPP        = $(CROSS_COMPILE)gcc -E
 AR         = $(CROSS_COMPILE)ar
+RANLIB     = $(CROSS_COMPILE)ranlib
 NM         = $(CROSS_COMPILE)nm
 STRIP      = $(CROSS_COMPILE)strip
 OBJCOPY    = $(CROSS_COMPILE)objcopy
@@ -43,3 +44,7 @@
 #      ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
 #      ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
 ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
+
+# Optional components
+XENSTAT_XENTOP ?= y
+
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Tue Aug 23 19:03:21 2005
@@ -807,7 +807,107 @@
 #
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+# CONFIG_USB_STORAGE is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+
+#
+# USB ATM/DSL drivers
+#
 
 #
 # USB Gadget Support
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c       Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c       Tue Aug 23 19:03:21 2005
@@ -784,7 +784,9 @@
 
 void __init find_smp_config (void)
 {
+#ifndef CONFIG_XEN
        unsigned int address;
+#endif
 
        /*
         * FIXME: Linux assumes you have 640K of base ram..
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Tue Aug 23 19:03:21 2005
@@ -149,12 +149,12 @@
 
                        if (cpu_is_offline(cpu)) {
                                local_irq_disable();
+#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
                                /* Ack it.  From this point on until
                                   we get woken up, we're not allowed
                                   to take any locks.  In particular,
                                   don't printk. */
                                __get_cpu_var(cpu_state) = CPU_DEAD;
-#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
                                /* Tell hypervisor to take vcpu down. */
                                HYPERVISOR_vcpu_down(cpu);
 #endif
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 23 19:03:21 2005
@@ -1575,19 +1575,20 @@
        /* Make sure we have a correctly sized P->M table. */
        if (max_pfn != xen_start_info.nr_pages) {
                phys_to_machine_mapping = alloc_bootmem_low_pages(
-                       max_pfn * sizeof(unsigned long));
+                       max_pfn * sizeof(unsigned int));
 
                if (max_pfn > xen_start_info.nr_pages) {
                        /* set to INVALID_P2M_ENTRY */
                        memset(phys_to_machine_mapping, ~0,
-                               max_pfn * sizeof(unsigned long));
+                               max_pfn * sizeof(unsigned int));
                        memcpy(phys_to_machine_mapping,
-                               (unsigned long *)xen_start_info.mfn_list,
-                               xen_start_info.nr_pages * sizeof(unsigned long));
+                               (unsigned int *)xen_start_info.mfn_list,
+                               xen_start_info.nr_pages * sizeof(unsigned int));
                } else {
                        memcpy(phys_to_machine_mapping,
-                               (unsigned long *)xen_start_info.mfn_list,
-                               max_pfn * sizeof(unsigned long));
+                               (unsigned int *)xen_start_info.mfn_list,
+                               max_pfn * sizeof(unsigned int));
+                       /* N.B. below relies on sizeof(int) == sizeof(long). */
                        if (HYPERVISOR_dom_mem_op(
                                MEMOP_decrease_reservation,
                                (unsigned long *)xen_start_info.mfn_list + max_pfn,
@@ -1597,11 +1598,11 @@
                free_bootmem(
                        __pa(xen_start_info.mfn_list), 
                        PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
-                       sizeof(unsigned long))));
+                       sizeof(unsigned int))));
        }
 
        pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
-       for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+       for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ )
        {       
             pfn_to_mfn_frame_list[j] = 
                  virt_to_mfn(&phys_to_machine_mapping[i]);
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Tue Aug 23 19:03:21 2005
@@ -1604,6 +1604,9 @@
        unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
 }
 
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
 void smp_suspend(void)
 {
        /* XXX todo: take down time and ipi's on all cpus */
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c     Tue Aug 23 19:03:21 2005
@@ -281,7 +281,7 @@
        siginfo_t info;
 
        /* Set the "privileged fault" bit to something sane. */
-       error_code &= 3;
+       error_code &= ~4;
        error_code |= (regs->xcs & 2) << 1;
        if (regs->eflags & X86_EFLAGS_VM)
                error_code |= 4;
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Tue Aug 23 19:03:21 2005
@@ -348,9 +348,12 @@
 {
        unsigned long vaddr;
        pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
+       int i;
 
        swapper_pg_dir = pgd_base;
        init_mm.pgd    = pgd_base;
+       for (i = 0; i < NR_CPUS; i++)
+               per_cpu(cur_pgd, i) = pgd_base;
 
        /* Enable PSE if available */
        if (cpu_has_pse) {
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Tue Aug 23 19:03:21 2005
@@ -36,6 +36,8 @@
 {
 }
 
+#ifdef __i386__
+
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
        return NULL;
@@ -44,6 +46,8 @@
 void __init bt_iounmap(void *addr, unsigned long size)
 {
 }
+
+#endif /* __i386__ */
 
 #else
 
@@ -58,7 +62,7 @@
        extern unsigned long max_low_pfn;
        unsigned long mfn = address >> PAGE_SHIFT;
        unsigned long pfn = mfn_to_pfn(mfn);
-       return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
+       return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn));
 }
 
 /*
@@ -126,10 +130,12 @@
                return NULL;
        area->phys_addr = phys_addr;
        addr = (void __iomem *) area->addr;
+       flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+#ifdef __x86_64__
+       flags |= _PAGE_USER;
+#endif
        if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
-                                   size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
-                                                  _PAGE_DIRTY | _PAGE_ACCESSED
-                                                  | flags), domid)) {
+                                   size, __pgprot(flags), domid)) {
                vunmap((void __force *) addr);
                return NULL;
        }
@@ -218,6 +224,8 @@
        kfree(p); 
 }
 
+#ifdef __i386__
+
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
        unsigned long offset, last_addr;
@@ -288,6 +296,8 @@
                --nrpages;
        }
 }
+
+#endif /* __i386__ */
 
 #endif /* CONFIG_XEN_PHYSDEV_ACCESS */
 
@@ -346,7 +356,7 @@
                 * Fill in the machine address: PTE ptr is done later by
                 * __direct_remap_area_pages(). 
                 */
-               v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+               v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, prot));
 
                machine_addr += PAGE_SIZE;
                address += PAGE_SIZE; 
@@ -368,35 +378,37 @@
 
 EXPORT_SYMBOL(direct_remap_area_pages);
 
+static int lookup_pte_fn(
+       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       unsigned long *ptep = (unsigned long *)data;
+       if (ptep)
+               *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
+                        PAGE_SHIFT) |
+                       ((unsigned long)pte & ~PAGE_MASK);
+       return 0;
+}
+
 int create_lookup_pte_addr(struct mm_struct *mm, 
                           unsigned long address,
                           unsigned long *ptep)
 {
-       int f(pte_t *pte, struct page *pte_page, unsigned long addr,
-             void *data) {
-               unsigned long *ptep = (unsigned long *)data;
-               if (ptep)
-                       *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
-                                PAGE_SHIFT) |
-                               ((unsigned long)pte & ~PAGE_MASK);
-               return 0;
-       }
-
-       return generic_page_range(mm, address, PAGE_SIZE, f, ptep);
+       return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
 }
 
 EXPORT_SYMBOL(create_lookup_pte_addr);
+
+static int noop_fn(
+       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       return 0;
+}
 
 int touch_pte_range(struct mm_struct *mm,
                    unsigned long address,
                    unsigned long size)
 {
-       int f(pte_t *pte, struct page *pte_page, unsigned long addr,
-             void *data) {
-               return 0;
-       }
-
-       return generic_page_range(mm, address, size, f, NULL);
+       return generic_page_range(mm, address, size, noop_fn, NULL);
 } 
 
 EXPORT_SYMBOL(touch_pte_range);
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Tue Aug 23 19:03:21 2005
@@ -435,9 +435,11 @@
     unbind_evtchn_from_irq(evtchn);
 }
 
+#ifdef CONFIG_SMP
 static void do_nothing_function(void *ign)
 {
 }
+#endif
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
 static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Tue Aug 23 19:03:21 2005
@@ -40,38 +40,82 @@
 EXPORT_SYMBOL(gnttab_end_foreign_transfer);
 EXPORT_SYMBOL(gnttab_alloc_grant_references);
 EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_reference);
 EXPORT_SYMBOL(gnttab_claim_grant_reference);
 EXPORT_SYMBOL(gnttab_release_grant_reference);
 EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
 EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
 
-static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
+
+static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+static int gnttab_free_count = NR_GRANT_ENTRIES;
 static grant_ref_t gnttab_free_head;
+static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
 
 static grant_entry_t *shared;
 
-/*
- * Lock-free grant-entry allocator
- */
-
-static inline int
-get_free_entry(
-    void)
-{
-    grant_ref_t fh, nfh = gnttab_free_head;
-    do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
-    while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
-                                    gnttab_free_list[fh])) != fh) );
-    return fh;
+static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
+
+static int
+get_free_entries(int count)
+{
+    unsigned long flags;
+    int ref;
+    grant_ref_t head;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    if (gnttab_free_count < count) {
+       spin_unlock_irqrestore(&gnttab_list_lock, flags);
+       return -1;
+    }
+    ref = head = gnttab_free_head;
+    gnttab_free_count -= count;
+    while (count-- > 1)
+       head = gnttab_list[head];
+    gnttab_free_head = gnttab_list[head];
+    gnttab_list[head] = GNTTAB_LIST_END;
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
+    return ref;
+}
+
+#define get_free_entry() get_free_entries(1)
+
+static void
+do_free_callbacks(void)
+{
+    struct gnttab_free_callback *callback = gnttab_free_callback_list, *next;
+    gnttab_free_callback_list = NULL;
+    while (callback) {
+       next = callback->next;
+       if (gnttab_free_count >= callback->count) {
+           callback->next = NULL;
+           callback->fn(callback->arg);
+       } else {
+           callback->next = gnttab_free_callback_list;
+           gnttab_free_callback_list = callback;
+       }
+       callback = next;
+    }
 }
 
 static inline void
-put_free_entry(
-    grant_ref_t ref)
-{
-    grant_ref_t fh, nfh = gnttab_free_head;
-    do { gnttab_free_list[ref] = fh = nfh; wmb(); }
-    while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+check_free_callbacks(void)
+{
+    if (unlikely(gnttab_free_callback_list))
+       do_free_callbacks();
+}
+
+static void
+put_free_entry(grant_ref_t ref)
+{
+    unsigned long flags;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    gnttab_list[ref] = gnttab_free_head;
+    gnttab_free_head = ref;
+    gnttab_free_count++;
+    check_free_callbacks();
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
 /*
@@ -79,8 +123,7 @@
  */
 
 int
-gnttab_grant_foreign_access(
-    domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
 {
     int ref;
     
@@ -96,8 +139,8 @@
 }
 
 void
-gnttab_grant_foreign_access_ref(
-    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+                               unsigned long frame, int readonly)
 {
     shared[ref].frame = frame;
     shared[ref].domid = domid;
@@ -107,7 +150,7 @@
 
 
 int
-gnttab_query_foreign_access( grant_ref_t ref )
+gnttab_query_foreign_access(grant_ref_t ref)
 {
     u16 nflags;
 
@@ -117,7 +160,7 @@
 }
 
 void
-gnttab_end_foreign_access( grant_ref_t ref, int readonly )
+gnttab_end_foreign_access(grant_ref_t ref, int readonly)
 {
     u16 flags, nflags;
 
@@ -132,8 +175,7 @@
 }
 
 int
-gnttab_grant_foreign_transfer(
-    domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
 {
     int ref;
 
@@ -149,8 +191,8 @@
 }
 
 void
-gnttab_grant_foreign_transfer_ref(
-    grant_ref_t ref, domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+                                 unsigned long pfn)
 {
     shared[ref].frame = pfn;
     shared[ref].domid = domid;
@@ -159,8 +201,7 @@
 }
 
 unsigned long
-gnttab_end_foreign_transfer(
-    grant_ref_t ref)
+gnttab_end_foreign_transfer(grant_ref_t ref)
 {
     unsigned long frame = 0;
     u16           flags;
@@ -189,59 +230,79 @@
 }
 
 void
-gnttab_free_grant_references( u16 count, grant_ref_t head )
-{
-    /* TODO: O(N)...? */
-    grant_ref_t to_die = 0, next = head;
-    int i;
-
-    for ( i = 0; i < count; i++ )
-    {
-        to_die = next;
-        next = gnttab_free_list[next];
-        put_free_entry( to_die );
+gnttab_free_grant_reference(grant_ref_t ref)
+{
+
+    put_free_entry(ref);
+}
+
+void
+gnttab_free_grant_references(grant_ref_t head)
+{
+    grant_ref_t ref;
+    unsigned long flags;
+    int count = 1;
+    if (head == GNTTAB_LIST_END)
+       return;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    ref = head;
+    while (gnttab_list[ref] != GNTTAB_LIST_END) {
+       ref = gnttab_list[ref];
+       count++;
     }
-}
-
-int
-gnttab_alloc_grant_references( u16 count,
-                               grant_ref_t *head,
-                               grant_ref_t *terminal )
-{
-    int i;
-    grant_ref_t h = gnttab_free_head;
-
-    for ( i = 0; i < count; i++ )
-        if ( unlikely(get_free_entry() == -1) )
-            goto not_enough_refs;
+    gnttab_list[ref] = gnttab_free_head;
+    gnttab_free_head = head;
+    gnttab_free_count += count;
+    check_free_callbacks();
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+int
+gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+    int h = get_free_entries(count);
+
+    if (h == -1)
+       return -ENOSPC;
 
     *head = h;
-    *terminal = gnttab_free_head;
 
     return 0;
-
-not_enough_refs:
-    gnttab_free_head = h;
-    return -ENOSPC;
-}
-
-int
-gnttab_claim_grant_reference( grant_ref_t *private_head,
-                              grant_ref_t  terminal )
-{
-    grant_ref_t g;
-    if ( unlikely((g = *private_head) == terminal) )
+}
+
+int
+gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+    grant_ref_t g = *private_head;
+    if (unlikely(g == GNTTAB_LIST_END))
         return -ENOSPC;
-    *private_head = gnttab_free_list[g];
+    *private_head = gnttab_list[g];
     return g;
 }
 
 void
-gnttab_release_grant_reference( grant_ref_t *private_head,
-                                grant_ref_t  release )
-{
-    gnttab_free_list[release] = *private_head;
+gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
+{
+    gnttab_list[release] = *private_head;
     *private_head = release;
+}
+
+void
+gnttab_request_free_callback(struct gnttab_free_callback *callback,
+                            void (*fn)(void *), void *arg, u16 count)
+{
+    unsigned long flags;
+    spin_lock_irqsave(&gnttab_list_lock, flags);
+    if (callback->next)
+       goto out;
+    callback->fn = fn;
+    callback->arg = arg;
+    callback->count = count;
+    callback->next = gnttab_free_callback_list;
+    gnttab_free_callback_list = callback;
+    check_free_callbacks();
+ out:
+    spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
 /*
@@ -252,8 +313,9 @@
 
 static struct proc_dir_entry *grant_pde;
 
-static int grant_ioctl(struct inode *inode, struct file *file,
-                       unsigned int cmd, unsigned long data)
+static int
+grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+           unsigned long data)
 {
     int                     ret;
     privcmd_hypercall_t     hypercall;
@@ -291,8 +353,9 @@
     ioctl:  grant_ioctl,
 };
 
-static int grant_read(char *page, char **start, off_t off,
-                      int count, int *eof, void *data)
+static int
+grant_read(char *page, char **start, off_t off, int count, int *eof,
+          void *data)
 {
     int             len;
     unsigned int    i;
@@ -321,8 +384,9 @@
     return len;
 }
 
-static int grant_write(struct file *file, const char __user *buffer,
-                       unsigned long count, void *data)
+static int
+grant_write(struct file *file, const char __user *buffer, unsigned long count,
+           void *data)
 {
     /* TODO: implement this */
     return -ENOSYS;
@@ -330,7 +394,8 @@
 
 #endif /* CONFIG_PROC_FS */
 
-int gnttab_resume(void)
+int
+gnttab_resume(void)
 {
     gnttab_setup_table_t setup;
     unsigned long        frames[NR_GRANT_FRAMES];
@@ -349,7 +414,8 @@
     return 0;
 }
 
-int gnttab_suspend(void)
+int
+gnttab_suspend(void)
 {
     int i;
 
@@ -359,7 +425,8 @@
     return 0;
 }
 
-static int __init gnttab_init(void)
+static int __init
+gnttab_init(void)
 {
     int i;
 
@@ -368,7 +435,7 @@
     shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
 
     for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
-        gnttab_free_list[i] = i + 1;
+        gnttab_list[i] = i + 1;
     
 #ifdef CONFIG_PROC_FS
     /*
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile      Tue Aug 23 19:03:21 2005
@@ -44,7 +44,7 @@
 
 c-obj-$(CONFIG_MODULES)                += module.o
 
-#obj-y                         += topology.o
+obj-y                          += topology.o
 c-obj-y                                += intel_cacheinfo.o
 
 bootflag-y                     += ../../../i386/kernel/bootflag.o
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Tue Aug 23 19:03:21 2005
@@ -778,21 +778,21 @@
                /* Make sure we have a large enough P->M table. */
                if (end_pfn > xen_start_info.nr_pages) {
                        phys_to_machine_mapping = alloc_bootmem(
-                               max_pfn * sizeof(unsigned long));
+                               max_pfn * sizeof(u32));
                        memset(phys_to_machine_mapping, ~0,
-                              max_pfn * sizeof(unsigned long));
+                              max_pfn * sizeof(u32));
                        memcpy(phys_to_machine_mapping,
-                              (unsigned long *)xen_start_info.mfn_list,
-                              xen_start_info.nr_pages * sizeof(unsigned long));
+                              (u32 *)xen_start_info.mfn_list,
+                              xen_start_info.nr_pages * sizeof(u32));
                        free_bootmem(
                                __pa(xen_start_info.mfn_list), 
                                PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
-                                               sizeof(unsigned long))));
+                                               sizeof(u32))));
                }
 
                pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
 
-               for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+               for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ )
                {       
                        pfn_to_mfn_frame_list[j] = 
                                virt_to_mfn(&phys_to_machine_mapping[i]);
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile  Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile  Tue Aug 23 19:03:21 2005
@@ -6,10 +6,10 @@
 
 CFLAGS += -Iarch/$(XENARCH)/mm
 
-obj-y  := init.o fault.o ioremap.o pageattr.o
+obj-y  := init.o fault.o pageattr.o
 c-obj-y        := extable.o
 
-i386-obj-y := hypervisor.o
+i386-obj-y := hypervisor.o ioremap.o
 
 #obj-y  := init.o fault.o ioremap.o extable.o pageattr.o
 #c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Tue Aug 23 19:03:21 2005
@@ -559,6 +559,11 @@
 
 void __init xen_init_pt(void)
 {
+       int i;
+
+       for (i = 0; i < NR_CPUS; i++)
+               per_cpu(cur_pgd, i) = init_mm.pgd;
+
        memcpy((void *)init_level4_pgt, 
               (void *)xen_start_info.pt_base, PAGE_SIZE);
 
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Tue Aug 23 19:03:21 2005
@@ -434,20 +434,20 @@
        balloon_unlock(flags);
 }
 
+static int dealloc_pte_fn(
+       pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       unsigned long mfn = pte_mfn(*pte);
+       set_pte(pte, __pte_ma(0));
+       phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
+               INVALID_P2M_ENTRY;
+       BUG_ON(HYPERVISOR_dom_mem_op(
+               MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+       return 0;
+}
+
 struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
 {
-       int f(pte_t *pte, struct page *pte_page,
-             unsigned long addr, void *data)
-       {
-               unsigned long mfn = pte_mfn(*pte);
-               set_pte(pte, __pte_ma(0));
-               phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
-                       INVALID_P2M_ENTRY;
-               BUG_ON(HYPERVISOR_dom_mem_op(
-                       MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
-               return 0;
-        }
-
        unsigned long vstart, flags;
        unsigned int  order = get_order(nr_pages * PAGE_SIZE);
 
@@ -459,7 +459,7 @@
 
        balloon_lock(flags);
        BUG_ON(generic_page_range(
-               &init_mm, vstart, PAGE_SIZE << order, f, NULL) != 0);
+               &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL));
        current_pages -= 1UL << order;
        balloon_unlock(flags);
 
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Tue Aug 23 19:03:21 2005
@@ -11,6 +11,8 @@
  * Copyright (c) 2005, Christopher Clark
  */
 
+#include <linux/spinlock.h>
+#include <asm-xen/balloon.h>
 #include "common.h"
 
 /*
@@ -63,9 +65,6 @@
 static PEND_RING_IDX pending_prod, pending_cons;
 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static kmem_cache_t *buffer_head_cachep;
-#else
 static request_queue_t *plugged_queue;
 static inline void flush_plugged_queue(void)
 {
@@ -78,7 +77,6 @@
         plugged_queue = NULL;
     }
 }
-#endif
 
 /* When using grant tables to map a frame for device access then the
  * handle returned must be used to unmap the frame. This is needed to
@@ -182,11 +180,7 @@
     blkif_t          *blkif;
     struct list_head *ent;
 
-    daemonize(
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-        "xenblkd"
-#endif
-        );
+    daemonize("xenblkd");
 
     for ( ; ; )
     {
@@ -213,11 +207,7 @@
         }
 
         /* Push the batch through to disc. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-        run_task_queue(&tq_disk);
-#else
         flush_plugged_queue();
-#endif
     }
 }
 
@@ -266,13 +256,6 @@
     }
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static void end_block_io_op(struct buffer_head *bh, int uptodate)
-{
-    __end_block_io_op(bh->b_private, uptodate);
-    kmem_cache_free(buffer_head_cachep, bh);
-}
-#else
 static int end_block_io_op(struct bio *bio, unsigned int done, int error)
 {
     if ( bio->bi_size != 0 )
@@ -281,7 +264,6 @@
     bio_put(bio);
     return error;
 }
-#endif
 
 
 /******************************************************************************
@@ -355,13 +337,9 @@
         unsigned long buf; unsigned int nsec;
     } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
     unsigned int nseg;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-    struct buffer_head *bh;
-#else
     struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
     int nbio = 0;
     request_queue_t *q;
-#endif
 
     /* Check that number of segments is sane. */
     nseg = req->nr_segments;
@@ -432,49 +410,6 @@
     pending_req->operation = operation;
     pending_req->status    = BLKIF_RSP_OKAY;
     pending_req->nr_pages  = nseg;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-
-    atomic_set(&pending_req->pendcnt, nseg);
-    pending_cons++;
-    blkif_get(blkif);
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
-        if ( unlikely(bh == NULL) )
-        {
-            __end_block_io_op(pending_req, 0);
-            continue;
-        }
-
-        memset(bh, 0, sizeof (struct buffer_head));
-
-        init_waitqueue_head(&bh->b_wait);
-        bh->b_size          = seg[i].nsec << 9;
-        bh->b_dev           = preq.dev;
-        bh->b_rdev          = preq.dev;
-        bh->b_rsector       = (unsigned long)preq.sector_number;
-        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
-            (seg[i].buf & ~PAGE_MASK);
-        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
-        bh->b_end_io        = end_block_io_op;
-        bh->b_private       = pending_req;
-
-        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
-            (1 << BH_Req) | (1 << BH_Launder);
-        if ( operation == WRITE )
-            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
-
-        atomic_set(&bh->b_count, 1);
-
-        /* Dispatch a single request. We'll flush it to disc later. */
-        generic_make_request(operation, bh);
-
-        preq.sector_number += seg[i].nsec;
-    }
-
-#else
 
     for ( i = 0; i < nseg; i++ )
     {
@@ -524,8 +459,6 @@
     for ( i = 0; i < nbio; i++ )
         submit_bio(operation, biolist[i]);
 
-#endif
-
     return;
 
  bad_descriptor:
@@ -593,12 +526,6 @@
     if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
         BUG();
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-    buffer_head_cachep = kmem_cache_create(
-        "buffer_head_cache", sizeof(struct buffer_head),
-        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-#endif
-
     blkif_xenbus_init();
 
     memset( pending_grant_handles,  BLKBACK_INVALID_HANDLE, MMAP_PAGES );
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Aug 23 19:03:21 2005
@@ -5,7 +5,6 @@
 #include <linux/config.h>
 #include <linux/version.h>
 #include <linux/module.h>
-#include <linux/rbtree.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
@@ -30,12 +29,13 @@
 #define DPRINTK(_f, _a...) ((void)0)
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-typedef struct rb_root rb_root_t;
-typedef struct rb_node rb_node_t;
-#else
-struct block_device;
-#endif
+struct vbd {
+    blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
+    unsigned char  readonly;    /* Non-zero -> read-only */
+    unsigned char  type;        /* VDISK_xxx */
+    blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
+    struct block_device *bdev;
+}; 
 
 typedef struct blkif_st {
     /* Unique identifier for this interface. */
@@ -48,25 +48,18 @@
     /* Comms information. */
     blkif_back_ring_t blk_ring;
     /* VBDs attached to this interface. */
-    rb_root_t         vbd_rb;        /* Mapping from 16-bit vdevices to VBDs.*/
-    spinlock_t        vbd_lock;      /* Protects VBD mapping. */
+    struct vbd        vbd;
     /* Private fields. */
     enum { DISCONNECTED, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */
-    u8               disconnect_rspid;
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
     /* Is this a blktap frontend */
     unsigned int     is_blktap;
 #endif
-    struct blkif_st *hash_next;
     struct list_head blkdev_list;
     spinlock_t       blk_ring_lock;
     atomic_t         refcnt;
 
-    struct work_struct work;
+    struct work_struct free_work;
     u16 shmem_handle;
     unsigned long shmem_vaddr;
     grant_ref_t shmem_ref;
@@ -77,30 +70,25 @@
 void blkif_connect(blkif_be_connect_t *connect);
 int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
 void blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find(domid_t domid);
-void free_blkif(blkif_t *blkif);
+blkif_t *alloc_blkif(domid_t domid);
+void free_blkif_callback(blkif_t *blkif);
 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
 
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define blkif_put(_b)                             \
     do {                                          \
         if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            free_blkif(_b);                      \
+            free_blkif_callback(_b);             \
     } while (0)
 
-struct vbd;
-void vbd_free(blkif_t *blkif, struct vbd *vbd);
-
-/* Creates inactive vbd. */
-struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly);
-int vbd_is_active(struct vbd *vbd);
-void vbd_activate(blkif_t *blkif, struct vbd *vbd);
+/* Create a vbd. */
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice,
+              int readonly);
+void vbd_free(struct vbd *vbd);
 
 unsigned long vbd_size(struct vbd *vbd);
 unsigned int vbd_info(struct vbd *vbd);
 unsigned long vbd_secsize(struct vbd *vbd);
-void vbd_destroy(blkif_be_vbd_destroy_t *delete); 
-void destroy_all_vbds(blkif_t *blkif);
 
 struct phys_req {
     unsigned short       dev;
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Tue Aug 23 19:03:21 2005
@@ -9,27 +9,11 @@
 #include "common.h"
 #include <asm-xen/evtchn.h>
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
+static kmem_cache_t *blkif_cachep;
 
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1))
-
-static kmem_cache_t *blkif_cachep;
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find(domid_t domid)
+blkif_t *alloc_blkif(domid_t domid)
 {
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)];
-
-    while (blkif) {
-       if (blkif->domid == domid) {
-           blkif_get(blkif);
-           return blkif;
-       }
-        blkif = blkif->hash_next;
-    }
+    blkif_t *blkif;
 
     blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
     if (!blkif)
@@ -38,12 +22,9 @@
     memset(blkif, 0, sizeof(*blkif));
     blkif->domid = domid;
     blkif->status = DISCONNECTED;
-    spin_lock_init(&blkif->vbd_lock);
     spin_lock_init(&blkif->blk_ring_lock);
     atomic_set(&blkif->refcnt, 1);
 
-    blkif->hash_next = blkif_hash[BLKIF_HASH(domid)];
-    blkif_hash[BLKIF_HASH(domid)] = blkif;
     return blkif;
 }
 
@@ -55,7 +36,7 @@
     op.flags = GNTMAP_host_map;
     op.ref = shared_page;
     op.dom = blkif->domid;
-       
+
     BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
 
     if (op.handle < 0) {
@@ -91,7 +72,7 @@
     if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
        return -ENOMEM;
 
-    err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), shared_page);
+    err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
     if (err) {
         vfree(vma->addr);
        return err;
@@ -123,10 +104,10 @@
     return 0;
 }
 
-void free_blkif(blkif_t *blkif)
+static void free_blkif(void *arg)
 {
-    blkif_t     **pblkif;
     evtchn_op_t op = { .cmd = EVTCHNOP_close };
+    blkif_t *blkif = (blkif_t *)arg;
 
     op.u.close.port = blkif->evtchn;
     op.u.close.dom = DOMID_SELF;
@@ -134,6 +115,8 @@
     op.u.close.port = blkif->remote_evtchn;
     op.u.close.dom = blkif->domid;
     HYPERVISOR_event_channel_op(&op);
+
+    vbd_free(&blkif->vbd);
 
     if (blkif->evtchn)
         unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
@@ -143,20 +126,17 @@
        vfree(blkif->blk_ring.sring);
     }
 
-    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)];
-    while ( *pblkif != blkif )
-    {
-       BUG_ON(!*pblkif);
-        pblkif = &(*pblkif)->hash_next;
-    }
-    *pblkif = blkif->hash_next;
-    destroy_all_vbds(blkif);
     kmem_cache_free(blkif_cachep, blkif);
+}
+
+void free_blkif_callback(blkif_t *blkif)
+{
+    INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+    schedule_work(&blkif->free_work);
 }
 
 void __init blkif_interface_init(void)
 {
     blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
                                      0, 0, NULL, NULL);
-    memset(blkif_hash, 0, sizeof(blkif_hash));
 }
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Tue Aug 23 19:03:21 2005
@@ -2,10 +2,6 @@
  * blkback/vbd.c
  * 
  * Routines for managing virtual block devices (VBDs).
- * 
- * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups 
- * in vbd_translate.  All other lookups are implicitly protected because the 
- * only caller (the control message dispatch routine) serializes the calls.
  * 
  * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
  */
@@ -13,28 +9,13 @@
 #include "common.h"
 #include <asm-xen/xenbus.h>
 
-struct vbd { 
-    blkif_vdev_t   handle;     /* what the domain refers to this vbd as */
-    unsigned char  readonly;    /* Non-zero -> read-only */
-    unsigned char  type;        /* VDISK_xxx */
-    blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
-    struct block_device *bdev;
-
-    int active;
-    rb_node_t      rb;          /* for linking into R-B tree lookup struct */
-}; 
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 static inline dev_t vbd_map_devnum(blkif_pdev_t cookie)
-{ return MKDEV(cookie>>8, cookie&0xff); }
+{
+    return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
+}
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ? \
     (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
 #define bdev_put(_b) blkdev_put(_b)
-#else
-#define vbd_sz(_v)   (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2)
-#define bdev_put(_b) ((void)0)
-#define bdev_hardsect_size(_b) 512
-#endif
 
 unsigned long vbd_size(struct vbd *vbd)
 {
@@ -51,45 +32,32 @@
        return bdev_hardsect_size(vbd->bdev);
 }
 
-int vbd_is_active(struct vbd *vbd)
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle,
+              blkif_pdev_t pdevice, int readonly)
 {
-       return vbd->active;
-}
+    struct vbd *vbd;
 
-struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle,
-                      blkif_pdev_t pdevice, int readonly)
-{
-    struct vbd  *vbd; 
-
-    if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
-    {
-        DPRINTK("vbd_create: out of memory\n");
-       return ERR_PTR(-ENOMEM);
-    }
-
+    vbd = &blkif->vbd;
     vbd->handle   = handle; 
     vbd->readonly = readonly;
     vbd->type     = 0;
-    vbd->active   = 0;
 
     vbd->pdevice  = pdevice;
 
-    /* FIXME: Who frees vbd on failure? --RR */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
     vbd->bdev = open_by_devnum(
         vbd_map_devnum(vbd->pdevice),
         vbd->readonly ? FMODE_READ : FMODE_WRITE);
     if ( IS_ERR(vbd->bdev) )
     {
         DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        return ERR_PTR(-ENOENT);
+        return -ENOENT;
     }
 
     if ( (vbd->bdev->bd_disk == NULL) )
     {
         DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        bdev_put(vbd->bdev);
-        return ERR_PTR(-ENOENT);
+       vbd_free(vbd);
+        return -ENOENT;
     }
 
     if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
@@ -97,121 +65,27 @@
     if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
         vbd->type |= VDISK_REMOVABLE;
 
-#else
-    if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        return ERR_PTR(-ENOENT);
-    }
-#endif
-
     DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
             handle, blkif->domid);
-    return vbd;
+    return 0;
 }
 
-void vbd_activate(blkif_t *blkif, struct vbd *vbd)
+void vbd_free(struct vbd *vbd)
 {
-    rb_node_t  **rb_p, *rb_parent = NULL;
-    struct vbd *i;
-    BUG_ON(vbd_is_active(vbd));
-
-    /* Find where to put it. */
-    rb_p = &blkif->vbd_rb.rb_node;
-    while ( *rb_p != NULL )
-    {
-        rb_parent = *rb_p;
-        i = rb_entry(rb_parent, struct vbd, rb);
-        if ( vbd->handle < i->handle )
-        {
-            rb_p = &rb_parent->rb_left;
-        }
-        else if ( vbd->handle > i->handle )
-        {
-            rb_p = &rb_parent->rb_right;
-        }
-        else
-        {
-           /* We never create two of same vbd, so not possible. */
-           BUG();
-        }
-    }
-
-    /* Now we're active. */
-    vbd->active = 1;
-    blkif_get(blkif);
-
-    spin_lock(&blkif->vbd_lock);
-    rb_link_node(&vbd->rb, rb_parent, rb_p);
-    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
-    spin_unlock(&blkif->vbd_lock);
-}
-
-void vbd_free(blkif_t *blkif, struct vbd *vbd)
-{
-    if (vbd_is_active(vbd)) {
-       spin_lock(&blkif->vbd_lock);
-       rb_erase(&vbd->rb, &blkif->vbd_rb);
-       spin_unlock(&blkif->vbd_lock);
-       blkif_put(blkif);
-    }
-    bdev_put(vbd->bdev);
-    kfree(vbd);
-}
-
-void destroy_all_vbds(blkif_t *blkif)
-{
-    struct vbd *vbd;
-    rb_node_t  *rb;
-
-    spin_lock(&blkif->vbd_lock);
-
-    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
-    {
-        vbd = rb_entry(rb, struct vbd, rb);
-        rb_erase(rb, &blkif->vbd_rb);
-        spin_unlock(&blkif->vbd_lock);
-        bdev_put(vbd->bdev);
-        kfree(vbd);
-        spin_lock(&blkif->vbd_lock);
-        blkif_put(blkif);
-    }
-
-    spin_unlock(&blkif->vbd_lock);
+    if (vbd->bdev)
+       bdev_put(vbd->bdev);
+    vbd->bdev = NULL;
 }
 
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
 {
-    struct vbd *vbd;
-    rb_node_t  *rb;
-    int         rc = -EACCES;
+    struct vbd *vbd = &blkif->vbd;
+    int rc = -EACCES;
 
-    /* Take the vbd_lock because another thread could be updating the tree. */
-    spin_lock(&blkif->vbd_lock);
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, struct vbd, rb);
-        if ( req->dev < vbd->handle )
-            rb = rb->rb_left;
-        else if ( req->dev > vbd->handle )
-            rb = rb->rb_right;
-        else
-            goto found;
-    }
-
-    DPRINTK("vbd_translate; domain %u attempted to access "
-            "non-existent VBD.\n", blkif->domid);
-    rc = -ENODEV;
-    goto out;
-
- found:
-
-    if ( (operation == WRITE) && vbd->readonly )
+    if ((operation == WRITE) && vbd->readonly)
         goto out;
 
-    if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) )
+    if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
         goto out;
 
     req->dev  = vbd->pdevice;
@@ -219,6 +93,5 @@
     rc = 0;
 
  out:
-    spin_unlock(&blkif->vbd_lock);
     return rc;
 }
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Aug 23 19:03:21 2005
@@ -26,7 +26,6 @@
 
        /* our communications channel */
        blkif_t *blkif;
-       struct vbd *vbd;
 
        long int frontend_id;
        long int pdev;
@@ -47,8 +46,6 @@
        if (be->watch.node)
                unregister_xenbus_watch(&be->watch);
        unregister_xenbus_watch(&be->backend_watch);
-       if (be->vbd)
-               vbd_free(be->blkif, be->vbd);
        if (be->blkif)
                blkif_put(be->blkif);
        if (be->frontpath)
@@ -72,7 +69,7 @@
                device_unregister(&be->dev->dev);
                return;
        }
-       if (vbd_is_active(be->vbd))
+       if (be->blkif->status == CONNECTED)
                return;
 
        err = xenbus_gather(be->frontpath, "grant-id", "%lu", &sharedmfn,
@@ -85,9 +82,8 @@
        }
 
        /* Domains must use same shared frame for all vbds. */
-       if (be->blkif->status == CONNECTED &&
-           (evtchn != be->blkif->remote_evtchn ||
-            sharedmfn != be->blkif->shmem_frame)) {
+       if (evtchn != be->blkif->remote_evtchn ||
+           sharedmfn != be->blkif->shmem_frame) {
                xenbus_dev_error(be->dev, err,
                                 "Shared frame/evtchn %li/%u not same as"
                                 " old %li/%u",
@@ -105,7 +101,7 @@
        }
 
        err = xenbus_printf(be->dev->nodename, "sectors", "%lu",
-                           vbd_size(be->vbd));
+                           vbd_size(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/sectors",
                                 be->dev->nodename);
@@ -114,33 +110,28 @@
 
        /* FIXME: use a typename instead */
        err = xenbus_printf(be->dev->nodename, "info", "%u",
-                           vbd_info(be->vbd));
+                           vbd_info(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/info",
                                 be->dev->nodename);
                goto abort;
        }
        err = xenbus_printf(be->dev->nodename, "sector-size", "%lu",
-                           vbd_secsize(be->vbd));
+                           vbd_secsize(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/sector-size",
                                 be->dev->nodename);
                goto abort;
        }
 
-       /* First vbd?  We need to map the shared frame, irq etc. */
-       if (be->blkif->status != CONNECTED) {
-               err = blkif_map(be->blkif, sharedmfn, evtchn);
-               if (err) {
-                       xenbus_dev_error(be->dev, err,
-                                        "mapping shared-frame %lu port %u",
-                                        sharedmfn, evtchn);
-                       goto abort;
-               }
-       }
-
-       /* We're ready, activate. */
-       vbd_activate(be->blkif, be->vbd);
+       /* Map the shared frame, irq etc. */
+       err = blkif_map(be->blkif, sharedmfn, evtchn);
+       if (err) {
+               xenbus_dev_error(be->dev, err,
+                                "mapping shared-frame %lu port %u",
+                                sharedmfn, evtchn);
+               goto abort;
+       }
 
        xenbus_transaction_end(0);
        xenbus_dev_ok(be->dev);
@@ -228,20 +219,16 @@
                p = strrchr(be->frontpath, '/') + 1;
                handle = simple_strtoul(p, NULL, 0);
 
-               be->blkif = blkif_find(be->frontend_id);
+               be->blkif = alloc_blkif(be->frontend_id);
                if (IS_ERR(be->blkif)) {
                        err = PTR_ERR(be->blkif);
                        be->blkif = NULL;
                        goto device_fail;
                }
 
-               be->vbd = vbd_create(be->blkif, handle, be->pdev,
-                                    be->readonly);
-               if (IS_ERR(be->vbd)) {
-                       err = PTR_ERR(be->vbd);
-                       be->vbd = NULL;
+               err = vbd_create(be->blkif, handle, be->pdev, be->readonly);
+               if (err)
                        goto device_fail;
-               }
 
                frontend_changed(&be->watch, be->frontpath);
        }
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Tue Aug 23 19:03:21 2005
@@ -63,25 +63,16 @@
 /* Control whether runtime update of vbds is enabled. */
 #define ENABLE_VBD_UPDATE 1
 
-#define BLKIF_STATE_CLOSED       0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED    2
-
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn = 0;
-static unsigned int blkif_vbds = 0;
-static unsigned int blkif_vbds_connected = 0;
-
-static blkif_front_ring_t blk_ring;
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED    1
+
+static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;
 
 #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
 
-static domid_t rdomid = 0;
-static grant_ref_t gref_head, gref_terminal;
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
 #define GRANTREF_INVALID (1<<15)
-static int shmem_ref;
 
 static struct blk_shadow {
     blkif_request_t req;
@@ -92,7 +83,7 @@
 
 static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
 
-static void kick_pending_request_queues(void);
+static void kick_pending_request_queues(struct blkfront_info *info);
 
 static int __init xlblk_init(void);
 
@@ -119,7 +110,7 @@
 
 /* Kernel-specific definitions used in the common code */
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define DISABLE_SCATTERGATHER() 
+#define DISABLE_SCATTERGATHER()
 #else
 static int sg_operation = -1;
 #define DISABLE_SCATTERGATHER() (sg_operation = -1)
@@ -138,11 +129,11 @@
 }
 
 
-static inline void flush_requests(void)
+static inline void flush_requests(struct blkfront_info *info)
 {
     DISABLE_SCATTERGATHER();
-    RING_PUSH_REQUESTS(&blk_ring);
-    notify_via_evtchn(blkif_evtchn);
+    RING_PUSH_REQUESTS(&info->ring);
+    notify_via_evtchn(info->evtchn);
 }
 
 
@@ -152,30 +143,39 @@
 
 module_init(xlblk_init);
 
-static struct xlbd_disk_info *head_waiting = NULL;
-static void kick_pending_request_queues(void)
-{
-    struct xlbd_disk_info *di;
-    while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) )
-    {
-        head_waiting = di->next_waiting;
-        di->next_waiting = NULL;
-        /* Re-enable calldowns. */
-        blk_start_queue(di->rq);
-        /* Kick things off immediately. */
-        do_blkif_request(di->rq);
-    }
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+       if (!RING_FULL(&info->ring)) {
+               /* Re-enable calldowns. */
+               blk_start_queue(info->rq);
+               /* Kick things off immediately. */
+               do_blkif_request(info->rq);
+       }
+}
+
+static void blkif_restart_queue(void *arg)
+{
+       struct blkfront_info *info = (struct blkfront_info *)arg;
+       spin_lock_irq(&blkif_io_lock);
+       kick_pending_request_queues(info);
+       spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_restart_queue_callback(void *arg)
+{
+       struct blkfront_info *info = (struct blkfront_info *)arg;
+       schedule_work(&info->work);
 }
 
 int blkif_open(struct inode *inode, struct file *filep)
 {
-    struct gendisk *gd = inode->i_bdev->bd_disk;
-    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
-    /* Update of usage count is protected by per-device semaphore. */
-    di->mi->usage++;
-    
-    return 0;
+       // struct gendisk *gd = inode->i_bdev->bd_disk;
+       // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+       /* Update of usage count is protected by per-device semaphore. */
+       // di->mi->usage++;
+
+       return 0;
 }
 
 
@@ -192,8 +192,8 @@
     int i;
 
     DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long)argument, inode->i_rdev); 
-  
+                  command, (long)argument, inode->i_rdev);
+
     switch ( command )
     {
     case HDIO_GETGEO:
@@ -219,7 +219,7 @@
 /*
  * blkif_queue_request
  *
- * request block io 
+ * request block io
  * 
  * id: for guest use only.
  * operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -228,7 +228,7 @@
  */
 static int blkif_queue_request(struct request *req)
 {
-    struct xlbd_disk_info *di = req->rq_disk->private_data;
+    struct blkfront_info *info = req->rq_disk->private_data;
     unsigned long buffer_ma;
     blkif_request_t *ring_req;
     struct bio *bio;
@@ -237,20 +237,28 @@
     unsigned long id;
     unsigned int fsect, lsect;
     int ref;
-
-    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+    grant_ref_t gref_head;
+
+    if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
         return 1;
 
+    if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
+                                     &gref_head) < 0) {
+           gnttab_request_free_callback(&info->callback,
+                                        blkif_restart_queue_callback, info,
+                                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
+           return 1;
+    }
+
     /* Fill out a communications ring structure. */
-    ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+    ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
     id = GET_ID_FROM_FREELIST();
     blk_shadow[id].request = (unsigned long)req;
 
     ring_req->id = id;
-    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
-        BLKIF_OP_READ;
+    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ;
     ring_req->sector_number = (blkif_sector_t)req->sector;
-    ring_req->handle = di->handle;
+    ring_req->handle = info->handle;
 
     ring_req->nr_segments = 0;
     rq_for_each_bio(bio, req)
@@ -263,31 +271,34 @@
             fsect = bvec->bv_offset >> 9;
             lsect = fsect + (bvec->bv_len >> 9) - 1;
             /* install a grant reference. */
-            ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+            ref = gnttab_claim_grant_reference(&gref_head);
             ASSERT( ref != -ENOSPC );
 
             gnttab_grant_foreign_access_ref(
                         ref,
-                        rdomid,
+                        info->backend_id,
                         buffer_ma >> PAGE_SHIFT,
                         rq_data_dir(req) );
 
             blk_shadow[id].frame[ring_req->nr_segments] =
                 buffer_ma >> PAGE_SHIFT;
 
-            ring_req->frame_and_sects[ring_req->nr_segments++] =
+            ring_req->frame_and_sects[ring_req->nr_segments] =
                 blkif_fas_from_gref(ref, fsect, lsect);
+
+           ring_req->nr_segments++;
         }
     }
 
-    blk_ring.req_prod_pvt++;
-    
+    info->ring.req_prod_pvt++;
+
     /* Keep a private copy so we can reissue requests when recovering. */
     pickle_request(&blk_shadow[id], ring_req);
 
+    gnttab_free_grant_references(gref_head);
+
     return 0;
 }
-
 
 /*
  * do_blkif_request
@@ -295,24 +306,26 @@
  */
 void do_blkif_request(request_queue_t *rq)
 {
-    struct xlbd_disk_info *di;
+    struct blkfront_info *info = NULL;
     struct request *req;
     int queued;
 
-    DPRINTK("Entered do_blkif_request\n"); 
+    DPRINTK("Entered do_blkif_request\n");
 
     queued = 0;
 
     while ( (req = elv_next_request(rq)) != NULL )
     {
+       info = req->rq_disk->private_data;
+
         if ( !blk_fs_request(req) )
         {
             end_request(req, 0);
             continue;
         }
 
-        if ( RING_FULL(&blk_ring) )
-            goto wait;
+       if (RING_FULL(&info->ring))
+               goto wait;
 
         DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
                 req, req->cmd, req->sector, req->current_nr_sectors,
@@ -320,25 +333,19 @@
                 rq_data_dir(req) ? "write" : "read");
 
         blkdev_dequeue_request(req);
-        if ( blkif_queue_request(req) )
-        {
+        if (blkif_queue_request(req)) {
+               blk_requeue_request(rq, req);
         wait:
-            di = req->rq_disk->private_data;
-            if ( di->next_waiting == NULL )
-            {
-                di->next_waiting = head_waiting;
-                head_waiting = di;
-                /* Avoid pointless unplugs. */
-                blk_stop_queue(rq);
-            }
-            break;
+               /* Avoid pointless unplugs. */
+               blk_stop_queue(rq);
+               break;
         }
 
         queued++;
     }
 
     if ( queued != 0 )
-        flush_requests();
+        flush_requests(info);
 }
 
 
@@ -347,25 +354,24 @@
     struct request *req;
     blkif_response_t *bret;
     RING_IDX i, rp;
-    unsigned long flags; 
-    
-    spin_lock_irqsave(&blkif_io_lock, flags);     
-
-    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || 
-         unlikely(recovery) )
-    {
+    unsigned long flags;
+    struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
         spin_unlock_irqrestore(&blkif_io_lock, flags);
         return IRQ_HANDLED;
     }
-    
-    rp = blk_ring.sring->rsp_prod;
+
+    rp = info->ring.sring->rsp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-    for ( i = blk_ring.rsp_cons; i != rp; i++ )
+    for ( i = info->ring.rsp_cons; i != rp; i++ )
     {
         unsigned long id;
 
-        bret = RING_GET_RESPONSE(&blk_ring, i);
+        bret = RING_GET_RESPONSE(&info->ring, i);
         id   = bret->id;
         req  = (struct request *)blk_shadow[id].request;
 
@@ -382,7 +388,7 @@
                         bret->status);
 
             if ( unlikely(end_that_request_first
-                          (req, 
+                          (req,
                            (bret->status == BLKIF_RSP_OKAY),
                            req->hard_nr_sectors)) )
                 BUG();
@@ -394,9 +400,9 @@
         }
     }
 
-    blk_ring.rsp_cons = i;
-
-    kick_pending_request_queues();
+    info->ring.rsp_cons = i;
+
+    kick_pending_request_queues(info);
 
     spin_unlock_irqrestore(&blkif_io_lock, flags);
 
@@ -425,31 +431,31 @@
 static void kick_pending_request_queues(void)
 {
     /* We kick pending request queues if the ring is reasonably empty. */
-    if ( (nr_pending != 0) && 
-         (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) )
+    if ( (nr_pending != 0) &&
+         (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) )
     {
         /* Attempt to drain the queue, but bail if the ring becomes full. */
-        while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
+        while ( (nr_pending != 0) && !RING_FULL(&info->ring) )
             do_blkif_request(pending_queues[--nr_pending]);
     }
 }
 
 int blkif_open(struct inode *inode, struct file *filep)
 {
-    short xldev = inode->i_rdev; 
+    short xldev = inode->i_rdev;
     struct gendisk *gd = get_gendisk(xldev);
     xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-    short minor = MINOR(xldev); 
+    short minor = MINOR(xldev);
 
     if ( gd->part[minor].nr_sects == 0 )
-    { 
+    {
         /*
          * Device either doesn't exist, or has zero capacity; we use a few
          * cheesy heuristics to return the relevant error code
          */
         if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
              ((minor & (gd->max_p - 1)) != 0) )
-        { 
+        {
             /*
              * We have a real device, but no such partition, or we just have a
              * partition number so guess this is the problem.
@@ -458,16 +464,16 @@
         }
         else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
         {
-            /* This is a removable device => assume that media is missing. */ 
+            /* This is a removable device => assume that media is missing. */
             return -ENOMEDIUM; /* media not present (this is a guess) */
-        } 
+        }
         else
-        { 
+        {
             /* Just go for the general 'no such device' error. */
             return -ENODEV;    /* no such device */
         }
     }
-    
+
     /* Update of usage count is protected by per-device semaphore. */
     disk->usage++;
 
@@ -496,24 +502,24 @@
 {
     kdev_t dev = inode->i_rdev;
     struct hd_geometry *geo = (struct hd_geometry *)argument;
-    struct gendisk *gd;     
-    struct hd_struct *part; 
+    struct gendisk *gd;
+    struct hd_struct *part;
     int i;
     unsigned short cylinders;
     byte heads, sectors;
 
     /* NB. No need to check permissions. That is done for us. */
-    
+
     DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long) argument, dev); 
-  
+                  command, (long) argument, dev);
+
     gd = get_gendisk(dev);
-    part = &gd->part[MINOR(dev)]; 
+    part = &gd->part[MINOR(dev)];
 
     switch ( command )
     {
     case BLKGETSIZE:
-        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
+        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
         return put_user(part->nr_sects, (unsigned long *) argument);
 
     case BLKGETSIZE64:
@@ -526,7 +532,7 @@
         return blkif_revalidate(dev);
 
     case BLKSSZGET:
-        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
+        return hardsect_size[MAJOR(dev)][MINOR(dev)];
 
     case BLKBSZGET:                                        /* get block size */
         DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
@@ -552,7 +558,7 @@
            values consistent with the size of the device */
 
         heads = 0xff;
-        sectors = 0x3f; 
+        sectors = 0x3f;
         cylinders = part->nr_sects / (heads * sectors);
 
         if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
@@ -562,7 +568,7 @@
 
         return 0;
 
-    case HDIO_GETGEO_BIG: 
+    case HDIO_GETGEO_BIG:
         DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
         if (!argument) return -EINVAL;
 
@@ -570,7 +576,7 @@
            values consistent with the size of the device */
 
         heads = 0xff;
-        sectors = 0x3f; 
+        sectors = 0x3f;
         cylinders = part->nr_sects / (heads * sectors);
 
         if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
@@ -594,7 +600,7 @@
         WPRINTK("ioctl %08x not supported by XL blkif\n", command);
         return -ENOSYS;
     }
-    
+
     return 0;
 }
 
@@ -614,7 +620,7 @@
     xl_disk_t *disk;
     unsigned long capacity;
     int i, rc = 0;
-    
+
     if ( (bd = bdget(dev)) == NULL )
         return -EINVAL;
 
@@ -662,7 +668,7 @@
 /*
  * blkif_queue_request
  *
- * request block io 
+ * request block io
  * 
  * id: for guest use only.
  * operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -696,7 +702,7 @@
 
     buffer_ma &= PAGE_MASK;
 
-    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+    if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
         return 1;
 
     switch ( operation )
@@ -704,7 +710,7 @@
 
     case BLKIF_OP_READ:
     case BLKIF_OP_WRITE:
-        gd = get_gendisk(device); 
+        gd = get_gendisk(device);
 
         /*
          * Update the sector_number we'll pass down as appropriate; note that
@@ -714,10 +720,10 @@
         sector_number += gd->part[MINOR(device)].start_sect;
 
         /*
-         * If this unit doesn't consist of virtual partitions then we clear 
+         * If this unit doesn't consist of virtual partitions then we clear
          * the partn bits from the device number.
          */
-        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
+        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
                GENHD_FL_VIRT_PARTNS) )
             device &= ~(gd->max_p - 1);
 
@@ -725,20 +731,20 @@
              (sg_dev == device) &&
              (sg_next_sect == sector_number) )
         {
-            req = RING_GET_REQUEST(&blk_ring, 
-                                   blk_ring.req_prod_pvt - 1);
+            req = RING_GET_REQUEST(&info->ring,
+                                   info->ring.req_prod_pvt - 1);
             bh = (struct buffer_head *)id;
-     
+
             bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
             blk_shadow[req->id].request = (unsigned long)id;
 
             /* install a grant reference. */
-            ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+            ref = gnttab_claim_grant_reference(&gref_head);
             ASSERT( ref != -ENOSPC );
 
             gnttab_grant_foreign_access_ref(
                         ref,
-                        rdomid,
+                        info->backend_id,
                         buffer_ma >> PAGE_SHIFT,
                         ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
 
@@ -757,7 +763,7 @@
 
             return 0;
         }
-        else if ( RING_FULL(&blk_ring) )
+        else if ( RING_FULL(&info->ring) )
         {
             return 1;
         }
@@ -774,7 +780,7 @@
     }
 
     /* Fill out a communications ring structure. */
-    req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+    req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 
     xid = GET_ID_FROM_FREELIST();
     blk_shadow[xid].request = (unsigned long)id;
@@ -782,15 +788,15 @@
     req->id            = xid;
     req->operation     = operation;
     req->sector_number = (blkif_sector_t)sector_number;
-    req->handle        = handle; 
+    req->handle        = handle;
     req->nr_segments   = 1;
     /* install a grant reference. */
-    ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+    ref = gnttab_claim_grant_reference(&gref_head);
     ASSERT( ref != -ENOSPC );
 
     gnttab_grant_foreign_access_ref(
                 ref,
-                rdomid,
+                info->backend_id,
                 buffer_ma >> PAGE_SHIFT,
                 ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
 
@@ -798,11 +804,11 @@
 
     req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect);
 
-    /* Keep a private copy so we can reissue requests when recovering. */    
+    /* Keep a private copy so we can reissue requests when recovering. */
     pickle_request(&blk_shadow[xid], req);
 
-    blk_ring.req_prod_pvt++;
-    
+    info->ring.req_prod_pvt++;
+
     return 0;
 }
 
@@ -817,13 +823,13 @@
     struct buffer_head *bh, *next_bh;
     int rw, nsect, full, queued = 0;
 
-    DPRINTK("Entered do_blkif_request\n"); 
+    DPRINTK("Entered do_blkif_request\n");
 
     while ( !rq->plugged && !list_empty(&rq->queue_head))
     {
-        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
+        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
             goto out;
-  
+
         DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
                 req, req->cmd, req->sector,
                 req->current_nr_sectors, req->nr_sectors, req->bh);
@@ -844,16 +850,16 @@
 
             full = blkif_queue_request(
                 (unsigned long)bh,
-                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 
+                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
                 bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
 
             if ( full )
-            { 
+            {
                 bh->b_reqnext = next_bh;
                 pending_queues[nr_pending++] = rq;
                 if ( unlikely(nr_pending >= MAX_PENDING) )
                     BUG();
-                goto out; 
+                goto out;
             }
 
             queued++;
@@ -861,7 +867,7 @@
             /* Dequeue the buffer head from the request. */
             nsect = bh->b_size >> 9;
             bh = req->bh = next_bh;
-            
+
             if ( bh != NULL )
             {
                 /* There's another buffer head to do. Update the request. */
@@ -891,27 +897,27 @@
 
 static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 {
-    RING_IDX i, rp; 
-    unsigned long flags; 
+    RING_IDX i, rp;
+    unsigned long flags;
     struct buffer_head *bh, *next_bh;
-    
-    spin_lock_irqsave(&io_request_lock, flags);     
-
-    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
+
+    spin_lock_irqsave(&io_request_lock, flags);
+
+    if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) )
     {
         spin_unlock_irqrestore(&io_request_lock, flags);
         return;
     }
 
-    rp = blk_ring.sring->rsp_prod;
+    rp = info->ring.sring->rsp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-    for ( i = blk_ring.rsp_cons; i != rp; i++ )
+    for ( i = info->ring.rsp_cons; i != rp; i++ )
     {
         unsigned long id;
         blkif_response_t *bret;
-        
-        bret = RING_GET_RESPONSE(&blk_ring, i);
+
+        bret = RING_GET_RESPONSE(&info->ring, i);
         id = bret->id;
         bh = (struct buffer_head *)blk_shadow[id].request;
 
@@ -943,8 +949,8 @@
         }
 
     }
-    blk_ring.rsp_cons = i;
-    
+    info->ring.rsp_cons = i;
+
     kick_pending_request_queues();
 
     spin_unlock_irqrestore(&io_request_lock, flags);
@@ -954,24 +960,24 @@
 
 /*****************************  COMMON CODE  *******************************/
 
-static void blkif_free(void)
+static void blkif_free(struct blkfront_info *info)
 {
     /* Prevent new requests being issued until we fix things up. */
     spin_lock_irq(&blkif_io_lock);
-    blkif_state = BLKIF_STATE_DISCONNECTED;
+    info->connected = BLKIF_STATE_DISCONNECTED;
     spin_unlock_irq(&blkif_io_lock);
 
     /* Free resources associated with old device channel. */
-    if ( blk_ring.sring != NULL )
-    {
-        free_page((unsigned long)blk_ring.sring);
-        blk_ring.sring = NULL;
-    }
-    unbind_evtchn_from_irqhandler(blkif_evtchn, NULL);
-    blkif_evtchn = 0;
-}
-
-static void blkif_recover(void)
+    if ( info->ring.sring != NULL )
+    {
+        free_page((unsigned long)info->ring.sring);
+        info->ring.sring = NULL;
+    }
+    unbind_evtchn_from_irqhandler(info->evtchn, NULL);
+    info->evtchn = 0;
+}
+
+static void blkif_recover(struct blkfront_info *info)
 {
     int i;
     blkif_request_t *req;
@@ -987,7 +993,7 @@
     memset(&blk_shadow, 0, sizeof(blk_shadow));
     for ( i = 0; i < BLK_RING_SIZE; i++ )
         blk_shadow[i].req.id = i+1;
-    blk_shadow_free = blk_ring.req_prod_pvt;
+    blk_shadow_free = info->ring.req_prod_pvt;
     blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
     /* Stage 3: Find pending requests and requeue them. */
@@ -999,7 +1005,7 @@
 
         /* Grab a request slot and unpickle shadow state into it. */
         req = RING_GET_REQUEST(
-            &blk_ring, blk_ring.req_prod_pvt);
+            &info->ring, info->ring.req_prod_pvt);
         unpickle_request(req, &copy[i]);
 
         /* We get a new request id, and must reset the shadow state. */
@@ -1012,7 +1018,7 @@
             if ( req->frame_and_sects[j] & GRANTREF_INVALID )
                 gnttab_grant_foreign_access_ref(
                     blkif_gref_from_fas(req->frame_and_sects[j]),
-                    rdomid,
+                    info->backend_id,
                     blk_shadow[req->id].frame[j],
                     rq_data_dir((struct request *)
                                 blk_shadow[req->id].request));
@@ -1020,32 +1026,31 @@
         }
         blk_shadow[req->id].req = *req;
 
-        blk_ring.req_prod_pvt++;
+        info->ring.req_prod_pvt++;
     }
 
     kfree(copy);
 
     recovery = 0;
 
-    /* blk_ring->req_prod will be set when we flush_requests().*/
+    /* info->ring->req_prod will be set when we flush_requests().*/
     wmb();
 
     /* Kicks things back into life. */
-    flush_requests();
+    flush_requests(info);
 
     /* Now safe to left other people use the interface. */
-    blkif_state = BLKIF_STATE_CONNECTED;
-}
-
-static void blkif_connect(u16 evtchn, domid_t domid)
+    info->connected = BLKIF_STATE_CONNECTED;
+}
+
+static void blkif_connect(struct blkfront_info *info, u16 evtchn)
 {
     int err = 0;
 
-    blkif_evtchn = evtchn;
-    rdomid       = domid;
+    info->evtchn = evtchn;
 
     err = bind_evtchn_to_irqhandler(
-        blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
+        info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
     if ( err != 0 )
     {
         WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
@@ -1059,17 +1064,6 @@
        { "" }
 };
 
-struct blkfront_info
-{
-       /* We watch the backend */
-       struct xenbus_watch watch;
-       int vdevice;
-       u16 handle;
-       int connected;
-       struct xenbus_device *dev;
-       char *backend;
-};
-
 static void watch_for_status(struct xenbus_watch *watch, const char *node)
 {
        struct blkfront_info *info;
@@ -1081,35 +1075,33 @@
        node += strlen(watch->node);
 
        /* FIXME: clean up when error on the other end. */
-       if (info->connected)
+       if (info->connected == BLKIF_STATE_CONNECTED)
                return;
 
-       err = xenbus_gather(watch->node, 
+       err = xenbus_gather(watch->node,
                            "sectors", "%lu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
                            NULL);
        if (err) {
-               xenbus_dev_error(info->dev, err, "reading backend fields");
+               xenbus_dev_error(info->xbdev, err, "reading backend fields");
                return;
        }
 
-       xlvbd_add(sectors, info->vdevice, info->handle, binfo, sector_size);
-       info->connected = 1;
-
-       /* First to connect?  blkif is now connected. */
-       if (blkif_vbds_connected++ == 0)
-               blkif_state = BLKIF_STATE_CONNECTED;
-
-       xenbus_dev_ok(info->dev);
+       xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       info->connected = BLKIF_STATE_CONNECTED;
+
+       blkif_state = BLKIF_STATE_CONNECTED;
+
+       xenbus_dev_ok(info->xbdev);
 
        /* Kick pending requests. */
        spin_lock_irq(&blkif_io_lock);
-       kick_pending_request_queues();
+       kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
 }
 
-static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id)
+static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
 {
        blkif_sring_t *sring;
        evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
@@ -1121,25 +1113,28 @@
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
-
-       shmem_ref = gnttab_claim_grant_reference(&gref_head,
-                                                gref_terminal);
-       ASSERT(shmem_ref != -ENOSPC);
-       gnttab_grant_foreign_access_ref(shmem_ref,
-                                       backend_id,
-                                       virt_to_mfn(blk_ring.sring),
-                                       0);
-
-       op.u.alloc_unbound.dom = backend_id;
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+       err = gnttab_grant_foreign_access(info->backend_id,
+                                         virt_to_mfn(info->ring.sring), 0);
+       if (err == -ENOSPC) {
+               free_page((unsigned long)info->ring.sring);
+               info->ring.sring = 0;
+               xenbus_dev_error(dev, err, "granting access to ring page");
+               return err;
+       }
+       info->grant_id = err;
+
+       op.u.alloc_unbound.dom = info->backend_id;
        err = HYPERVISOR_event_channel_op(&op);
        if (err) {
-               free_page((unsigned long)blk_ring.sring);
-               blk_ring.sring = 0;
+               gnttab_end_foreign_access(info->grant_id, 0);
+               free_page((unsigned long)info->ring.sring);
+               info->ring.sring = 0;
                xenbus_dev_error(dev, err, "allocating event channel");
                return err;
        }
-       blkif_connect(op.u.alloc_unbound.port, backend_id);
+       blkif_connect(info, op.u.alloc_unbound.port);
        return 0;
 }
 
@@ -1149,11 +1144,11 @@
 {
        char *backend;
        const char *message;
-       int err, backend_id;
+       int err;
 
        backend = NULL;
        err = xenbus_gather(dev->nodename,
-                           "backend-id", "%i", &backend_id,
+                           "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
        if (XENBUS_EXIST_ERR(err))
@@ -1168,12 +1163,10 @@
                goto out;
        }
 
-       /* First device?  We create shared ring, alloc event channel. */
-       if (blkif_vbds == 0) {
-               err = setup_blkring(dev, backend_id);
-               if (err)
-                       goto out;
-       }
+       /* Create shared ring, alloc event channel. */
+       err = setup_blkring(dev, info);
+       if (err)
+               goto out;
 
        err = xenbus_transaction_start(dev->nodename);
        if (err) {
@@ -1181,13 +1174,13 @@
                goto destroy_blkring;
        }
 
-       err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref);
+       err = xenbus_printf(dev->nodename, "grant-id","%u", info->grant_id);
        if (err) {
                message = "writing grant-id";
                goto abort_transaction;
        }
        err = xenbus_printf(dev->nodename,
-                           "event-channel", "%u", blkif_evtchn);
+                           "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
@@ -1220,8 +1213,7 @@
        /* Have to do this *outside* transaction.  */
        xenbus_dev_error(dev, err, "%s", message);
  destroy_blkring:
-       if (blkif_vbds == 0)
-               blkif_free();
+       blkif_free(info);
        goto out;
 }
 
@@ -1250,9 +1242,11 @@
                xenbus_dev_error(dev, err, "allocating info structure");
                return err;
        }
-       info->dev = dev;
+       info->xbdev = dev;
        info->vdevice = vdevice;
-       info->connected = 0;
+       info->connected = BLKIF_STATE_DISCONNECTED;
+       info->mi = NULL;
+       INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
 
        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
@@ -1266,7 +1260,6 @@
 
        /* Call once in case entries already there. */
        watch_for_status(&info->watch, info->watch.node);
-       blkif_vbds++;
        return 0;
 }
 
@@ -1277,15 +1270,13 @@
        if (info->backend)
                unregister_xenbus_watch(&info->watch);
 
-       if (info->connected) {
-               xlvbd_del(info->handle);
-               blkif_vbds_connected--;
-       }
+       if (info->mi)
+               xlvbd_del(info);
+
+       blkif_free(info);
+
        kfree(info->backend);
        kfree(info);
-
-       if (--blkif_vbds == 0)
-               blkif_free();
 
        return 0;
 }
@@ -1298,10 +1289,8 @@
        kfree(info->backend);
        info->backend = NULL;
 
-       if (--blkif_vbds == 0) {
-               recovery = 1;
-               blkif_free();
-       }
+       recovery = 1;
+       blkif_free(info);
 
        return 0;
 }
@@ -1314,8 +1303,7 @@
        /* FIXME: Check geometry hasn't changed here... */
        err = talk_to_backend(dev, info);
        if (!err) {
-               if (blkif_vbds++ == 0)
-                       blkif_recover();
+               blkif_recover(info);
        }
        return err;
 }
@@ -1363,11 +1351,6 @@
 {
     int i;
 
-    /* A grant for every ring slot, plus one for the ring itself. */
-    if (gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1,
-                                     &gref_head, &gref_terminal) < 0)
-        return 1;
-
     if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
          (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
         return 0;
@@ -1391,6 +1374,6 @@
 {
     int i;
     for ( i = 0; i < s->req.nr_segments; i++ )
-        gnttab_release_grant_reference(
-            &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
-}
+        gnttab_free_grant_reference(
+               blkif_gref_from_fas(s->req.frame_and_sects[i]));
+}
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Aug 23 19:03:21 2005
@@ -46,6 +46,7 @@
 #include <linux/major.h>
 #include <linux/devfs_fs_kernel.h>
 #include <asm-xen/hypervisor.h>
+#include <asm-xen/xenbus.h>
 #include <asm-xen/xen-public/xen.h>
 #include <asm-xen/xen-public/io/blkif.h>
 #include <asm-xen/xen-public/io/ring.h>
@@ -79,11 +80,20 @@
 #define DPRINTK_IOCTL(_f, _a...) ((void)0)
 #endif
 
-struct xlbd_type_info {
-    int partn_shift;
-    int disks_per_major;
-    char *devname;
-    char *diskname;
+struct xlbd_type_info
+{
+       int partn_shift;
+       int disks_per_major;
+       char *devname;
+       char *diskname;
+};
+
+struct xlbd_major_info
+{
+       int major;
+       int index;
+       int usage;
+       struct xlbd_type_info *type;
 };
 
 /*
@@ -91,26 +101,27 @@
  * hang in private_data off the gendisk structure. We may end up
  * putting all kinds of interesting stuff here :-)
  */
-struct xlbd_major_info {
-    int major;
-    int index;
-    int usage;
-    struct xlbd_type_info *type;
+struct blkfront_info
+{
+       struct xenbus_device *xbdev;
+       /* We watch the backend */
+       struct xenbus_watch watch;
+       dev_t dev;
+       int vdevice;
+       blkif_vdev_t handle;
+       int connected;
+       char *backend;
+       int backend_id;
+       int grant_id;
+       blkif_front_ring_t ring;
+       unsigned int evtchn;
+       struct xlbd_major_info *mi;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+       request_queue_t *rq;
+#endif
+       struct work_struct work;
+       struct gnttab_free_callback callback;
 };
-
-struct xlbd_disk_info {
-    int xd_device;
-    blkif_vdev_t handle;
-    struct xlbd_major_info *mi;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    struct xlbd_disk_info  *next_waiting;
-    request_queue_t        *rq;
-#endif
-};
-
-typedef struct xen_block {
-    int usage;
-} xen_block_t;
 
 extern spinlock_t blkif_io_lock;
 
@@ -123,7 +134,7 @@
 extern void do_blkif_request (request_queue_t *rq); 
 
 /* Virtual block-device subsystem. */
-int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
-             u16 info, u16 sector_size);
-void xlvbd_del(blkif_vdev_t handle);
+int xlvbd_add(blkif_sector_t capacity, int device,
+             u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
 #endif /* __XEN_DRIVERS_BLOCK_H__ */
diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Tue Aug 23 19:03:21 2005
@@ -43,325 +43,269 @@
 #define NUM_SCSI_MAJORS 9
 #define NUM_VBD_MAJORS 1
 
-struct lvdisk
-{
-    blkif_sector_t capacity; /*  0: Size in terms of 512-byte sectors.   */
-    blkif_vdev_t   handle;   /*  8: Device number (opaque 16 bit value). */
-    u16            info;
-    dev_t          dev;
-    struct list_head list;
+static struct xlbd_type_info xlbd_ide_type = {
+       .partn_shift = 6,
+       .disks_per_major = 2,
+       .devname = "ide",
+       .diskname = "hd",
 };
 
-static struct xlbd_type_info xlbd_ide_type = {
-    .partn_shift = 6,
-    .disks_per_major = 2,
-    .devname = "ide",
-    .diskname = "hd",
+static struct xlbd_type_info xlbd_scsi_type = {
+       .partn_shift = 4,
+       .disks_per_major = 16,
+       .devname = "sd",
+       .diskname = "sd",
 };
 
-static struct xlbd_type_info xlbd_scsi_type = {
-    .partn_shift = 4,
-    .disks_per_major = 16,
-    .devname = "sd",
-    .diskname = "sd",
+static struct xlbd_type_info xlbd_vbd_type = {
+       .partn_shift = 4,
+       .disks_per_major = 16,
+       .devname = "xvd",
+       .diskname = "xvd",
 };
 
-static struct xlbd_type_info xlbd_vbd_type = {
-    .partn_shift = 4,
-    .disks_per_major = 16,
-    .devname = "xvd",
-    .diskname = "xvd",
-};
-
 static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
-                                         NUM_VBD_MAJORS];
-
-#define XLBD_MAJOR_IDE_START    0
-#define XLBD_MAJOR_SCSI_START   (NUM_IDE_MAJORS)
-#define XLBD_MAJOR_VBD_START    (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
-
-#define XLBD_MAJOR_IDE_RANGE    XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
-#define XLBD_MAJOR_SCSI_RANGE   XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
-#define XLBD_MAJOR_VBD_RANGE    XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
+                                         NUM_VBD_MAJORS];
+
+#define XLBD_MAJOR_IDE_START   0
+#define XLBD_MAJOR_SCSI_START  (NUM_IDE_MAJORS)
+#define XLBD_MAJOR_VBD_START   (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
+
+#define XLBD_MAJOR_IDE_RANGE   XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
+#define XLBD_MAJOR_SCSI_RANGE  XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
+#define XLBD_MAJOR_VBD_RANGE   XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
 
 /* Information about our VBDs. */
 #define MAX_VBDS 64
 static LIST_HEAD(vbds_list);
 
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
-
-static struct block_device_operations xlvbd_block_fops = 
-{
-    .owner  = THIS_MODULE,
-    .open  = blkif_open,
-    .release = blkif_release,
-    .ioctl  = blkif_ioctl,
+static struct block_device_operations xlvbd_block_fops =
+{
+       .owner = THIS_MODULE,
+       .open = blkif_open,
+       .release = blkif_release,
+       .ioctl  = blkif_ioctl,
 };
 
 spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
 
-static struct lvdisk *xlvbd_device_alloc(void)
-{
-    struct lvdisk *disk;
-
-    disk = kmalloc(sizeof(*disk), GFP_KERNEL);
-    if (disk != NULL) {
-        memset(disk, 0, sizeof(*disk));
-        INIT_LIST_HEAD(&disk->list);
-    }
-    return disk;
-}
-
-static void xlvbd_device_free(struct lvdisk *disk)
-{
-    list_del(&disk->list);
-    kfree(disk);
-}
-
-static struct xlbd_major_info *xlbd_alloc_major_info(
-    int major, int minor, int index)
-{
-    struct xlbd_major_info *ptr;
-
-    ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
-    if (ptr == NULL)
-        return NULL;
-
-    memset(ptr, 0, sizeof(struct xlbd_major_info));
-
-    ptr->major = major;
-
-    switch (index) {
-    case XLBD_MAJOR_IDE_RANGE:
-        ptr->type = &xlbd_ide_type;
-        ptr->index = index - XLBD_MAJOR_IDE_START;
-        break;
-    case XLBD_MAJOR_SCSI_RANGE:
-        ptr->type = &xlbd_scsi_type;
-        ptr->index = index - XLBD_MAJOR_SCSI_START;
-        break;
-    case XLBD_MAJOR_VBD_RANGE:
-        ptr->type = &xlbd_vbd_type;
-        ptr->index = index - XLBD_MAJOR_VBD_START;
-        break;
-    }
-    
-    printk("Registering block device major %i\n", ptr->major);
-    if (register_blkdev(ptr->major, ptr->type->devname)) {
-        WPRINTK("can't get major %d with name %s\n",
-                ptr->major, ptr->type->devname);
-        kfree(ptr);
-        return NULL;
-    }
-
-    devfs_mk_dir(ptr->type->devname);
-    major_info[index] = ptr;
-    return ptr;
-}
-
-static struct xlbd_major_info *xlbd_get_major_info(int device)
-{
-    int major, minor, index;
-
-    major = MAJOR_XEN(device);
-    minor = MINOR_XEN(device);
-
-    switch (major) {
-    case IDE0_MAJOR: index = 0; break;
-    case IDE1_MAJOR: index = 1; break;
-    case IDE2_MAJOR: index = 2; break;
-    case IDE3_MAJOR: index = 3; break;
-    case IDE4_MAJOR: index = 4; break;
-    case IDE5_MAJOR: index = 5; break;
-    case IDE6_MAJOR: index = 6; break;
-    case IDE7_MAJOR: index = 7; break;
-    case IDE8_MAJOR: index = 8; break;
-    case IDE9_MAJOR: index = 9; break;
-    case SCSI_DISK0_MAJOR: index = 10; break;
-    case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
-        index = 11 + major - SCSI_DISK1_MAJOR;
-        break;
-    case SCSI_CDROM_MAJOR: index = 18; break;
-    default: index = 19; break;
-    }
-
-    return ((major_info[index] != NULL) ? major_info[index] :
-            xlbd_alloc_major_info(major, minor, index));
-}
-
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
-{
-    request_queue_t *rq;
-
-    rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
-    if (rq == NULL)
-        return -1;
-
-    elevator_init(rq, "noop");
-
-    /* Hard sector size and max sectors impersonate the equiv. hardware. */
-    blk_queue_hardsect_size(rq, sector_size);
-    blk_queue_max_sectors(rq, 512);
-
-    /* Each segment in a request is up to an aligned page in size. */
-    blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-    blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-    /* Ensure a merged request will fit in a single I/O ring slot. */
-    blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-    blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-    /* Make sure buffer addresses are sector-aligned. */
-    blk_queue_dma_alignment(rq, 511);
-
-    gd->queue = rq;
-
-    return 0;
-}
-
-static struct gendisk *xlvbd_alloc_gendisk(
-    struct xlbd_major_info *mi, int minor, blkif_sector_t capacity,
-    int device, blkif_vdev_t handle, u16 info, u16 sector_size)
-{
-    struct gendisk *gd;
-    struct xlbd_disk_info *di;
-    int nr_minors = 1;
-
-    di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
-    if (di == NULL)
-        return NULL;
-    memset(di, 0, sizeof(*di));
-    di->mi = mi;
-    di->xd_device = device;
-    di->handle = handle;
-
-    if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
-        nr_minors = 1 << mi->type->partn_shift;
-
-    gd = alloc_disk(nr_minors);
-    if (gd == NULL)
-        goto out;
-
-    if (nr_minors > 1)
-        sprintf(gd->disk_name, "%s%c", mi->type->diskname,
-                'a' + mi->index * mi->type->disks_per_major +
-                    (minor >> mi->type->partn_shift));
-    else
-        sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
-                'a' + mi->index * mi->type->disks_per_major +
-                (minor >> mi->type->partn_shift),
-                minor & ((1 << mi->type->partn_shift) - 1));
-
-    gd->major = mi->major;
-    gd->first_minor = minor;
-    gd->fops = &xlvbd_block_fops;
-    gd->private_data = di;
-    set_capacity(gd, capacity);
-
-    if (xlvbd_init_blk_queue(gd, sector_size)) {
-        del_gendisk(gd);
-        goto out;
-    }
-
-    di->rq = gd->queue;
-
-    if (info & VDISK_READONLY)
-        set_disk_ro(gd, 1);
-
-    if (info & VDISK_REMOVABLE)
-        gd->flags |= GENHD_FL_REMOVABLE;
-
-    if (info & VDISK_CDROM)
-        gd->flags |= GENHD_FL_CD;
-
-    add_disk(gd);
-
-    return gd;
-
-out:
-    kfree(di);
-    return NULL;
-}
-
-int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
-             u16 info, u16 sector_size)
-{
-    struct lvdisk *new;
-    struct block_device *bd;
-    struct gendisk *gd;
-    struct xlbd_major_info *mi;
-
-    mi = xlbd_get_major_info(device);
-    if (mi == NULL)
-        return -EPERM;
-
-    new = xlvbd_device_alloc();
-    if (new == NULL)
-        return -ENOMEM;
-    new->capacity = capacity;
-    new->info = info;
-    new->handle = handle;
-    new->dev = MKDEV(MAJOR_XEN(device), MINOR_XEN(device));
-
-    bd = bdget(new->dev);
-    if (bd == NULL)
-        goto out;
-    
-    gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle,
-                            info, sector_size);
-    if (gd == NULL)
-        goto out_bd;
-
-    list_add(&new->list, &vbds_list);
-out_bd:
-    bdput(bd);
-out:
-    return 0;
-}
-
-static int xlvbd_device_del(struct lvdisk *disk)
-{
-    struct block_device *bd;
-    struct gendisk *gd;
-    struct xlbd_disk_info *di;
-    int ret = 0, unused;
-    request_queue_t *rq;
-
-    bd = bdget(disk->dev);
-    if (bd == NULL)
-        return -1;
-
-    gd = get_gendisk(disk->dev, &unused);
-    di = gd->private_data;
-
-#if 0 /* This is wrong: hda and hdb share same major, for example. */
-    if (di->mi->usage != 0) {
-        WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev);
-        ret = -1;
-        goto out;
-    }
-#endif
-
-    rq = gd->queue;
-    del_gendisk(gd);
-    put_disk(gd);
-    blk_cleanup_queue(rq);
-
-    xlvbd_device_free(disk);
-    bdput(bd);
-    return ret;
-}
-
-void xlvbd_del(blkif_vdev_t handle)
-{
-       struct lvdisk *i;
-
-       list_for_each_entry(i, &vbds_list, list) {
-               if (i->handle == handle) {
-                       xlvbd_device_del(i);
-                       return;
-               }
+/*
+ * Allocate and register the bookkeeping record for one block-device major.
+ *
+ * major: major number passed to register_blkdev().
+ * minor: not used here.
+ * index: slot in major_info[]; its range selects the IDE, SCSI or VBD type.
+ *
+ * Returns the new record (also stored in major_info[index]), or NULL if the
+ * allocation fails, index falls in no known range, or the major cannot be
+ * registered.
+ */
+static struct xlbd_major_info *
+xlbd_alloc_major_info(int major, int minor, int index)
+{
+       struct xlbd_major_info *ptr;
+
+       ptr = kmalloc(sizeof(*ptr), GFP_KERNEL);
+       if (ptr == NULL)
+               return NULL;
+
+       memset(ptr, 0, sizeof(*ptr));
+
+       ptr->major = major;
+
+       switch (index) {
+       case XLBD_MAJOR_IDE_RANGE:
+               ptr->type = &xlbd_ide_type;
+               ptr->index = index - XLBD_MAJOR_IDE_START;
+               break;
+       case XLBD_MAJOR_SCSI_RANGE:
+               ptr->type = &xlbd_scsi_type;
+               ptr->index = index - XLBD_MAJOR_SCSI_START;
+               break;
+       case XLBD_MAJOR_VBD_RANGE:
+               ptr->type = &xlbd_vbd_type;
+               ptr->index = index - XLBD_MAJOR_VBD_START;
+               break;
+       default:
+               /* ptr->type would stay NULL and be dereferenced below. */
+               WPRINTK("no device type for major index %d\n", index);
+               kfree(ptr);
+               return NULL;
        }
-       BUG();
-}
+
+       printk("Registering block device major %i\n", ptr->major);
+       if (register_blkdev(ptr->major, ptr->type->devname)) {
+               WPRINTK("can't get major %d with name %s\n",
+                       ptr->major, ptr->type->devname);
+               kfree(ptr);
+               return NULL;
+       }
+
+       devfs_mk_dir(ptr->type->devname);
+       /* Publish the record so later lookups for this index reuse it. */
+       major_info[index] = ptr;
+       return ptr;
+}
+
+/*
+ * Look up the major record for a virtual device, allocating it on first
+ * use, and take one usage reference on it.
+ *
+ * vdevice: virtual device number; its major selects the major_info[] slot.
+ *
+ * Returns the record, or NULL if none exists yet and one cannot be
+ * allocated.  Release with xlbd_put_major_info().
+ */
+static struct xlbd_major_info *
+xlbd_get_major_info(int vdevice)
+{
+       struct xlbd_major_info *mi;
+       int major, minor, index;
+
+       major = BLKIF_MAJOR(vdevice);
+       minor = BLKIF_MINOR(vdevice);
+
+       /* Map the major onto a slot: 0-9 IDE, 10-18 SCSI, 19 everything else. */
+       switch (major) {
+       case IDE0_MAJOR: index = 0; break;
+       case IDE1_MAJOR: index = 1; break;
+       case IDE2_MAJOR: index = 2; break;
+       case IDE3_MAJOR: index = 3; break;
+       case IDE4_MAJOR: index = 4; break;
+       case IDE5_MAJOR: index = 5; break;
+       case IDE6_MAJOR: index = 6; break;
+       case IDE7_MAJOR: index = 7; break;
+       case IDE8_MAJOR: index = 8; break;
+       case IDE9_MAJOR: index = 9; break;
+       case SCSI_DISK0_MAJOR: index = 10; break;
+       case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+               index = 11 + major - SCSI_DISK1_MAJOR;
+               break;
+       case SCSI_CDROM_MAJOR: index = 18; break;
+       default: index = 19; break;
+       }
+
+       mi = major_info[index];
+       if (mi == NULL)
+               mi = xlbd_alloc_major_info(major, minor, index);
+
+       /* Allocation can fail; only count a reference we actually hold. */
+       if (mi != NULL)
+               mi->usage++;
+       return mi;
+}
+
+/* Give back one usage reference obtained from xlbd_get_major_info(). */
+static void
+xlbd_put_major_info(struct xlbd_major_info *mi)
+{
+       /* XXX: release major if 0 */
+       --mi->usage;
+}
+
+/*
+ * Create the request queue for gd and apply the blkif limits to it.
+ * On success the queue is stored in gd->queue and 0 is returned;
+ * returns -1 if the queue cannot be created.
+ */
+static int
+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+       request_queue_t *queue =
+               blk_init_queue(do_blkif_request, &blkif_io_lock);
+
+       if (!queue)
+               return -1;
+
+       elevator_init(queue, "noop");
+
+       /* Use the caller's sector size; cap a request at 512 sectors. */
+       blk_queue_hardsect_size(queue, sector_size);
+       blk_queue_max_sectors(queue, 512);
+
+       /* No segment may cross a page boundary or exceed one page. */
+       blk_queue_segment_boundary(queue, PAGE_SIZE - 1);
+       blk_queue_max_segment_size(queue, PAGE_SIZE);
+
+       /* A merged request must still fit in one I/O ring slot. */
+       blk_queue_max_phys_segments(queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       blk_queue_max_hw_segments(queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+       /* Buffers must start on a 512-byte boundary. */
+       blk_queue_dma_alignment(queue, 511);
+
+       gd->queue = queue;
+       return 0;
+}
+
+/*
+ * Allocate, configure and register the gendisk for one virtual disk.
+ *
+ * minor:       first minor of the device.  A minor with no partition bits
+ *              set gets the full per-disk range of minors; any other
+ *              minor gets exactly one.
+ * capacity:    passed to set_capacity().
+ * vdevice:     virtual device number, used to find the major record.
+ * vdisk_info:  VDISK_* flag bits (read-only, removable, CD-ROM).
+ * sector_size: hard sector size for the request queue.
+ * info:        per-device state; on success info->mi and info->rq are set.
+ *
+ * Returns 0 on success or -ENODEV on any failure.  On failure the major
+ * reference is dropped again and info->mi is left NULL.
+ */
+static int
+xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
+                   u16 vdisk_info, u16 sector_size,
+                   struct blkfront_info *info)
+{
+       struct gendisk *gd;
+       struct xlbd_major_info *mi;
+       int nr_minors = 1;
+       int err = -ENODEV;
+       int partn, letter;
+
+       mi = xlbd_get_major_info(vdevice);
+       if (mi == NULL)
+               goto out;
+       info->mi = mi;
+
+       /* Low bits of the minor select the partition, the rest the disk. */
+       partn = minor & ((1 << mi->type->partn_shift) - 1);
+       letter = 'a' + mi->index * mi->type->disks_per_major +
+               (minor >> mi->type->partn_shift);
+
+       if (partn == 0)
+               nr_minors = 1 << mi->type->partn_shift;
+
+       gd = alloc_disk(nr_minors);
+       if (gd == NULL)
+               goto out;
+
+       /* Whole disks are named e.g. "<name>a", single partitions "<name>a1". */
+       if (nr_minors > 1)
+               snprintf(gd->disk_name, sizeof(gd->disk_name), "%s%c",
+                        mi->type->diskname, letter);
+       else
+               snprintf(gd->disk_name, sizeof(gd->disk_name), "%s%c%d",
+                        mi->type->diskname, letter, partn);
+
+       gd->major = mi->major;
+       gd->first_minor = minor;
+       gd->fops = &xlvbd_block_fops;
+       gd->private_data = info;
+       set_capacity(gd, capacity);
+
+       if (xlvbd_init_blk_queue(gd, sector_size)) {
+               /* The disk was never add_disk()ed, so just drop it. */
+               put_disk(gd);
+               goto out;
+       }
+
+       info->rq = gd->queue;
+
+       if (vdisk_info & VDISK_READONLY)
+               set_disk_ro(gd, 1);
+
+       if (vdisk_info & VDISK_REMOVABLE)
+               gd->flags |= GENHD_FL_REMOVABLE;
+
+       if (vdisk_info & VDISK_CDROM)
+               gd->flags |= GENHD_FL_CD;
+
+       add_disk(gd);
+
+       return 0;
+
+ out:
+       if (mi) {
+               xlbd_put_major_info(mi);
+               /* Don't leave info pointing at a reference we gave back. */
+               info->mi = NULL;
+       }
+       return err;
+}
+
+/*
+ * Create the disk for vdevice.  The device number is recorded in
+ * info->dev, and a block_device reference is held while the gendisk is
+ * being set up.
+ *
+ * Returns -ENODEV if bdget() fails, otherwise whatever
+ * xlvbd_alloc_gendisk() returns (0 on success).
+ */
+int
+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+         u16 sector_size, struct blkfront_info *info)
+{
+       struct block_device *bdev;
+       int minor = BLKIF_MINOR(vdevice);
+       int rc;
+
+       info->dev = MKDEV(BLKIF_MAJOR(vdevice), minor);
+
+       bdev = bdget(info->dev);
+       if (!bdev)
+               return -ENODEV;
+
+       rc = xlvbd_alloc_gendisk(minor, capacity, vdevice,
+                                vdisk_info, sector_size, info);
+       bdput(bdev);
+
+       return rc;
+}
+
+/*
+ * Tear down the disk created by xlvbd_add(): unregister the gendisk, give
+ * back the major reference and clean up the request queue.
+ *
+ * Does nothing if the block device or the gendisk for info->dev cannot be
+ * found.
+ */
+void
+xlvbd_del(struct blkfront_info *info)
+{
+       struct block_device *bd;
+       struct gendisk *gd;
+       int unused;
+       request_queue_t *rq;
+
+       bd = bdget(info->dev);
+       if (bd == NULL)
+               return;
+
+       /* No disk registered for this device: nothing to tear down. */
+       gd = get_gendisk(info->dev, &unused);
+       if (gd == NULL)
+               goto out;
+
+       /* Save the queue pointer before the disk is released. */
+       rq = gd->queue;
+
+       del_gendisk(gd);
+       put_disk(gd);
+       if (info->mi != NULL) {
+               xlbd_put_major_info(info->mi);
+               info->mi = NULL;
+       }
+       blk_cleanup_queue(rq);
+
+ out:
+       bdput(bd);
+}
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Aug 22 
18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Tue Aug 23 
19:03:21 2005
@@ -102,12 +102,12 @@
 #endif
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t gref_tx_head;
 static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
 #endif
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t gref_rx_head;
 static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
 #endif
 
@@ -441,8 +441,8 @@
         
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        if (unlikely((ref = gnttab_claim_grant_reference(&gref_rx_head, 
-                                                gref_rx_terminal)) < 0)) {
+       ref = gnttab_claim_grant_reference(&gref_rx_head);
+        if (unlikely(ref < 0)) {
             printk(KERN_ALERT "#### netfront can't claim rx reference\n");
             BUG();
         }
@@ -537,8 +537,8 @@
 
     tx->id   = id;
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    if (unlikely((ref = gnttab_claim_grant_reference(&gref_tx_head, 
-                                                     gref_tx_terminal)) < 0)) {
+    ref = gnttab_claim_grant_reference(&gref_tx_head);
+    if (unlikely(ref < 0)) {
         printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
         BUG();
     }
@@ -929,8 +929,7 @@
     msg->handle = np->handle;
     msg->tx_shmem_frame = virt_to_mfn(np->tx);
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    msg->tx_shmem_ref   = (u32)gnttab_claim_grant_reference(&gref_tx_head, 
-                                                            gref_tx_terminal);
+    msg->tx_shmem_ref   = (u32)gnttab_claim_grant_reference(&gref_tx_head);
     if(msg->tx_shmem_ref < 0) { 
         printk(KERN_ALERT "#### netfront can't claim tx_shmem reference\n");
         BUG();
@@ -941,8 +940,7 @@
 
     msg->rx_shmem_frame = virt_to_mfn(np->rx);
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    msg->rx_shmem_ref   = (u32)gnttab_claim_grant_reference(&gref_rx_head, 
-                                                            gref_rx_terminal);
+    msg->rx_shmem_ref   = (u32)gnttab_claim_grant_reference(&gref_rx_head);
     if(msg->rx_shmem_ref < 0) {
         printk(KERN_ALERT "#### netfront can't claim rx_shmem reference\n");
         BUG();
@@ -1420,7 +1418,7 @@
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
     /* A grant for every ring slot, plus one for the ring itself */
     if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE + 1,
-                                      &gref_tx_head, &gref_tx_terminal) < 0) {
+                                      &gref_tx_head) < 0) {
         printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
         return 1;
     }
@@ -1429,7 +1427,7 @@
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
     /* A grant for every ring slot, plus one for the ring itself */
     if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE + 1,
-                                      &gref_rx_head, &gref_rx_terminal) < 0) {
+                                      &gref_rx_head) < 0) {
         printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
         return 1;
     }
@@ -1457,10 +1455,10 @@
 static void netif_exit(void)
 {
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
-    gnttab_free_grant_references(NETIF_TX_RING_SIZE + 1, gref_tx_head);
+    gnttab_free_grant_references(gref_tx_head);
 #endif
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    gnttab_free_grant_references(NETIF_RX_RING_SIZE + 1, gref_rx_head);
+    gnttab_free_grant_references(gref_rx_head);
 #endif
 }
 
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Aug 22 
18:37:48 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Tue Aug 23 
19:03:21 2005
@@ -167,7 +167,7 @@
             if (ret)
                 goto batch_err;
 
-            u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
+            u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
             u.ptr = ptep;
 
             if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Mon Aug 22 
18:37:48 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Tue Aug 23 
19:03:21 2005
@@ -60,9 +60,13 @@
 #define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY      (~0U)
+#define FOREIGN_FRAME(m)       ((m) | 0x80000000U)
 extern unsigned int *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
-#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
+#define pfn_to_mfn(pfn)        \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+#define mfn_to_pfn(mfn)        \
+((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)])
 
 /* Definitions for machine and pseudophysical addresses. */
 #ifdef CONFIG_X86_PAE
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Mon Aug 
22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Tue Aug 
23 19:03:21 2005
@@ -63,17 +63,15 @@
  * 
  * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
  *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- *      require. In all the cases we care about, the high bit gets shifted out
- *      (e.g., phys_to_machine()) so behaviour there is correct.
+ *      require. In all the cases we care about, the FOREIGN_FRAME bit is
+ *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
  */
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
 #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
        unsigned long pfn = mfn_to_pfn(mfn);                            \
-       if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn))             \
+       if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
 })
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h    Mon Aug 
22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h    Tue Aug 
23 19:03:21 2005
@@ -150,15 +150,13 @@
        return !pte.pte_low && !pte.pte_high;
 }
 
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
 #define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\
                        (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) )
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
        unsigned long pfn = mfn_to_pfn(mfn);                            \
-       if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn))             \
+       if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
 })
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Mon Aug 22 
18:37:48 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Tue Aug 23 
19:03:21 2005
@@ -62,9 +62,13 @@
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY      (~0U)
+#define FOREIGN_FRAME(m)       ((m) | 0x80000000U)
 extern u32 *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned 
int)(_pfn)])
-#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned 
int)(_mfn)])
+#define pfn_to_mfn(pfn)        \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+#define mfn_to_pfn(mfn)        \
+((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)])
 
 /* Definitions for machine and pseudophysical addresses. */
 typedef unsigned long paddr_t;
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Mon Aug 22 
18:37:48 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Tue Aug 23 
19:03:21 2005
@@ -300,17 +300,15 @@
  * 
  * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
  *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- *      require. In all the cases we care about, the high bit gets shifted out
- *      (e.g., phys_to_machine()) so behaviour there is correct.
- */
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+ *      require. In all the cases we care about, the FOREIGN_FRAME bit is
+ *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
+ */
 #define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
 #define pte_pfn(_pte)                                                  \
 ({                                                                     \
        unsigned long mfn = pte_mfn(_pte);                              \
        unsigned pfn = mfn_to_pfn(mfn);                                 \
-       if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn))             \
+       if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
                pfn = max_mapnr; /* special: force !pfn_valid() */      \
        pfn;                                                            \
 })
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h     Mon Aug 22 18:37:48 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h     Tue Aug 23 19:03:21 2005
@@ -19,54 +19,46 @@
 
 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
 #define NR_GRANT_FRAMES 4
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
 
-int
-gnttab_grant_foreign_access(
-    domid_t domid, unsigned long frame, int readonly);
+struct gnttab_free_callback { /* record handed to gnttab_request_free_callback(); exact semantics live in gnttab.c -- TODO confirm */
+    struct gnttab_free_callback *next; /* link to another record; list handling is not shown in this header */
+    void (*fn)(void *);                /* function to call */
+    void *arg;                         /* presumably the argument passed to fn() -- confirm */
+    u16 count;                         /* presumably the number of grant references being waited for -- confirm */
+};
 
-void
-gnttab_end_foreign_access(
-    grant_ref_t ref, int readonly);
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+                               int readonly);
 
-int
-gnttab_grant_foreign_transfer(
-    domid_t domid, unsigned long pfn);
+void gnttab_end_foreign_access(grant_ref_t ref, int readonly);
 
-unsigned long
-gnttab_end_foreign_transfer(
-    grant_ref_t ref);
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
 
-int
-gnttab_query_foreign_access( 
-    grant_ref_t ref );
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
+
+int gnttab_query_foreign_access(grant_ref_t ref);
 
 /*
  * operations on reserved batches of grant references
  */
-int
-gnttab_alloc_grant_references(
-    u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
 
-void
-gnttab_free_grant_references(
-    u16 count, grant_ref_t private_head );
+void gnttab_free_grant_reference(grant_ref_t ref);
 
-int
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
-);
+void gnttab_free_grant_references(grant_ref_t head);
 
-void
-gnttab_release_grant_reference(
-    grant_ref_t *private_head, grant_ref_t release );
+int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
 
-void
-gnttab_grant_foreign_access_ref(
-    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+                                   grant_ref_t release);
 
-void
-gnttab_grant_foreign_transfer_ref(
-    grant_ref_t, domid_t domid, unsigned long pfn);
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+                                 void (*fn)(void *), void *arg, u16 count);
 
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+                                    unsigned long frame, int readonly);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+                                      unsigned long pfn);
 
 #endif /* __ASM_GNTTAB_H__ */
diff -r cd984b3478f6 -r cc5f88b719d0 tools/Makefile
--- a/tools/Makefile    Mon Aug 22 18:37:48 2005
+++ b/tools/Makefile    Tue Aug 23 19:03:21 2005
@@ -14,6 +14,7 @@
 SUBDIRS += firmware
 SUBDIRS += security
 SUBDIRS += console
+SUBDIRS += xenstat
 
 .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean
 
diff -r cd984b3478f6 -r cc5f88b719d0 tools/Rules.mk
--- a/tools/Rules.mk    Mon Aug 22 18:37:48 2005
+++ b/tools/Rules.mk    Tue Aug 23 19:03:21 2005
@@ -6,6 +6,7 @@
 XEN_LIBXC          = $(XEN_ROOT)/tools/libxc
 XEN_XCS            = $(XEN_ROOT)/tools/xcs
 XEN_XENSTORE       = $(XEN_ROOT)/tools/xenstore
+XEN_LIBXENSTAT     = $(XEN_ROOT)/tools/xenstat/libxenstat/src
 
 ifeq ($(XEN_TARGET_ARCH),x86_32)
 CFLAGS  += -m32 -march=i686
diff -r cd984b3478f6 -r cc5f88b719d0 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Mon Aug 22 18:37:48 2005
+++ b/tools/python/xen/xm/main.py       Tue Aug 23 19:03:21 2005
@@ -49,6 +49,7 @@
     restore <File>          create a domain from a saved state file
     save <DomId> <File>     save domain state (and config) to file
     shutdown <DomId>        shutdown a domain
+    top                     monitor system and domains in real-time
     unpause <DomId>         unpause a paused domain
 
 For a complete list of subcommands run 'xm help --long'
@@ -87,6 +88,7 @@
     dmesg   [--clear]         read or clear Xen's message buffer
     info                      get information about the xen host
     log                       print the xend log
+    top                       monitor system and domains in real-time
 
   Scheduler Commands:
     bvt <options>             set BVT scheduler parameters
@@ -457,6 +459,9 @@
     os.execvp('/usr/libexec/xen/xenconsole', cmd.split())
     console = sxp.child(info, "console")
 
+def xm_top(args):
+    """Replace the current process with xentop; ``args`` is not used."""
+    xentop = '/usr/sbin/xentop'
+    os.execv(xentop, [xentop])
+
 def xm_dmesg(args):
     
     gopts = Opts(use="""[-c|--clear]
@@ -545,6 +550,8 @@
 commands = {
     # console commands
     "console": xm_console,
+    # xenstat commands
+    "top": xm_top,
     # domain commands
     "domid": xm_domid,
     "domname": xm_domname,
diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Mon Aug 22 18:37:48 2005
+++ b/xen/arch/x86/io_apic.c    Tue Aug 23 19:03:21 2005
@@ -1751,8 +1751,30 @@
     
     pin = (address - 0x10) >> 1;
 
+    *(u32 *)&rte = val;
     rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-    *(int *)&rte = val;
+
+    /*
+     * What about weird destination types?
+     *  SMI:    Ignore? Ought to be set up by the BIOS.
+     *  NMI:    Ignore? Watchdog functionality is Xen's concern.
+     *  INIT:   Definitely ignore: probably a guest OS bug.
+     *  ExtINT: Ignore? Linux only asserts this at start of day.
+     * For now, print a message and return an error. We can fix up on demand.
+     */
+    if ( rte.delivery_mode > dest_LowestPrio )
+    {
+        printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
+        printk("       APIC=%d/%d, lo-reg=%x\n", apicid, pin, val);
+        return -EINVAL;
+    }
+
+    /*
+     * The guest does not know physical APIC arrangement (flat vs. cluster).
+     * Apply genapic conventions for this platform.
+     */
+    rte.delivery_mode = INT_DELIVERY_MODE;
+    rte.dest_mode     = INT_DEST_MODE;
 
     if ( rte.vector >= FIRST_DEVICE_VECTOR )
     {
diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Aug 22 18:37:48 2005
+++ b/xen/arch/x86/mm.c Tue Aug 23 19:03:21 2005
@@ -444,7 +444,7 @@
 
     if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
     {
-        MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+        MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
         return 0;
     }
 
@@ -490,7 +490,7 @@
 
     if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
     {
-        MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
+        MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return 0;
     }
 
@@ -523,7 +523,7 @@
 
     if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
     {
-        MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
+        MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
         return 0;
     }
 
@@ -557,7 +557,7 @@
 
     if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
     {
-        MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
+        MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
         return 0;
     }
 
@@ -1025,7 +1025,7 @@
          unlikely(o != l1e_get_intpte(ol1e)) )
     {
         MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
-                ": saw %" PRIpte "\n",
+                ": saw %" PRIpte,
                 l1e_get_intpte(ol1e),
                 l1e_get_intpte(nl1e),
                 o);
@@ -1051,7 +1051,7 @@
     {
         if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L1 flags %x\n",
+            MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
             return 0;
         }
@@ -1113,7 +1113,7 @@
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L2 flags %x\n",
+            MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
         }
@@ -1175,7 +1175,7 @@
     {
         if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L3 flags %x\n",
+            MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
             return 0;
         }
@@ -1237,7 +1237,7 @@
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
-            MEM_LOG("Bad L4 flags %x\n",
+            MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return 0;
         }
@@ -1598,7 +1598,7 @@
             percpu_info[cpu].foreign = dom_io;
             break;
         default:
-            MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id);
+            MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
             okay = 0;
             break;
         }
@@ -1831,7 +1831,7 @@
         case MMUEXT_FLUSH_CACHE:
             if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
             {
-                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
+                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
                 okay = 0;
             }
             else
@@ -1845,7 +1845,7 @@
             if ( shadow_mode_external(d) )
             {
                 MEM_LOG("ignoring SET_LDT hypercall from external "
-                        "domain %u\n", d->domain_id);
+                        "domain %u", d->domain_id);
                 okay = 0;
                 break;
             }
@@ -1916,7 +1916,7 @@
                  unlikely(IS_XEN_HEAP_FRAME(page)) )
             {
                 MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
-                        "page is in Xen heap (%lx), or dom is dying (%ld).\n",
+                        "page is in Xen heap (%lx), or dom is dying (%ld).",
                         e->tot_pages, e->max_pages, op.mfn, e->domain_flags);
                 okay = 0;
                 goto reassign_fail;
@@ -1937,7 +1937,7 @@
                      unlikely(_nd != _d) )
                 {
                     MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p,"
-                            " caf=%08x, taf=%" PRtype_info "\n",
+                            " caf=%08x, taf=%" PRtype_info,
                             page_to_pfn(page), d, d->domain_id,
                             unpickle_domptr(_nd), x, page->u.inuse.type_info);
                     okay = 0;
@@ -2301,7 +2301,7 @@
     if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
          !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
     {
-        DPRINTK("Grant map attempted to update a non-L1 page\n");
+        MEM_LOG("Grant map attempted to update a non-L1 page");
         rc = GNTST_general_error;
         goto failed;
     }
@@ -2363,7 +2363,7 @@
     if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
          !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
     {
-        DPRINTK("Grant map attempted to update a non-L1 page\n");
+        MEM_LOG("Grant map attempted to update a non-L1 page");
         rc = GNTST_general_error;
         goto failed;
     }
@@ -2378,7 +2378,7 @@
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
     {
-        DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
         put_page_type(page);
         rc = GNTST_general_error;
@@ -2388,7 +2388,7 @@
     /* Delete pagetable entry. */
     if ( unlikely(__put_user(0, (intpte_t *)va)))
     {
-        DPRINTK("Cannot delete PTE entry at %p.\n", va);
+        MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
@@ -2452,7 +2452,7 @@
 
     if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
     {
-        DPRINTK("Could not find PTE entry for address %lx\n", addr);
+        MEM_LOG("Could not find PTE entry for address %lx", addr);
         return GNTST_general_error;
     }
 
@@ -2462,7 +2462,7 @@
      */
     if ( unlikely(l1e_get_pfn(ol1e) != frame) )
     {
-        DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 l1e_get_pfn(ol1e), addr, frame);
         return GNTST_general_error;
     }
@@ -2470,7 +2470,7 @@
     /* Delete pagetable entry. */
     if ( unlikely(__put_user(0, &pl1e->l1)) )
     {
-        DPRINTK("Cannot delete PTE entry at %p.\n", (unsigned long *)pl1e);
+        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         return GNTST_general_error;
     }
     
@@ -2930,7 +2930,7 @@
 
         if ( unlikely(!get_page_from_l1e(nl1e, d)) )
         {
-            MEM_LOG("ptwr: Could not re-validate l1 page\n");
+            MEM_LOG("ptwr: Could not re-validate l1 page");
             /*
              * Make the remaining p.t's consistent before crashing, so the
              * reference counts are correct.
@@ -3056,7 +3056,7 @@
     /* Aligned access only, thank you. */
     if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
     {
-        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n",
+        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
                 bytes, addr);
         return X86EMUL_UNHANDLEABLE;
     }
@@ -3089,7 +3089,7 @@
     if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                          sizeof(pte)))
     {
-        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
+        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
         return X86EMUL_UNHANDLEABLE;
     }
 
@@ -3102,7 +3102,7 @@
          (page_get_owner(page) != d) )
     {
         MEM_LOG("ptwr_emulate: Page is mistyped or bad pte "
-                "(%lx, %" PRtype_info ")\n",
+                "(%lx, %" PRtype_info ")",
                 l1e_get_pfn(pte), page->u.inuse.type_info);
         return X86EMUL_UNHANDLEABLE;
     }
diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Aug 22 18:37:48 2005
+++ b/xen/arch/x86/vmx.c        Tue Aug 23 19:03:21 2005
@@ -1712,8 +1712,6 @@
     default:
         __vmx_bug(&regs);       /* should not happen */
     }
-
-    return;
 }
 
 asmlinkage void load_cr2(void)
diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Mon Aug 22 18:37:48 2005
+++ b/xen/arch/x86/vmx_io.c     Tue Aug 23 19:03:21 2005
@@ -631,7 +631,7 @@
     return ((eflags & X86_EFLAGS_IF) == 0);
 }
 
-asmlinkage void vmx_intr_assist() 
+asmlinkage void vmx_intr_assist(void) 
 {
     int intr_type = 0;
     int highest_vector;
@@ -714,8 +714,6 @@
 
     /* We can't resume the guest if we're waiting on I/O */
     ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags));
-
-    /* We always check for interrupts before resuming guest */
 }
 
 #endif /* CONFIG_VMX */
diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Aug 22 18:37:48 2005
+++ b/xen/arch/x86/x86_32/traps.c       Tue Aug 23 19:03:21 2005
@@ -1,5 +1,6 @@
 
 #include <xen/config.h>
+#include <xen/domain_page.h>
 #include <xen/init.h>
 #include <xen/sched.h>
 #include <xen/lib.h>
@@ -86,24 +87,33 @@
 
 void show_page_walk(unsigned long addr)
 {
-    l2_pgentry_t pmd;
-    l1_pgentry_t *pte;
-
-    if ( addr < PAGE_OFFSET )
-        return;
+    unsigned long pfn = read_cr3() >> PAGE_SHIFT;
+    intpte_t *ptab, ent;
 
     printk("Pagetable walk from %08lx:\n", addr);
-    
-    pmd = idle_pg_table_l2[l2_linear_offset(addr)];
-    printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd),
-           (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
-    if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
-         (l2e_get_flags(pmd) & _PAGE_PSE) )
-        return;
-
-    pte  = __va(l2e_get_paddr(pmd));
-    pte += l1_table_offset(addr);
-    printk("  L1 = %"PRIpte"\n", l1e_get_intpte(*pte));
+    /* Print the entry for addr at each level, starting at the table in cr3. */
+#ifdef CONFIG_X86_PAE
+    ptab = map_domain_page(pfn);
+    ent = ptab[l3_table_offset(addr)];
+    printk(" L3 = %"PRIpte"\n", ent);
+    unmap_domain_page(ptab);
+    if ( !(ent & _PAGE_PRESENT) )
+        return;
+    pfn = ent >> PAGE_SHIFT;
+#endif
+    /* L2 level: the walk ends here if the entry is not present or is PSE. */
+    ptab = map_domain_page(pfn);
+    ent = ptab[l2_table_offset(addr)];
+    printk("  L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : "");
+    unmap_domain_page(ptab);
+    if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
+        return;
+    pfn = ent >> PAGE_SHIFT;
+    /* L1 level: the index must come from l1_table_offset(), not the L2 one. */
+    ptab = map_domain_page(pfn);
+    ent = ptab[l1_table_offset(addr)];
+    printk("   L1 = %"PRIpte"\n", ent);
+    unmap_domain_page(ptab);
 }
 
 #define DOUBLEFAULT_STACK_SIZE 1024
diff -r cd984b3478f6 -r cc5f88b719d0 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Mon Aug 22 18:37:48 2005
+++ b/xen/include/asm-x86/vmx.h Tue Aug 23 19:03:21 2005
@@ -31,7 +31,7 @@
 extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
 extern void vmx_asm_do_resume(void);
 extern void vmx_asm_do_launch(void);
-extern void vmx_intr_assist();
+extern void vmx_intr_assist(void);
 
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
@@ -355,7 +355,7 @@
 }
 
 /* Make sure that xen intercepts any FP accesses from current */
-static inline void vmx_stts()
+static inline void vmx_stts(void)
 {
     unsigned long cr0;
 
diff -r cd984b3478f6 -r cc5f88b719d0 xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h     Mon Aug 22 18:37:48 2005
+++ b/xen/include/public/io/blkif.h     Tue Aug 23 19:03:21 2005
@@ -58,6 +58,9 @@
 #define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
 #define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
 
+#define BLKIF_MAJOR(dev) ((dev)>>8)     /* dev with its low 8 bits shifted out */
+#define BLKIF_MINOR(dev) ((dev) & 0xff) /* low 8 bits of dev */
+
 /*
  * Generate blkif ring structures and types.
  */
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/Makefile
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/Makefile    Tue Aug 23 19:03:21 2005
@@ -0,0 +1,13 @@
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+# Sub-directories to build; the loop below visits them in the order listed.
+SUBDIRS :=
+SUBDIRS += libxenstat
+SUBDIRS += xentop
+# all, install and clean are actions, not files this Makefile produces.
+.PHONY: all install clean
+# Run the requested target in each sub-directory; set -e stops at the first error.
+all install clean:
+       @set -e; for subdir in $(SUBDIRS); do \
+               $(MAKE) -C $$subdir $@; \
+       done
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/COPYING
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/COPYING  Tue Aug 23 19:03:21 2005
@@ -0,0 +1,510 @@
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard.  To achieve this, non-free programs must
+be allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at least
+    three years, to give the same user the materials specified in
+    Subsection 6a, above, for a charge no more than the cost of
+    performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James
+  Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/Makefile
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/Makefile Tue Aug 23 19:03:21 2005
@@ -0,0 +1,142 @@
+# libxenstat: statistics-collection library for Xen
+# Copyright (C) International Business Machines Corp., 2005
+# Author: Josh Triplett <josht@xxxxxxxxxx>
+# 
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# Locate the top of the Xen tree and pull in the common tools build rules
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse
+
+# Install commands; -D makes install create missing leading directories
+INSTALL         = install
+INSTALL_PROG    = $(INSTALL) -m0755 -D
+INSTALL_DATA    = $(INSTALL) -m0644 -D
+
+# Installation directories
+prefix=/usr
+includedir=$(prefix)/include
+libdir=$(prefix)/lib
+
+LDCONFIG=ldconfig
+MAKE_LINK=ln -sf
+
+# Shared-library version: MAJOR goes into the soname, MAJOR.MINOR into the
+# file name of the shared object
+MAJOR=0
+MINOR=0
+
+# Build products and the objects they are made from
+LIB=src/libxenstat.a
+SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR)
+SHLIB_LINKS=src/libxenstat.so.$(MAJOR) src/libxenstat.so
+OBJECTS=src/xenstat.o src/xen-interface.o
+SONAME_FLAGS=-Wl,-soname -Wl,libxenstat.so.$(MAJOR)
+
+# Warnings are fatal when compiling the library's own sources
+WARN_FLAGS=-Wall -Werror
+
+# Header search paths: the library's own headers, Xen's public headers and
+# the linux-public headers from the sparse Linux tree
+CFLAGS+=-Isrc
+CFLAGS+=-I$(XEN_ROOT)/xen/include/public
+CFLAGS+=-I$(LINUX_ROOT)/include/asm-xen/linux-public/
+LDFLAGS+=-Lsrc
+
+# Default target: only the static library is built
+all: $(LIB)
+
+# Static library
+$(LIB): $(OBJECTS)
+       $(AR) rc $@ $^
+       $(RANLIB) $@
+
+# Shared library, named libxenstat.so.MAJOR.MINOR with soname libxenstat.so.MAJOR
+$(SHLIB): $(OBJECTS)
+       $(CC) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS)
+
+src/xenstat.o: src/xenstat.c src/xenstat.h src/xen-interface.h
+       $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $<
+
+src/xen-interface.o: src/xen-interface.c src/xen-interface.h
+       $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $<
+
+# Symlink chain libxenstat.so -> libxenstat.so.MAJOR -> libxenstat.so.MAJOR.MINOR.
+# $(<F) is the file name of the first prerequisite, so the soname link must
+# depend on the shared object itself; depending on $(LIB) made it point at
+# libxenstat.a.
+src/libxenstat.so.$(MAJOR): $(SHLIB)
+       $(MAKE_LINK) $(<F) $@
+
+src/libxenstat.so: src/libxenstat.so.$(MAJOR)
+       $(MAKE_LINK) $(<F) $@
+
+# Installation is currently disabled: "make install" only builds "all".
+# The intended recipe is kept below, commented out.
+install: all
+#install: all
+#      $(INSTALL_DATA) src/xenstat.h $(DESTDIR)$(includedir)/xenstat.h
+#      $(INSTALL_PROG) $(LIB) $(DESTDIR)$(libdir)/libxenstat.a
+#      $(INSTALL_PROG) $(SHLIB) \
+#                      $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR).$(MINOR)
+#      $(MAKE_LINK) libxenstat.so.$(MAJOR).$(MINOR) \
+#                   $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR)
+#      $(MAKE_LINK) libxenstat.so.$(MAJOR) \
+#                   $(DESTDIR)$(libdir)/libxenstat.so
+#      -$(LDCONFIG)
+
+# Files produced for the SWIG language bindings: the compiled extension
+# (*LIB), the generated language module (*MOD) and the generated C wrapper
+# source (*SRC)
+PYLIB=bindings/swig/python/_xenstat.so
+PYMOD=bindings/swig/python/xenstat.py
+PYSRC=bindings/swig/python/_xenstat.c
+PERLLIB=bindings/swig/perl/xenstat.so
+PERLMOD=bindings/swig/perl/xenstat.pm
+PERLSRC=bindings/swig/perl/xenstat.c
+BINDINGS=$(PYLIB) $(PYMOD) $(PERLLIB) $(PERLMOD)
+BINDINGSRC=$(PYSRC) $(PERLSRC)
+
+# The all-bindings target builds all the language bindings
+all-bindings: perl-bindings python-bindings
+
+# The install-bindings target installs all the language bindings
+install-bindings: install-perl-bindings install-python-bindings
+
+# Every binding needs the shared library, its symlinks and the public header
+$(BINDINGS): $(SHLIB) $(SHLIB_LINKS) src/xenstat.h
+
+# Passed to every swig invocation below
+SWIG_FLAGS=-module xenstat -Isrc
+
+# Python bindings
+PYTHON_VERSION=2.3
+PYTHON_FLAGS=-I/usr/include/python$(PYTHON_VERSION) -lpython$(PYTHON_VERSION)
+# Generate the C wrapper and the Python module from the SWIG interface file
+$(PYSRC) $(PYMOD): bindings/swig/xenstat.i
+       swig -python $(SWIG_FLAGS) -outdir $(@D) -o $(PYSRC) $<
+
+# Compile the generated wrapper into the extension module
+$(PYLIB): $(PYSRC)
+       $(CC) $(CFLAGS) $(LDFLAGS) $(PYTHON_FLAGS) -shared -lxenstat -o $@ $<
+
+python-bindings: $(PYLIB) $(PYMOD)
+
+pythonlibdir=$(prefix)/lib/python$(PYTHON_VERSION)/site-packages
+install-python-bindings: $(PYLIB) $(PYMOD)
+       $(INSTALL_PROG) $(PYLIB) $(DESTDIR)$(pythonlibdir)/_xenstat.so
+       $(INSTALL_PROG) $(PYMOD) $(DESTDIR)$(pythonlibdir)/xenstat.py
+
+# Setting XENSTAT_PYTHON_BINDINGS=y adds the bindings to "all" and "install"
+ifeq ($(XENSTAT_PYTHON_BINDINGS),y)
+all: python-bindings
+install: install-python-bindings
+endif
+
+# Perl bindings
+# Compiler flags are obtained from perl's own Config module at build time;
+# $$ keeps make from expanding the Perl variables.
+PERL_FLAGS=`perl -MConfig -e 'print "$$Config{ccflags} -I$$Config{archlib}/CORE";'`
+# Generate the C wrapper and the Perl module from the SWIG interface file
+$(PERLSRC) $(PERLMOD): bindings/swig/xenstat.i
+       swig -perl $(SWIG_FLAGS) -outdir $(@D) -o $(PERLSRC) $<
+
+# Compile the generated wrapper into the loadable extension
+$(PERLLIB): $(PERLSRC)
+       $(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) -shared -lxenstat -o $@ $<
+
+perl-bindings: $(PERLLIB) $(PERLMOD)
+
+perllibdir=$(prefix)/lib/perl5
+perlmoddir=$(prefix)/share/perl5
+install-perl-bindings: $(PERLLIB) $(PERLMOD)
+       $(INSTALL_PROG) $(PERLLIB) $(DESTDIR)$(perllibdir)/xenstat.so
+       $(INSTALL_PROG) $(PERLMOD) $(DESTDIR)$(perlmoddir)/xenstat.pm
+
+# Setting XENSTAT_PERL_BINDINGS=y adds the bindings to "all" and "install"
+ifeq ($(XENSTAT_PERL_BINDINGS),y)
+all: perl-bindings
+install: install-perl-bindings
+endif
+
+# Remove the libraries, symlinks, objects and all generated binding files
+clean:
+       rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS) \
+             $(BINDINGS) $(BINDINGSRC)
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/bindings/swig/perl/.empty
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/perl/.empty        Tue Aug 23 19:03:21 2005
@@ -0,0 +1,1 @@
+This directory is empty; this file is included to prevent version control systems from removing the directory.
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/bindings/swig/python/.empty
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/python/.empty      Tue Aug 23 19:03:21 2005
@@ -0,0 +1,1 @@
+This directory is empty; this file is included to prevent version control systems from removing the directory.
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/bindings/swig/xenstat.i
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/bindings/swig/xenstat.i  Tue Aug 23 19:03:21 2005
@@ -0,0 +1,8 @@
+%module xenstat_swig
+%{
+/* Copied verbatim into the generated wrapper so it can see the xenstat API */
+#include "xenstat.h"
+%}
+
+/* Have SWIG parse xenstat.h and generate a wrapper for each declaration */
+%include "xenstat.h"
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xen-interface.c
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.c      Tue Aug 23 19:03:21 2005
@@ -0,0 +1,204 @@
+/* xen-interface.c
+ *
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include "xen-interface.h"
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "version.h"
+#include "privcmd.h"
+#include "xen.h"
+
+struct xi_handle {             /* Definition behind the opaque xi_handle type */
+       int fd;                 /* Descriptor for /proc/xen/privcmd, opened in xi_init() */
+};
+
+/* Allocate a xen-interface handle and open the privcmd device.
+ *
+ * Returns the new handle, to be released with xi_uninit(), or NULL if the
+ * allocation fails or /proc/xen/privcmd cannot be opened (the latter is
+ * reported on stderr through perror). */
+xi_handle *xi_init()
+{
+       xi_handle *xi = calloc(1, sizeof(*xi));
+
+       if (!xi)
+               return NULL;
+
+       xi->fd = open("/proc/xen/privcmd", O_RDWR);
+       if (xi->fd >= 0)
+               return xi;
+
+       perror("Couldn't open /proc/xen/privcmd");
+       free(xi);
+       return NULL;
+}
+
+/* Release a handle obtained from xi_init(): close the privcmd descriptor
+ * and free the handle.  A NULL handle is accepted and ignored, mirroring
+ * free(), so error paths can call this unconditionally. */
+void xi_uninit(xi_handle *handle)
+{
+       if (handle == NULL)
+               return;
+
+       close(handle->fd);
+       free(handle);
+}
+
+/* Fetch the Xen version number and the extraversion string with a single
+ * multicall of two __HYPERVISOR_xen_version operations.
+ *
+ * handle: open xen-interface handle.
+ * vnum:   receives the result of the XENVER_version call; written only
+ *         when the hypercall ioctl succeeds.
+ * ver:    buffer handed to the hypervisor for XENVER_extraversion.
+ *
+ * Returns 0 on success, -1 on failure (after reporting through perror). */
+static int xi_make_xen_version_hypercall(xi_handle *handle, long *vnum,
+                                         xen_extraversion_t *ver)
+{
+       privcmd_hypercall_t privcmd;
+       multicall_entry_t multicall[2];
+       int ret = 0;
+
+       /* set up for doing hypercall */
+       privcmd.op = __HYPERVISOR_multicall;
+       privcmd.arg[0] = (unsigned long)multicall;
+       privcmd.arg[1] = 2;
+
+       /* first one to get xen version number */
+       multicall[0].op = __HYPERVISOR_xen_version;
+       multicall[0].args[0] = (unsigned long)XENVER_version;
+
+       /* second to get xen version flag */
+       multicall[1].op = __HYPERVISOR_xen_version;
+       multicall[1].args[0] = (unsigned long)XENVER_extraversion;
+       multicall[1].args[1] = (unsigned long)ver;
+
+       if (mlock(&privcmd, sizeof(privcmd)) < 0) {
+               perror("Failed to mlock privcmd structure");
+               return -1;
+       }
+
+       /* Lock the whole two-entry array, not just its first element */
+       if (mlock(multicall, sizeof(multicall)) < 0) {
+               perror("Failed to mlock multicall_entry structure");
+               /* Only privcmd is locked at this point, so that is what
+                * has to be released */
+               munlock(&privcmd, sizeof(privcmd));
+               return -1;
+       }
+
+       if (ioctl(handle->fd, IOCTL_PRIVCMD_HYPERCALL, &privcmd) < 0) {
+               perror("Hypercall failed");
+               ret = -1;
+       } else {
+               /* The result field is only meaningful after a successful
+                * call */
+               *vnum = multicall[0].result;
+       }
+
+       munlock(&privcmd, sizeof(privcmd));
+       munlock(multicall, sizeof(multicall));
+
+       return ret;
+}
+
+/* Issue one dom0_op hypercall through the privcmd device.
+ *
+ * The cmd and interface_version fields of *dom_op are filled in here; the
+ * caller prepares the operation-specific part.  Both the hypercall
+ * descriptor and *dom_op are locked in memory around the ioctl.
+ *
+ * Returns 0 on success, -1 on failure (after reporting through perror). */
+static int xi_make_dom0_op(xi_handle *handle, dom0_op_t *dom_op,
+                           int dom_opcode)
+{
+       privcmd_hypercall_t call;
+       int rc;
+
+       /* Operation header, then the hypercall descriptor pointing at it */
+       dom_op->cmd = dom_opcode;
+       dom_op->interface_version = DOM0_INTERFACE_VERSION;
+       call.op = __HYPERVISOR_dom0_op;
+       call.arg[0] = (unsigned long)dom_op;
+
+       if (mlock(&call, sizeof(call)) < 0) {
+               perror("Failed to mlock privcmd structure");
+               return -1;
+       }
+
+       if (mlock(dom_op, sizeof(*dom_op)) < 0) {
+               perror("Failed to mlock dom0_op structure");
+               munlock(&call, sizeof(call));
+               return -1;
+       }
+
+       rc = (ioctl(handle->fd, IOCTL_PRIVCMD_HYPERCALL, &call) < 0) ? -1 : 0;
+       if (rc != 0)
+               perror("Hypercall failed");
+
+       munlock(&call, sizeof(call));
+       munlock(dom_op, sizeof(*dom_op));
+
+       return rc;
+}
+
+/* Query the hypervisor for physical machine information (DOM0_PHYSINFO).
+ * On success the result is copied into *physinfo and 0 is returned;
+ * -1 is returned if the hypercall fails. */
+int xi_get_physinfo(xi_handle *handle, dom0_physinfo_t *physinfo)
+{
+       dom0_op_t request;
+       int rc = xi_make_dom0_op(handle, &request, DOM0_PHYSINFO);
+
+       if (rc >= 0) {
+               *physinfo = request.u.physinfo;
+               return 0;
+       }
+
+       perror("DOM0_PHYSINFO Hypercall failed");
+       return -1;
+}
+
+/* Fetch per-domain information with DOM0_GETDOMAININFOLIST.
+ *
+ * info:         caller-supplied array with room for max_domains entries;
+ *               it is locked in memory for the duration of the hypercall
+ *               and unlocked again before returning.
+ * first_domain,
+ * max_domains:  passed through unchanged in the getdomaininfolist request.
+ *
+ * Returns the num_domains value reported by the hypervisor, or -1 on
+ * error (after reporting through perror). */
+int xi_get_domaininfolist(xi_handle *handle, dom0_getdomaininfo_t *info,
+                          unsigned int first_domain, unsigned int max_domains)
+{
+       dom0_op_t op;
+       size_t info_size = max_domains * sizeof(dom0_getdomaininfo_t);
+       int ret;
+
+       op.u.getdomaininfolist.first_domain = first_domain;
+       op.u.getdomaininfolist.max_domains = max_domains;
+       op.u.getdomaininfolist.buffer = info;
+
+       if (mlock(info, info_size) < 0) {
+               perror("Failed to mlock domaininfo array");
+               return -1;
+       }
+
+       if (xi_make_dom0_op(handle, &op, DOM0_GETDOMAININFOLIST) < 0) {
+               perror("DOM0_GETDOMAININFOLIST Hypercall failed");
+               ret = -1;
+       } else {
+               ret = op.u.getdomaininfolist.num_domains;
+       }
+
+       /* Release the lock taken above on every path; previously the
+        * caller's buffer stayed locked after each call */
+       munlock(info, info_size);
+
+       return ret;
+}
+
+/* Ask the hypervisor (DOM0_GETVCPUCONTEXT, with no context buffer) for the
+ * cpu_time value of one VCPU of one domain.  Returns that value, or -1 if
+ * the hypercall fails. */
+long long xi_get_vcpu_usage(xi_handle *handle, unsigned int domain,
+                            unsigned int vcpu)
+{
+       dom0_op_t request;
+
+       request.u.getvcpucontext.ctxt = NULL;
+       request.u.getvcpucontext.vcpu = vcpu;
+       request.u.getvcpucontext.domain = domain;
+
+       if (xi_make_dom0_op(handle, &request, DOM0_GETVCPUCONTEXT) >= 0)
+               return request.u.getvcpucontext.cpu_time;
+
+       perror("DOM0_GETVCPUCONTEXT Hypercall failed");
+       return -1;
+}
+
+/* Retrieve the Xen version number (*vnum) and the extraversion string
+ * (*ver) from the hypervisor.  Returns 0 on success, -1 on failure. */
+int xi_get_xen_version(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
+{
+       /* A single multicall yields both XENVER_version and
+        * XENVER_extraversion */
+       int rc = xi_make_xen_version_hypercall(handle, vnum, ver);
+
+       if (rc >= 0)
+               return 0;
+
+       perror("XEN VERSION Hypercall failed");
+       return -1;
+}
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xen-interface.h
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.h      Tue Aug 23 19:03:21 2005
@@ -0,0 +1,53 @@
+/* xen-interface.h
+ *
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+/* Include guard: without it a second inclusion would redefine the typedefs
+ * below. */
+#ifndef XEN_INTERFACE_H
+#define XEN_INTERFACE_H
+
+#include <stdint.h>
+
+/* Short fixed-width aliases.  They are declared before dom0_ops.h is
+ * included, presumably because that header uses them -- TODO confirm. */
+typedef int8_t   s8;
+typedef int16_t  s16;
+typedef int32_t  s32;
+typedef int64_t  s64;
+typedef uint8_t  u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+#include "dom0_ops.h"
+#include "version.h"
+
+/* Opaque handles */
+typedef struct xi_handle xi_handle;
+
+/* Initialize for xen-interface.  Returns a handle to be used with subsequent
+ * calls to the xen-interface functions or NULL if an error occurs. */
+xi_handle *xi_init();
+
+/* Release a handle returned by xi_init() and the resources it holds. */
+void xi_uninit(xi_handle *handle);
+
+/* Obtain xen version information from hypervisor.  Returns 0 on success,
+ * -1 on failure. */
+int xi_get_xen_version(xi_handle *, long *vnum, xen_extraversion_t *ver);
+
+/* Obtain physinfo data from dom0.  Returns 0 on success, -1 on failure. */
+int xi_get_physinfo(xi_handle *, dom0_physinfo_t *);
+
+/* Obtain domain data from dom0.  Returns the number of domains reported,
+ * or -1 on failure. */
+int xi_get_domaininfolist(xi_handle *, dom0_getdomaininfo_t *, unsigned int,
+                          unsigned int);
+
+/* Returns cpu usage data from dom0, or -1 on failure. */
+long long xi_get_vcpu_usage(xi_handle *, unsigned int, unsigned int);
+
+#endif /* XEN_INTERFACE_H */
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xenstat.c
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Tue Aug 23 19:03:21 2005
@@ -0,0 +1,640 @@
+/* libxenstat: statistics-collection library for Xen
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *          David Hendricks <dhendrix@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <xen-interface.h>
+#include "xenstat.h"
+#include "version.h"
+
+/*
+ * Types
+ */
+struct xenstat_handle {
+       xi_handle *xihandle;    /* From xi_init(); released in xenstat_uninit() */
+       int page_size;          /* Multiplies page counts into memory totals */
+       FILE *procnetdev;       /* /proc/net/dev, opened on first network collection */
+};
+
+#define SHORT_ASC_LEN 5                /* length of 65535 */
+#define VERSION_SIZE (2 * SHORT_ASC_LEN + 1 + sizeof(xen_extraversion_t) + 1)
+
+struct xenstat_node {
+       unsigned int flags;             /* Flags of the collectors run for this node */
+       unsigned long long cpu_hz;      /* physinfo.cpu_khz * 1000 */
+       unsigned int num_cpus;          /* threads * cores * sockets * nodes from physinfo */
+       unsigned long long tot_mem;     /* physinfo.total_pages * page size */
+       unsigned long long free_mem;    /* physinfo.free_pages * page size */
+       unsigned int num_domains;       /* Entries in domains[] */
+       char xen_version[VERSION_SIZE]; /* xen version running on this node */
+       xenstat_domain *domains;        /* Array of length num_domains */
+};
+
+struct xenstat_domain {
+       unsigned int id;                /* Domain ID (domaininfo.domain) */
+       unsigned int state;             /* domaininfo.flags; tested against DOMFLAGS_* */
+       unsigned long long cpu_ns;      /* domaininfo.cpu_time */
+       unsigned int num_vcpus;         /* domaininfo.n_vcpu */
+       xenstat_vcpu *vcpus;            /* Array of length num_vcpus; NULL unless VCPUs were collected */
+       unsigned long long cur_mem;     /* Current memory reservation */
+       unsigned long long max_mem;     /* Total memory allowed; (unsigned long long)-1 if max_pages is UINT_MAX */
+       unsigned int ssid;              /* domaininfo.ssidref */
+       unsigned int num_networks;      /* Entries in networks[] */
+       xenstat_network *networks;      /* Array of length num_networks */
+};
+
+struct xenstat_vcpu {
+       unsigned long long ns;  /* Value returned by xi_get_vcpu_usage() for this VCPU */
+};
+
+struct xenstat_network {
+       unsigned int id;        /* N in the interface name vif<domid>.<N> */
+       /* Received: filled from the second (Transmit) column group of /proc/net/dev */
+       unsigned long long rbytes;
+       unsigned long long rpackets;
+       unsigned long long rerrs;
+       unsigned long long rdrop;
+       /* Transmitted: filled from the first (Receive) column group of /proc/net/dev */
+       unsigned long long tbytes;
+       unsigned long long tpackets;
+       unsigned long long terrs;
+       unsigned long long tdrop;
+};
+
+/*
+ * Data-collection types
+ */
+/* Called to collect the information for the node and all the domains on
+ * it. When called, the domain information has already been collected.
+ * A return value of 0 makes xenstat_get_node() fail. */
+typedef int (*xenstat_collect_func)(xenstat_handle * handle,
+                                   xenstat_node * node);
+/* Called to free the information collected by the collect function.  The free
+ * function will only be called on a xenstat_node if that node includes
+ * information collected by the corresponding collector. */
+typedef void (*xenstat_free_func)(xenstat_node * node);
+/* Called to free any information stored in the handle.  Note the lack of a
+ * matching init function; the collect functions should initialize on first
+ * use.  Also, the uninit function must handle the case that the collector has
+ * never been initialized. */
+typedef void (*xenstat_uninit_func)(xenstat_handle * handle);
+typedef struct xenstat_collector {
+       unsigned int flag;              /* Request flag that selects this collector */
+       xenstat_collect_func collect;   /* Gathers the data into the node */
+       xenstat_free_func free;         /* Releases that data from the node */
+       xenstat_uninit_func uninit;     /* Releases collector state kept in the handle */
+} xenstat_collector;
+
+static int  xenstat_collect_vcpus(xenstat_handle * handle,
+                                 xenstat_node * node);
+static int  xenstat_collect_networks(xenstat_handle * handle,
+                                   xenstat_node * node);
+static void xenstat_free_vcpus(xenstat_node * node);
+static void xenstat_free_networks(xenstat_node * node);
+static void xenstat_uninit_vcpus(xenstat_handle * handle);
+static void xenstat_uninit_networks(xenstat_handle * handle);
+
+static xenstat_collector collectors[] = {      /* One entry per optional data set */
+       { XENSTAT_VCPU, xenstat_collect_vcpus,
+         xenstat_free_vcpus, xenstat_uninit_vcpus },
+       { XENSTAT_NETWORK, xenstat_collect_networks,
+         xenstat_free_networks, xenstat_uninit_networks }
+};
+
+#define NUM_COLLECTORS (sizeof(collectors)/sizeof(xenstat_collector))  /* Entries in collectors[] */
+
+/*
+ * libxenstat API
+ */
+/* Create a libxenstat handle: record the system page size and open the
+ * xen-interface.  Returns the handle, to be released with
+ * xenstat_uninit(), or NULL on any failure. */
+xenstat_handle *xenstat_init()
+{
+       xenstat_handle *xh = calloc(1, sizeof(*xh));
+
+       if (!xh)
+               return NULL;
+
+       /* Prefer a compile-time page size; otherwise ask the system */
+#if defined(PAGESIZE)
+       xh->page_size = PAGESIZE;
+#elif defined(PAGE_SIZE)
+       xh->page_size = PAGE_SIZE;
+#else
+       xh->page_size = sysconf(_SC_PAGE_SIZE);
+       if (xh->page_size < 0) {
+               perror("Failed to retrieve page size.");
+               free(xh);
+               return NULL;
+       }
+#endif
+
+       xh->xihandle = xi_init();
+       if (xh->xihandle != NULL)
+               return xh;
+
+       perror("xi_init");
+       free(xh);
+       return NULL;
+}
+
+/* Release a handle created by xenstat_init().  A NULL handle is ignored. */
+void xenstat_uninit(xenstat_handle * handle)
+{
+       unsigned int n;
+
+       if (handle == NULL)
+               return;
+
+       /* Let every collector drop whatever it stored in the handle */
+       for (n = 0; n != NUM_COLLECTORS; n++)
+               collectors[n].uninit(handle);
+
+       xi_uninit(handle->xihandle);
+       free(handle);
+}
+
+/* Take a snapshot of the node: physical information, Xen version and one
+ * xenstat_domain entry per domain, plus the optional data sets selected
+ * by flags (each collector whose flag bits are all set in flags is run).
+ *
+ * Returns a node that the caller releases with xenstat_free_node(), or
+ * NULL if an allocation, a hypercall or a collector fails. */
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags)
+{
+#define DOMAIN_CHUNK_SIZE 256
+       xenstat_node *node;
+       dom0_physinfo_t physinfo;
+       xen_extraversion_t version;
+       long vnum = 0;
+       dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
+       unsigned int num_domains, new_domains;
+       unsigned int i;
+
+       /* Create the node; calloc leaves node->domains NULL, so the fail
+        * path below can free it unconditionally */
+       node = calloc(1, sizeof(*node));
+       if (node == NULL)
+               return NULL;
+
+       /* Get information about the physical system */
+       if (xi_get_physinfo(handle->xihandle, &physinfo) < 0)
+               goto fail;
+
+       /* Get the xen version number and xen version tag */
+       if (xi_get_xen_version(handle->xihandle, &vnum, &version) < 0)
+               goto fail;
+       snprintf(node->xen_version, VERSION_SIZE,
+                "%ld.%ld%s\n", ((vnum >> 16) & 0xFFFF), vnum & 0xFFFF,
+                (char *)version);
+
+       node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
+       node->num_cpus =
+           (physinfo.threads_per_core * physinfo.cores_per_socket *
+            physinfo.sockets_per_node * physinfo.nr_nodes);
+       node->tot_mem = ((unsigned long long)physinfo.total_pages)
+           * handle->page_size;
+       node->free_mem = ((unsigned long long)physinfo.free_pages)
+           * handle->page_size;
+
+       /* malloc(0) is not portable, so allocate a single domain.  This will
+        * be resized below. */
+       node->domains = malloc(sizeof(xenstat_domain));
+       if (node->domains == NULL)
+               goto fail;
+
+       num_domains = 0;
+       do {
+               xenstat_domain *domain;
+               int fetched;
+
+               /* Keep the result signed until it is known not to be the
+                * -1 error return; as an unsigned count it would be huge */
+               fetched = xi_get_domaininfolist(handle->xihandle,
+                                               domaininfo, num_domains,
+                                               DOMAIN_CHUNK_SIZE);
+               if (fetched < 0)
+                       goto fail;
+               new_domains = fetched;
+
+               /* Only grow the array when there is something to add, so
+                * realloc is never asked for zero bytes */
+               if (new_domains > 0) {
+                       /* Go through a temporary: on failure realloc
+                        * leaves the old block allocated */
+                       xenstat_domain *grown =
+                           realloc(node->domains,
+                                   (num_domains + new_domains)
+                                   * sizeof(xenstat_domain));
+                       if (grown == NULL)
+                               goto fail;
+                       node->domains = grown;
+               }
+
+               domain = node->domains + num_domains;
+
+               for (i = 0; i < new_domains; i++) {
+                       /* Fill in domain using domaininfo[i] */
+                       domain->id = domaininfo[i].domain;
+                       domain->state = domaininfo[i].flags;
+                       domain->cpu_ns = domaininfo[i].cpu_time;
+                       domain->num_vcpus = domaininfo[i].n_vcpu;
+                       domain->vcpus = NULL;
+                       domain->cur_mem =
+                           ((unsigned long long)domaininfo[i].tot_pages)
+                           * handle->page_size;
+                       /* Widen before multiplying so the product cannot
+                        * overflow the narrower type */
+                       domain->max_mem =
+                           domaininfo[i].max_pages == UINT_MAX
+                           ? (unsigned long long)-1
+                           : ((unsigned long long)domaininfo[i].max_pages)
+                             * handle->page_size;
+                       domain->ssid = domaininfo[i].ssidref;
+                       domain->num_networks = 0;
+                       domain->networks = NULL;
+
+                       domain++;
+               }
+               num_domains += new_domains;
+       } while (new_domains == DOMAIN_CHUNK_SIZE);
+       node->num_domains = num_domains;
+
+       /* Run all the extra data collectors requested */
+       node->flags = 0;
+       for (i = 0; i < NUM_COLLECTORS; i++) {
+               if ((flags & collectors[i].flag) == collectors[i].flag) {
+                       /* Record the flag first so xenstat_free_node()
+                        * also cleans up a partially collected data set */
+                       node->flags |= collectors[i].flag;
+                       if (collectors[i].collect(handle, node) == 0) {
+                               xenstat_free_node(node);
+                               return NULL;
+                       }
+               }
+       }
+
+       return node;
+
+fail:
+       free(node->domains);
+       free(node);
+       return NULL;
+}
+
+/* Free a node returned by xenstat_get_node(), including the data of every
+ * collector recorded in node->flags.  A NULL node is ignored. */
+void xenstat_free_node(xenstat_node * node)
+{
+       unsigned int n;
+
+       if (node == NULL)
+               return;
+
+       if (node->domains != NULL) {
+               for (n = 0; n < NUM_COLLECTORS; n++) {
+                       unsigned int bits = collectors[n].flag;
+
+                       if ((node->flags & bits) == bits)
+                               collectors[n].free(node);
+               }
+               free(node->domains);
+       }
+
+       free(node);
+}
+
+/* Look up a domain of the node by domain ID.  Returns a pointer into the
+ * node's domain array, or NULL if no domain has that ID. */
+xenstat_domain *xenstat_node_domain(xenstat_node * node, unsigned int domid)
+{
+       unsigned int idx = 0;
+
+       /* FIXME: binary search */
+       /* Linear scan over the node's domain entries */
+       while (idx < node->num_domains) {
+               xenstat_domain *candidate = &node->domains[idx];
+
+               if (candidate->id == domid)
+                       return candidate;
+               idx++;
+       }
+
+       return NULL;
+}
+
+/* Return the domain at position index in the node's domain array, or
+ * NULL if index is out of range. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+                                            unsigned int index)
+{
+       /* index is unsigned, so only the upper bound can be violated */
+       if (index >= node->num_domains)
+               return NULL;
+
+       return &node->domains[index];
+}
+
+/* Xen version string of the node ("<major>.<minor><extraversion>" followed
+ * by a newline).  The string is stored inside the node. */
+const char *xenstat_node_xen_ver(xenstat_node *node)
+{
+       return node->xen_version;
+}
+
+/* Total memory of the node: total page count times the page size */
+unsigned long long xenstat_node_tot_mem(xenstat_node *node)
+{
+       return node->tot_mem;
+}
+
+/* Free memory of the node: free page count times the page size */
+unsigned long long xenstat_node_free_mem(xenstat_node *node)
+{
+       return node->free_mem;
+}
+
+/* Number of domains recorded in the node */
+unsigned int xenstat_node_num_domains(xenstat_node *node)
+{
+       return node->num_domains;
+}
+
+/* Number of CPUs of the node, as computed from the physinfo topology */
+unsigned int xenstat_node_num_cpus(xenstat_node *node)
+{
+       return node->num_cpus;
+}
+
+/* CPU speed of the node in Hz */
+unsigned long long xenstat_node_cpu_hz(xenstat_node *node)
+{
+       return node->cpu_hz;
+}
+
+/* Domain ID of this domain */
+unsigned xenstat_domain_id(xenstat_domain *domain)
+{
+       return domain->id;
+}
+
+/* CPU time the domain has used so far */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain *domain)
+{
+       return domain->cpu_ns;
+}
+
+/* Number of VCPUs allocated to the domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain *domain)
+{
+       return domain->num_vcpus;
+}
+
+/* Return the entry for VCPU number vcpu of the domain, or NULL if vcpu is
+ * out of range or no VCPU data was collected for this node.
+ *
+ * domain->vcpus stays NULL unless the VCPU collector ran, so it has to be
+ * checked before indexing, as xenstat_domain_network() does for networks. */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain, unsigned int vcpu)
+{
+       if (domain->vcpus == NULL || vcpu >= domain->num_vcpus)
+               return NULL;
+
+       return &(domain->vcpus[vcpu]);
+}
+
+/* Current memory reservation of the domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain *domain)
+{
+       return domain->cur_mem;
+}
+
+/* Maximum memory reservation of the domain; (unsigned long long)-1 when
+ * the hypervisor reported max_pages as UINT_MAX */
+unsigned long long xenstat_domain_max_mem(xenstat_domain *domain)
+{
+       return domain->max_mem;
+}
+
+/* SSID of the domain */
+unsigned int xenstat_domain_ssid(xenstat_domain *domain)
+{
+       return domain->ssid;
+}
+
+/* Get domain states */
+
+/* Nonzero when every bit of mask is set in the domain's state word */
+static unsigned int xenstat_domain_has_flags(xenstat_domain *domain,
+                                             unsigned int mask)
+{
+       return (domain->state & mask) == mask;
+}
+
+/* Shutdown reason field extracted from the domain's state word */
+static unsigned int xenstat_domain_shutdown_reason(xenstat_domain *domain)
+{
+       return (domain->state >> DOMFLAGS_SHUTDOWNSHIFT)
+           & DOMFLAGS_SHUTDOWNMASK;
+}
+
+/* 1 if the domain is dying, else 0 */
+unsigned int xenstat_domain_dying(xenstat_domain * domain)
+{
+       return xenstat_domain_has_flags(domain, DOMFLAGS_DYING);
+}
+
+/* 1 if the domain is shut down with reason SHUTDOWN_crash, else 0 */
+unsigned int xenstat_domain_crashed(xenstat_domain * domain)
+{
+       if (!xenstat_domain_has_flags(domain, DOMFLAGS_SHUTDOWN))
+               return 0;
+
+       return xenstat_domain_shutdown_reason(domain) == SHUTDOWN_crash;
+}
+
+/* 1 if the domain is shut down for any reason other than a crash, else 0 */
+unsigned int xenstat_domain_shutdown(xenstat_domain * domain)
+{
+       if (!xenstat_domain_has_flags(domain, DOMFLAGS_SHUTDOWN))
+               return 0;
+
+       return xenstat_domain_shutdown_reason(domain) != SHUTDOWN_crash;
+}
+
+/* 1 if the domain is paused, else 0 */
+unsigned int xenstat_domain_paused(xenstat_domain * domain)
+{
+       return xenstat_domain_has_flags(domain, DOMFLAGS_PAUSED);
+}
+
+/* 1 if the domain is blocked, else 0 */
+unsigned int xenstat_domain_blocked(xenstat_domain * domain)
+{
+       return xenstat_domain_has_flags(domain, DOMFLAGS_BLOCKED);
+}
+
+/* 1 if the domain is running, else 0 */
+unsigned int xenstat_domain_running(xenstat_domain * domain)
+{
+       return xenstat_domain_has_flags(domain, DOMFLAGS_RUNNING);
+}
+
+/* Number of network interfaces recorded for the domain; 0 unless network
+ * data was collected */
+unsigned int xenstat_domain_num_networks(xenstat_domain *domain)
+{
+       return domain->num_networks;
+}
+
+/* Return the entry for network number network of the domain, from which
+ * the network statistics are read.  Returns NULL if the domain has no
+ * network data or network is out of range. */
+xenstat_network *xenstat_domain_network(xenstat_domain * domain,
+                                       unsigned int network)
+{
+       if (domain->networks == NULL)
+               return NULL;
+
+       /* network is unsigned, so only the upper bound can be violated */
+       if (network >= domain->num_networks)
+               return NULL;
+
+       return &domain->networks[network];
+}
+
+/*
+ * VCPU functions
+ */
+/* Collect information about VCPUs: for every domain of the node allocate
+ * the vcpus array and fill it with one xi_get_vcpu_usage() result per
+ * VCPU.
+ *
+ * Returns 1 on success and 0 on failure.  Arrays allocated before a
+ * failure stay attached to their domains for xenstat_free_vcpus(). */
+static int xenstat_collect_vcpus(xenstat_handle * handle, xenstat_node * node)
+{
+       unsigned int i, vcpu;
+
+       /* Fill in VCPU information */
+       for (i = 0; i < node->num_domains; i++) {
+               xenstat_domain *domain = &node->domains[i];
+
+               /* malloc(0) may legitimately return NULL, which must not be
+                * mistaken for an allocation failure; a domain without
+                * VCPUs keeps its vcpus pointer unchanged */
+               if (domain->num_vcpus == 0)
+                       continue;
+
+               domain->vcpus = malloc(domain->num_vcpus
+                                      * sizeof(xenstat_vcpu));
+               if (domain->vcpus == NULL)
+                       return 0;
+
+               for (vcpu = 0; vcpu < domain->num_vcpus; vcpu++) {
+                       /* FIXME: need to be using a more efficient mechanism*/
+                       long long vcpu_time;
+                       vcpu_time =
+                           xi_get_vcpu_usage(handle->xihandle,
+                                             domain->id,
+                                             vcpu);
+                       if (vcpu_time < 0)
+                               return 0;
+                       domain->vcpus[vcpu].ns = vcpu_time;
+               }
+       }
+       return 1;
+}
+
+/* Release the vcpus array of every domain in the node */
+static void xenstat_free_vcpus(xenstat_node * node)
+{
+       unsigned int remaining = node->num_domains;
+       unsigned int idx = 0;
+
+       while (remaining-- > 0) {
+               free(node->domains[idx].vcpus);
+               idx++;
+       }
+}
+
+/* Handle teardown hook of the VCPU collector.  The collector keeps no
+ * state in the handle, so there is nothing to release. */
+static void xenstat_uninit_vcpus(xenstat_handle * handle)
+{
+       (void)handle;
+}
+
+/* CPU time recorded for this VCPU when the node was collected */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu *vcpu)
+{
+       return vcpu->ns;
+}
+
+/*
+ * Network functions
+ */
+
+/* Exact two-line header expected at the start of /proc/net/dev; compared byte-for-byte before any interface line is parsed */
+static const char PROCNETDEV_HEADER[] =
+    "Inter-|   Receive                                                |"
+    "  Transmit\n"
+    " face |bytes    packets errs drop fifo frame compressed multicast|"
+    "bytes    packets errs drop fifo colls carrier compressed\n";
+
+/* Collect information about networks.
+ *
+ * On first use this opens /proc/net/dev, caches the stream in
+ * handle->procnetdev and checks that the file starts with
+ * PROCNETDEV_HEADER.  Every "vif<domid>.<id>:" line is then parsed and its
+ * counters are appended to the networks array of the matching domain in
+ * node; lines that do not parse as a vif entry are skipped.
+ *
+ * NOTE(review): every path, including the error paths, returns 1, so the
+ * return value cannot tell success from failure -- confirm the convention
+ * the caller expects before changing it. */
+static int xenstat_collect_networks(xenstat_handle * handle,
+                                   xenstat_node * node)
+{
+       /* Open and validate /proc/net/dev if we haven't already */
+       if (handle->procnetdev == NULL) {
+               char header[sizeof(PROCNETDEV_HEADER)];
+               int header_ok = 0;
+
+               handle->procnetdev = fopen("/proc/net/dev", "r");
+               if (handle->procnetdev == NULL) {
+                       perror("Error opening /proc/net/dev");
+                       return 1;
+               }
+
+               /* Validate the format of /proc/net/dev */
+               if (fread(header, sizeof(PROCNETDEV_HEADER) - 1, 1,
+                         handle->procnetdev) != 1) {
+                       perror("Error reading /proc/net/dev header");
+               } else {
+                       header[sizeof(PROCNETDEV_HEADER) - 1] = '\0';
+                       if (strcmp(header, PROCNETDEV_HEADER) == 0)
+                               header_ok = 1;
+                       else
+                               fprintf(stderr,
+                                       "Unexpected /proc/net/dev format\n");
+               }
+
+               /* Do not keep an unvalidated stream cached: otherwise the
+                * next call would see a non-NULL procnetdev, skip the check
+                * above and parse a file of unknown format. */
+               if (!header_ok) {
+                       fclose(handle->procnetdev);
+                       handle->procnetdev = NULL;
+                       return 1;
+               }
+       }
+
+       /* Fill in networks */
+       /* FIXME: optimize this */
+       if (fseek(handle->procnetdev, sizeof(PROCNETDEV_HEADER) - 1,
+                 SEEK_SET) != 0) {
+               perror("Error seeking in /proc/net/dev");
+               return 1;
+       }
+       while (1) {
+               xenstat_domain *domain;
+               xenstat_network net;
+               xenstat_network *grown;
+               unsigned int domid;
+               unsigned int new_count;
+               /* The first four counters of a line (the Receive columns of
+                * the file) are stored in the t* fields and the Transmit
+                * columns in the r* fields; presumably because what the vif
+                * receives is what the domain transmits -- confirm. */
+               int ret = fscanf(handle->procnetdev,
+                                "vif%u.%u:%llu%llu%llu%llu%*u%*u%*u%*u"
+                                "%llu%llu%llu%llu%*u%*u%*u%*u\n",
+                                &domid, &net.id,
+                                &net.tbytes, &net.tpackets, &net.terrs,
+                                &net.tdrop,
+                                &net.rbytes, &net.rpackets, &net.rerrs,
+                                &net.rdrop);
+               if (ret == EOF)
+                       break;
+               if (ret != 10) {
+                       /* Not a complete vif line: discard the rest of it.
+                        * fgetc() returns an int so that EOF stays distinct
+                        * from every valid character. */
+                       int c;
+                       do {
+                               c = fgetc(handle->procnetdev);
+                       } while (c != '\n' && c != EOF);
+                       if (c == EOF)
+                               break;
+                       continue;
+               }
+
+               /* FIXME: this does a search for the domid */
+               domain = xenstat_node_domain(node, domid);
+               if (domain == NULL) {
+                       fprintf(stderr,
+                               "Found interface vif%u.%u but domain %u"
+                               " does not exist.\n", domid, net.id,
+                               domid);
+                       continue;
+               }
+
+               /* Grow the array through a temporary so that a failed
+                * realloc() neither leaks the existing entries nor leaves
+                * num_networks counting a slot that was never allocated.
+                * realloc(NULL, size) behaves like malloc(size). */
+               if (domain->networks == NULL)
+                       new_count = 1;
+               else
+                       new_count = domain->num_networks + 1;
+               grown = realloc(domain->networks,
+                               new_count * sizeof(xenstat_network));
+               if (grown == NULL)
+                       return 1;
+               domain->networks = grown;
+               domain->num_networks = new_count;
+               domain->networks[new_count - 1] = net;
+       }
+
+       return 1;
+}
+
+/* Free network information */
+static void xenstat_free_networks(xenstat_node * node)
+{
+       unsigned int idx = 0;
+
+       /* Release the networks array of each of the node's domains. */
+       while (idx < node->num_domains) {
+               free(node->domains[idx].networks);
+               idx++;
+       }
+}
+
+/* Free network information in handle */
+static void xenstat_uninit_networks(xenstat_handle * handle)
+{
+       /* No stream was ever opened: nothing to close. */
+       if (handle->procnetdev == NULL)
+               return;
+
+       fclose(handle->procnetdev);
+}
+
+/* Get the network ID */
+unsigned int xenstat_network_id(xenstat_network * network)
+{
+       /* The id field as stored in the network record. */
+       unsigned int net_id = network->id;
+
+       return net_id;
+}
+
+/* Get the number of receive bytes */
+unsigned long long xenstat_network_rbytes(xenstat_network * network)
+{
+       /* The rbytes counter as stored in the network record. */
+       unsigned long long count = network->rbytes;
+
+       return count;
+}
+
+/* Get the number of receive packets */
+unsigned long long xenstat_network_rpackets(xenstat_network * network)
+{
+       /* The rpackets counter as stored in the network record. */
+       unsigned long long count = network->rpackets;
+
+       return count;
+}
+
+/* Get the number of receive errors */
+unsigned long long xenstat_network_rerrs(xenstat_network * network)
+{
+       /* The rerrs counter as stored in the network record. */
+       unsigned long long count = network->rerrs;
+
+       return count;
+}
+
+/* Get the number of receive drops */
+unsigned long long xenstat_network_rdrop(xenstat_network * network)
+{
+       /* The rdrop counter as stored in the network record. */
+       unsigned long long count = network->rdrop;
+
+       return count;
+}
+
+/* Get the number of transmit bytes */
+unsigned long long xenstat_network_tbytes(xenstat_network * network)
+{
+       /* The tbytes counter as stored in the network record. */
+       unsigned long long count = network->tbytes;
+
+       return count;
+}
+
+/* Get the number of transmit packets */
+unsigned long long xenstat_network_tpackets(xenstat_network * network)
+{
+       /* The tpackets counter as stored in the network record. */
+       unsigned long long count = network->tpackets;
+
+       return count;
+}
+
+/* Get the number of transmit errors */
+unsigned long long xenstat_network_terrs(xenstat_network * network)
+{
+       /* The terrs counter as stored in the network record. */
+       unsigned long long count = network->terrs;
+
+       return count;
+}
+
+/* Get the number of transmit dropped packets */
+unsigned long long xenstat_network_tdrop(xenstat_network * network)
+{
+       /* The tdrop counter as stored in the network record. */
+       unsigned long long count = network->tdrop;
+
+       return count;
+}
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xenstat.h
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.h    Tue Aug 23 19:03:21 2005
@@ -0,0 +1,150 @@
+/* libxenstat: statistics-collection library for Xen
+ * Copyright (C) International Business Machines Corp., 2005
+ * Authors: Josh Triplett <josht@xxxxxxxxxx>
+ *          Judy Fischbach <jfisch@xxxxxxxxxx>
+ *          David Hendricks <dhendrix@xxxxxxxxxx>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ */
+
+/* libxenstat API */
+
+/* Opaque handles */
+typedef struct xenstat_handle xenstat_handle;
+typedef struct xenstat_domain xenstat_domain;
+typedef struct xenstat_node xenstat_node;
+typedef struct xenstat_vcpu xenstat_vcpu;
+typedef struct xenstat_network xenstat_network;
+
+/* Initialize the xenstat library.  Returns a handle to be used with
+ * subsequent calls to the xenstat library, or NULL if an error occurs.
+ * Declared with (void) so that this is a real prototype: an empty
+ * parameter list in a C declaration leaves the arguments unchecked. */
+xenstat_handle *xenstat_init(void);
+
+/* Release the handle to libxc, free resources, etc. */
+void xenstat_uninit(xenstat_handle * handle);
+
+/* Get all available information about a node */
+#define XENSTAT_VCPU 0x1
+#define XENSTAT_NETWORK 0x2
+#define XENSTAT_ALL (XENSTAT_VCPU|XENSTAT_NETWORK)
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags);
+
+/* Free the information */
+void xenstat_free_node(xenstat_node * node);
+
+/*
+ * Node functions - extract information from a xenstat_node
+ */
+
+/* Get information about the domain with the given domain ID */
+xenstat_domain *xenstat_node_domain(xenstat_node * node,
+                                   unsigned int domid);
+
+/* Get the domain with the given index; used to loop over all domains. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+                                            unsigned index);
+/* Get xen version of the node */
+const char *xenstat_node_xen_ver(xenstat_node * node);
+
+/* Get amount of total memory on a node */
+unsigned long long xenstat_node_tot_mem(xenstat_node * node);
+
+/* Get amount of free memory on a node */
+unsigned long long xenstat_node_free_mem(xenstat_node * node);
+
+/* Find the number of domains existing on a node */
+unsigned int xenstat_node_num_domains(xenstat_node * node);
+
+/* Find the number of CPUs existing on a node */
+unsigned int xenstat_node_num_cpus(xenstat_node * node);
+
+/* Get information about the CPU speed */
+unsigned long long xenstat_node_cpu_hz(xenstat_node * node);
+
+/*
+ * Domain functions - extract information from a xenstat_domain
+ */
+
+/* Get the domain ID for this domain */
+unsigned xenstat_domain_id(xenstat_domain * domain);
+
+/* Get information about how much CPU time has been used */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain);
+
+/* Find the number of VCPUs allocated to a domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain * domain);
+
+/* Get the VCPU handle to obtain VCPU stats */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain,
+                                 unsigned int vcpu);
+
+/* Find the current memory reservation for this domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain);
+
+/* Find the maximum memory reservation for this domain */
+unsigned long long xenstat_domain_max_mem(xenstat_domain * domain);
+
+/* Find the domain's SSID */
+unsigned int xenstat_domain_ssid(xenstat_domain * domain);
+
+/* Get domain states */
+unsigned int xenstat_domain_dying(xenstat_domain * domain);
+unsigned int xenstat_domain_crashed(xenstat_domain * domain);
+unsigned int xenstat_domain_shutdown(xenstat_domain * domain);
+unsigned int xenstat_domain_paused(xenstat_domain * domain);
+unsigned int xenstat_domain_blocked(xenstat_domain * domain);
+unsigned int xenstat_domain_running(xenstat_domain * domain);
+
+/* Get the number of networks for a given domain */
+unsigned int xenstat_domain_num_networks(xenstat_domain *);
+
+/* Get the network handle to obtain network stats */
+xenstat_network *xenstat_domain_network(xenstat_domain * domain,
+                                       unsigned int network);
+
+/*
+ * VCPU functions - extract information from a xenstat_vcpu
+ */
+
+/* Get VCPU usage */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu);
+
+
+/*
+ * Network functions - extract information from a xenstat_network
+ */
+
+/* Get the ID for this network */
+unsigned int xenstat_network_id(xenstat_network * network);
+
+/* Get the number of receive bytes for this network */
+unsigned long long xenstat_network_rbytes(xenstat_network * network);
+
+/* Get the number of receive packets for this network */
+unsigned long long xenstat_network_rpackets(xenstat_network * network);
+
+/* Get the number of receive errors for this network */
+unsigned long long xenstat_network_rerrs(xenstat_network * network);
+
+/* Get the number of receive drops for this network */
+unsigned long long xenstat_network_rdrop(xenstat_network * network);
+
+/* Get the number of transmit bytes for this network */
+unsigned long long xenstat_network_tbytes(xenstat_network * network);
+
+/* Get the number of transmit packets for this network */
+unsigned long long xenstat_network_tpackets(xenstat_network * network);
+
+/* Get the number of transmit errors for this network */
+unsigned long long xenstat_network_terrs(xenstat_network * network);
+
+/* Get the number of transmit drops for this network */
+unsigned long long xenstat_network_tdrop(xenstat_network * network);
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/Makefile
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/xentop/Makefile     Tue Aug 23 19:03:21 2005
@@ -0,0 +1,44 @@
+# Copyright (C) International Business Machines Corp., 2005
+# Author: Josh Triplett <josht@xxxxxxxxxx>
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; under version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+ifneq ($(XENSTAT_XENTOP),y)
+all install xentop:
+else
+
+INSTALL         = install
+INSTALL_PROG    = $(INSTALL) -m0755 -D
+INSTALL_DATA    = $(INSTALL) -m0644 -D
+
+prefix=/usr
+mandir=$(prefix)/share/man
+man1dir=$(mandir)/man1
+sbindir=$(prefix)/sbin
+
+CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT)
+LDFLAGS += -L$(XEN_LIBXENSTAT)
+LDLIBS += -lxenstat -lncurses
+
+all: xentop
+
+xentop: xentop.o
+
+install: xentop xentop.1
+       $(INSTALL_PROG) xentop $(DESTDIR)$(sbindir)/xentop
+       $(INSTALL_DATA) xentop.1 $(DESTDIR)$(man1dir)/xentop.1
+
+endif
+
+# These targets never produce a file of the same name; mark them phony so a
+# stray file called "all", "install" or "clean" cannot mask them.
+.PHONY: all install clean
+
+clean:
+       rm -f xentop xentop.o
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/TODO
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/xentop/TODO Tue Aug 23 19:03:21 2005
@@ -0,0 +1,34 @@
+Display error messages on the help line after bad input at a prompt.
+Fractional delay times
+Use prompting to search for domains
+Better line editing?
+
+* Make CPU in % more accurate
+* Domain total network TX % and RX %
+
+Like Top, f feature, field select of domain columns, toggle the display of
+field by typing the letter associated with field, if displayed it shows in
+bold and the letter is Capitalized along with a leading asterisk for the
+field, if not selected for display letter is lowercase, no leading asterisk
+and field is not bolded.
+
+Like Top, ordering of domain columns, o feature Capital letter shifts left,
+lowercase letter shifts right?
+
+Color
+Full management: pause, destroy, create domains
+
+Add support for Virtual Block Devices (vbd)
+
+To think about:
+Support for more than one node display (distributed monitoring
+from any node of all other nodes in a cluster)
+Bottom line option (Switch node, Search node [tab completion?])
+
+Capture/Logging of resource information generated during a time interval.
+-b batch mode dump snapshots to standard output (used with -n)
+-n number of iterations to dump to standard output (unlimited if not specified)
+-d monitor DomIDs as -dD1,-dD2 or -dD1,D2...
+   Monitor only domains with specified domain IDs
+-m monitor nodeIDs as -mN1,-mN2 or -mN1,N2...
+   Monitor only domains with specified node IDs
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/xentop.1
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/xentop/xentop.1     Tue Aug 23 19:03:21 2005
@@ -0,0 +1,88 @@
+.\" Copyright (C) International Business Machines  Corp., 2005
+.\" Author: Josh Triplett <josht@xxxxxxxxxx>
+.\"
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; under version 2 of the License.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+.TH xentop 1 "August 2005"
+.SH NAME
+\fBxentop\fR \- displays real-time information about a Xen system and domains
+
+.SH SYNOPSIS
+.B xentop
+[\fB\-h\fR]
+[\fB\-V\fR]
+[\fB\-d\fRSECONDS]
+[\fB\-n\fR]
+[\fB\-r\fR]
+[\fB\-v\fR]
+
+.SH DESCRIPTION
+\fBxentop\fR displays information about the Xen system and domains, in a
+continually-updating manner.  Command-line options and interactive commands
+can change the detail and format of the information displayed by \fBxentop\fR.
+
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+output version information and exit
+.TP
+\fB\-d\fR, \fB\-\-delay\fR=\fISECONDS\fR
+seconds between updates (default 3)
+.TP
+\fB\-n\fR, \fB\-\-networks\fR
+output network information
+.TP
+\fB\-r\fR, \fB\-\-repeat\-header\fR
+repeat table header before each domain
+.TP
+\fB\-v\fR, \fB\-\-vcpus\fR
+output VCPU data
+
+.SH "INTERACTIVE COMMANDS"
+All interactive commands are case-insensitive.
+.TP
+.B D
+set delay between updates
+.TP
+.B N
+toggle display of network information
+.TP
+.B Q, Esc
+quit
+.TP
+.B R
+toggle table header before each domain
+.TP
+.B S
+cycle sort order
+.TP
+.B V
+toggle display of VCPU information
+.TP
+.B Arrows
+scroll domain display
+
+.SH AUTHORS
+Written by Judy Fischbach, David Hendricks, and Josh Triplett
+
+.SH "REPORTING BUGS"
+Report bugs to <dsteklof@xxxxxxxxxx>.
+
+.SH COPYRIGHT
+Copyright \(co 2005  International Business Machines  Corp
+.br
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/xentop.c
--- /dev/null   Mon Aug 22 18:37:48 2005
+++ b/tools/xenstat/xentop/xentop.c     Tue Aug 23 19:03:21 2005
@@ -0,0 +1,876 @@
+/*
+ *  Copyright (C) International Business Machines  Corp., 2005
+ *  Author(s): Judy Fischbach <jfisch@xxxxxxxxxx>
+ *             David Hendricks <dhendrix@xxxxxxxxxx>
+ *             Josh Triplett <josht@xxxxxxxxxx>
+ *    based on code from Anthony Liguori <aliguori@xxxxxxxxxx>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <curses.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <xenstat.h>
+
+#define XENTOP_VERSION "1.0"
+
+/* Copyright and warranty notice appended to the output of version(). */
+#define XENTOP_DISCLAIMER \
+"Copyright (C) 2005  International Business Machines  Corp\n"\
+"This is free software; see the source for copying conditions.  There is NO\n"\
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
+#define XENTOP_BUGSTO "Report bugs to <dsteklof@xxxxxxxxxx>.\n"
+
+#define _GNU_SOURCE
+#include <getopt.h>
+
+#if !defined(__GNUC__) && !defined(__GNUG__)
+#define __attribute__(arg) /* empty */
+#endif
+
+#define KEY_ESCAPE '\x1B'
+
+/*
+ * Function prototypes
+ */
+/* Utility functions */
+static void usage(const char *);
+static void version(void);
+static void cleanup(void);
+static void fail(const char *);
+static int current_row(void);
+static int lines(void);
+static void print(const char *, ...) __attribute__((format(printf,1,2)));
+static void attr_addstr(int attr, const char *str);
+static void set_delay(char *value);
+static void set_prompt(char *new_prompt, void (*func)(char *));
+static int handle_key(int);
+static int compare(unsigned long long, unsigned long long);
+static int compare_domains(xenstat_domain **, xenstat_domain **);
+static unsigned long long tot_net_bytes( xenstat_domain *, int);
+
+/* Field functions */
+static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_domid(xenstat_domain *domain);
+static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_state(xenstat_domain *domain);
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_cpu(xenstat_domain *domain);
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_cpu_pct(xenstat_domain *domain);
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_mem(xenstat_domain *domain);
+static void print_mem_pct(xenstat_domain *domain);
+static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_maxmem(xenstat_domain *domain);
+static void print_max_pct(xenstat_domain *domain);
+static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_vcpus(xenstat_domain *domain);
+static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_nets(xenstat_domain *domain);
+static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_net_tx(xenstat_domain *domain);
+static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_net_rx(xenstat_domain *domain);
+static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2);
+static void print_ssid(xenstat_domain *domain);
+
+/* Section printing functions */
+static void do_summary(void);
+static void do_header(void);
+static void do_bottom_line(void);
+static void do_domain(xenstat_domain *);
+static void do_vcpu(xenstat_domain *);
+static void do_network(xenstat_domain *);
+static void top(void);
+
+/* Field types */
+/* Identifies one column of the domain table.  The enumerators appear in the
+ * same order as the entries of fields[], and compare_domains() uses
+ * sort_field (a field_id) directly as an index into fields[]. */
+typedef enum field_id {
+       FIELD_DOMID,
+       FIELD_STATE,
+       FIELD_CPU,
+       FIELD_CPU_PCT,
+       FIELD_MEM,
+       FIELD_MEM_PCT,
+       FIELD_MAXMEM,
+       FIELD_MAX_PCT,
+       FIELD_VCPUS,
+       FIELD_NETS,
+       FIELD_NET_TX,
+       FIELD_NET_RX,
+       FIELD_SSID
+} field_id;
+
+/* Description of one column of the domain table. */
+typedef struct field {
+       /* Identifier of the column */
+       field_id num;
+       /* Text shown as the column heading */
+       const char *header;
+       /* Default column width, in characters */
+       unsigned int default_width;
+       /* Orders two domains by this column; called by compare_domains() */
+       int (*compare)(xenstat_domain *domain1, xenstat_domain *domain2);
+       /* Prints this column's value for one domain */
+       void (*print)(xenstat_domain *domain);
+} field;
+
+/* Table of all columns, one entry per field_id and in field_id order (it is
+ * indexed by sort_field).  The two percentage columns MEM(%) and MAXMEM(%)
+ * reuse the comparison function of the corresponding absolute column. */
+field fields[] = {
+       { FIELD_DOMID,   "DOMID",      5, compare_domid,   print_domid   },
+       { FIELD_STATE,   "STATE",      6, compare_state,   print_state   },
+       { FIELD_CPU,     "CPU(sec)",  10, compare_cpu,     print_cpu     },
+       { FIELD_CPU_PCT, "CPU(%)",     6, compare_cpu_pct, print_cpu_pct },
+       { FIELD_MEM,     "MEM(k)",    10, compare_mem,     print_mem     },
+       { FIELD_MEM_PCT, "MEM(%)",     6, compare_mem,     print_mem_pct },
+       { FIELD_MAXMEM,  "MAXMEM(k)", 10, compare_maxmem,  print_maxmem  },
+       { FIELD_MAX_PCT, "MAXMEM(%)",  9, compare_maxmem,  print_max_pct },
+       { FIELD_VCPUS,   "VCPUS",      5, compare_vcpus,   print_vcpus   },
+       { FIELD_NETS,    "NETS",       4, compare_nets,    print_nets    },
+       { FIELD_NET_TX,  "NETTX(k)",   8, compare_net_tx,  print_net_tx  },
+       { FIELD_NET_RX,  "NETRX(k)",   8, compare_net_rx,  print_net_rx  },
+       { FIELD_SSID,    "SSID",       4, compare_ssid,    print_ssid    }
+};
+
+/* Number of entries in fields[] */
+const unsigned int NUM_FIELDS = sizeof(fields)/sizeof(field);
+
+/* Globals */
+/* Timestamps whose difference get_cpu_pct() uses as the elapsed time */
+struct timeval curtime, oldtime;
+/* Handle passed to xenstat_uninit() by cleanup() */
+xenstat_handle *xhandle = NULL;
+/* Previous and current samples; prev_node is NULL until a previous sample
+ * exists (see get_cpu_pct) */
+xenstat_node *prev_node = NULL;
+xenstat_node *cur_node = NULL;
+/* Column the domains are sorted by; index into fields[] */
+field_id sort_field = FIELD_DOMID;
+/* Scroll position; changed by the up/down arrow keys */
+unsigned int first_domain_index = 0;
+/* Seconds between updates */
+unsigned int delay = 3;
+/* Display toggles, flipped by the v, n and r keys */
+int show_vcpus = 0;
+int show_networks = 0;
+int repeat_header = 0;
+/* Size of the prompt input buffer, including the terminating NUL */
+#define PROMPT_VAL_LEN 80
+/* Prompt text; NULL means no prompt is active */
+char *prompt = NULL;
+/* Text typed at the prompt so far, and its length */
+char prompt_val[PROMPT_VAL_LEN];
+int prompt_val_len = 0;
+/* Called with prompt_val when the user confirms the prompt */
+void (*prompt_complete_func)(char *);
+
+/*
+ * Function definitions
+ */
+
+/* Utility functions */
+
+/* Print usage message, using given program name */
+static void usage(const char *program)
+{
+       /* One printf: synopsis line, description, option table, bug address */
+       printf(
+              "Usage: %s [OPTION]\n"
+              "Displays ongoing information about xen vm resources \n\n"
+              "-h, --help           display this help and exit\n"
+              "-V, --version        output version information and exit\n"
+              "-d, --delay=SECONDS  seconds between updates (default 3)\n"
+              "-n, --networks       output vif network data\n"
+              "-r, --repeat-header  repeat table header before each domain\n"
+              "-v, --vcpus          output vcpu data\n"
+              "\n" XENTOP_BUGSTO,
+              program
+       );
+}
+
+/* Print program version information */
+static void version(void)
+{
+       /* The text contains no conversion specifications, so it can be
+        * written out as a plain string. */
+       fputs("xentop " XENTOP_VERSION "\n"
+             "Written by Judy Fischbach, David Hendricks, Josh Triplett\n"
+             "\n" XENTOP_DISCLAIMER, stdout);
+}
+
+/* Clean up any open resources */
+static void cleanup(void)
+{
+       /* Leave curses mode unless that has already happened. */
+       if(!isendwin()) {
+               endwin();
+       }
+
+       /* Release both samples, then the library handle. */
+       if(prev_node) {
+               xenstat_free_node(prev_node);
+       }
+       if(cur_node) {
+               xenstat_free_node(cur_node);
+       }
+       if(xhandle) {
+               xenstat_uninit(xhandle);
+       }
+}
+
+/* Display the given message and gracefully exit */
+static void fail(const char *str)
+{
+       /* Leave curses mode first so the message is visible on the terminal */
+       if(!isendwin())
+               endwin();
+       /* Write the message verbatim.  Passing it to fprintf() as the format
+        * string would interpret any '%' it contains. */
+       fputs(str, stderr);
+       exit(1);
+}
+
+/* Return the row containing the cursor. */
+static int current_row(void)
+{
+       int row, col;
+
+       /* getyx() fills in both coordinates; only the row is of interest. */
+       getyx(stdscr, row, col);
+       return row;
+}
+
+/* Return the number of lines on the screen. */
+static int lines(void)
+{
+       int rows, cols;
+
+       /* getmaxyx() fills in both dimensions; only the height is needed. */
+       getmaxyx(stdscr, rows, cols);
+       return rows;
+}
+
+/* printf-style print function which calls printw, but only if the cursor is
+ * not on the last line. */
+static void print(const char *fmt, ...)
+{
+       va_list ap;
+
+       /* Cursor on the last line (or beyond): drop the output. */
+       if(current_row() >= lines()-1)
+               return;
+
+       va_start(ap, fmt);
+       vw_printw(stdscr, fmt, ap);
+       va_end(ap);
+}
+
+/* Print a string with the given attributes set. */
+static void attr_addstr(int attr, const char *str)
+{
+       /* Turn the attributes on, write the string, then turn the same
+        * attributes off again. */
+       attron(attr);
+       addstr(str);
+       attroff(attr);
+}
+
+/* Handle setting the delay from the user-supplied value in prompt_val */
+static void set_delay(char *value)
+{
+       char *end;
+       long new_delay;
+
+       /* strtol() reports the failures that atoi() cannot: no digits at all
+        * (end == value) and out-of-range input (errno == ERANGE). */
+       errno = 0;
+       new_delay = strtol(value, &end, 10);
+       if(end == value || errno == ERANGE)
+               return;
+
+       /* Keep the current delay unless the value is positive and fits in
+        * the unsigned int it is stored in ((unsigned int)-1 is the largest
+        * such value). */
+       if(new_delay <= 0 || (unsigned long)new_delay > (unsigned int)-1)
+               return;
+
+       delay = (unsigned int)new_delay;
+}
+
+/* Enable prompting mode with the given prompt string; call the given function
+ * when a value is available. */
+static void set_prompt(char *new_prompt, void (*func)(char *))
+{
+       /* Start from an empty input line. */
+       prompt_val_len = 0;
+       prompt_val[0] = '\0';
+
+       /* Install the prompt text and its completion callback; a NULL
+        * prompt means that prompting is switched off. */
+       prompt_complete_func = func;
+       prompt = new_prompt;
+}
+
+/* Handle user input, return 0 if the program should quit, or 1 if not */
+static int handle_key(int ch)
+{
+       if(prompt == NULL) {
+               /* Not prompting for input; handle interactive commands */
+               switch(ch) {
+               case 'n': case 'N':
+                       show_networks ^= 1;
+                       break;
+               case 'r': case 'R':
+                       repeat_header ^= 1;
+                       break;
+               case 's': case 'S':
+                       /* Advance to the next column, wrapping at the end */
+                       sort_field = (sort_field + 1) % NUM_FIELDS;
+                       break;
+               case 'v': case 'V':
+                       show_vcpus ^= 1;
+                       break;
+               case KEY_DOWN:
+                       first_domain_index++;
+                       break;
+               case KEY_UP:
+                       if(first_domain_index > 0)
+                               first_domain_index--;
+                       break;
+               case 'd': case 'D':
+                       set_prompt("Delay(sec)", set_delay);
+                       break;
+               case 'q': case 'Q': case KEY_ESCAPE:
+                       return 0;
+               default:
+                       /* Any other key is ignored */
+                       break;
+               }
+       } else {
+               /* Prompting for input; handle line editing */
+               switch(ch) {
+               case '\r':
+                       /* Deliver the typed value, then leave prompt mode */
+                       prompt_complete_func(prompt_val);
+                       set_prompt(NULL, NULL);
+                       break;
+               case KEY_ESCAPE:
+                       /* Abandon the prompt without calling the callback */
+                       set_prompt(NULL, NULL);
+                       break;
+               case KEY_BACKSPACE:
+                       if(prompt_val_len > 0)
+                               prompt_val[--prompt_val_len] = '\0';
+                       /* Must not fall through: the key code would be
+                        * handed to isprint() below. */
+                       break;
+               default:
+                       /* isprint() is only defined for values representable
+                        * as unsigned char (and EOF); curses function-key
+                        * codes are larger, so range-check first. */
+                       if((prompt_val_len+1) < PROMPT_VAL_LEN
+                          && ch >= 0 && ch <= 0xFF && isprint(ch)) {
+                               prompt_val[prompt_val_len++] = (char)ch;
+                               prompt_val[prompt_val_len] = '\0';
+                       }
+                       break;
+               }
+       }
+
+       return 1;
+}
+
+/* Compares two integers, returning -1,0,1 for <,=,> */
+static int compare(unsigned long long i1, unsigned long long i2)
+{
+       /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
+       return (i1 > i2) - (i1 < i2);
+}
+
+/* Comparison function for use with qsort.  Compares two domains using the
+ * current sort field. */
+static int compare_domains(xenstat_domain **domain1, xenstat_domain **domain2)
+{
+       /* Look up the comparison function of the active sort column and
+        * apply it to the two domains the arguments point at. */
+       int (*cmp)(xenstat_domain *, xenstat_domain *) =
+               fields[sort_field].compare;
+
+       return cmp(*domain1, *domain2);
+}
+
+/* Field functions */
+
+/* Compares domain ids of two domains, returning -1,0,1 for <,=,> */
+/* static, matching the prototype declared near the top of the file */
+static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       return compare(xenstat_domain_id(domain1), xenstat_domain_id(domain2));
+}
+
+/* Prints domain identification number */
+/* static, matching the prototype declared near the top of the file */
+static void print_domid(xenstat_domain *domain)
+{
+       print("%5u", xenstat_domain_id(domain));
+}
+
+/* Pairs each domain-state query function with the letter print_state()
+ * shows when that state is set.  The order of the entries is the order in
+ * which print_state() emits the letters and in which compare_state() checks
+ * the states. */
+struct {
+       unsigned int (*get)(xenstat_domain *);
+       char ch;
+} state_funcs[] = {
+       { xenstat_domain_dying,    'd' },
+       { xenstat_domain_shutdown, 's' },
+       { xenstat_domain_blocked,  'b' },
+       { xenstat_domain_crashed,  'c' },
+       { xenstat_domain_paused,   'p' },
+       { xenstat_domain_running,  'r' }
+};
+/* Number of entries in state_funcs[] */
+const unsigned int NUM_STATES = sizeof(state_funcs)/sizeof(*state_funcs);
+
+/* Compare states of two domains, returning -1,0,1 for <,=,> */
+static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       unsigned int idx;
+
+       /* Walk the states in table order; the first state that only one of
+        * the two domains is in decides, and that domain sorts first. */
+       for(idx = 0; idx < NUM_STATES; idx++) {
+               int in1 = state_funcs[idx].get(domain1) != 0;
+               int in2 = state_funcs[idx].get(domain2) != 0;
+
+               if(in1 != in2)
+                       return in1 ? -1 : 1;
+       }
+       return 0;
+}
+
+/* Prints domain state in abbreviated letter format */
+static void print_state(xenstat_domain *domain)
+{
+       unsigned int idx;
+
+       /* One character per state: its letter when set, '-' otherwise. */
+       for(idx = 0; idx < NUM_STATES; idx++) {
+               char flag = '-';
+
+               if(state_funcs[idx].get(domain))
+                       flag = state_funcs[idx].ch;
+               print("%c", flag);
+       }
+}
+
+/* Compares cpu usage of two domains, returning -1,0,1 for <,=,> */
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       /* Negated so that the domain with more CPU time sorts first. */
+       int ascending = compare(xenstat_domain_cpu_ns(domain1),
+                               xenstat_domain_cpu_ns(domain2));
+
+       return -ascending;
+}
+
+/* Prints domain cpu usage in seconds */
+static void print_cpu(xenstat_domain *domain)
+{
+       /* Whole seconds: nanoseconds divided by 10^9. */
+       unsigned long long secs = xenstat_domain_cpu_ns(domain)/1000000000;
+
+       print("%10llu", secs);
+}
+
+/* Computes the CPU percentage used for a specified domain: the CPU time the
+ * domain consumed between the previous and the current sample, relative to
+ * the wall-clock time between oldtime and curtime.  Returns 0.0 when the
+ * percentage cannot be computed (no previous sample, domain absent from the
+ * previous sample, no elapsed time, or a CPU counter that went backwards). */
+static double get_cpu_pct(xenstat_domain *domain)
+{
+       xenstat_domain *old_domain;
+       double us_elapsed;
+       unsigned long long cur_ns, old_ns;
+
+       /* Can't calculate CPU percentage without a previous sample. */
+       if(prev_node == NULL)
+               return 0.0;
+
+       old_domain = xenstat_node_domain(prev_node, xenstat_domain_id(domain));
+       if(old_domain == NULL)
+               return 0.0;
+
+       /* Calculate the time elapsed in microseconds */
+       us_elapsed = ((curtime.tv_sec-oldtime.tv_sec)*1000000.0
+                     +(curtime.tv_usec - oldtime.tv_usec));
+
+       /* Without a positive interval the division below is meaningless
+        * (division by zero yields inf or NaN). */
+       if(us_elapsed <= 0.0)
+               return 0.0;
+
+       cur_ns = xenstat_domain_cpu_ns(domain);
+       old_ns = xenstat_domain_cpu_ns(old_domain);
+
+       /* The subtraction is unsigned: a counter that went backwards would
+        * wrap around to an enormous value. */
+       if(cur_ns < old_ns)
+               return 0.0;
+
+       /* In the following, nanoseconds must be divided by 1000.0 to
+        * convert to microseconds, then multiplied by 100.0 to get a
+        * percentage, resulting in a division by 10.0 */
+       return ((cur_ns - old_ns)/10.0)/us_elapsed;
+}
+
+/* Compares CPU percentage of two domains, returning -1,0,1 so that the
+ * domain with the higher percentage sorts first.  The doubles are compared
+ * directly: passing them to compare() would convert them to unsigned long
+ * long and drop the fractional part. */
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       double pct1 = get_cpu_pct(domain1);
+       double pct2 = get_cpu_pct(domain2);
+
+       if(pct1 > pct2)
+               return -1;
+       if(pct1 < pct2)
+               return 1;
+       return 0;
+}
+
+/* Prints cpu percentage statistic */
+static void print_cpu_pct(xenstat_domain *domain)
+{
+       /* Six characters wide, one decimal place. */
+       double pct = get_cpu_pct(domain);
+
+       print("%6.1f", pct);
+}
+
+/* Compares current memory of two domains, returning -1,0,1 for <,=,> */
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       /* Negated so that the domain with more memory sorts first. */
+       int ascending = compare(xenstat_domain_cur_mem(domain1),
+                               xenstat_domain_cur_mem(domain2));
+
+       return -ascending;
+}
+
+/* Prints current memory statistic */
+static void print_mem(xenstat_domain *domain)
+{
+       /* The value is divided by 1024 for the MEM(k) column. */
+       unsigned long long scaled = xenstat_domain_cur_mem(domain)/1024;
+
+       print("%10llu", scaled);
+}
+
+/* Prints memory percentage statistic, ratio of current domain memory to total
+ * node memory */
+static void print_mem_pct(xenstat_domain *domain)
+{
+       /* Current domain memory as a share of total node memory. */
+       double dom_mem = (double)xenstat_domain_cur_mem(domain);
+       double node_mem = (double)xenstat_node_tot_mem(cur_node);
+
+       print("%6.1f", dom_mem / node_mem * 100);
+}
+
+/* Compares maximum memory of two domains, returning -1,0,1 for <,=,> */
+static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       /* Negated so that the domain with the larger limit sorts first. */
+       int ascending = compare(xenstat_domain_max_mem(domain1),
+                               xenstat_domain_max_mem(domain2));
+
+       return -ascending;
+}
+
+/* Prints maximum domain memory statistic in KB, or "no limit" when the
+ * maximum is the all-ones value */
+static void print_maxmem(xenstat_domain *domain)
+{
+       const unsigned long long unlimited = (unsigned long long)-1;
+       unsigned long long limit = xenstat_domain_max_mem(domain);
+
+       if(limit != unlimited) {
+               print("%10llu", limit/1024);
+               return;
+       }
+       print("%10s", "no limit");
+}
+
+/* Prints the domain's maximum memory as a percentage of the total memory of
+ * the node held in cur_node, or "n/a" when the maximum is the all-ones
+ * value */
+static void print_max_pct(xenstat_domain *domain)
+{
+       unsigned long long max_mem = xenstat_domain_max_mem(domain);
+
+       if (max_mem == (unsigned long long)-1) {
+               print("%9s", "n/a");
+               return;
+       }
+       print("%9.1f", (double)max_mem /
+                      (double)xenstat_node_tot_mem(cur_node) * 100);
+}
+
+/* Compares number of virtual CPUs of two domains.  Returns the negation of
+ * compare()'s result, i.e. the reverse of compare()'s own ordering. */
+static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       unsigned vcpus1 = xenstat_domain_num_vcpus(domain1);
+       unsigned vcpus2 = xenstat_domain_num_vcpus(domain2);
+
+       return -compare(vcpus1, vcpus2);
+}
+
+/* Prints the domain's number of virtual CPUs, right-aligned in 5 columns */
+static void print_vcpus(xenstat_domain *domain)
+{
+       unsigned vcpu_count = xenstat_domain_num_vcpus(domain);
+
+       print("%5u", vcpu_count);
+}
+
+/* Compares number of virtual networks of two domains.  Returns the negation
+ * of compare()'s result, i.e. the reverse of compare()'s own ordering. */
+static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       unsigned nets1 = xenstat_domain_num_networks(domain1);
+       unsigned nets2 = xenstat_domain_num_networks(domain2);
+
+       return -compare(nets1, nets2);
+}
+
+/* Prints the domain's number of virtual networks, right-aligned in
+ * 4 columns */
+static void print_nets(xenstat_domain *domain)
+{
+       unsigned net_count = xenstat_domain_num_networks(domain);
+
+       print("%4u", net_count);
+}
+
+/* Compares number of total network tx bytes of two domains.  Returns the
+ * negation of compare()'s result, i.e. the reverse of compare()'s own
+ * ordering. */
+static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       unsigned long long tx1 = tot_net_bytes(domain1, FALSE);
+       unsigned long long tx2 = tot_net_bytes(domain2, FALSE);
+
+       return -compare(tx1, tx2);
+}
+
+/* Prints the domain's total network tx bytes divided by 1024, right-aligned
+ * in 8 columns */
+static void print_net_tx(xenstat_domain *domain)
+{
+       unsigned long long tx_bytes = tot_net_bytes(domain, FALSE);
+
+       print("%8llu", tx_bytes/1024);
+}
+
+/* Compares number of total network rx bytes of two domains.  Returns the
+ * negation of compare()'s result, i.e. the reverse of compare()'s own
+ * ordering. */
+static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       unsigned long long rx1 = tot_net_bytes(domain1, TRUE);
+       unsigned long long rx2 = tot_net_bytes(domain2, TRUE);
+
+       return -compare(rx1, rx2);
+}
+
+/* Prints the domain's total network rx bytes divided by 1024, right-aligned
+ * in 8 columns */
+static void print_net_rx(xenstat_domain *domain)
+{
+       unsigned long long rx_bytes = tot_net_bytes(domain, TRUE);
+
+       print("%8llu", rx_bytes/1024);
+}
+
+/* Sums a byte counter over all of a domain's networks.
+ * If rx_flag is non-zero the received-bytes counters are summed, otherwise
+ * the transmitted-bytes counters.  Returns 0 for a domain with no networks.
+ */
+static unsigned long long tot_net_bytes(xenstat_domain *domain, int rx_flag)
+{
+       unsigned i;     /* unsigned, to match the type of num_networks */
+       unsigned num_networks;
+       unsigned long long total = 0;
+       xenstat_network *network;
+
+       /* How many networks? */
+       num_networks = xenstat_domain_num_networks(domain);
+
+       /* Accumulate the selected counter of each network */
+       for (i = 0; i < num_networks; i++) {
+               network = xenstat_domain_network(domain, i);
+               if (rx_flag)
+                       total += xenstat_network_rbytes(network);
+               else
+                       total += xenstat_network_tbytes(network);
+       }
+       return total;
+}
+
+/* Compares security id (ssid) of two domains.  Unlike the other compare_*
+ * helpers around it, this one returns compare()'s result without negating
+ * it. */
+static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+       int order = compare(xenstat_domain_ssid(domain1),
+                           xenstat_domain_ssid(domain2));
+
+       return order;
+}
+
+/* Prints the domain's security id (ssid), right-aligned in 4 columns */
+static void print_ssid(xenstat_domain *domain)
+{
+       unsigned ssid = xenstat_domain_ssid(domain);
+
+       print("%4u", ssid);
+}
+
+/* Section printing functions */
+/* Prints the top summary, above the domain table: the current time, a
+ * tally of domain states, and the memory and CPU figures of cur_node */
+void do_summary(void)
+{
+#define TIME_STR_LEN 9
+       const char *TIME_STR_FORMAT = "%H:%M:%S";
+       char time_str[TIME_STR_LEN];
+       unsigned n_run = 0, n_block = 0, n_pause = 0,
+                n_crash = 0, n_dying = 0, n_shutdown = 0;
+       unsigned idx, num_domains;
+       unsigned long long tot_mem, free_mem, used;
+
+       /* Print program name and current time */
+       strftime(time_str, TIME_STR_LEN, TIME_STR_FORMAT,
+                localtime(&curtime.tv_sec));
+       num_domains = xenstat_node_num_domains(cur_node);
+       print("xentop - %s\n", time_str);
+
+       /* Count each domain under the first state it reports, testing in
+        * the order running, blocked, paused, shutdown, crashed, dying */
+       for (idx = 0; idx < num_domains; idx++) {
+               xenstat_domain *domain =
+                       xenstat_node_domain_by_index(cur_node, idx);
+
+               if (xenstat_domain_running(domain))
+                       n_run++;
+               else if (xenstat_domain_blocked(domain))
+                       n_block++;
+               else if (xenstat_domain_paused(domain))
+                       n_pause++;
+               else if (xenstat_domain_shutdown(domain))
+                       n_shutdown++;
+               else if (xenstat_domain_crashed(domain))
+                       n_crash++;
+               else if (xenstat_domain_dying(domain))
+                       n_dying++;
+       }
+
+       print("%u domains: %u running, %u blocked, %u paused, "
+             "%u crashed, %u dying, %u shutdown \n",
+             num_domains, n_run, n_block, n_pause,
+             n_crash, n_dying, n_shutdown);
+
+       /* Used memory is whatever the node does not report as free */
+       tot_mem = xenstat_node_tot_mem(cur_node);
+       free_mem = xenstat_node_free_mem(cur_node);
+       used = tot_mem - free_mem;
+
+       /* Dump node memory and cpu information */
+       print("Mem: %lluk total, %lluk used, %lluk free    "
+             "CPUs: %u @ %lluMHz\n",
+             tot_mem/1024, used/1024,
+             free_mem/1024,
+             xenstat_node_num_cpus(cur_node),
+             xenstat_node_cpu_hz(cur_node)/1000000);
+}
+
+/* Displays the heading row of the domain table: one heading per entry of
+ * fields[], separated by single spaces */
+void do_header(void)
+{
+       field_id col;
+
+       /* The whole heading row is drawn with the REVERSE attribute */
+       attron(A_REVERSE);
+       for(col = 0; col < NUM_FIELDS; col++) {
+               int is_sort_col = (col == sort_field);
+
+               /* Separator goes before every column except the first */
+               if(col != 0)
+                       print(" ");
+               /* The sort column's heading is additionally drawn BOLD */
+               if(is_sort_col)
+                       attron(A_BOLD);
+               print("%*s", fields[col].default_width, fields[col].header);
+               if(is_sort_col)
+                       attroff(A_BOLD);
+       }
+       attroff(A_REVERSE);
+       print("\n");
+}
+
+/* Displays the last screen line: the active prompt with its current value
+ * if there is one, otherwise the key menu */
+void do_bottom_line(void)
+{
+       move(lines()-1, 2);
+
+       /* An active prompt replaces the menu entirely */
+       if (prompt != NULL) {
+               printw("%s: %s", prompt, prompt_val);
+               return;
+       }
+
+       /* Each menu entry shows its first letter in REVERSE; the toggles
+        * (networks, vcpus, repeat header) use COLOR_PAIR(1) for the rest
+        * of the word while they are switched on */
+       addch(A_REVERSE | 'D');
+       addstr("elay  ");
+
+       addch(A_REVERSE | 'N');
+       attr_addstr(show_networks ? COLOR_PAIR(1) : 0, "etworks");
+       addstr("  ");
+
+       addch(A_REVERSE | 'V');
+       attr_addstr(show_vcpus ? COLOR_PAIR(1) : 0, "CPUs");
+       addstr("  ");
+
+       addch(A_REVERSE | 'R');
+       attr_addstr(repeat_header ? COLOR_PAIR(1) : 0, "epeat header");
+       addstr("  ");
+
+       addch(A_REVERSE | 'S');
+       addstr("ort order  ");
+
+       addch(A_REVERSE | 'Q');
+       addstr("uit  ");
+}
+
+/* Prints one table row for a domain by calling the print routine of every
+ * entry of fields[], separated by single spaces */
+void do_domain(xenstat_domain *domain)
+{
+       unsigned int col;
+
+       for(col = 0; col < NUM_FIELDS; col++) {
+               int is_sort_col = (col == sort_field);
+
+               /* Separator goes before every column except the first */
+               if(col != 0)
+                       print(" ");
+               /* The value in the sort column is drawn BOLD */
+               if(is_sort_col)
+                       attron(A_BOLD);
+               fields[col].print(domain);
+               if(is_sort_col)
+                       attroff(A_BOLD);
+       }
+       print("\n");
+}
+
+/* Outputs, for every vcpu of the domain, its index and its
+ * xenstat_vcpu_ns() value divided by 10^9 (nanoseconds to seconds), five
+ * vcpus per output line */
+void do_vcpu(xenstat_domain *domain)
+{
+       unsigned i;     /* unsigned: compared with num_vcpus, printed as %u */
+       unsigned num_vcpus;
+       xenstat_vcpu *vcpu;
+
+       print("VCPUs(sec): ");
+
+       num_vcpus = xenstat_domain_num_vcpus(domain);
+
+       /* for all vcpus dump out values */
+       for (i = 0; i < num_vcpus; i++) {
+               vcpu = xenstat_domain_vcpu(domain, i);
+
+               /* Start a new, indented output line after every 5 vcpus */
+               if (i != 0 && (i%5) == 0)
+                       print("\n        ");
+               print(" %2u: %10llus", i, xenstat_vcpu_ns(vcpu)/1000000000);
+       }
+       print("\n");
+}
+
+/* Outputs one line per network of the domain with its receive and transmit
+ * counters (bytes, packets, errors, drops) */
+void do_network(xenstat_domain *domain)
+{
+       unsigned i;     /* unsigned, to match the type of num_networks */
+       unsigned num_networks;
+       xenstat_network *network;
+
+       /* How many networks? */
+       num_networks = xenstat_domain_num_networks(domain);
+
+       /* Dump information for each network */
+       for (i = 0; i < num_networks; i++) {
+               /* Next get the network information */
+               network = xenstat_domain_network(domain, i);
+
+               /* Receive counters, prefixed with the network's index */
+               print("Net%u RX: %8llubytes %8llupkts %8lluerr %8lludrop  ",
+                     i,
+                     xenstat_network_rbytes(network),
+                     xenstat_network_rpackets(network),
+                     xenstat_network_rerrs(network),
+                     xenstat_network_rdrop(network));
+
+               /* Transmit counters complete the same output line */
+               print("TX: %8llubytes %8llupkts %8lluerr %8lludrop\n",
+                     xenstat_network_tbytes(network),
+                     xenstat_network_tpackets(network),
+                     xenstat_network_terrs(network),
+                     xenstat_network_tdrop(network));
+       }
+}
+
+/* Fetches a fresh statistics snapshot and draws one full screen: the
+ * summary, the sorted domain table (with optional vcpu and network detail
+ * per domain) and the bottom line. */
+static void top(void)
+{
+       xenstat_domain **domains;
+       unsigned int i, num_domains = 0;
+
+       /* Now get the node information.  The snapshot that was current
+        * becomes prev_node; the one before it is released. */
+       if (prev_node != NULL)
+               xenstat_free_node(prev_node);
+       prev_node = cur_node;
+       cur_node = xenstat_get_node(xhandle, XENSTAT_ALL);
+       if (cur_node == NULL)
+               fail("Failed to retrieve statistics from libxenstat\n");
+
+       /* dump summary top information */
+       do_summary();
+
+       /* Count the number of domains for which to report data */
+       num_domains = xenstat_node_num_domains(cur_node);
+
+       /* Array of domain pointers, so the domains can be sorted without
+        * touching the node itself */
+       domains = malloc(num_domains*sizeof(xenstat_domain *));
+       if(domains == NULL)
+               fail("Failed to allocate memory\n");
+
+       for (i=0; i < num_domains; i++)
+               domains[i] = xenstat_node_domain_by_index(cur_node, i);
+
+       /* Sort */
+       qsort(domains, num_domains, sizeof(xenstat_domain *),
+             (int(*)(const void *, const void *))compare_domains);
+
+       /* Clamp the first displayed row to the last existing domain */
+       if(first_domain_index >= num_domains)
+               first_domain_index = num_domains-1;
+
+       for (i = first_domain_index; i < num_domains; i++) {
+               /* Stop before the last screen line, which is reserved for
+                * the bottom line */
+               if(current_row() == lines()-1)
+                       break;
+               if (i == first_domain_index || repeat_header)
+                       do_header();
+               do_domain(domains[i]);
+               if (show_vcpus)
+                       do_vcpu(domains[i]);
+               if (show_networks)
+                       do_network(domains[i]);
+       }
+
+       do_bottom_line();
+
+       /* The array only holds pointers obtained from cur_node; release it
+        * here so it is not leaked on every refresh */
+       free(domains);
+}
+
+/* Program entry point: parses the command line, initialises libxenstat and
+ * curses, then redraws the display until handle_key() returns 0. */
+int main(int argc, char **argv)
+{
+       /* opt_index receives the long-option index from getopt_long(); it
+        * is deliberately not named optind, which is getopt's own global */
+       int opt, opt_index = 0;
+       int ch = ERR;
+
+       struct option lopts[] = {
+               { "help",          no_argument,       NULL, 'h' },
+               { "version",       no_argument,       NULL, 'V' },
+               { "networks",      no_argument,       NULL, 'n' },
+               { "repeat-header", no_argument,       NULL, 'r' },
+               { "vcpus",         no_argument,       NULL, 'v' },
+               { "delay",         required_argument, NULL, 'd' },
+               { 0, 0, 0, 0 },
+       };
+       /* Short options.  'r' is the short form of --repeat-header.  'b'
+        * has no case of its own below, so it ends in the default branch
+        * and prints the usage text. */
+       const char *sopts = "hVbnrvd:";
+
+       if (atexit(cleanup) != 0)
+               fail("Failed to install cleanup handler.\n");
+
+       while ((opt = getopt_long(argc, argv, sopts, lopts, &opt_index)) != -1) {
+               switch (opt) {
+               case 'h':
+               case '?':
+               default:
+                       usage(argv[0]);
+                       exit(0);
+               case 'V':
+                       version();
+                       exit(0);
+               case 'n':
+                       show_networks = 1;
+                       break;
+               case 'r':
+                       repeat_header = 1;
+                       break;
+               case 'v':
+                       show_vcpus = 1;
+                       break;
+               case 'd':
+                       delay = atoi(optarg);
+                       break;
+               }
+       }
+
+       /* Get xenstat handle */
+       xhandle = xenstat_init();
+       if (xhandle == NULL)
+               fail("Failed to initialize xenstat library\n");
+
+       /* Begin curses stuff */
+       initscr();
+       start_color();
+       cbreak();
+       noecho();
+       nonl();
+       keypad(stdscr, TRUE);
+       halfdelay(5);
+       use_default_colors();
+       init_pair(1, -1, COLOR_YELLOW);
+
+       do {
+               gettimeofday(&curtime, NULL);
+               /* Redraw when the previous getch() returned a key, or when
+                * at least `delay` seconds passed since the last redraw */
+               if(ch != ERR || (curtime.tv_sec - oldtime.tv_sec) >= delay) {
+                       clear();
+                       top();
+                       oldtime = curtime;
+                       refresh();
+               }
+               ch = getch();
+       } while (handle_key(ch));
+
+       /* Cleanup occurs in cleanup(), so no work to do here. */
+
+       return 0;
+}
diff -r cd984b3478f6 -r cc5f88b719d0 
linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c Mon Aug 22 18:37:48 2005
+++ /dev/null   Tue Aug 23 19:03:21 2005
@@ -1,497 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-
-/*
- * Reuse arch/xen/i396/mm/ioremap.c. Need to merge later
- */
-#ifndef CONFIG_XEN_PHYSDEV_ACCESS
-
-void * __ioremap(unsigned long phys_addr, unsigned long size,
-                unsigned long flags)
-{
-       return NULL;
-}
-
-void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
-       return NULL;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-}
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
-       return NULL;
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-}
-
-#else
-
-#if defined(__i386__)
-/*
- * Does @address reside within a non-highmem page that is local to this virtual
- * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
- * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
- * why this works.
- */
-static inline int is_local_lowmem(unsigned long address)
-{
-       extern unsigned long max_low_pfn;
-       unsigned long mfn = address >> PAGE_SHIFT;
-       unsigned long pfn = mfn_to_pfn(mfn);
-       return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
-}
-#elif defined(__x86_64__)
-/*
- * 
- */
-static inline int is_local_lowmem(unsigned long address)
-{
-        return 0;
-}
-#endif
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned 
long flags)
-{
-       void __iomem * addr;
-       struct vm_struct * area;
-       unsigned long offset, last_addr;
-       domid_t domid = DOMID_IO;
-
-       /* Don't allow wraparound or zero size */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-       if (phys_addr >= 0x0 && last_addr < 0x100000)
-               return isa_bus_to_virt(phys_addr);
-#endif
-
-       /*
-        * Don't allow anybody to remap normal RAM that we're using..
-        */
-       if (is_local_lowmem(phys_addr)) {
-               char *t_addr, *t_end;
-               struct page *page;
-
-               t_addr = bus_to_virt(phys_addr);
-               t_end = t_addr + (size - 1);
-          
-               for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); 
page++)
-                       if(!PageReserved(page))
-                               return NULL;
-
-               domid = DOMID_LOCAL;
-       }
-
-       /*
-        * Mappings have to be page-aligned
-        */
-       offset = phys_addr & ~PAGE_MASK;
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
-       /*
-        * Ok, go for it..
-        */
-       area = get_vm_area(size, VM_IOREMAP | (flags << 20));
-       if (!area)
-               return NULL;
-       area->phys_addr = phys_addr;
-       addr = (void __iomem *) area->addr;
-       if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
-                                   size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
-                                                  _PAGE_DIRTY | _PAGE_ACCESSED
-#if defined(__x86_64__)
-                                                   | _PAGE_USER
-#endif
-                                                  | flags), domid)) {
-               vunmap((void __force *) addr);
-               return NULL;
-       }
-       return (void __iomem *) (offset + (char __iomem *)addr);
-}
-
-
-/**
- * ioremap_nocache     -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address. 
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many 
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- * 
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
-       unsigned long last_addr;
-       void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
-       if (!p) 
-               return p; 
-
-       /* Guaranteed to be > phys_addr, as per __ioremap() */
-       last_addr = phys_addr + size - 1;
-
-       if (is_local_lowmem(last_addr)) { 
-               struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
-               unsigned long npages;
-
-               phys_addr &= PAGE_MASK;
-
-               /* This might overflow and become zero.. */
-               last_addr = PAGE_ALIGN(last_addr);
-
-               /* .. but that's ok, because modulo-2**n arithmetic will make
-               * the page-aligned "last - first" come out right.
-               */
-               npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
-               if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
-                       iounmap(p); 
-                       p = NULL;
-               }
-               global_flush_tlb();
-       }
-
-       return p;                                       
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-       struct vm_struct *p;
-       if ((void __force *) addr <= high_memory) 
-               return; 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
-               return;
-#endif
-       p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
-       if (!p) { 
-               printk("__iounmap: bad address %p\n", addr);
-               return;
-       }
-
-       if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
-               /* p->size includes the guard page, but cpa doesn't like that */
-               change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
-                                (p->size - PAGE_SIZE) >> PAGE_SHIFT,
-                                PAGE_KERNEL);                           
-               global_flush_tlb();
-       } 
-       kfree(p); 
-}
-
-#if defined(__i386__)
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
-       unsigned long offset, last_addr;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
-
-       /* Don't allow wraparound or zero size */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-       if (phys_addr >= 0x0 && last_addr < 0x100000)
-               return isa_bus_to_virt(phys_addr);
-#endif
-
-       /*
-        * Mappings have to be page-aligned
-        */
-       offset = phys_addr & ~PAGE_MASK;
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(last_addr) - phys_addr;
-
-       /*
-        * Mappings have to fit in the FIX_BTMAP area.
-        */
-       nrpages = size >> PAGE_SHIFT;
-       if (nrpages > NR_FIX_BTMAPS)
-               return NULL;
-
-       /*
-        * Ok, go for it..
-        */
-       idx = FIX_BTMAP_BEGIN;
-       while (nrpages > 0) {
-               set_fixmap(idx, phys_addr);
-               phys_addr += PAGE_SIZE;
-               --idx;
-               --nrpages;
-       }
-       return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-       unsigned long virt_addr;
-       unsigned long offset;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
-
-       virt_addr = (unsigned long)addr;
-       if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
-               return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-       if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
-               return;
-#endif
-       offset = virt_addr & ~PAGE_MASK;
-       nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
-       idx = FIX_BTMAP_BEGIN;
-       while (nrpages > 0) {
-               clear_fixmap(idx);
-               --idx;
-               --nrpages;
-       }
-}
-#endif /* defined(__i386__) */
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
-
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
-  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
-  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-
-static inline void direct_remap_area_pte(pte_t *pte, 
-                                        unsigned long address, 
-                                        unsigned long size,
-                                        mmu_update_t **v)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
-       if (address >= end)
-               BUG();
-
-       do {
-               (*v)->ptr = virt_to_machine(pte);
-               (*v)++;
-               address += PAGE_SIZE;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int direct_remap_area_pmd(struct mm_struct *mm,
-                                       pmd_t *pmd, 
-                                       unsigned long address, 
-                                       unsigned long size,
-                                       mmu_update_t **v)
-{
-       unsigned long end;
-
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
-       if (address >= end)
-               BUG();
-       do {
-               pte_t *pte = (mm == &init_mm) ? 
-                       pte_alloc_kernel(mm, pmd, address) :
-                       pte_alloc_map(mm, pmd, address);
-               if (!pte)
-                       return -ENOMEM;
-               direct_remap_area_pte(pte, address, end - address, v);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
-       return 0;
-}
- 
-int __direct_remap_area_pages(struct mm_struct *mm,
-                             unsigned long address, 
-                             unsigned long size, 
-                             mmu_update_t *v)
-{
-       pgd_t * dir;
-       unsigned long end = address + size;
-       int error;
-
-#if defined(__i386__)
-       dir = pgd_offset(mm, address);
-#elif defined (__x86_64)
-        dir = (mm == &init_mm) ?
-               pgd_offset_k(address):
-               pgd_offset(mm, address);
-#endif
-       if (address >= end)
-               BUG();
-       spin_lock(&mm->page_table_lock);
-       do {
-               pud_t *pud;
-               pmd_t *pmd;
-
-               error = -ENOMEM;
-               pud = pud_alloc(mm, dir, address);
-               if (!pud)
-                       break;
-               pmd = pmd_alloc(mm, pud, address);
-               if (!pmd)
-                       break;
-               error = 0;
-               direct_remap_area_pmd(mm, pmd, address, end - address, &v);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               dir++;
-
-       } while (address && (address < end));
-       spin_unlock(&mm->page_table_lock);
-       return error;
-}
-
-
-int direct_remap_area_pages(struct mm_struct *mm,
-                           unsigned long address, 
-                           unsigned long machine_addr,
-                           unsigned long size, 
-                           pgprot_t prot,
-                           domid_t  domid)
-{
-       int i;
-       unsigned long start_address;
-#define MAX_DIRECTMAP_MMU_QUEUE 130
-       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
-
-       start_address = address;
-
-       flush_cache_all();
-
-       for (i = 0; i < size; i += PAGE_SIZE) {
-               if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
-                       /* Fill in the PTE pointers. */
-                       __direct_remap_area_pages(mm,
-                                                 start_address, 
-                                                 address-start_address, 
-                                                 u);
- 
-                       if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
-                               return -EFAULT;
-                       v = u;
-                       start_address = address;
-               }
-
-               /*
-                * Fill in the machine address: PTE ptr is done later by
-                * __direct_remap_area_pages(). 
-                */
-               v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
-
-               machine_addr += PAGE_SIZE;
-               address += PAGE_SIZE; 
-               v++;
-       }
-
-       if (v != u) {
-               /* get the ptep's filled in */
-               __direct_remap_area_pages(mm,
-                                         start_address, 
-                                         address-start_address, 
-                                         u);
-               if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
-                       return -EFAULT;
-       }
-
-       flush_tlb_all();
-
-       return 0;
-}
-
-EXPORT_SYMBOL(direct_remap_area_pages);
-
-int create_lookup_pte_addr(struct mm_struct *mm, 
-                           unsigned long address,
-                           unsigned long *ptep)
-{
-    int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 
-    {
-        unsigned long *ptep = (unsigned long *)data;
-        if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT)
-                       | ((unsigned long)pte & ~PAGE_MASK);
-        return 0;
-    }
-
-    return generic_page_range(mm, address, PAGE_SIZE, f, ptep);
-}
-
-EXPORT_SYMBOL(create_lookup_pte_addr);
-
-int touch_pte_range(struct mm_struct *mm,
-                    unsigned long address,
-                    unsigned long size)
-{
-    int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 
-    {
-        return 0;
-    }
-
-    return generic_page_range(mm, address, size, f, NULL);
-}                 
-
-EXPORT_SYMBOL(touch_pte_range);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>