WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-ppc-devel

[XenPPC] [linux-ppc-2.6] [LINUX][XEN][POWERPC] update with the latest Xen

To: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Subject: [XenPPC] [linux-ppc-2.6] [LINUX][XEN][POWERPC] update with the latest Xen VIO
From: Xen patchbot-linux-ppc-2.6 <patchbot-linux-ppc-2.6@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 16 Oct 2006 13:41:25 +0000
Delivery-date: Tue, 17 Oct 2006 06:19:35 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-ppc-devel-request@lists.xensource.com?subject=help>
List-id: Xen PPC development <xen-ppc-devel.lists.xensource.com>
List-post: <mailto:xen-ppc-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-ppc-devel-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 933b1d114a89abe409b50b948c39d3b28dd3e02f
# Parent  f0be2cc05103e19788416719e2b9ec38b38bd26e
[LINUX][XEN][POWERPC] update with the latest Xen VIO

Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
---
 arch/powerpc/platforms/xen/balloon.c       |   46 ++++-
 arch/powerpc/platforms/xen/gnttab.c        |   20 +-
 arch/powerpc/xmon/xmon.c                   |    3 
 drivers/xen/blkback/blkback.c              |   77 ++++----
 drivers/xen/blkback/common.h               |    9 -
 drivers/xen/blkback/interface.c            |   28 +--
 drivers/xen/blkback/xenbus.c               |  126 ++++++++++----
 drivers/xen/blkfront/blkfront.c            |   13 +
 drivers/xen/netback/interface.c            |   29 ++-
 drivers/xen/netback/loopback.c             |   64 +++++++
 drivers/xen/netback/netback.c              |  169 +++++++++----------
 drivers/xen/netback/xenbus.c               |   24 +-
 drivers/xen/netfront/netfront.c            |  129 +++++++++++---
 drivers/xen/xenbus/xenbus_backend_client.c |   13 +
 drivers/xen/xenbus/xenbus_client.c         |   23 ++
 drivers/xen/xenbus/xenbus_comms.c          |   15 -
 drivers/xen/xenbus/xenbus_comms.h          |    2 
 drivers/xen/xenbus/xenbus_dev.c            |  120 +++++++++++++
 drivers/xen/xenbus/xenbus_probe.c          |  252 +++++++++++++++++++++--------
 drivers/xen/xenbus/xenbus_xs.c             |   10 +
 include/asm-powerpc/page.h                 |    1 
 include/asm-powerpc/xen/asm/hypervisor.h   |   11 +
 include/xen/balloon.h                      |    2 
 include/xen/foreign_page.h                 |   31 +++
 include/xen/hvm.h                          |   13 +
 include/xen/xenbus.h                       |    9 -
 26 files changed, 908 insertions(+), 331 deletions(-)

diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/platforms/xen/balloon.c
--- a/arch/powerpc/platforms/xen/balloon.c      Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/platforms/xen/balloon.c      Mon Oct 16 09:31:03 2006 -0400
@@ -1,20 +1,47 @@
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <asm/hypervisor.h>
 
 /*
  * FIXME: Port balloon driver, if ever
  */
 
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 {
-       unsigned long vstart;
-       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+       struct page *page, **pagevec;
+       void *vaddr;
+       int i;
 
-       vstart = __get_free_pages(GFP_KERNEL, order);
-       if (vstart == 0)
-               return NULL;
+       pagevec = kmalloc(sizeof(*pagevec) * nr_pages, GFP_KERNEL);
+       if (pagevec == NULL)
+               return  NULL;
 
-       return virt_to_page(vstart);
+       for (i = 0; i < nr_pages; i++) {
+               page = alloc_page(GFP_KERNEL);
+               pagevec[i] = page;
+               vaddr = page_address(page);
+               scrub_pages(vaddr, 1);
+       }
+
+       return pagevec;
+}
+
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
+{
+       int arch_is_foreign_page(struct page *page);
+       struct page *page;
+       int i;
+
+       if (pagevec == NULL)
+               return;
+
+       for (i = 0; i < nr_pages; i++) {
+               page = pagevec[i];
+               if (!arch_is_foreign_page(page))
+                       __free_page(page);
+       }
+       
+       kfree(pagevec);
 }
 
 void balloon_dealloc_empty_page_range(
@@ -32,6 +59,7 @@ void balloon_release_driver_page(struct 
        BUG();
 }
 
-EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
 EXPORT_SYMBOL_GPL(balloon_release_driver_page);
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/platforms/xen/gnttab.c
--- a/arch/powerpc/platforms/xen/gnttab.c       Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/platforms/xen/gnttab.c       Mon Oct 16 09:31:03 2006 -0400
@@ -39,8 +39,6 @@ static long map_to_linear(ulong paddr)
        mode = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
        vaddr = (ulong)__va(paddr);
 
-       DBG("%s: 0x%lx: 0x%x\n",
-           __func__, paddr, page_count(virt_to_page(vaddr)));
        {
                unsigned long vpn, hash, hpteg;
                unsigned long vsid = get_kernel_vsid(vaddr);
@@ -162,12 +160,9 @@ static void gnttab_pre_unmap_grant_ref(
                        continue;
                }
 
-               DBG("%s: 0x%lx: 0x%x\n",
+               DBG("%s: 0x%lx: page count: 0x%x\n",
                       __func__, ea, page_count(virt_to_page(ea)));
                plpar_pte_remove(0, slot, 0, &dummy1, &dummy2);
-
-               DBG("%s: remove_pages(0x%lx, 0x%lx)\n",
-                   __func__, unmap[i].host_addr, unmap[i].dev_bus_addr);
        }
 }
 
@@ -186,6 +181,9 @@ static void gnttab_post_map_grant_ref(
                /* ??? store the slot somewhere ??? */
                map[i].host_addr = (ulong)__va(pa);
                page = virt_to_page(map[i].host_addr);
+
+               DBG("%s: 0x%lx: 0x%x\n",
+                   __func__, pa, page_count(page));
 
                if (page_count(page) == 1) {
 #ifdef DEBUG                   
@@ -258,11 +256,6 @@ int HYPERVISOR_grant_table_op(unsigned i
        return ret;
 }
 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
-
-ulong foreign_alloc_empty_page_range(unsigned long nr_pages)
-{
-       return (ulong)__va(foreign_map_base);
-}
 
 static ulong setup_grant_maps(void)
 {
@@ -350,3 +343,8 @@ void *arch_gnttab_map(unsigned long *fra
 
        return shared;
 }
+
+int arch_is_foreign_page(struct page *page)
+{
+       return ((page_to_pfn(page) << PAGE_SHIFT) >= foreign_map_base);
+}
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/xmon/xmon.c
--- a/arch/powerpc/xmon/xmon.c  Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/xmon/xmon.c  Mon Oct 16 09:31:03 2006 -0400
@@ -753,6 +753,9 @@ cmds(struct pt_regs *excp)
                        cmd = inchar();
                }
                switch (cmd) {
+               case 'A':
+                       asm volatile(".long 0x200;nop");
+                       break;
                case 'm':
                        cmd = inchar();
                        switch (cmd) {
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/blkback.c
--- a/drivers/xen/blkback/blkback.c     Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/blkback.c     Mon Oct 16 09:31:03 2006 -0400
@@ -55,8 +55,6 @@ static int blkif_reqs = 64;
 static int blkif_reqs = 64;
 module_param_named(reqs, blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-
-static int mmap_pages;
 
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats = 0;
@@ -87,8 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pending_f
 
 #define BLKBACK_INVALID_HANDLE (~0)
 
-static unsigned long mmap_vstart;
-static unsigned long *pending_vaddrs;
+static struct page **pending_pages;
 static grant_handle_t *pending_grant_handles;
 
 static inline int vaddr_pagenr(pending_req_t *req, int seg)
@@ -98,8 +95,23 @@ static inline int vaddr_pagenr(pending_r
 
 static inline unsigned long vaddr(pending_req_t *req, int seg)
 {
-       return pending_vaddrs[vaddr_pagenr(req, seg)];
-}
+       unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
+       return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+#ifdef CONFIG_PPC_XEN
+static inline void update_pending_pages(
+       unsigned int idx, gnttab_map_grant_ref_t *mop)
+{
+#ifdef PPC_NOT_YET
+       extern int arch_is_foreign_page(struct page *page);
+
+       if (!arch_is_foreign_page(pending_pages[idx]))
+               __free_page(pending_pages[idx]);
+#endif
+       pending_pages[idx] = pfn_to_page(mop->dev_bus_addr >> PAGE_SHIFT);
+}
+#endif
 
 #define pending_handle(_req, _seg) \
        (pending_grant_handles[vaddr_pagenr(_req, _seg)])
@@ -399,8 +411,7 @@ static void dispatch_rw_block_io(blkif_t
 
                pending_handle(pending_req, i) = map[i].handle;
 #ifdef CONFIG_PPC_XEN
-               pending_vaddrs[vaddr_pagenr(pending_req, i)] =
-                       (unsigned long)gnttab_map_vaddr(map[i]);
+               update_pending_pages(vaddr_pagenr(pending_req, i), &map[i]);
 #else
                set_phys_to_machine(__pa(vaddr(
                        pending_req, i)) >> PAGE_SHIFT,
@@ -511,57 +522,43 @@ static void make_response(blkif_t *blkif
 
 static int __init blkif_init(void)
 {
-       struct page *page;
-       int i;
+       int i, mmap_pages;
 
        if (!is_running_on_xen())
                return -ENODEV;
-
-       mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-#ifdef CONFIG_PPC_XEN
-       (void)page;
-       mmap_vstart = foreign_alloc_empty_page_range(mmap_pages);
-#else
-       page = balloon_alloc_empty_page_range(mmap_pages);
-       if (page == NULL)
-               return -ENOMEM;
-       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-#endif
+       
+       mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
        pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
                                        blkif_reqs, GFP_KERNEL);
        pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
-       pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
-                                       mmap_pages, GFP_KERNEL);
-       if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
-               kfree(pending_reqs);
-               kfree(pending_grant_handles);
-               kfree(pending_vaddrs);
-               printk("%s: out of memory\n", __FUNCTION__);
-               return -ENOMEM;
-       }
+       pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
+
+       if (!pending_reqs || !pending_grant_handles || !pending_pages)
+               goto out_of_memory;
+
+       for (i = 0; i < mmap_pages; i++)
+               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
 
        blkif_interface_init();
-       
-       printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
-              __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
-       BUG_ON(mmap_vstart == 0);
-       for (i = 0; i < mmap_pages; i++) {
-               pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
-               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-       }
 
        memset(pending_reqs, 0, sizeof(pending_reqs));
        INIT_LIST_HEAD(&pending_free);
 
        for (i = 0; i < blkif_reqs; i++)
                list_add_tail(&pending_reqs[i].free_list, &pending_free);
-    
+
        blkif_xenbus_init();
 
        return 0;
+
+ out_of_memory:
+       kfree(pending_reqs);
+       kfree(pending_grant_handles);
+       free_empty_pages_and_pagevec(pending_pages, mmap_pages);
+       printk("%s: out of memory\n", __FUNCTION__);
+       return -ENOMEM;
 }
 
 module_init(blkif_init);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/common.h
--- a/drivers/xen/blkback/common.h      Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/common.h      Mon Oct 16 09:31:03 2006 -0400
@@ -55,9 +55,9 @@ struct vbd {
        unsigned char  type;        /* VDISK_xxx */
        u32            pdevice;     /* phys device that this vbd maps to */
        struct block_device *bdev;
-}; 
+};
 
-struct backend_info; 
+struct backend_info;
 
 typedef struct blkif_st {
        /* Unique identifier for this interface. */
@@ -72,7 +72,7 @@ typedef struct blkif_st {
        /* The VBD attached to this interface. */
        struct vbd        vbd;
        /* Back pointer to the backend_info. */
-       struct backend_info *be; 
+       struct backend_info *be;
        /* Private fields. */
        spinlock_t       blk_ring_lock;
        atomic_t         refcnt;
@@ -95,6 +95,7 @@ typedef struct blkif_st {
 } blkif_t;
 
 blkif_t *blkif_alloc(domid_t domid);
+void blkif_disconnect(blkif_t *blkif);
 void blkif_free(blkif_t *blkif);
 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
 
@@ -121,7 +122,7 @@ struct phys_req {
        blkif_sector_t       sector_number;
 };
 
-int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
 
 void blkif_interface_init(void);
 
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/interface.c
--- a/drivers/xen/blkback/interface.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/interface.c   Mon Oct 16 09:31:03 2006 -0400
@@ -32,6 +32,7 @@
 
 #include "common.h"
 #include <xen/evtchn.h>
+#include <linux/kthread.h>
 
 static kmem_cache_t *blkif_cachep;
 
@@ -75,12 +76,6 @@ static int map_frontend_page(blkif_t *bl
 
        blkif->shmem_ref = shared_page;
        blkif->shmem_handle = op.handle;
-
-#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
-       /* on some arch's, map_grant_ref behaves like mmap, in that the
-        * passed address is a hint and a different address may be returned */
-       blkif->blk_ring_area->addr = gnttab_map_vaddr(op);
-#endif
 
        return 0;
 }
@@ -140,22 +135,33 @@ int blkif_map(blkif_t *blkif, unsigned l
        return 0;
 }
 
-void blkif_free(blkif_t *blkif)
+void blkif_disconnect(blkif_t *blkif)
 {
+       if (blkif->xenblkd) {
+               kthread_stop(blkif->xenblkd);
+               blkif->xenblkd = NULL;
+       }
+
        atomic_dec(&blkif->refcnt);
        wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+       atomic_inc(&blkif->refcnt);
 
-       /* Already disconnected? */
-       if (blkif->irq)
+       if (blkif->irq) {
                unbind_from_irqhandler(blkif->irq, blkif);
-
-       vbd_free(&blkif->vbd);
+               blkif->irq = 0;
+       }
 
        if (blkif->blk_ring.sring) {
                unmap_frontend_page(blkif);
                free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
        }
+}
 
+void blkif_free(blkif_t *blkif)
+{
+       if (!atomic_dec_and_test(&blkif->refcnt))
+               BUG();
        kmem_cache_free(blkif_cachep, blkif);
 }
 
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/xenbus.c
--- a/drivers/xen/blkback/xenbus.c      Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/xenbus.c      Mon Oct 16 09:31:03 2006 -0400
@@ -42,7 +42,6 @@ static int connect_ring(struct backend_i
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
-
 
 static void update_blkif_status(blkif_t *blkif)
 { 
@@ -73,26 +72,71 @@ static void update_blkif_status(blkif_t 
 }
 
 
-static ssize_t show_physical_device(struct device *_dev,
-                                   struct device_attribute *attr, char *buf)
-{
-       struct xenbus_device *dev = to_xenbus_device(_dev);
-       struct backend_info *be = dev->dev.driver_data;
-       return sprintf(buf, "%x:%x\n", be->major, be->minor);
-}
-DEVICE_ATTR(physical_device, S_IRUSR | S_IRGRP | S_IROTH,
-           show_physical_device, NULL);
-
-
-static ssize_t show_mode(struct device *_dev, struct device_attribute *attr,
-                        char *buf)
-{
-       struct xenbus_device *dev = to_xenbus_device(_dev);
-       struct backend_info *be = dev->dev.driver_data;
-       return sprintf(buf, "%s\n", be->mode);
-}
-DEVICE_ATTR(mode, S_IRUSR | S_IRGRP | S_IROTH, show_mode, NULL);
-
+/****************************************************************
+ *  sysfs interface for VBD I/O requests
+ */
+
+#define VBD_SHOW(name, format, args...)                                 \
+       static ssize_t show_##name(struct device *_dev,                 \
+                                  struct device_attribute *attr,       \
+                                  char *buf)                           \
+       {                                                               \
+               struct xenbus_device *dev = to_xenbus_device(_dev);     \
+               struct backend_info *be = dev->dev.driver_data;         \
+                                                                       \
+               return sprintf(buf, format, ##args);                    \
+       }                                                               \
+       DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+
+static struct attribute *vbdstat_attrs[] = {
+       &dev_attr_oo_req.attr,
+       &dev_attr_rd_req.attr,
+       &dev_attr_wr_req.attr,
+       NULL
+};
+
+static struct attribute_group vbdstat_group = {
+       .name = "statistics",
+       .attrs = vbdstat_attrs,
+};
+
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
+VBD_SHOW(mode, "%s\n", be->mode);
+
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+       int error;
+       
+       error = device_create_file(&dev->dev, &dev_attr_physical_device);
+       if (error)
+               goto fail1;
+
+       error = device_create_file(&dev->dev, &dev_attr_mode);
+       if (error)
+               goto fail2;
+
+       error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
+       if (error)
+               goto fail3;
+
+       return 0;
+
+fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+fail2: device_remove_file(&dev->dev, &dev_attr_mode);
+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
+       return error;
+}
+
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+       sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+       device_remove_file(&dev->dev, &dev_attr_mode);
+       device_remove_file(&dev->dev, &dev_attr_physical_device);
+}
 
 static int blkback_remove(struct xenbus_device *dev)
 {
@@ -105,15 +149,16 @@ static int blkback_remove(struct xenbus_
                kfree(be->backend_watch.node);
                be->backend_watch.node = NULL;
        }
+
        if (be->blkif) {
-               if (be->blkif->xenblkd)
-                       kthread_stop(be->blkif->xenblkd);
+               blkif_disconnect(be->blkif);
+               vbd_free(&be->blkif->vbd);
                blkif_free(be->blkif);
                be->blkif = NULL;
        }
 
-       device_remove_file(&dev->dev, &dev_attr_physical_device);
-       device_remove_file(&dev->dev, &dev_attr_mode);
+       if (be->major || be->minor)
+               xenvbd_sysfs_delif(dev);
 
        kfree(be);
        dev->dev.driver_data = NULL;
@@ -149,7 +194,7 @@ static int blkback_probe(struct xenbus_d
        }
 
        /* setup back pointer */
-       be->blkif->be = be; 
+       be->blkif->be = be;
 
        err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
                                 &be->backend_watch, backend_changed);
@@ -228,17 +273,21 @@ static void backend_changed(struct xenbu
                err = vbd_create(be->blkif, handle, major, minor,
                                 (NULL == strchr(be->mode, 'w')));
                if (err) {
-                       be->major = 0;
-                       be->minor = 0;
+                       be->major = be->minor = 0;
                        xenbus_dev_fatal(dev, err, "creating vbd structure");
                        return;
                }
 
-               device_create_file(&dev->dev, &dev_attr_physical_device);
-               device_create_file(&dev->dev, &dev_attr_mode);
+               err = xenvbd_sysfs_addif(dev);
+               if (err) {
+                       vbd_free(&be->blkif->vbd);
+                       be->major = be->minor = 0;
+                       xenbus_dev_fatal(dev, err, "creating sysfs entries");
+                       return;
+               }
 
                /* We're potentially connected now */
-               update_blkif_status(be->blkif); 
+               update_blkif_status(be->blkif);
        }
 }
 
@@ -252,10 +301,15 @@ static void frontend_changed(struct xenb
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", xenbus_strstate(frontend_state));
 
        switch (frontend_state) {
        case XenbusStateInitialising:
+               if (dev->state == XenbusStateClosed) {
+                       printk("%s: %s: prepare for reconnect\n",
+                              __FUNCTION__, dev->nodename);
+                       xenbus_switch_state(dev, XenbusStateInitWait);
+               }
                break;
 
        case XenbusStateInitialised:
@@ -273,15 +327,19 @@ static void frontend_changed(struct xenb
                break;
 
        case XenbusStateClosing:
+               blkif_disconnect(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
        case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
+       case XenbusStateUnknown:
                device_unregister(&dev->dev);
                break;
 
-       case XenbusStateUnknown:
-       case XenbusStateInitWait:
        default:
                xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                                 frontend_state);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkfront/blkfront.c
--- a/drivers/xen/blkfront/blkfront.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkfront/blkfront.c   Mon Oct 16 09:31:03 2006 -0400
@@ -46,6 +46,7 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 #include <asm/hypervisor.h>
+#include <asm/maddr.h>
 
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED    1
@@ -255,10 +256,10 @@ static void backend_changed(struct xenbu
        DPRINTK("blkfront:backend_changed.\n");
 
        switch (backend_state) {
-       case XenbusStateUnknown:
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                break;
 
@@ -354,12 +355,14 @@ static void blkfront_closing(struct xenb
        blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       /* Flush gnttab callback work. Must be done with no locks held. */
        flush_scheduled_work();
-       spin_unlock_irqrestore(&blkif_io_lock, flags);
 
        xlvbd_del(info);
 
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
@@ -713,8 +716,10 @@ static void blkif_free(struct blkfront_i
                blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
+       spin_unlock_irq(&blkif_io_lock);
+
+       /* Flush gnttab callback work. Must be done with no locks held. */
        flush_scheduled_work();
-       spin_unlock_irq(&blkif_io_lock);
 
        /* Free resources associated with old device channel. */
        if (info->ring_ref != GRANT_INVALID_REF) {
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/interface.c
--- a/drivers/xen/netback/interface.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/interface.c   Mon Oct 16 09:31:03 2006 -0400
@@ -34,6 +34,24 @@
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 
+/*
+ * Module parameter 'queue_length':
+ * 
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
+ * unbounded time. This is bad if those packets hold onto foreign resources.
+ * For example, consider a packet that holds onto resources belonging to the
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
+ * vif1.1 which is not activated in the guest): in this situation the guest
+ * will never be destroyed, unless vif1.1 is taken down (which flushes the
+ * 'tx_queue').
+ * 
+ * Only set this parameter to non-zero value if you know what you are doing!
+ */
+static unsigned long netbk_queue_length = 0;
+module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
 static void __netif_up(netif_t *netif)
 {
        enable_irq(netif->irq);
@@ -44,6 +62,7 @@ static void __netif_down(netif_t *netif)
 {
        disable_irq(netif->irq);
        netif_deschedule_work(netif);
+       del_timer_sync(&netif->credit_timeout);
 }
 
 static int net_open(struct net_device *dev)
@@ -134,6 +153,7 @@ netif_t *netif_alloc(domid_t domid, unsi
        netif->credit_bytes = netif->remaining_credit = ~0UL;
        netif->credit_usec  = 0UL;
        init_timer(&netif->credit_timeout);
+       netif->credit_timeout.expires = jiffies;
 
        dev->hard_start_xmit = netif_be_start_xmit;
        dev->get_stats       = netif_be_get_stats;
@@ -144,11 +164,10 @@ netif_t *netif_alloc(domid_t domid, unsi
 
        SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
 
-       /*
-        * Reduce default TX queuelen so that each guest interface only
-        * allows it to eat around 6.4MB of host memory.
-        */
-       dev->tx_queue_len = 100;
+       dev->tx_queue_len = netbk_queue_length;
+       if (dev->tx_queue_len != 0)
+               printk(KERN_WARNING "netbk: WARNING: device '%s' has non-zero "
+                      "queue length (%lu)!\n", dev->name, dev->tx_queue_len);
 
        for (i = 0; i < ETH_ALEN; i++)
                if (be_mac[i] != 0)
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/loopback.c
--- a/drivers/xen/netback/loopback.c    Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/loopback.c    Mon Oct 16 09:31:03 2006 -0400
@@ -53,8 +53,10 @@
 #include <linux/skbuff.h>
 #include <linux/ethtool.h>
 #include <net/dst.h>
-
-static int nloopbacks = 8;
+#include <net/xfrm.h>          /* secpath_reset() */
+#include <asm/hypervisor.h>    /* is_initial_xendomain() */
+
+static int nloopbacks = -1;
 module_param(nloopbacks, int, 0);
 MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
 
@@ -77,9 +79,59 @@ static int loopback_close(struct net_dev
        return 0;
 }
 
+#ifdef CONFIG_X86
+static int is_foreign(unsigned long pfn)
+{
+       /* NB. Play it safe for auto-translation mode. */
+       return (xen_feature(XENFEAT_auto_translated_physmap) ||
+               (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
+}
+#else
+/* How to detect a foreign mapping? Play it safe. */
+#define is_foreign(pfn)        (1)
+#endif
+
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+       struct page *page;
+       unsigned long pfn;
+       int i, off;
+       char *vaddr;
+
+       BUG_ON(skb_shinfo(skb)->frag_list);
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+               if (!is_foreign(pfn))
+                       continue;
+               
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+               if (unlikely(!page))
+                       return 0;
+
+               vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+               off = skb_shinfo(skb)->frags[i].page_offset;
+               memcpy(page_address(page) + off,
+                      vaddr + off,
+                      skb_shinfo(skb)->frags[i].size);
+               kunmap_skb_frag(vaddr);
+
+               put_page(skb_shinfo(skb)->frags[i].page);
+               skb_shinfo(skb)->frags[i].page = page;
+       }
+
+       return 1;
+}
+
 static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct net_private *np = netdev_priv(dev);
+
+       if (!skb_remove_foreign_references(skb)) {
+               np->stats.tx_dropped++;
+               dev_kfree_skb(skb);
+               return 0;
+       }
 
        dst_release(skb->dst);
        skb->dst = NULL;
@@ -110,6 +162,11 @@ static int loopback_start_xmit(struct sk
        skb->protocol = eth_type_trans(skb, dev);
        skb->dev      = dev;
        dev->last_rx  = jiffies;
+
+       /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
+       nf_reset(skb);
+       secpath_reset(skb);
+
        netif_rx(skb);
 
        return 0;
@@ -239,6 +296,9 @@ static int __init loopback_init(void)
 {
        int i, err = 0;
 
+       if (nloopbacks == -1)
+               nloopbacks = is_initial_xendomain() ? 4 : 0;
+
        for (i = 0; i < nloopbacks; i++)
                if ((err = make_loopback(i)) != 0)
                        break;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/netback.c
--- a/drivers/xen/netback/netback.c     Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/netback.c     Mon Oct 16 09:31:03 2006 -0400
@@ -70,35 +70,43 @@ static struct timer_list net_timer;
 
 static struct sk_buff_head rx_queue;
 
-static unsigned long mmap_vstart;
+static struct page **mmap_pages;
+static inline unsigned long idx_to_kaddr(unsigned int idx)
+{
+       return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
+}
+
 #ifdef CONFIG_PPC_XEN
 
-
-static ulong mmap_vaddrs[MAX_PENDING_REQS];
-#define MMAP_VADDR(_req) (mmap_vaddrs[(_req)])
-
-static inline void PPC_map_vaddrs(int idx, gnttab_map_grant_ref_t *mop)
-{
-       struct page *page;
-       ulong virt = mop->host_addr;
-
-       page = virt_to_page(virt);
-       get_page(page);
-
-#if 0
-       SetPageForeign(page, netif_page_release);
-#else
-       (void)netif_page_release;
+struct address_space xen_foreign_dummy_mapping;
+
+static inline void update_mmap_pages(
+       unsigned int idx, gnttab_map_grant_ref_t *mop)
+{
+       struct page *p;
+#ifdef PPC_NOT_YET
+       struct page *cp = mmap_pages[idx];
+       extern int arch_is_foreign_page(struct page *page);
+
+       if (arch_is_foreign_page(cp)) {
+               printk(KERN_EMERG "%s foreign: %p, 0x%x\n",
+                      __func__, page_address(cp), page_count(cp));
+       } else {
+               printk(KERN_EMERG "%s local:  %p, 0x%x\n",
+                      __func__, page_address(cp), page_count(cp));
+       //              __free_page(mmap_pages[idx]);
+       }
+       
 #endif
-       mmap_vaddrs[idx] = virt;
-}
-#define _mmap_vaddrs(i,op) do { mmap_vaddrs[(i)] = mop->host_addr;
-#else
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-#endif
-
-#ifndef CONFIG_PPC_XEN
-static void *rx_mmap_area;
+       p = pfn_to_page(mop->dev_bus_addr >> PAGE_SHIFT);
+
+       DPRINTK(KERN_EMERG "%s insert[%d]:  0x%lx, 0x%x\n",
+              __func__, idx, __va(mop->dev_bus_addr), page_count(p));
+
+       SetPageForeign(p, netif_page_release);
+       p->index = idx;
+       mmap_pages[idx] = p;
+}
 #endif
 
 #define PKT_PROT_LEN 64
@@ -243,7 +251,7 @@ static struct sk_buff *netbk_copy_skb(st
                copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
                zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
 
-               page = alloc_page(GFP_ATOMIC | zero);
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
                if (unlikely(!page))
                        goto err_free;
 
@@ -818,10 +826,27 @@ void netif_deschedule_work(netif_t *neti
 }
 
 
+static void tx_add_credit(netif_t *netif)
+{
+       unsigned long max_burst, max_credit;
+
+       /*
+        * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+        * Otherwise the interface can seize up due to insufficient credit.
+        */
+       max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+       max_burst = min(max_burst, 131072UL);
+       max_burst = max(max_burst, netif->credit_bytes);
+       max_credit = netif->remaining_credit + netif->credit_bytes;
+       if (max_credit < netif->remaining_credit)
+               max_credit = ~0UL; /* unsigned sum wrapped: saturate instead */
+       netif->remaining_credit = min(max_credit, max_burst);
+}
+
 static void tx_credit_callback(unsigned long data)
 {
        netif_t *netif = (netif_t *)data;
-       netif->remaining_credit = netif->credit_bytes;
+       tx_add_credit(netif);
        netif_schedule_work(netif);
 }
 
@@ -845,7 +870,7 @@ inline static void net_tx_action_dealloc
        gop = tx_unmap_ops;
        while (dc != dp) {
                pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-               gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
+               gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
                                    GNTMAP_host_map,
                                    grant_tx_handle[pending_idx]);
                gop++;
@@ -933,7 +958,7 @@ static gnttab_map_grant_ref_t *netbk_get
                txp = RING_GET_REQUEST(&netif->tx, cons++);
                pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
 
-               gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+               gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
                                  GNTMAP_host_map | GNTMAP_readonly,
                                  txp->gref, netif->domid);
 
@@ -966,10 +991,10 @@ static int netbk_tx_check_mop(struct sk_
                netif_put(netif);
        } else {
 #ifdef CONFIG_PPC_XEN
-               PPC_map_vaddrs(pending_idx, mop);
+               update_mmap_pages(pending_idx, mop);
 #else
                set_phys_to_machine(
-                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                       __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
                        FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
 #endif
                grant_tx_handle[pending_idx] = mop->handle;
@@ -987,10 +1012,10 @@ static int netbk_tx_check_mop(struct sk_
                newerr = (++mop)->status;
                if (likely(!newerr)) {
 #ifdef CONFIG_PPC_XEN
-                       PPC_map_vaddrs(pending_idx, mop);
+                       update_mmap_pages(pending_idx, mop);
 #else
                        set_phys_to_machine(
-                               __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+                               __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
                                FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
 #endif
                        grant_tx_handle[pending_idx] = mop->handle;
@@ -1039,7 +1064,7 @@ static void netbk_fill_frags(struct sk_b
 
                pending_idx = (unsigned long)frag->page;
                txp = &pending_tx_info[pending_idx].req;
-               frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+               frag->page = virt_to_page(idx_to_kaddr(pending_idx));
                frag->size = txp->size;
                frag->page_offset = txp->offset;
 
@@ -1135,6 +1160,7 @@ static void net_tx_action(unsigned long 
                i = netif->tx.req_cons;
                rmb(); /* Ensure that we see the request before we copy it. */
                memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
                /* Credit-based scheduling. */
                if (txreq.size > netif->remaining_credit) {
                        unsigned long now = jiffies;
@@ -1143,25 +1169,27 @@ static void net_tx_action(unsigned long 
                                msecs_to_jiffies(netif->credit_usec / 1000);
 
                        /* Timer could already be pending in rare cases. */
-                       if (timer_pending(&netif->credit_timeout))
-                               break;
+                       if (timer_pending(&netif->credit_timeout)) {
+                               netif_put(netif);
+                               continue;
+                       }
 
                        /* Passed the point where we can replenish credit? */
                        if (time_after_eq(now, next_credit)) {
                                netif->credit_timeout.expires = now;
-                               netif->remaining_credit = netif->credit_bytes;
+                               tx_add_credit(netif);
                        }
 
                        /* Still too big to send right now? Set a callback. */
                        if (txreq.size > netif->remaining_credit) {
-                               netif->remaining_credit = 0;
                                netif->credit_timeout.data     =
                                        (unsigned long)netif;
                                netif->credit_timeout.function =
                                        tx_credit_callback;
                                __mod_timer(&netif->credit_timeout,
                                            next_credit);
-                               break;
+                               netif_put(netif);
+                               continue;
                        }
                }
                netif->remaining_credit -= txreq.size;
@@ -1235,7 +1263,7 @@ static void net_tx_action(unsigned long 
                        }
                }
 
-               gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
+               gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
                                  GNTMAP_host_map | GNTMAP_readonly,
                                  txreq.gref, netif->domid);
                mop++;
@@ -1294,8 +1322,8 @@ static void net_tx_action(unsigned long 
                }
 
                data_len = skb->len;
-               memcpy(skb->data, 
-                      (void *)(MMAP_VADDR(pending_idx)|txp->offset),
+               memcpy(skb->data,
+                      (void *)(idx_to_kaddr(pending_idx)|txp->offset),
                       data_len);
                if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
@@ -1349,22 +1377,14 @@ static void netif_idx_release(u16 pendin
 
 static void netif_page_release(struct page *page)
 {
-       u16 pending_idx = page - virt_to_page(mmap_vstart);
-
-#ifdef CONFIG_PPC_XEN  /* Ready for next use. */
-       BUG();
-#endif 
-       init_page_count(page);
-       netif_idx_release(pending_idx);
-}
-
-#ifndef CONFIG_PPC_XEN
-static void netif_rx_page_release(struct page *page)
-{
        /* Ready for next use. */
        init_page_count(page);
-}
+#ifdef CONFIG_PPC_XEN
+       /* we need the count to be 2 so the unmap occurs */
+       get_page(page);
 #endif
+       netif_idx_release(page->index);
+}
 
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
@@ -1474,10 +1494,6 @@ static int __init netback_init(void)
        if (!is_running_on_xen())
                return -ENODEV;
 
-#ifdef CONFIG_PPC_XEN
-       if (!(xen_start_info->flags & SIF_INITDOMAIN))
-               return -ENODEV;
-#endif
        /* We can increase reservation by this much in net_rx_action(). */
        balloon_update_driver_allowance(NET_RX_RING_SIZE);
 
@@ -1488,35 +1504,16 @@ static int __init netback_init(void)
        net_timer.data = 0;
        net_timer.function = net_alarm;
 
-#ifdef CONFIG_PPC_XEN
-       (void)page;
-       mmap_vstart = foreign_alloc_empty_page_range(MAX_PENDING_REQS);
-#else
-       page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
-       if (page == NULL)
+       mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+       if (mmap_pages == NULL) {
+               printk("%s: out of memory\n", __FUNCTION__);
                return -ENOMEM;
-
-       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
+       }
        for (i = 0; i < MAX_PENDING_REQS; i++) {
-               page = virt_to_page(MMAP_VADDR(i));
-               init_page_count(page);
+               page = mmap_pages[i];
                SetPageForeign(page, netif_page_release);
-       }
-#endif
-
-#ifndef CONFIG_PPC_XEN
-       page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
-       BUG_ON(page == NULL);
-       rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
-
-       for (i = 0; i < NET_RX_RING_SIZE; i++) {
-               page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
-               init_page_count(page);
-               SetPageForeign(page, netif_rx_page_release);
-       }
-#endif
-
+               page->index = i;
+       }
        pending_cons = 0;
        pending_prod = MAX_PENDING_REQS;
        for (i = 0; i < MAX_PENDING_REQS; i++)
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/xenbus.c
--- a/drivers/xen/netback/xenbus.c      Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/xenbus.c      Mon Oct 16 09:31:03 2006 -0400
@@ -262,10 +262,8 @@ static void frontend_changed(struct xenb
 
        case XenbusStateClosed:
                xenbus_switch_state(dev, XenbusStateClosed);
-#ifdef JX
                if (xenbus_dev_is_online(dev))
                        break;
-#endif
                /* fall through if not online */
        case XenbusStateUnknown:
                if (be->netif != NULL)
@@ -368,6 +366,10 @@ static void connect(struct backend_info 
        be->netif->remaining_credit = be->netif->credit_bytes;
 
        xenbus_switch_state(dev, XenbusStateConnected);
+
+       /* May not get a kick from the frontend, so start the tx_queue now. */
+       if (!netbk_can_queue(be->netif->dev))
+               netif_start_queue(be->netif->dev);
 }
 
 
@@ -405,14 +407,16 @@ static int connect_rings(struct backend_
        }
        be->netif->copying_receiver = !!rx_copy;
 
-       if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
-                        &val) < 0)
-               val = 0;
-       if (val)
-               be->netif->can_queue = 1;
-       else
-               /* Must be non-zero for pfifo_fast to work. */
-               be->netif->dev->tx_queue_len = 1;
+       if (be->netif->dev->tx_queue_len != 0) {
+               if (xenbus_scanf(XBT_NIL, dev->otherend,
+                                "feature-rx-notify", "%d", &val) < 0)
+                       val = 0;
+               if (val)
+                       be->netif->can_queue = 1;
+               else
+                       /* Must be non-zero for pfifo_fast to work. */
+                       be->netif->dev->tx_queue_len = 1;
+       }
 
        if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
                val = 0;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netfront/netfront.c
--- a/drivers/xen/netfront/netfront.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netfront/netfront.c   Mon Oct 16 09:31:03 2006 -0400
@@ -47,6 +47,7 @@
 #include <linux/in.h>
 #include <linux/if_ether.h>
 #include <linux/io.h>
+#include <linux/moduleparam.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/arp.h>
@@ -63,20 +64,64 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 
+/*
+ * Mutually-exclusive module options to select receive data path:
+ *  rx_copy : Packets are copied by network backend into local memory
+ *  rx_flip : Page containing packet data is transferred to our ownership
+ * For fully-virtualised guests there is no option - copying must be used.
+ * For paravirtualised guests, flipping is the default.
+ */
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
 #define RX_COPY_THRESHOLD 256
 
 /* If we don't have GSO, fake things up so that we never try to use it. */
-#ifndef NETIF_F_GSO
-#define netif_needs_gso(dev, skb)      0
-#define dev_disable_gso_features(dev)  ((void)0)
-#else
+#if defined(NETIF_F_GSO)
 #define HAVE_GSO                       1
+#define HAVE_TSO                       1 /* TSO is a subset of GSO */
 static inline void dev_disable_gso_features(struct net_device *dev)
 {
        /* Turn off all GSO bits except ROBUST. */
        dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
        dev->features |= NETIF_F_GSO_ROBUST;
 }
+#elif defined(NETIF_F_TSO)
+#define HAVE_TSO                       1
+#define gso_size tso_size
+#define gso_segs tso_segs
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+       /* Turn off all TSO bits. */
+       dev->features &= ~NETIF_F_TSO;
+}
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+        return skb_shinfo(skb)->tso_size;
+}
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+        return (features & NETIF_F_TSO);
+}
+
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+        return skb_is_gso(skb) &&
+               (!skb_gso_ok(skb, dev->features) ||
+                unlikely(skb->ip_summed != CHECKSUM_HW));
+}
+#else
+#define netif_needs_gso(dev, skb)      0
+#define dev_disable_gso_features(dev)  ((void)0)
 #endif
 
 #define GRANT_INVALID_REF      0
@@ -120,7 +165,7 @@ struct netfront_info {
        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
        grant_ref_t gref_rx_head;
-       grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
+       grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 
        struct xenbus_device *xbdev;
        int tx_ring_ref;
@@ -229,8 +274,7 @@ static int __devinit netfront_probe(stru
        int err;
        struct net_device *netdev;
        struct netfront_info *info;
-       unsigned int handle;
-       unsigned feature_rx_copy;
+       unsigned int handle, feature_rx_copy, feature_rx_flip, use_copy;
 
        err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
        if (err != 1) {
@@ -238,24 +282,28 @@ static int __devinit netfront_probe(stru
                return err;
        }
 
-#ifdef CONFIG_PPC_XEN
        err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
                           &feature_rx_copy);
-       BUG_ON(err != 1);
-       if (err != 1) {
-               xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
-               return err;
-       }
-       BUG_ON(!feature_rx_copy);
-       if (!feature_rx_copy) {
-               xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
-               return -EINVAL;
-       }
-#else
-       feature_rx_copy = 0;
+       if (err != 1)
+               feature_rx_copy = 0;
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-flip", "%u",
+                          &feature_rx_flip);
+       if (err != 1)
+               feature_rx_flip = 1;
+
+       /*
+        * Copy packets on receive path if:
+        *  (a) This was requested by user, and the backend supports it; or
+        *  (b) Flipping was requested, but this is unsupported by the backend.
+        */
+       use_copy = (MODPARM_rx_copy && feature_rx_copy) ||
+               (MODPARM_rx_flip && !feature_rx_flip);
+
+#ifdef CONFIG_PPC_XEN
+       if (!use_copy)
+               panic("NetFront _must_ use rx copy feature on PowerPC\n");
 #endif
-
-       netdev = create_netdev(handle, feature_rx_copy, dev);
+       netdev = create_netdev(handle, use_copy, dev);
        if (IS_ERR(netdev)) {
                err = PTR_ERR(netdev);
                xenbus_dev_fatal(dev, err, "creating netdev");
@@ -272,6 +320,9 @@ static int __devinit netfront_probe(stru
        err = open_netdev(info);
        if (err)
                goto fail_open;
+
+       IPRINTK("Created netdev %s with %sing receive path.\n",
+               netdev->name, info->copying_receiver ? "copy" : "flipp");
 
        return 0;
 
@@ -387,7 +438,7 @@ again:
                goto abort_transaction;
        }
 
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
        if (err) {
                message = "writing feature-gso-tcpv4";
@@ -744,7 +795,7 @@ no_skb:
                } else {
                        gnttab_grant_foreign_access_ref(ref,
                                                        np->xbdev->otherend_id,
-                                                       pfn,
+                                                       pfn_to_mfn(pfn),
                                                        0);
                }
 
@@ -919,7 +970,7 @@ static int network_start_xmit(struct sk_
                tx->flags |= NETTXF_data_validated;
 #endif
 
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
        if (skb_shinfo(skb)->gso_size) {
                struct netif_extra_info *gso = (struct netif_extra_info *)
                        RING_GET_REQUEST(&np->tx, ++i);
@@ -1207,12 +1258,14 @@ static int xennet_set_skb_gso(struct sk_
                return -EINVAL;
        }
 
+#ifdef HAVE_TSO
+       skb_shinfo(skb)->gso_size = gso->u.gso.size;
 #ifdef HAVE_GSO
-       skb_shinfo(skb)->gso_size = gso->u.gso.size;
        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
        /* Header must be checked, and gso_segs computed. */
        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+#endif
        skb_shinfo(skb)->gso_segs = 0;
 
        return 0;
@@ -1563,7 +1616,7 @@ static int xennet_set_sg(struct net_devi
 
 static int xennet_set_tso(struct net_device *dev, u32 data)
 {
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
        if (data) {
                struct netfront_info *np = netdev_priv(dev);
                int val;
@@ -1634,7 +1687,8 @@ static void network_connect(struct net_d
                } else {
                        gnttab_grant_foreign_access_ref(
                                ref, np->xbdev->otherend_id,
-                               page_to_pfn(skb_shinfo(skb)->frags->page),
+                               pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
+                                                      frags->page)),
                                0);
                }
                req->gref = ref;
@@ -1941,7 +1995,7 @@ static void netfront_closing(struct xenb
        DPRINTK("%s\n", dev->nodename);
 
        close_netdev(info);
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
@@ -2055,6 +2109,23 @@ static int __init netif_init(void)
        if (!is_running_on_xen())
                return -ENODEV;
 
+#ifdef CONFIG_PPC_XEN
+       if (MODPARM_rx_flip || !MODPARM_rx_copy) {
+               WPRINTK("PowerPC forcing rx_copy.\n");
+               MODPARM_rx_flip = 0;
+               MODPARM_rx_copy = 1;
+       }
+#endif
+#ifdef CONFIG_XEN
+       if (MODPARM_rx_flip && MODPARM_rx_copy) {
+               WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
+               return -EINVAL;
+       }
+
+       if (!MODPARM_rx_flip && !MODPARM_rx_copy)
+               MODPARM_rx_flip = 1; /* Default is to flip. */
+#endif
+
        if (is_initial_xendomain())
                return 0;
 
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_backend_client.c
--- a/drivers/xen/xenbus/xenbus_backend_client.c        Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_backend_client.c        Mon Oct 16 09:31:03 2006 -0400
@@ -50,7 +50,6 @@ struct vm_struct *xenbus_map_ring_valloc
        
        lock_vm_area(area);
        BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
-       update_vm_area(area, &op);
        unlock_vm_area(area);
 
        if (op.status != GNTST_okay) {
@@ -133,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
 
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+       int rc, val;
+
+       rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+       if (rc != 1)
+               val = 0; /* no online node present */
+
+       return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
 MODULE_LICENSE("Dual BSD/GPL");
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_client.c
--- a/drivers/xen/xenbus/xenbus_client.c        Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_client.c        Mon Oct 16 09:31:03 2006 -0400
@@ -38,6 +38,20 @@
 #define DPRINTK(fmt, args...) \
     pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 
+char *xenbus_strstate(enum xenbus_state state)
+{
+       static char *name[] = {
+               [ XenbusStateUnknown      ] = "Unknown",
+               [ XenbusStateInitialising ] = "Initialising",
+               [ XenbusStateInitWait     ] = "InitWait",
+               [ XenbusStateInitialised  ] = "Initialised",
+               [ XenbusStateConnected    ] = "Connected",
+               [ XenbusStateClosing      ] = "Closing",
+               [ XenbusStateClosed       ] = "Closed",
+       };
+       return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
                      struct xenbus_watch *watch,
                      void (*callback)(struct xenbus_watch *,
@@ -121,6 +135,13 @@ int xenbus_switch_state(struct xenbus_de
 }
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+       xenbus_switch_state(dev, XenbusStateClosed);
+       complete(&dev->down);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
 
 /**
  * Return the path to the error node for the given device, or NULL on failure.
@@ -271,7 +292,7 @@ enum xenbus_state xenbus_read_driver_sta
        enum xenbus_state result;
        int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
        if (err)
-               result = XenbusStateClosed;
+               result = XenbusStateUnknown;
 
        return result;
 }
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_comms.c
--- a/drivers/xen/xenbus/xenbus_comms.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_comms.c Mon Oct 16 09:31:03 2006 -0400
@@ -47,11 +47,6 @@ static DECLARE_WORK(probe_work, xenbus_p
 
 DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
 
-static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
-{
-       return mfn_to_virt(xen_start_info->store_mfn);
-}
-
 static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
 {
        if (unlikely(xenstored_ready == 0)) {
@@ -90,7 +85,7 @@ static const void *get_input_chunk(XENST
 
 int xb_write(const void *data, unsigned len)
 {
-       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       struct xenstore_domain_interface *intf = xen_store_interface;
        XENSTORE_RING_IDX cons, prod;
        int rc;
 
@@ -129,7 +124,7 @@ int xb_write(const void *data, unsigned 
                intf->req_prod += avail;
 
                /* This implies mb() before other side sees interrupt. */
-               notify_remote_via_evtchn(xen_start_info->store_evtchn);
+               notify_remote_via_evtchn(xen_store_evtchn);
        }
 
        return 0;
@@ -137,7 +132,7 @@ int xb_write(const void *data, unsigned 
 
 int xb_read(void *data, unsigned len)
 {
-       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       struct xenstore_domain_interface *intf = xen_store_interface;
        XENSTORE_RING_IDX cons, prod;
        int rc;
 
@@ -180,7 +175,7 @@ int xb_read(void *data, unsigned len)
                pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
 
                /* Implies mb(): they will see new header. */
-               notify_remote_via_evtchn(xen_start_info->store_evtchn);
+               notify_remote_via_evtchn(xen_store_evtchn);
        }
 
        return 0;
@@ -195,7 +190,7 @@ int xb_init_comms(void)
                unbind_from_irqhandler(xenbus_irq, &xb_waitq);
 
        err = bind_evtchn_to_irqhandler(
-               xen_start_info->store_evtchn, wake_waiting,
+               xen_store_evtchn, wake_waiting,
                0, "xenbus", &xb_waitq);
        if (err <= 0) {
                printk(KERN_ERR "XENBUS request irq failed %i\n", err);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_comms.h
--- a/drivers/xen/xenbus/xenbus_comms.h Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_comms.h Mon Oct 16 09:31:03 2006 -0400
@@ -39,5 +39,7 @@ int xb_read(void *data, unsigned len);
 int xb_read(void *data, unsigned len);
 int xs_input_avail(void);
 extern wait_queue_head_t xb_waitq;
+extern struct xenstore_domain_interface *xen_store_interface;
+extern int xen_store_evtchn;
 
 #endif /* _XENBUS_COMMS_H */
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_dev.c
--- a/drivers/xen/xenbus/xenbus_dev.c   Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_dev.c   Mon Oct 16 09:31:03 2006 -0400
@@ -58,6 +58,9 @@ struct xenbus_dev_data {
        /* In-progress transaction. */
        struct list_head transactions;
 
+       /* Active watches. */
+       struct list_head watches;
+
        /* Partial request. */
        unsigned int len;
        union {
@@ -70,6 +73,8 @@ struct xenbus_dev_data {
        char read_buffer[PAGE_SIZE];
        unsigned int read_cons, read_prod;
        wait_queue_head_t read_waitq;
+
+       struct mutex reply_mutex;
 };
 
 static struct proc_dir_entry *xenbus_dev_intf;
@@ -100,13 +105,59 @@ static void queue_reply(struct xenbus_de
 {
        int i;
 
+       mutex_lock(&u->reply_mutex);
+
        for (i = 0; i < len; i++, u->read_prod++)
                u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
 
        BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer));
 
+       mutex_unlock(&u->reply_mutex);
+
        wake_up(&u->read_waitq);
 }
+
+struct watch_adapter
+{
+       struct list_head list;
+       struct xenbus_watch watch;
+       struct xenbus_dev_data *dev_data;
+       char *token;
+};
+
+static void free_watch_adapter (struct watch_adapter *watch)
+{
+       kfree(watch->watch.node);
+       kfree(watch->token);
+       kfree(watch);
+}
+
+static void watch_fired(struct xenbus_watch *watch,
+                       const char **vec,
+                       unsigned int len)
+{
+       struct watch_adapter *adap =
+            container_of(watch, struct watch_adapter, watch);
+       struct xsd_sockmsg hdr;
+       const char *path, *token;
+       int path_len, tok_len, body_len;
+
+       path = vec[XS_WATCH_PATH];
+       token = adap->token;
+
+       path_len = strlen(path) + 1;
+       tok_len = strlen(token) + 1;
+       body_len = path_len + tok_len;
+
+       hdr.type = XS_WATCH_EVENT;
+       hdr.len = body_len;
+       
+       queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
+       queue_reply(adap->dev_data, (char *)path, path_len);
+       queue_reply(adap->dev_data, (char *)token, tok_len);
+}
+
+static LIST_HEAD(watch_list);
 
 static ssize_t xenbus_dev_write(struct file *filp,
                                const char __user *ubuf,
@@ -116,6 +167,9 @@ static ssize_t xenbus_dev_write(struct f
        struct xenbus_dev_transaction *trans = NULL;
        uint32_t msg_type;
        void *reply;
+       char *path, *token;
+       struct watch_adapter *watch, *tmp_watch;
+       int err;
 
        if ((len + u->len) > sizeof(u->u.buffer))
                return -EINVAL;
@@ -169,6 +223,56 @@ static ssize_t xenbus_dev_write(struct f
                kfree(reply);
                break;
 
+       case XS_WATCH:
+       case XS_UNWATCH:
+               path = u->u.buffer + sizeof(u->u.msg);
+               token = memchr(path, 0, u->u.msg.len);
+               if (token == NULL)
+                       return -EILSEQ;
+               token++;
+
+               if (msg_type == XS_WATCH) {
+                       static const char * XS_WATCH_RESP = "OK";
+                       struct xsd_sockmsg hdr;
+
+                       watch = kmalloc(sizeof(*watch), GFP_KERNEL);
+                       watch->watch.node = kmalloc(strlen(path)+1,
+                                                    GFP_KERNEL);
+                       strcpy((char *)watch->watch.node, path);
+                       watch->watch.callback = watch_fired;
+                       watch->token = kmalloc(strlen(token)+1, GFP_KERNEL);
+                       strcpy(watch->token, token);
+                       watch->dev_data = u;
+
+                       err = register_xenbus_watch(&watch->watch);
+                       if (err) {
+                               free_watch_adapter(watch);
+                               return err;
+                       }
+                       
+                       list_add(&watch->list, &u->watches);
+
+                       hdr.type = XS_WATCH;
+                       hdr.len = strlen(XS_WATCH_RESP) + 1;
+                       queue_reply(u, (char *)&hdr, sizeof(hdr));
+                       queue_reply(u, (char *)XS_WATCH_RESP, hdr.len);
+               } else {
+                       list_for_each_entry_safe(watch, tmp_watch,
+                                                 &u->watches, list) {
+                               /* Tear down only the watch that matches. */
+                               if (!strcmp(watch->token, token) &&
+                                   !strcmp(watch->watch.node, path))
+                               {
+                                       unregister_xenbus_watch(&watch->watch);
+                                       list_del(&watch->list);
+                                       free_watch_adapter(watch);
+                                       break;
+                               }
+                       }
+               }
+
+               break;
+
        default:
                return -EINVAL;
        }
@@ -181,7 +285,7 @@ static int xenbus_dev_open(struct inode 
 {
        struct xenbus_dev_data *u;
 
-       if (xen_start_info->store_evtchn == 0)
+       if (xen_store_evtchn == 0)
                return -ENOENT;
 
        nonseekable_open(inode, filp);
@@ -191,7 +295,10 @@ static int xenbus_dev_open(struct inode 
                return -ENOMEM;
 
        INIT_LIST_HEAD(&u->transactions);
+       INIT_LIST_HEAD(&u->watches);
        init_waitqueue_head(&u->read_waitq);
+
+       mutex_init(&u->reply_mutex);
 
        filp->private_data = u;
 
@@ -202,11 +309,18 @@ static int xenbus_dev_release(struct ino
 {
        struct xenbus_dev_data *u = filp->private_data;
        struct xenbus_dev_transaction *trans, *tmp;
+       struct watch_adapter *watch, *tmp_watch;
 
        list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
                xenbus_transaction_end(trans->handle, 1);
                list_del(&trans->list);
                kfree(trans);
+       }
+
+       list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+               unregister_xenbus_watch(&watch->watch);
+               list_del(&watch->list);
+               free_watch_adapter(watch);
        }
 
        kfree(u);
@@ -232,7 +346,7 @@ static struct file_operations xenbus_dev
        .poll = xenbus_dev_poll,
 };
 
-static int __init
+int __init
 xenbus_dev_init(void)
 {
        xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
@@ -241,5 +355,3 @@ xenbus_dev_init(void)
 
        return 0;
 }
-
-__initcall(xenbus_dev_init);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_probe.c
--- a/drivers/xen/xenbus/xenbus_probe.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_probe.c Mon Oct 16 09:31:03 2006 -0400
@@ -45,18 +45,35 @@
 
 #include <asm/io.h>
 #include <asm/page.h>
+#include <asm/maddr.h>
 #include <asm/pgtable.h>
 #include <asm/hypervisor.h>
 #include <xen/xenbus.h>
 #include <xen/xen_proc.h>
 #include <xen/evtchn.h>
 #include <xen/features.h>
+#include <xen/hvm.h>
 
 #include "xenbus_comms.h"
 
+int xen_store_evtchn;
+struct xenstore_domain_interface *xen_store_interface;
+static unsigned long xen_store_mfn;
+
 extern struct mutex xenwatch_mutex;
 
 static BLOCKING_NOTIFIER_HEAD(xenstore_notifier_list);
+
+static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+                                int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+static int xenbus_dev_probe(struct device *_dev);
+static int xenbus_dev_remove(struct device *_dev);
+static void xenbus_dev_shutdown(struct device *_dev);
 
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
@@ -141,7 +158,9 @@ static int read_otherend_details(struct 
        }
        if (strlen(xendev->otherend) == 0 ||
            !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
-               xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s",
+               xenbus_dev_fatal(xendev, -ENOENT,
+                                "unable to read other end from %s.  "
+                                "missing or inaccessible.",
                                 xendev->nodename);
                free_otherend_details(xendev);
                return -ENOENT;
@@ -164,15 +183,17 @@ static int read_frontend_details(struct 
 
 
 /* Bus type for frontend drivers. */
-static int xenbus_probe_frontend(const char *type, const char *name);
 static struct xen_bus_type xenbus_frontend = {
        .root = "device",
        .levels = 2,            /* device/type/<id> */
        .get_bus_id = frontend_bus_id,
        .probe = xenbus_probe_frontend,
        .bus = {
-               .name  = "xen",
-               .match = xenbus_match,
+               .name     = "xen",
+               .match    = xenbus_match,
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
+               .shutdown = xenbus_dev_shutdown,
        },
        .dev = {
                .bus_id = "xen",
@@ -217,18 +238,18 @@ static int backend_bus_id(char bus_id[BU
        return 0;
 }
 
-static int xenbus_uevent_backend(struct device *dev, char **envp,
-                                int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
 static struct xen_bus_type xenbus_backend = {
        .root = "backend",
        .levels = 3,            /* backend/type/<frontend>/<id> */
        .get_bus_id = backend_bus_id,
        .probe = xenbus_probe_backend,
        .bus = {
-               .name  = "xen-backend",
-               .match = xenbus_match,
-               .uevent = xenbus_uevent_backend,
+               .name     = "xen-backend",
+               .match    = xenbus_match,
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
+//             .shutdown = xenbus_dev_shutdown,
+               .uevent   = xenbus_uevent_backend,
        },
        .dev = {
                .bus_id = "xen-backend",
@@ -298,8 +319,23 @@ static void otherend_changed(struct xenb
 
        state = xenbus_read_driver_state(dev->otherend);
 
-       DPRINTK("state is %d, %s, %s",
-               state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+       DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+               dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+
+       /*
+        * Ignore xenbus transitions during shutdown. This prevents us doing
+        * work that can fail e.g., when the rootfs is gone.
+        */
+       if (system_state > SYSTEM_RUNNING) {
+               struct xen_bus_type *bus = bus;
+               bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+               /* If we're frontend, drive the state machine to Closed. */
+               /* This should cause the backend to release our resources. */
+               if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+                       xenbus_frontend_closed(dev);
+               return;
+       }
+
        if (drv->otherend_changed)
                drv->otherend_changed(dev, state);
 }
@@ -330,7 +366,7 @@ static int xenbus_dev_probe(struct devic
        const struct xenbus_device_id *id;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        if (!drv->probe) {
                err = -ENODEV;
@@ -375,7 +411,7 @@ static int xenbus_dev_remove(struct devi
        struct xenbus_device *dev = to_xenbus_device(_dev);
        struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        free_otherend_watch(dev);
        free_otherend_details(dev);
@@ -385,6 +421,27 @@ static int xenbus_dev_remove(struct devi
 
        xenbus_switch_state(dev, XenbusStateClosed);
        return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       unsigned long timeout = 5*HZ;
+
+       DPRINTK("%s", dev->nodename);
+
+       get_device(&dev->dev);
+       if (dev->state != XenbusStateConnected) {
+               printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
+                      dev->nodename, xenbus_strstate(dev->state));
+               goto out;
+       }
+       xenbus_switch_state(dev, XenbusStateClosing);
+       timeout = wait_for_completion_timeout(&dev->down, timeout);
+       if (!timeout)
+               printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
+ out:
+       put_device(&dev->dev);
 }
 
 static int xenbus_register_driver_common(struct xenbus_driver *drv,
@@ -395,8 +452,6 @@ static int xenbus_register_driver_common
        drv->driver.name = drv->name;
        drv->driver.bus = &bus->bus;
        drv->driver.owner = drv->owner;
-       drv->driver.probe = xenbus_dev_probe;
-       drv->driver.remove = xenbus_dev_remove;
 
        mutex_lock(&xenwatch_mutex);
        ret = driver_register(&drv->driver);
@@ -406,9 +461,18 @@ static int xenbus_register_driver_common
 
 int xenbus_register_frontend(struct xenbus_driver *drv)
 {
+       int ret;
+
        drv->read_otherend_details = read_backend_details;
 
-       return xenbus_register_driver_common(drv, &xenbus_frontend);
+       ret = xenbus_register_driver_common(drv, &xenbus_frontend);
+       if (ret)
+               return ret;
+
+       /* If this driver is loaded as a module wait for devices to attach. */
+       wait_for_devices(drv);
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(xenbus_register_frontend);
 
@@ -541,6 +605,7 @@ static int xenbus_probe_node(struct xen_
        tmpstring += strlen(tmpstring) + 1;
        strcpy(tmpstring, type);
        xendev->devicetype = tmpstring;
+       init_completion(&xendev->down);
 
        xendev->dev.parent = &bus->dev;
        xendev->dev.bus = &bus->bus;
@@ -806,7 +871,7 @@ static int resume_dev(struct device *dev
                        printk(KERN_WARNING
                               "xenbus: resume %s failed: %i\n", 
                               dev->bus_id, err);
-                       return err; 
+                       return err;
                }
        }
 
@@ -818,7 +883,7 @@ static int resume_dev(struct device *dev
                return err;
        }
 
-       return 0; 
+       return 0;
 }
 
 void xenbus_suspend(void)
@@ -865,29 +930,6 @@ EXPORT_SYMBOL_GPL(unregister_xenstore_no
 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
 
 
-static int all_devices_ready_(struct device *dev, void *data)
-{
-       struct xenbus_device *xendev = to_xenbus_device(dev);
-       int *result = data;
-
-       if (xendev->state != XenbusStateConnected) {
-               *result = 0;
-               return 1;
-       }
-
-       return 0;
-}
-
-
-static int all_devices_ready(void)
-{
-       int ready = 1;
-       bus_for_each_dev(&xenbus_frontend.bus, NULL, &ready,
-                        all_devices_ready_);
-       return ready;
-}
-
-
 void xenbus_probe(void *unused)
 {
        BUG_ON((xenstored_ready <= 0));
@@ -917,8 +959,7 @@ static int xsd_kva_mmap(struct file *fil
        if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
                return -EINVAL;
 
-       if (remap_pfn_range(vma, vma->vm_start,
-                           mfn_to_pfn(xen_start_info->store_mfn),
+       if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
                            size, vma->vm_page_prot))
                return -EAGAIN;
 
@@ -930,7 +971,7 @@ static int xsd_kva_read(char *page, char
 {
        int len;
 
-       len  = sprintf(page, "0x%p", mfn_to_virt(xen_start_info->store_mfn));
+       len  = sprintf(page, "0x%p", xen_store_interface);
        *eof = 1;
        return len;
 }
@@ -940,16 +981,15 @@ static int xsd_port_read(char *page, cha
 {
        int len;
 
-       len  = sprintf(page, "%d", xen_start_info->store_evtchn);
+       len  = sprintf(page, "%d", xen_store_evtchn);
        *eof = 1;
        return len;
 }
 #endif
 
-
 static int __init xenbus_probe_init(void)
 {
-       int err = 0, dom0;
+       int err = 0;
        unsigned long page = 0;
 
        DPRINTK("");
@@ -964,9 +1004,7 @@ static int __init xenbus_probe_init(void
        /*
         * Domain0 doesn't have a store_evtchn or store_mfn yet.
         */
-       dom0 = (xen_start_info->store_evtchn == 0);
-
-       if (dom0) {
+       if (is_initial_xendomain()) {
                struct evtchn_alloc_unbound alloc_unbound;
 
                /* Allocate page. */
@@ -974,7 +1012,7 @@ static int __init xenbus_probe_init(void
                if (!page)
                        return -ENOMEM;
 
-               xen_start_info->store_mfn =
+               xen_store_mfn = xen_start_info->store_mfn =
                        pfn_to_mfn(virt_to_phys((void *)page) >>
                                   PAGE_SHIFT);
 
@@ -987,7 +1025,8 @@ static int __init xenbus_probe_init(void
                if (err == -ENOSYS)
                        goto err;
                BUG_ON(err);
-               xen_start_info->store_evtchn = alloc_unbound.port;
+               xen_store_evtchn = xen_start_info->store_evtchn =
+                       alloc_unbound.port;
 
 #ifdef CONFIG_PROC_FS
                /* And finally publish the above info in /proc/xen */
@@ -1003,8 +1042,23 @@ static int __init xenbus_probe_init(void
                if (xsd_port_intf)
                        xsd_port_intf->read_proc = xsd_port_read;
 #endif
-       } else
+               xen_store_interface = mfn_to_virt(xen_store_mfn);
+       } else {
                xenstored_ready = 1;
+#ifdef CONFIG_XEN
+               xen_store_evtchn = xen_start_info->store_evtchn;
+               xen_store_mfn = xen_start_info->store_mfn;
+               xen_store_interface = mfn_to_virt(xen_store_mfn);
+#else
+               xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+               xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+               xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+                                             PAGE_SIZE);
+#endif
+       }
+
+
+       xenbus_dev_init();
 
        /* Initialize the interface to xenstore. */
        err = xs_init();
@@ -1018,7 +1072,7 @@ static int __init xenbus_probe_init(void
        device_register(&xenbus_frontend.dev);
        device_register(&xenbus_backend.dev);
 
-       if (!dom0)
+       if (!is_initial_xendomain())
                xenbus_probe(NULL);
 
        return 0;
@@ -1038,6 +1092,58 @@ static int __init xenbus_probe_init(void
 
 postcore_initcall(xenbus_probe_init);
 
+MODULE_LICENSE("Dual BSD/GPL");
+
+
+static int is_disconnected_device(struct device *dev, void *data)
+{
+       struct xenbus_device *xendev = to_xenbus_device(dev);
+       struct device_driver *drv = data;
+
+       /*
+        * A device with no driver will never connect. We care only about
+        * devices which should currently be in the process of connecting.
+        */
+       if (!dev->driver)
+               return 0;
+
+       /* Is this search limited to a particular driver? */
+       if (drv && (dev->driver != drv))
+               return 0;
+
+       return (xendev->state != XenbusStateConnected);
+}
+
+static int exists_disconnected_device(struct device_driver *drv)
+{
+       return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+                               is_disconnected_device);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+       struct xenbus_device *xendev = to_xenbus_device(dev);
+       struct device_driver *drv = data;
+
+       /* Is this operation limited to a particular driver? */
+       if (drv && (dev->driver != drv))
+               return 0;
+
+       if (!dev->driver) {
+               /* Information only: is this too noisy? */
+               printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
+                      xendev->nodename);
+       } else if (xendev->state != XenbusStateConnected) {
+               printk(KERN_WARNING "XENBUS: Timeout connecting "
+                      "to device: %s (state %d)\n",
+                      xendev->nodename, xendev->state);
+       }
+
+       return 0;
+}
+
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
 
 /*
  * On a 10 second timeout, wait for all devices currently configured.  We need
@@ -1053,21 +1159,31 @@ postcore_initcall(xenbus_probe_init);
  * boot slightly, but of course needs tools or manual intervention to set up
  * those flags correctly.
  */
-static int __init wait_for_devices(void)
+static void wait_for_devices(struct xenbus_driver *xendrv)
 {
        unsigned long timeout = jiffies + 10*HZ;
-
-       if (!is_running_on_xen())
-               return -ENODEV;
-
-       while (time_before(jiffies, timeout)) {
-               if (all_devices_ready())
-                       return 0;
+       struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+
+       if (!ready_to_wait_for_devices || !is_running_on_xen())
+               return;
+
+       while (exists_disconnected_device(drv)) {
+               if (time_after(jiffies, timeout))
+                       break;
                schedule_timeout_interruptible(HZ/10);
        }
 
-       printk(KERN_WARNING "XENBUS: Timeout connecting to devices!\n");
-       return 0;
-}
-
-late_initcall(wait_for_devices);
+       bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+                        print_device_status);
+}
+
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
+       ready_to_wait_for_devices = 1;
+       wait_for_devices(NULL);
+       return 0;
+}
+
+late_initcall(boot_wait_for_devices);
+#endif
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_xs.c
--- a/drivers/xen/xenbus/xenbus_xs.c    Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_xs.c    Mon Oct 16 09:31:03 2006 -0400
@@ -662,7 +662,17 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watc
 
 void xs_suspend(void)
 {
+       struct xenbus_watch *watch;
+       char token[sizeof(watch) * 2 + 1];
+
        down_write(&xs_state.suspend_mutex);
+
+       /* No need for watches_lock: the suspend_mutex is sufficient. */
+       list_for_each_entry(watch, &watches, list) {
+               sprintf(token, "%lX", (long)watch);
+               xs_unwatch(watch->node, token);
+       }
+
        mutex_lock(&xs_state.request_mutex);
 }
 
diff -r f0be2cc05103 -r 933b1d114a89 include/asm-powerpc/page.h
--- a/include/asm-powerpc/page.h        Fri Oct 13 12:36:39 2006 -0400
+++ b/include/asm-powerpc/page.h        Mon Oct 16 09:31:03 2006 -0400
@@ -195,6 +195,7 @@ extern const char *arch_vma_name(struct 
 extern const char *arch_vma_name(struct vm_area_struct *vma);
 
 #include <asm-generic/memory_model.h>
+#include <xen/foreign_page.h>
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff -r f0be2cc05103 -r 933b1d114a89 include/asm-powerpc/xen/asm/hypervisor.h
--- a/include/asm-powerpc/xen/asm/hypervisor.h  Fri Oct 13 12:36:39 2006 -0400
+++ b/include/asm-powerpc/xen/asm/hypervisor.h  Mon Oct 16 09:31:03 2006 -0400
@@ -239,7 +239,16 @@ typedef unsigned long maddr_t;
 typedef unsigned long maddr_t;
 
 #ifdef CONFIG_XEN_SCRUB_PAGES
-#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+
+static inline void scrub_pages(void *p, unsigned n)
+{
+       unsigned i;
+
+       for (i = 0; i < n; i++) {
+               clear_page(p);
+               p += PAGE_SIZE;
+       }
+}
 #else
 #define scrub_pages(_p,_n) ((void)0)
 #endif
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/balloon.h
--- a/include/xen/balloon.h     Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/balloon.h     Mon Oct 16 09:31:03 2006 -0400
@@ -41,6 +41,8 @@ extern void
 extern void
 balloon_update_driver_allowance(
        long delta);
+struct page **alloc_empty_pages_and_pagevec(int nr_pages);
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
 
 /* Allocate an empty low-memory page range. */
 extern struct page *
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/foreign_page.h
--- a/include/xen/foreign_page.h        Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/foreign_page.h        Mon Oct 16 09:31:03 2006 -0400
@@ -10,6 +10,7 @@
 #ifndef __ASM_XEN_FOREIGN_PAGE_H__
 #define __ASM_XEN_FOREIGN_PAGE_H__
 
+#ifndef CONFIG_PPC_XEN
 #define PG_foreign             PG_arch_1
 
 #define PageForeign(page)      test_bit(PG_foreign, &(page)->flags)
@@ -27,4 +28,34 @@
 #define PageForeignDestructor(page)    \
        ( (void (*) (struct page *)) (page)->mapping )
 
+#else
+
+extern struct address_space xen_foreign_dummy_mapping;
+
+#define PageForeign(page)      \
+       ((page)->mapping == &xen_foreign_dummy_mapping)
+
+#define SetPageForeign(page, dtor) do {                                \
+       set_page_private((page), (unsigned long)(dtor));        \
+       (page)->mapping = &xen_foreign_dummy_mapping;   \
+       smp_rmb();                                              \
+} while (0)
+
+#define ClearPageForeign(page) do {    \
+       (page)->mapping = NULL;         \
+       smp_rmb();                      \
+       set_page_private((page), 0);    \
+} while (0)
+
+#define PageForeignDestructor(page)    \
+       ( (void (*) (struct page *)) page_private(page) )
+
+#define HAVE_ARCH_FREE_PAGE
+#define arch_free_page(_page,_order)                   \
+({      int foreign = PageForeign(_page);               \
+       if (foreign)                                    \
+               (PageForeignDestructor(_page))(_page);  \
+       foreign;                                        \
+})
+#endif
 #endif /* __ASM_XEN_FOREIGN_PAGE_H__ */
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/xenbus.h
--- a/include/xen/xenbus.h      Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/xenbus.h      Mon Oct 16 09:31:03 2006 -0400
@@ -37,6 +37,7 @@
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
+#include <linux/completion.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/xenbus.h>
@@ -74,6 +75,7 @@ struct xenbus_device {
        struct xenbus_watch otherend_watch;
        struct device dev;
        enum xenbus_state state;
+       struct completion down;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)
@@ -274,7 +276,7 @@ int xenbus_free_evtchn(struct xenbus_dev
 
 /**
  * Return the state of the driver rooted at the given store path, or
- * XenbusStateClosed if no state can be read.
+ * XenbusStateUnknown if no state can be read.
  */
 enum xenbus_state xenbus_read_driver_state(const char *path);
 
@@ -295,5 +297,10 @@ void xenbus_dev_fatal(struct xenbus_devi
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
                      ...);
 
+int __init xenbus_dev_init(void);
+
+char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(struct xenbus_device *dev);
+int xenbus_frontend_closed(struct xenbus_device *dev);
 
 #endif /* _XEN_XENBUS_H */
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/hvm.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/include/xen/hvm.h Mon Oct 16 09:31:03 2006 -0400
@@ -0,0 +1,13 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <xen/interface/hvm/params.h>
+#include <asm/hypercall.h>
+
+static inline unsigned long hvm_get_parameter(int idx)
+{
+       return 0;
+}
+
+#endif /* XEN_HVM_H__ */

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [XenPPC] [linux-ppc-2.6] [LINUX][XEN][POWERPC] update with the lates Xen VIO, Xen patchbot-linux-ppc-2.6 <=