# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 933b1d114a89abe409b50b948c39d3b28dd3e02f
# Parent f0be2cc05103e19788416719e2b9ec38b38bd26e
[LINUX][XEN][POWERPC] update with the latest Xen VIO
Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
---
arch/powerpc/platforms/xen/balloon.c | 46 ++++-
arch/powerpc/platforms/xen/gnttab.c | 20 +-
arch/powerpc/xmon/xmon.c | 3
drivers/xen/blkback/blkback.c | 77 ++++----
drivers/xen/blkback/common.h | 9 -
drivers/xen/blkback/interface.c | 28 +--
drivers/xen/blkback/xenbus.c | 126 ++++++++++----
drivers/xen/blkfront/blkfront.c | 13 +
drivers/xen/netback/interface.c | 29 ++-
drivers/xen/netback/loopback.c | 64 +++++++
drivers/xen/netback/netback.c | 169 +++++++++----------
drivers/xen/netback/xenbus.c | 24 +-
drivers/xen/netfront/netfront.c | 129 +++++++++++---
drivers/xen/xenbus/xenbus_backend_client.c | 13 +
drivers/xen/xenbus/xenbus_client.c | 23 ++
drivers/xen/xenbus/xenbus_comms.c | 15 -
drivers/xen/xenbus/xenbus_comms.h | 2
drivers/xen/xenbus/xenbus_dev.c | 120 +++++++++++++
drivers/xen/xenbus/xenbus_probe.c | 252 +++++++++++++++++++++--------
drivers/xen/xenbus/xenbus_xs.c | 10 +
include/asm-powerpc/page.h | 1
include/asm-powerpc/xen/asm/hypervisor.h | 11 +
include/xen/balloon.h | 2
include/xen/foreign_page.h | 31 +++
include/xen/hvm.h | 13 +
include/xen/xenbus.h | 9 -
26 files changed, 908 insertions(+), 331 deletions(-)
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/platforms/xen/balloon.c
--- a/arch/powerpc/platforms/xen/balloon.c Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/platforms/xen/balloon.c Mon Oct 16 09:31:03 2006 -0400
@@ -1,20 +1,47 @@
#include <linux/module.h>
#include <linux/mm.h>
+#include <asm/hypervisor.h>
/*
* FIXME: Port balloon driver, if ever
*/
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
- unsigned long vstart;
- unsigned int order = get_order(nr_pages * PAGE_SIZE);
+ struct page *page, **pagevec;
+ void *vaddr;
+ int i;
- vstart = __get_free_pages(GFP_KERNEL, order);
- if (vstart == 0)
- return NULL;
+ pagevec = kmalloc(sizeof(*pagevec) * nr_pages, GFP_KERNEL);
+ if (pagevec == NULL)
+ return NULL;
- return virt_to_page(vstart);
+ for (i = 0; i < nr_pages; i++) {
+ page = alloc_page(GFP_KERNEL);
+ pagevec[i] = page;
+ vaddr = page_address(page);
+ scrub_pages(vaddr, 1);
+ }
+
+ return pagevec;
+}
+
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
+{
+ int arch_is_foreign_page(struct page *page);
+ struct page *page;
+ int i;
+
+ if (pagevec == NULL)
+ return;
+
+ for (i = 0; i < nr_pages; i++) {
+ page = pagevec[i];
+ if (!arch_is_foreign_page(page))
+ __free_page(page);
+ }
+
+ kfree(pagevec);
}
void balloon_dealloc_empty_page_range(
@@ -32,6 +59,7 @@ void balloon_release_driver_page(struct
BUG();
}
-EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(balloon_release_driver_page);
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/platforms/xen/gnttab.c
--- a/arch/powerpc/platforms/xen/gnttab.c Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/platforms/xen/gnttab.c Mon Oct 16 09:31:03 2006 -0400
@@ -39,8 +39,6 @@ static long map_to_linear(ulong paddr)
mode = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
vaddr = (ulong)__va(paddr);
- DBG("%s: 0x%lx: 0x%x\n",
- __func__, paddr, page_count(virt_to_page(vaddr)));
{
unsigned long vpn, hash, hpteg;
unsigned long vsid = get_kernel_vsid(vaddr);
@@ -162,12 +160,9 @@ static void gnttab_pre_unmap_grant_ref(
continue;
}
- DBG("%s: 0x%lx: 0x%x\n",
+ DBG("%s: 0x%lx: page count: 0x%x\n",
__func__, ea, page_count(virt_to_page(ea)));
plpar_pte_remove(0, slot, 0, &dummy1, &dummy2);
-
- DBG("%s: remove_pages(0x%lx, 0x%lx)\n",
- __func__, unmap[i].host_addr, unmap[i].dev_bus_addr);
}
}
@@ -186,6 +181,9 @@ static void gnttab_post_map_grant_ref(
/* ??? store the slot somewhere ??? */
map[i].host_addr = (ulong)__va(pa);
page = virt_to_page(map[i].host_addr);
+
+ DBG("%s: 0x%lx: 0x%x\n",
+ __func__, pa, page_count(page));
if (page_count(page) == 1) {
#ifdef DEBUG
@@ -258,11 +256,6 @@ int HYPERVISOR_grant_table_op(unsigned i
return ret;
}
EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
-
-ulong foreign_alloc_empty_page_range(unsigned long nr_pages)
-{
- return (ulong)__va(foreign_map_base);
-}
static ulong setup_grant_maps(void)
{
@@ -350,3 +343,8 @@ void *arch_gnttab_map(unsigned long *fra
return shared;
}
+
+int arch_is_foreign_page(struct page *page)
+{
+ return ((page_to_pfn(page) << PAGE_SHIFT) >= foreign_map_base);
+}
diff -r f0be2cc05103 -r 933b1d114a89 arch/powerpc/xmon/xmon.c
--- a/arch/powerpc/xmon/xmon.c Fri Oct 13 12:36:39 2006 -0400
+++ b/arch/powerpc/xmon/xmon.c Mon Oct 16 09:31:03 2006 -0400
@@ -753,6 +753,9 @@ cmds(struct pt_regs *excp)
cmd = inchar();
}
switch (cmd) {
+ case 'A':
+ asm volatile(".long 0x200;nop");
+ break;
case 'm':
cmd = inchar();
switch (cmd) {
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/blkback.c
--- a/drivers/xen/blkback/blkback.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/blkback.c Mon Oct 16 09:31:03 2006 -0400
@@ -55,8 +55,6 @@ static int blkif_reqs = 64;
static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-
-static int mmap_pages;
/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats = 0;
@@ -87,8 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pending_f
#define BLKBACK_INVALID_HANDLE (~0)
-static unsigned long mmap_vstart;
-static unsigned long *pending_vaddrs;
+static struct page **pending_pages;
static grant_handle_t *pending_grant_handles;
static inline int vaddr_pagenr(pending_req_t *req, int seg)
@@ -98,8 +95,23 @@ static inline int vaddr_pagenr(pending_r
static inline unsigned long vaddr(pending_req_t *req, int seg)
{
- return pending_vaddrs[vaddr_pagenr(req, seg)];
-}
+ unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
+ return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+#ifdef CONFIG_PPC_XEN
+static inline void update_pending_pages(
+ unsigned int idx, gnttab_map_grant_ref_t *mop)
+{
+#ifdef PPC_NOT_YET
+ extern int arch_is_foreign_page(struct page *page);
+
+ if (!arch_is_foreign_page(pending_pages[idx]))
+ __free_page(pending_pages[idx]);
+#endif
+ pending_pages[idx] = pfn_to_page(mop->dev_bus_addr >> PAGE_SHIFT);
+}
+#endif
#define pending_handle(_req, _seg) \
(pending_grant_handles[vaddr_pagenr(_req, _seg)])
@@ -399,8 +411,7 @@ static void dispatch_rw_block_io(blkif_t
pending_handle(pending_req, i) = map[i].handle;
#ifdef CONFIG_PPC_XEN
- pending_vaddrs[vaddr_pagenr(pending_req, i)] =
- (unsigned long)gnttab_map_vaddr(map[i]);
+ update_pending_pages(vaddr_pagenr(pending_req, i), &map[i]);
#else
set_phys_to_machine(__pa(vaddr(
pending_req, i)) >> PAGE_SHIFT,
@@ -511,57 +522,43 @@ static void make_response(blkif_t *blkif
static int __init blkif_init(void)
{
- struct page *page;
- int i;
+ int i, mmap_pages;
if (!is_running_on_xen())
return -ENODEV;
-
- mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-#ifdef CONFIG_PPC_XEN
- (void)page;
- mmap_vstart = foreign_alloc_empty_page_range(mmap_pages);
-#else
- page = balloon_alloc_empty_page_range(mmap_pages);
- if (page == NULL)
- return -ENOMEM;
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-#endif
+
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
blkif_reqs, GFP_KERNEL);
pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
mmap_pages, GFP_KERNEL);
- pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
- mmap_pages, GFP_KERNEL);
- if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
- kfree(pending_reqs);
- kfree(pending_grant_handles);
- kfree(pending_vaddrs);
- printk("%s: out of memory\n", __FUNCTION__);
- return -ENOMEM;
- }
+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
+
+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
+ goto out_of_memory;
+
+ for (i = 0; i < mmap_pages; i++)
+ pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
blkif_interface_init();
-
- printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
- __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
- BUG_ON(mmap_vstart == 0);
- for (i = 0; i < mmap_pages; i++) {
- pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
- pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
- }
memset(pending_reqs, 0, sizeof(pending_reqs));
INIT_LIST_HEAD(&pending_free);
for (i = 0; i < blkif_reqs; i++)
list_add_tail(&pending_reqs[i].free_list, &pending_free);
-
+
blkif_xenbus_init();
return 0;
+
+ out_of_memory:
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ free_empty_pages_and_pagevec(pending_pages, mmap_pages);
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
}
module_init(blkif_init);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/common.h
--- a/drivers/xen/blkback/common.h Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/common.h Mon Oct 16 09:31:03 2006 -0400
@@ -55,9 +55,9 @@ struct vbd {
unsigned char type; /* VDISK_xxx */
u32 pdevice; /* phys device that this vbd maps to */
struct block_device *bdev;
-};
+};
-struct backend_info;
+struct backend_info;
typedef struct blkif_st {
/* Unique identifier for this interface. */
@@ -72,7 +72,7 @@ typedef struct blkif_st {
/* The VBD attached to this interface. */
struct vbd vbd;
/* Back pointer to the backend_info. */
- struct backend_info *be;
+ struct backend_info *be;
/* Private fields. */
spinlock_t blk_ring_lock;
atomic_t refcnt;
@@ -95,6 +95,7 @@ typedef struct blkif_st {
} blkif_t;
blkif_t *blkif_alloc(domid_t domid);
+void blkif_disconnect(blkif_t *blkif);
void blkif_free(blkif_t *blkif);
int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
@@ -121,7 +122,7 @@ struct phys_req {
blkif_sector_t sector_number;
};
-int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
void blkif_interface_init(void);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/interface.c
--- a/drivers/xen/blkback/interface.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/interface.c Mon Oct 16 09:31:03 2006 -0400
@@ -32,6 +32,7 @@
#include "common.h"
#include <xen/evtchn.h>
+#include <linux/kthread.h>
static kmem_cache_t *blkif_cachep;
@@ -75,12 +76,6 @@ static int map_frontend_page(blkif_t *bl
blkif->shmem_ref = shared_page;
blkif->shmem_handle = op.handle;
-
-#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
- /* on some arch's, map_grant_ref behaves like mmap, in that the
- * passed address is a hint and a different address may be returned */
- blkif->blk_ring_area->addr = gnttab_map_vaddr(op);
-#endif
return 0;
}
@@ -140,22 +135,33 @@ int blkif_map(blkif_t *blkif, unsigned l
return 0;
}
-void blkif_free(blkif_t *blkif)
+void blkif_disconnect(blkif_t *blkif)
{
+ if (blkif->xenblkd) {
+ kthread_stop(blkif->xenblkd);
+ blkif->xenblkd = NULL;
+ }
+
atomic_dec(&blkif->refcnt);
wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+ atomic_inc(&blkif->refcnt);
- /* Already disconnected? */
- if (blkif->irq)
+ if (blkif->irq) {
unbind_from_irqhandler(blkif->irq, blkif);
-
- vbd_free(&blkif->vbd);
+ blkif->irq = 0;
+ }
if (blkif->blk_ring.sring) {
unmap_frontend_page(blkif);
free_vm_area(blkif->blk_ring_area);
+ blkif->blk_ring.sring = NULL;
}
+}
+void blkif_free(blkif_t *blkif)
+{
+ if (!atomic_dec_and_test(&blkif->refcnt))
+ BUG();
kmem_cache_free(blkif_cachep, blkif);
}
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkback/xenbus.c
--- a/drivers/xen/blkback/xenbus.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkback/xenbus.c Mon Oct 16 09:31:03 2006 -0400
@@ -42,7 +42,6 @@ static int connect_ring(struct backend_i
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
-
static void update_blkif_status(blkif_t *blkif)
{
@@ -73,26 +72,71 @@ static void update_blkif_status(blkif_t
}
-static ssize_t show_physical_device(struct device *_dev,
- struct device_attribute *attr, char *buf)
-{
- struct xenbus_device *dev = to_xenbus_device(_dev);
- struct backend_info *be = dev->dev.driver_data;
- return sprintf(buf, "%x:%x\n", be->major, be->minor);
-}
-DEVICE_ATTR(physical_device, S_IRUSR | S_IRGRP | S_IROTH,
- show_physical_device, NULL);
-
-
-static ssize_t show_mode(struct device *_dev, struct device_attribute *attr,
- char *buf)
-{
- struct xenbus_device *dev = to_xenbus_device(_dev);
- struct backend_info *be = dev->dev.driver_data;
- return sprintf(buf, "%s\n", be->mode);
-}
-DEVICE_ATTR(mode, S_IRUSR | S_IRGRP | S_IROTH, show_mode, NULL);
-
+/****************************************************************
+ * sysfs interface for VBD I/O requests
+ */
+
+#define VBD_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ struct backend_info *be = dev->dev.driver_data; \
+ \
+ return sprintf(buf, format, ##args); \
+ } \
+ DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+
+static struct attribute *vbdstat_attrs[] = {
+ &dev_attr_oo_req.attr,
+ &dev_attr_rd_req.attr,
+ &dev_attr_wr_req.attr,
+ NULL
+};
+
+static struct attribute_group vbdstat_group = {
+ .name = "statistics",
+ .attrs = vbdstat_attrs,
+};
+
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
+VBD_SHOW(mode, "%s\n", be->mode);
+
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+ int error;
+
+ error = device_create_file(&dev->dev, &dev_attr_physical_device);
+ if (error)
+ goto fail1;
+
+ error = device_create_file(&dev->dev, &dev_attr_mode);
+ if (error)
+ goto fail2;
+
+ error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
+ if (error)
+ goto fail3;
+
+ return 0;
+
+fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+fail2: device_remove_file(&dev->dev, &dev_attr_mode);
+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
+ return error;
+}
+
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+ sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+ device_remove_file(&dev->dev, &dev_attr_mode);
+ device_remove_file(&dev->dev, &dev_attr_physical_device);
+}
static int blkback_remove(struct xenbus_device *dev)
{
@@ -105,15 +149,16 @@ static int blkback_remove(struct xenbus_
kfree(be->backend_watch.node);
be->backend_watch.node = NULL;
}
+
if (be->blkif) {
- if (be->blkif->xenblkd)
- kthread_stop(be->blkif->xenblkd);
+ blkif_disconnect(be->blkif);
+ vbd_free(&be->blkif->vbd);
blkif_free(be->blkif);
be->blkif = NULL;
}
- device_remove_file(&dev->dev, &dev_attr_physical_device);
- device_remove_file(&dev->dev, &dev_attr_mode);
+ if (be->major || be->minor)
+ xenvbd_sysfs_delif(dev);
kfree(be);
dev->dev.driver_data = NULL;
@@ -149,7 +194,7 @@ static int blkback_probe(struct xenbus_d
}
/* setup back pointer */
- be->blkif->be = be;
+ be->blkif->be = be;
err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
&be->backend_watch, backend_changed);
@@ -228,17 +273,21 @@ static void backend_changed(struct xenbu
err = vbd_create(be->blkif, handle, major, minor,
(NULL == strchr(be->mode, 'w')));
if (err) {
- be->major = 0;
- be->minor = 0;
+ be->major = be->minor = 0;
xenbus_dev_fatal(dev, err, "creating vbd structure");
return;
}
- device_create_file(&dev->dev, &dev_attr_physical_device);
- device_create_file(&dev->dev, &dev_attr_mode);
+ err = xenvbd_sysfs_addif(dev);
+ if (err) {
+ vbd_free(&be->blkif->vbd);
+ be->major = be->minor = 0;
+ xenbus_dev_fatal(dev, err, "creating sysfs entries");
+ return;
+ }
/* We're potentially connected now */
- update_blkif_status(be->blkif);
+ update_blkif_status(be->blkif);
}
}
@@ -252,10 +301,15 @@ static void frontend_changed(struct xenb
struct backend_info *be = dev->dev.driver_data;
int err;
- DPRINTK("");
+ DPRINTK("%s", xenbus_strstate(frontend_state));
switch (frontend_state) {
case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk("%s: %s: prepare for reconnect\n",
+ __FUNCTION__, dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
break;
case XenbusStateInitialised:
@@ -273,15 +327,19 @@ static void frontend_changed(struct xenb
break;
case XenbusStateClosing:
+ blkif_disconnect(be->blkif);
xenbus_switch_state(dev, XenbusStateClosing);
break;
case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
device_unregister(&dev->dev);
break;
- case XenbusStateUnknown:
- case XenbusStateInitWait:
default:
xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
frontend_state);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/blkfront/blkfront.c
--- a/drivers/xen/blkfront/blkfront.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/blkfront/blkfront.c Mon Oct 16 09:31:03 2006 -0400
@@ -46,6 +46,7 @@
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
#include <asm/hypervisor.h>
+#include <asm/maddr.h>
#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED 1
@@ -255,10 +256,10 @@ static void backend_changed(struct xenbu
DPRINTK("blkfront:backend_changed.\n");
switch (backend_state) {
- case XenbusStateUnknown:
case XenbusStateInitialising:
case XenbusStateInitWait:
case XenbusStateInitialised:
+ case XenbusStateUnknown:
case XenbusStateClosed:
break;
@@ -354,12 +355,14 @@ static void blkfront_closing(struct xenb
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
flush_scheduled_work();
- spin_unlock_irqrestore(&blkif_io_lock, flags);
xlvbd_del(info);
- xenbus_switch_state(dev, XenbusStateClosed);
+ xenbus_frontend_closed(dev);
}
@@ -713,8 +716,10 @@ static void blkif_free(struct blkfront_i
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
flush_scheduled_work();
- spin_unlock_irq(&blkif_io_lock);
/* Free resources associated with old device channel. */
if (info->ring_ref != GRANT_INVALID_REF) {
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/interface.c
--- a/drivers/xen/netback/interface.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/interface.c Mon Oct 16 09:31:03 2006 -0400
@@ -34,6 +34,24 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+/*
+ * Module parameter 'queue_length':
+ *
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
+ * unbounded time. This is bad if those packets hold onto foreign resources.
+ * For example, consider a packet that holds onto resources belonging to the
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
+ * vif1.1 which is not activated in the guest): in this situation the guest
+ * will never be destroyed, unless vif1.1 is taken down (which flushes the
+ * 'tx_queue').
+ *
+ * Only set this parameter to non-zero value if you know what you are doing!
+ */
+static unsigned long netbk_queue_length = 0;
+module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
static void __netif_up(netif_t *netif)
{
enable_irq(netif->irq);
@@ -44,6 +62,7 @@ static void __netif_down(netif_t *netif)
{
disable_irq(netif->irq);
netif_deschedule_work(netif);
+ del_timer_sync(&netif->credit_timeout);
}
static int net_open(struct net_device *dev)
@@ -134,6 +153,7 @@ netif_t *netif_alloc(domid_t domid, unsi
netif->credit_bytes = netif->remaining_credit = ~0UL;
netif->credit_usec = 0UL;
init_timer(&netif->credit_timeout);
+ netif->credit_timeout.expires = jiffies;
dev->hard_start_xmit = netif_be_start_xmit;
dev->get_stats = netif_be_get_stats;
@@ -144,11 +164,10 @@ netif_t *netif_alloc(domid_t domid, unsi
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- /*
- * Reduce default TX queuelen so that each guest interface only
- * allows it to eat around 6.4MB of host memory.
- */
- dev->tx_queue_len = 100;
+ dev->tx_queue_len = netbk_queue_length;
+ if (dev->tx_queue_len != 0)
+ printk(KERN_WARNING "netbk: WARNING: device '%s' has non-zero "
+ "queue length (%lu)!\n", dev->name, dev->tx_queue_len);
for (i = 0; i < ETH_ALEN; i++)
if (be_mac[i] != 0)
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/loopback.c
--- a/drivers/xen/netback/loopback.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/loopback.c Mon Oct 16 09:31:03 2006 -0400
@@ -53,8 +53,10 @@
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <net/dst.h>
-
-static int nloopbacks = 8;
+#include <net/xfrm.h> /* secpath_reset() */
+#include <asm/hypervisor.h> /* is_initial_xendomain() */
+
+static int nloopbacks = -1;
module_param(nloopbacks, int, 0);
MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
@@ -77,9 +79,59 @@ static int loopback_close(struct net_dev
return 0;
}
+#ifdef CONFIG_X86
+static int is_foreign(unsigned long pfn)
+{
+ /* NB. Play it safe for auto-translation mode. */
+ return (xen_feature(XENFEAT_auto_translated_physmap) ||
+ (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
+}
+#else
+/* How to detect a foreign mapping? Play it safe. */
+#define is_foreign(pfn) (1)
+#endif
+
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+ struct page *page;
+ unsigned long pfn;
+ int i, off;
+ char *vaddr;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+ if (!is_foreign(pfn))
+ continue;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!page))
+ return 0;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ off = skb_shinfo(skb)->frags[i].page_offset;
+ memcpy(page_address(page) + off,
+ vaddr + off,
+ skb_shinfo(skb)->frags[i].size);
+ kunmap_skb_frag(vaddr);
+
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->frags[i].page = page;
+ }
+
+ return 1;
+}
+
static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_private *np = netdev_priv(dev);
+
+ if (!skb_remove_foreign_references(skb)) {
+ np->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+ }
dst_release(skb->dst);
skb->dst = NULL;
@@ -110,6 +162,11 @@ static int loopback_start_xmit(struct sk
skb->protocol = eth_type_trans(skb, dev);
skb->dev = dev;
dev->last_rx = jiffies;
+
+ /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
+ nf_reset(skb);
+ secpath_reset(skb);
+
netif_rx(skb);
return 0;
@@ -239,6 +296,9 @@ static int __init loopback_init(void)
{
int i, err = 0;
+ if (nloopbacks == -1)
+ nloopbacks = is_initial_xendomain() ? 4 : 0;
+
for (i = 0; i < nloopbacks; i++)
if ((err = make_loopback(i)) != 0)
break;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/netback.c
--- a/drivers/xen/netback/netback.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/netback.c Mon Oct 16 09:31:03 2006 -0400
@@ -70,35 +70,43 @@ static struct timer_list net_timer;
static struct sk_buff_head rx_queue;
-static unsigned long mmap_vstart;
+static struct page **mmap_pages;
+static inline unsigned long idx_to_kaddr(unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
+}
+
#ifdef CONFIG_PPC_XEN
-
-static ulong mmap_vaddrs[MAX_PENDING_REQS];
-#define MMAP_VADDR(_req) (mmap_vaddrs[(_req)])
-
-static inline void PPC_map_vaddrs(int idx, gnttab_map_grant_ref_t *mop)
-{
- struct page *page;
- ulong virt = mop->host_addr;
-
- page = virt_to_page(virt);
- get_page(page);
-
-#if 0
- SetPageForeign(page, netif_page_release);
-#else
- (void)netif_page_release;
+struct address_space xen_foreign_dummy_mapping;
+
+static inline void update_mmap_pages(
+ unsigned int idx, gnttab_map_grant_ref_t *mop)
+{
+ struct page *p;
+#ifdef PPC_NOT_YET
+ struct page *cp = mmap_pages[idx];
+ extern int arch_is_foreign_page(struct page *page);
+
+ if (arch_is_foreign_page(cp)) {
+ printk(KERN_EMERG "%s foreign: %p, 0x%x\n",
+ __func__, page_address(cp), page_count(cp));
+ } else {
+ printk(KERN_EMERG "%s local: %p, 0x%x\n",
+ __func__, page_address(cp), page_count(cp));
+ // __free_page(mmap_pages[idx]);
+ }
+
#endif
- mmap_vaddrs[idx] = virt;
-}
-#define _mmap_vaddrs(i,op) do { mmap_vaddrs[(i)] = mop->host_addr;
-#else
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-#endif
-
-#ifndef CONFIG_PPC_XEN
-static void *rx_mmap_area;
+ p = pfn_to_page(mop->dev_bus_addr >> PAGE_SHIFT);
+
+ DPRINTK(KERN_EMERG "%s insert[%d]: 0x%lx, 0x%x\n",
+ __func__, idx, __va(mop->dev_bus_addr), page_count(p));
+
+ SetPageForeign(p, netif_page_release);
+ p->index = idx;
+ mmap_pages[idx] = p;
+}
#endif
#define PKT_PROT_LEN 64
@@ -243,7 +251,7 @@ static struct sk_buff *netbk_copy_skb(st
copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
- page = alloc_page(GFP_ATOMIC | zero);
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
if (unlikely(!page))
goto err_free;
@@ -818,10 +826,27 @@ void netif_deschedule_work(netif_t *neti
}
+static void tx_add_credit(netif_t *netif)
+{
+ unsigned long max_burst;
+
+ /*
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+ max_burst = min(max_burst, 131072UL);
+ max_burst = max(max_burst, netif->credit_bytes);
+
+ netif->remaining_credit = min(netif->remaining_credit +
+ netif->credit_bytes,
+ max_burst);
+}
+
static void tx_credit_callback(unsigned long data)
{
netif_t *netif = (netif_t *)data;
- netif->remaining_credit = netif->credit_bytes;
+ tx_add_credit(netif);
netif_schedule_work(netif);
}
@@ -845,7 +870,7 @@ inline static void net_tx_action_dealloc
gop = tx_unmap_ops;
while (dc != dp) {
pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
GNTMAP_host_map,
grant_tx_handle[pending_idx]);
gop++;
@@ -933,7 +958,7 @@ static gnttab_map_grant_ref_t *netbk_get
txp = RING_GET_REQUEST(&netif->tx, cons++);
pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
- gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txp->gref, netif->domid);
@@ -966,10 +991,10 @@ static int netbk_tx_check_mop(struct sk_
netif_put(netif);
} else {
#ifdef CONFIG_PPC_XEN
- PPC_map_vaddrs(pending_idx, mop);
+ update_mmap_pages(pending_idx, mop);
#else
set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
#endif
grant_tx_handle[pending_idx] = mop->handle;
@@ -987,10 +1012,10 @@ static int netbk_tx_check_mop(struct sk_
newerr = (++mop)->status;
if (likely(!newerr)) {
#ifdef CONFIG_PPC_XEN
- PPC_map_vaddrs(pending_idx, mop);
+ update_mmap_pages(pending_idx, mop);
#else
set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
#endif
grant_tx_handle[pending_idx] = mop->handle;
@@ -1039,7 +1064,7 @@ static void netbk_fill_frags(struct sk_b
pending_idx = (unsigned long)frag->page;
txp = &pending_tx_info[pending_idx].req;
- frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
frag->size = txp->size;
frag->page_offset = txp->offset;
@@ -1135,6 +1160,7 @@ static void net_tx_action(unsigned long
i = netif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
/* Credit-based scheduling. */
if (txreq.size > netif->remaining_credit) {
unsigned long now = jiffies;
@@ -1143,25 +1169,27 @@ static void net_tx_action(unsigned long
msecs_to_jiffies(netif->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&netif->credit_timeout))
- break;
+ if (timer_pending(&netif->credit_timeout)) {
+ netif_put(netif);
+ continue;
+ }
/* Passed the point where we can replenish credit? */
if (time_after_eq(now, next_credit)) {
netif->credit_timeout.expires = now;
- netif->remaining_credit = netif->credit_bytes;
+ tx_add_credit(netif);
}
/* Still too big to send right now? Set a callback. */
if (txreq.size > netif->remaining_credit) {
- netif->remaining_credit = 0;
netif->credit_timeout.data =
(unsigned long)netif;
netif->credit_timeout.function =
tx_credit_callback;
__mod_timer(&netif->credit_timeout,
next_credit);
- break;
+ netif_put(netif);
+ continue;
}
}
netif->remaining_credit -= txreq.size;
@@ -1235,7 +1263,7 @@ static void net_tx_action(unsigned long
}
}
- gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
+ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txreq.gref, netif->domid);
mop++;
@@ -1294,8 +1322,8 @@ static void net_tx_action(unsigned long
}
data_len = skb->len;
- memcpy(skb->data,
- (void *)(MMAP_VADDR(pending_idx)|txp->offset),
+ memcpy(skb->data,
+ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
data_len);
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
@@ -1349,22 +1377,14 @@ static void netif_idx_release(u16 pendin
static void netif_page_release(struct page *page)
{
- u16 pending_idx = page - virt_to_page(mmap_vstart);
-
-#ifdef CONFIG_PPC_XEN /* Ready for next use. */
- BUG();
-#endif
- init_page_count(page);
- netif_idx_release(pending_idx);
-}
-
-#ifndef CONFIG_PPC_XEN
-static void netif_rx_page_release(struct page *page)
-{
/* Ready for next use. */
init_page_count(page);
-}
+#ifdef CONFIG_PPC_XEN
+ /* we need the count to be 2 so the unmap occurs */
+ get_page(page);
#endif
+ netif_idx_release(page->index);
+}
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
@@ -1474,10 +1494,6 @@ static int __init netback_init(void)
if (!is_running_on_xen())
return -ENODEV;
-#ifdef CONFIG_PPC_XEN
- if (!(xen_start_info->flags & SIF_INITDOMAIN))
- return -ENODEV;
-#endif
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NET_RX_RING_SIZE);
@@ -1488,35 +1504,16 @@ static int __init netback_init(void)
net_timer.data = 0;
net_timer.function = net_alarm;
-#ifdef CONFIG_PPC_XEN
- (void)page;
- mmap_vstart = foreign_alloc_empty_page_range(MAX_PENDING_REQS);
-#else
- page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
- if (page == NULL)
+ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (mmap_pages == NULL) {
+ printk("%s: out of memory\n", __FUNCTION__);
return -ENOMEM;
-
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
+ }
for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = virt_to_page(MMAP_VADDR(i));
- init_page_count(page);
+ page = mmap_pages[i];
SetPageForeign(page, netif_page_release);
- }
-#endif
-
-#ifndef CONFIG_PPC_XEN
- page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
- BUG_ON(page == NULL);
- rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
-
- for (i = 0; i < NET_RX_RING_SIZE; i++) {
- page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
- init_page_count(page);
- SetPageForeign(page, netif_rx_page_release);
- }
-#endif
-
+ page->index = i;
+ }
pending_cons = 0;
pending_prod = MAX_PENDING_REQS;
for (i = 0; i < MAX_PENDING_REQS; i++)
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netback/xenbus.c
--- a/drivers/xen/netback/xenbus.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netback/xenbus.c Mon Oct 16 09:31:03 2006 -0400
@@ -262,10 +262,8 @@ static void frontend_changed(struct xenb
case XenbusStateClosed:
xenbus_switch_state(dev, XenbusStateClosed);
-#ifdef JX
if (xenbus_dev_is_online(dev))
break;
-#endif
/* fall through if not online */
case XenbusStateUnknown:
if (be->netif != NULL)
@@ -368,6 +366,10 @@ static void connect(struct backend_info
be->netif->remaining_credit = be->netif->credit_bytes;
xenbus_switch_state(dev, XenbusStateConnected);
+
+ /* May not get a kick from the frontend, so start the tx_queue now. */
+ if (!netbk_can_queue(be->netif->dev))
+ netif_start_queue(be->netif->dev);
}
@@ -405,14 +407,16 @@ static int connect_rings(struct backend_
}
be->netif->copying_receiver = !!rx_copy;
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
- &val) < 0)
- val = 0;
- if (val)
- be->netif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- be->netif->dev->tx_queue_len = 1;
+ if (be->netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
+ be->netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+ be->netif->dev->tx_queue_len = 1;
+ }
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
val = 0;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/netfront/netfront.c
--- a/drivers/xen/netfront/netfront.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/netfront/netfront.c Mon Oct 16 09:31:03 2006 -0400
@@ -47,6 +47,7 @@
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/io.h>
+#include <linux/moduleparam.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
@@ -63,20 +64,64 @@
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
+/*
+ * Mutually-exclusive module options to select receive data path:
+ * rx_copy : Packets are copied by network backend into local memory
+ * rx_flip : Page containing packet data is transferred to our ownership
+ * For fully-virtualised guests there is no option - copying must be used.
+ * For paravirtualised guests, flipping is the default.
+ */
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
#define RX_COPY_THRESHOLD 256
/* If we don't have GSO, fake things up so that we never try to use it. */
-#ifndef NETIF_F_GSO
-#define netif_needs_gso(dev, skb) 0
-#define dev_disable_gso_features(dev) ((void)0)
-#else
+#if defined(NETIF_F_GSO)
#define HAVE_GSO 1
+#define HAVE_TSO 1 /* TSO is a subset of GSO */
static inline void dev_disable_gso_features(struct net_device *dev)
{
/* Turn off all GSO bits except ROBUST. */
dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
dev->features |= NETIF_F_GSO_ROBUST;
}
+#elif defined(NETIF_F_TSO)
+#define HAVE_TSO 1
+#define gso_size tso_size
+#define gso_segs tso_segs
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+ /* Turn off all TSO bits. */
+ dev->features &= ~NETIF_F_TSO;
+}
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->tso_size;
+}
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+ return (features & NETIF_F_TSO);
+}
+
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+ return skb_is_gso(skb) &&
+ (!skb_gso_ok(skb, dev->features) ||
+ unlikely(skb->ip_summed != CHECKSUM_HW));
+}
+#else
+#define netif_needs_gso(dev, skb) 0
+#define dev_disable_gso_features(dev) ((void)0)
#endif
#define GRANT_INVALID_REF 0
@@ -120,7 +165,7 @@ struct netfront_info {
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
grant_ref_t gref_rx_head;
- grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
struct xenbus_device *xbdev;
int tx_ring_ref;
@@ -229,8 +274,7 @@ static int __devinit netfront_probe(stru
int err;
struct net_device *netdev;
struct netfront_info *info;
- unsigned int handle;
- unsigned feature_rx_copy;
+ unsigned int handle, feature_rx_copy, feature_rx_flip, use_copy;
err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
if (err != 1) {
@@ -238,24 +282,28 @@ static int __devinit netfront_probe(stru
return err;
}
-#ifdef CONFIG_PPC_XEN
err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
&feature_rx_copy);
- BUG_ON(err != 1);
- if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
- return err;
- }
- BUG_ON(!feature_rx_copy);
- if (!feature_rx_copy) {
- xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
- return -EINVAL;
- }
-#else
- feature_rx_copy = 0;
+ if (err != 1)
+ feature_rx_copy = 0;
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-flip", "%u",
+ &feature_rx_flip);
+ if (err != 1)
+ feature_rx_flip = 1;
+
+ /*
+ * Copy packets on receive path if:
+ * (a) This was requested by user, and the backend supports it; or
+ * (b) Flipping was requested, but this is unsupported by the backend.
+ */
+ use_copy = (MODPARM_rx_copy && feature_rx_copy) ||
+ (MODPARM_rx_flip && !feature_rx_flip);
+
+#ifdef CONFIG_PPC_XEN
+ if (!use_copy)
+ panic("NetFront _must_ use rx copy feature on PowerPC\n");
#endif
-
- netdev = create_netdev(handle, feature_rx_copy, dev);
+ netdev = create_netdev(handle, use_copy, dev);
if (IS_ERR(netdev)) {
err = PTR_ERR(netdev);
xenbus_dev_fatal(dev, err, "creating netdev");
@@ -272,6 +320,9 @@ static int __devinit netfront_probe(stru
err = open_netdev(info);
if (err)
goto fail_open;
+
+ IPRINTK("Created netdev %s with %sing receive path.\n",
+ netdev->name, info->copying_receiver ? "copy" : "flipp");
return 0;
@@ -387,7 +438,7 @@ again:
goto abort_transaction;
}
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
if (err) {
message = "writing feature-gso-tcpv4";
@@ -744,7 +795,7 @@ no_skb:
} else {
gnttab_grant_foreign_access_ref(ref,
np->xbdev->otherend_id,
- pfn,
+ pfn_to_mfn(pfn),
0);
}
@@ -919,7 +970,7 @@ static int network_start_xmit(struct sk_
tx->flags |= NETTXF_data_validated;
#endif
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
if (skb_shinfo(skb)->gso_size) {
struct netif_extra_info *gso = (struct netif_extra_info *)
RING_GET_REQUEST(&np->tx, ++i);
@@ -1207,12 +1258,14 @@ static int xennet_set_skb_gso(struct sk_
return -EINVAL;
}
+#ifdef HAVE_TSO
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
#ifdef HAVE_GSO
- skb_shinfo(skb)->gso_size = gso->u.gso.size;
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+#endif
skb_shinfo(skb)->gso_segs = 0;
return 0;
@@ -1563,7 +1616,7 @@ static int xennet_set_sg(struct net_devi
static int xennet_set_tso(struct net_device *dev, u32 data)
{
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
if (data) {
struct netfront_info *np = netdev_priv(dev);
int val;
@@ -1634,7 +1687,8 @@ static void network_connect(struct net_d
} else {
gnttab_grant_foreign_access_ref(
ref, np->xbdev->otherend_id,
- page_to_pfn(skb_shinfo(skb)->frags->page),
+ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
+ frags->page)),
0);
}
req->gref = ref;
@@ -1941,7 +1995,7 @@ static void netfront_closing(struct xenb
DPRINTK("%s\n", dev->nodename);
close_netdev(info);
- xenbus_switch_state(dev, XenbusStateClosed);
+ xenbus_frontend_closed(dev);
}
@@ -2055,6 +2109,23 @@ static int __init netif_init(void)
if (!is_running_on_xen())
return -ENODEV;
+#ifdef CONFIG_PPC_XEN
+ if (MODPARM_rx_flip || !MODPARM_rx_copy) {
+ WPRINTK("PowerPC forcing rx_copy.\n");
+ MODPARM_rx_flip = 0;
+ MODPARM_rx_copy = 1;
+ }
+#endif
+#ifdef CONFIG_XEN
+ if (MODPARM_rx_flip && MODPARM_rx_copy) {
+ WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
+ return -EINVAL;
+ }
+
+ if (!MODPARM_rx_flip && !MODPARM_rx_copy)
+ MODPARM_rx_flip = 1; /* Default is to flip. */
+#endif
+
if (is_initial_xendomain())
return 0;
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_backend_client.c
--- a/drivers/xen/xenbus/xenbus_backend_client.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_backend_client.c Mon Oct 16 09:31:03 2006 -0400
@@ -50,7 +50,6 @@ struct vm_struct *xenbus_map_ring_valloc
lock_vm_area(area);
BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
- update_vm_area(area, &op);
unlock_vm_area(area);
if (op.status != GNTST_okay) {
@@ -133,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+ int rc, val;
+
+ rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+ if (rc != 1)
+ val = 0; /* no online node present */
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
MODULE_LICENSE("Dual BSD/GPL");
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_client.c
--- a/drivers/xen/xenbus/xenbus_client.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_client.c Mon Oct 16 09:31:03 2006 -0400
@@ -38,6 +38,20 @@
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+char *xenbus_strstate(enum xenbus_state state)
+{
+ static char *name[] = {
+ [ XenbusStateUnknown ] = "Unknown",
+ [ XenbusStateInitialising ] = "Initialising",
+ [ XenbusStateInitWait ] = "InitWait",
+ [ XenbusStateInitialised ] = "Initialised",
+ [ XenbusStateConnected ] = "Connected",
+ [ XenbusStateClosing ] = "Closing",
+ [ XenbusStateClosed ] = "Closed",
+ };
+ return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *,
@@ -121,6 +135,13 @@ int xenbus_switch_state(struct xenbus_de
}
EXPORT_SYMBOL_GPL(xenbus_switch_state);
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+ xenbus_switch_state(dev, XenbusStateClosed);
+ complete(&dev->down);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
/**
* Return the path to the error node for the given device, or NULL on failure.
@@ -271,7 +292,7 @@ enum xenbus_state xenbus_read_driver_sta
enum xenbus_state result;
int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
if (err)
- result = XenbusStateClosed;
+ result = XenbusStateUnknown;
return result;
}
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_comms.c
--- a/drivers/xen/xenbus/xenbus_comms.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_comms.c Mon Oct 16 09:31:03 2006 -0400
@@ -47,11 +47,6 @@ static DECLARE_WORK(probe_work, xenbus_p
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
-static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
-{
- return mfn_to_virt(xen_start_info->store_mfn);
-}
-
static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
{
if (unlikely(xenstored_ready == 0)) {
@@ -90,7 +85,7 @@ static const void *get_input_chunk(XENST
int xb_write(const void *data, unsigned len)
{
- struct xenstore_domain_interface *intf = xenstore_domain_interface();
+ struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod;
int rc;
@@ -129,7 +124,7 @@ int xb_write(const void *data, unsigned
intf->req_prod += avail;
/* This implies mb() before other side sees interrupt. */
- notify_remote_via_evtchn(xen_start_info->store_evtchn);
+ notify_remote_via_evtchn(xen_store_evtchn);
}
return 0;
@@ -137,7 +132,7 @@ int xb_write(const void *data, unsigned
int xb_read(void *data, unsigned len)
{
- struct xenstore_domain_interface *intf = xenstore_domain_interface();
+ struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod;
int rc;
@@ -180,7 +175,7 @@ int xb_read(void *data, unsigned len)
pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
/* Implies mb(): they will see new header. */
- notify_remote_via_evtchn(xen_start_info->store_evtchn);
+ notify_remote_via_evtchn(xen_store_evtchn);
}
return 0;
@@ -195,7 +190,7 @@ int xb_init_comms(void)
unbind_from_irqhandler(xenbus_irq, &xb_waitq);
err = bind_evtchn_to_irqhandler(
- xen_start_info->store_evtchn, wake_waiting,
+ xen_store_evtchn, wake_waiting,
0, "xenbus", &xb_waitq);
if (err <= 0) {
printk(KERN_ERR "XENBUS request irq failed %i\n", err);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_comms.h
--- a/drivers/xen/xenbus/xenbus_comms.h Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_comms.h Mon Oct 16 09:31:03 2006 -0400
@@ -39,5 +39,7 @@ int xb_read(void *data, unsigned len);
int xb_read(void *data, unsigned len);
int xs_input_avail(void);
extern wait_queue_head_t xb_waitq;
+extern struct xenstore_domain_interface *xen_store_interface;
+extern int xen_store_evtchn;
#endif /* _XENBUS_COMMS_H */
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_dev.c
--- a/drivers/xen/xenbus/xenbus_dev.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_dev.c Mon Oct 16 09:31:03 2006 -0400
@@ -58,6 +58,9 @@ struct xenbus_dev_data {
/* In-progress transaction. */
struct list_head transactions;
+ /* Active watches. */
+ struct list_head watches;
+
/* Partial request. */
unsigned int len;
union {
@@ -70,6 +73,8 @@ struct xenbus_dev_data {
char read_buffer[PAGE_SIZE];
unsigned int read_cons, read_prod;
wait_queue_head_t read_waitq;
+
+ struct mutex reply_mutex;
};
static struct proc_dir_entry *xenbus_dev_intf;
@@ -100,13 +105,59 @@ static void queue_reply(struct xenbus_de
{
int i;
+ mutex_lock(&u->reply_mutex);
+
for (i = 0; i < len; i++, u->read_prod++)
u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer));
+ mutex_unlock(&u->reply_mutex);
+
wake_up(&u->read_waitq);
}
+
+struct watch_adapter
+{
+ struct list_head list;
+ struct xenbus_watch watch;
+ struct xenbus_dev_data *dev_data;
+ char *token;
+};
+
+static void free_watch_adapter (struct watch_adapter *watch)
+{
+ kfree(watch->watch.node);
+ kfree(watch->token);
+ kfree(watch);
+}
+
+static void watch_fired(struct xenbus_watch *watch,
+ const char **vec,
+ unsigned int len)
+{
+ struct watch_adapter *adap =
+ container_of(watch, struct watch_adapter, watch);
+ struct xsd_sockmsg hdr;
+ const char *path, *token;
+ int path_len, tok_len, body_len;
+
+ path = vec[XS_WATCH_PATH];
+ token = adap->token;
+
+ path_len = strlen(path) + 1;
+ tok_len = strlen(token) + 1;
+ body_len = path_len + tok_len;
+
+ hdr.type = XS_WATCH_EVENT;
+ hdr.len = body_len;
+
+ queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
+ queue_reply(adap->dev_data, (char *)path, path_len);
+ queue_reply(adap->dev_data, (char *)token, tok_len);
+}
+
+static LIST_HEAD(watch_list);
static ssize_t xenbus_dev_write(struct file *filp,
const char __user *ubuf,
@@ -116,6 +167,9 @@ static ssize_t xenbus_dev_write(struct f
struct xenbus_dev_transaction *trans = NULL;
uint32_t msg_type;
void *reply;
+ char *path, *token;
+ struct watch_adapter *watch, *tmp_watch;
+ int err;
if ((len + u->len) > sizeof(u->u.buffer))
return -EINVAL;
@@ -169,6 +223,56 @@ static ssize_t xenbus_dev_write(struct f
kfree(reply);
break;
+ case XS_WATCH:
+ case XS_UNWATCH:
+ path = u->u.buffer + sizeof(u->u.msg);
+ token = memchr(path, 0, u->u.msg.len);
+ if (token == NULL)
+ return -EILSEQ;
+ token++;
+
+ if (msg_type == XS_WATCH) {
+ static const char * XS_WATCH_RESP = "OK";
+ struct xsd_sockmsg hdr;
+
+ watch = kmalloc(sizeof(*watch), GFP_KERNEL);
+ watch->watch.node = kmalloc(strlen(path)+1,
+ GFP_KERNEL);
+ strcpy((char *)watch->watch.node, path);
+ watch->watch.callback = watch_fired;
+ watch->token = kmalloc(strlen(token)+1, GFP_KERNEL);
+ strcpy(watch->token, token);
+ watch->dev_data = u;
+
+ err = register_xenbus_watch(&watch->watch);
+ if (err) {
+ free_watch_adapter(watch);
+ return err;
+ }
+
+ list_add(&watch->list, &u->watches);
+
+ hdr.type = XS_WATCH;
+ hdr.len = strlen(XS_WATCH_RESP) + 1;
+ queue_reply(u, (char *)&hdr, sizeof(hdr));
+ queue_reply(u, (char *)XS_WATCH_RESP, hdr.len);
+ } else {
+ list_for_each_entry_safe(watch, tmp_watch,
+ &u->watches, list) {
+ if (strcmp(watch->token, token) ||
+ strcmp(watch->watch.node, path))
+ continue; /* not the (node, token) pair being unwatched */
+ { /* exact match: unregister, unlink and free it, then stop */
+ unregister_xenbus_watch(&watch->watch);
+ list_del(&watch->list);
+ free_watch_adapter(watch);
+ break;
+ }
+ }
+ }
+
+ break;
+
default:
return -EINVAL;
}
@@ -181,7 +285,7 @@ static int xenbus_dev_open(struct inode
{
struct xenbus_dev_data *u;
- if (xen_start_info->store_evtchn == 0)
+ if (xen_store_evtchn == 0)
return -ENOENT;
nonseekable_open(inode, filp);
@@ -191,7 +295,10 @@ static int xenbus_dev_open(struct inode
return -ENOMEM;
INIT_LIST_HEAD(&u->transactions);
+ INIT_LIST_HEAD(&u->watches);
init_waitqueue_head(&u->read_waitq);
+
+ mutex_init(&u->reply_mutex);
filp->private_data = u;
@@ -202,11 +309,18 @@ static int xenbus_dev_release(struct ino
{
struct xenbus_dev_data *u = filp->private_data;
struct xenbus_dev_transaction *trans, *tmp;
+ struct watch_adapter *watch, *tmp_watch;
list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
xenbus_transaction_end(trans->handle, 1);
list_del(&trans->list);
kfree(trans);
+ }
+
+ list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+ unregister_xenbus_watch(&watch->watch);
+ list_del(&watch->list);
+ free_watch_adapter(watch);
}
kfree(u);
@@ -232,7 +346,7 @@ static struct file_operations xenbus_dev
.poll = xenbus_dev_poll,
};
-static int __init
+int __init
xenbus_dev_init(void)
{
xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
@@ -241,5 +355,3 @@ xenbus_dev_init(void)
return 0;
}
-
-__initcall(xenbus_dev_init);
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_probe.c
--- a/drivers/xen/xenbus/xenbus_probe.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_probe.c Mon Oct 16 09:31:03 2006 -0400
@@ -45,18 +45,35 @@
#include <asm/io.h>
#include <asm/page.h>
+#include <asm/maddr.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/xenbus.h>
#include <xen/xen_proc.h>
#include <xen/evtchn.h>
#include <xen/features.h>
+#include <xen/hvm.h>
#include "xenbus_comms.h"
+int xen_store_evtchn;
+struct xenstore_domain_interface *xen_store_interface;
+static unsigned long xen_store_mfn;
+
extern struct mutex xenwatch_mutex;
static BLOCKING_NOTIFIER_HEAD(xenstore_notifier_list);
+
+static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+static int xenbus_dev_probe(struct device *_dev);
+static int xenbus_dev_remove(struct device *_dev);
+static void xenbus_dev_shutdown(struct device *_dev);
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
@@ -141,7 +158,9 @@ static int read_otherend_details(struct
}
if (strlen(xendev->otherend) == 0 ||
!xenbus_exists(XBT_NIL, xendev->otherend, "")) {
- xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s",
+ xenbus_dev_fatal(xendev, -ENOENT,
+ "unable to read other end from %s. "
+ "missing or inaccessible.",
xendev->nodename);
free_otherend_details(xendev);
return -ENOENT;
@@ -164,15 +183,17 @@ static int read_frontend_details(struct
/* Bus type for frontend drivers. */
-static int xenbus_probe_frontend(const char *type, const char *name);
static struct xen_bus_type xenbus_frontend = {
.root = "device",
.levels = 2, /* device/type/<id> */
.get_bus_id = frontend_bus_id,
.probe = xenbus_probe_frontend,
.bus = {
- .name = "xen",
- .match = xenbus_match,
+ .name = "xen",
+ .match = xenbus_match,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+ .shutdown = xenbus_dev_shutdown,
},
.dev = {
.bus_id = "xen",
@@ -217,18 +238,18 @@ static int backend_bus_id(char bus_id[BU
return 0;
}
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
static struct xen_bus_type xenbus_backend = {
.root = "backend",
.levels = 3, /* backend/type/<frontend>/<id> */
.get_bus_id = backend_bus_id,
.probe = xenbus_probe_backend,
.bus = {
- .name = "xen-backend",
- .match = xenbus_match,
- .uevent = xenbus_uevent_backend,
+ .name = "xen-backend",
+ .match = xenbus_match,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+// .shutdown = xenbus_dev_shutdown,
+ .uevent = xenbus_uevent_backend,
},
.dev = {
.bus_id = "xen-backend",
@@ -298,8 +319,23 @@ static void otherend_changed(struct xenb
state = xenbus_read_driver_state(dev->otherend);
- DPRINTK("state is %d, %s, %s",
- state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+ DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+ dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+
+ /*
+ * Ignore xenbus transitions during shutdown. This prevents us doing
+ * work that can fail e.g., when the rootfs is gone.
+ */
+ if (system_state > SYSTEM_RUNNING) {
+ struct xen_bus_type *bus = bus;
+ bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+ /* If we're frontend, drive the state machine to Closed. */
+ /* This should cause the backend to release our resources. */
+ if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+ xenbus_frontend_closed(dev);
+ return;
+ }
+
if (drv->otherend_changed)
drv->otherend_changed(dev, state);
}
@@ -330,7 +366,7 @@ static int xenbus_dev_probe(struct devic
const struct xenbus_device_id *id;
int err;
- DPRINTK("");
+ DPRINTK("%s", dev->nodename);
if (!drv->probe) {
err = -ENODEV;
@@ -375,7 +411,7 @@ static int xenbus_dev_remove(struct devi
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
- DPRINTK("");
+ DPRINTK("%s", dev->nodename);
free_otherend_watch(dev);
free_otherend_details(dev);
@@ -385,6 +421,27 @@ static int xenbus_dev_remove(struct devi
xenbus_switch_state(dev, XenbusStateClosed);
return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
+ dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
+ out:
+ put_device(&dev->dev);
}
static int xenbus_register_driver_common(struct xenbus_driver *drv,
@@ -395,8 +452,6 @@ static int xenbus_register_driver_common
drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
drv->driver.owner = drv->owner;
- drv->driver.probe = xenbus_dev_probe;
- drv->driver.remove = xenbus_dev_remove;
mutex_lock(&xenwatch_mutex);
ret = driver_register(&drv->driver);
@@ -406,9 +461,18 @@ static int xenbus_register_driver_common
int xenbus_register_frontend(struct xenbus_driver *drv)
{
+ int ret;
+
drv->read_otherend_details = read_backend_details;
- return xenbus_register_driver_common(drv, &xenbus_frontend);
+ ret = xenbus_register_driver_common(drv, &xenbus_frontend);
+ if (ret)
+ return ret;
+
+ /* If this driver is loaded as a module wait for devices to attach. */
+ wait_for_devices(drv);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(xenbus_register_frontend);
@@ -541,6 +605,7 @@ static int xenbus_probe_node(struct xen_
tmpstring += strlen(tmpstring) + 1;
strcpy(tmpstring, type);
xendev->devicetype = tmpstring;
+ init_completion(&xendev->down);
xendev->dev.parent = &bus->dev;
xendev->dev.bus = &bus->bus;
@@ -806,7 +871,7 @@ static int resume_dev(struct device *dev
printk(KERN_WARNING
"xenbus: resume %s failed: %i\n",
dev->bus_id, err);
- return err;
+ return err;
}
}
@@ -818,7 +883,7 @@ static int resume_dev(struct device *dev
return err;
}
- return 0;
+ return 0;
}
void xenbus_suspend(void)
@@ -865,29 +930,6 @@ EXPORT_SYMBOL_GPL(unregister_xenstore_no
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
-static int all_devices_ready_(struct device *dev, void *data)
-{
- struct xenbus_device *xendev = to_xenbus_device(dev);
- int *result = data;
-
- if (xendev->state != XenbusStateConnected) {
- *result = 0;
- return 1;
- }
-
- return 0;
-}
-
-
-static int all_devices_ready(void)
-{
- int ready = 1;
- bus_for_each_dev(&xenbus_frontend.bus, NULL, &ready,
- all_devices_ready_);
- return ready;
-}
-
-
void xenbus_probe(void *unused)
{
BUG_ON((xenstored_ready <= 0));
@@ -917,8 +959,7 @@ static int xsd_kva_mmap(struct file *fil
if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
return -EINVAL;
- if (remap_pfn_range(vma, vma->vm_start,
- mfn_to_pfn(xen_start_info->store_mfn),
+ if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
size, vma->vm_page_prot))
return -EAGAIN;
@@ -930,7 +971,7 @@ static int xsd_kva_read(char *page, char
{
int len;
- len = sprintf(page, "0x%p", mfn_to_virt(xen_start_info->store_mfn));
+ len = sprintf(page, "0x%p", xen_store_interface);
*eof = 1;
return len;
}
@@ -940,16 +981,15 @@ static int xsd_port_read(char *page, cha
{
int len;
- len = sprintf(page, "%d", xen_start_info->store_evtchn);
+ len = sprintf(page, "%d", xen_store_evtchn);
*eof = 1;
return len;
}
#endif
-
static int __init xenbus_probe_init(void)
{
- int err = 0, dom0;
+ int err = 0;
unsigned long page = 0;
DPRINTK("");
@@ -964,9 +1004,7 @@ static int __init xenbus_probe_init(void
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
- dom0 = (xen_start_info->store_evtchn == 0);
-
- if (dom0) {
+ if (is_initial_xendomain()) {
struct evtchn_alloc_unbound alloc_unbound;
/* Allocate page. */
@@ -974,7 +1012,7 @@ static int __init xenbus_probe_init(void
if (!page)
return -ENOMEM;
- xen_start_info->store_mfn =
+ xen_store_mfn = xen_start_info->store_mfn =
pfn_to_mfn(virt_to_phys((void *)page) >>
PAGE_SHIFT);
@@ -987,7 +1025,8 @@ static int __init xenbus_probe_init(void
if (err == -ENOSYS)
goto err;
BUG_ON(err);
- xen_start_info->store_evtchn = alloc_unbound.port;
+ xen_store_evtchn = xen_start_info->store_evtchn =
+ alloc_unbound.port;
#ifdef CONFIG_PROC_FS
/* And finally publish the above info in /proc/xen */
@@ -1003,8 +1042,23 @@ static int __init xenbus_probe_init(void
if (xsd_port_intf)
xsd_port_intf->read_proc = xsd_port_read;
#endif
- } else
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+ } else {
xenstored_ready = 1;
+#ifdef CONFIG_XEN
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+#else
+ xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+ xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+ PAGE_SIZE);
+#endif
+ }
+
+
+ xenbus_dev_init();
/* Initialize the interface to xenstore. */
err = xs_init();
@@ -1018,7 +1072,7 @@ static int __init xenbus_probe_init(void
device_register(&xenbus_frontend.dev);
device_register(&xenbus_backend.dev);
- if (!dom0)
+ if (!is_initial_xendomain())
xenbus_probe(NULL);
return 0;
@@ -1038,6 +1092,58 @@ static int __init xenbus_probe_init(void
postcore_initcall(xenbus_probe_init);
+MODULE_LICENSE("Dual BSD/GPL");
+
+
+static int is_disconnected_device(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct device_driver *drv = data;
+
+ /*
+ * A device with no driver will never connect. We care only about
+ * devices which should currently be in the process of connecting.
+ */
+ if (!dev->driver)
+ return 0;
+
+ /* Is this search limited to a particular driver? */
+ if (drv && (dev->driver != drv))
+ return 0;
+
+ return (xendev->state != XenbusStateConnected);
+}
+
+static int exists_disconnected_device(struct device_driver *drv)
+{
+ return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+ is_disconnected_device);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct device_driver *drv = data;
+
+ /* Is this operation limited to a particular driver? */
+ if (drv && (dev->driver != drv))
+ return 0;
+
+ if (!dev->driver) {
+ /* Information only: is this too noisy? */
+ printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
+ xendev->nodename);
+ } else if (xendev->state != XenbusStateConnected) {
+ printk(KERN_WARNING "XENBUS: Timeout connecting "
+ "to device: %s (state %d)\n",
+ xendev->nodename, xendev->state);
+ }
+
+ return 0;
+}
+
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
/*
* On a 10 second timeout, wait for all devices currently configured. We need
@@ -1053,21 +1159,31 @@ postcore_initcall(xenbus_probe_init);
* boot slightly, but of course needs tools or manual intervention to set up
* those flags correctly.
*/
-static int __init wait_for_devices(void)
+static void wait_for_devices(struct xenbus_driver *xendrv)
{
unsigned long timeout = jiffies + 10*HZ;
-
- if (!is_running_on_xen())
- return -ENODEV;
-
- while (time_before(jiffies, timeout)) {
- if (all_devices_ready())
- return 0;
+ struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+
+ if (!ready_to_wait_for_devices || !is_running_on_xen())
+ return;
+
+ while (exists_disconnected_device(drv)) {
+ if (time_after(jiffies, timeout))
+ break;
schedule_timeout_interruptible(HZ/10);
}
- printk(KERN_WARNING "XENBUS: Timeout connecting to devices!\n");
- return 0;
-}
-
-late_initcall(wait_for_devices);
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+ print_device_status);
+}
+
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
+ ready_to_wait_for_devices = 1;
+ wait_for_devices(NULL);
+ return 0;
+}
+
+late_initcall(boot_wait_for_devices);
+#endif
diff -r f0be2cc05103 -r 933b1d114a89 drivers/xen/xenbus/xenbus_xs.c
--- a/drivers/xen/xenbus/xenbus_xs.c Fri Oct 13 12:36:39 2006 -0400
+++ b/drivers/xen/xenbus/xenbus_xs.c Mon Oct 16 09:31:03 2006 -0400
@@ -662,7 +662,17 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watc
void xs_suspend(void)
{
+ struct xenbus_watch *watch;
+ char token[sizeof(watch) * 2 + 1];
+
down_write(&xs_state.suspend_mutex);
+
+ /* No need for watches_lock: the suspend_mutex is sufficient. */
+ list_for_each_entry(watch, &watches, list) {
+ sprintf(token, "%lX", (long)watch);
+ xs_unwatch(watch->node, token);
+ }
+
mutex_lock(&xs_state.request_mutex);
}
diff -r f0be2cc05103 -r 933b1d114a89 include/asm-powerpc/page.h
--- a/include/asm-powerpc/page.h Fri Oct 13 12:36:39 2006 -0400
+++ b/include/asm-powerpc/page.h Mon Oct 16 09:31:03 2006 -0400
@@ -195,6 +195,7 @@ extern const char *arch_vma_name(struct
extern const char *arch_vma_name(struct vm_area_struct *vma);
#include <asm-generic/memory_model.h>
+#include <xen/foreign_page.h>
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff -r f0be2cc05103 -r 933b1d114a89 include/asm-powerpc/xen/asm/hypervisor.h
--- a/include/asm-powerpc/xen/asm/hypervisor.h Fri Oct 13 12:36:39 2006 -0400
+++ b/include/asm-powerpc/xen/asm/hypervisor.h Mon Oct 16 09:31:03 2006 -0400
@@ -239,7 +239,16 @@ typedef unsigned long maddr_t;
typedef unsigned long maddr_t;
#ifdef CONFIG_XEN_SCRUB_PAGES
-#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+
+static inline void scrub_pages(void *p, unsigned n)
+{
+ unsigned i;
+
+ for (i = 0; i < n; i++) {
+ clear_page(p);
+ p += PAGE_SIZE;
+ }
+}
#else
#define scrub_pages(_p,_n) ((void)0)
#endif
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/balloon.h
--- a/include/xen/balloon.h Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/balloon.h Mon Oct 16 09:31:03 2006 -0400
@@ -41,6 +41,8 @@ extern void
extern void
balloon_update_driver_allowance(
long delta);
+struct page **alloc_empty_pages_and_pagevec(int nr_pages);
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
/* Allocate an empty low-memory page range. */
extern struct page *
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/foreign_page.h
--- a/include/xen/foreign_page.h Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/foreign_page.h Mon Oct 16 09:31:03 2006 -0400
@@ -10,6 +10,7 @@
#ifndef __ASM_XEN_FOREIGN_PAGE_H__
#define __ASM_XEN_FOREIGN_PAGE_H__
+#ifndef CONFIG_PPC_XEN
#define PG_foreign PG_arch_1
#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
@@ -27,4 +28,34 @@
#define PageForeignDestructor(page) \
( (void (*) (struct page *)) (page)->mapping )
+#else
+
+extern struct address_space xen_foreign_dummy_mapping; /* sentinel: a page counts as foreign when page->mapping points here */
+/* PageForeign(page): non-zero when page->mapping is the dummy mapping. */
+#define PageForeign(page) \
+ ((page)->mapping == &xen_foreign_dummy_mapping)
+/* SetPageForeign(page, dtor): store dtor in page_private, then tag ->mapping. NOTE(review): smp_rmb() follows two stores -- confirm a write barrier was not intended. */
+#define SetPageForeign(page, dtor) do { \
+ set_page_private((page), (unsigned long)(dtor)); \
+ (page)->mapping = &xen_foreign_dummy_mapping; \
+ smp_rmb(); \
+} while (0)
+/* ClearPageForeign(page): clear ->mapping first, then zero page_private. NOTE(review): smp_rmb() sits between two stores -- confirm a write barrier was not intended. */
+#define ClearPageForeign(page) do { \
+ (page)->mapping = NULL; \
+ smp_rmb(); \
+ set_page_private((page), 0); \
+} while (0)
+/* PageForeignDestructor(page): page_private cast to void (*)(struct page *), i.e. the dtor stored by SetPageForeign(). */
+#define PageForeignDestructor(page) \
+ ( (void (*) (struct page *)) page_private(page) )
+/* arch_free_page(_page, _order): runs the destructor if the page is foreign and evaluates to that flag; _order is unused and _page is evaluated more than once. */
+#define HAVE_ARCH_FREE_PAGE
+#define arch_free_page(_page,_order) \
+({ int foreign = PageForeign(_page); \
+ if (foreign) \
+ (PageForeignDestructor(_page))(_page); \
+ foreign; \
+})
+#endif
#endif /* __ASM_XEN_FOREIGN_PAGE_H__ */
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/xenbus.h
--- a/include/xen/xenbus.h Fri Oct 13 12:36:39 2006 -0400
+++ b/include/xen/xenbus.h Mon Oct 16 09:31:03 2006 -0400
@@ -37,6 +37,7 @@
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
+#include <linux/completion.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/xenbus.h>
@@ -74,6 +75,7 @@ struct xenbus_device {
struct xenbus_watch otherend_watch;
struct device dev;
enum xenbus_state state;
+ struct completion down;
};
static inline struct xenbus_device *to_xenbus_device(struct device *dev)
@@ -274,7 +276,7 @@ int xenbus_free_evtchn(struct xenbus_dev
/**
* Return the state of the driver rooted at the given store path, or
- * XenbusStateClosed if no state can be read.
+ * XenbusStateUnknown if no state can be read.
*/
enum xenbus_state xenbus_read_driver_state(const char *path);
@@ -295,5 +297,10 @@ void xenbus_dev_fatal(struct xenbus_devi
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
...);
+int __init xenbus_dev_init(void);
+
+char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(struct xenbus_device *dev);
+int xenbus_frontend_closed(struct xenbus_device *dev);
#endif /* _XEN_XENBUS_H */
diff -r f0be2cc05103 -r 933b1d114a89 include/xen/hvm.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/include/xen/hvm.h Mon Oct 16 09:31:03 2006 -0400
@@ -0,0 +1,13 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <xen/interface/hvm/params.h>
+#include <asm/hypercall.h>
+
+static inline unsigned long hvm_get_parameter(int idx) /* Stub: ignores idx and always returns 0. */
+{
+ return 0;
+}
+
+#endif /* XEN_HVM_H__ */
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel
|