# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 43582de050c69548a233206354a782d92f79701c
# Parent 36f09499bd8c75595a85cec1951d1b7b7d8a347d
# Parent f62f9b1732b9adaeab50b808746097b7aa089f5d
Merged.
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Dec 2 01:00:01 2005
@@ -1752,7 +1752,7 @@
#endif
#endif
} else {
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
extern const struct consw xennull_con;
extern int console_use_vt;
#if defined(CONFIG_VGA_CONSOLE)
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Dec 2 01:00:01 2005
@@ -481,6 +481,7 @@
blkif_response_t *resp;
unsigned long flags;
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+ int notify;
/* Place on the response ring for the relevant domain. */
spin_lock_irqsave(&blkif->blk_ring_lock, flags);
@@ -488,13 +489,23 @@
resp->id = id;
resp->operation = op;
resp->status = st;
- wmb(); /* Ensure other side can see the response fields. */
blk_ring->rsp_prod_pvt++;
- RING_PUSH_RESPONSES(blk_ring);
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
- /* Kick the relevant domain. */
- notify_remote_via_irq(blkif->irq);
+ /*
+ * Tail check for pending requests. Allows frontend to avoid
+ * notifications if requests are already in flight (lower overheads
+ * and promotes batching).
+ */
+ if (!__on_blkdev_list(blkif) &&
+ RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blkio_schedule();
+ }
+
+ if (notify)
+ notify_remote_via_irq(blkif->irq);
}
void blkif_deschedule(blkif_t *blkif)
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Dec 2 01:00:01 2005
@@ -300,6 +300,10 @@
/* ** Connection ** */
+/*
+** Invoked when the backend is finally 'ready' (and has produced
+** the details about the physical device - #sectors, size, etc).
+*/
static void connect(struct blkfront_info *info)
{
unsigned long sectors, sector_size;
@@ -324,19 +328,16 @@
return;
}
- info->connected = BLKIF_STATE_CONNECTED;
xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
-
- err = xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
- if (err)
- return;
+
+ (void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
/* Kick pending requests. */
spin_lock_irq(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
kick_pending_request_queues(info);
spin_unlock_irq(&blkif_io_lock);
}
-
/**
* Handle the change of state of the backend to Closing. We must delete our
@@ -394,8 +395,17 @@
static inline void flush_requests(struct blkfront_info *info)
{
+ RING_IDX old_prod = info->ring.sring->req_prod;
+
RING_PUSH_REQUESTS(&info->ring);
- notify_remote_via_irq(info->irq);
+
+ /*
+ * Send new requests /then/ check if any old requests are still in
+ * flight. If so then there is no need to send a notification.
+ */
+ mb();
+ if (info->ring.sring->rsp_prod == old_prod)
+ notify_remote_via_irq(info->irq);
}
static void kick_pending_request_queues(struct blkfront_info *info)
@@ -631,6 +641,7 @@
return IRQ_HANDLED;
}
+ again:
rp = info->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -665,6 +676,15 @@
}
info->ring.rsp_cons = i;
+
+ if (i != info->ring.req_prod_pvt) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ if (more_to_do)
+ goto again;
+ } else {
+ info->ring.sring->rsp_event = i + 1;
+ }
kick_pending_request_queues(info);
@@ -751,14 +771,20 @@
kfree(copy);
- /* info->ring->req_prod will be set when we flush_requests().*/
- wmb();
-
- /* Kicks things back into life. */
+ (void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
+
+ /* Now safe for us to use the shared ring */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Send off requeued requests */
flush_requests(info);
- /* Now safe to let other people use the interface. */
- info->connected = BLKIF_STATE_CONNECTED;
+ /* Kick any other new requests queued since we resumed */
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
}
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Dec 2 01:00:01 2005
@@ -375,7 +375,7 @@
static unsigned int blktap_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &blktap_wait, wait);
- if (RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)) {
+ if (blktap_ufe_ring.req_prod_pvt != blktap_ufe_ring.sring->req_prod) {
flush_tlb_all();
RING_PUSH_REQUESTS(&blktap_ufe_ring);
return POLLIN | POLLRDNORM;
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Fri Dec 2 01:00:01 2005
@@ -579,7 +579,7 @@
.wait_until_sent = xencons_wait_until_sent,
};
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
static const char *xennullcon_startup(void)
{
return NULL;
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Fri Dec 2 01:00:01 2005
@@ -50,9 +50,9 @@
struct per_user_data {
/* Notification ring, accessed via /dev/xen/evtchn. */
-#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
+#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
- u16 *ring;
+ evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
/* Processes wait on this queue when ring is empty. */
@@ -75,7 +75,7 @@
if ((u = port_user[port]) != NULL) {
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
@@ -94,52 +94,40 @@
{
int rc;
unsigned int c, p, bytes1 = 0, bytes2 = 0;
- DECLARE_WAITQUEUE(wait, current);
struct per_user_data *u = file->private_data;
- add_wait_queue(&u->evtchn_wait, &wait);
-
- count &= ~1; /* even number of bytes */
-
- if (count == 0) {
- rc = 0;
- goto out;
- }
+ /* Whole number of ports. */
+ count &= ~(sizeof(evtchn_port_t)-1);
+
+ if (count == 0)
+ return 0;
if (count > PAGE_SIZE)
count = PAGE_SIZE;
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ if (u->ring_overflow)
+ return -EFBIG;
if ((c = u->ring_cons) != (p = u->ring_prod))
break;
- if (u->ring_overflow) {
- rc = -EFBIG;
- goto out;
- }
-
- if (file->f_flags & O_NONBLOCK) {
- rc = -EAGAIN;
- goto out;
- }
-
- if (signal_pending(current)) {
- rc = -ERESTARTSYS;
- goto out;
- }
-
- schedule();
+ if (file->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ rc = wait_event_interruptible(
+ u->evtchn_wait, u->ring_cons != u->ring_prod);
+ if (rc)
+ return rc;
}
/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
- sizeof(u16);
- bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
+ sizeof(evtchn_port_t);
+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
} else {
- bytes1 = (p - c) * sizeof(u16);
+ bytes1 = (p - c) * sizeof(evtchn_port_t);
bytes2 = 0;
}
@@ -153,32 +141,26 @@
if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
((bytes2 != 0) &&
- copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) {
- rc = -EFAULT;
- goto out;
- }
-
- u->ring_cons += (bytes1 + bytes2) / sizeof(u16);
-
- rc = bytes1 + bytes2;
-
- out:
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&u->evtchn_wait, &wait);
- return rc;
+ copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+ return -EFAULT;
+
+ u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+
+ return bytes1 + bytes2;
}
static ssize_t evtchn_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
int rc, i;
- u16 *kbuf = (u16 *)__get_free_page(GFP_KERNEL);
+ evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
struct per_user_data *u = file->private_data;
if (kbuf == NULL)
return -ENOMEM;
- count &= ~1; /* even number of bytes */
+ /* Whole number of ports. */
+ count &= ~(sizeof(evtchn_port_t)-1);
if (count == 0) {
rc = 0;
@@ -194,7 +176,7 @@
}
spin_lock_irq(&port_user_lock);
- for (i = 0; i < (count/2); i++)
+ for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
unmask_evtchn(kbuf[i]);
spin_unlock_irq(&port_user_lock);
@@ -379,8 +361,8 @@
memset(u, 0, sizeof(*u));
init_waitqueue_head(&u->evtchn_wait);
- if ((u->ring = (u16 *)__get_free_page(GFP_KERNEL)) == NULL)
- {
+ u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+ if (u->ring == NULL) {
kfree(u);
return -ENOMEM;
}
@@ -400,8 +382,7 @@
free_page((unsigned long)u->ring);
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- {
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
int ret;
if (port_user[i] != u)
continue;
@@ -447,10 +428,9 @@
spin_lock_init(&port_user_lock);
memset(port_user, 0, sizeof(port_user));
- /* (DEVFS) create '/dev/misc/evtchn'. */
+ /* Create '/dev/misc/evtchn'. */
err = misc_register(&evtchn_miscdev);
- if (err != 0)
- {
+ if (err != 0) {
printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
return err;
}
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Dec 2 01:00:01 2005
@@ -43,9 +43,6 @@
static gnttab_transfer_t grant_rx_op[MAX_PENDING_REQS];
static unsigned char rx_notify[NR_IRQS];
-
-/* Don't currently gate addition of an interface to the tx scheduling list. */
-#define tx_work_exists(_if) (1)
static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
@@ -377,25 +374,22 @@
* aggressive in avoiding new-packet notifications -- frontend only needs to
* send a notification if there are no outstanding unreceived responses.
* If we may be buffer transmit buffers for any reason then we must be rather
- * more conservative and advertise that we are 'sleeping' this connection here.
+ * more conservative and treat this as the final check for pending work.
*/
void netif_schedule_work(netif_t *netif)
{
- if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
+ int more_to_do;
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+ more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+#else
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+#endif
+
+ if (more_to_do) {
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
}
-#ifndef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
- else {
- netif->tx.sring->server_is_sleeping = 1;
- mb();
- if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
- netif->tx.sring->server_is_sleeping = 0;
- add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action();
- }
- }
-#endif
}
void netif_deschedule_work(netif_t *netif)
@@ -447,26 +441,6 @@
pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- /*
- * Scheduling checks must happen after the above response is
- * posted. This avoids a possible race with a guest OS on
- * another CPU if that guest is testing against 'resp_prod'
- * when deciding whether to notify us when it queues additional
- * packets.
- */
- mb();
-
- if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
- add_to_net_schedule_list_tail(netif);
- } else {
- netif->tx.sring->server_is_sleeping = 1;
- mb();
- if (RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
- netif->tx.sring->server_is_sleeping = 0;
- add_to_net_schedule_list_tail(netif);
- }
- }
-
netif_put(netif);
}
}
@@ -482,7 +456,7 @@
RING_IDX i;
gnttab_map_grant_ref_t *mop;
unsigned int data_len;
- int ret;
+ int ret, work_to_do;
if (dealloc_cons != dealloc_prod)
net_tx_action_dealloc();
@@ -496,8 +470,8 @@
netif_get(netif);
remove_from_net_schedule_list(netif);
- /* Work to do? */
- if (!RING_HAS_UNCONSUMED_REQUESTS(&netif->tx)) {
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ if (!work_to_do) {
netif_put(netif);
continue;
}
@@ -695,10 +669,8 @@
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
netif_t *netif = dev_id;
- if (tx_work_exists(netif)) {
- add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action();
- }
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
return IRQ_HANDLED;
}
@@ -708,17 +680,25 @@
{
RING_IDX i = netif->tx.rsp_prod_pvt;
netif_tx_response_t *resp;
+ int notify;
resp = RING_GET_RESPONSE(&netif->tx, i);
resp->id = id;
resp->status = st;
- wmb();
+
netif->tx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES(&netif->tx);
-
- mb(); /* Update producer before checking event threshold. */
- if (i == netif->tx.sring->rsp_event)
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ if (notify)
notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+ if (i == netif->tx.req_cons) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+ if (more_to_do)
+ add_to_net_schedule_list_tail(netif);
+ }
+#endif
}
static int make_rx_response(netif_t *netif,
@@ -730,6 +710,7 @@
{
RING_IDX i = netif->rx.rsp_prod_pvt;
netif_rx_response_t *resp;
+ int notify;
resp = RING_GET_RESPONSE(&netif->rx, i);
resp->offset = offset;
@@ -738,12 +719,11 @@
resp->status = (s16)size;
if (st < 0)
resp->status = (s16)st;
- wmb();
+
netif->rx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES(&netif->rx);
-
- mb(); /* Update producer before checking event threshold. */
- return (i == netif->rx.sring->rsp_event);
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
+
+ return notify;
}
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
diff -r 36f09499bd8c -r 43582de050c6 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Dec 1 11:15:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Dec 2 01:00:01 2005
@@ -616,6 +616,7 @@
RING_IDX i;
grant_ref_t ref;
unsigned long mfn;
+ int notify;
if (unlikely(np->tx_full)) {
printk(KERN_ALERT "%s: full queue wasn't stopped!\n",
@@ -661,9 +662,10 @@
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
- wmb(); /* Ensure that backend will see the request. */
np->tx.req_prod_pvt = i + 1;
- RING_PUSH_REQUESTS(&np->tx);
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
+ if (notify)
+ notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
@@ -676,13 +678,6 @@
np->stats.tx_bytes += skb->len;
np->stats.tx_packets++;
-
- /* Only notify Xen if we really have to. */
- mb();
- if (np->tx.sring->server_is_sleeping) {
- np->tx.sring->server_is_sleeping = 0;
- notify_remote_via_irq(np->irq);
- }
return 0;
@@ -761,7 +756,8 @@
rx->id, rx->status);
RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id =
rx->id;
- wmb();
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref =
+ ref;
np->rx.req_prod_pvt++;
RING_PUSH_REQUESTS(&np->rx);
work_done--;
@@ -882,14 +878,9 @@
if (work_done < budget) {
local_irq_save(flags);
- np->rx.sring->rsp_event = i + 1;
-
- /* Deal with hypervisor racing our resetting of rx_event. */
- mb();
- if (np->rx.sring->rsp_prod == i) {
+ RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
+ if (!more_to_do)
__netif_rx_complete(dev);
- more_to_do = 0;
- }
local_irq_restore(flags);
}
@@ -930,7 +921,6 @@
/* Step 1: Reinitialise variables. */
np->tx_full = 0;
- np->rx.sring->rsp_event = np->tx.sring->rsp_event = 1;
/*
* Step 2: Rebuild the RX and TX ring contents.
@@ -972,7 +962,7 @@
np->stats.tx_bytes += skb->len;
np->stats.tx_packets++;
}
- wmb();
+
np->tx.req_prod_pvt = requeue_idx;
RING_PUSH_REQUESTS(&np->tx);
@@ -987,7 +977,7 @@
RING_GET_REQUEST(&np->rx, requeue_idx)->id = i;
requeue_idx++;
}
- wmb();
+
np->rx.req_prod_pvt = requeue_idx;
RING_PUSH_REQUESTS(&np->rx);
@@ -998,7 +988,6 @@
* packets.
*/
np->backend_state = BEST_CONNECTED;
- wmb();
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
diff -r 36f09499bd8c -r 43582de050c6 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Dec 1 11:15:31 2005
+++ b/tools/console/daemon/io.c Fri Dec 2 01:00:01 2005
@@ -62,7 +62,7 @@
struct domain *next;
char *conspath;
int ring_ref;
- int local_port;
+ evtchn_port_t local_port;
int evtchn_fd;
struct xencons_interface *interface;
};
@@ -488,7 +488,7 @@
static void handle_ring_read(struct domain *dom)
{
- uint16_t v;
+ evtchn_port_t v;
if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
return;
diff -r 36f09499bd8c -r 43582de050c6 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Thu Dec 1 11:15:31 2005
+++ b/tools/ioemu/target-i386-dm/helper2.c Fri Dec 2 01:00:01 2005
@@ -125,7 +125,7 @@
//the evtchn port for polling the notification,
//should be inputed as bochs's parameter
-uint16_t ioreq_remote_port, ioreq_local_port;
+evtchn_port_t ioreq_remote_port, ioreq_local_port;
//some functions to handle the io req packet
void sp_info()
@@ -170,12 +170,12 @@
ioreq_t* cpu_get_ioreq(void)
{
int rc;
- uint16_t port;
+ evtchn_port_t port;
rc = read(evtchn_fd, &port, sizeof(port));
if ((rc == sizeof(port)) && (port == ioreq_local_port)) {
// unmask the wanted port again
- write(evtchn_fd, &ioreq_local_port, 2);
+ write(evtchn_fd, &ioreq_local_port, sizeof(port));
//get the io packet from shared memory
return __cpu_get_ioreq();
diff -r 36f09499bd8c -r 43582de050c6 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Thu Dec 1 11:15:31 2005
+++ b/tools/ioemu/vl.c Fri Dec 2 01:00:01 2005
@@ -2907,7 +2907,7 @@
break;
case QEMU_OPTION_p:
{
- extern uint16_t ioreq_remote_port;
+ extern evtchn_port_t ioreq_remote_port;
ioreq_remote_port = atoi(optarg);
fprintf(logfile, "eport: %d\n", ioreq_remote_port);
}
diff -r 36f09499bd8c -r 43582de050c6 tools/libxc/xc_evtchn.c
--- a/tools/libxc/xc_evtchn.c Thu Dec 1 11:15:31 2005
+++ b/tools/libxc/xc_evtchn.c Fri Dec 2 01:00:01 2005
@@ -51,7 +51,7 @@
int xc_evtchn_status(int xc_handle,
uint32_t dom,
- int port,
+ evtchn_port_t port,
xc_evtchn_status_t *status)
{
int rc;
diff -r 36f09499bd8c -r 43582de050c6 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Thu Dec 1 11:15:31 2005
+++ b/tools/libxc/xc_linux_save.c Fri Dec 2 01:00:01 2005
@@ -677,7 +677,7 @@
live_shinfo->arch.pfn_to_mfn_frame_list_list);
if (!live_p2m_frame_list_list) {
- ERR("Couldn't map p2m_frame_list_list");
+ ERR("Couldn't map p2m_frame_list_list (errno %d)", errno);
goto out;
}
diff -r 36f09499bd8c -r 43582de050c6 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Dec 1 11:15:31 2005
+++ b/tools/libxc/xenctrl.h Fri Dec 2 01:00:01 2005
@@ -334,7 +334,7 @@
int xc_evtchn_status(int xc_handle,
uint32_t dom, /* may be DOMID_SELF */
- int port,
+ evtchn_port_t port,
xc_evtchn_status_t *status);
int xc_physdev_pci_access_modify(int xc_handle,
diff -r 36f09499bd8c -r 43582de050c6 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Thu Dec 1 11:15:31 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py Fri Dec 2 01:00:01 2005
@@ -40,10 +40,18 @@
raise XendError(errmsg)
def read_exact(fd, size, errmsg):
- buf = os.read(fd, size)
- if len(buf) != size:
- raise XendError(errmsg)
+ buf = ''
+ while size != 0:
+ str = os.read(fd, size)
+ if not len(str):
+ log.error("read_exact: EOF trying to read %d (buf='%s')" % \
+ (size, buf))
+ raise XendError(errmsg)
+ size = size - len(str)
+ buf = buf + str
return buf
+
+
def save(fd, dominfo, live):
write_exact(fd, SIGNATURE, "could not write guest state file: signature")
diff -r 36f09499bd8c -r 43582de050c6 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Thu Dec 1 11:15:31 2005
+++ b/tools/python/xen/xend/XendDomain.py Fri Dec 2 01:00:01 2005
@@ -397,6 +397,7 @@
raise XendError("can't connect: %s" % err[1])
sock.send("receive\n")
+ sock.recv(80)
XendCheckpoint.save(sock.fileno(), dominfo, live)
diff -r 36f09499bd8c -r 43582de050c6 tools/xenstore/fake_libxc.c
--- a/tools/xenstore/fake_libxc.c Thu Dec 1 11:15:31 2005
+++ b/tools/xenstore/fake_libxc.c Fri Dec 2 01:00:01 2005
@@ -34,7 +34,7 @@
static int sigfd;
static int xs_test_pid;
-static uint16_t port;
+static evtchn_port_t port;
/* The event channel maps to a signal, shared page to an mmapped file. */
void evtchn_notify(int local_port)
diff -r 36f09499bd8c -r 43582de050c6 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Dec 1 11:15:31 2005
+++ b/tools/xenstore/xenstored_domain.c Fri Dec 2 01:00:01 2005
@@ -41,7 +41,7 @@
#include <xen/linux/evtchn.h>
static int *xc_handle;
-static int virq_port;
+static evtchn_port_t virq_port;
int eventchn_fd = -1;
@@ -53,11 +53,11 @@
unsigned int domid;
/* Event channel port */
- uint16_t port;
+ evtchn_port_t port;
/* The remote end of the event channel, used only to validate
repeated domain introductions. */
- uint16_t remote_port;
+ evtchn_port_t remote_port;
/* The mfn associated with the event channel, used only to validate
repeated domain introductions. */
@@ -224,7 +224,7 @@
/* We scan all domains rather than use the information given here. */
void handle_event(void)
{
- uint16_t port;
+ evtchn_port_t port;
if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
barf_perror("Failed to read from event fd");
@@ -314,7 +314,7 @@
char *vec[3];
unsigned int domid;
unsigned long mfn;
- uint16_t port;
+ evtchn_port_t port;
if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
send_error(conn, EINVAL);
@@ -460,7 +460,8 @@
static int dom0_init(void)
{
- int rc, fd, port;
+ int rc, fd;
+ evtchn_port_t port;
unsigned long mfn;
char str[20];
struct domain *dom0;
diff -r 36f09499bd8c -r 43582de050c6 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c Thu Dec 1 11:15:31 2005
+++ b/xen/common/sched_bvt.c Fri Dec 2 01:00:01 2005
@@ -67,6 +67,7 @@
#define MCU (s32)MICROSECS(100) /* Minimum unit */
#define MCU_ADVANCE 10 /* default weight */
#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
+#define CTX_MIN (s32)MICROSECS(10) /* Low limit for ctx_allow */
static s32 ctx_allow = (s32)MILLISECS(5); /* context switch allowance */
static inline void __add_to_runqueue_head(struct vcpu *d)
@@ -297,7 +298,11 @@
if ( cmd->direction == SCHED_INFO_PUT )
ctx_allow = params->ctx_allow;
else
+ {
+ if ( ctx_allow < CTX_MIN )
+ ctx_allow = CTX_MIN;
params->ctx_allow = ctx_allow;
+ }
return 0;
}
diff -r 36f09499bd8c -r 43582de050c6 xen/include/public/event_channel.h
--- a/xen/include/public/event_channel.h Thu Dec 1 11:15:31 2005
+++ b/xen/include/public/event_channel.h Fri Dec 2 01:00:01 2005
@@ -8,6 +8,8 @@
#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+typedef uint32_t evtchn_port_t;
/*
* EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
@@ -20,9 +22,9 @@
#define EVTCHNOP_alloc_unbound 6
typedef struct evtchn_alloc_unbound {
/* IN parameters */
- domid_t dom, remote_dom;
+ domid_t dom, remote_dom;
/* OUT parameters */
- uint32_t port;
+ evtchn_port_t port;
} evtchn_alloc_unbound_t;
/*
@@ -37,10 +39,10 @@
#define EVTCHNOP_bind_interdomain 0
typedef struct evtchn_bind_interdomain {
/* IN parameters. */
- domid_t remote_dom;
- uint32_t remote_port;
+ domid_t remote_dom;
+ evtchn_port_t remote_port;
/* OUT parameters. */
- uint32_t local_port;
+ evtchn_port_t local_port;
} evtchn_bind_interdomain_t;
/*
@@ -57,7 +59,7 @@
uint32_t virq;
uint32_t vcpu;
/* OUT parameters. */
- uint32_t port;
+ evtchn_port_t port;
} evtchn_bind_virq_t;
/*
@@ -73,7 +75,7 @@
#define BIND_PIRQ__WILL_SHARE 1
uint32_t flags; /* BIND_PIRQ__* */
/* OUT parameters. */
- uint32_t port;
+ evtchn_port_t port;
} evtchn_bind_pirq_t;
/*
@@ -86,7 +88,7 @@
typedef struct evtchn_bind_ipi {
uint32_t vcpu;
/* OUT parameters. */
- uint32_t port;
+ evtchn_port_t port;
} evtchn_bind_ipi_t;
/*
@@ -97,7 +99,7 @@
#define EVTCHNOP_close 3
typedef struct evtchn_close {
/* IN parameters. */
- uint32_t port;
+ evtchn_port_t port;
} evtchn_close_t;
/*
@@ -107,7 +109,7 @@
#define EVTCHNOP_send 4
typedef struct evtchn_send {
/* IN parameters. */
- uint32_t port;
+ evtchn_port_t port;
} evtchn_send_t;
/*
@@ -122,7 +124,7 @@
typedef struct evtchn_status {
/* IN parameters */
domid_t dom;
- uint32_t port;
+ evtchn_port_t port;
/* OUT parameters */
#define EVTCHNSTAT_closed 0 /* Channel is not in use. */
#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/
@@ -134,11 +136,11 @@
uint32_t vcpu; /* VCPU to which this channel is bound. */
union {
struct {
- domid_t dom;
+ domid_t dom;
} unbound; /* EVTCHNSTAT_unbound */
struct {
- domid_t dom;
- uint32_t port;
+ domid_t dom;
+ evtchn_port_t port;
} interdomain; /* EVTCHNSTAT_interdomain */
uint32_t pirq; /* EVTCHNSTAT_pirq */
uint32_t virq; /* EVTCHNSTAT_virq */
@@ -158,7 +160,7 @@
#define EVTCHNOP_bind_vcpu 8
typedef struct evtchn_bind_vcpu {
/* IN parameters. */
- uint32_t port;
+ evtchn_port_t port;
uint32_t vcpu;
} evtchn_bind_vcpu_t;
diff -r 36f09499bd8c -r 43582de050c6 xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h Thu Dec 1 11:15:31 2005
+++ b/xen/include/public/io/blkif.h Fri Dec 2 01:00:01 2005
@@ -10,6 +10,19 @@
#define __XEN_PUBLIC_IO_BLKIF_H__
#include "ring.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, there is no
+ * need to send a notification if there are old requests still in flight
+ * (that is, old_req_prod != sring->rsp_prod). The backend guarantees to check
+ * for new requests after queuing the response for the last in-flight request.
+ * (NB. The generic req_event mechanism is not used for blk requests.)
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
#ifndef blkif_vdev_t
#define blkif_vdev_t uint16_t
diff -r 36f09499bd8c -r 43582de050c6 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Thu Dec 1 11:15:31 2005
+++ b/xen/include/public/io/netif.h Fri Dec 2 01:00:01 2005
@@ -10,6 +10,13 @@
#define __XEN_PUBLIC_IO_NETIF_H__
#include "ring.h"
+
+/*
+ * Note that there is *never* any need to notify the backend when enqueuing
+ * receive requests (netif_rx_request_t). Notifications after enqueuing any
+ * other type of message should be conditional on the appropriate req_event
+ * or rsp_event field in the shared ring.
+ */
typedef struct netif_tx_request {
grant_ref_t gref; /* Reference to buffer page */
diff -r 36f09499bd8c -r 43582de050c6 xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h Thu Dec 1 11:15:31 2005
+++ b/xen/include/public/io/ring.h Fri Dec 2 01:00:01 2005
@@ -1,10 +1,10 @@
-
-
-
-/*
+/******************************************************************************
+ * ring.h
+ *
* Shared producer-consumer ring macros.
+ *
* Tim Deegan and Andrew Warfield November 2004.
- */
+ */
#ifndef __XEN_PUBLIC_IO_RING_H__
#define __XEN_PUBLIC_IO_RING_H__
@@ -28,32 +28,35 @@
(__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
/*
- * Macros to make the correct C datatypes for a new kind of ring.
- *
- * To make a new ring datatype, you need to have two message structures,
- * let's say request_t, and response_t already defined.
- *
- * In a header where you want the ring datatype declared, you then do:
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
*
* DEFINE_RING_TYPES(mytag, request_t, response_t);
*
- * These expand out to give you a set of types, as you can see below.
- * The most important of these are:
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
*
* mytag_sring_t - The shared ring.
* mytag_front_ring_t - The 'front' half of the ring.
* mytag_back_ring_t - The 'back' half of the ring.
*
- * To initialize a ring in your code you need to know the location and size
- * of the shared memory area (PAGE_SIZE, for instance). To initialise
- * the front half:
- *
- * mytag_front_ring_t front_ring;
- *
- * SHARED_RING_INIT((mytag_sring_t *)shared_page);
- * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
- *
- * Initializing the back follows similarly...
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ * mytag_front_ring_t front_ring;
+ * SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ * mytag_back_ring_t back_ring;
+ * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
*/
#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
@@ -66,10 +69,8 @@
\
/* Shared ring page */ \
struct __name##_sring { \
- RING_IDX req_prod; \
- RING_IDX rsp_prod; \
- RING_IDX rsp_event; /* notify client when rsp_prod == rsp_event */ \
- uint8_t server_is_sleeping; /* notify server to kick off work */ \
+ RING_IDX req_prod, req_event; \
+ RING_IDX rsp_prod, rsp_event; \
union __name##_sring_entry ring[1]; /* variable-length */ \
}; \
\
@@ -95,24 +96,24 @@
typedef struct __name##_back_ring __name##_back_ring_t
/*
- * Macros for manipulating rings.
- *
- * FRONT_RING_whatever works on the "front end" of a ring: here
- * requests are pushed on to the ring and responses taken off it.
- *
- * BACK_RING_whatever works on the "back end" of a ring: here
- * requests are taken off the ring and responses put on.
- *
- * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
- * This is OK in 1-for-1 request-response situations where the
- * requestor (front end) never has more than RING_SIZE()-1
- * outstanding requests.
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
*/
/* Initialising empty rings */
#define SHARED_RING_INIT(_s) do { \
- (_s)->req_prod = 0; \
- (_s)->rsp_prod = 0; \
+ (_s)->req_prod = (_s)->rsp_prod = 0; \
+ (_s)->req_event = (_s)->rsp_event = 1; \
} while(0)
#define FRONT_RING_INIT(_r, _s, __size) do { \
@@ -148,10 +149,6 @@
#define RING_SIZE(_r) \
((_r)->nr_ents)
-/* How many empty slots are on a ring? */
-#define RING_PENDING_REQUESTS(_r) \
- ( ((_r)->req_prod_pvt - (_r)->rsp_cons) )
-
/* Test if there is an empty slot available on the front ring.
* (This is only meaningful from the front. )
*/
@@ -167,25 +164,6 @@
(((_r)->req_cons - (_r)->rsp_prod_pvt) != \
RING_SIZE(_r)) )
-/* Test if there are messages waiting to be pushed. */
-#define RING_HAS_UNPUSHED_REQUESTS(_r) \
- ( (_r)->req_prod_pvt != (_r)->sring->req_prod )
-
-#define RING_HAS_UNPUSHED_RESPONSES(_r) \
- ( (_r)->rsp_prod_pvt != (_r)->sring->rsp_prod )
-
-/* Copy the private producer pointer into the shared ring so the other end
- * can see the updates we've made. */
-#define RING_PUSH_REQUESTS(_r) do { \
- wmb(); \
- (_r)->sring->req_prod = (_r)->req_prod_pvt; \
-} while (0)
-
-#define RING_PUSH_RESPONSES(_r) do { \
- wmb(); \
- (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
-} while (0)
-
/* Direct access to individual ring elements, by index. */
#define RING_GET_REQUEST(_r, _idx) \
(&((_r)->sring->ring[ \
@@ -201,6 +179,82 @@
#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+#define RING_PUSH_REQUESTS(_r) do { \
+ wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = (_r)->req_prod_pvt; /* publish the private producer index to the shared ring */ \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do { \
+ wmb(); /* front sees responses /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; /* publish the private producer index to the shared ring */ \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ * is a boolean return value. True indicates that the receiver requires an
+ * asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ * The second argument is a boolean return value. True indicates that there
+ * are pending messages on the ring (i.e., the connection should not be put
+ * to sleep).
+ *
+ * These macros will set the req_event/rsp_event field to trigger a
+ * notification on the very next message that is enqueued. If you want to
+ * create batches of work (i.e., only receive a notification after several
+ * messages have been enqueued) then you will need to create a customised
+ * version of the FINAL_CHECK macro in your own code, which sets the event
+ * field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->req_prod; /* shared producer index before this push */ \
+ RING_IDX __new = (_r)->req_prod_pvt; /* producer index being published */ \
+ wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = __new; \
+ mb(); /* back sees new requests /before/ we check req_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
+ (RING_IDX)(__new - __old)); /* true iff req_event lies in (__old, __new]; wrap-safe assuming RING_IDX is unsigned (typedef not visible here) */ \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->rsp_prod; /* shared producer index before this push */ \
+ RING_IDX __new = (_r)->rsp_prod_pvt; /* producer index being published */ \
+ wmb(); /* front sees responses /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = __new; \
+ mb(); /* front sees new responses /before/ we check rsp_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
+ (RING_IDX)(__new - __old)); /* true iff rsp_event lies in (__old, __new]; wrap-safe assuming RING_IDX is unsigned (typedef not visible here) */ \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+ if (_work_to_do) break; /* work already pending: req_event is left unchanged */ \
+ (_r)->sring->req_event = (_r)->req_cons + 1; /* ask for a notify on the very next request enqueued */ \
+ mb(); /* req_event store is ordered before the re-check below */ \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); /* re-check: a request may have been pushed before req_event was updated */ \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+ if (_work_to_do) break; /* work already pending: rsp_event is left unchanged */ \
+ (_r)->sring->rsp_event = (_r)->rsp_cons + 1; /* ask for a notify on the very next response enqueued */ \
+ mb(); /* rsp_event store is ordered before the re-check below */ \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); /* re-check: a response may have been pushed before rsp_event was updated */ \
+} while (0)
+
#endif /* __XEN_PUBLIC_IO_RING_H__ */
/*
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|