Thought I should point out this proposed blkback patch
as it is fairly extensive and may cause problems for
Xen/ia64 multiple domains. Perhaps Kevin or Matt can
look it over before it gets applied? On quick glance I see that
the removal of xen_init() is probably a mistake.
http://lists.xensource.com/archives/html/xen-devel/2005-11/msg00371.html
====
> Does 'xm save / xm restore' work with this patch (at least as well as
it
> currently does ;-)?
xm save/restore still doesn't work for me, neither with the sparse tree
nor with the linux-2.6 repository, so I can't try. I can't see any
reason why it should become worse with that patch though.
I've resynced the blkback threading patch with the latest sparse tree,
here we are. Changes:
* One thread per blkif. The I/O scheduler can do a better job that
way, also you can use ionice on the blkback threads to adjust the
block I/O priorities for the domain.
* Various stuff has been moved from global variables into blkif_t.
* The scary allocation ring for pending_req's is gone and has been
replaced by a free list.
* made dispatch_rw_block_io() reentrant.
* general linux coding style cleanup, at least for the code I've
touched anyway.
* number of outstanding requests is runtime-configurable now.
* made the ia64 #ifdefs smaller and dropped one. It should still
work on ia64 in theory, but would be great if the ia64 folks
can have a look ...
cheers,
Gerd
diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Nov 8
17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Wed Nov 9
13:45:37 2005
@@ -12,6 +12,8 @@
*/
#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
#include <asm-xen/balloon.h>
#include <asm/hypervisor.h>
#include "common.h"
@@ -21,26 +23,21 @@
* pulled from a communication ring are quite likely to end up being
part of
* the same scatter/gather request at the disc.
*
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ *
* This will increase the chances of being able to write whole tracks.
* 64 should be enough to keep us competitive with Linux.
*/
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-static unsigned long mmap_vstart;
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#ifdef __ia64__
-static void *pending_vaddrs[MMAP_PAGES];
-#define MMAP_VADDR(_idx, _i) \
- (unsigned long)(pending_vaddrs[((_idx) *
BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#else
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-#endif
+static int blkif_reqs = 64;
+static int mmap_pages;
+
+static int __init set_blkif_reqs(char *str)
+{
+ get_option(&str, &blkif_reqs);
+ return 1;
+}
+__setup("blkif_reqs=", set_blkif_reqs);
+
/*
* Each outstanding request that we've passed to the lower device
layers has a
@@ -55,43 +52,38 @@
atomic_t pendcnt;
unsigned short operation;
int status;
+ struct list_head free_list;
} pending_req_t;
-/*
- * We can't allocate pending_req's in order, since they may complete
out of
- * order. We therefore maintain an allocation ring. This ring also
indicates
- * when enough work has been passed down -- at that point the
allocation ring
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk
rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod +
pending_cons)
-
-static request_queue_t *plugged_queue;
-static inline void flush_plugged_queue(void)
-{
- request_queue_t *q = plugged_queue;
- if (q != NULL) {
- if ( q->unplug_fn != NULL )
- q->unplug_fn(q);
- blk_put_queue(q);
- plugged_queue = NULL;
- }
-}
+static pending_req_t *pending_reqs;
+static struct list_head pending_free;
+static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static u16 *pending_grant_handles;
+
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+ return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
/* When using grant tables to map a frame for device access then the
* handle returned must be used to unmap the frame. This is needed to
* drop the ref count on the frame.
*/
-static u16 pending_grant_handles[MMAP_PAGES];
-#define pending_handle(_idx, _i) \
- (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) +
(_i)])
-#define BLKBACK_INVALID_HANDLE (0xFFFF)
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
@@ -105,26 +97,79 @@
static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16);
}
#endif
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+ blkif_request_t *req,
+ pending_req_t *pending_req);
static void make_response(blkif_t *blkif, unsigned long id,
unsigned short op, int st);
-static void fast_flush_area(int idx, int nr_pages)
+/******************************************************************
+ * misc small helpers
+ */
+static pending_req_t* alloc_req(void)
+{
+ pending_req_t *req = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pending_free_lock, flags);
+ if (!list_empty(&pending_free)) {
+ req = list_entry(pending_free.next, pending_req_t, free_list);
+ list_del(&req->free_list);
+ }
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+ return req;
+}
+
+static void free_req(pending_req_t *req)
+{
+ unsigned long flags;
+ int was_empty;
+
+ spin_lock_irqsave(&pending_free_lock, flags);
+ was_empty = list_empty(&pending_free);
+ list_add(&req->free_list, &pending_free);
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+ if (was_empty)
+ wake_up(&pending_free_wq);
+}
+
+static void unplug_queue(blkif_t *blkif)
+{
+ if (NULL == blkif->plug)
+ return;
+ if (blkif->plug->unplug_fn)
+ blkif->plug->unplug_fn(blkif->plug);
+ blk_put_queue(blkif->plug);
+ blkif->plug = NULL;
+}
+
+static void plug_queue(blkif_t *blkif, struct bio *bio)
+{
+ request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+ if (q == blkif->plug)
+ return;
+ unplug_queue(blkif);
+ blk_get_queue(q);
+ blkif->plug = q;
+}
+
+static void fast_flush_area(pending_req_t *req)
{
struct gnttab_unmap_grant_ref
unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int i, invcount = 0;
u16 handle;
int ret;
- for (i = 0; i < nr_pages; i++) {
- handle = pending_handle(idx, i);
+ for (i = 0; i < req->nr_pages; i++) {
+ handle = pending_handle(req, i);
if (handle == BLKBACK_INVALID_HANDLE)
continue;
- unmap[invcount].host_addr = MMAP_VADDR(idx, i);
+ unmap[invcount].host_addr = vaddr(req, i);
unmap[invcount].dev_bus_addr = 0;
unmap[invcount].handle = handle;
- pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
invcount++;
}
@@ -133,109 +178,56 @@
BUG_ON(ret);
}
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
- return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
- unsigned long flags;
-
- if (!__on_blkdev_list(blkif))
- return;
-
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if (__on_blkdev_list(blkif)) {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = NULL;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
- unsigned long flags;
-
- if (__on_blkdev_list(blkif))
- return;
-
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
- list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
- blkif_get(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
/******************************************************************
* SCHEDULER FUNCTIONS
*/
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int blkio_schedule(void *arg)
-{
- DECLARE_WAITQUEUE(wq, current);
-
- blkif_t *blkif;
- struct list_head *ent;
-
- daemonize("xenblkd");
-
+int blkif_schedule(void *arg)
+{
+ blkif_t *blkif = arg;
+
+ blkif_get(blkif);
+ printk(KERN_DEBUG "%s: started\n", current->comm);
for (;;) {
- /* Wait for work to do. */
- add_wait_queue(&blkio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&blkio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&blkio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&blkio_schedule_list)) {
- ent = blkio_schedule_list.next;
- blkif = list_entry(ent, blkif_t, blkdev_list);
- blkif_get(blkif);
- remove_from_blkdev_list(blkif);
- if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
- add_to_blkdev_list_tail(blkif);
- blkif_put(blkif);
- }
-
- /* Push the batch through to disc. */
- flush_plugged_queue();
- }
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
- /*
- * Needed so that two processes, which together make the following
- * predicate true, don't both read stale values and evaluate the
- * predicate incorrectly. Incredibly unlikely to stall the scheduler
- * on x86, but...
- */
- smp_mb();
-
- if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
- !list_empty(&blkio_schedule_list))
- wake_up(&blkio_schedule_wait);
-}
-
-
+ if (!atomic_read(&blkif->io_pending)) {
+ /* Wait for work to do or requests to exit. */
+ if (kthread_should_stop())
+ break;
+ wait_event_interruptible(blkif->wq,
+ atomic_read(&blkif->io_pending) ||
+ kthread_should_stop());
+ } else if (list_empty(&pending_free)) {
+ /* Wait for pending_req becoming available. */
+ wait_event_interruptible(pending_free_wq,
+ !list_empty(&pending_free));
+ }
+
+ /* Schedule I/O */
+ atomic_set(&blkif->io_pending, 0);
+ if (do_block_io_op(blkif))
+ atomic_inc(&blkif->io_pending);
+ unplug_queue(blkif);
+
+#if 0
+ /* Print stats for performance debugging. */
+ if (time_after(jiffies, blkif->st_print)) {
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
+ current->comm, blkif->st_oo_req,
+ blkif->st_rd_req, blkif->st_wr_req);
+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+ blkif->st_rd_req = 0;
+ blkif->st_wr_req = 0;
+ blkif->st_oo_req = 0;
+ }
+#endif
+ }
+
+ /* bye folks, and thanks for all the fish ;) */
+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
+ blkif->xenblkd = NULL;
+ blkif_put(blkif);
+ return 0;
+}
/******************************************************************
* COMPLETION CALLBACK -- Called as bh->b_end_io()
@@ -243,8 +235,6 @@
static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
- unsigned long flags;
-
/* An error fails the entire request. */
if (!uptodate) {
DPRINTK("Buffer not up-to-date at end of operation\n");
@@ -252,15 +242,11 @@
}
if (atomic_dec_and_test(&pending_req->pendcnt)) {
- int pending_idx = pending_req - pending_reqs;
- fast_flush_area(pending_idx, pending_req->nr_pages);
+ fast_flush_area(pending_req);
make_response(pending_req->blkif, pending_req->id,
pending_req->operation,
pending_req->status);
blkif_put(pending_req->blkif);
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
- maybe_trigger_blkio_schedule();
+ free_req(pending_req);
}
}
@@ -281,8 +267,10 @@
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
blkif_t *blkif = dev_id;
- add_to_blkdev_list_tail(blkif);
- maybe_trigger_blkio_schedule();
+
+ atomic_inc(&blkif->io_pending);
+ if (blkif->status == CONNECTED)
+ wake_up(&blkif->wq);
return IRQ_HANDLED;
}
@@ -292,10 +280,11 @@
* DOWNWARD CALLS -- These interface with the block-device layer
proper.
*/
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
+static int do_block_io_op(blkif_t *blkif)
{
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
blkif_request_t *req;
+ pending_req_t *pending_req;
RING_IDX i, rp;
int more_to_do = 0;
@@ -305,24 +294,30 @@
for (i = blk_ring->req_cons;
(i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
i++) {
- if ((max_to_do-- == 0) ||
- (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+
+ pending_req = alloc_req();
+ if (NULL == pending_req) {
+ blkif->st_oo_req++;
more_to_do = 1;
break;
}
-
+
req = RING_GET_REQUEST(blk_ring, i);
switch (req->operation) {
case BLKIF_OP_READ:
+ blkif->st_rd_req++;
+ dispatch_rw_block_io(blkif, req, pending_req);
+ break;
case BLKIF_OP_WRITE:
- dispatch_rw_block_io(blkif, req);
+ blkif->st_wr_req++;
+ dispatch_rw_block_io(blkif, req, pending_req);
break;
-
default:
DPRINTK("error: unknown block io operation
[%d]\n",
req->operation);
make_response(blkif, req->id, req->operation,
BLKIF_RSP_ERROR);
+ free_req(pending_req);
break;
}
}
@@ -331,13 +326,13 @@
return more_to_do;
}
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+static void dispatch_rw_block_io(blkif_t *blkif,
+ blkif_request_t *req,
+ pending_req_t *pending_req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head *
bhs[]);
int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE :
READ;
unsigned long fas = 0;
- int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
struct gnttab_map_grant_ref
map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct phys_req preq;
struct {
@@ -345,31 +340,35 @@
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int nseg;
struct bio *bio = NULL,
*biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int nbio = 0;
- request_queue_t *q;
- int ret, errors = 0;
+ int ret, i, nbio = 0;
/* Check that number of segments is sane. */
nseg = req->nr_segments;
if (unlikely(nseg == 0) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
DPRINTK("Bad number of segments in request (%d)\n",
nseg);
- goto bad_descriptor;
+ goto fail_response;
}
preq.dev = req->handle;
preq.sector_number = req->sector_number;
preq.nr_sects = 0;
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+
for (i = 0; i < nseg; i++) {
fas = req->frame_and_sects[i];
seg[i].nsec = blkif_last_sect(fas) -
blkif_first_sect(fas) + 1;
if (seg[i].nsec <= 0)
- goto bad_descriptor;
+ goto fail_response;
preq.nr_sects += seg[i].nsec;
- map[i].host_addr = MMAP_VADDR(pending_idx, i);
+ map[i].host_addr = vaddr(pending_req, i);
map[i].dom = blkif->domid;
map[i].ref = blkif_gref_from_fas(fas);
map[i].flags = GNTMAP_host_map;
@@ -381,27 +380,23 @@
BUG_ON(ret);
for (i = 0; i < nseg; i++) {
- if (likely(map[i].handle >= 0)) {
- pending_handle(pending_idx, i) = map[i].handle;
+ if (unlikely(map[i].handle < 0)) {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ goto fail_flush;
+ }
+
+ pending_handle(pending_req, i) = map[i].handle;
#ifdef __ia64__
- MMAP_VADDR(pending_idx,i) = gnttab_map_vaddr(map[i]);
+ pending_vaddrs[vaddr_pagenr(pending_req, i)] =
+ gnttab_map_vaddr(map[i]);
#else
- phys_to_machine_mapping[__pa(MMAP_VADDR(
- pending_idx, i)) >> PAGE_SHIFT] =
- FOREIGN_FRAME(map[i].dev_bus_addr>>PAGE_SHIFT);
+ phys_to_machine_mapping[__pa(vaddr(
+ pending_req, i)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
#endif
- fas = req->frame_and_sects[i];
- seg[i].buf = map[i].dev_bus_addr |
- (blkif_first_sect(fas) << 9);
- } else {
- errors++;
- }
- }
-
- if (errors) {
- DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
+ fas = req->frame_and_sects[i];
+ seg[i].buf = map[i].dev_bus_addr |
+ (blkif_first_sect(fas) << 9);
}
if (vbd_translate(&preq, blkif, operation) != 0) {
@@ -409,37 +404,25 @@
operation == READ ? "read" : "write",
preq.sector_number,
preq.sector_number + preq.nr_sects, preq.dev);
- goto bad_descriptor;
- }
-
- pending_req = &pending_reqs[pending_idx];
- pending_req->blkif = blkif;
- pending_req->id = req->id;
- pending_req->operation = operation;
- pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
+ goto fail_flush;
+ }
for (i = 0; i < nseg; i++) {
if (((int)preq.sector_number|(int)seg[i].nsec) &
((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
DPRINTK("Misaligned I/O request from domain
%d",
blkif->domid);
- goto cleanup_and_fail;
+ goto fail_put_bio;
}
while ((bio == NULL) ||
(bio_add_page(bio,
- virt_to_page(MMAP_VADDR(pending_idx, i)),
+ virt_to_page(vaddr(pending_req, i)),
seg[i].nsec << 9,
seg[i].buf & ~PAGE_MASK) == 0)) {
bio = biolist[nbio++] = bio_alloc(GFP_KERNEL,
nseg-i);
- if (unlikely(bio == NULL)) {
- cleanup_and_fail:
- for (i = 0; i < (nbio-1); i++)
- bio_put(biolist[i]);
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
- }
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
@@ -450,14 +433,8 @@
preq.sector_number += seg[i].nsec;
}
- if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
- flush_plugged_queue();
- blk_get_queue(q);
- plugged_queue = q;
- }
-
+ plug_queue(blkif, bio);
atomic_set(&pending_req->pendcnt, nbio);
- pending_cons++;
blkif_get(blkif);
for (i = 0; i < nbio; i++)
@@ -465,8 +442,14 @@
return;
- bad_descriptor:
+ fail_put_bio:
+ for (i = 0; i < (nbio-1); i++)
+ bio_put(biolist[i]);
+ fail_flush:
+ fast_flush_area(pending_req);
+ fail_response:
make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ free_req(pending_req);
}
@@ -498,56 +481,47 @@
notify_remote_via_irq(blkif->irq);
}
-void blkif_deschedule(blkif_t *blkif)
-{
- remove_from_blkdev_list(blkif);
-}
-
static int __init blkif_init(void)
{
+ struct page *page;
int i;
- struct page *page;
- int ret;
-
- for (i = 0; i < MMAP_PAGES; i++)
- pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-
- if (xen_init() < 0)
- return -ENODEV;
+
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
+ blkif_reqs, GFP_KERNEL);
+ pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
+ mmap_pages, GFP_KERNEL);
+ pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
+ mmap_pages, GFP_KERNEL);
+ if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+ printk("%s: out of memory\n", __FUNCTION__);
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ kfree(pending_vaddrs);
+ return -ENOMEM;
+ }
blkif_interface_init();
-
+
#ifdef __ia64__
- {
extern unsigned long
alloc_empty_foreign_map_page_range(unsigned long pages);
- int i;
-
- mmap_vstart = alloc_empty_foreign_map_page_range(MMAP_PAGES);
- printk("Allocated mmap_vstart: 0x%lx\n", mmap_vstart);
- for(i = 0; i < MMAP_PAGES; i++)
- pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
- BUG_ON(mmap_vstart == NULL);
- }
-#else
- page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ mmap_vstart = (unsigned
long)alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+ page = balloon_alloc_empty_page_range(mmap_pages);
BUG_ON(page == NULL);
mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
#endif
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
+ printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
+ __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
+ BUG_ON(mmap_vstart == 0);
+ for (i = 0; i < mmap_pages; i++)
+ pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+
+ for (i = 0; i < mmap_pages; i++)
+ pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
- memset(pending_reqs, 0, sizeof(pending_reqs));
+ memset(pending_reqs, 0, blkif_reqs * sizeof(pending_reqs[0]));
- for (i = 0; i < MAX_PENDING_REQS; i++)
- pending_ring[i] = i;
+ INIT_LIST_HEAD(&pending_free);
+
+ for (i = 0; i < blkif_reqs; i++)
+ list_add_tail(&pending_reqs[i].free_list, &pending_free);
- spin_lock_init(&blkio_schedule_list_lock);
- INIT_LIST_HEAD(&blkio_schedule_list);
-
- ret = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES);
- BUG_ON(ret < 0);
-
blkif_xenbus_init();
-
return 0;
}
diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Nov 8
17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Wed Nov 9
13:45:37 2005
@@ -56,9 +56,19 @@
/* Is this a blktap frontend */
unsigned int is_blktap;
#endif
- struct list_head blkdev_list;
spinlock_t blk_ring_lock;
atomic_t refcnt;
+
+ wait_queue_head_t wq;
+ struct task_struct *xenblkd;
+ atomic_t io_pending;
+ request_queue_t *plug;
+
+ /* statistics */
+ unsigned long st_print;
+ int st_rd_req;
+ int st_wr_req;
+ int st_oo_req;
struct work_struct free_work;
@@ -97,11 +107,10 @@
void blkif_interface_init(void);
-void blkif_deschedule(blkif_t *blkif);
-
void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+int blkif_schedule(void *arg);
#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff -r abbe3df33774
linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Nov 8
17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Wed Nov 9
13:45:37 2005
@@ -24,6 +24,8 @@
blkif->status = DISCONNECTED;
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
+ init_waitqueue_head(&blkif->wq);
+ blkif->st_print = jiffies;
return blkif;
}
diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Nov 8
17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Wed Nov 9
13:45:37 2005
@@ -17,6 +17,7 @@
*/
#include <stdarg.h>
#include <linux/module.h>
+#include <linux/kthread.h>
#include <asm-xen/xenbus.h>
#include "common.h"
@@ -46,8 +47,11 @@
if (be->watch.node)
unregister_xenbus_watch(&be->watch);
unregister_xenbus_watch(&be->backend_watch);
- if (be->blkif)
+ if (be->blkif) {
+ if (be->blkif->xenblkd)
+ kthread_stop(be->blkif->xenblkd);
blkif_put(be->blkif);
+ }
if (be->frontpath)
kfree(be->frontpath);
kfree(be);
@@ -198,6 +202,16 @@
be->blkif = NULL;
xenbus_dev_error(dev, err,
"creating vbd structure");
+ return;
+ }
+
+ be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif,
+ "xenblkd %d/%04lx",
+ be->blkif->domid, be->pdev);
+ if (IS_ERR(be->blkif->xenblkd)) {
+ err = PTR_ERR(be->blkif->xenblkd);
+ be->blkif->xenblkd = NULL;
+ xenbus_dev_error(dev, err, "start xenblkd");
return;
}
_______________________________________________
Xen-devel mailing list
Xen-devel@...
http://lists.xensource.com/xen-devel
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|