Hi folks,
Here is a new version of the patch, adapted to apply cleanly against the
latest unstable. Debug output is now runtime-switchable; otherwise there
are no major changes from the last version. I have been running my Xen
machine all day with this patch without any issues showing up, and
save/restore works as well. Any chance of getting it merged?
Full list of changes:
* One thread per blkif. The I/O scheduler can do a better job that
way; you can also use ionice on the blkback threads to adjust the
block I/O priorities for the domain.
* Various stuff has been moved from global variables into blkif_t.
* The scary allocation ring for pending_req's is gone and has been
replaced by a free list.
* made dispatch_rw_block_io() reentrant.
* General Linux coding-style cleanup, at least for the code I've
touched.
* number of outstanding requests is runtime-configurable now.
* Made the ia64 #ifdefs smaller and dropped one. It should still
work on ia64 in theory, but it would be great if the ia64 folks
could have a look ...
* Re-added the xen_init() call, which got lost by mistake (pointed
out by the ia64 guys).
* runtime-switchable stats and debug output.
cheers,
Gerd
diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Nov 21 11:21:46 2005
@@ -12,6 +12,8 @@
*/
#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
#include <asm-xen/balloon.h>
#include <asm/hypervisor.h>
#include "common.h"
@@ -21,26 +23,26 @@
* pulled from a communication ring are quite likely to end up being part of
* the same scatter/gather request at the disc.
*
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ *
* This will increase the chances of being able to write whole tracks.
* 64 should be enough to keep us competitive with Linux.
*/
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-static unsigned long mmap_vstart;
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#ifdef __ia64__
-static void *pending_vaddrs[MMAP_PAGES];
-#define MMAP_VADDR(_idx, _i) \
- (unsigned long)(pending_vaddrs[((_idx) *
BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#else
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-#endif
+static int blkif_reqs = 64;
+static int mmap_pages;
+
+static int __init set_blkif_reqs(char *str)
+{
+ get_option(&str, &blkif_reqs);
+ return 1;
+}
+__setup("blkif_reqs=", set_blkif_reqs);
+
+/* runtime-switchable, check /sys/module/blkback/parameters/ ;) */
+static unsigned int log_stats = 0;
+static unsigned int debug_lvl = 0;
+module_param(log_stats, int, 0644);
+module_param(debug_lvl, int, 0644);
/*
* Each outstanding request that we've passed to the lower device layers has a
@@ -55,43 +57,38 @@
atomic_t pendcnt;
unsigned short operation;
int status;
+ struct list_head free_list;
} pending_req_t;
-/*
- * We can't allocate pending_req's in order, since they may complete out of
- * order. We therefore maintain an allocation ring. This ring also indicates
- * when enough work has been passed down -- at that point the allocation ring
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static request_queue_t *plugged_queue;
-static inline void flush_plugged_queue(void)
-{
- request_queue_t *q = plugged_queue;
- if (q != NULL) {
- if ( q->unplug_fn != NULL )
- q->unplug_fn(q);
- blk_put_queue(q);
- plugged_queue = NULL;
- }
-}
+static pending_req_t *pending_reqs;
+static struct list_head pending_free;
+static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static u16 *pending_grant_handles;
+
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+ return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
/* When using grant tables to map a frame for device access then the
* handle returned must be used to unmap the frame. This is needed to
* drop the ref count on the frame.
*/
-static u16 pending_grant_handles[MMAP_PAGES];
-#define pending_handle(_idx, _i) \
- (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#define BLKBACK_INVALID_HANDLE (0xFFFF)
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
@@ -105,26 +102,79 @@
static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
#endif
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+ blkif_request_t *req,
+ pending_req_t *pending_req);
static void make_response(blkif_t *blkif, unsigned long id,
unsigned short op, int st);
-static void fast_flush_area(int idx, int nr_pages)
+/******************************************************************
+ * misc small helpers
+ */
+static pending_req_t* alloc_req(void)
+{
+ pending_req_t *req = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pending_free_lock, flags);
+ if (!list_empty(&pending_free)) {
+ req = list_entry(pending_free.next, pending_req_t, free_list);
+ list_del(&req->free_list);
+ }
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+ return req;
+}
+
+static void free_req(pending_req_t *req)
+{
+ unsigned long flags;
+ int was_empty;
+
+ spin_lock_irqsave(&pending_free_lock, flags);
+ was_empty = list_empty(&pending_free);
+ list_add(&req->free_list, &pending_free);
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+ if (was_empty)
+ wake_up(&pending_free_wq);
+}
+
+static void unplug_queue(blkif_t *blkif)
+{
+ if (NULL == blkif->plug)
+ return;
+ if (blkif->plug->unplug_fn)
+ blkif->plug->unplug_fn(blkif->plug);
+ blk_put_queue(blkif->plug);
+ blkif->plug = NULL;
+}
+
+static void plug_queue(blkif_t *blkif, struct bio *bio)
+{
+ request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+ if (q == blkif->plug)
+ return;
+ unplug_queue(blkif);
+ blk_get_queue(q);
+ blkif->plug = q;
+}
+
+static void fast_flush_area(pending_req_t *req)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int i, invcount = 0;
u16 handle;
int ret;
- for (i = 0; i < nr_pages; i++) {
- handle = pending_handle(idx, i);
+ for (i = 0; i < req->nr_pages; i++) {
+ handle = pending_handle(req, i);
if (handle == BLKBACK_INVALID_HANDLE)
continue;
- unmap[invcount].host_addr = MMAP_VADDR(idx, i);
+ unmap[invcount].host_addr = vaddr(req, i);
unmap[invcount].dev_bus_addr = 0;
unmap[invcount].handle = handle;
- pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
invcount++;
}
@@ -133,109 +183,79 @@
BUG_ON(ret);
}
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
- return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
- unsigned long flags;
-
- if (!__on_blkdev_list(blkif))
- return;
-
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if (__on_blkdev_list(blkif)) {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = NULL;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
- unsigned long flags;
-
- if (__on_blkdev_list(blkif))
- return;
-
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
- list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
- blkif_get(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
/******************************************************************
* SCHEDULER FUNCTIONS
*/
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int blkio_schedule(void *arg)
-{
- DECLARE_WAITQUEUE(wq, current);
-
- blkif_t *blkif;
- struct list_head *ent;
-
- daemonize("xenblkd");
-
+static void print_stats(blkif_t *blkif)
+{
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
+ current->comm, blkif->st_oo_req,
+ blkif->st_rd_req, blkif->st_wr_req);
+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+ blkif->st_rd_req = 0;
+ blkif->st_wr_req = 0;
+ blkif->st_oo_req = 0;
+}
+
+int blkif_schedule(void *arg)
+{
+ blkif_t *blkif = arg;
+
+ blkif_get(blkif);
+ if (debug_lvl)
+ printk(KERN_DEBUG "%s: started\n", current->comm);
for (;;) {
- /* Wait for work to do. */
- add_wait_queue(&blkio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&blkio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&blkio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&blkio_schedule_list)) {
- ent = blkio_schedule_list.next;
- blkif = list_entry(ent, blkif_t, blkdev_list);
- blkif_get(blkif);
- remove_from_blkdev_list(blkif);
- if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
- add_to_blkdev_list_tail(blkif);
- blkif_put(blkif);
- }
-
- /* Push the batch through to disc. */
- flush_plugged_queue();
- }
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
- /*
- * Needed so that two processes, which together make the following
- * predicate true, don't both read stale values and evaluate the
- * predicate incorrectly. Incredibly unlikely to stall the scheduler
- * on x86, but...
- */
- smp_mb();
-
- if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
- !list_empty(&blkio_schedule_list))
- wake_up(&blkio_schedule_wait);
-}
-
-
+ if (kthread_should_stop()) {
+ /* asked to quit? */
+ if (!atomic_read(&blkif->io_pending))
+ break;
+ if (debug_lvl)
+ printk(KERN_DEBUG "%s: I/O pending, delaying
exit\n",
+ current->comm);
+ }
+
+ if (!atomic_read(&blkif->io_pending)) {
+ /* Wait for work to do. */
+ wait_event_interruptible(blkif->wq,
+
atomic_read(&blkif->io_pending) ||
+ kthread_should_stop());
+ } else if (list_empty(&pending_free)) {
+ /* Wait for pending_req becoming available. */
+ wait_event_interruptible(pending_free_wq,
+ !list_empty(&pending_free));
+ }
+
+ if (blkif->status != CONNECTED) {
+ /* make sure we are connected */
+ if (debug_lvl)
+ printk(KERN_DEBUG "%s: not connected (%d
pending)\n",
+ current->comm,
atomic_read(&blkif->io_pending));
+ wait_event_interruptible(blkif->wq,
+ blkif->status != CONNECTED ||
+ kthread_should_stop());
+ continue;
+ }
+
+ /* Schedule I/O */
+ atomic_set(&blkif->io_pending, 0);
+ if (do_block_io_op(blkif))
+ atomic_inc(&blkif->io_pending);
+ unplug_queue(blkif);
+
+ if (log_stats && time_after(jiffies, blkif->st_print))
+ print_stats(blkif);
+ }
+
+ /* bye folks, and thanks for all the fish ;) */
+ if (log_stats)
+ print_stats(blkif);
+ if (debug_lvl)
+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
+ blkif->xenblkd = NULL;
+ blkif_put(blkif);
+ return 0;
+}
/******************************************************************
* COMPLETION CALLBACK -- Called as bh->b_end_io()
@@ -243,8 +263,6 @@
static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
- unsigned long flags;
-
/* An error fails the entire request. */
if (!uptodate) {
DPRINTK("Buffer not up-to-date at end of operation\n");
@@ -252,15 +270,11 @@
}
if (atomic_dec_and_test(&pending_req->pendcnt)) {
- int pending_idx = pending_req - pending_reqs;
- fast_flush_area(pending_idx, pending_req->nr_pages);
+ fast_flush_area(pending_req);
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
blkif_put(pending_req->blkif);
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
- maybe_trigger_blkio_schedule();
+ free_req(pending_req);
}
}
@@ -281,8 +295,9 @@
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
blkif_t *blkif = dev_id;
- add_to_blkdev_list_tail(blkif);
- maybe_trigger_blkio_schedule();
+
+ atomic_inc(&blkif->io_pending);
+ wake_up(&blkif->wq);
return IRQ_HANDLED;
}
@@ -292,10 +307,11 @@
* DOWNWARD CALLS -- These interface with the block-device layer proper.
*/
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
+static int do_block_io_op(blkif_t *blkif)
{
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
blkif_request_t *req;
+ pending_req_t *pending_req;
RING_IDX i, rp;
int more_to_do = 0;
@@ -305,24 +321,30 @@
for (i = blk_ring->req_cons;
(i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
i++) {
- if ((max_to_do-- == 0) ||
- (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+
+ pending_req = alloc_req();
+ if (NULL == pending_req) {
+ blkif->st_oo_req++;
more_to_do = 1;
break;
}
-
+
req = RING_GET_REQUEST(blk_ring, i);
switch (req->operation) {
case BLKIF_OP_READ:
+ blkif->st_rd_req++;
+ dispatch_rw_block_io(blkif, req, pending_req);
+ break;
case BLKIF_OP_WRITE:
- dispatch_rw_block_io(blkif, req);
+ blkif->st_wr_req++;
+ dispatch_rw_block_io(blkif, req, pending_req);
break;
-
default:
DPRINTK("error: unknown block io operation [%d]\n",
req->operation);
make_response(blkif, req->id, req->operation,
BLKIF_RSP_ERROR);
+ free_req(pending_req);
break;
}
}
@@ -331,13 +353,13 @@
return more_to_do;
}
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+static void dispatch_rw_block_io(blkif_t *blkif,
+ blkif_request_t *req,
+ pending_req_t *pending_req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
unsigned long fas = 0;
- int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct phys_req preq;
struct {
@@ -345,31 +367,35 @@
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int nseg;
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int nbio = 0;
- request_queue_t *q;
- int ret, errors = 0;
+ int ret, i, nbio = 0;
/* Check that number of segments is sane. */
nseg = req->nr_segments;
if (unlikely(nseg == 0) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
DPRINTK("Bad number of segments in request (%d)\n", nseg);
- goto bad_descriptor;
+ goto fail_response;
}
preq.dev = req->handle;
preq.sector_number = req->sector_number;
preq.nr_sects = 0;
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+
for (i = 0; i < nseg; i++) {
fas = req->frame_and_sects[i];
seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
if (seg[i].nsec <= 0)
- goto bad_descriptor;
+ goto fail_response;
preq.nr_sects += seg[i].nsec;
- map[i].host_addr = MMAP_VADDR(pending_idx, i);
+ map[i].host_addr = vaddr(pending_req, i);
map[i].dom = blkif->domid;
map[i].ref = blkif_gref_from_fas(fas);
map[i].flags = GNTMAP_host_map;
@@ -381,27 +407,23 @@
BUG_ON(ret);
for (i = 0; i < nseg; i++) {
- if (likely(map[i].handle >= 0)) {
- pending_handle(pending_idx, i) = map[i].handle;
+ if (unlikely(map[i].handle < 0)) {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ goto fail_flush;
+ }
+
+ pending_handle(pending_req, i) = map[i].handle;
#ifdef __ia64__
- MMAP_VADDR(pending_idx,i) = gnttab_map_vaddr(map[i]);
+ pending_vaddrs[vaddr_pagenr(req, seg)] =
+ = gnttab_map_vaddr(map[i]);
#else
- set_phys_to_machine(__pa(MMAP_VADDR(
- pending_idx, i)) >> PAGE_SHIFT,
- FOREIGN_FRAME(map[i].dev_bus_addr>>PAGE_SHIFT));
+ set_phys_to_machine(__pa(vaddr(
+ pending_req, i)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
#endif
- fas = req->frame_and_sects[i];
- seg[i].buf = map[i].dev_bus_addr |
- (blkif_first_sect(fas) << 9);
- } else {
- errors++;
- }
- }
-
- if (errors) {
- DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
+ fas = req->frame_and_sects[i];
+ seg[i].buf = map[i].dev_bus_addr |
+ (blkif_first_sect(fas) << 9);
}
if (vbd_translate(&preq, blkif, operation) != 0) {
@@ -409,37 +431,25 @@
operation == READ ? "read" : "write",
preq.sector_number,
preq.sector_number + preq.nr_sects, preq.dev);
- goto bad_descriptor;
- }
-
- pending_req = &pending_reqs[pending_idx];
- pending_req->blkif = blkif;
- pending_req->id = req->id;
- pending_req->operation = operation;
- pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
+ goto fail_flush;
+ }
for (i = 0; i < nseg; i++) {
if (((int)preq.sector_number|(int)seg[i].nsec) &
((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
DPRINTK("Misaligned I/O request from domain %d",
blkif->domid);
- goto cleanup_and_fail;
+ goto fail_put_bio;
}
while ((bio == NULL) ||
(bio_add_page(bio,
- virt_to_page(MMAP_VADDR(pending_idx, i)),
+ virt_to_page(vaddr(pending_req, i)),
seg[i].nsec << 9,
seg[i].buf & ~PAGE_MASK) == 0)) {
bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
- if (unlikely(bio == NULL)) {
- cleanup_and_fail:
- for (i = 0; i < (nbio-1); i++)
- bio_put(biolist[i]);
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
- }
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
@@ -450,14 +460,8 @@
preq.sector_number += seg[i].nsec;
}
- if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
- flush_plugged_queue();
- blk_get_queue(q);
- plugged_queue = q;
- }
-
+ plug_queue(blkif, bio);
atomic_set(&pending_req->pendcnt, nbio);
- pending_cons++;
blkif_get(blkif);
for (i = 0; i < nbio; i++)
@@ -465,8 +469,14 @@
return;
- bad_descriptor:
+ fail_put_bio:
+ for (i = 0; i < (nbio-1); i++)
+ bio_put(biolist[i]);
+ fail_flush:
+ fast_flush_area(pending_req);
+ fail_response:
make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ free_req(pending_req);
}
@@ -498,56 +508,50 @@
notify_remote_via_irq(blkif->irq);
}
-void blkif_deschedule(blkif_t *blkif)
-{
- remove_from_blkdev_list(blkif);
-}
-
static int __init blkif_init(void)
{
+ struct page *page;
int i;
- struct page *page;
- int ret;
-
- for (i = 0; i < MMAP_PAGES; i++)
- pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
if (xen_init() < 0)
return -ENODEV;
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
+ blkif_reqs, GFP_KERNEL);
+ pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
+ mmap_pages, GFP_KERNEL);
+ pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
+ mmap_pages, GFP_KERNEL);
+ if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -1;
+ }
+
blkif_interface_init();
-
+
#ifdef __ia64__
- {
extern unsigned long alloc_empty_foreign_map_page_range(unsigned long
pages);
- int i;
-
- mmap_vstart = alloc_empty_foreign_map_page_range(MMAP_PAGES);
- printk("Allocated mmap_vstart: 0x%lx\n", mmap_vstart);
- for(i = 0; i < MMAP_PAGES; i++)
- pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
- BUG_ON(mmap_vstart == NULL);
- }
-#else
- page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ mmap_vstart = (unsigned
long)alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+ page = balloon_alloc_empty_page_range(mmap_pages);
BUG_ON(page == NULL);
mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
#endif
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
+ printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
+ __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
+ BUG_ON(mmap_vstart == 0);
+ for (i = 0; i < mmap_pages; i++)
+ pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+
+ memset(pending_grant_handles, BLKBACK_INVALID_HANDLE, mmap_pages);
memset(pending_reqs, 0, sizeof(pending_reqs));
- for (i = 0; i < MAX_PENDING_REQS; i++)
- pending_ring[i] = i;
+ INIT_LIST_HEAD(&pending_free);
+
+ for (i = 0; i < blkif_reqs; i++)
+ list_add_tail(&pending_reqs[i].free_list, &pending_free);
- spin_lock_init(&blkio_schedule_list_lock);
- INIT_LIST_HEAD(&blkio_schedule_list);
-
- ret = kernel_thread(blkio_schedule, NULL, CLONE_FS | CLONE_FILES);
- BUG_ON(ret < 0);
-
blkif_xenbus_init();
-
return 0;
}
diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Mon Nov 21 11:21:46 2005
@@ -56,9 +56,19 @@
/* Is this a blktap frontend */
unsigned int is_blktap;
#endif
- struct list_head blkdev_list;
spinlock_t blk_ring_lock;
atomic_t refcnt;
+
+ wait_queue_head_t wq;
+ struct task_struct *xenblkd;
+ atomic_t io_pending;
+ request_queue_t *plug;
+
+ /* statistics */
+ unsigned long st_print;
+ int st_rd_req;
+ int st_wr_req;
+ int st_oo_req;
struct work_struct free_work;
@@ -97,11 +107,10 @@
void blkif_interface_init(void);
-void blkif_deschedule(blkif_t *blkif);
-
void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+int blkif_schedule(void *arg);
#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Mon Nov 21 11:21:46 2005
@@ -24,6 +24,8 @@
blkif->status = DISCONNECTED;
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
+ init_waitqueue_head(&blkif->wq);
+ blkif->st_print = jiffies;
return blkif;
}
@@ -113,6 +115,7 @@
blkif->irq = bind_evtchn_to_irqhandler(
blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
blkif->status = CONNECTED;
+ wake_up(&blkif->wq);
return 0;
}
diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Nov 21 11:21:46 2005
@@ -20,6 +20,7 @@
#include <stdarg.h>
#include <linux/module.h>
+#include <linux/kthread.h>
#include <asm-xen/xenbus.h>
#include "common.h"
@@ -61,6 +62,8 @@
be->backend_watch.node = NULL;
}
if (be->blkif) {
+ if (be->blkif->xenblkd)
+ kthread_stop(be->blkif->xenblkd);
blkif_put(be->blkif);
be->blkif = NULL;
}
@@ -175,6 +178,16 @@
be->pdev = 0L;
xenbus_dev_fatal(dev, err,
"creating vbd structure");
+ return;
+ }
+
+ be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif,
+ "xenblkd %d/%04lx",
+ be->blkif->domid, be->pdev);
+ if (IS_ERR(be->blkif->xenblkd)) {
+ err = PTR_ERR(be->blkif->xenblkd);
+ be->blkif->xenblkd = NULL;
+ xenbus_dev_error(dev, err, "start xenblkd");
return;
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|