[Xen-devel] [patch] CFQ for xen domains

> Does 'xm save / xm restore' work with this patch (at least as well as it currently does ;-)?

xm save/restore still doesn't work for me, with either the sparse tree or the linux-2.6 repository, so I can't try it. I can't see any reason why it should get worse with this patch, though.

I've resynced the blkback threading patch with the latest sparse tree; here it is. Changes:

  * One thread per blkif.  The I/O scheduler can do a better job that
    way, and you can also use ionice on the blkback threads to adjust
    the block I/O priorities for the domain.
  * Various state has been moved from global variables into blkif_t.
  * The scary allocation ring for pending_req's is gone and has been
    replaced by a free list (see the sketch below).
  * Made dispatch_rw_block_io() reentrant.
  * General Linux coding style cleanup, at least for the code I've
    touched.
  * The number of outstanding requests is now runtime-configurable.
  * Made the ia64 #ifdefs smaller and dropped one.  It should still
    work on ia64 in theory, but it would be great if the ia64 folks
    could have a look ...
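
For reference, here is a minimal sketch of the free-list scheme that
replaces the allocation ring.  It is trimmed down from the actual
alloc_req()/free_req() in blkback.c: pending_req_t is reduced to just
the list hook, and the array allocation plus the initial
list_add_tail() loop done in blkif_init() are left out, so treat it as
an illustration rather than the patch itself.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

/* Illustration only: the real pending_req_t also carries blkif, id,
 * nr_pages, pendcnt, operation and status. */
typedef struct {
	struct list_head free_list;
} pending_req_t;

static LIST_HEAD(pending_free);
static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);

/* Grab a free request, or return NULL if all of them are in flight. */
static pending_req_t *alloc_req(void)
{
	pending_req_t *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);
	if (!list_empty(&pending_free)) {
		req = list_entry(pending_free.next, pending_req_t, free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&pending_free_lock, flags);
	return req;
}

/* Return a completed request; wake the blkback thread if the list was
 * empty, since it may be sleeping on pending_free_wq. */
static void free_req(pending_req_t *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&pending_free_lock, flags);
	was_empty = list_empty(&pending_free);
	list_add(&req->free_list, &pending_free);
	spin_unlock_irqrestore(&pending_free_lock, flags);
	if (was_empty)
		wake_up(&pending_free_wq);
}

When alloc_req() comes back NULL the per-domain thread simply sleeps on
pending_free_wq until a request completes; that replaces the old
NR_PENDING_REQS / pending_ring bookkeeping and the global scheduler
wakeups.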

cheers,

  Gerd

diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Tue Nov  8 17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Wed Nov  9 13:45:37 2005
@@ -12,6 +12,8 @@
  */
 
 #include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
 #include <asm-xen/balloon.h>
 #include <asm/hypervisor.h>
 #include "common.h"
@@ -21,26 +23,21 @@
  * pulled from a communication ring are quite likely to end up being part of
  * the same scatter/gather request at the disc.
  * 
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ * 
  * This will increase the chances of being able to write whole tracks.
  * 64 should be enough to keep us competitive with Linux.
  */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-static unsigned long mmap_vstart;
-#define MMAP_PAGES                                             \
-       (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#ifdef __ia64__
-static void *pending_vaddrs[MMAP_PAGES];
-#define MMAP_VADDR(_idx, _i) \
-       (unsigned long)(pending_vaddrs[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#else
-#define MMAP_VADDR(_req,_seg)                                          \
-       (mmap_vstart +                                                  \
-        ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
-        ((_seg) * PAGE_SIZE))
-#endif
+static int blkif_reqs = 64;
+static int mmap_pages;
+
+static int __init set_blkif_reqs(char *str)
+{
+       get_option(&str, &blkif_reqs);
+       return 1;
+}
+__setup("blkif_reqs=", set_blkif_reqs);
+
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a 
@@ -55,43 +52,38 @@
        atomic_t       pendcnt;
        unsigned short operation;
        int            status;
+       struct list_head free_list;
 } pending_req_t;
 
-/*
- * We can't allocate pending_req's in order, since they may complete out of 
- * order. We therefore maintain an allocation ring. This ring also indicates 
- * when enough work has been passed down -- at that point the allocation ring 
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static request_queue_t *plugged_queue;
-static inline void flush_plugged_queue(void)
-{
-       request_queue_t *q = plugged_queue;
-       if (q != NULL) {
-               if ( q->unplug_fn != NULL )
-                       q->unplug_fn(q);
-               blk_put_queue(q);
-               plugged_queue = NULL;
-       }
-}
+static pending_req_t *pending_reqs;
+static struct list_head pending_free;
+static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static u16 *pending_grant_handles;
+
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+       return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+       return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+       (pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
 
 /* When using grant tables to map a frame for device access then the
  * handle returned must be used to unmap the frame. This is needed to
  * drop the ref count on the frame.
  */
-static u16 pending_grant_handles[MMAP_PAGES];
-#define pending_handle(_idx, _i) \
-    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#define BLKBACK_INVALID_HANDLE (0xFFFF)
 
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
 /*
@@ -105,26 +97,79 @@
 static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
 #endif
 
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                blkif_request_t *req,
+                                pending_req_t *pending_req);
 static void make_response(blkif_t *blkif, unsigned long id, 
                           unsigned short op, int st);
 
-static void fast_flush_area(int idx, int nr_pages)
+/******************************************************************
+ * misc small helpers
+ */
+static pending_req_t* alloc_req(void)
+{
+       pending_req_t *req = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+       if (!list_empty(&pending_free)) {
+               req = list_entry(pending_free.next, pending_req_t, free_list);
+               list_del(&req->free_list);
+       }
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       return req;
+}
+
+static void free_req(pending_req_t *req)
+{
+       unsigned long flags;
+       int was_empty;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+       was_empty = list_empty(&pending_free);
+       list_add(&req->free_list, &pending_free);
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       if (was_empty)
+               wake_up(&pending_free_wq);
+}
+
+static void unplug_queue(blkif_t *blkif)
+{
+       if (NULL == blkif->plug)
+               return;
+       if (blkif->plug->unplug_fn)
+               blkif->plug->unplug_fn(blkif->plug);
+       blk_put_queue(blkif->plug);
+       blkif->plug = NULL;
+}
+
+static void plug_queue(blkif_t *blkif, struct bio *bio)
+{
+       request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+       if (q == blkif->plug)
+               return;
+       unplug_queue(blkif);
+       blk_get_queue(q);
+       blkif->plug = q;
+}
+
+static void fast_flush_area(pending_req_t *req)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int i, invcount = 0;
        u16 handle;
        int ret;
 
-       for (i = 0; i < nr_pages; i++) {
-               handle = pending_handle(idx, i);
+       for (i = 0; i < req->nr_pages; i++) {
+               handle = pending_handle(req, i);
                if (handle == BLKBACK_INVALID_HANDLE)
                        continue;
-               unmap[invcount].host_addr    = MMAP_VADDR(idx, i);
+               unmap[invcount].host_addr    = vaddr(req, i);
                unmap[invcount].dev_bus_addr = 0;
                unmap[invcount].handle       = handle;
-               pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+               pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
                invcount++;
        }
 
@@ -133,109 +178,56 @@
        BUG_ON(ret);
 }
 
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
-       return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
-       unsigned long flags;
-
-       if (!__on_blkdev_list(blkif))
-               return;
-
-       spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-       if (__on_blkdev_list(blkif)) {
-               list_del(&blkif->blkdev_list);
-               blkif->blkdev_list.next = NULL;
-               blkif_put(blkif);
-       }
-       spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
-       unsigned long flags;
-
-       if (__on_blkdev_list(blkif))
-               return;
-
-       spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-       if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
-               list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
-               blkif_get(blkif);
-       }
-       spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
 /******************************************************************
  * SCHEDULER FUNCTIONS
  */
 
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int blkio_schedule(void *arg)
-{
-       DECLARE_WAITQUEUE(wq, current);
-
-       blkif_t          *blkif;
-       struct list_head *ent;
-
-       daemonize("xenblkd");
-
+int blkif_schedule(void *arg)
+{
+       blkif_t          *blkif = arg;
+
+       blkif_get(blkif);
+       printk(KERN_DEBUG "%s: started\n", current->comm);
        for (;;) {
-               /* Wait for work to do. */
-               add_wait_queue(&blkio_schedule_wait, &wq);
-               set_current_state(TASK_INTERRUPTIBLE);
-               if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
-                    list_empty(&blkio_schedule_list) )
-                       schedule();
-               __set_current_state(TASK_RUNNING);
-               remove_wait_queue(&blkio_schedule_wait, &wq);
-
-               /* Queue up a batch of requests. */
-               while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
-                      !list_empty(&blkio_schedule_list)) {
-                       ent = blkio_schedule_list.next;
-                       blkif = list_entry(ent, blkif_t, blkdev_list);
-                       blkif_get(blkif);
-                       remove_from_blkdev_list(blkif);
-                       if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
-                               add_to_blkdev_list_tail(blkif);
-                       blkif_put(blkif);
-               }
-
-               /* Push the batch through to disc. */
-               flush_plugged_queue();
-       }
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
-       /*
-        * Needed so that two processes, which together make the following
-        * predicate true, don't both read stale values and evaluate the
-        * predicate incorrectly. Incredibly unlikely to stall the scheduler
-        * on x86, but...
-        */
-       smp_mb();
-
-       if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-           !list_empty(&blkio_schedule_list))
-               wake_up(&blkio_schedule_wait);
-}
-
-
+               if (!atomic_read(&blkif->io_pending)) {
+                       /* Wait for work to do or requests to exit. */
+                       if (kthread_should_stop())
+                               break;
+                       wait_event_interruptible(blkif->wq,
+                                                atomic_read(&blkif->io_pending) ||
+                                                kthread_should_stop());
+               } else if (list_empty(&pending_free)) {
+                       /* Wait for pending_req becoming available. */
+                       wait_event_interruptible(pending_free_wq,
+                                                !list_empty(&pending_free));
+               }
+
+               /* Schedule I/O */
+               atomic_set(&blkif->io_pending, 0);
+               if (do_block_io_op(blkif))
+                       atomic_inc(&blkif->io_pending);
+               unplug_queue(blkif);
+
+#if 0
+               /* Print stats for performance debugging. */
+               if (time_after(jiffies, blkif->st_print)) {
+                       printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
+                              current->comm, blkif->st_oo_req,
+                              blkif->st_rd_req, blkif->st_wr_req);
+                       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+                       blkif->st_rd_req = 0;
+                       blkif->st_wr_req = 0;
+                       blkif->st_oo_req = 0;
+               }
+#endif
+       }
+
+       /* bye folks, and thanks for all the fish ;) */
+       printk(KERN_DEBUG "%s: exiting\n", current->comm);
+       blkif->xenblkd = NULL;
+       blkif_put(blkif);
+       return 0;
+}
 
 /******************************************************************
  * COMPLETION CALLBACK -- Called as bh->b_end_io()
@@ -243,8 +235,6 @@
 
 static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
 {
-       unsigned long flags;
-
        /* An error fails the entire request. */
        if (!uptodate) {
                DPRINTK("Buffer not up-to-date at end of operation\n");
@@ -252,15 +242,11 @@
        }
 
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
-               int pending_idx = pending_req - pending_reqs;
-               fast_flush_area(pending_idx, pending_req->nr_pages);
+               fast_flush_area(pending_req);
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                blkif_put(pending_req->blkif);
-               spin_lock_irqsave(&pend_prod_lock, flags);
-               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-               spin_unlock_irqrestore(&pend_prod_lock, flags);
-               maybe_trigger_blkio_schedule();
+               free_req(pending_req);
        }
 }
 
@@ -281,8 +267,10 @@
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
        blkif_t *blkif = dev_id;
-       add_to_blkdev_list_tail(blkif);
-       maybe_trigger_blkio_schedule();
+
+       atomic_inc(&blkif->io_pending);
+       if (blkif->status == CONNECTED)
+               wake_up(&blkif->wq);
        return IRQ_HANDLED;
 }
 
@@ -292,10 +280,11 @@
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
 
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
+static int do_block_io_op(blkif_t *blkif)
 {
        blkif_back_ring_t *blk_ring = &blkif->blk_ring;
        blkif_request_t *req;
+       pending_req_t *pending_req;
        RING_IDX i, rp;
        int more_to_do = 0;
 
@@ -305,24 +294,30 @@
        for (i = blk_ring->req_cons; 
             (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
             i++) {
-               if ((max_to_do-- == 0) ||
-                   (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+
+               pending_req = alloc_req();
+               if (NULL == pending_req) {
+                       blkif->st_oo_req++;
                        more_to_do = 1;
                        break;
                }
-        
+
                req = RING_GET_REQUEST(blk_ring, i);
                switch (req->operation) {
                case BLKIF_OP_READ:
+                       blkif->st_rd_req++;
+                       dispatch_rw_block_io(blkif, req, pending_req);
+                       break;
                case BLKIF_OP_WRITE:
-                       dispatch_rw_block_io(blkif, req);
+                       blkif->st_wr_req++;
+                       dispatch_rw_block_io(blkif, req, pending_req);
                        break;
-
                default:
                        DPRINTK("error: unknown block io operation [%d]\n",
                                req->operation);
                        make_response(blkif, req->id, req->operation,
                                      BLKIF_RSP_ERROR);
+                       free_req(pending_req);
                        break;
                }
        }
@@ -331,13 +326,13 @@
        return more_to_do;
 }
 
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                blkif_request_t *req,
+                                pending_req_t *pending_req)
 {
        extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
        int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
        unsigned long fas = 0;
-       int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-       pending_req_t *pending_req;
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct phys_req preq;
        struct { 
@@ -345,31 +340,35 @@
        } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int nseg;
        struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       int nbio = 0;
-       request_queue_t *q;
-       int ret, errors = 0;
+       int ret, i, nbio = 0;
 
        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
        if (unlikely(nseg == 0) || 
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                DPRINTK("Bad number of segments in request (%d)\n", nseg);
-               goto bad_descriptor;
+               goto fail_response;
        }
 
        preq.dev           = req->handle;
        preq.sector_number = req->sector_number;
        preq.nr_sects      = 0;
 
+       pending_req->blkif     = blkif;
+       pending_req->id        = req->id;
+       pending_req->operation = operation;
+       pending_req->status    = BLKIF_RSP_OKAY;
+       pending_req->nr_pages  = nseg;
+
        for (i = 0; i < nseg; i++) {
                fas         = req->frame_and_sects[i];
                seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
 
                if (seg[i].nsec <= 0)
-                       goto bad_descriptor;
+                       goto fail_response;
                preq.nr_sects += seg[i].nsec;
 
-               map[i].host_addr = MMAP_VADDR(pending_idx, i);
+               map[i].host_addr = vaddr(pending_req, i);
                map[i].dom = blkif->domid;
                map[i].ref = blkif_gref_from_fas(fas);
                map[i].flags = GNTMAP_host_map;
@@ -381,27 +380,23 @@
        BUG_ON(ret);
 
        for (i = 0; i < nseg; i++) {
-               if (likely(map[i].handle >= 0)) {
-                       pending_handle(pending_idx, i) = map[i].handle;
+               if (unlikely(map[i].handle < 0)) {
+                       DPRINTK("invalid buffer -- could not remap it\n");
+                       goto fail_flush;
+               }
+
+               pending_handle(pending_req, i) = map[i].handle;
 #ifdef __ia64__
-                       MMAP_VADDR(pending_idx,i) = gnttab_map_vaddr(map[i]);
+               pending_vaddrs[vaddr_pagenr(pending_req, i)] = gnttab_map_vaddr(map[i]);
 #else
-                       phys_to_machine_mapping[__pa(MMAP_VADDR(
-                               pending_idx, i)) >> PAGE_SHIFT] =
-                               FOREIGN_FRAME(map[i].dev_bus_addr>>PAGE_SHIFT);
+               phys_to_machine_mapping[__pa(vaddr(
+                       pending_req, i)) >> PAGE_SHIFT] =
+                       FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
 #endif
-                       fas        = req->frame_and_sects[i];
-                       seg[i].buf = map[i].dev_bus_addr | 
-                               (blkif_first_sect(fas) << 9);
-               } else {
-                       errors++;
-               }
-       }
-
-       if (errors) {
-               DPRINTK("invalid buffer -- could not remap it\n");
-               fast_flush_area(pending_idx, nseg);
-               goto bad_descriptor;
+               fas         = req->frame_and_sects[i];
+               seg[i].buf  = map[i].dev_bus_addr | 
+                       (blkif_first_sect(fas) << 9);
        }
 
        if (vbd_translate(&preq, blkif, operation) != 0) {
@@ -409,37 +404,25 @@
                        operation == READ ? "read" : "write",
                        preq.sector_number,
                        preq.sector_number + preq.nr_sects, preq.dev); 
-               goto bad_descriptor;
-       }
-
-       pending_req = &pending_reqs[pending_idx];
-       pending_req->blkif     = blkif;
-       pending_req->id        = req->id;
-       pending_req->operation = operation;
-       pending_req->status    = BLKIF_RSP_OKAY;
-       pending_req->nr_pages  = nseg;
+               goto fail_flush;
+       }
 
        for (i = 0; i < nseg; i++) {
                if (((int)preq.sector_number|(int)seg[i].nsec) &
                    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
                        DPRINTK("Misaligned I/O request from domain %d",
                                blkif->domid);
-                       goto cleanup_and_fail;
+                       goto fail_put_bio;
                }
 
                while ((bio == NULL) ||
                       (bio_add_page(bio,
-                                    virt_to_page(MMAP_VADDR(pending_idx, i)),
+                                    virt_to_page(vaddr(pending_req, i)),
                                     seg[i].nsec << 9,
                                     seg[i].buf & ~PAGE_MASK) == 0)) {
                        bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
-                       if (unlikely(bio == NULL)) {
-                       cleanup_and_fail:
-                               for (i = 0; i < (nbio-1); i++)
-                                       bio_put(biolist[i]);
-                               fast_flush_area(pending_idx, nseg);
-                               goto bad_descriptor;
-                       }
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
                 
                        bio->bi_bdev    = preq.bdev;
                        bio->bi_private = pending_req;
@@ -450,14 +433,8 @@
                preq.sector_number += seg[i].nsec;
        }
 
-       if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
-               flush_plugged_queue();
-               blk_get_queue(q);
-               plugged_queue = q;
-       }
-
+       plug_queue(blkif, bio);
        atomic_set(&pending_req->pendcnt, nbio);
-       pending_cons++;
        blkif_get(blkif);
 
        for (i = 0; i < nbio; i++)
@@ -465,8 +442,14 @@
 
        return;
 
- bad_descriptor:
+ fail_put_bio:
+       for (i = 0; i < (nbio-1); i++)
+               bio_put(biolist[i]);
+ fail_flush:
+       fast_flush_area(pending_req);
+ fail_response:
        make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+       free_req(pending_req);
 } 
 
 
@@ -498,56 +481,47 @@
        notify_remote_via_irq(blkif->irq);
 }
 
-void blkif_deschedule(blkif_t *blkif)
-{
-       remove_from_blkdev_list(blkif);
-}
-
 static int __init blkif_init(void)
 {
+       struct page *page;
        int i;
-       struct page *page;
-       int ret;
-
-       for (i = 0; i < MMAP_PAGES; i++)
-               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-
-       if (xen_init() < 0)
-               return -ENODEV;
+
+       mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
+                                       blkif_reqs, GFP_KERNEL);
+       pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
+                                       mmap_pages, GFP_KERNEL);
+       pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
+                                       mmap_pages, GFP_KERNEL);
+       if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+               printk("%s: out of memory\n", __FUNCTION__);
+               return -ENOMEM;
+       }
 
        blkif_interface_init();
-
+       
 #ifdef __ia64__
-    {
        extern unsigned long alloc_empty_foreign_map_page_range(unsigned long pages);
-       int i;
-
-       mmap_vstart =  alloc_empty_foreign_map_page_range(MMAP_PAGES);
-       printk("Allocated mmap_vstart: 0x%lx\n", mmap_vstart);
-       for(i = 0; i < MMAP_PAGES; i++)
-           pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
-       BUG_ON(mmap_vstart == NULL);
-    }
-#else
-       page = balloon_alloc_empty_page_range(MMAP_PAGES);
+       mmap_vstart = (unsigned long)alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+       page = balloon_alloc_empty_page_range(mmap_pages);
        BUG_ON(page == NULL);
        mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
 #endif
-
-       pending_cons = 0;
-       pending_prod = MAX_PENDING_REQS;
+       printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
+              __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
+       BUG_ON(mmap_vstart == 0);
+       for (i = 0; i < mmap_pages; i++)
+               pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+
+       for (i = 0; i < mmap_pages; i++)
+               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-       memset(pending_reqs, 0, sizeof(pending_reqs));
+       memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
-       for (i = 0; i < MAX_PENDING_REQS; i++)
-               pending_ring[i] = i;
+       INIT_LIST_HEAD(&pending_free);
+
+       for (i = 0; i < blkif_reqs; i++)
+               list_add_tail(&pending_reqs[i].free_list, &pending_free);
     
-       spin_lock_init(&blkio_schedule_list_lock);
-       INIT_LIST_HEAD(&blkio_schedule_list);
-
-       ret = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES);
-       BUG_ON(ret < 0);
-
        blkif_xenbus_init();
-
        return 0;
 }
 
diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Nov  8 17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Wed Nov  9 13:45:37 2005
@@ -56,9 +56,19 @@
        /* Is this a blktap frontend */
        unsigned int     is_blktap;
 #endif
-       struct list_head blkdev_list;
        spinlock_t       blk_ring_lock;
        atomic_t         refcnt;
+
+       wait_queue_head_t   wq;
+       struct task_struct  *xenblkd;
+       atomic_t            io_pending;
+       request_queue_t     *plug;
+
+       /* statistics */
+       unsigned long       st_print;
+       int                 st_rd_req;
+       int                 st_wr_req;
+       int                 st_oo_req;
 
        struct work_struct free_work;
 
@@ -97,11 +107,10 @@
 
 void blkif_interface_init(void);
 
-void blkif_deschedule(blkif_t *blkif);
-
 void blkif_xenbus_init(void);
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+int blkif_schedule(void *arg);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
 
diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Tue Nov  8 17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Wed Nov  9 13:45:37 2005
@@ -24,6 +24,8 @@
        blkif->status = DISCONNECTED;
        spin_lock_init(&blkif->blk_ring_lock);
        atomic_set(&blkif->refcnt, 1);
+       init_waitqueue_head(&blkif->wq);
+       blkif->st_print = jiffies;
 
        return blkif;
 }
diff -r abbe3df33774 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Nov  8 17:39:58 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Wed Nov  9 13:45:37 2005
@@ -17,6 +17,7 @@
 */
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 #include <asm-xen/xenbus.h>
 #include "common.h"
 
@@ -46,8 +47,11 @@
        if (be->watch.node)
                unregister_xenbus_watch(&be->watch);
        unregister_xenbus_watch(&be->backend_watch);
-       if (be->blkif)
+       if (be->blkif) {
+               if (be->blkif->xenblkd)
+                       kthread_stop(be->blkif->xenblkd);
                blkif_put(be->blkif);
+       }
        if (be->frontpath)
                kfree(be->frontpath);
        kfree(be);
@@ -198,6 +202,16 @@
                        be->blkif = NULL;
                        xenbus_dev_error(dev, err,
                                         "creating vbd structure");
+                       return;
+               }
+
+               be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif,
+                                                "xenblkd %d/%04lx",
+                                                be->blkif->domid, be->pdev);
+               if (IS_ERR(be->blkif->xenblkd)) {
+                       err = PTR_ERR(be->blkif->xenblkd);
+                       be->blkif->xenblkd = NULL;
+                       xenbus_dev_error(dev, err, "start xenblkd");
                        return;
                }
 