WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 3 of 4] blktap2: Separate tapdisk raw I/O into differ

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 3 of 4] blktap2: Separate tapdisk raw I/O into different backends
From: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
Date: Thu, 28 Jan 2010 22:37:45 -0000
Delivery-date: Thu, 28 Jan 2010 14:41:22 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <patchbomb.1264718262@xxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <patchbomb.1264718262@xxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mercurial-patchbomb/1.3.1
# HG changeset patch
# User Daniel Stodden <daniel.stodden@xxxxxxxxxx>
# Date 1264717794 28800
# Node ID b816b4a752d8d2dcc6e7e01ad0cf75404932d94b
# Parent  2439e941318745b333e63493af247b9558ddebc2
blktap2: Separate tapdisk raw I/O into different backends.

Hide tapdisk support for different raw I/O interfaces behind a new
struct tio. Libaio remains to dominate the interface, requiring
everyone to dispatch iocb/ioevent structs.

Backends:
 - lio:  Kernel AIO via libaio.
 - rwio: Canonical read/write() mode.

Misc:
 - Fixes a bug in tapdisk-vbd which locks up the sync io mode.
 - Wants a PERROR macro in blktaplib.h
 - Removes dead code in qcow2raw to make it link again.

Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
Signed-off-by: Jake Wires <jake.wires@xxxxxxxxxx>

diff -r 2439e9413187 -r b816b4a752d8 tools/blktap2/drivers/tapdisk-queue.c
--- a/tools/blktap2/drivers/tapdisk-queue.c     Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-queue.c     Thu Jan 28 14:29:54 2010 -0800
@@ -141,7 +141,7 @@
         * use a private linked list to keep track
         * of the tiocbs we're cancelling. 
         */
-       tiocb  = (struct tiocb *)queue->iocbs[0]->data;
+       tiocb  = queue->iocbs[0]->data;
        queued = queue->queued;
        queue->queued = 0;
 
@@ -165,8 +165,40 @@
        return cancel_tiocbs(queue, err);
 }
 
+/*
+ * rwio
+ */
+
+struct rwio {
+       struct io_event *aio_events;
+};
+
+static void
+tapdisk_rwio_destroy(struct tqueue *queue)
+{
+       struct rwio *rwio = queue->tio_data;
+
+       if (rwio->aio_events) {
+               free(rwio->aio_events);
+               rwio->aio_events = NULL;
+       }
+}
+
+static int
+tapdisk_rwio_setup(struct tqueue *queue, int size)
+{
+       struct rwio *rwio = queue->tio_data;
+       int err;
+
+       rwio->aio_events = calloc(size, sizeof(struct io_event));
+       if (!rwio->aio_events)
+               return -errno;
+
+       return 0;
+}
+
 static inline ssize_t
-iocb_rw(struct iocb *iocb)
+tapdisk_rwio_rw(const struct iocb *iocb)
 {
        int fd        = iocb->aio_fildes;
        char *buf     = iocb->u.c.buf;
@@ -177,7 +209,7 @@
 
        if (lseek(fd, off, SEEK_SET) == (off_t)-1)
                return -errno;
-       
+
        if (atomicio(func, fd, buf, size) != size)
                return -errno;
 
@@ -185,8 +217,9 @@
 }
 
 static int
-io_synchronous_rw(struct tqueue *queue)
+tapdisk_rwio_submit(struct tqueue *queue)
 {
+       struct rwio *rwio = queue->tio_data;
        int i, merged, split;
        struct iocb *iocb;
        struct tiocb *tiocb;
@@ -201,18 +234,18 @@
        queue->queued = 0;
 
        for (i = 0; i < merged; i++) {
-               ep      = queue->aio_events + i;
+               ep      = rwio->aio_events + i;
                iocb    = queue->iocbs[i];
                ep->obj = iocb;
-               ep->res = iocb_rw(iocb);
+               ep->res = tapdisk_rwio_rw(iocb);
        }
 
-       split = io_split(&queue->opioctx, queue->aio_events, merged);
-       tapdisk_filter_events(queue->filter, queue->aio_events, split);
+       split = io_split(&queue->opioctx, rwio->aio_events, merged);
+       tapdisk_filter_events(queue->filter, rwio->aio_events, split);
 
-       for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
+       for (i = split, ep = rwio->aio_events; i-- > 0; ep++) {
                iocb  = ep->obj;
-               tiocb = (struct tiocb *)iocb->data;
+               tiocb = iocb->data;
                complete_tiocb(queue, tiocb, ep->res);
        }
 
@@ -221,65 +254,258 @@
        return split;
 }
 
-static void tapdisk_tiocb_event(event_id_t id, char mode, void *private);
+static const struct tio td_tio_rwio = {
+       .name        = "rwio",
+       .data_size   = 0,
+       .tio_setup   = NULL,
+       .tio_destroy = NULL,
+       .tio_submit  = tapdisk_rwio_submit
+};
+
+/*
+ * libaio
+ */
+
+struct lio {
+       io_context_t     aio_ctx;
+       struct io_event *aio_events;
+
+       int              poll_fd;
+       int              event_id;
+};
+
+static void
+tapdisk_lio_destroy(struct tqueue *queue)
+{
+       struct lio *lio = queue->tio_data;
+
+       if (!lio)
+               return;
+
+       if (lio->event_id >= 0) {
+               tapdisk_server_unregister_event(lio->event_id);
+               lio->event_id = -1;
+       }
+
+       if (lio->aio_ctx) {
+               io_destroy(lio->aio_ctx);
+               lio->aio_ctx = NULL;
+       }
+
+       if (lio->aio_events) {
+               free(lio->aio_events);
+               lio->aio_events = NULL;
+       }
+}
+
+static void
+tapdisk_lio_event(event_id_t id, char mode, void *private)
+{
+       struct tqueue *queue = private;
+       struct lio *lio;
+       int i, ret, split;
+       struct iocb *iocb;
+       struct tiocb *tiocb;
+       struct io_event *ep;
+
+       lio   = queue->tio_data;
+       ret   = io_getevents(lio->aio_ctx, 0,
+                            queue->size, lio->aio_events, NULL);
+       split = io_split(&queue->opioctx, lio->aio_events, ret);
+       tapdisk_filter_events(queue->filter, lio->aio_events, split);
+
+       DBG("events: %d, tiocbs: %d\n", ret, split);
+
+       queue->iocbs_pending  -= ret;
+       queue->tiocbs_pending -= split;
+
+       for (i = split, ep = lio->aio_events; i-- > 0; ep++) {
+               iocb  = ep->obj;
+               tiocb = iocb->data;
+               complete_tiocb(queue, tiocb, ep->res);
+       }
+
+       queue_deferred_tiocbs(queue);
+}
+
+static int
+tapdisk_lio_setup(struct tqueue *queue, int qlen)
+{
+       struct lio *lio = queue->tio_data;
+       size_t sz;
+       int err;
+
+       lio->event_id = -1;
+       lio->aio_ctx  = REQUEST_ASYNC_FD;
+
+       lio->poll_fd = io_setup(qlen, &lio->aio_ctx);
+       err = lio->poll_fd;
+       if (err < 0) {
+               lio->aio_ctx = NULL;
+
+               if (err == -EAGAIN)
+                       goto fail_rsv;
+
+               goto fail_fd;
+       }
+
+       lio->event_id =
+               tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
+                                             lio->poll_fd, 0,
+                                             tapdisk_lio_event,
+                                             queue);
+       err = lio->event_id;
+       if (err < 0)
+               goto fail;
+
+       lio->aio_events = calloc(qlen, sizeof(struct io_event));
+       if (!lio->aio_events) {
+               err = -errno;
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       tapdisk_lio_destroy(queue);
+       return err;
+
+fail_rsv:
+       DPRINTF("Couldn't setup AIO context. If you are trying to "
+               "concurrently use a large number of blktap-based disks, you may 
"
+               "need to increase the system-wide aio request limit. "
+               "(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
+       goto fail;
+
+fail_fd:
+       DPRINTF("Couldn't get fd for AIO poll support. This is probably "
+               "because your kernel does not have  the aio-poll patch "
+               "applied.\n");
+       goto fail;
+}
+
+static int
+tapdisk_lio_submit(struct tqueue *queue)
+{
+       struct lio *lio = queue->tio_data;
+       int merged, submitted, err = 0;
+
+       if (!queue->queued)
+               return 0;
+
+       tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
+       merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
+       submitted = io_submit(lio->aio_ctx, merged, queue->iocbs);
+
+       DBG("queued: %d, merged: %d, submitted: %d\n",
+           queue->queued, merged, submitted);
+
+       if (submitted < 0) {
+               err = submitted;
+               submitted = 0;
+       } else if (submitted < merged)
+               err = -EIO;
+
+       queue->iocbs_pending  += submitted;
+       queue->tiocbs_pending += queue->queued;
+       queue->queued          = 0;
+
+       if (err)
+               queue->tiocbs_pending -= 
+                       fail_tiocbs(queue, submitted, merged, err);
+
+       return submitted;
+}
+
+static const struct tio td_tio_lio = {
+       .name        = "lio",
+       .data_size   = sizeof(struct lio),
+       .tio_setup   = tapdisk_lio_setup,
+       .tio_destroy = tapdisk_lio_destroy,
+       .tio_submit  = tapdisk_lio_submit,
+};
+
+static void
+tapdisk_queue_free_io(struct tqueue *queue)
+{
+       if (queue->tio) {
+               if (queue->tio->tio_destroy)
+                       queue->tio->tio_destroy(queue);
+               queue->tio = NULL;
+       }
+
+       if (queue->tio_data) {
+               free(queue->tio_data);
+               queue->tio_data = NULL;
+       }
+}
+
+static int
+tapdisk_queue_init_io(struct tqueue *queue, int drv)
+{
+       const struct tio *tio;
+       int err;
+
+       switch (drv) {
+       case TIO_DRV_LIO:
+               tio = &td_tio_lio;
+               break;
+       case TIO_DRV_RWIO:
+               tio = &td_tio_rwio;
+               break;
+       default:
+               err = -EINVAL;
+               goto fail;
+       }
+
+       queue->tio_data = calloc(1, tio->data_size);
+       if (!queue->tio_data) {
+               PERROR("malloc(%zu)", tio->data_size);
+               err = -errno;
+               goto fail;
+       }
+
+       queue->tio = tio;
+
+       if (tio->tio_setup) {
+               err = tio->tio_setup(queue, queue->size);
+               if (err)
+                       goto fail;
+       }
+
+       DPRINTF("I/O queue driver: %s\n", tio->name);
+
+       return 0;
+
+fail:
+       tapdisk_queue_free_io(queue);
+       return err;
+}
 
 int
 tapdisk_init_queue(struct tqueue *queue, int size,
-                  int sync, struct tfilter *filter)
+                  int drv, struct tfilter *filter)
 {
        int i, err;
 
        memset(queue, 0, sizeof(struct tqueue));
 
        queue->size   = size;
-       queue->sync   = sync;
        queue->filter = filter;
 
-       queue->event   = -1;
-       queue->aio_ctx = NULL;
-
        if (!size)
                return 0;
 
-       if (!sync) {
-               queue->aio_ctx = REQUEST_ASYNC_FD;
-               queue->poll_fd = io_setup(size, &queue->aio_ctx);
-               err = queue->poll_fd;
-               if (err < 0) {
-                       if (err == -EAGAIN)
-                               DPRINTF("Couldn't setup AIO context.  If you "
-                                       "are trying to concurrently use a "
-                                       "large number of blktap-based disks, "
-                                       "you may need to increase the "
-                                       "system-wide aio request limit. "
-                                       "(e.g. 'echo 1048576 > /proc/sys/fs/"
-                                       "aio-max-nr')\n");
-                       else
-                               DPRINTF("Couldn't get fd for AIO poll "
-                                       "support.  This is probably because "
-                                       "your kernel does not have the "
-                                       "aio-poll patch applied.\n");
-                       queue->aio_ctx = NULL;
-                       goto fail;
-               }
+       err = tapdisk_queue_init_io(queue, drv);
+       if (err)
+               goto fail;
 
-               queue->event =
-                       tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
-                                                     queue->poll_fd, 0,
-                                                     tapdisk_tiocb_event,
-                                                     queue);
-               err = queue->event;
-               if (err < 0)
-                       goto fail;
-
+       queue->iocbs = calloc(size, sizeof(struct iocb *));
+       if (!queue->iocbs) {
+               err = -errno;
+               goto fail;
        }
 
-       err               = -ENOMEM;
-       queue->iocbs      = calloc(size, sizeof(struct iocb *));
-       queue->aio_events = calloc(size, sizeof(struct io_event));
-       if (!queue->iocbs || !queue->aio_events)
-               goto fail;
-
        err = opio_init(&queue->opioctx, size);
        if (err)
                goto fail;
@@ -294,22 +520,11 @@
 void
 tapdisk_free_queue(struct tqueue *queue)
 {
-       if (queue->event >= 0) {
-               tapdisk_server_unregister_event(queue->event);
-               queue->event = -1;
-       }
-
-       if (queue->aio_ctx) {
-               io_destroy(queue->aio_ctx);
-               queue->aio_ctx = NULL;
-       }
+       tapdisk_queue_free_io(queue);
 
        free(queue->iocbs);
        queue->iocbs = NULL;
 
-       free(queue->aio_events);
-       queue->aio_events = NULL;
-
        opio_free(&queue->opioctx);
 }
 
@@ -319,9 +534,9 @@
        struct tiocb *tiocb = queue->deferred.head;
 
        WARN("TAPDISK QUEUE:\n");
-       WARN("size: %d, sync: %d, queued: %d, iocbs_pending: %d, "
+       WARN("size: %d, tio: %s, queued: %d, iocbs_pending: %d, "
             "tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %"PRIx64"\n",
-            queue->size, queue->sync, queue->queued, queue->iocbs_pending,
+            queue->size, queue->tio->name, queue->queued, queue->iocbs_pending,
             queue->tiocbs_pending, queue->tiocbs_deferred, queue->deferrals);
 
        if (tiocb) {
@@ -362,42 +577,14 @@
                defer_tiocb(queue, tiocb);
 }
 
+
 /*
  * fail_tiocbs may queue more tiocbs
  */
 int
 tapdisk_submit_tiocbs(struct tqueue *queue)
 {
-       int merged, submitted, err = 0;
-
-       if (!queue->queued)
-               return 0;
-
-       if (queue->sync)
-               return io_synchronous_rw(queue);
-
-       tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
-       merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
-       submitted = io_submit(queue->aio_ctx, merged, queue->iocbs);
-
-       DBG("queued: %d, merged: %d, submitted: %d\n",
-           queue->queued, merged, submitted);
-
-       if (submitted < 0) {
-               err = submitted;
-               submitted = 0;
-       } else if (submitted < merged)
-               err = -EIO;
-
-       queue->iocbs_pending  += submitted;
-       queue->tiocbs_pending += queue->queued;
-       queue->queued          = 0;
-
-       if (err)
-               queue->tiocbs_pending -= 
-                       fail_tiocbs(queue, submitted, merged, err);
-
-       return submitted;
+       return queue->tio->tio_submit(queue);
 }
 
 int
@@ -412,40 +599,6 @@
        return submitted;
 }
 
-static void
-tapdisk_complete_tiocbs(struct tqueue *queue)
-{
-       int i, ret, split;
-       struct iocb *iocb;
-       struct tiocb *tiocb;
-       struct io_event *ep;
-
-       ret   = io_getevents(queue->aio_ctx, 0,
-                            queue->size, queue->aio_events, NULL);
-       split = io_split(&queue->opioctx, queue->aio_events, ret);
-       tapdisk_filter_events(queue->filter, queue->aio_events, split);
-
-       DBG("events: %d, tiocbs: %d\n", ret, split);
-
-       queue->iocbs_pending  -= ret;
-       queue->tiocbs_pending -= split;
-
-       for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
-               iocb  = ep->obj;
-               tiocb = (struct tiocb *)iocb->data;
-               complete_tiocb(queue, tiocb, ep->res);
-       }
-
-       queue_deferred_tiocbs(queue);
-}
-
-static void
-tapdisk_tiocb_event(event_id_t id, char mode, void *private)
-{
-       struct tqueue *queue = private;
-       tapdisk_complete_tiocbs(queue);
-}
-
 /*
  * cancel_tiocbs may queue more tiocbs
  */
diff -r 2439e9413187 -r b816b4a752d8 tools/blktap2/drivers/tapdisk-queue.h
--- a/tools/blktap2/drivers/tapdisk-queue.h     Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-queue.h     Thu Jan 28 14:29:54 2010 -0800
@@ -55,16 +55,14 @@
 
 struct tqueue {
        int                   size;
-       int                   sync;
 
-       int                   poll_fd;
-       event_id_t            event;
-       io_context_t          aio_ctx;
+       const struct tio     *tio;
+       void                 *tio_data;
+
        struct opioctx        opioctx;
 
        int                   queued;
        struct iocb         **iocbs;
-       struct io_event      *aio_events;
 
        /* number of iocbs pending in the aio layer */
        int                   iocbs_pending;
@@ -86,6 +84,20 @@
        uint64_t              deferrals;
 };
 
+struct tio {
+       const char           *name;
+       size_t                data_size;
+
+       int  (*tio_setup)    (struct tqueue *queue, int qlen);
+       void (*tio_destroy)  (struct tqueue *queue);
+       int  (*tio_submit)   (struct tqueue *queue);
+};
+
+enum {
+       TIO_DRV_LIO     = 1,
+       TIO_DRV_RWIO    = 2,
+};
+
 /*
  * Interface for request producer (i.e., tapdisk)
  * NB: the following functions may cause additional tiocbs to be queued:
@@ -99,7 +111,7 @@
 #define tapdisk_queue_empty(q) ((q)->queued == 0)
 #define tapdisk_queue_full(q)  \
        (((q)->tiocbs_pending + (q)->queued) >= (q)->size)
-int tapdisk_init_queue(struct tqueue *, int size, int sync, struct tfilter *);
+int tapdisk_init_queue(struct tqueue *, int size, int drv, struct tfilter *);
 void tapdisk_free_queue(struct tqueue *);
 void tapdisk_debug_queue(struct tqueue *);
 void tapdisk_queue_tiocb(struct tqueue *, struct tiocb *);
diff -r 2439e9413187 -r b816b4a752d8 tools/blktap2/drivers/tapdisk-server.c
--- a/tools/blktap2/drivers/tapdisk-server.c    Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-server.c    Thu Jan 28 14:29:54 2010 -0800
@@ -236,7 +236,8 @@
 static int
 tapdisk_server_init_aio(void)
 {
-       return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS, 0, NULL);
+       return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS,
+                                 TIO_DRV_LIO, NULL);
 }
 
 static void
diff -r 2439e9413187 -r b816b4a752d8 tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c       Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-vbd.c       Thu Jan 28 14:29:54 2010 -0800
@@ -1260,6 +1260,8 @@
        int n;
        td_ring_t *ring;
 
+       tapdisk_vbd_check_state(vbd);
+
        ring = &vbd->ring;
        if (!ring->sring)
                return 0;
diff -r 2439e9413187 -r b816b4a752d8 tools/blktap2/include/blktaplib.h
--- a/tools/blktap2/include/blktaplib.h Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/include/blktaplib.h Thu Jan 28 14:29:54 2010 -0800
@@ -43,6 +43,7 @@
 #endif
 
 #define EPRINTF(_f, _a...) syslog(LOG_ERR, "tap-err:%s: " _f, __func__, ##_a)
+#define PERROR(_f, _a...)  EPRINTF(_f ": %s", ##_a, strerror(errno))
 
 #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE)
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel