# HG changeset patch
# User Daniel Stodden <dns@xxxxxxxxxxxx>
# Date 1264750223 28800
# Node ID 2b8c81d25096575472974f1e81829b73f0fe7d69
# Parent 9746b010e2bf495a1c93c37ab98f9d1fdde559df
blktap2: Separate tapdisk raw I/O into different backends.
Hide tapdisk support for different raw I/O interfaces behind a new
struct tio. Libaio continues to dominate the interface, requiring
every backend to dispatch iocb/io_event structs.
Backends:
- lio: Kernel AIO via libaio.
- rwio: Canonical read/write() mode.
Misc:
- Fixes a bug in tapdisk-vbd which locks up the sync io mode.
- Adds a PERROR macro to blktaplib.h, which the new queue code needs.
- Removes dead code in qcow2raw to make it link again.
Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
Signed-off-by: Jake Wires <jake.wires@xxxxxxxxxx>
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-queue.c
--- a/tools/blktap2/drivers/tapdisk-queue.c Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-queue.c Thu Jan 28 23:30:23 2010 -0800
@@ -141,7 +141,7 @@
* use a private linked list to keep track
* of the tiocbs we're cancelling.
*/
- tiocb = (struct tiocb *)queue->iocbs[0]->data;
+ tiocb = queue->iocbs[0]->data;
queued = queue->queued;
queue->queued = 0;
@@ -165,8 +165,40 @@
return cancel_tiocbs(queue, err);
}
+/*
+ * rwio
+ */
+
+/* Private state of the rwio backend, reached through queue->tio_data. */
+struct rwio {
+ /* Completion records that tapdisk_rwio_submit fills in, one per iocb. */
+ struct io_event *aio_events;
+};
+
+/*
+ * Release the event array owned by the rwio backend.
+ *
+ * Like tapdisk_lio_destroy, this tolerates a queue whose backend data
+ * was never allocated. The struct rwio itself is not freed here;
+ * tapdisk_queue_free_io releases queue->tio_data after calling this hook.
+ */
+static void
+tapdisk_rwio_destroy(struct tqueue *queue)
+{
+	struct rwio *rwio = queue->tio_data;
+
+	if (!rwio)
+		return;
+
+	/* free(NULL) is a no-op, so the pointer needs no guard. */
+	free(rwio->aio_events);
+	rwio->aio_events = NULL;
+}
+
+/*
+ * Allocate the event array used by the rwio backend.
+ *
+ * @queue: queue whose tio_data points at a struct rwio.
+ * @size:  number of io_event slots to allocate.
+ *
+ * Returns 0 on success or a negative errno value if the allocation fails.
+ */
+static int
+tapdisk_rwio_setup(struct tqueue *queue, int size)
+{
+	struct rwio *rwio = queue->tio_data;
+
+	rwio->aio_events = calloc(size, sizeof(*rwio->aio_events));
+	if (!rwio->aio_events)
+		/* Never report success if calloc failed without setting errno. */
+		return errno ? -errno : -ENOMEM;
+
+	return 0;
+}
+
static inline ssize_t
-iocb_rw(struct iocb *iocb)
+tapdisk_rwio_rw(const struct iocb *iocb)
{
int fd = iocb->aio_fildes;
char *buf = iocb->u.c.buf;
@@ -177,7 +209,7 @@
if (lseek(fd, off, SEEK_SET) == (off_t)-1)
return -errno;
-
+
if (atomicio(func, fd, buf, size) != size)
return -errno;
@@ -185,8 +217,9 @@
}
static int
-io_synchronous_rw(struct tqueue *queue)
+tapdisk_rwio_submit(struct tqueue *queue)
{
+ struct rwio *rwio = queue->tio_data;
int i, merged, split;
struct iocb *iocb;
struct tiocb *tiocb;
@@ -201,18 +234,18 @@
queue->queued = 0;
for (i = 0; i < merged; i++) {
- ep = queue->aio_events + i;
+ ep = rwio->aio_events + i;
iocb = queue->iocbs[i];
ep->obj = iocb;
- ep->res = iocb_rw(iocb);
+ ep->res = tapdisk_rwio_rw(iocb);
}
- split = io_split(&queue->opioctx, queue->aio_events, merged);
- tapdisk_filter_events(queue->filter, queue->aio_events, split);
+ split = io_split(&queue->opioctx, rwio->aio_events, merged);
+ tapdisk_filter_events(queue->filter, rwio->aio_events, split);
- for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
+ for (i = split, ep = rwio->aio_events; i-- > 0; ep++) {
iocb = ep->obj;
- tiocb = (struct tiocb *)iocb->data;
+ tiocb = iocb->data;
complete_tiocb(queue, tiocb, ep->res);
}
@@ -221,65 +254,258 @@
return split;
}
-static void tapdisk_tiocb_event(event_id_t id, char mode, void *private);
+/*
+ * Backend descriptor for synchronous read()/write() I/O.
+ *
+ * tapdisk_rwio_submit records completions in rwio->aio_events, so the
+ * backend needs its private struct allocated (data_size) and the event
+ * array created and released through the setup/destroy hooks. Leaving
+ * these at 0/NULL makes the submit path dereference memory that was
+ * never allocated.
+ */
+static const struct tio td_tio_rwio = {
+	.name        = "rwio",
+	.data_size   = sizeof(struct rwio),
+	.tio_setup   = tapdisk_rwio_setup,
+	.tio_destroy = tapdisk_rwio_destroy,
+	.tio_submit  = tapdisk_rwio_submit,
+};
+
+/*
+ * libaio
+ */
+
+/* Private state of the libaio backend, reached through queue->tio_data. */
+struct lio {
+ /* AIO context handed to io_setup, io_submit, io_getevents and io_destroy. */
+ io_context_t aio_ctx;
+ /* Buffer that io_getevents fills with completion events. */
+ struct io_event *aio_events;
+
+ /* Return value of io_setup(); registered with the server for read polling. */
+ int poll_fd;
+ /* Id returned by tapdisk_server_register_event; -1 while not registered. */
+ int event_id;
+};
+
+/*
+ * Tear down the libaio backend: unregister the completion event, destroy
+ * the AIO context and release the event buffer. Each step is skipped if
+ * its resource was never acquired, so this is also used to unwind a
+ * partially completed tapdisk_lio_setup.
+ */
+static void
+tapdisk_lio_destroy(struct tqueue *queue)
+{
+	struct lio *state = queue->tio_data;
+
+	if (state == NULL)
+		return;
+
+	if (state->event_id >= 0) {
+		tapdisk_server_unregister_event(state->event_id);
+		state->event_id = -1;
+	}
+
+	if (state->aio_ctx != NULL) {
+		io_destroy(state->aio_ctx);
+		state->aio_ctx = NULL;
+	}
+
+	/* free(NULL) is harmless, so the buffer needs no separate check. */
+	free(state->aio_events);
+	state->aio_events = NULL;
+}
+
+/*
+ * Completion handler that tapdisk_lio_setup registers for read readiness
+ * on the backend's poll fd.
+ *
+ * Reaps up to queue->size finished iocbs, splits merged requests back
+ * into their tiocbs, runs the event filter, updates the pending counters
+ * and completes every tiocb. Deferred tiocbs are queued afterwards.
+ *
+ * @id, @mode: supplied by the event scheduler; unused here.
+ * @private:   the struct tqueue passed at registration time.
+ */
+static void
+tapdisk_lio_event(event_id_t id, char mode, void *private)
+{
+	struct tqueue *queue = private;
+	struct lio *lio = queue->tio_data;
+	int i, ret, split;
+	struct iocb *iocb;
+	struct tiocb *tiocb;
+	struct io_event *ep;
+
+	ret = io_getevents(lio->aio_ctx, 0,
+			   queue->size, lio->aio_events, NULL);
+	if (ret < 0) {
+		/*
+		 * io_getevents reports failure as a negative errno. Using
+		 * that as an event count would corrupt iocbs_pending, so
+		 * leave the queue state untouched and bail out.
+		 */
+		DPRINTF("io_getevents failed: %d\n", ret);
+		return;
+	}
+
+	split = io_split(&queue->opioctx, lio->aio_events, ret);
+	tapdisk_filter_events(queue->filter, lio->aio_events, split);
+
+	DBG("events: %d, tiocbs: %d\n", ret, split);
+
+	queue->iocbs_pending  -= ret;
+	queue->tiocbs_pending -= split;
+
+	for (i = split, ep = lio->aio_events; i-- > 0; ep++) {
+		iocb  = ep->obj;
+		tiocb = iocb->data;
+		complete_tiocb(queue, tiocb, ep->res);
+	}
+
+	queue_deferred_tiocbs(queue);
+}
+
+/*
+ * Initialise the libaio backend for a queue.
+ *
+ * Creates the AIO context, registers tapdisk_lio_event for read readiness
+ * on the fd returned by io_setup, and allocates the completion buffer.
+ *
+ * @queue: queue whose tio_data points at a zeroed struct lio.
+ * @qlen:  number of in-flight requests / event slots to provision.
+ *
+ * Returns 0 on success or a negative error code. On failure everything
+ * acquired so far is released through tapdisk_lio_destroy.
+ */
+static int
+tapdisk_lio_setup(struct tqueue *queue, int qlen)
+{
+	struct lio *lio = queue->tio_data;
+	int err;
+
+	lio->event_id = -1;
+	/*
+	 * NOTE(review): REQUEST_ASYNC_FD presumably asks the patched
+	 * io_setup to hand back a pollable fd -- confirm against the
+	 * aio-poll kernel patch.
+	 */
+	lio->aio_ctx  = REQUEST_ASYNC_FD;
+
+	lio->poll_fd = io_setup(qlen, &lio->aio_ctx);
+	err = lio->poll_fd;
+	if (err < 0) {
+		/* No context was created; keep destroy from touching it. */
+		lio->aio_ctx = NULL;
+
+		if (err == -EAGAIN)
+			DPRINTF("Couldn't setup AIO context.  If you are trying to "
+				"concurrently use a large number of blktap-based disks, you may "
+				"need to increase the system-wide aio request limit. "
+				"(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
+		else
+			DPRINTF("Couldn't get fd for AIO poll support.  This is probably "
+				"because your kernel does not have the aio-poll patch "
+				"applied.\n");
+
+		goto fail;
+	}
+
+	lio->event_id =
+		tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
+					      lio->poll_fd, 0,
+					      tapdisk_lio_event,
+					      queue);
+	err = lio->event_id;
+	if (err < 0)
+		goto fail;
+
+	lio->aio_events = calloc(qlen, sizeof(struct io_event));
+	if (!lio->aio_events) {
+		err = -errno;
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	tapdisk_lio_destroy(queue);
+	return err;
+}
+
+/*
+ * Submit all queued iocbs through libaio.
+ *
+ * The queued iocbs are passed through the filter, merged, and handed to
+ * io_submit. If io_submit fails outright, or accepts fewer iocbs than
+ * were merged (reported as -EIO), the unsubmitted remainder is failed
+ * via fail_tiocbs and taken back out of tiocbs_pending.
+ *
+ * Returns the number of iocbs accepted by io_submit (0 on error or when
+ * nothing was queued).
+ */
+static int
+tapdisk_lio_submit(struct tqueue *queue)
+{
+	struct lio *lio = queue->tio_data;
+	int nr_merged, nr_sent;
+	int error = 0;
+
+	if (queue->queued == 0)
+		return 0;
+
+	tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
+	nr_merged = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
+	nr_sent   = io_submit(lio->aio_ctx, nr_merged, queue->iocbs);
+
+	DBG("queued: %d, merged: %d, submitted: %d\n",
+	    queue->queued, nr_merged, nr_sent);
+
+	if (nr_sent < 0) {
+		error   = nr_sent;
+		nr_sent = 0;
+	} else if (nr_sent < nr_merged) {
+		error = -EIO;
+	}
+
+	/* Account for everything first; the queue is empty from here on. */
+	queue->iocbs_pending  += nr_sent;
+	queue->tiocbs_pending += queue->queued;
+	queue->queued = 0;
+
+	if (error != 0)
+		queue->tiocbs_pending -=
+			fail_tiocbs(queue, nr_sent, nr_merged, error);
+
+	return nr_sent;
+}
+
+/*
+ * Backend descriptor for kernel AIO via libaio. tapdisk_queue_init_io
+ * selects it for TIO_DRV_LIO and allocates data_size bytes (one struct
+ * lio) as the backend's private state.
+ */
+static const struct tio td_tio_lio = {
+ .name = "lio",
+ .data_size = sizeof(struct lio),
+ .tio_setup = tapdisk_lio_setup,
+ .tio_destroy = tapdisk_lio_destroy,
+ .tio_submit = tapdisk_lio_submit,
+};
+
+/*
+ * Detach the I/O backend from a queue: run the backend's destroy hook
+ * (if it has one), then release the backend's private data. Safe to call
+ * on a queue with no backend or no private data attached.
+ */
+static void
+tapdisk_queue_free_io(struct tqueue *queue)
+{
+	const struct tio *drv = queue->tio;
+
+	if (drv != NULL) {
+		if (drv->tio_destroy != NULL)
+			drv->tio_destroy(queue);
+		queue->tio = NULL;
+	}
+
+	/* free(NULL) is a no-op, so tio_data needs no explicit test. */
+	free(queue->tio_data);
+	queue->tio_data = NULL;
+}
+
+/*
+ * Select and initialise the I/O backend for a queue.
+ *
+ * @queue: queue to attach the backend to; queue->size must already be set
+ *         because it is passed to the backend's setup hook.
+ * @drv:   TIO_DRV_LIO or TIO_DRV_RWIO.
+ *
+ * Returns 0 on success. Returns -EINVAL for an unknown @drv, a negative
+ * errno if the private data cannot be allocated, or whatever non-zero
+ * value the backend's setup hook reports. On failure the backend is
+ * detached again via tapdisk_queue_free_io.
+ */
+static int
+tapdisk_queue_init_io(struct tqueue *queue, int drv)
+{
+	const struct tio *tio;
+	int err;
+
+	switch (drv) {
+	case TIO_DRV_LIO:
+		tio = &td_tio_lio;
+		break;
+	case TIO_DRV_RWIO:
+		tio = &td_tio_rwio;
+		break;
+	default:
+		err = -EINVAL;
+		goto fail;
+	}
+
+	/*
+	 * calloc(1, 0) may legitimately return NULL, which must not be
+	 * mistaken for an allocation failure, so only allocate private
+	 * data for backends that ask for some.
+	 */
+	if (tio->data_size) {
+		queue->tio_data = calloc(1, tio->data_size);
+		if (!queue->tio_data) {
+			/*
+			 * Capture errno before logging (the logging call may
+			 * overwrite it) and never turn a failure into 0.
+			 */
+			err = errno ? -errno : -ENOMEM;
+			PERROR("malloc(%zu)", tio->data_size);
+			goto fail;
+		}
+	}
+
+	queue->tio = tio;
+
+	if (tio->tio_setup) {
+		err = tio->tio_setup(queue, queue->size);
+		if (err)
+			goto fail;
+	}
+
+	DPRINTF("I/O queue driver: %s\n", tio->name);
+
+	return 0;
+
+fail:
+	tapdisk_queue_free_io(queue);
+	return err;
+}
int
tapdisk_init_queue(struct tqueue *queue, int size,
- int sync, struct tfilter *filter)
+ int drv, struct tfilter *filter)
{
int i, err;
memset(queue, 0, sizeof(struct tqueue));
queue->size = size;
- queue->sync = sync;
queue->filter = filter;
- queue->event = -1;
- queue->aio_ctx = NULL;
-
if (!size)
return 0;
- if (!sync) {
- queue->aio_ctx = REQUEST_ASYNC_FD;
- queue->poll_fd = io_setup(size, &queue->aio_ctx);
- err = queue->poll_fd;
- if (err < 0) {
- if (err == -EAGAIN)
- DPRINTF("Couldn't setup AIO context. If you "
- "are trying to concurrently use a "
- "large number of blktap-based disks, "
- "you may need to increase the "
- "system-wide aio request limit. "
- "(e.g. 'echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- else
- DPRINTF("Couldn't get fd for AIO poll "
- "support. This is probably because "
- "your kernel does not have the "
- "aio-poll patch applied.\n");
- queue->aio_ctx = NULL;
- goto fail;
- }
+ err = tapdisk_queue_init_io(queue, drv);
+ if (err)
+ goto fail;
- queue->event =
- tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
- queue->poll_fd, 0,
- tapdisk_tiocb_event,
- queue);
- err = queue->event;
- if (err < 0)
- goto fail;
-
+ queue->iocbs = calloc(size, sizeof(struct iocb *));
+ if (!queue->iocbs) {
+ err = -errno;
+ goto fail;
}
- err = -ENOMEM;
- queue->iocbs = calloc(size, sizeof(struct iocb *));
- queue->aio_events = calloc(size, sizeof(struct io_event));
- if (!queue->iocbs || !queue->aio_events)
- goto fail;
-
err = opio_init(&queue->opioctx, size);
if (err)
goto fail;
@@ -294,22 +520,11 @@
void
tapdisk_free_queue(struct tqueue *queue)
{
- if (queue->event >= 0) {
- tapdisk_server_unregister_event(queue->event);
- queue->event = -1;
- }
-
- if (queue->aio_ctx) {
- io_destroy(queue->aio_ctx);
- queue->aio_ctx = NULL;
- }
+ tapdisk_queue_free_io(queue);
free(queue->iocbs);
queue->iocbs = NULL;
- free(queue->aio_events);
- queue->aio_events = NULL;
-
opio_free(&queue->opioctx);
}
@@ -319,9 +534,9 @@
struct tiocb *tiocb = queue->deferred.head;
WARN("TAPDISK QUEUE:\n");
- WARN("size: %d, sync: %d, queued: %d, iocbs_pending: %d, "
+ WARN("size: %d, tio: %s, queued: %d, iocbs_pending: %d, "
"tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %"PRIx64"\n",
- queue->size, queue->sync, queue->queued, queue->iocbs_pending,
+ queue->size, queue->tio->name, queue->queued, queue->iocbs_pending,
queue->tiocbs_pending, queue->tiocbs_deferred, queue->deferrals);
if (tiocb) {
@@ -362,42 +577,14 @@
defer_tiocb(queue, tiocb);
}
+
/*
* fail_tiocbs may queue more tiocbs
*/
int
tapdisk_submit_tiocbs(struct tqueue *queue)
{
- int merged, submitted, err = 0;
-
- if (!queue->queued)
- return 0;
-
- if (queue->sync)
- return io_synchronous_rw(queue);
-
- tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
- merged = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
- submitted = io_submit(queue->aio_ctx, merged, queue->iocbs);
-
- DBG("queued: %d, merged: %d, submitted: %d\n",
- queue->queued, merged, submitted);
-
- if (submitted < 0) {
- err = submitted;
- submitted = 0;
- } else if (submitted < merged)
- err = -EIO;
-
- queue->iocbs_pending += submitted;
- queue->tiocbs_pending += queue->queued;
- queue->queued = 0;
-
- if (err)
- queue->tiocbs_pending -=
- fail_tiocbs(queue, submitted, merged, err);
-
- return submitted;
+ return queue->tio->tio_submit(queue);
}
int
@@ -412,40 +599,6 @@
return submitted;
}
-static void
-tapdisk_complete_tiocbs(struct tqueue *queue)
-{
- int i, ret, split;
- struct iocb *iocb;
- struct tiocb *tiocb;
- struct io_event *ep;
-
- ret = io_getevents(queue->aio_ctx, 0,
- queue->size, queue->aio_events, NULL);
- split = io_split(&queue->opioctx, queue->aio_events, ret);
- tapdisk_filter_events(queue->filter, queue->aio_events, split);
-
- DBG("events: %d, tiocbs: %d\n", ret, split);
-
- queue->iocbs_pending -= ret;
- queue->tiocbs_pending -= split;
-
- for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
- iocb = ep->obj;
- tiocb = (struct tiocb *)iocb->data;
- complete_tiocb(queue, tiocb, ep->res);
- }
-
- queue_deferred_tiocbs(queue);
-}
-
-static void
-tapdisk_tiocb_event(event_id_t id, char mode, void *private)
-{
- struct tqueue *queue = private;
- tapdisk_complete_tiocbs(queue);
-}
-
/*
* cancel_tiocbs may queue more tiocbs
*/
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-queue.h
--- a/tools/blktap2/drivers/tapdisk-queue.h Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-queue.h Thu Jan 28 23:30:23 2010 -0800
@@ -55,16 +55,14 @@
struct tqueue {
int size;
- int sync;
- int poll_fd;
- event_id_t event;
- io_context_t aio_ctx;
+ const struct tio *tio;
+ void *tio_data;
+
struct opioctx opioctx;
int queued;
struct iocb **iocbs;
- struct io_event *aio_events;
/* number of iocbs pending in the aio layer */
int iocbs_pending;
@@ -86,6 +84,20 @@
uint64_t deferrals;
};
+/*
+ * Raw I/O backend descriptor. A queue points at one of these through
+ * queue->tio and keeps the backend's private state in queue->tio_data.
+ */
+struct tio {
+ /* Short backend name, printed in log and debug output. */
+ const char *name;
+ /* Size in bytes of the private state allocated into queue->tio_data. */
+ size_t data_size;
+
+ /* Optional (may be NULL): set up the backend for qlen requests; non-zero return means failure. */
+ int (*tio_setup) (struct tqueue *queue, int qlen);
+ /* Optional (may be NULL): release whatever tio_setup acquired. */
+ void (*tio_destroy) (struct tqueue *queue);
+ /* Submit the iocbs currently queued; tapdisk_submit_tiocbs returns its result. */
+ int (*tio_submit) (struct tqueue *queue);
+};
+
+/* Backend selectors for the 'drv' argument of tapdisk_init_queue. */
+enum {
+ TIO_DRV_LIO = 1, /* kernel AIO via libaio */
+ TIO_DRV_RWIO = 2, /* plain read()/write() */
+};
+
/*
* Interface for request producer (i.e., tapdisk)
* NB: the following functions may cause additional tiocbs to be queued:
@@ -99,7 +111,7 @@
#define tapdisk_queue_empty(q) ((q)->queued == 0)
#define tapdisk_queue_full(q) \
(((q)->tiocbs_pending + (q)->queued) >= (q)->size)
-int tapdisk_init_queue(struct tqueue *, int size, int sync, struct tfilter *);
+int tapdisk_init_queue(struct tqueue *, int size, int drv, struct tfilter *);
void tapdisk_free_queue(struct tqueue *);
void tapdisk_debug_queue(struct tqueue *);
void tapdisk_queue_tiocb(struct tqueue *, struct tiocb *);
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-server.c
--- a/tools/blktap2/drivers/tapdisk-server.c Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-server.c Thu Jan 28 23:30:23 2010 -0800
@@ -236,7 +236,8 @@
static int
tapdisk_server_init_aio(void)
{
- return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS, 0, NULL);
+ return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS,
+ TIO_DRV_LIO, NULL);
}
static void
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/drivers/tapdisk-vbd.c Thu Jan 28 23:30:23 2010 -0800
@@ -1260,6 +1260,8 @@
int n;
td_ring_t *ring;
+ tapdisk_vbd_check_state(vbd);
+
ring = &vbd->ring;
if (!ring->sring)
return 0;
diff -r 9746b010e2bf -r 2b8c81d25096 tools/blktap2/include/blktaplib.h
--- a/tools/blktap2/include/blktaplib.h Fri Nov 13 20:26:14 2009 -0800
+++ b/tools/blktap2/include/blktaplib.h Thu Jan 28 23:30:23 2010 -0800
@@ -43,6 +43,7 @@
#endif
#define EPRINTF(_f, _a...) syslog(LOG_ERR, "tap-err:%s: " _f, __func__, ##_a)
+/* Like EPRINTF, but appends ": " and the strerror() text for the current errno. */
+#define PERROR(_f, _a...) EPRINTF(_f ": %s", ##_a, strerror(errno))
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE)
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|