WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] blktap: Add fallback code to blktap for m

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] blktap: Add fallback code to blktap for missing poll-on-aio support.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 19 Jun 2007 14:42:17 -0700
Delivery-date: Tue, 19 Jun 2007 14:40:45 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1182267148 -3600
# Node ID eeeb77195ac20e8e08c57a7248913bedc0d999bf
# Parent  865c4ae59be3b0aa7e375b929414244d1e5bdf74
blktap: Add fallback code to blktap for missing poll-on-aio support.

blktap requires a xen specific kernel AIO ABI which has been vetoed by
upstream in favour of another approach. Rather than include this ABI,
Fedora has been carrying a patch which makes tap:aio use a thread to
poll for aio events and notify the main thread via a pipe.

The upstream approach of allowing io_getevents() poll normal file
descriptors via epoll is still progressing:
 http://lkml.org/lkml/2007/1/3/16

but when that does make it upstream, blktap will require significant
re-working to use that approach.

In the meantime, here's a patch which uses the poll-in-a-thread
approach only if AIO poll support isn't available. It also hides the
details behind a simple abstraction and makes both tap:aio and
tap:qcow use it.

Signed-off-by: Mark McLoughlin <markmc@xxxxxxxxxx>
---
 tools/blktap/drivers/Makefile     |    1 
 tools/blktap/drivers/block-aio.c  |   49 ++++-------
 tools/blktap/drivers/block-qcow.c |   48 +++++------
 tools/blktap/drivers/tapaio.c     |  164 ++++++++++++++++++++++++++++++++++++++
 tools/blktap/drivers/tapaio.h     |   58 +++++++++++++
 5 files changed, 267 insertions(+), 53 deletions(-)

diff -r 865c4ae59be3 -r eeeb77195ac2 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Tue Jun 19 16:29:22 2007 +0100
+++ b/tools/blktap/drivers/Makefile     Tue Jun 19 16:32:28 2007 +0100
@@ -35,6 +35,7 @@ BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-qcow.o
 BLK-OBJS  += aes.o
+BLK-OBJS  += tapaio.o
 
 all: $(IBIN) qcow-util
 
diff -r 865c4ae59be3 -r eeeb77195ac2 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c  Tue Jun 19 16:29:22 2007 +0100
+++ b/tools/blktap/drivers/block-aio.c  Tue Jun 19 16:32:28 2007 +0100
@@ -43,14 +43,7 @@
 #include <sys/ioctl.h>
 #include <linux/fs.h>
 #include "tapdisk.h"
-
-
-/**
- * We used a kernel patch to return an fd associated with the AIO context
- * so that we can concurrently poll on synchronous and async descriptors.
- * This is signalled by passing 1 as the io context to io_setup.
- */
-#define REQUEST_ASYNC_FD 1
+#include "tapaio.h"
 
 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
@@ -65,14 +58,13 @@ struct tdaio_state {
        int fd;
        
        /* libaio state */
-       io_context_t       aio_ctx;
+       tap_aio_context_t  aio_ctx;
        struct iocb        iocb_list  [MAX_AIO_REQS];
        struct iocb       *iocb_free  [MAX_AIO_REQS];
        struct pending_aio pending_aio[MAX_AIO_REQS];
        int                iocb_free_count;
        struct iocb       *iocb_queue[MAX_AIO_REQS];
        int                iocb_queued;
-       int                poll_fd; /* NB: we require aio_poll support */
        struct io_event    aio_events[MAX_AIO_REQS];
 };
 
@@ -148,7 +140,7 @@ static inline void init_fds(struct disk_
        for(i = 0; i < MAX_IOFD; i++) 
                dd->io_fd[i] = 0;
 
-       dd->io_fd[0] = prv->poll_fd;
+       dd->io_fd[0] = prv->aio_ctx.pollfd;
 }
 
 /* Open the disk file and initialize aio state. */
@@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd, 
        /* Initialize AIO */
        prv->iocb_free_count = MAX_AIO_REQS;
        prv->iocb_queued     = 0;
-       
-       prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
-       prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
-
-       if (prv->poll_fd < 0) {
-               ret = prv->poll_fd;
+
+       ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
+       if (ret < 0) {
                 if (ret == -EAGAIN) {
                         DPRINTF("Couldn't setup AIO context.  If you are "
                                 "trying to concurrently use a large number "
@@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd, 
                                 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                 "aio-max-nr')\n");
                 } else {
-                        DPRINTF("Couldn't get fd for AIO poll support.  This "
-                                "is probably because your kernel does not "
-                                "have the aio-poll patch applied.\n");
+                        DPRINTF("Couldn't setup AIO context.\n");
                 }
                goto done;
        }
@@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd)
        if (!prv->iocb_queued)
                return 0;
 
-       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+       ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, 
prv->iocb_queue);
        
        /* XXX: TODO: Handle error conditions here. */
        
@@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd)
 {
        struct tdaio_state *prv = (struct tdaio_state *)dd->private;
        
-       io_destroy(prv->aio_ctx);
+       io_destroy(prv->aio_ctx.aio_ctx);
        close(prv->fd);
 
        return 0;
@@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd)
 
 int tdaio_do_callbacks(struct disk_driver *dd, int sid)
 {
-       int ret, i, rsp = 0;
+       int i, nr_events, rsp = 0;
        struct io_event *ep;
        struct tdaio_state *prv = (struct tdaio_state *)dd->private;
 
-       /* Non-blocking test for completed io. */
-       ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
-                          NULL);
-                       
-       for (ep=prv->aio_events,i=ret; i-->0; ep++) {
+       nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+       for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                struct iocb        *io  = ep->obj;
                struct pending_aio *pio;
                
@@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive
 
                prv->iocb_free[prv->iocb_free_count++] = io;
        }
+
+       if (nr_events) {
+               nr_events = tap_aio_more_events(&prv->aio_ctx);
+               goto repeat;
+       }
+
+       tap_aio_continue(&prv->aio_ctx);
+
        return rsp;
 }
 
diff -r 865c4ae59be3 -r eeeb77195ac2 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Tue Jun 19 16:29:22 2007 +0100
+++ b/tools/blktap/drivers/block-qcow.c Tue Jun 19 16:32:28 2007 +0100
@@ -38,6 +38,7 @@
 #include "bswap.h"
 #include "aes.h"
 #include "tapdisk.h"
+#include "tapaio.h"
 
 #if 1
 #define ASSERT(_p) \
@@ -52,9 +53,6 @@
     (uint64_t)( \
         (l + (s - 1)) - ((l + (s - 1)) % s)); \
 })
-
-/******AIO DEFINES******/
-#define REQUEST_ASYNC_FD 1
 
 struct pending_aio {
         td_callback_t cb;
@@ -145,7 +143,7 @@ struct tdqcow_state {
        AES_KEY aes_encrypt_key;       /*AES key*/
        AES_KEY aes_decrypt_key;       /*AES key*/
         /* libaio state */
-        io_context_t        aio_ctx;
+        tap_aio_context_t   aio_ctx;
         int                 max_aio_reqs;
         struct iocb        *iocb_list;
         struct iocb       **iocb_free;
@@ -153,7 +151,6 @@ struct tdqcow_state {
         int                 iocb_free_count;
         struct iocb       **iocb_queue;
         int                 iocb_queued;
-        int                 poll_fd;      /* NB: we require aio_poll support */
         struct io_event    *aio_events;
 };
 
@@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d
 
 static int init_aio_state(struct disk_driver *dd)
 {
-        int i;
+       int i, ret;
        struct td_state     *bs = dd->td_state;
        struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
         long     ioidx;
@@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr
                 goto fail;
         }
 
-        /*Signal kernel to create Poll FD for Asyc completion events*/
-        s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
-        s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
-
-       if (s->poll_fd < 0) {
-                if (s->poll_fd == -EAGAIN) {
+       ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
+       if (ret < 0) {
+                if (ret == -EAGAIN) {
                         DPRINTF("Couldn't setup AIO context.  If you are "
                                 "trying to concurrently use a large number "
                                 "of blktap-based disks, you may need to "
@@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr
                                 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                 "aio-max-nr')\n");
                 } else {
-                        DPRINTF("Couldn't get fd for AIO poll support.  This "
-                                "is probably because your kernel does not "
-                                "have the aio-poll patch applied.\n");
+                        DPRINTF("Couldn't setup AIO context.\n");
                 }
                goto fail;
        }
@@ -845,7 +837,7 @@ static inline void init_fds(struct disk_
        for(i = 0; i < MAX_IOFD; i++) 
                dd->io_fd[i] = 0;
 
-       dd->io_fd[0] = s->poll_fd;
+       dd->io_fd[0] = s->aio_ctx.pollfd;
 }
 
 /* Open the disk file and initialize qcow state. */
@@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd
        if (!prv->iocb_queued)
                return 0;
 
-       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+       ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, 
prv->iocb_queue);
 
         /* XXX: TODO: Handle error conditions here. */
 
@@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
                close(fd);
        }
 
-       io_destroy(s->aio_ctx);
+       io_destroy(s->aio_ctx.aio_ctx);
        free(s->name);
        free(s->l1_table);
        free(s->l2_cache);
@@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)
 
 int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
 {
-        int ret, i, rsp = 0,*ptr;
+        int ret, i, nr_events, rsp = 0,*ptr;
         struct io_event *ep;
         struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
 
         if (sid > MAX_IOFD) return 1;
-       
-       /* Non-blocking test for completed io. */
-        ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
-                           NULL);
-
-        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+
+        nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+        for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                 struct iocb        *io  = ep->obj;
                 struct pending_aio *pio;
 
@@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv
 
                 prv->iocb_free[prv->iocb_free_count++] = io;
         }
+
+        if (nr_events) {
+                nr_events = tap_aio_more_events(&prv->aio_ctx);
+                goto repeat;
+        }
+
+        tap_aio_continue(&prv->aio_ctx);
+
         return rsp;
 }
 
diff -r 865c4ae59be3 -r eeeb77195ac2 tools/blktap/drivers/tapaio.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.c     Tue Jun 19 16:32:28 2007 +0100
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ * that via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ *    received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ *    buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+       tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+       int command;
+       int nr_events;
+       int rc;
+
+       while (1) {
+               rc = read(ctx->command_fd[0], &command, sizeof(command));
+
+               do {
+                       rc = io_getevents(ctx->aio_ctx, 1,
+                                         ctx->max_aio_events, ctx->aio_events,
+                                         NULL);
+                       if (rc) {
+                               nr_events = rc;
+                               rc = write(ctx->completion_fd[1], &nr_events,
+                                          sizeof(nr_events));
+                       }
+               } while (!rc);
+       }
+}
+
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+        int cmd = 0;
+
+        if (!ctx->poll_in_thread)
+                return;
+
+        if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+                DPRINTF("Cannot write to command pipe\n");
+}
+
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+              struct io_event *aio_events,
+              int max_aio_events)
+{
+        int ret;
+
+        ctx->aio_events = aio_events;
+        ctx->max_aio_events = max_aio_events;
+        ctx->poll_in_thread = 0;
+
+        ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0 && ret != -EINVAL)
+                return ret;
+        else if (ret > 0) {
+                ctx->pollfd = ret;
+                return ctx->pollfd;
+        }
+
+        ctx->aio_ctx = (io_context_t) 0;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0)
+                return ret;
+
+        if ((ret = pipe(ctx->command_fd)) < 0) {
+                DPRINTF("Unable to create command pipe\n");
+                return -1;
+        }
+        if ((ret = pipe(ctx->completion_fd)) < 0) {
+                DPRINTF("Unable to create completion pipe\n");
+                return -1;
+        }
+
+        if ((ret = pthread_create(&ctx->aio_thread, NULL,
+                                  tap_aio_completion_thread, ctx)) != 0) {
+                DPRINTF("Unable to create completion thread\n");
+                return -1;
+        }
+
+        ctx->pollfd = ctx->completion_fd[0];
+        ctx->poll_in_thread = 1;
+
+        tap_aio_continue(ctx);
+
+        return 0;
+}
+
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+        int nr_events = 0;
+
+        if (!ctx->poll_in_thread)
+                nr_events = io_getevents(ctx->aio_ctx, 1,
+                                         ctx->max_aio_events, ctx->aio_events, 
NULL);
+        else
+                read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+
+        return nr_events;
+}
+
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+        return io_getevents(ctx->aio_ctx, 0,
+                            ctx->max_aio_events, ctx->aio_events, NULL);
+}
+
+
diff -r 865c4ae59be3 -r eeeb77195ac2 tools/blktap/drivers/tapaio.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.h     Tue Jun 19 16:32:28 2007 +0100
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __TAPAIO_H__
+#define __TAPAIO_H__
+
+#include <pthread.h>
+#include <libaio.h>
+
+struct tap_aio_context {
+        io_context_t     aio_ctx;
+
+        struct io_event *aio_events;
+        int              max_aio_events;
+
+        pthread_t        aio_thread;
+        int              command_fd[2];
+        int              completion_fd[2];
+        int              pollfd;
+        unsigned int     poll_in_thread : 1;
+};
+
+typedef struct tap_aio_context tap_aio_context_t;
+
+int  tap_aio_setup      (tap_aio_context_t *ctx,
+                         struct io_event *aio_events,
+                         int max_aio_events);
+void tap_aio_continue   (tap_aio_context_t *ctx);
+int  tap_aio_get_events (tap_aio_context_t *ctx);
+int  tap_aio_more_events(tap_aio_context_t *ctx);
+
+#endif /* __TAPAIO_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] blktap: Add fallback code to blktap for missing poll-on-aio support., Xen patchbot-unstable <=