WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] stubdom: add asynchronous disk flush support

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] stubdom: add asynchronous disk flush support
From: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
Date: Sat, 5 Apr 2008 14:48:13 +0200
Delivery-date: Sat, 05 Apr 2008 05:49:43 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Mail-followup-to: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.12-2006-07-14
stubdom: add asynchronous disk flush support

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>

diff -r 4558664bea4a extras/mini-os/blkfront.c
--- a/extras/mini-os/blkfront.c Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/blkfront.c Sat Apr 05 13:25:43 2008 +0100
@@ -48,11 +48,7 @@ struct blkfront_dev {
 
     char *nodename;
     char *backend;
-    unsigned sector_size;
-    unsigned sectors;
-    int mode;
-    int barrier;
-    int flush;
+    struct blkfront_info info;
 
 #ifdef HAVE_LIBC
     int fd;
@@ -70,7 +66,7 @@ void blkfront_handler(evtchn_port_t port
     wake_up(&blkfront_queue);
 }
 
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info)
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info)
 {
     xenbus_transaction_t xbt;
     char* err;
@@ -163,9 +159,9 @@ done:
             return NULL;
         }
         if (*c == 'w')
-            *mode = dev->mode = O_RDWR;
+            dev->info.mode = O_RDWR;
         else
-            *mode = dev->mode = O_RDONLY;
+            dev->info.mode = O_RDONLY;
         free(c);
 
         snprintf(path, sizeof(path), "%s/state", dev->backend);
@@ -177,24 +173,26 @@ done:
         xenbus_unwatch_path(XBT_NIL, path);
 
         snprintf(path, sizeof(path), "%s/info", dev->backend);
-        *info = xenbus_read_integer(path);
+        dev->info.info = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/sectors", dev->backend);
         // FIXME: read_integer returns an int, so disk size limited to 1TB for now
-        *sectors = dev->sectors = xenbus_read_integer(path);
+        dev->info.sectors = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
-        *sector_size = dev->sector_size = xenbus_read_integer(path);
+        dev->info.sector_size = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
-        dev->barrier = xenbus_read_integer(path);
+        dev->info.barrier = xenbus_read_integer(path);
 
         snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
-        dev->flush = xenbus_read_integer(path);
+        dev->info.flush = xenbus_read_integer(path);
+
+        *info = dev->info;
     }
     unmask_evtchn(dev->evtchn);
 
-    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+    printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
     printk("**************************\n");
 
     return dev;
@@ -258,11 +256,11 @@ void blkfront_aio(struct blkfront_aiocb 
     uintptr_t start, end;
 
     // Can't io at non-sector-aligned location
-    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+    ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
     // Can't io non-sector-sized amounts
-    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+    ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
     // Can't io non-sector-aligned buffer
-    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));
 
     start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
     end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
@@ -280,7 +278,7 @@ void blkfront_aio(struct blkfront_aiocb 
     req->nr_segments = n;
     req->handle = dev->handle;
     req->id = (uintptr_t) aiocbp;
-    req->sector_number = aiocbp->aio_offset / dev->sector_size;
+    req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
 
     for (j = 0; j < n; j++) {
        uintptr_t data = start + j * PAGE_SIZE;
@@ -292,10 +290,10 @@ void blkfront_aio(struct blkfront_aiocb 
        aiocbp->gref[j] = req->seg[j].gref =
             gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
        req->seg[j].first_sect = 0;
-       req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
+       req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
     }
-    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
-    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
+    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
 
     dev->ring.req_prod_pvt = i + 1;
 
@@ -313,6 +311,62 @@ void blkfront_aio_read(struct blkfront_a
 void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
 {
     blkfront_aio(aiocbp, 0);
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
+{
+    int i;
+    struct blkif_request *req;
+    int notify;
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+    req->operation = op;
+    req->nr_segments = 0;
+    req->handle = dev->handle;
+    req->id = id;
+    /* Not needed anyway, but the backend will check it */
+    req->sector_number = 0;
+    dev->ring.req_prod_pvt = i + 1;
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+    if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
+{
+    struct blkfront_dev *dev = aiocbp->aio_dev;
+    blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
+}
+
+void blkfront_sync(struct blkfront_dev *dev)
+{
+    unsigned long flags;
+
+    if (dev->info.mode == O_RDWR) {
+        if (dev->info.barrier == 1)
+            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);
+
+        if (dev->info.flush == 1)
+            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
+    }
+
+    /* Note: This won't finish if another thread enqueues requests.  */
+    local_irq_save(flags);
+    DEFINE_WAIT(w);
+    while (1) {
+       blkfront_aio_poll(dev);
+       if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+           break;
+
+       add_waiter(w, blkfront_queue);
+       local_irq_restore(flags);
+       schedule();
+       local_irq_save(flags);
+    }
+    remove_waiter(w);
+    local_irq_restore(flags);
 }
 
 int blkfront_aio_poll(struct blkfront_dev *dev)
@@ -337,93 +391,45 @@ moretodo:
        rsp = RING_GET_RESPONSE(&dev->ring, cons);
        nr_consumed++;
 
-        if (rsp->status != BLKIF_RSP_OKAY)
-            printk("block error %d for op %d\n", rsp->status, rsp->operation);
+        struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+        int status = rsp->status;
+
+        if (status != BLKIF_RSP_OKAY)
+            printk("block error %d for op %d\n", status, rsp->operation);
 
         switch (rsp->operation) {
         case BLKIF_OP_READ:
         case BLKIF_OP_WRITE:
         {
-            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
-            int status = rsp->status;
             int j;
 
             for (j = 0; j < aiocbp->n; j++)
                 gnttab_end_access(aiocbp->gref[j]);
 
-            dev->ring.rsp_cons = ++cons;
-            /* Nota: callback frees aiocbp itself */
-            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
-            if (dev->ring.rsp_cons != cons)
-                /* We reentered, we must not continue here */
-                goto out;
             break;
         }
+
+        case BLKIF_OP_WRITE_BARRIER:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            break;
+
         default:
             printk("unrecognized block operation %d response\n", rsp->operation);
-        case BLKIF_OP_WRITE_BARRIER:
-        case BLKIF_OP_FLUSH_DISKCACHE:
-            dev->ring.rsp_cons = ++cons;
+        }
+
+        dev->ring.rsp_cons = ++cons;
+        /* Nota: callback frees aiocbp itself */
+        if (aiocbp && aiocbp->aio_cb)
+            aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+        if (dev->ring.rsp_cons != cons)
+            /* We reentered, we must not continue here */
             break;
-        }
     }
 
-out:
     RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
     if (more) goto moretodo;
 
     return nr_consumed;
-}
-
-static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
-{
-    int i;
-    struct blkif_request *req;
-    int notify;
-
-    blkfront_wait_slot(dev);
-    i = dev->ring.req_prod_pvt;
-    req = RING_GET_REQUEST(&dev->ring, i);
-    req->operation = op;
-    req->nr_segments = 0;
-    req->handle = dev->handle;
-    /* Not used */
-    req->id = 0;
-    /* Not needed anyway, but the backend will check it */
-    req->sector_number = 0;
-    dev->ring.req_prod_pvt = i + 1;
-    wmb();
-    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
-    if (notify) notify_remote_via_evtchn(dev->evtchn);
-}
-
-void blkfront_sync(struct blkfront_dev *dev)
-{
-    unsigned long flags;
-
-    if (dev->mode == O_RDWR) {
-        if (dev->barrier == 1)
-            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
-
-        if (dev->flush == 1)
-            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
-    }
-
-    /* Note: This won't finish if another thread enqueues requests.  */
-    local_irq_save(flags);
-    DEFINE_WAIT(w);
-    while (1) {
-       blkfront_aio_poll(dev);
-       if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
-           break;
-
-       add_waiter(w, blkfront_queue);
-       local_irq_restore(flags);
-       schedule();
-       local_irq_save(flags);
-    }
-    remove_waiter(w);
-    local_irq_restore(flags);
 }
 
 #ifdef HAVE_LIBC
diff -r 4558664bea4a extras/mini-os/include/blkfront.h
--- a/extras/mini-os/include/blkfront.h Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/include/blkfront.h Sat Apr 05 13:25:43 2008 +0100
@@ -15,13 +15,23 @@ struct blkfront_aiocb
 
     void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
 };
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info);
+struct blkfront_info
+{
+    uint64_t sectors;
+    unsigned sector_size;
+    int mode;
+    int info;
+    int barrier;
+    int flush;
+};
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info);
 #ifdef HAVE_LIBC
 int blkfront_open(struct blkfront_dev *dev);
 #endif
 void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
 void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
 void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op);
 int blkfront_aio_poll(struct blkfront_dev *dev);
 void blkfront_sync(struct blkfront_dev *dev);
 void shutdown_blkfront(struct blkfront_dev *dev);
diff -r 4558664bea4a extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/kernel.c   Sat Apr 05 13:25:43 2008 +0100
@@ -91,9 +91,7 @@ static void netfront_thread(void *p)
 }
 
 static struct blkfront_dev *blk_dev;
-static uint64_t blk_sectors;
-static unsigned blk_sector_size;
-static int blk_mode;
+static struct blkfront_info blk_info;
 static uint64_t blk_size_read;
 static uint64_t blk_size_write;
 
@@ -111,9 +109,9 @@ static struct blk_req *blk_alloc_req(uin
 {
     struct blk_req *req = xmalloc(struct blk_req);
     req->aiocb.aio_dev = blk_dev;
-    req->aiocb.aio_buf = _xmalloc(blk_sector_size, blk_sector_size);
-    req->aiocb.aio_nbytes = blk_sector_size;
-    req->aiocb.aio_offset = sector * blk_sector_size;
+    req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size);
+    req->aiocb.aio_nbytes = blk_info.sector_size;
+    req->aiocb.aio_offset = sector * blk_info.sector_size;
     req->aiocb.data = req;
     req->next = NULL;
     return req;
@@ -125,7 +123,7 @@ static void blk_read_completed(struct bl
     if (ret)
         printk("got error code %d when reading at offset %ld\n", ret, aiocb->aio_offset);
     else
-        blk_size_read += blk_sector_size;
+        blk_size_read += blk_info.sector_size;
     free(aiocb->aio_buf);
     free(req);
 }
@@ -154,10 +152,10 @@ static void blk_write_read_completed(str
         free(req);
         return;
     }
-    blk_size_read += blk_sector_size;
+    blk_size_read += blk_info.sector_size;
     buf = (int*) aiocb->aio_buf;
     rand_value = req->rand_value;
-    for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+    for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
         if (buf[i] != rand_value) {
             printk("bogus data at offset %ld\n", aiocb->aio_offset + i);
             break;
@@ -177,7 +175,7 @@ static void blk_write_completed(struct b
         free(req);
         return;
     }
-    blk_size_write += blk_sector_size;
+    blk_size_write += blk_info.sector_size;
     /* Push write check */
     req->next = blk_to_read;
     blk_to_read = req;
@@ -195,7 +193,7 @@ static void blk_write_sector(uint64_t se
     req->rand_value = rand_value = rand();
 
     buf = (int*) req->aiocb.aio_buf;
-    for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+    for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
         buf[i] = rand_value;
         rand_value *= RAND_MIX;
     }
@@ -207,35 +205,34 @@ static void blkfront_thread(void *p)
 static void blkfront_thread(void *p)
 {
     time_t lasttime = 0;
-    int blk_info;
 
-    blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode, &blk_info);
+    blk_dev = init_blkfront(NULL, &blk_info);
     if (!blk_dev)
         return;
 
-    if (blk_info & VDISK_CDROM)
+    if (blk_info.info & VDISK_CDROM)
         printk("Block device is a CDROM\n");
-    if (blk_info & VDISK_REMOVABLE)
+    if (blk_info.info & VDISK_REMOVABLE)
         printk("Block device is removable\n");
-    if (blk_info & VDISK_READONLY)
+    if (blk_info.info & VDISK_READONLY)
         printk("Block device is read-only\n");
 
 #ifdef BLKTEST_WRITE
-    if (blk_mode == O_RDWR) {
+    if (blk_info.mode == O_RDWR) {
         blk_write_sector(0);
-        blk_write_sector(blk_sectors-1);
+        blk_write_sector(blk_info.sectors-1);
     } else
 #endif
     {
         blk_read_sector(0);
-        blk_read_sector(blk_sectors-1);
+        blk_read_sector(blk_info.sectors-1);
     }
 
     while (1) {
-        uint64_t sector = rand() % blk_sectors;
+        uint64_t sector = rand() % blk_info.sectors;
         struct timeval tv;
 #ifdef BLKTEST_WRITE
-        if (blk_mode == O_RDWR)
+        if (blk_info.mode == O_RDWR)
             blk_write_sector(sector);
         else
 #endif
diff -r 4558664bea4a tools/ioemu/block-vbd.c
--- a/tools/ioemu/block-vbd.c   Fri Apr 04 16:07:44 2008 +0100
+++ b/tools/ioemu/block-vbd.c   Sat Apr 05 13:25:43 2008 +0100
@@ -49,11 +49,7 @@ typedef struct BDRVVbdState {
 typedef struct BDRVVbdState {
     struct blkfront_dev *dev;
     int fd;
-    int type;
-    int mode;
-    int info;
-    uint64_t sectors;
-    unsigned sector_size;
+    struct blkfront_info info;
     QEMU_LIST_ENTRY(BDRVVbdState) list;
 } BDRVVbdState;
 
@@ -81,13 +77,13 @@ static int vbd_open(BlockDriverState *bs
     //handy to test posix access
     //return -EIO;
 
-    s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode, &s->info);
+    s->dev = init_blkfront((char *) filename, &s->info);
 
     if (!s->dev)
        return -EIO;
 
-    if (SECTOR_SIZE % s->sector_size) {
-       printf("sector size is %d, we only support sector sizes that divide %d\n", s->sector_size, SECTOR_SIZE);
+    if (SECTOR_SIZE % s->info.sector_size) {
+       printf("sector size is %d, we only support sector sizes that divide %d\n", s->info.sector_size, SECTOR_SIZE);
        return -EIO;
     }
 
@@ -267,6 +263,32 @@ static void vbd_aio_cancel(BlockDriverAI
     // Try to cancel. If can't, wait for it, drop the callback and call qemu_aio_release(acb)
 }
 
+static void vbd_nop_cb(void *opaque, int ret)
+{
+}
+
+static BlockDriverAIOCB *vbd_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVVbdState *s = bs->opaque;
+    VbdAIOCB *acb = NULL;
+
+    if (s->info.barrier == 1) {
+        acb = vbd_aio_setup(bs, 0, NULL, 0,
+                s->info.flush == 1 ? vbd_nop_cb : cb, opaque);
+        if (!acb)
+            return NULL;
+        blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_WRITE_BARRIER);
+    }
+    if (s->info.flush == 1) {
+        acb = vbd_aio_setup(bs, 0, NULL, 0, cb, opaque);
+        if (!acb)
+            return NULL;
+        blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_FLUSH_DISKCACHE);
+    }
+    return &acb->common;
+}
+
 static void vbd_close(BlockDriverState *bs)
 {
     BDRVVbdState *s = bs->opaque;
@@ -282,13 +304,14 @@ static int64_t  vbd_getlength(BlockDrive
 static int64_t  vbd_getlength(BlockDriverState *bs)
 {
     BDRVVbdState *s = bs->opaque;
-    return s->sectors * s->sector_size;
+    return s->info.sectors * s->info.sector_size;
 }
 
-static void vbd_flush(BlockDriverState *bs)
+static int vbd_flush(BlockDriverState *bs)
 {
     BDRVVbdState *s = bs->opaque;
     blkfront_sync(s->dev);
+    return 0;
 }
 
 /***********************************************/
@@ -333,6 +356,7 @@ BlockDriver bdrv_vbd = {
     .bdrv_aio_read = vbd_aio_read,
     .bdrv_aio_write = vbd_aio_write,
     .bdrv_aio_cancel = vbd_aio_cancel,
+    .bdrv_aio_flush = vbd_aio_flush,
     .aiocb_size = sizeof(VbdAIOCB),
     .bdrv_read = vbd_read,
     .bdrv_write = vbd_write,

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] stubdom: add asynchronous disk flush support, Samuel Thibault <=