WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] minios: add blkfront

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] minios: add blkfront
From: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
Date: Thu, 17 Jan 2008 16:02:21 +0000
Delivery-date: Thu, 17 Jan 2008 08:03:46 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Mail-followup-to: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.12-2006-07-14
minios: add blkfront

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>

# HG changeset patch
# User Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
# Date 1200585684 0
# Node ID c4babfc157d51322e7167a7a84e0462ea3ccedbe
# Parent  44f1f0fbb7df42f71f5b61857fe4bf3d3c33fae5
minios: add blkfront

diff -r 44f1f0fbb7df -r c4babfc157d5 extras/mini-os/blkfront.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/blkfront.c Thu Jan 17 16:01:24 2008 +0000
@@ -0,0 +1,392 @@
+/* Minimal block driver for Mini-OS. 
+ * Copyright (c) 2007-2008 Samuel Thibault.
+ * Based on netfront.c.
+ */
+
+#include <os.h>
+#include <xenbus.h>
+#include <events.h>
+#include <errno.h>
+#include <xen/io/blkif.h>
+#include <gnttab.h>
+#include <xmalloc.h>
+#include <time.h>
+#include <blkfront.h>
+#include <lib.h>
+#include <fcntl.h>
+
+/* Note: we generally don't need to disable IRQs since we hardly do anything in
+ * the interrupt handler.  */
+
+/* Note: we really suppose non-preemptive threads.  */
+
+DECLARE_WAIT_QUEUE_HEAD(blkfront_queue);
+
+
+
+
+#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
+#define GRANT_INVALID_REF 0
+
+
+struct blk_buffer {
+    void* page;
+    grant_ref_t gref;
+};
+
+struct blkfront_dev {
+    struct blkif_front_ring ring;
+    grant_ref_t ring_ref;
+    evtchn_port_t evtchn, local_port;
+    blkif_vdev_t handle;
+
+    char *nodename;
+    char *backend;
+    unsigned sector_size;
+    unsigned sectors;
+    int mode;
+    int barrier;
+    int flush;
+};
+
+static inline int xenblk_rxidx(RING_IDX idx)
+{
+    return idx & (BLK_RING_SIZE - 1);
+}
+
+void blkfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
+{
+    wake_up(&blkfront_queue);
+}
+
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned 
*sector_size, int *mode)
+{
+    xenbus_transaction_t xbt;
+    char* err;
+    char* message=NULL;
+    struct blkif_sring *s;
+    int retry=0;
+    char* msg;
+    char* c;
+
+    struct blkfront_dev *dev;
+
+    ASSERT(!strncmp(nodename, "/local/domain/", 14));
+    nodename = strchr(nodename + 14, '/') + 1;
+
+    char path[strlen(nodename) + 1 + 10 + 1];
+
+    printk("******************* BLKFRONT for %s **********\n\n\n", nodename);
+
+    dev = malloc(sizeof(*dev));
+    dev->nodename = strdup(nodename);
+
+    s = (struct blkif_sring*) alloc_page();
+    memset(s,0,PAGE_SIZE);
+
+
+    SHARED_RING_INIT(s);
+    FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);
+
+    dev->ring_ref = gnttab_grant_access(0,virt_to_mfn(s),0);
+
+    evtchn_alloc_unbound_t op;
+    op.dom = DOMID_SELF;
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    op.remote_dom = xenbus_read_integer(path); 
+    HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+    clear_evtchn(op.port);        /* Without, handler gets invoked now! */
+    dev->local_port = bind_evtchn(op.port, blkfront_handler, dev);
+    dev->evtchn=op.port;
+
+    // FIXME: proper frees on failures
+again:
+    err = xenbus_transaction_start(&xbt);
+    if (err) {
+        printk("starting transaction\n");
+    }
+
+    err = xenbus_printf(xbt, nodename, "ring-ref","%u",
+                dev->ring_ref);
+    if (err) {
+        message = "writing ring-ref";
+        goto abort_transaction;
+    }
+    err = xenbus_printf(xbt, nodename,
+                "event-channel", "%u", dev->evtchn);
+    if (err) {
+        message = "writing event-channel";
+        goto abort_transaction;
+    }
+
+    err = xenbus_printf(xbt, nodename, "state", "%u",
+            4); /* connected */
+
+
+    err = xenbus_transaction_end(xbt, 0, &retry);
+    if (retry) {
+            goto again;
+        printk("completing transaction\n");
+    }
+
+    goto done;
+
+abort_transaction:
+    xenbus_transaction_end(xbt, 1, &retry);
+    return NULL;
+
+done:
+
+    snprintf(path, sizeof(path), "%s/backend", nodename);
+    msg = xenbus_read(XBT_NIL, path, &dev->backend);
+    if (msg) {
+        printk("Error %s when reading the backend path %s\n", msg, path);
+        return NULL;
+    }
+
+    printk("backend at %s\n", dev->backend);
+
+    dev->handle = simple_strtoul(strrchr(nodename, '/')+1, NULL, 0);
+
+    {
+        char path[strlen(dev->backend) + 1 + 19 + 1];
+        snprintf(path, sizeof(path), "%s/mode", dev->backend);
+        msg = xenbus_read(XBT_NIL, path, &c);
+        if (msg) {
+            printk("Error %s when reading the mode\n", msg);
+            return NULL;
+        }
+        if (*c == 'w')
+            *mode = dev->mode = O_RDWR;
+        else
+            *mode = dev->mode = O_RDONLY;
+        free(c);
+
+        snprintf(path, sizeof(path), "%s/state", dev->backend);
+
+        xenbus_watch_path(XBT_NIL, path);
+
+        xenbus_wait_for_value(path,"4");
+
+        xenbus_unwatch_path(XBT_NIL, path);
+
+        snprintf(path, sizeof(path), "%s/sectors", dev->backend);
+        // FIXME: read_integer returns an int, so disk size limited to 1TB for 
now
+        *sectors = dev->sectors = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
+        *sector_size = dev->sector_size = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
+        dev->barrier = xenbus_read_integer(path);
+
+        snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
+        dev->flush = xenbus_read_integer(path);
+    }
+
+    printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+    printk("**************************\n");
+
+    return dev;
+}
+
+void shutdown_blkfront(struct blkfront_dev *dev)
+{
+    char* err;
+    char *nodename = dev->nodename;
+
+    char path[strlen(dev->backend) + 1 + 5 + 1];
+
+    blkfront_sync(dev);
+
+    printk("close blk: backend at %s\n",dev->backend);
+
+    snprintf(path, sizeof(path), "%s/state", dev->backend);
+    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 5); /* closing */
+    xenbus_wait_for_value(path,"5");
+
+    err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6);
+    xenbus_wait_for_value(path,"6");
+
+    unbind_evtchn(dev->local_port);
+
+    free(nodename);
+    free(dev->backend);
+    free(dev);
+}
+
+static void blkfront_wait_slot(struct blkfront_dev *dev)
+{
+    /* Wait for a slot */
+    if (RING_FULL(&dev->ring)) {
+       unsigned long flags;
+       DEFINE_WAIT(w);
+       local_irq_save(flags);
+       while (1) {
+           blkfront_aio_poll(dev);
+           if (!RING_FULL(&dev->ring))
+               break;
+           /* Really no slot, go to sleep. */
+           add_waiter(w, blkfront_queue);
+           local_irq_restore(flags);
+           schedule();
+           local_irq_save(flags);
+       }
+       remove_waiter(w);
+       local_irq_restore(flags);
+    }
+}
+
+/* Issue an aio */
+void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
+{
+    struct blkfront_dev *dev = aiocbp->aio_dev;
+    struct blkif_request *req;
+    RING_IDX i;
+    int notify;
+    int n, j;
+    uintptr_t start, end;
+
+    // Can't io at non-sector-aligned location
+    ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+    // Can't io non-sector-sized amounts
+    ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+    // Can't io non-sector-aligned buffer
+    ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+
+    start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
+    end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & 
PAGE_MASK;
+    n = (end - start) / PAGE_SIZE;
+
+    /* qemu's IDE max multsect is 16 (8KB) and SCSI max DMA was set to 32KB,
+     * so max 44KB can't happen */
+    ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+
+    req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+    req->nr_segments = n;
+    req->handle = dev->handle;
+    req->id = (uintptr_t) aiocbp;
+    req->sector_number = aiocbp->aio_offset / dev->sector_size;
+
+    for (j = 0; j < n; j++) {
+       uintptr_t data = start + j * PAGE_SIZE;
+       aiocbp->gref[j] = req->seg[j].gref =
+            gnttab_grant_access(0, virt_to_mfn(data), write);
+       req->seg[j].first_sect = 0;
+       req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
+    }
+    req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 
dev->sector_size;
+    req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + 
aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+
+    dev->ring.req_prod_pvt = i + 1;
+
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+
+    if(notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_aio_write(struct blkfront_aiocb *aiocbp)
+{
+    blkfront_aio(aiocbp, 1);
+}
+
+void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
+{
+    blkfront_aio(aiocbp, 0);
+}
+
+int blkfront_aio_poll(struct blkfront_dev *dev)
+{
+    RING_IDX rp, cons;
+    struct blkif_response *rsp;
+
+moretodo:
+    rp = dev->ring.sring->rsp_prod;
+    rmb(); /* Ensure we see queued responses up to 'rp'. */
+    cons = dev->ring.rsp_cons;
+
+    int nr_consumed = 0;
+    while ((cons != rp))
+    {
+       rsp = RING_GET_RESPONSE(&dev->ring, cons);
+
+        switch (rsp->operation) {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+        {
+            struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+            int n = (aiocbp->aio_nbytes + PAGE_SIZE - 1) / PAGE_SIZE, j;
+            for (j = 0; j < n; j++)
+                gnttab_end_access(aiocbp->gref[j]);
+
+            /* Nota: callback frees aiocbp itself */
+            aiocbp->aio_cb(aiocbp, rsp->status ? -EIO : 0);
+            break;
+        }
+        case BLKIF_OP_WRITE_BARRIER:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            break;
+        default:
+            printk("unrecognized block operation %d response\n", 
rsp->operation);
+            break;
+        }
+
+       nr_consumed++;
+       ++cons;
+    }
+    dev->ring.rsp_cons = cons;
+
+    int more;
+    RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
+    if (more) goto moretodo;
+
+    return nr_consumed;
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
+{
+    int i;
+    struct blkif_request *req;
+    int notify;
+
+    blkfront_wait_slot(dev);
+    i = dev->ring.req_prod_pvt;
+    req = RING_GET_REQUEST(&dev->ring, i);
+    req->operation = op;
+    dev->ring.req_prod_pvt = i + 1;
+    wmb();
+    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+    if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_sync(struct blkfront_dev *dev)
+{
+    unsigned long flags;
+
+    if (dev->barrier == 1)
+        blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
+
+    if (dev->flush == 1)
+        blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
+
+    /* Note: This won't finish if another thread enqueues requests.  */
+    local_irq_save(flags);
+    DEFINE_WAIT(w);
+    while (1) {
+       blkfront_aio_poll(dev);
+       if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+           break;
+
+       add_waiter(w, blkfront_queue);
+       local_irq_restore(flags);
+       schedule();
+       local_irq_save(flags);
+    }
+    remove_waiter(w);
+    local_irq_restore(flags);
+}
diff -r 44f1f0fbb7df -r c4babfc157d5 extras/mini-os/include/blkfront.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/include/blkfront.h Thu Jan 17 16:01:24 2008 +0000
@@ -0,0 +1,26 @@
+#include <wait.h>
+#include <xen/io/blkif.h>
+#include <types.h>
+struct blkfront_dev;
+struct blkfront_aiocb
+{
+    struct blkfront_dev *aio_dev;
+    uint8_t *aio_buf;
+    size_t aio_nbytes;
+    uint64_t aio_offset;
+    void *data;
+
+    grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+
+    void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
+};
+struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned 
*sector_size, int *mode);
+int blkfront_open(struct blkfront_dev *dev);
+void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
+void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+int blkfront_aio_poll(struct blkfront_dev *dev);
+void blkfront_sync(struct blkfront_dev *dev);
+void shutdown_blkfront(struct blkfront_dev *dev);
+
+extern struct wait_queue_head blkfront_queue;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] minios: add blkfront, Samuel Thibault <=