[PATCH 2/4] (Refactored) provide vhd support to blktap
tapdisk-vhd-support.patch
Integrates the vdisk library into blktap and adds the block-vhd driver
implementation.
Signed-off-by: Boris Ostrovsky <bostrovsky@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
diff -r dff7e92bf3e9 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile Thu Jun 21 13:04:38 2007 -0400
+++ b/tools/blktap/drivers/Makefile Thu Jun 21 13:04:41 2007 -0400
@@ -7,12 +7,14 @@ QCOW_UTIL = img2qcow qcow2raw qcow-cr
QCOW_UTIL = img2qcow qcow2raw qcow-create
INST_DIR = /usr/sbin
LIBAIO_DIR = ../../libaio/src
+LIBVDISK_DIR = ../../vdisk
+LIBSDIR = lib64
CFLAGS += -Werror
CFLAGS += -Wno-unused
CFLAGS += -fno-strict-aliasing
CFLAGS += -I $(XEN_LIBXC) -I $(LIBAIO_DIR)
-CFLAGS += $(INCLUDES) -I. -I../../xenstore
+CFLAGS += $(INCLUDES) -I. -I../../xenstore -I$(LIBVDISK_DIR)
CFLAGS += -D_GNU_SOURCE
# Get gcc to generate the dependencies for us.
@@ -21,10 +23,10 @@ DEPS = .*.d
THREADLIB := -lpthread -lz
LIBS := -L. -L.. -L../lib
-LIBS += -L$(XEN_LIBXC)
+LIBS += -L$(XEN_LIBXC) -L$(LIBVDISK_DIR) -L$(LIBAIO_DIR)
LIBS += -lblktap -lxenctrl
LIBS += -lcrypto
-LIBS += -lz
+LIBS += -lz -lvdisk -laio
LIBS += -L$(XEN_XENSTORE) -lxenstore
AIOLIBS := $(LIBAIO_DIR)/libaio.a
@@ -34,15 +36,16 @@ BLK-OBJS += block-vmdk.o
BLK-OBJS += block-vmdk.o
BLK-OBJS += block-ram.o
BLK-OBJS += block-qcow.o
+BLK-OBJS += block-vhd.o
BLK-OBJS += aes.o
BLK-OBJS += tapaio.o
all: $(IBIN) qcow-util
-blktapctrl: blktapctrl.c
+blktapctrl: blktapctrl.c tapdisk.h
$(CC) $(CFLAGS) -o blktapctrl $(LIBS) blktapctrl.c
-tapdisk: $(BLK-OBJS) tapdisk.c
+tapdisk: $(BLK-OBJS) tapdisk.c tapdisk.h
$(CC) $(CFLAGS) -o tapdisk $(BLK-OBJS) tapdisk.c \
$(AIOLIBS) $(LIBS)
diff -r dff7e92bf3e9 tools/blktap/drivers/block-vhd.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/block-vhd.c Thu Jun 21 13:04:51 2007 -0400
@@ -0,0 +1,344 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/stddef.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <dlfcn.h>
+#include "tapdisk.h"
+#include <vdisk.h>
+
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
+
+struct tdvhd_state { /* per-disk private state of the VHD driver (dd->private) */
+ vdisk_dev_t *vdisk; /* allocated in tdvhd_open(), freed and reset to NULL in tdvhd_close() */
+ int poll_pipe[2]; /* dummy fd for polling on; created with pipe() in tdvhd_open() */
+ int fd; /* not referenced by any function in this file */
+};
+
+typedef struct td_cbinfo { /* per-request bookkeeping passed to vdisk_rw() as its last argument */
+ struct td_state *s; /* dd->td_state of the issuing driver */
+ td_callback_t cb; /* completion callback supplied by the caller */
+ struct vdisk_dev *vdisk; /* vdisk the request was issued against */
+ void *private; /* opaque value handed back to cb */
+ int id; /* request id; also indexes blkif->pending_list */
+ int cnt; /* AIOs created for this request; decremented per completion in tdvhd_do_callbacks() */
+} td_cbinfo_t;
+
+/*
+ * Common implementation behind tdvhd_queue_read() and tdvhd_queue_write().
+ *
+ * Allocates a td_cbinfo_t describing the request and passes it to
+ * vdisk_rw() as the opaque callback argument.  tdvhd_do_callbacks()
+ * releases it once its AIO count reaches zero.
+ *
+ * dd         - disk driver instance; dd->private is the tdvhd_state
+ * sector     - first sector of the transfer
+ * nb_sectors - number of sectors to transfer
+ * buf        - data buffer
+ * cb         - completion callback
+ * id         - request id; also indexes blkif->pending_list
+ * private    - opaque value passed back to cb
+ * op         - VDISK_READ or VDISK_WRITE
+ *
+ * Returns -ENOMEM if the callback info cannot be allocated, otherwise the
+ * value returned by vdisk_rw().
+ */
+static int
+tdvhd_queue_rw(struct disk_driver *dd, uint64_t sector,
+               int nb_sectors, char *buf, td_callback_t cb,
+               int id, void *private, int op)
+{
+    struct tdvhd_state *prv = (struct tdvhd_state *)dd->private;
+    int res;
+    int aio_cnt = prv->vdisk->aio_cnt;
+    td_cbinfo_t *cbi;
+
+    cbi = malloc(sizeof(*cbi));
+    if (cbi == NULL) {
+        VIDDBG(0, "Can't allocate callback info\n");
+        return (-ENOMEM);
+    }
+
+    cbi->s = dd->td_state;
+    cbi->cb = cb;
+    cbi->id = id;
+    cbi->vdisk = prv->vdisk;
+    cbi->private = private;
+
+    res = vdisk_rw(prv->vdisk, sector, (uint8_t *)buf,
+                   nb_sectors, op, (void *)cbi);
+    if (res == -EBUSY) {
+        blkif_t *blkif = cbi->s->blkif;
+
+        /*
+         * This really should be done in tapdisk.c.
+         * However, since we don't want to touch it,
+         * we do it here.
+         */
+        blkif->pending_list[id].secs_pending -= nb_sectors;
+
+        /*
+         * The request was rejected, so nothing else refers to cbi.
+         * Return right away: falling through would write cbi->cnt
+         * after the free below.
+         */
+        free(cbi);
+        return (res);
+    }
+
+    // Didn't use async IO
+    if (res > 0) {
+        cbi->cb(dd, 0/*XXX: pass error*/,
+                sector, res>>9,
+                cbi->id, cbi->private);
+    }
+
+    // How many AIOs have been created for this request
+    cbi->cnt = prv->vdisk->aio_cnt - aio_cnt;
+
+    /*
+     * NOTE(review): when no AIO was created (cnt == 0), no completion
+     * will reach tdvhd_do_callbacks() to free cbi, so it looks leaked.
+     * Confirm that vdisk keeps no reference before freeing it here.
+     */
+    return (res);
+
+}
+
+/* Queue a read request by forwarding to tdvhd_queue_rw() with VDISK_READ. */
+int tdvhd_queue_read(struct disk_driver *dd, uint64_t sector,
+                     int nb_sectors, char *buf, td_callback_t cb,
+                     int id, void *private)
+{
+    int rc;
+
+    rc = tdvhd_queue_rw(dd, sector, nb_sectors, buf,
+                        cb, id, private, VDISK_READ);
+    return rc;
+}
+
+/* Queue a write request by forwarding to tdvhd_queue_rw() with VDISK_WRITE. */
+int tdvhd_queue_write(struct disk_driver *dd, uint64_t sector,
+                      int nb_sectors, char *buf, td_callback_t cb,
+                      int id, void *private)
+{
+    int rc;
+
+    rc = tdvhd_queue_rw(dd, sector, nb_sectors, buf,
+                        cb, id, private, VDISK_WRITE);
+    return rc;
+}
+
+/*
+ * Shut down the vdisk attached to dd, release its memory and close both
+ * ends of the poll pipe.  Always returns 0; calls made with no live vdisk
+ * (NULL dd, NULL private state or NULL vdisk) do nothing.
+ */
+int tdvhd_close(struct disk_driver *dd)
+{
+    struct tdvhd_state *state;
+
+    // We can be called more than once
+    if (!dd)
+        return (0);
+
+    state = (struct tdvhd_state *)dd->private;
+    if (!state || !state->vdisk)
+        return (0); // XXX: Or error?
+
+    vdisk_fini(state->vdisk);
+    free(state->vdisk);
+    state->vdisk = NULL; /* makes a repeated close a no-op */
+
+    close(state->poll_pipe[0]);
+    close(state->poll_pipe[1]);
+
+    return 0;
+}
+
+/*
+ * Reset every entry of dd->io_fd to 0, then publish the vdisk's AIO fd
+ * in slot 0.
+ */
+static void tdvhd_get_fd(struct disk_driver *dd)
+{
+    struct tdvhd_state *state = (struct tdvhd_state *)dd->private;
+    vdisk_dev_t *vd = (vdisk_dev_t *)state->vdisk;
+    int slot = 0;
+
+    /* clear the whole FD array first */
+    while (slot < MAX_IOFD) {
+        dd->io_fd[slot] = 0;
+        slot++;
+    }
+
+    dd->io_fd[0] = vd->aio_fd;
+}
+
+/*
+ * Open the disk file and initialize vdisk and aio state.
+ *
+ * dd       - disk driver instance; dd->private is the tdvhd_state
+ * filename - image path handed to vdisk_init()
+ * flags    - unused
+ *
+ * If the AIO context cannot be set up, the failure is logged and the
+ * vdisk falls back to use_aio = 0.
+ *
+ * Returns 0 on success, -ENOMEM if the vdisk cannot be allocated, a
+ * negative code derived from vdisk_init() if it fails, or -errno if the
+ * poll pipe cannot be created.  On every failure path prv->vdisk is
+ * released and reset to NULL so that a later tdvhd_close() is a no-op.
+ */
+int tdvhd_open (struct disk_driver *dd, const char *filename, td_flag_t flags)
+{
+    struct tdvhd_state *prv = (struct tdvhd_state *)dd->private;
+    struct program_props props;
+    int i, ret;
+
+    prv->vdisk = malloc(sizeof(*prv->vdisk));
+    if (prv->vdisk == NULL) {
+        VIDDBG(0, "Can't allocate memory for vdisk\n");
+        return (-ENOMEM);
+    }
+
+    prv->vdisk->use_aio = 1;
+
+    /* Zero first so any field not set below has a defined value. */
+    memset(&props, 0, sizeof(props));
+    props.alloc_func = NULL;
+    props.free_func = NULL;
+    props.out_target = VDISK_OUT_SYSLOG;
+    ret = vdisk_init(prv->vdisk, (char *)filename, &props, 0);
+    if (ret) {
+        VIDDBG(0, "Can't initialize vdisk for %s\n", filename);
+        free(prv->vdisk);
+        /* tdvhd_close() tests this pointer; don't leave it dangling. */
+        prv->vdisk = NULL;
+        return (ret>0?(-1*ret):ret);
+    }
+
+    /* aio is only used in blktap, init here instead of in common*/
+    /* Initialize async IO data */
+    for (i = 0; i < VDISK_HASH_SZ; i++)
+        prv->vdisk->hash[i].key = VDISK_INVALID_HASH;
+
+    prv->vdisk->aio_cnt = 0;
+
+    ret = tap_aio_setup(&prv->vdisk->aio_ctx, prv->vdisk->aio_events,
+                        MAX_AIO_REQS);
+    if (ret < 0) {
+        if (ret == -EAGAIN) {
+            DPRINTF("Couldn't setup AIO context. If you are "
+                    "trying to concurrently use a large number "
+                    "of blktap-based disks, you may need to "
+                    "increase the system-wide aio request limit. "
+                    "(e.g. 'echo 1048576 > /proc/sys/fs/"
+                    "aio-max-nr')\n");
+        } else {
+            DPRINTF("Couldn't setup AIO context.\n");
+        }
+
+        /* Not fatal: carry on without async IO. */
+        prv->vdisk->use_aio = 0;
+    }
+
+    /* set up a pipe so that we can hand back a poll fd that won't fire.*/
+    if (pipe(prv->poll_pipe) != 0) {
+        int err = errno; /* save before cleanup can overwrite it */
+
+        /*
+         * NOTE(review): the AIO context set up above is not torn down
+         * here; no teardown helper is visible in this file.
+         */
+        vdisk_fini(prv->vdisk);
+        free(prv->vdisk);
+        prv->vdisk = NULL;
+        return (0 - err);
+    }
+
+    // VHD format limits geometry to roughly 136GB (0xffff cylinders,
+    // 0x10 heads and 0xff sectors per cylinder). We'll report "original
+    // size" (as specified by the header), not CHS product
+    dd->td_state->size = prv->vdisk->sz >> 9;
+    dd->td_state->sector_size = DEFAULT_SECTOR_SIZE;
+
+    tdvhd_get_fd(dd);
+
+    return 0;
+}
+
+/*
+ * Hand all queued AIO requests to io_submit() and reset the queue count.
+ * Does nothing when AIO is disabled or nothing is queued.  A short
+ * submission is only logged.  Always returns 0.
+ */
+int tdvhd_submit(struct disk_driver *dd)
+{
+    struct tdvhd_state *state = (struct tdvhd_state *)dd->private;
+    vdisk_dev_t *vd = (vdisk_dev_t *)state->vdisk;
+    int submitted;
+
+    if (!vd->use_aio || !vd->aio_cnt)
+        return (0);
+
+    VIDDBG(50, "Submitting %d requests\n", vd->aio_cnt);
+
+    submitted = io_submit(vd->aio_ctx.aio_ctx, vd->aio_cnt,
+                          vd->aio_submit);
+    if (submitted != vd->aio_cnt)
+        VIDDBG(0, "Can't submit %d AIO requests (submitted %d)\n",
+               vd->aio_cnt, submitted);
+
+    /* The queue is considered drained whatever io_submit() accepted. */
+    vd->aio_cnt = 0;
+
+    return 0;
+}
+
+/*
+ * Process completed AIO events for this disk.
+ *
+ * For every event the result is checked, the vdisk library is notified
+ * through vdisk_xfer_cb(), and the request's callback is invoked.  The
+ * td_cbinfo_t is freed once its AIO count drops to zero.  Batches are
+ * fetched until one comes back empty, then tap_aio_continue() is called.
+ *
+ * dd  - disk driver instance; dd->private is the tdvhd_state
+ * sid - unused
+ *
+ * Returns the sum of the callback return values, or -EIO as soon as an
+ * event carries no pending AIO data (the remaining events are then not
+ * processed and tap_aio_continue() is not called).
+ */
+int tdvhd_do_callbacks(struct disk_driver *dd, int sid)
+{
+ struct tdvhd_state *prv = (struct tdvhd_state *)dd->private;
+ vdisk_dev_t *vdisk = (vdisk_dev_t *)prv->vdisk;
+ int ret, i, *ptr;
+ struct io_event *ep;
+ td_cbinfo_t *cbi;
+ uint32_t blk;
+ int nr_events, rsp = 0;
+
+ nr_events = tap_aio_get_events(&vdisk->aio_ctx);
+repeat:
+ /* Walk the nr_events entries at the start of vdisk->aio_events. */
+ for (ep = vdisk->aio_events, i = nr_events; i-- > 0; ep++) {
+ struct iocb *io = ep->obj;
+ struct pending_aio *pio;
+ int err;
+
+ err = 0;
+
+ pio = (struct pending_aio *)io->data;
+ if (pio == NULL) {
+ VIDDBG(0, "Can't find pending AIO data\n");
+ return (-EIO);
+ }
+
+ /* A negative result is an error code; a short transfer becomes -EIO. */
+ if ((signed long)ep->res < 0) {
+ VIDDBG(0, "AIO to block %u for %u blocks reported "
+ "error %ld (%ld)\n", pio->block, pio->num_blocks,
+ ep->res, ep->res2);
+ err = ep->res;
+ } else if (ep->res != io->u.c.nbytes) {
+ /* TODO: handle this case better. */
+ ptr = (int *)&ep->res;
+ VIDDBG(0, "AIO did less than I asked it to "
+ "[%lu,%lu,%d]\n",
+ ep->res, io->u.c.nbytes, *ptr);
+ err = -EIO;
+ }
+
+ /* presumably the td_cbinfo_t handed to vdisk_rw() in tdvhd_queue_rw() -- confirm */
+ cbi = (td_cbinfo_t *)pio->aiocb;
+ if (cbi == NULL) {
+ VIDDBG(0, "callback info is missing\n");
+ //XXX: This is pretty bad. Maybe we should die?
+ continue;
+ }
+ if (cbi->vdisk == NULL) {
+ VIDDBG(0, "Can't find vdisk for pending AIO\n");
+ err = -EIO;
+ } else {
+ int vdisk_err;
+
+ // Let vdisk know that a pending IO has completed
+ pio->res = err;
+ vdisk_err = vdisk_xfer_cb(cbi->vdisk, pio);
+ if (vdisk_err != 0) {
+ VIDDBG(0, "vdisk callback failed\n");
+ //XXX: return (error) ???
+
+ if (err == 0) // Report the earliest error
+ err = vdisk_err;
+ }
+ }
+
+ /* One fewer AIO outstanding for this request. */
+ cbi->cnt--;
+
+ // blktap's callback (usually to kick the driver)
+ rsp += cbi->cb(dd, err,
+ pio->block, /* sector */
+ pio->num_blocks, /* nb_sectors */
+ cbi->id, cbi->private);
+
+ /* Last AIO of the request: the callback info is no longer needed. */
+ if (cbi->cnt == 0)
+ free(cbi);
+ }
+
+ /* Keep fetching batches until one comes back with zero events. */
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&vdisk->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&vdisk->aio_ctx);
+
+ // XXX: What do we return on errors?
+ return rsp;
+}
+/* Parent lookup stub: always returns TD_NO_PARENT and leaves *id untouched. */
+int tdvhd_get_parent_id (struct disk_driver *dd, struct disk_id *id)
+{
+ return TD_NO_PARENT;
+}
+
+/* Parent validation stub: ignores its arguments and always returns -EINVAL. */
+int tdvhd_validate_parent (struct disk_driver *dd,
+ struct disk_driver *p, td_flag_t flags)
+{
+ return -EINVAL;
+}
+
+
+
+/*
+ * Driver descriptor for the VHD backend.  It is declared extern in
+ * tapdisk.h and referenced from the vhd_disk entry there.  The
+ * initializers are positional, so their order must match the declaration
+ * of struct tap_disk (not visible in this file).
+ */
+struct tap_disk tapdisk_vhd = {
+ "tapdisk_vhd",
+ sizeof(struct tdvhd_state),
+ tdvhd_open,
+ tdvhd_queue_read,
+ tdvhd_queue_write,
+ tdvhd_submit,
+ tdvhd_close,
+ tdvhd_do_callbacks,
+ tdvhd_get_parent_id,
+ tdvhd_validate_parent
+};
diff -r dff7e92bf3e9 tools/blktap/drivers/tapdisk.c
--- a/tools/blktap/drivers/tapdisk.c Thu Jun 21 13:04:38 2007 -0400
+++ b/tools/blktap/drivers/tapdisk.c Thu Jun 21 13:04:41 2007 -0400
@@ -556,15 +556,22 @@ int send_responses(struct disk_driver *d
preq = &blkif->pending_list[idx];
req = &preq->req;
- if (res == BLK_NOT_ALLOCATED) {
- res = do_cow_read(dd, req, sidx, sector, nr_secs);
+ if (res == BLK_NOT_ALLOCATED) {
+#if 1
+ /* VHD - do not support */
+ DPRINTF("invalid for VHD's\n");
+ return 0;
+#else
+ /* Original xen code */
+ res = do_cow_read(dd, req, sidx, sector, nr_secs);
if (res >= 0) {
secs_done = res;
res = 0;
} else
secs_done = 0;
- }
-
+#endif
+ }
+
preq->secs_pending -= secs_done;
if (res == -EBUSY && preq->submitting)
diff -r dff7e92bf3e9 tools/blktap/drivers/tapdisk.h
--- a/tools/blktap/drivers/tapdisk.h Thu Jun 21 13:04:38 2007 -0400
+++ b/tools/blktap/drivers/tapdisk.h Thu Jun 21 13:04:41 2007 -0400
@@ -156,6 +156,7 @@ extern struct tap_disk tapdisk_vmdk;
extern struct tap_disk tapdisk_vmdk;
extern struct tap_disk tapdisk_ram;
extern struct tap_disk tapdisk_qcow;
+extern struct tap_disk tapdisk_vhd;
#define MAX_DISK_TYPES 20
@@ -164,6 +165,7 @@ extern struct tap_disk tapdisk_qcow;
#define DISK_TYPE_VMDK 2
#define DISK_TYPE_RAM 3
#define DISK_TYPE_QCOW 4
+#define DISK_TYPE_VHD 5
/*Define Individual Disk Parameters here */
@@ -214,6 +216,16 @@ static disk_info_t qcow_disk = {
0,
#ifdef TAPDISK
&tapdisk_qcow,
+#endif
+};
+
+static disk_info_t vhd_disk = {
+ DISK_TYPE_VHD,
+ "VHD disk (vhd)",
+ "vhd",
+ 1,
+#ifdef TAPDISK
+ &tapdisk_vhd,
#endif
};
@@ -224,6 +236,7 @@ static disk_info_t *dtypes[] = {
&vmdk_disk,
&ram_disk,
&qcow_disk,
+ &vhd_disk,
};
typedef struct driver_list_entry {
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|