WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [TAPDISK] add tapdisk support for image chaining

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [TAPDISK] add tapdisk support for image chaining
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Sat, 17 Feb 2007 04:50:06 -0800
Delivery-date: Sat, 17 Feb 2007 04:49:42 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Jake Wires <jwires@xxxxxxxxxxxxx>
# Date 1171686687 28800
# Node ID 3c827d68fa87fbbe75e218cdaf4ec02f437009ce
# Parent  32a0599135914ecb98bd8e1a7c9111cc64ab3ce7
[TAPDISK] add tapdisk support for image chaining

Enables tapdisk to chain an arbitrary number of VDIs, propagating reads of
holes in children to their parent images.  Introduces two new functions to the
tapdisk interface (td_has_parent and td_get_parent) to facilitate this.
Modifies the QCoW plugin to take advantage of these changes, thus providing
support for arbitrarily long chains of QCoW images.
---
 tools/blktap/drivers/Makefile     |   16 -
 tools/blktap/drivers/block-aio.c  |  132 ++++----
 tools/blktap/drivers/block-qcow.c |  563 +++++++++++++++++---------------------
 tools/blktap/drivers/block-ram.c  |  125 ++++----
 tools/blktap/drivers/block-sync.c |   95 +++---
 tools/blktap/drivers/block-vmdk.c |   99 +++---
 tools/blktap/drivers/img2qcow.c   |   28 +
 tools/blktap/drivers/qcow2raw.c   |   75 ++---
 tools/blktap/drivers/tapdisk.c    |  372 ++++++++++++++++++-------
 tools/blktap/drivers/tapdisk.h    |   62 ++--
 tools/blktap/lib/blktaplib.h      |   13 
 tools/blktap/lib/xs_api.c         |    8 
 12 files changed, 889 insertions(+), 699 deletions(-)

diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/Makefile     Fri Feb 16 20:31:27 2007 -0800
@@ -5,7 +5,7 @@ INCLUDES += -I.. -I../lib
 
 IBIN         = blktapctrl tapdisk
 QCOW_UTIL    = img2qcow qcow2raw qcow-create
-INST_DIR  = /usr/sbin
+INST_DIR     = /usr/sbin
 LIBAIO_DIR   = ../../libaio/src
 
 CFLAGS   += -Werror
@@ -17,7 +17,7 @@ CFLAGS   += -D_GNU_SOURCE
 
 # Get gcc to generate the dependencies for us.
 CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
+DEPS      = .*.d
 
 THREADLIB := -lpthread -lz
 LIBS      := -L. -L.. -L../lib
@@ -29,10 +29,10 @@ LIBS      += -L$(XEN_XENSTORE) -lxenstor
 
 AIOLIBS   := $(LIBAIO_DIR)/libaio.a
 
-BLK-OBJS  := block-aio.o 
-BLK-OBJS  += block-sync.o 
+BLK-OBJS  := block-aio.o
+BLK-OBJS  += block-sync.o
 BLK-OBJS  += block-vmdk.o
-BLK-OBJS  += block-ram.o 
+BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-qcow.o
 BLK-OBJS  += aes.o
 
@@ -52,13 +52,13 @@ qcow-util: img2qcow qcow2raw qcow-create
 qcow-util: img2qcow qcow2raw qcow-create
 
 img2qcow qcow2raw qcow-create: %: $(BLK-OBJS)
-       $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS)  $(LIBS)
+       $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS)
 
 install: all
-       $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR)
+       $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(VHD_UTIL) $(DESTDIR)$(INST_DIR)
 
 clean:
-       rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL)
+       rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) $(VHD_UTIL)
 
 .PHONY: clean install
 
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c  Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/block-aio.c  Fri Feb 16 20:31:27 2007 -0800
@@ -58,6 +58,7 @@ struct pending_aio {
        td_callback_t cb;
        int id;
        void *private;
+       uint64_t lsec;
 };
 
 struct tdaio_state {
@@ -139,12 +140,23 @@ static int get_image_info(struct td_stat
        return 0;
 }
 
+static inline void init_fds(struct disk_driver *dd)
+{
+       int i;
+       struct tdaio_state *prv = (struct tdaio_state *)dd->private;
+
+       for(i = 0; i < MAX_IOFD; i++) 
+               dd->io_fd[i] = 0;
+
+       dd->io_fd[0] = prv->poll_fd;
+}
+
 /* Open the disk file and initialize aio state. */
-int tdaio_open (struct td_state *s, const char *name)
+int tdaio_open (struct disk_driver *dd, const char *name)
 {
        int i, fd, ret = 0;
-       struct tdaio_state *prv = (struct tdaio_state *)s->private;
-       s->private = prv;
+       struct td_state    *s   = dd->td_state;
+       struct tdaio_state *prv = (struct tdaio_state *)dd->private;
 
        DPRINTF("block-aio open('%s')", name);
        /* Initialize AIO */
@@ -194,18 +206,21 @@ int tdaio_open (struct td_state *s, cons
 
         prv->fd = fd;
 
+       init_fds(dd);
        ret = get_image_info(s, fd);
+
 done:
        return ret;     
 }
 
-int tdaio_queue_read(struct td_state *s, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
-                              int id, void *private)
+int tdaio_queue_read(struct disk_driver *dd, uint64_t sector,
+                    int nb_sectors, char *buf, td_callback_t cb,
+                    int id, void *private)
 {
        struct   iocb *io;
        struct   pending_aio *pio;
-       struct   tdaio_state *prv = (struct tdaio_state *)s->private;
+       struct   td_state    *s   = dd->td_state;
+       struct   tdaio_state *prv = (struct tdaio_state *)dd->private;
        int      size    = nb_sectors * s->sector_size;
        uint64_t offset  = sector * (uint64_t)s->sector_size;
        long     ioidx;
@@ -219,22 +234,24 @@ int tdaio_queue_read(struct td_state *s,
        pio->cb = cb;
        pio->id = id;
        pio->private = private;
+       pio->lsec = sector;
        
        io_prep_pread(io, prv->fd, buf, size, offset);
        io->data = (void *)ioidx;
        
        prv->iocb_queue[prv->iocb_queued++] = io;
-       
-       return 0;
-}
-                       
-int tdaio_queue_write(struct td_state *s, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
-                              int id, void *private)
+
+       return 0;
+}
+                       
+int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
+                     int nb_sectors, char *buf, td_callback_t cb,
+                     int id, void *private)
 {
        struct   iocb *io;
        struct   pending_aio *pio;
-       struct   tdaio_state *prv = (struct tdaio_state *)s->private;
+       struct   td_state    *s   = dd->td_state;
+       struct   tdaio_state *prv = (struct tdaio_state *)dd->private;
        int      size    = nb_sectors * s->sector_size;
        uint64_t offset  = sector * (uint64_t)s->sector_size;
        long     ioidx;
@@ -248,19 +265,20 @@ int tdaio_queue_write(struct td_state *s
        pio->cb = cb;
        pio->id = id;
        pio->private = private;
+       pio->lsec = sector;
        
        io_prep_pwrite(io, prv->fd, buf, size, offset);
        io->data = (void *)ioidx;
        
        prv->iocb_queue[prv->iocb_queued++] = io;
-       
-       return 0;
-}
-                       
-int tdaio_submit(struct td_state *s)
+
+       return 0;
+}
+                       
+int tdaio_submit(struct disk_driver *dd)
 {
        int ret;
-       struct   tdaio_state *prv = (struct tdaio_state *)s->private;
+       struct tdaio_state *prv = (struct tdaio_state *)dd->private;
 
        ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
        
@@ -269,38 +287,24 @@ int tdaio_submit(struct td_state *s)
        /* Success case: */
        prv->iocb_queued = 0;
        
-       return ret;
-}
-
-int *tdaio_get_fd(struct td_state *s)
-{
-       struct tdaio_state *prv = (struct tdaio_state *)s->private;
-       int *fds, i;
-
-       fds = malloc(sizeof(int) * MAX_IOFD);
-       /*initialise the FD array*/
-       for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
-       fds[0] = prv->poll_fd;
-
-       return fds;     
-}
-
-int tdaio_close(struct td_state *s)
-{
-       struct tdaio_state *prv = (struct tdaio_state *)s->private;
+       return 0;
+}
+
+int tdaio_close(struct disk_driver *dd)
+{
+       struct tdaio_state *prv = (struct tdaio_state *)dd->private;
        
        io_destroy(prv->aio_ctx);
        close(prv->fd);
-       
-       return 0;
-}
-
-int tdaio_do_callbacks(struct td_state *s, int sid)
+
+       return 0;
+}
+
+int tdaio_do_callbacks(struct disk_driver *dd, int sid)
 {
        int ret, i, rsp = 0;
        struct io_event *ep;
-       struct tdaio_state *prv = (struct tdaio_state *)s->private;
+       struct tdaio_state *prv = (struct tdaio_state *)dd->private;
 
        /* Non-blocking test for completed io. */
        ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
@@ -311,22 +315,34 @@ int tdaio_do_callbacks(struct td_state *
                struct pending_aio *pio;
                
                pio = &prv->pending_aio[(long)io->data];
-               rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1,
+               rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
+                              pio->lsec, io->u.c.nbytes >> 9, 
                               pio->id, pio->private);
 
                prv->iocb_free[prv->iocb_free_count++] = io;
        }
        return rsp;
 }
-       
+
+int tdaio_has_parent(struct disk_driver *dd)
+{
+       return 0;
+}
+
+int tdaio_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+       return -EINVAL;
+}
+
 struct tap_disk tapdisk_aio = {
-       "tapdisk_aio",
-       sizeof(struct tdaio_state),
-       tdaio_open,
-       tdaio_queue_read,
-       tdaio_queue_write,
-       tdaio_submit,
-       tdaio_get_fd,
-       tdaio_close,
-       tdaio_do_callbacks,
+       .disk_type          = "tapdisk_aio",
+       .private_data_size  = sizeof(struct tdaio_state),
+       .td_open            = tdaio_open,
+       .td_queue_read      = tdaio_queue_read,
+       .td_queue_write     = tdaio_queue_write,
+       .td_submit          = tdaio_submit,
+       .td_has_parent      = tdaio_has_parent,
+       .td_get_parent      = tdaio_get_parent,
+       .td_close           = tdaio_close,
+       .td_do_callbacks    = tdaio_do_callbacks,
 };
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/block-qcow.c Fri Feb 16 20:31:27 2007 -0800
@@ -55,7 +55,6 @@
 
 /******AIO DEFINES******/
 #define REQUEST_ASYNC_FD 1
-#define MAX_QCOW_IDS  0xFFFF
 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
 struct pending_aio {
@@ -65,7 +64,6 @@ struct pending_aio {
        int nb_sectors;
        char *buf;
        uint64_t sector;
-       int qcow_idx;
 };
 
 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
@@ -115,9 +113,9 @@ struct tdqcow_state {
 struct tdqcow_state {
         int fd;                        /*Main Qcow file descriptor */
        uint64_t fd_end;               /*Store a local record of file length */
-       int bfd;                       /*Backing file descriptor*/
        char *name;                    /*Record of the filename*/
-       int poll_pipe[2];              /*dummy fd for polling on */
+       uint32_t backing_file_size;
+       uint64_t backing_file_offset;
        int encrypted;                 /*File contents are encrypted or plain*/
        int cluster_bits;              /*Determines length of cluster as 
                                        *indicated by file hdr*/
@@ -149,7 +147,6 @@ struct tdqcow_state {
        AES_KEY aes_decrypt_key;       /*AES key*/
         /* libaio state */
         io_context_t       aio_ctx;
-       int                nr_reqs [MAX_QCOW_IDS];
         struct iocb        iocb_list  [MAX_AIO_REQS];
         struct iocb       *iocb_free  [MAX_AIO_REQS];
         struct pending_aio pending_aio[MAX_AIO_REQS];
@@ -162,10 +159,11 @@ struct tdqcow_state {
 
 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
 
-static int init_aio_state(struct td_state *bs)
+static int init_aio_state(struct disk_driver *dd)
 {
         int i;
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+       struct td_state     *bs = dd->td_state;
+       struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
         long     ioidx;
 
         /*Initialize Locking bitmap*/
@@ -202,8 +200,7 @@ static int init_aio_state(struct td_stat
 
         for (i=0;i<MAX_AIO_REQS;i++)
                 s->iocb_free[i] = &s->iocb_list[i];
-       for (i=0;i<MAX_QCOW_IDS;i++)
-               s->nr_reqs[i] = 0;
+
         DPRINTF("AIO state initialised\n");
 
         return 0;
@@ -238,7 +235,10 @@ static uint32_t gen_cksum(char *ptr, int
 
        if(!md) return 0;
 
-       if (MD5((unsigned char *)ptr, len, md) != md) return 0;
+       if (MD5((unsigned char *)ptr, len, md) != md) {
+               free(md);
+               return 0;
+       }
 
        memcpy(&ret, md, sizeof(uint32_t));
        free(md);
@@ -247,26 +247,42 @@ static uint32_t gen_cksum(char *ptr, int
 
 static int get_filesize(char *filename, uint64_t *size, struct stat *st)
 {
-       int blockfd;
+       int fd;
+       QCowHeader header;
 
        /*Set to the backing file size*/
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+               return -1;
+       if (read(fd, &header, sizeof(header)) < sizeof(header)) {
+               close(fd);
+               return -1;
+       }
+       close(fd);
+       
+       be32_to_cpus(&header.magic);
+       be64_to_cpus(&header.size);
+       if (header.magic == QCOW_MAGIC) {
+               *size = header.size >> SECTOR_SHIFT;
+               return 0;
+       }
+
        if(S_ISBLK(st->st_mode)) {
-               blockfd = open(filename, O_RDONLY);
-               if (blockfd < 0)
+               fd = open(filename, O_RDONLY);
+               if (fd < 0)
                        return -1;
-               if (ioctl(blockfd,BLKGETSIZE,size)!=0) {
+               if (ioctl(fd,BLKGETSIZE,size)!=0) {
                        printf("Unable to get Block device size\n");
-                       close(blockfd);
+                       close(fd);
                        return -1;
                }
-               close(blockfd);
+               close(fd);
        } else *size = (st->st_size >> SECTOR_SHIFT);   
        return 0;
 }
 
-static int qcow_set_key(struct td_state *bs, const char *key)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+static int qcow_set_key(struct tdqcow_state *s, const char *key)
+{
        uint8_t keybuf[16];
        int len, i;
        
@@ -306,10 +322,9 @@ static int qcow_set_key(struct td_state 
        return 0;
 }
 
-static int async_read(struct tdqcow_state *s, int fd, int size, 
-                    uint64_t offset,
-                    char *buf, td_callback_t cb,
-                    int id, uint64_t sector, int qcow_idx, void *private)
+static int async_read(struct tdqcow_state *s, int size, 
+                     uint64_t offset, char *buf, td_callback_t cb,
+                     int id, uint64_t sector, void *private)
 {
         struct   iocb *io;
         struct   pending_aio *pio;
@@ -325,9 +340,8 @@ static int async_read(struct tdqcow_stat
        pio->nb_sectors = size/512;
        pio->buf = buf;
        pio->sector = sector;
-       pio->qcow_idx = qcow_idx;
-
-        io_prep_pread(io, fd, buf, size, offset);
+
+        io_prep_pread(io, s->fd, buf, size, offset);
         io->data = (void *)ioidx;
 
         s->iocb_queue[s->iocb_queued++] = io;
@@ -335,10 +349,9 @@ static int async_read(struct tdqcow_stat
         return 1;
 }
 
-static int async_write(struct tdqcow_state *s, int fd, int size, 
-                    uint64_t offset,
-                    char *buf, td_callback_t cb,
-                     int id, uint64_t sector, int qcow_idx, void *private)
+static int async_write(struct tdqcow_state *s, int size,
+                      uint64_t offset, char *buf, td_callback_t cb,
+                      int id, uint64_t sector, void *private)
 {
         struct   iocb *io;
         struct   pending_aio *pio;
@@ -354,9 +367,8 @@ static int async_write(struct tdqcow_sta
        pio->nb_sectors = size/512;
        pio->buf = buf;
        pio->sector = sector;
-       pio->qcow_idx = qcow_idx;
-
-        io_prep_pwrite(io, fd, buf, size, offset);
+
+        io_prep_pwrite(io, s->fd, buf, size, offset);
         io->data = (void *)ioidx;
 
         s->iocb_queue[s->iocb_queued++] = io;
@@ -381,17 +393,6 @@ static void aio_unlock(struct tdqcow_sta
 
        --s->sector_lock[sector];
        return;
-}
-
-/*TODO - Use a freelist*/
-static int get_free_idx(struct tdqcow_state *s)
-{
-       int i;
-       
-       for(i = 0; i < MAX_QCOW_IDS; i++) {
-               if(s->nr_reqs[i] == 0) return i;
-       }
-       return -1;
 }
 
 /* 
@@ -425,23 +426,23 @@ static int qtruncate(int fd, off_t lengt
 {
        int ret, i; 
        int current = 0, rem = 0;
-       int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
+       uint64_t sectors;
        struct stat st;
-       char buf[DEFAULT_SECTOR_SIZE];
+       char *buf;
 
        /* If length is greater than the current file len
         * we synchronously write zeroes to the end of the 
         * file, otherwise we truncate the length down
         */
-       memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
        ret = fstat(fd, &st);
-       if (ret == -1)
+       if (ret == -1) 
                return -1;
        if (S_ISBLK(st.st_mode))
                return 0;
-
+       
+       sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
        current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
-       rem = st.st_size % DEFAULT_SECTOR_SIZE;
+       rem     = st.st_size % DEFAULT_SECTOR_SIZE;
 
        /* If we are extending this file, we write zeros to the end --
         * this tries to ensure that the extents allocated wind up being
@@ -449,28 +450,40 @@ static int qtruncate(int fd, off_t lengt
         */
        if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
                /*We are extending the file*/
+               if ((ret = posix_memalign((void **)&buf, 
+                                         512, DEFAULT_SECTOR_SIZE))) {
+                       DPRINTF("posix_memalign failed: %d\n", ret);
+                       return -1;
+               }
+               memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
                if (lseek(fd, 0, SEEK_END)==-1) {
-                       fprintf(stderr, 
-                               "Lseek EOF failed (%d), internal error\n",
+                       DPRINTF("Lseek EOF failed (%d), internal error\n",
                                errno);
+                       free(buf);
                        return -1;
                }
                if (rem) {
                        ret = write(fd, buf, rem);
-                       if (ret != rem)
+                       if (ret != rem) {
+                               DPRINTF("write failed: ret = %d, err = %s\n",
+                                       ret, strerror(errno));
+                               free(buf);
                                return -1;
+                       }
                }
                for (i = current; i < sectors; i++ ) {
                        ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
-                       if (ret != DEFAULT_SECTOR_SIZE)
+                       if (ret != DEFAULT_SECTOR_SIZE) {
+                               DPRINTF("write failed: ret = %d, err = %s\n",
+                                       ret, strerror(errno));
+                               free(buf);
                                return -1;
-               }
-               
+                       }
+               }
+               free(buf);
        } else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
-               if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) {
-                       fprintf(stderr,
-                               "Ftruncate failed (%d), internal error\n",
-                                errno);
+               if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
+                       DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
                        return -1;
                }
        return 0;
@@ -490,12 +503,11 @@ static int qtruncate(int fd, off_t lengt
  *
  * return 0 if not allocated.
  */
-static uint64_t get_cluster_offset(struct td_state *bs,
+static uint64_t get_cluster_offset(struct tdqcow_state *s,
                                    uint64_t offset, int allocate,
                                    int compressed_size,
                                    int n_start, int n_end)
 {
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
        int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
        char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
        uint64_t l2_offset, *l2_table, cluster_offset, tmp;
@@ -550,8 +562,10 @@ static uint64_t get_cluster_offset(struc
                 * entry is written before blocks.
                 */
                lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
-               if (write(s->fd, tmp_ptr, 4096) != 4096)
+               if (write(s->fd, tmp_ptr, 4096) != 4096) {
+                       free(tmp_ptr);
                        return 0;
+               }
                free(tmp_ptr);
 
                new_l2_table = 1;
@@ -716,9 +730,10 @@ found:
        return cluster_offset;
 }
 
-static void init_cluster_cache(struct td_state *bs)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+static void init_cluster_cache(struct disk_driver *dd)
+{
+       struct td_state     *bs = dd->td_state;
+       struct tdqcow_state *s  = (struct tdqcow_state *)dd->private;
        uint32_t count = 0;
        int i, cluster_entries;
 
@@ -727,22 +742,20 @@ static void init_cluster_cache(struct td
                cluster_entries, s->cluster_size);
 
        for (i = 0; i < bs->size; i += cluster_entries) {
-               if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++;
+               if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++;
                if (count >= L2_CACHE_SIZE) return;
        }
        DPRINTF("Finished cluster initialisation, added %d entries\n", count);
        return;
 }
 
-static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, 
+static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
                              int nb_sectors, int *pnum)
 {
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-
        int index_in_cluster, n;
        uint64_t cluster_offset;
 
-       cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+       cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors)
@@ -800,11 +813,23 @@ static int decompress_cluster(struct tdq
        return 0;
 }
 
+static inline void init_fds(struct disk_driver *dd)
+{
+       int i;
+       struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+
+       for(i = 0; i < MAX_IOFD; i++) 
+               dd->io_fd[i] = 0;
+
+       dd->io_fd[0] = s->poll_fd;
+}
+
 /* Open the disk file and initialize qcow state. */
-int tdqcow_open (struct td_state *bs, const char *name)
+int tdqcow_open (struct disk_driver *dd, const char *name)
 {
        int fd, len, i, shift, ret, size, l1_table_size;
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+       struct td_state     *bs = dd->td_state;
+       struct tdqcow_state *s  = (struct tdqcow_state *)dd->private;
        char *buf;
        QCowHeader *header;
        QCowHeader_ext *exthdr;
@@ -812,10 +837,6 @@ int tdqcow_open (struct td_state *bs, co
        uint64_t final_cluster = 0;
 
        DPRINTF("QCOW: Opening %s\n",name);
-       /* set up a pipe so that we can hand back a poll fd that won't fire.*/
-       ret = pipe(s->poll_pipe);
-       if (ret != 0)
-               return (0 - errno);
 
        fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
        if (fd < 0) {
@@ -826,7 +847,7 @@ int tdqcow_open (struct td_state *bs, co
        s->fd = fd;
        asprintf(&s->name,"%s", name);
 
-       ASSERT(sizeof(header) < 512);
+       ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512);
 
        ret = posix_memalign((void **)&buf, 512, 512);
        if (ret != 0) goto fail;
@@ -861,7 +882,9 @@ int tdqcow_open (struct td_state *bs, co
        s->cluster_alloc = s->l2_size;
        bs->size = header->size / 512;
        s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-       
+       s->backing_file_offset = header->backing_file_offset;
+       s->backing_file_size   = header->backing_file_size;
+
        /* read the level 1 table */
        shift = s->cluster_bits + s->l2_bits;
        s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
@@ -887,7 +910,7 @@ int tdqcow_open (struct td_state *bs, co
        if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
                goto fail;
 
-       for(i = 0;i < s->l1_size; i++) {
+       for(i = 0; i < s->l1_size; i++) {
                //be64_to_cpus(&s->l1_table[i]);
                //DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
                if (s->l1_table[i] > final_cluster)
@@ -907,41 +930,15 @@ int tdqcow_open (struct td_state *bs, co
        if(ret != 0) goto fail;
        s->cluster_cache_offset = -1;
 
-       /* read the backing file name */
-       s->bfd = -1;
-       if (header->backing_file_offset != 0) {
-               DPRINTF("Reading backing file data\n");
-               len = header->backing_file_size;
-               if (len > 1023)
-                       len = 1023;
-
-                /*TODO - Fix read size for O_DIRECT and use original fd!*/
-               fd = open(name, O_RDONLY | O_LARGEFILE);
-
-               lseek(fd, header->backing_file_offset, SEEK_SET);
-               if (read(fd, bs->backing_file, len) != len)
-                       goto fail;
-               bs->backing_file[len] = '\0';
-               close(fd);
-               /***********************************/
-
-               /*Open backing file*/
-               fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE);
-               if (fd < 0) {
-                       DPRINTF("Unable to open backing file: %s\n",
-                               bs->backing_file);
-                       goto fail;
-               }
-               s->bfd = fd;
+       if (s->backing_file_offset != 0)
                s->cluster_alloc = 1; /*Cannot use pre-alloc*/
-       }
 
         bs->sector_size = 512;
         bs->info = 0;
        
        /*Detect min_cluster_alloc*/
        s->min_cluster_alloc = 1; /*Default*/
-       if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
+       if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) {
                /*We test to see if the xen magic # exists*/
                exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
                be32_to_cpus(&exthdr->xmagic);
@@ -962,10 +959,11 @@ int tdqcow_open (struct td_state *bs, co
        }
 
  end_xenhdr:
-       if (init_aio_state(bs)!=0) {
+       if (init_aio_state(dd)!=0) {
                DPRINTF("Unable to initialise AIO state\n");
                goto fail;
        }
+       init_fds(dd);
        s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : 
                                (final_cluster + s->cluster_size));
 
@@ -981,213 +979,145 @@ fail:
        return -1;
 }
 
- int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
-                              int id, void *private)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-       int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
-       uint64_t cluster_offset;
+int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector,
+                     int nb_sectors, char *buf, td_callback_t cb,
+                     int id, void *private)
+{
+       struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+       int ret = 0, index_in_cluster, n, i, rsp = 0;
+       uint64_t cluster_offset, sec, nr_secs;
+
+       sec     = sector;
+       nr_secs = nb_sectors;
 
        /*Check we can get a lock*/
-       for (i = 0; i < nb_sectors; i++)
-               if (!aio_can_lock(s, sector + i)) {
-                       DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
-                               (long long) sector + i);
-                       return -EBUSY;
-               }
-       
+       for (i = 0; i < nb_sectors; i++) 
+               if (!aio_can_lock(s, sector + i)) 
+                       return cb(dd, -EBUSY, sector, nb_sectors, id, private);
+
        /*We store a local record of the request*/
-       qcow_idx = get_free_idx(s);
        while (nb_sectors > 0) {
                cluster_offset = 
-                       get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
+                       get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
                index_in_cluster = sector & (s->cluster_sectors - 1);
                n = s->cluster_sectors - index_in_cluster;
                if (n > nb_sectors)
                        n = nb_sectors;
 
-               if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
-                       DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
-                               "[%llu]\n", s->iocb_free_count, 
-                               (long long) sector);
-                       return -ENOMEM;
-               }
+               if (s->iocb_free_count == 0 || !aio_lock(s, sector)) 
+                       return cb(dd, -EBUSY, sector, nb_sectors, id, private);
                
-               if (!cluster_offset && (s->bfd > 0)) {
-                       s->nr_reqs[qcow_idx]++;
-                       asubmit += async_read(s, s->bfd, n * 512, sector << 9, 
-                                             buf, cb, id, sector, 
-                                             qcow_idx, private);
-               } else if(!cluster_offset) {
-                       memset(buf, 0, 512 * n);
+               if(!cluster_offset) {
                        aio_unlock(s, sector);
+                       ret = cb(dd, BLK_NOT_ALLOCATED, 
+                                sector, n, id, private);
+                       if (ret == -EBUSY) {
+                               /* mark remainder of request
+                                * as busy and try again later */
+                               return cb(dd, -EBUSY, sector + n,
+                                         nb_sectors - n, id, private);
+                       } else rsp += ret;
                } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+                       aio_unlock(s, sector);
                        if (decompress_cluster(s, cluster_offset) < 0) {
-                               ret = -1;
+                               rsp += cb(dd, -EIO, sector, 
+                                         nb_sectors, id, private);
                                goto done;
                        }
                        memcpy(buf, s->cluster_cache + index_in_cluster * 512, 
                               512 * n);
-               } else {                        
-                       s->nr_reqs[qcow_idx]++;
-                       asubmit += async_read(s, s->fd, n * 512, 
-                                             (cluster_offset + 
-                                              index_in_cluster * 512), 
-                                             buf, cb, id, sector, 
-                                             qcow_idx, private);
+                       rsp += cb(dd, 0, sector, n, id, private);
+               } else {
+                       async_read(s, n * 512, 
+                                  (cluster_offset + index_in_cluster * 512),
+                                  buf, cb, id, sector, private);
                }
                nb_sectors -= n;
                sector += n;
                buf += n * 512;
        }
 done:
-        /*Callback if no async requests outstanding*/
-        if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
-
-       return 0;
-}
-
- int tdqcow_queue_write(struct td_state *bs, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
-                              int id, void *private)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-       int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
-       uint64_t cluster_offset;
+       return rsp;
+}
+
+int tdqcow_queue_write(struct disk_driver *dd, uint64_t sector,
+                      int nb_sectors, char *buf, td_callback_t cb,
+                      int id, void *private)
+{
+       struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+       int ret = 0, index_in_cluster, n, i;
+       uint64_t cluster_offset, sec, nr_secs;
+
+       sec     = sector;
+       nr_secs = nb_sectors;
 
        /*Check we can get a lock*/
        for (i = 0; i < nb_sectors; i++)
-               if (!aio_can_lock(s, sector + i))  {
-                       DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
-                               (long long) (sector + i));
-                       return -EBUSY;
-               }
+               if (!aio_can_lock(s, sector + i))  
+                       return cb(dd, -EBUSY, sector, nb_sectors, id, private);
                   
        /*We store a local record of the request*/
-       qcow_idx = get_free_idx(s);     
        while (nb_sectors > 0) {
                index_in_cluster = sector & (s->cluster_sectors - 1);
                n = s->cluster_sectors - index_in_cluster;
                if (n > nb_sectors)
                        n = nb_sectors;
 
-               if (s->iocb_free_count == 0 || !aio_lock(s, sector)){
-                       DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
-                               "[%llu]\n", s->iocb_free_count, 
-                               (long long) sector);
-                       return -ENOMEM;
-               }
-
-               if (!IS_ZERO(buf,n * 512)) {
-
-                       cluster_offset = get_cluster_offset(bs, sector << 9, 
-                                                           1, 0, 
-                                                           index_in_cluster, 
-                                                           index_in_cluster+n
-                               );
-                       if (!cluster_offset) {
-                               DPRINTF("Ooops, no write cluster offset!\n");
-                               ret = -1;
-                               goto done;
-                       }
-
-                       if (s->crypt_method) {
-                               encrypt_sectors(s, sector, s->cluster_data, 
-                                               (unsigned char *)buf, n, 1,
-                                               &s->aes_encrypt_key);
-                               s->nr_reqs[qcow_idx]++;
-                               asubmit += async_write(s, s->fd, n * 512, 
-                                                      (cluster_offset + 
-                                                       index_in_cluster*512), 
-                                                      (char *)s->cluster_data,
-                                                      cb, id, sector, 
-                                                      qcow_idx, private);
-                       } else {
-                               s->nr_reqs[qcow_idx]++;
-                               asubmit += async_write(s, s->fd, n * 512, 
-                                                      (cluster_offset + 
-                                                       index_in_cluster*512),
-                                                      buf, cb, id, sector, 
-                                                      qcow_idx, private);
-                       }
+               if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+                       return cb(dd, -EBUSY, sector, nb_sectors, id, private);
+
+               cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
+                                                   index_in_cluster, 
+                                                   index_in_cluster+n);
+               if (!cluster_offset) {
+                       DPRINTF("Ooops, no write cluster offset!\n");
+                       return cb(dd, -EIO, sector, nb_sectors, id, private);
+               }
+
+               if (s->crypt_method) {
+                       encrypt_sectors(s, sector, s->cluster_data, 
+                                       (unsigned char *)buf, n, 1,
+                                       &s->aes_encrypt_key);
+                       async_write(s, n * 512, 
+                                   (cluster_offset + index_in_cluster*512),
+                                   (char *)s->cluster_data, cb, id, sector, 
+                                   private);
                } else {
-                       /*Write data contains zeros, but we must check to see 
-                         if cluster already allocated*/
-                       cluster_offset = get_cluster_offset(bs, sector << 9, 
-                                                           0, 0, 
-                                                           index_in_cluster, 
-                                                           index_in_cluster+n
-                               );      
-                       if(cluster_offset) {
-                               if (s->crypt_method) {
-                                       encrypt_sectors(s, sector, 
-                                                       s->cluster_data, 
-                                                       (unsigned char *)buf, 
-                                                       n, 1,
-                                                       &s->aes_encrypt_key);
-                                       s->nr_reqs[qcow_idx]++;
-                                       asubmit += async_write(s, s->fd, 
-                                                              n * 512, 
-                                                              (cluster_offset+
-                                                               index_in_cluster * 512),
-                                                              (char *)s->cluster_data, cb, id, sector,
-                                                              qcow_idx, private);
-                               } else {
-                                       s->nr_reqs[qcow_idx]++;
-                                       asubmit += async_write(s, s->fd, n*512,
-                                                              cluster_offset + index_in_cluster * 512,
-                                                              buf, cb, id, sector,
-                                                              qcow_idx, private);
-                               }
-                       }
-                       else aio_unlock(s, sector);
-               }
+                       async_write(s, n * 512, 
+                                   (cluster_offset + index_in_cluster*512),
+                                   buf, cb, id, sector, private);
+               }
+               
                nb_sectors -= n;
                sector += n;
                buf += n * 512;
        }
        s->cluster_cache_offset = -1; /* disable compressed cache */
 
-done:
-       /*Callback if no async requests outstanding*/
-        if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
-
        return 0;
 }
                
-int tdqcow_submit(struct td_state *bs)
+int tdqcow_submit(struct disk_driver *dd)
 {
         int ret;
-        struct   tdqcow_state *prv = (struct tdqcow_state *)bs->private;
-
-        ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+        struct   tdqcow_state *prv = (struct tdqcow_state *)dd->private;
+
+       if (!prv->iocb_queued)
+               return 0;
+
+       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
 
         /* XXX: TODO: Handle error conditions here. */
 
         /* Success case: */
         prv->iocb_queued = 0;
 
-        return ret;
-}
-
-
-int *tdqcow_get_fd(struct td_state *bs)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-       int *fds, i;
-
-       fds = malloc(sizeof(int) * MAX_IOFD);
-       /*initialise the FD array*/
-       for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
-       fds[0] = s->poll_fd;
-       return fds;
-}
-
-int tdqcow_close(struct td_state *bs)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+        return 0;
+}
+
+int tdqcow_close(struct disk_driver *dd)
+{
+       struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
        uint32_t cksum, out;
        int fd, offset;
 
@@ -1203,6 +1133,7 @@ int tdqcow_close(struct td_state *bs)
                close(fd);
        }
 
+       io_destroy(s->aio_ctx);
        free(s->name);
        free(s->l1_table);
        free(s->l2_cache);
@@ -1212,11 +1143,11 @@ int tdqcow_close(struct td_state *bs)
        return 0;
 }
 
-int tdqcow_do_callbacks(struct td_state *s, int sid)
+int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
 {
         int ret, i, rsp = 0,*ptr;
         struct io_event *ep;
-        struct tdqcow_state *prv = (struct tdqcow_state *)s->private;
+        struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
 
         if (sid > MAX_IOFD) return 1;
        
@@ -1224,25 +1155,24 @@ int tdqcow_do_callbacks(struct td_state 
         ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
                            NULL);
 
-        for (ep=prv->aio_events, i = ret; i-->0; ep++) {
+        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
                 struct iocb        *io  = ep->obj;
                 struct pending_aio *pio;
 
                 pio = &prv->pending_aio[(long)io->data];
 
                aio_unlock(prv, pio->sector);
-               if (pio->id >= 0) {
-                       if (prv->crypt_method)
-                               encrypt_sectors(prv, pio->sector, 
-                                               (unsigned char *)pio->buf, 
-                                               (unsigned char *)pio->buf, 
-                                               pio->nb_sectors, 0, 
-                                               &prv->aes_decrypt_key);
-                       prv->nr_reqs[pio->qcow_idx]--;
-                       if (prv->nr_reqs[pio->qcow_idx] == 0) 
-                               rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, pio->id,
-                                              pio->private);
-               } else if (pio->id == -2) free(pio->buf);
+
+               if (prv->crypt_method)
+                       encrypt_sectors(prv, pio->sector, 
+                                       (unsigned char *)pio->buf, 
+                                       (unsigned char *)pio->buf, 
+                                       pio->nb_sectors, 0, 
+                                       &prv->aes_decrypt_key);
+
+               rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1, 
+                              pio->sector, pio->nb_sectors,
+                              pio->id, pio->private);
 
                 prv->iocb_free[prv->iocb_free_count++] = io;
         }
@@ -1250,7 +1180,7 @@ int tdqcow_do_callbacks(struct td_state 
 }
 
 int qcow_create(const char *filename, uint64_t total_size,
-                      const char *backing_file, int sparse)
+               const char *backing_file, int sparse)
 {
        int fd, header_size, backing_filename_len, l1_size, i;
        int shift, length, adjust, flags = 0, ret = 0;
@@ -1391,9 +1321,8 @@ int qcow_create(const char *filename, ui
        return 0;
 }
 
-int qcow_make_empty(struct td_state *bs)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
+int qcow_make_empty(struct tdqcow_state *s)
+{
        uint32_t l1_length = s->l1_size * sizeof(uint64_t);
 
        memset(s->l1_table, 0, l1_length);
@@ -1412,19 +1341,16 @@ int qcow_make_empty(struct td_state *bs)
        return 0;
 }
 
-int qcow_get_cluster_size(struct td_state *bs)
-{
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
-
+int qcow_get_cluster_size(struct tdqcow_state *s)
+{
        return s->cluster_size;
 }
 
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
-int qcow_compress_cluster(struct td_state *bs, int64_t sector_num, 
+int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num, 
                           const uint8_t *buf)
 {
-       struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
        z_stream strm;
        int ret, out_len;
        uint8_t *out_buf;
@@ -1463,7 +1389,7 @@ int qcow_compress_cluster(struct td_stat
                /* could not compress: write normal cluster */
                //tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
        } else {
-               cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 
+               cluster_offset = get_cluster_offset(s, sector_num << 9, 2, 
                                             out_len, 0, 0);
                cluster_offset &= s->cluster_offset_mask;
                lseek(s->fd, cluster_offset, SEEK_SET);
@@ -1477,15 +1403,54 @@ int qcow_compress_cluster(struct td_stat
        return 0;
 }
 
+int tdqcow_has_parent(struct disk_driver *dd)
+{
+       struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
+       return (s->backing_file_offset ? 1 : 0);
+}
+
+int tdqcow_get_parent(struct disk_driver *cdd, struct disk_driver *pdd)
+{
+       off_t off;
+       char *buf, *filename;
+       int len, secs, ret = -1;
+       struct tdqcow_state *child  = (struct tdqcow_state *)cdd->private;
+
+       if (!child->backing_file_offset)
+               return -1;
+
+       /* read the backing file name */
+       len  = child->backing_file_size;
+       off  = child->backing_file_offset - (child->backing_file_offset % 512);
+       secs = (len + (child->backing_file_offset - off) + 511) >> 9;
+
+       if (posix_memalign((void **)&buf, 512, secs << 9)) 
+               return -1;
+
+       if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
+               goto out;
+
+       if (read(child->fd, buf, secs << 9) != secs << 9)
+               goto out;
+       filename      = buf + (child->backing_file_offset - off);
+       filename[len] = '\0';
+
+       /*Open backing file*/
+       ret = tdqcow_open(pdd, filename);
+ out:
+       free(buf);
+       return ret;
+}
+
 struct tap_disk tapdisk_qcow = {
-       "tapdisk_qcow",
-       sizeof(struct tdqcow_state),
-       tdqcow_open,
-       tdqcow_queue_read,
-       tdqcow_queue_write,
-       tdqcow_submit,
-       tdqcow_get_fd,
-       tdqcow_close,
-       tdqcow_do_callbacks,
+       .disk_type           = "tapdisk_qcow",
+       .private_data_size   = sizeof(struct tdqcow_state),
+       .td_open             = tdqcow_open,
+       .td_queue_read       = tdqcow_queue_read,
+       .td_queue_write      = tdqcow_queue_write,
+       .td_submit           = tdqcow_submit,
+       .td_has_parent       = tdqcow_has_parent,
+       .td_get_parent       = tdqcow_get_parent,
+       .td_close            = tdqcow_close,
+       .td_do_callbacks     = tdqcow_do_callbacks,
 };
-
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-ram.c
--- a/tools/blktap/drivers/block-ram.c  Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/block-ram.c  Fri Feb 16 20:31:27 2007 -0800
@@ -123,14 +123,25 @@ static int get_image_info(struct td_stat
        return 0;
 }
 
+static inline void init_fds(struct disk_driver *dd)
+{
+        int i;
+       struct tdram_state *prv = (struct tdram_state *)dd->private;
+
+        for(i =0 ; i < MAX_IOFD; i++)
+               dd->io_fd[i] = 0;
+
+        dd->io_fd[0] = prv->poll_pipe[0];
+}
+
 /* Open the disk file and initialize ram state. */
-int tdram_open (struct td_state *s, const char *name)
-{
+int tdram_open (struct disk_driver *dd, const char *name)
+{
+       char *p;
+       uint64_t size;
        int i, fd, ret = 0, count = 0;
-       struct tdram_state *prv = (struct tdram_state *)s->private;
-       uint64_t size;
-       char *p;
-       s->private = prv;
+       struct td_state    *s     = dd->td_state;
+       struct tdram_state *prv   = (struct tdram_state *)dd->private;
 
        connections++;
        
@@ -209,88 +220,80 @@ int tdram_open (struct td_state *s, cons
                ret = 0;
        } 
 
+       init_fds(dd);
 done:
        return ret;
 }
 
- int tdram_queue_read(struct td_state *s, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
-                              int id, void *private)
-{
-       struct tdram_state *prv = (struct tdram_state *)s->private;
+ int tdram_queue_read(struct disk_driver *dd, uint64_t sector,
+                     int nb_sectors, char *buf, td_callback_t cb,
+                     int id, void *private)
+{
+       struct td_state    *s   = dd->td_state;
+       struct tdram_state *prv = (struct tdram_state *)dd->private;
        int      size    = nb_sectors * s->sector_size;
        uint64_t offset  = sector * (uint64_t)s->sector_size;
-       int ret;
 
        memcpy(buf, img + offset, size);
-       ret = size;
-
-       cb(s, (ret < 0) ? ret: 0, id, private);
-
-       return ret;
-}
-
- int tdram_queue_write(struct td_state *s, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
-                              int id, void *private)
-{
-       struct tdram_state *prv = (struct tdram_state *)s->private;
+
+       return cb(dd, 0, sector, nb_sectors, id, private);
+}
+
+int tdram_queue_write(struct disk_driver *dd, uint64_t sector,
+                     int nb_sectors, char *buf, td_callback_t cb,
+                     int id, void *private)
+{
+       struct td_state    *s   = dd->td_state;
+       struct tdram_state *prv = (struct tdram_state *)dd->private;
        int      size    = nb_sectors * s->sector_size;
        uint64_t offset  = sector * (uint64_t)s->sector_size;
-       int ret;
-       
-       /*We assume that write access is controlled at a higher level for multiple disks*/
+       
+       /* We assume that write access is controlled
+        * at a higher level for multiple disks */
        memcpy(img + offset, buf, size);
-       ret = size;
-
-       cb(s, (ret < 0) ? ret : 0, id, private);
-
-       return ret;
+
+       return cb(dd, 0, sector, nb_sectors, id, private);
 }
                
-int tdram_submit(struct td_state *s)
+int tdram_submit(struct disk_driver *dd)
 {
        return 0;       
 }
 
-
-int *tdram_get_fd(struct td_state *s)
-{
-       struct tdram_state *prv = (struct tdram_state *)s->private;
-        int *fds, i;
-
-        fds = malloc(sizeof(int) * MAX_IOFD);
-        /*initialise the FD array*/
-        for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
-        fds[0] = prv->poll_pipe[0];
-        return fds;    
-}
-
-int tdram_close(struct td_state *s)
-{
-       struct tdram_state *prv = (struct tdram_state *)s->private;
+int tdram_close(struct disk_driver *dd)
+{
+       struct tdram_state *prv = (struct tdram_state *)dd->private;
        
        connections--;
        
        return 0;
 }
 
-int tdram_do_callbacks(struct td_state *s, int sid)
+int tdram_do_callbacks(struct disk_driver *dd, int sid)
 {
        /* always ask for a kick */
        return 1;
 }
 
+int tdram_has_parent(struct disk_driver *dd)
+{
+       return 0;
+}
+
+int tdram_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+       return -EINVAL;
+}
+
 struct tap_disk tapdisk_ram = {
-       "tapdisk_ram",
-       sizeof(struct tdram_state),
-       tdram_open,
-       tdram_queue_read,
-       tdram_queue_write,
-       tdram_submit,
-       tdram_get_fd,
-       tdram_close,
-       tdram_do_callbacks,
+       .disk_type          = "tapdisk_ram",
+       .private_data_size  = sizeof(struct tdram_state),
+       .td_open            = tdram_open,
+       .td_queue_read      = tdram_queue_read,
+       .td_queue_write     = tdram_queue_write,
+       .td_submit          = tdram_submit,
+       .td_has_parent      = tdram_has_parent,
+       .td_get_parent      = tdram_get_parent,
+       .td_close           = tdram_close,
+       .td_do_callbacks    = tdram_do_callbacks,
 };
-
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-sync.c
--- a/tools/blktap/drivers/block-sync.c Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/block-sync.c Fri Feb 16 20:31:27 2007 -0800
@@ -106,12 +106,23 @@ static int get_image_info(struct td_stat
        return 0;
 }
 
+static inline void init_fds(struct disk_driver *dd)
+{
+       int i;
+       struct tdsync_state *prv = (struct tdsync_state *)dd->private;
+       
+       for(i = 0; i < MAX_IOFD; i++)
+               dd->io_fd[i] = 0;
+
+       dd->io_fd[0] = prv->poll_pipe[0];
+}
+
 /* Open the disk file and initialize aio state. */
-int tdsync_open (struct td_state *s, const char *name)
+int tdsync_open (struct disk_driver *dd, const char *name)
 {
        int i, fd, ret = 0;
-       struct tdsync_state *prv = (struct tdsync_state *)s->private;
-       s->private = prv;
+       struct td_state     *s   = dd->td_state;
+       struct tdsync_state *prv = (struct tdsync_state *)dd->private;
        
        /* set up a pipe so that we can hand back a poll fd that won't fire.*/
        ret = pipe(prv->poll_pipe);
@@ -138,16 +149,18 @@ int tdsync_open (struct td_state *s, con
 
         prv->fd = fd;
 
+       init_fds(dd);
        ret = get_image_info(s, fd);
 done:
        return ret;     
 }
 
- int tdsync_queue_read(struct td_state *s, uint64_t sector,
+ int tdsync_queue_read(struct disk_driver *dd, uint64_t sector,
                               int nb_sectors, char *buf, td_callback_t cb,
                               int id, void *private)
 {
-       struct tdsync_state *prv = (struct tdsync_state *)s->private;
+       struct td_state     *s   = dd->td_state;
+       struct tdsync_state *prv = (struct tdsync_state *)dd->private;
        int      size    = nb_sectors * s->sector_size;
        uint64_t offset  = sector * (uint64_t)s->sector_size;
        int ret;
@@ -162,16 +175,15 @@ done:
                } 
        } else ret = 0 - errno;
                
-       cb(s, (ret < 0) ? ret: 0, id, private);
-       
-       return 1;
-}
-
- int tdsync_queue_write(struct td_state *s, uint64_t sector,
+       return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+}
+
+ int tdsync_queue_write(struct disk_driver *dd, uint64_t sector,
                               int nb_sectors, char *buf, td_callback_t cb,
                               int id, void *private)
 {
-       struct tdsync_state *prv = (struct tdsync_state *)s->private;
+       struct td_state     *s   = dd->td_state;
+       struct tdsync_state *prv = (struct tdsync_state *)dd->private;
        int      size    = nb_sectors * s->sector_size;
        uint64_t offset  = sector * (uint64_t)s->sector_size;
        int ret = 0;
@@ -186,34 +198,17 @@ done:
                }
        } else ret = 0 - errno;
                
-       cb(s, (ret < 0) ? ret : 0, id, private);
-       
-       return 1;
+       return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private);
 }
                
-int tdsync_submit(struct td_state *s)
+int tdsync_submit(struct disk_driver *dd)
 {
        return 0;       
 }
 
-
-int *tdsync_get_fd(struct td_state *s)
-{
-       struct tdsync_state *prv = (struct tdsync_state *)s->private;
-       
-       int *fds, i;
-
-       fds = malloc(sizeof(int) * MAX_IOFD);
-       /*initialise the FD array*/
-       for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
-       fds[0] = prv->poll_pipe[0];
-       return fds;
-}
-
-int tdsync_close(struct td_state *s)
-{
-       struct tdsync_state *prv = (struct tdsync_state *)s->private;
+int tdsync_close(struct disk_driver *dd)
+{
+       struct tdsync_state *prv = (struct tdsync_state *)dd->private;
        
        close(prv->fd);
        close(prv->poll_pipe[0]);
@@ -222,21 +217,31 @@ int tdsync_close(struct td_state *s)
        return 0;
 }
 
-int tdsync_do_callbacks(struct td_state *s, int sid)
+int tdsync_do_callbacks(struct disk_driver *dd, int sid)
 {
        /* always ask for a kick */
        return 1;
 }
 
+int tdsync_has_parent(struct disk_driver *dd)
+{
+       return 0;
+}
+
+int tdsync_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+       return -EINVAL;
+}
+
 struct tap_disk tapdisk_sync = {
-       "tapdisk_sync",
-       sizeof(struct tdsync_state),
-       tdsync_open,
-       tdsync_queue_read,
-       tdsync_queue_write,
-       tdsync_submit,
-       tdsync_get_fd,
-       tdsync_close,
-       tdsync_do_callbacks,
+       .disk_type           = "tapdisk_sync",
+       .private_data_size   = sizeof(struct tdsync_state),
+       .td_open             = tdsync_open,
+       .td_queue_read       = tdsync_queue_read,
+       .td_queue_write      = tdsync_queue_write,
+       .td_submit           = tdsync_submit,
+       .td_has_parent       = tdsync_has_parent,
+       .td_get_parent       = tdsync_get_parent,
+       .td_close            = tdsync_close,
+       .td_do_callbacks     = tdsync_do_callbacks,
 };
-
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/block-vmdk.c
--- a/tools/blktap/drivers/block-vmdk.c Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/block-vmdk.c Fri Feb 16 20:31:27 2007 -0800
@@ -107,14 +107,25 @@ struct tdvmdk_state {
        unsigned int cluster_sectors;
 };
 
+static inline void init_fds(struct disk_driver *dd)
+{
+        int i;
+       struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
+
+        for (i = 0; i < MAX_IOFD; i++)
+               dd->io_fd[i] = 0;
+
+        dd->io_fd[0] = prv->poll_pipe[0];
+}
 
 /* Open the disk file and initialize aio state. */
-static int tdvmdk_open (struct td_state *s, const char *name)
+static int tdvmdk_open (struct disk_driver *dd, const char *name)
 {
        int ret, fd;
        int l1_size, i;
        uint32_t magic;
-       struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+       struct td_state     *s   = dd->td_state;
+       struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
 
        /* set up a pipe so that we can hand back a poll fd that won't fire.*/
        ret = pipe(prv->poll_pipe);
@@ -206,6 +217,7 @@ static int tdvmdk_open (struct td_state 
        if (!prv->l2_cache)
                goto fail;
        prv->fd = fd;
+       init_fds(dd);
        DPRINTF("VMDK File opened successfully\n");
        return 0;
        
@@ -218,10 +230,9 @@ fail:
        return -1;
 }
 
-static uint64_t get_cluster_offset(struct td_state *s, 
+static uint64_t get_cluster_offset(struct tdvmdk_state *prv, 
                                    uint64_t offset, int allocate)
 {
-       struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
        unsigned int l1_index, l2_offset, l2_index;
        int min_index, i, j;
        uint32_t min_count, *l2_table, tmp;
@@ -291,16 +302,17 @@ static uint64_t get_cluster_offset(struc
        return cluster_offset;
 }
 
-static int tdvmdk_queue_read(struct td_state *s, uint64_t sector,
+static int tdvmdk_queue_read(struct disk_driver *dd, uint64_t sector,
                               int nb_sectors, char *buf, td_callback_t cb,
                               int id, void *private)
 {
-       struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+       struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
        int index_in_cluster, n;
        uint64_t cluster_offset;
        int ret = 0;
+
        while (nb_sectors > 0) {
-               cluster_offset = get_cluster_offset(s, sector << 9, 0);
+               cluster_offset = get_cluster_offset(prv, sector << 9, 0);
                index_in_cluster = sector % prv->cluster_sectors;
                n = prv->cluster_sectors - index_in_cluster;
                if (n > nb_sectors)
@@ -321,27 +333,24 @@ static int tdvmdk_queue_read(struct td_s
                buf += n * 512;
        }
 done:
-       cb(s, ret == -1 ? -1 : 0, id, private);
-       
-       return 1;
-}
-
-static  int tdvmdk_queue_write(struct td_state *s, uint64_t sector,
+       return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
+}
+
+static  int tdvmdk_queue_write(struct disk_driver *dd, uint64_t sector,
                               int nb_sectors, char *buf, td_callback_t cb,
                               int id, void *private)
 {
-       struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+       struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
        int index_in_cluster, n;
        uint64_t cluster_offset;
        int ret = 0;
-       
 
        while (nb_sectors > 0) {
                index_in_cluster = sector & (prv->cluster_sectors - 1);
                n = prv->cluster_sectors - index_in_cluster;
                if (n > nb_sectors)
                        n = nb_sectors;
-               cluster_offset = get_cluster_offset(s, sector << 9, 1);
+               cluster_offset = get_cluster_offset(prv, sector << 9, 1);
                if (!cluster_offset) {
                        ret = -1;
                        goto done;
@@ -358,33 +367,17 @@ static  int tdvmdk_queue_write(struct td
                buf += n * 512;
        }
 done:
-       cb(s, ret == -1 ? -1 : 0, id, private);
-       
-       return 1;
+       return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
 }
                
-static int tdvmdk_submit(struct td_state *s)
+static int tdvmdk_submit(struct disk_driver *dd)
 {
        return 0;       
 }
 
-
-static int *tdvmdk_get_fd(struct td_state *s)
-{
-       struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
-        int *fds, i;
-
-        fds = malloc(sizeof(int) * MAX_IOFD);
-        /*initialise the FD array*/
-        for (i=0;i<MAX_IOFD;i++) fds[i] = 0;
-
-        fds[0] = prv->poll_pipe[0];
-        return fds;
-}
-
-static int tdvmdk_close(struct td_state *s)
-{
-       struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
+static int tdvmdk_close(struct disk_driver *dd)
+{
+       struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
        
        safer_free(prv->l1_table);
        safer_free(prv->l1_backup_table);
@@ -395,21 +388,31 @@ static int tdvmdk_close(struct td_state 
        return 0;
 }
 
-static int tdvmdk_do_callbacks(struct td_state *s, int sid)
+static int tdvmdk_do_callbacks(struct disk_driver *dd, int sid)
 {
        /* always ask for a kick */
        return 1;
 }
 
+static int tdvmdk_has_parent(struct disk_driver *dd)
+{
+       return 0;
+}
+
+static int tdvmdk_get_parent(struct disk_driver *dd, struct disk_driver *parent)
+{
+       return -EINVAL;
+}
+
 struct tap_disk tapdisk_vmdk = {
-       "tapdisk_vmdk",
-       sizeof(struct tdvmdk_state),
-       tdvmdk_open,
-       tdvmdk_queue_read,
-       tdvmdk_queue_write,
-       tdvmdk_submit,
-       tdvmdk_get_fd,
-       tdvmdk_close,
-       tdvmdk_do_callbacks,
+       .disk_type           = "tapdisk_vmdk",
+       .private_data_size   = sizeof(struct tdvmdk_state),
+       .td_open             = tdvmdk_open,
+       .td_queue_read       = tdvmdk_queue_read,
+       .td_queue_write      = tdvmdk_queue_write,
+       .td_submit           = tdvmdk_submit,
+       .td_has_parent       = tdvmdk_has_parent,
+       .td_get_parent       = tdvmdk_get_parent,
+       .td_close            = tdvmdk_close,
+       .td_do_callbacks     = tdvmdk_do_callbacks,
 };
-
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/img2qcow.c
--- a/tools/blktap/drivers/img2qcow.c   Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/img2qcow.c   Fri Feb 16 20:31:27 2007 -0800
@@ -147,7 +147,8 @@ static int get_image_info(struct td_stat
        return 0;
 }
 
-static int send_responses(struct td_state *s, int res, int idx, void *private)
+static int send_responses(struct disk_driver *dd, int res, uint64_t sec, 
+                         int nr_secs, int idx, void *private)
 {
        if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res);
        
@@ -159,7 +160,7 @@ static int send_responses(struct td_stat
 
 int main(int argc, char *argv[])
 {
-       struct tap_disk *drv;
+       struct disk_driver dd;
        struct td_state *s;
        int ret = -1, fd, len;
        fd_set readfds;
@@ -195,16 +196,17 @@ int main(int argc, char *argv[])
        } else DFPRINTF("Qcow file created: size %llu sectors\n",
                        (long long unsigned)s->size);
        
-       drv = &tapdisk_qcow;
-       s->private = malloc(drv->private_data_size);
+       dd.td_state = s;
+       dd.drv      = &tapdisk_qcow;
+       dd.private  = malloc(dd.drv->private_data_size);
 
         /*Open qcow file*/
-        if (drv->td_open(s, argv[1])!=0) {
+        if (dd.drv->td_open(&dd, argv[1])!=0) {
                DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]);
                exit(-1);
        }
 
-       io_fd = drv->td_get_fd(s);
+       io_fd = dd.io_fd;
 
        /*Initialise the output string*/
        memset(output,0x20,25);
@@ -245,9 +247,9 @@ int main(int argc, char *argv[])
                                len = (len >> 9) << 9;
                        }
 
-                       ret = drv->td_queue_write(s, i >> 9,
-                                                 len >> 9, buf, 
-                                                 send_responses, 0, buf);
+                       ret = dd.drv->td_queue_write(&dd, i >> 9,
+                                                    len >> 9, buf, 
+                                                    send_responses, 0, buf);
                                
                        if (!ret) submit_events++;
                                
@@ -261,7 +263,7 @@ int main(int argc, char *argv[])
                        debug_output(i,s->size << 9);
                        
                        if ((submit_events % 10 == 0) || complete) 
-                               drv->td_submit(s);
+                               dd.drv->td_submit(&dd);
                        timeout.tv_usec = 0;
                        
                } else {
@@ -275,14 +277,14 @@ int main(int argc, char *argv[])
                 ret = select(maxfds + 1, &readfds, (fd_set *) 0,
                              (fd_set *) 0, &timeout);
                             
-               if (ret > 0) drv->td_do_callbacks(s, 0);
+               if (ret > 0) dd.drv->td_do_callbacks(&dd, 0);
                if (complete && (returned_events == submit_events)) 
                        running = 0;
        }
        memcpy(output+prev+1,"=",1);
        DFPRINTF("\r%s     100%%\nTRANSFER COMPLETE\n\n", output);
-        drv->td_close(s);
-        free(s->private);
+        dd.drv->td_close(&dd);
+        free(dd.private);
         free(s);
                
        return 0;
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/qcow2raw.c
--- a/tools/blktap/drivers/qcow2raw.c   Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/qcow2raw.c   Fri Feb 16 20:31:27 2007 -0800
@@ -55,8 +55,7 @@ static int returned_read_events = 0, ret
 static int returned_read_events = 0, returned_write_events = 0;
 static int submit_events = 0;
 static uint32_t read_idx = 0, write_idx = 0;
-struct tap_disk *drv1, *drv2;
-struct td_state *sqcow, *saio;
+struct disk_driver ddqcow, ddaio;
 static uint64_t prev = 0, written = 0;
 static char output[25];
 
@@ -100,7 +99,8 @@ static inline void LOCAL_FD_SET(fd_set *
        return;
 }
 
-static int send_write_responses(struct td_state *s, int res, int idx, void *private)
+static int send_write_responses(struct disk_driver *dd, int res, uint64_t sec,
+                               int nr_secs, int idx, void *private)
 {
        if (res < 0) {
                DFPRINTF("AIO FAILURE: res [%d]!\n",res);
@@ -112,12 +112,13 @@ static int send_write_responses(struct t
        if (complete && (returned_write_events == submit_events)) 
                write_complete = 1;
 
-       debug_output(written, s->size << 9);
+       debug_output(written, dd->td_state->size << 9);
        free(private);
        return 0;
 }
 
-static int send_read_responses(struct td_state *s, int res, int idx, void *private)
+static int send_read_responses(struct disk_driver *dd, int res, uint64_t sec,
+                              int nr_secs, int idx, void *private)
 {
        int ret;
 
@@ -128,8 +129,8 @@ static int send_read_responses(struct td
        if (complete && (returned_read_events == submit_events)) 
                read_complete = 1;
        
-       ret = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private, 
-                                  send_write_responses, idx, private);
+       ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private, 
+                                       send_write_responses, idx, private);
        if (ret != 0) {
                DFPRINTF("ERROR in submitting queue write!\n");
                return 0;
@@ -137,7 +138,7 @@ static int send_read_responses(struct td
 
        if ( (complete && returned_read_events == submit_events) || 
             (returned_read_events % 10 == 0) ) {
-               drv2->td_submit(saio);
+               ddaio.drv->td_submit(&ddaio);
        }
 
        return 0;
@@ -161,20 +162,20 @@ int main(int argc, char *argv[])
                exit(-1);
        }
 
-       sqcow = malloc(sizeof(struct td_state));
-       saio  = malloc(sizeof(struct td_state));
+       ddqcow.td_state = malloc(sizeof(struct td_state));
+       ddaio.td_state  = malloc(sizeof(struct td_state));
        
        /*Open qcow source file*/       
-       drv1 = &tapdisk_qcow;
-       sqcow->private = malloc(drv1->private_data_size);
-
-        if (drv1->td_open(sqcow, argv[2])!=0) {
+       ddqcow.drv = &tapdisk_qcow;
+       ddqcow.private = malloc(ddqcow.drv->private_data_size);
+
+        if (ddqcow.drv->td_open(&ddqcow, argv[2])!=0) {
                DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]);
                exit(-1);
        } else DFPRINTF("QCOW file opened, size %llu\n",
-                     (long long unsigned)sqcow->size);
-
-       qcowio_fd = drv1->td_get_fd(sqcow);
+                     (long long unsigned)ddqcow.td_state->size);
+
+       qcowio_fd = ddqcow.io_fd;
 
         /*Setup aio destination file*/
        ret = stat(argv[1],&finfo);
@@ -191,12 +192,12 @@ int main(int argc, char *argv[])
                                       argv[1], 0 - errno);
                                exit(-1);
                        }
-                       if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
+                       if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) {
                                DFPRINTF("Unable to create file "
                                        "[%s] of size %llu (errno %d). "
                                         "Exiting...\n",
                                        argv[1], 
-                                       (long long unsigned)sqcow->size<<9, 
+                                       (long long unsigned)ddqcow.td_state->size<<9, 
                                        0 - errno);
                                close(fd);
                                exit(-1);
@@ -238,43 +239,43 @@ int main(int argc, char *argv[])
                                close(fd);
                                exit(-1);
                        }
-                       if (size < sqcow->size<<9) {
+                       if (size < ddqcow.td_state->size<<9) {
                                DFPRINTF("ERROR: Not enough space on device "
                                        "%s (%lu bytes available, %llu bytes required\n",
                                        argv[1], size, 
-                                       (long long unsigned)sqcow->size<<9);
+                                       (long long unsigned)ddqcow.td_state->size<<9);
                                close(fd);
                                exit(-1);                               
                        }
                } else {
-                       if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
+                       if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) {
                                DFPRINTF("Unable to create file "
                                        "[%s] of size %llu (errno %d). "
                                         "Exiting...\n",
                                        argv[1], 
-                                       (long long unsigned)sqcow->size<<9, 
+                                       (long long unsigned)ddqcow.td_state->size<<9, 
                                         0 - errno);
                                close(fd);
                                exit(-1);
                        } else DFPRINTF("File [%s] truncated to length %llu "
                                        "(%llu)\n", 
                                       argv[1], 
-                                      (long long unsigned)sqcow->size<<9, 
-                                      (long long unsigned)sqcow->size);
+                                      (long long unsigned)ddqcow.td_state->size<<9, 
+                                      (long long unsigned)ddqcow.td_state->size);
                }
                close(fd);
        }
 
        /*Open aio destination file*/   
-       drv2 = &tapdisk_aio;
-       saio->private = malloc(drv2->private_data_size);
-
-        if (drv2->td_open(saio, argv[1])!=0) {
+       ddaio.drv = &tapdisk_aio;
+       ddaio.private = malloc(ddaio.drv->private_data_size);
+
+        if (ddaio.drv->td_open(&ddaio, argv[1])!=0) {
                DFPRINTF("Unable to open Qcow file [%s]\n", argv[1]);
                exit(-1);
        }
 
-       aio_fd = drv2->td_get_fd(saio);
+       aio_fd = ddaio.io_fd;
 
        /*Initialise the output string*/
        memset(output,0x20,25);
@@ -298,9 +299,9 @@ int main(int argc, char *argv[])
                        }
                
                        /*Attempt to read 4k sized blocks*/
-                       ret = drv1->td_queue_read(sqcow, i>>9,
-                                                 BLOCK_PROCESSSZ>>9, buf, 
-                                                 send_read_responses, i>>9, buf);
+                       ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
+                                                       BLOCK_PROCESSSZ>>9, buf, 
+                                                       send_read_responses, i>>9, buf);
 
                        if (ret < 0) {
                                DFPRINTF("UNABLE TO READ block [%llu]\n",
@@ -311,12 +312,12 @@ int main(int argc, char *argv[])
                                submit_events++;
                        }
 
-                       if (i >= sqcow->size<<9) {
+                       if (i >= ddqcow.td_state->size<<9) {
                                complete = 1;
                        }
                        
                        if ((submit_events % 10 == 0) || complete) 
-                               drv1->td_submit(sqcow);
+                               ddqcow.drv->td_submit(&ddqcow);
                        timeout.tv_usec = 0;
                        
                } else {
@@ -332,9 +333,9 @@ int main(int argc, char *argv[])
                             
                if (ret > 0) {
                        if (FD_ISSET(qcowio_fd[0], &readfds)) 
-                               drv1->td_do_callbacks(sqcow, 0);
+                               ddqcow.drv->td_do_callbacks(&ddqcow, 0);
                        if (FD_ISSET(aio_fd[0], &readfds)) 
-                               drv2->td_do_callbacks(saio, 0);
+                               ddaio.drv->td_do_callbacks(&ddaio, 0);
                }
                if (complete && (returned_write_events == submit_events)) 
                        running = 0;
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/tapdisk.c
--- a/tools/blktap/drivers/tapdisk.c    Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/tapdisk.c    Fri Feb 16 20:31:27 2007 -0800
@@ -48,6 +48,12 @@ int connected_disks = 0;
 int connected_disks = 0;
 fd_list_entry_t *fd_start = NULL;
 
+int do_cow_read(struct disk_driver *dd, blkif_request_t *req, 
+               int sidx, uint64_t sector, int nr_secs);
+
+#define td_for_each_disk(tds, drv) \
+        for (drv = tds->disks; drv != NULL; drv = drv->next)
+
 void usage(void) 
 {
        fprintf(stderr, "blktap-utils: v1.0.0\n");
@@ -78,10 +84,17 @@ static void unmap_disk(struct td_state *
 static void unmap_disk(struct td_state *s)
 {
        tapdev_info_t *info = s->ring_info;
-       struct tap_disk *drv = s->drv;
+       struct disk_driver *dd, *tmp;
        fd_list_entry_t *entry;
 
-       drv->td_close(s);
+       dd = s->disks;
+       while (dd) {
+               tmp = dd->next;
+               dd->drv->td_close(dd);
+               free(dd->private);
+               free(dd);
+               dd = tmp;
+       }
 
        if (info != NULL && info->mem > 0)
                munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
@@ -96,7 +109,6 @@ static void unmap_disk(struct td_state *
        free(s->fd_entry);
        free(s->blkif);
        free(s->ring_info);
-        free(s->private);
        free(s);
 
        return;
@@ -113,16 +125,19 @@ static inline int LOCAL_FD_SET(fd_set *r
 static inline int LOCAL_FD_SET(fd_set *readfds)
 {
        fd_list_entry_t *ptr;
+       struct disk_driver *dd;
 
        ptr = fd_start;
        while (ptr != NULL) {
                if (ptr->tap_fd) {
                        FD_SET(ptr->tap_fd, readfds);
-                       if (ptr->io_fd[READ]) 
-                               FD_SET(ptr->io_fd[READ], readfds);
-                       maxfds = (ptr->io_fd[READ] > maxfds ? 
-                                       ptr->io_fd[READ]: maxfds);
-                       maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds);
+                       td_for_each_disk(ptr->s, dd) {
+                               if (dd->io_fd[READ]) 
+                                       FD_SET(dd->io_fd[READ], readfds);
+                               maxfds = (dd->io_fd[READ] > maxfds ? 
+                                         dd->io_fd[READ] : maxfds);
+                       }
+                       maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds);
                }
                ptr = ptr->next;
        }
@@ -130,8 +145,7 @@ static inline int LOCAL_FD_SET(fd_set *r
        return 0;
 }
 
-static inline fd_list_entry_t *add_fd_entry(
-       int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
+static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
 {
        fd_list_entry_t **pprev, *entry;
        int i;
@@ -139,12 +153,10 @@ static inline fd_list_entry_t *add_fd_en
        DPRINTF("Adding fd_list_entry\n");
 
        /*Add to linked list*/
-       s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
+       s->fd_entry   = entry = malloc(sizeof(fd_list_entry_t));
        entry->tap_fd = tap_fd;
-       for (i = 0; i < MAX_IOFD; i++)
-               entry->io_fd[i] = io_fd[i];
-       entry->s = s;
-       entry->next = NULL;
+       entry->s      = s;
+       entry->next   = NULL;
 
        pprev = &fd_start;
        while (*pprev != NULL)
@@ -171,7 +183,7 @@ static struct tap_disk *get_driver(int d
 static struct tap_disk *get_driver(int drivertype)
 {
        /* blktapctrl has passed us the driver type */
-       
+
        return dtypes[drivertype]->drv;
 }
 
@@ -183,12 +195,34 @@ static struct td_state *state_init(void)
 
        s = malloc(sizeof(struct td_state));
        blkif = s->blkif = malloc(sizeof(blkif_t));
-       s->ring_info = malloc(sizeof(tapdev_info_t));
-
-       for (i = 0; i < MAX_REQUESTS; i++)
-               blkif->pending_list[i].count = 0;
+       s->ring_info = calloc(1, sizeof(tapdev_info_t));
+
+       for (i = 0; i < MAX_REQUESTS; i++) {
+               blkif->pending_list[i].secs_pending = 0;
+               blkif->pending_list[i].submitting = 0;
+       }
 
        return s;
+}
+
+static struct disk_driver *disk_init(struct td_state *s, struct tap_disk *drv)
+{
+       struct disk_driver *dd;
+
+       dd = calloc(1, sizeof(struct disk_driver));
+       if (!dd)
+               return NULL;
+       
+       dd->private = malloc(drv->private_data_size);
+       if (!dd->private) {
+               free(dd);
+               return NULL;
+       }
+
+       dd->drv      = drv;
+       dd->td_state = s;
+
+       return dd;
 }
 
 static int map_new_dev(struct td_state *s, int minor)
@@ -246,6 +280,51 @@ static int map_new_dev(struct td_state *
        return -1;
 }
 
+static int open_disk(struct td_state *s, struct disk_driver *dd, char *path)
+{
+       int err;
+       struct disk_driver *d = dd;
+
+       err = dd->drv->td_open(dd, path);
+       if (err)
+               return err;
+
+       /* load backing files as necessary */
+       while (d->drv->td_has_parent(d)) {
+               struct disk_driver *new;
+               
+               new = calloc(1, sizeof(struct disk_driver));
+               if (!new)
+                       goto fail;
+               new->drv      = d->drv;
+               new->td_state = s;
+               new->private  = malloc(new->drv->private_data_size);
+               if (!new->private) {
+                       free(new);
+                       goto fail;
+               }
+               
+               err = d->drv->td_get_parent(d, new);
+               if (err)
+                       goto fail;
+
+               d = d->next = new;
+       }
+
+       return 0;
+
+ fail:
+       DPRINTF("failed opening disk\n");
+       while (dd) {
+               d = dd->next;
+               dd->drv->td_close(dd);
+               free(dd->private);
+               free(dd);
+               dd = d;
+       }
+       return err;
+}
+
 static int read_msg(char *buf)
 {
        int length, len, msglen, tap_fd, *io_fd;
@@ -255,6 +334,7 @@ static int read_msg(char *buf)
        msg_newdev_t *msg_dev;
        msg_pid_t *msg_pid;
        struct tap_disk *drv;
+       struct disk_driver *dd;
        int ret = -1;
        struct td_state *s = NULL;
        fd_list_entry_t *entry;
@@ -289,20 +369,20 @@ static int read_msg(char *buf)
                        if (s == NULL)
                                goto params_done;
 
-                       s->drv = drv;
-                       s->private = malloc(drv->private_data_size);
-                       if (s->private == NULL) {
+                       s->disks = dd = disk_init(s, drv);
+                       if (!dd) {
                                free(s);
                                goto params_done;
                        }
 
                        /*Open file*/
-                       ret = drv->td_open(s, path);
-                       io_fd = drv->td_get_fd(s);
-
-                       entry = add_fd_entry(0, io_fd, s);
+                       ret = open_disk(s, dd, path);
+                       if (ret)
+                               goto params_done;
+
+                       entry = add_fd_entry(0, s);
                        entry->cookie = msg->cookie;
-                       DPRINTF("Entered cookie %d\n",entry->cookie);
+                       DPRINTF("Entered cookie %d\n", entry->cookie);
                        
                        memset(buf, 0x00, MSG_SIZE); 
                        
@@ -323,13 +403,12 @@ static int read_msg(char *buf)
                        free(path);
                        return 1;
                        
-                       
-                       
                case CTLMSG_NEWDEV:
                        msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
 
                        s = get_state(msg->cookie);
-                       DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK"));
+                       DPRINTF("Retrieving state, cookie %d.....[%s]\n",
+                               msg->cookie, (s == NULL ? "FAIL":"OK"));
                        if (s != NULL) {
                                ret = ((map_new_dev(s, msg_dev->devnum) 
                                        == msg_dev->devnum ? 0: -1));
@@ -397,49 +476,75 @@ static inline void kick_responses(struct
        }
 }
 
-void io_done(struct td_state *s, int sid)
-{
-       struct tap_disk *drv = s->drv;
+void io_done(struct disk_driver *dd, int sid)
+{
+       struct tap_disk *drv = dd->drv;
 
        if (!run) return; /*We have received signal to close*/
 
-       if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s);
+       if (drv->td_do_callbacks(dd, sid) > 0) kick_responses(dd->td_state);
 
        return;
 }
 
-int send_responses(struct td_state *s, int res, int idx, void *private)
-{
+static inline uint64_t
+segment_start(blkif_request_t *req, int sidx)
+{
+       int i;
+       uint64_t start = req->sector_number;
+
+       for (i = 0; i < sidx; i++) 
+               start += (req->seg[i].last_sect - req->seg[i].first_sect + 1);
+
+       return start;
+}
+
+uint64_t sends, responds;
+int send_responses(struct disk_driver *dd, int res, 
+                  uint64_t sector, int nr_secs, int idx, void *private)
+{
+       pending_req_t   *preq;
        blkif_request_t *req;
        int responses_queued = 0;
+       struct td_state *s = dd->td_state;
        blkif_t *blkif = s->blkif;
-
-       req   = &blkif->pending_list[idx].req;
-                       
-       if ( (idx > MAX_REQUESTS-1) || 
-           (blkif->pending_list[idx].count == 0) )
+       int sidx = (int)private, secs_done = nr_secs;
+
+       if ( (idx > MAX_REQUESTS-1) )
        {
                DPRINTF("invalid index returned(%u)!\n", idx);
                return 0;
        }
+       preq = &blkif->pending_list[idx];
+       req  = &preq->req;
+
+       if (res == BLK_NOT_ALLOCATED) {
+               res = do_cow_read(dd, req, sidx, sector, nr_secs);
+               if (res >= 0) {
+                       secs_done = res;
+                       res = 0;
+               } else
+                       secs_done = 0;
+       }
+
+       preq->secs_pending -= secs_done;
+
+       if (res == -EBUSY && preq->submitting) 
+               return -EBUSY;  /* propagate -EBUSY back to higher layers */
+       if (res) 
+               preq->status = BLKIF_RSP_ERROR;
        
-       if (res != 0) {
-               blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
-       }
-
-       blkif->pending_list[idx].count--;
-       
-       if (blkif->pending_list[idx].count == 0) 
+       if (!preq->submitting && preq->secs_pending == 0) 
        {
                blkif_request_t tmp;
                blkif_response_t *rsp;
-               
-               tmp = blkif->pending_list[idx].req;
+
+               tmp = preq->req;
                rsp = (blkif_response_t *)req;
                
                rsp->id = tmp.id;
                rsp->operation = tmp.operation;
-               rsp->status = blkif->pending_list[idx].status;
+               rsp->status = preq->status;
                
                write_rsp_to_ring(s, rsp);
                responses_queued++;
@@ -447,15 +552,51 @@ int send_responses(struct td_state *s, i
        return responses_queued;
 }
 
+int do_cow_read(struct disk_driver *dd, blkif_request_t *req, 
+               int sidx, uint64_t sector, int nr_secs)
+{
+       char *page;
+       int ret, early;
+       uint64_t seg_start, seg_end;
+       struct td_state  *s = dd->td_state;
+       tapdev_info_t *info = s->ring_info;
+       struct disk_driver *parent = dd->next;
+       
+       seg_start = segment_start(req, sidx);
+       seg_end   = seg_start + req->seg[sidx].last_sect + 1;
+       
+       ASSERT(sector >= seg_start && sector + nr_secs <= seg_end);
+
+       page  = (char *)MMAP_VADDR(info->vstart, 
+                                  (unsigned long)req->id, sidx);
+       page += (req->seg[sidx].first_sect << SECTOR_SHIFT);
+       page += ((sector - seg_start) << SECTOR_SHIFT);
+
+       if (!parent) {
+               memset(page, 0, nr_secs << SECTOR_SHIFT);
+               return nr_secs;
+       }
+
+       /* reissue request to backing file */
+       ret = parent->drv->td_queue_read(parent, sector, nr_secs,
+                                        page, send_responses, 
+                                        req->id, (void *)sidx);
+       if (ret > 0)
+               parent->early += ret;
+
+       return ((ret >= 0) ? 0 : ret);
+}
+
 static void get_io_request(struct td_state *s)
 {
-       RING_IDX          rp, rc, j, i, ret;
+       RING_IDX          rp, rc, j, i;
        blkif_request_t  *req;
-       int idx, nsects;
+       int idx, nsects, ret;
        uint64_t sector_nr;
        char *page;
        int early = 0; /* count early completions */
-       struct tap_disk *drv = s->drv;
+       struct disk_driver *dd = s->disks;
+       struct tap_disk *drv   = dd->drv;
        blkif_t *blkif = s->blkif;
        tapdev_info_t *info = s->ring_info;
        int page_size = getpagesize();
@@ -466,23 +607,33 @@ static void get_io_request(struct td_sta
        rmb();
        for (j = info->fe_ring.req_cons; j != rp; j++)
        {
-               int done = 0; 
+               int done = 0, start_seg = 0; 
 
                req = NULL;
                req = RING_GET_REQUEST(&info->fe_ring, j);
                ++info->fe_ring.req_cons;
                
                if (req == NULL) continue;
-               
+
                idx = req->id;
-               ASSERT(blkif->pending_list[idx].count == 0);
-               memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
-               blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
-               blkif->pending_list[idx].count = req->nr_segments;
-
-               sector_nr = req->sector_number;
-
-               for (i = 0; i < req->nr_segments; i++) {
+
+               if (info->busy.req) {
+                       /* continue where we left off last time */
+                       ASSERT(info->busy.req == req);
+                       start_seg = info->busy.seg_idx;
+                       sector_nr = segment_start(req, start_seg);
+                       info->busy.seg_idx = 0;
+                       info->busy.req     = NULL;
+               } else {
+                       ASSERT(blkif->pending_list[idx].secs_pending == 0);
+                       memcpy(&blkif->pending_list[idx].req, 
+                              req, sizeof(*req));
+                       blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
+                       blkif->pending_list[idx].submitting = 1;
+                       sector_nr = req->sector_number;
+               }
+
+               for (i = start_seg; i < req->nr_segments; i++) {
                        nsects = req->seg[i].last_sect - 
                                 req->seg[i].first_sect + 1;
        
@@ -508,31 +659,37 @@ static void get_io_request(struct td_sta
                                        (long long unsigned) sector_nr);
                                continue;
                        }
-                       
+
+                       blkif->pending_list[idx].secs_pending += nsects;
+
                        switch (req->operation) 
                        {
                        case BLKIF_OP_WRITE:
-                               ret = drv->td_queue_write(s, sector_nr,
-                                               nsects, page, send_responses, 
-                                               idx, NULL);
-                               if (ret > 0) early += ret;
+                               ret = drv->td_queue_write(dd, sector_nr,
+                                                         nsects, page, 
+                                                         send_responses,
+                                                         idx, (void *)i);
+                               if (ret > 0) dd->early += ret;
                                else if (ret == -EBUSY) {
-                                       /*
-                                        * TODO: Sector is locked         *
-                                        * Need to put req back on queue  *
-                                        */
+                                       /* put req back on queue */
+                                       --info->fe_ring.req_cons;
+                                       info->busy.req     = req;
+                                       info->busy.seg_idx = i;
+                                       goto out;
                                }
                                break;
                        case BLKIF_OP_READ:
-                               ret = drv->td_queue_read(s, sector_nr,
-                                               nsects, page, send_responses, 
-                                               idx, NULL);
-                               if (ret > 0) early += ret;
+                               ret = drv->td_queue_read(dd, sector_nr,
+                                                        nsects, page, 
+                                                        send_responses,
+                                                        idx, (void *)i);
+                               if (ret > 0) dd->early += ret;
                                else if (ret == -EBUSY) {
-                                       /*
-                                        * TODO: Sector is locked         *
-                                        * Need to put req back on queue  *
-                                        */
+                                       /* put req back on queue */
+                                       --info->fe_ring.req_cons;
+                                       info->busy.req     = req;
+                                       info->busy.seg_idx = i;
+                                       goto out;
                                }
                                break;
                        default:
@@ -541,14 +698,22 @@ static void get_io_request(struct td_sta
                        }
                        sector_nr += nsects;
                }
-       }
-
+               blkif->pending_list[idx].submitting = 0;
+               /* force write_rsp_to_ring for synchronous case */
+               if (blkif->pending_list[idx].secs_pending == 0)
+                       dd->early += send_responses(dd, 0, 0, 0, idx, (void *)0);
+       }
+
+ out:
        /*Batch done*/
-       drv->td_submit(s);
-       
-       if (early > 0) 
-               io_done(s,10);
-               
+       td_for_each_disk(s, dd) {
+               dd->early += dd->drv->td_submit(dd);
+               if (dd->early > 0) {
+                       io_done(dd, 10);
+                       dd->early = 0;
+               }
+       }
+
        return;
 }
 
@@ -558,10 +723,9 @@ int main(int argc, char *argv[])
        char *p, *buf;
        fd_set readfds, writefds;       
        fd_list_entry_t *ptr;
-       struct tap_disk *drv;
        struct td_state *s;
        char openlogbuf[128];
-       
+
        if (argc != 3) usage();
 
        daemonize();
@@ -573,12 +737,12 @@ int main(int argc, char *argv[])
        signal (SIGINT, sig_handler);
 
        /*Open the control channel*/
-       fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
+       fds[READ]  = open(argv[1],O_RDWR|O_NONBLOCK);
        fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);
 
        if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) 
        {
-               DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]);
+               DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]);
                exit(-1);
        }
 
@@ -608,11 +772,22 @@ int main(int argc, char *argv[])
                {
                        ptr = fd_start;
                        while (ptr != NULL) {
-                               if (FD_ISSET(ptr->tap_fd, &readfds)) 
+                               int progress_made = 0;
+                               struct disk_driver *dd;
+                               tapdev_info_t *info = ptr->s->ring_info;
+
+                               td_for_each_disk(ptr->s, dd) {
+                                       if (dd->io_fd[READ] &&
+                                           FD_ISSET(dd->io_fd[READ], 
+                                                    &readfds)) {
+                                               io_done(dd, READ);
+                                               progress_made = 1;
+                                       }
+                               }
+
+                               if (FD_ISSET(ptr->tap_fd, &readfds) ||
+                                   (info->busy.req && progress_made))
                                        get_io_request(ptr->s);
-                               if (ptr->io_fd[READ] && 
-                                               FD_ISSET(ptr->io_fd[READ], &readfds)) 
-                                       io_done(ptr->s, READ);
 
                                ptr = ptr->next;
                        }
@@ -628,11 +803,8 @@ int main(int argc, char *argv[])
        ptr = fd_start;
        while (ptr != NULL) {
                s = ptr->s;
-               drv = s->drv;
 
                unmap_disk(s);
-               drv->td_close(s);
-               free(s->private);
                free(s->blkif);
                free(s->ring_info);
                free(s);
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/drivers/tapdisk.h
--- a/tools/blktap/drivers/tapdisk.h    Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/drivers/tapdisk.h    Fri Feb 16 20:31:27 2007 -0800
@@ -43,6 +43,9 @@
  *   - The fd used for poll is an otherwise unused pipe, which allows poll to 
  *     be safely called without ever returning anything.
  * 
+ * NOTE: tapdisk uses the number of sectors submitted per request as a 
+ * ref count.  Plugins must use the callback function to communicate the
+ * completion--or error--of every sector submitted to them.
  */
 
 #ifndef TAPDISK_H_
@@ -65,39 +68,55 @@
 #define SECTOR_SHIFT             9
 #define DEFAULT_SECTOR_SIZE    512
 
+#define MAX_IOFD                 2
+
+#define BLK_NOT_ALLOCATED       99
+
+struct td_state;
+struct tap_disk;
+
+struct disk_driver {
+       int early;
+       void *private;
+       int io_fd[MAX_IOFD];
+       struct tap_disk *drv;
+       struct td_state *td_state;
+       struct disk_driver *next;
+};
+
 /* This structure represents the state of an active virtual disk.           */
 struct td_state {
-       void *private;
-       void *drv;
+       struct disk_driver *disks;
        void *blkif;
        void *image;
        void *ring_info;
        void *fd_entry;
-       char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/
        unsigned long      sector_size;
        unsigned long long size;
        unsigned int       info;
 };
 
 /* Prototype of the callback to activate as requests complete.              */
-typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv);
+typedef int (*td_callback_t)(struct disk_driver *dd, int res, uint64_t sector,
+                            int nb_sectors, int id, void *private);
 
 /* Structure describing the interface to a virtual disk implementation.     */
 /* See note at the top of this file describing this interface.              */
 struct tap_disk {
        const char *disk_type;
        int private_data_size;
-       int (*td_open)        (struct td_state *s, const char *name);
-       int (*td_queue_read)  (struct td_state *s, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
+       int (*td_open)        (struct disk_driver *dd, const char *name);
+       int (*td_queue_read)  (struct disk_driver *dd, uint64_t sector,
+                              int nb_sectors, char *buf, td_callback_t cb, 
                               int id, void *prv);
-       int (*td_queue_write) (struct td_state *s, uint64_t sector,
-                              int nb_sectors, char *buf, td_callback_t cb,
+       int (*td_queue_write) (struct disk_driver *dd, uint64_t sector,
+                              int nb_sectors, char *buf, td_callback_t cb, 
                               int id, void *prv);
-       int (*td_submit)      (struct td_state *s);
-       int *(*td_get_fd)      (struct td_state *s);
-       int (*td_close)       (struct td_state *s);
-       int (*td_do_callbacks)(struct td_state *s, int sid);
+       int (*td_submit)      (struct disk_driver *dd);
+       int (*td_has_parent)  (struct disk_driver *dd);
+       int (*td_get_parent)  (struct disk_driver *dd, struct disk_driver *p);
+       int (*td_close)       (struct disk_driver *dd);
+       int (*td_do_callbacks)(struct disk_driver *dd, int sid);
 };
 
 typedef struct disk_info {
@@ -119,14 +138,13 @@ extern struct tap_disk tapdisk_ram;
 extern struct tap_disk tapdisk_ram;
 extern struct tap_disk tapdisk_qcow;
 
-#define MAX_DISK_TYPES  20
-#define MAX_IOFD        2
-
-#define DISK_TYPE_AIO   0
-#define DISK_TYPE_SYNC  1
-#define DISK_TYPE_VMDK  2
-#define DISK_TYPE_RAM   3
-#define DISK_TYPE_QCOW  4
+#define MAX_DISK_TYPES     20
+
+#define DISK_TYPE_AIO      0
+#define DISK_TYPE_SYNC     1
+#define DISK_TYPE_VMDK     2
+#define DISK_TYPE_RAM      3
+#define DISK_TYPE_QCOW     4
 
 
 /*Define Individual Disk Parameters here */
@@ -197,12 +215,10 @@ typedef struct fd_list_entry {
 typedef struct fd_list_entry {
        int cookie;
        int  tap_fd;
-       int  io_fd[MAX_IOFD];
        struct td_state *s;
        struct fd_list_entry **pprev, *next;
 } fd_list_entry_t;
 
 int qcow_create(const char *filename, uint64_t total_size,
                const char *backing_file, int flags);
-
 #endif /*TAPDISK_H_*/
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/lib/blktaplib.h
--- a/tools/blktap/lib/blktaplib.h      Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/lib/blktaplib.h      Fri Feb 16 20:31:27 2007 -0800
@@ -91,8 +91,9 @@ struct blkif;
 
 typedef struct {
        blkif_request_t  req;
-       struct blkif         *blkif;
-       int              count;
+       struct blkif    *blkif;
+       int              submitting;
+       int              secs_pending;
         int16_t          status;
 } pending_req_t;
 
@@ -116,7 +117,7 @@ typedef struct blkif {
        
        void *prv;  /* device-specific data */
        void *info; /*Image parameter passing */
-       pending_req_t    pending_list[MAX_REQUESTS];
+       pending_req_t pending_list[MAX_REQUESTS];
        int devnum;
        int fds[2];
        int be_id;
@@ -141,6 +142,11 @@ void free_blkif(blkif_t *blkif);
 void free_blkif(blkif_t *blkif);
 void __init_blkif(void);
 
+typedef struct busy_state {
+       int seg_idx;
+       blkif_request_t *req;
+} busy_state_t;
+
 typedef struct tapdev_info {
        int fd;
        char *mem;
@@ -148,6 +154,7 @@ typedef struct tapdev_info {
        blkif_back_ring_t  fe_ring;
        unsigned long vstart;
        blkif_t *blkif;
+       busy_state_t busy;
 } tapdev_info_t;
 
 typedef struct domid_translate {
diff -r 32a059913591 -r 3c827d68fa87 tools/blktap/lib/xs_api.c
--- a/tools/blktap/lib/xs_api.c Fri Feb 16 16:34:28 2007 +0000
+++ b/tools/blktap/lib/xs_api.c Fri Feb 16 20:31:27 2007 -0800
@@ -311,8 +311,8 @@ int unregister_xenbus_watch(struct xs_ha
        }
 
        if (!xs_unwatch(h, watch->node, token))
-               DPRINTF("XENBUS Failed to release watch %s: %i\n",
-                       watch->node, er);
+               DPRINTF("XENBUS Failed to release watch %s\n",
+                       watch->node);
 
        list_del(&watch->list);
        
@@ -351,9 +351,9 @@ int xs_fire_next_watch(struct xs_handle 
        
        node  = res[XS_WATCH_PATH];
        token = res[XS_WATCH_TOKEN];
-       
+
        w = find_watch(token);
-       if (w)
+       if (w) 
                w->callback(h, w, node);
 
        free(res);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] [TAPDISK] add tapdisk support for image chaining, Xen patchbot-unstable <=