With my qcow2 patch to blktap I contributed a bit to the mess, so here
is an attempt to clean up and remove at least some of the code
duplication in the functions dealing with aio operations.
This patch is mostly just moving code. Apart from renaming to get common
names and such things, only very few changes are needed as big parts of
the code were completely identical and obviously created by copy&paste
without any change.
If there are problems with the patch: I might not be able to answer
e-mails until Tuesday, so don't be surprised if I don't reply right away.
Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
diff -r 081d0accc0e1 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c Fri Feb 22 11:31:06 2008 +0100
+++ b/tools/blktap/drivers/block-aio.c Fri Feb 22 11:31:39 2008 +0100
@@ -52,28 +52,11 @@
#define O_LARGEFILE 0
#endif
-struct pending_aio {
- td_callback_t cb;
- int id;
- void *private;
- uint64_t lsec;
-};
-
struct tdaio_state {
int fd;
-
- /* libaio state */
- tap_aio_context_t aio_ctx;
- struct iocb iocb_list [MAX_AIO_REQS];
- struct iocb *iocb_free [MAX_AIO_REQS];
- struct pending_aio pending_aio[MAX_AIO_REQS];
- int iocb_free_count;
- struct iocb *iocb_queue[MAX_AIO_REQS];
- int iocb_queued;
- struct io_event aio_events[MAX_AIO_REQS];
+ tap_aio_context_t aio;
};
-#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
/*Get Image size, secsize*/
static int get_image_info(struct td_state *s, int fd)
@@ -131,7 +114,7 @@ static inline void init_fds(struct disk_
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = prv->aio_ctx.pollfd;
+ dd->io_fd[0] = prv->aio.aio_ctx.pollfd;
}
/* Open the disk file and initialize aio state. */
@@ -142,27 +125,11 @@ int tdaio_open (struct disk_driver *dd,
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
DPRINTF("block-aio open('%s')", name);
+
/* Initialize AIO */
- prv->iocb_free_count = MAX_AIO_REQS;
- prv->iocb_queued = 0;
-
- ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
- if (ret < 0) {
- if (ret == -EAGAIN) {
- DPRINTF("Couldn't setup AIO context. If you are "
- "trying to concurrently use a large number "
- "of blktap-based disks, you may need to "
- "increase the system-wide aio request limit. "
- "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- } else {
- DPRINTF("Couldn't setup AIO context.\n");
- }
- goto done;
- }
-
- for (i=0;i<MAX_AIO_REQS;i++)
- prv->iocb_free[i] = &prv->iocb_list[i];
+ ret = tap_aio_init(&prv->aio, 0, MAX_AIO_REQS);
+ if (ret != 0)
+ return ret;
/* Open the file */
o_flags = O_DIRECT | O_LARGEFILE |
@@ -198,87 +165,40 @@ int tdaio_queue_read(struct disk_driver
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct iocb *io;
- struct pending_aio *pio;
struct td_state *s = dd->td_state;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
- long ioidx;
-
- if (prv->iocb_free_count == 0)
- return -ENOMEM;
- io = prv->iocb_free[--prv->iocb_free_count];
-
- ioidx = IOCB_IDX(prv, io);
- pio = &prv->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->lsec = sector;
-
- io_prep_pread(io, prv->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- prv->iocb_queue[prv->iocb_queued++] = io;
-
- return 0;
+
+ return tap_aio_read(&prv->aio, prv->fd, size, offset, buf,
+ cb, id, sector, private);
}
int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct iocb *io;
- struct pending_aio *pio;
struct td_state *s = dd->td_state;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
- long ioidx;
+
+ return tap_aio_write(&prv->aio, prv->fd, size, offset, buf,
+ cb, id, sector, private);
+}
+
+int tdaio_submit(struct disk_driver *dd)
+{
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
+
+ return tap_aio_submit(&prv->aio);
+}
+
+int tdaio_close(struct disk_driver *dd)
+{
+ struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- if (prv->iocb_free_count == 0)
- return -ENOMEM;
- io = prv->iocb_free[--prv->iocb_free_count];
-
- ioidx = IOCB_IDX(prv, io);
- pio = &prv->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->lsec = sector;
-
- io_prep_pwrite(io, prv->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- prv->iocb_queue[prv->iocb_queued++] = io;
-
- return 0;
-}
-
-int tdaio_submit(struct disk_driver *dd)
-{
- int ret;
- struct tdaio_state *prv = (struct tdaio_state *)dd->private;
-
- if (!prv->iocb_queued)
- return 0;
-
- ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued,
prv->iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->iocb_queued = 0;
-
- return 0;
-}
-
-int tdaio_close(struct disk_driver *dd)
-{
- struct tdaio_state *prv = (struct tdaio_state *)dd->private;
-
- io_destroy(prv->aio_ctx.aio_ctx);
+ io_destroy(prv->aio.aio_ctx.aio_ctx);
close(prv->fd);
return 0;
@@ -290,26 +210,26 @@ int tdaio_do_callbacks(struct disk_drive
struct io_event *ep;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- nr_events = tap_aio_get_events(&prv->aio_ctx);
+ nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
repeat:
- for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
+ for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
- pio = &prv->pending_aio[(long)io->data];
+ pio = &prv->aio.pending_aio[(long)io->data];
rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
- pio->lsec, io->u.c.nbytes >> 9,
+ pio->sector, io->u.c.nbytes >> 9,
pio->id, pio->private);
- prv->iocb_free[prv->iocb_free_count++] = io;
+ prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
}
if (nr_events) {
- nr_events = tap_aio_more_events(&prv->aio_ctx);
+ nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
goto repeat;
}
- tap_aio_continue(&prv->aio_ctx);
+ tap_aio_continue(&prv->aio.aio_ctx);
return rsp;
}
diff -r 081d0accc0e1 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Fri Feb 22 11:31:06 2008 +0100
+++ b/tools/blktap/drivers/block-qcow.c Fri Feb 22 11:31:39 2008 +0100
@@ -59,15 +59,7 @@
(l + (s - 1)) - ((l + (s - 1)) % s)); \
})
-struct pending_aio {
- td_callback_t cb;
- int id;
- void *private;
- int nb_sectors;
- char *buf;
- uint64_t sector;
-};
-
+#undef IOCB_IDX
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
#define ZERO_TEST(_b) (_b | 0x00)
@@ -140,109 +132,18 @@ struct tdqcow_state {
uint32_t l2_cache_counts[L2_CACHE_SIZE]; /*Cache access record*/
uint8_t *cluster_cache;
uint8_t *cluster_data;
- uint8_t *sector_lock; /*Locking bitmap for AIO reads/writes*/
uint64_t cluster_cache_offset; /**/
uint32_t crypt_method; /*current crypt method, 0 if no
*key yet */
uint32_t crypt_method_header; /**/
AES_KEY aes_encrypt_key; /*AES key*/
AES_KEY aes_decrypt_key; /*AES key*/
- /* libaio state */
- tap_aio_context_t aio_ctx;
- int max_aio_reqs;
- struct iocb *iocb_list;
- struct iocb **iocb_free;
- struct pending_aio *pending_aio;
- int iocb_free_count;
- struct iocb **iocb_queue;
- int iocb_queued;
- struct io_event *aio_events;
+
+ /* libaio state */
+ tap_aio_context_t aio;
};
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
-
-static void free_aio_state(struct disk_driver *dd)
-{
- struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
-
- if (s->sector_lock)
- free(s->sector_lock);
- if (s->iocb_list)
- free(s->iocb_list);
- if (s->pending_aio)
- free(s->pending_aio);
- if (s->aio_events)
- free(s->aio_events);
- if (s->iocb_free)
- free(s->iocb_free);
- if (s->iocb_queue)
- free(s->iocb_queue);
-}
-
-static int init_aio_state(struct disk_driver *dd)
-{
- int i, ret;
- struct td_state *bs = dd->td_state;
- struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
- long ioidx;
-
- s->iocb_list = NULL;
- s->pending_aio = NULL;
- s->aio_events = NULL;
- s->iocb_free = NULL;
- s->iocb_queue = NULL;
-
- /*Initialize Locking bitmap*/
- s->sector_lock = calloc(1, bs->size);
-
- if (!s->sector_lock) {
- DPRINTF("Failed to allocate sector lock\n");
- goto fail;
- }
-
- /* A segment (i.e. a page) can span multiple clusters */
- s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
- MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
-
- /* Initialize AIO */
- s->iocb_free_count = s->max_aio_reqs;
- s->iocb_queued = 0;
-
- if (!(s->iocb_list = malloc(sizeof(struct iocb) * s->max_aio_reqs)) ||
- !(s->pending_aio = malloc(sizeof(struct pending_aio) *
s->max_aio_reqs)) ||
- !(s->aio_events = malloc(sizeof(struct io_event) *
s->max_aio_reqs)) ||
- !(s->iocb_free = malloc(sizeof(struct iocb *) * s->max_aio_reqs))
||
- !(s->iocb_queue = malloc(sizeof(struct iocb *) *
s->max_aio_reqs))) {
- DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
- s->max_aio_reqs);
- goto fail;
- }
-
- ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
- if (ret < 0) {
- if (ret == -EAGAIN) {
- DPRINTF("Couldn't setup AIO context. If you are "
- "trying to concurrently use a large number "
- "of blktap-based disks, you may need to "
- "increase the system-wide aio request limit. "
- "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- } else {
- DPRINTF("Couldn't setup AIO context.\n");
- }
- goto fail;
- }
-
- for (i=0;i<s->max_aio_reqs;i++)
- s->iocb_free[i] = &s->iocb_list[i];
-
- DPRINTF("AIO state initialised\n");
-
- return 0;
-
- fail:
- return -1;
-}
static uint32_t gen_cksum(char *ptr, int len)
{
@@ -337,79 +238,6 @@ static int qcow_set_key(struct tdqcow_st
}
#endif
return 0;
-}
-
-static int async_read(struct tdqcow_state *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->iocb_free[--s->iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pread(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->iocb_queue[s->iocb_queued++] = io;
-
- return 1;
-}
-
-static int async_write(struct tdqcow_state *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->iocb_free[--s->iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pwrite(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->iocb_queue[s->iocb_queued++] = io;
-
- return 1;
-}
-
-/*TODO: Fix sector span!*/
-static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
-{
- return (s->sector_lock[sector] ? 0 : 1);
-}
-
-static int aio_lock(struct tdqcow_state *s, uint64_t sector)
-{
- return ++s->sector_lock[sector];
-}
-
-static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
-{
- if (!s->sector_lock[sector]) return;
-
- --s->sector_lock[sector];
- return;
}
/*
@@ -841,13 +669,14 @@ static inline void init_fds(struct disk_
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = s->aio_ctx.pollfd;
+ dd->io_fd[0] = s->aio.aio_ctx.pollfd;
}
/* Open the disk file and initialize qcow state. */
int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
{
int fd, len, i, shift, ret, size, l1_table_size, o_flags;
+ int max_aio_reqs;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
char *buf;
@@ -996,9 +825,14 @@ int tdqcow_open (struct disk_driver *dd,
}
end_xenhdr:
- if (init_aio_state(dd)!=0) {
+
+ /* A segment (i.e. a page) can span multiple clusters */
+ max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
+ MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
+
+ if (tap_aio_init(&s->aio, bs->size, max_aio_reqs)!=0) {
DPRINTF("Unable to initialise AIO state\n");
- free_aio_state(dd);
+ tap_aio_free(&s->aio);
goto fail;
}
init_fds(dd);
@@ -1015,7 +849,7 @@ int tdqcow_open (struct disk_driver *dd,
fail:
DPRINTF("QCOW Open failed\n");
- free_aio_state(dd);
+ tap_aio_free(&s->aio);
free(s->l1_table);
free(s->l2_cache);
free(s->cluster_cache);
@@ -1037,7 +871,7 @@ int tdqcow_queue_read(struct disk_driver
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->aio, sector + i))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
/*We store a local record of the request*/
@@ -1049,11 +883,11 @@ int tdqcow_queue_read(struct disk_driver
if (n > nb_sectors)
n = nb_sectors;
- if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio,
sector))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
if(!cluster_offset) {
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->aio, sector);
ret = cb(dd, BLK_NOT_ALLOCATED,
sector, n, id, private);
if (ret == -EBUSY) {
@@ -1064,7 +898,7 @@ int tdqcow_queue_read(struct disk_driver
} else
rsp += ret;
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->aio, sector);
if (decompress_cluster(s, cluster_offset) < 0) {
rsp += cb(dd, -EIO, sector,
nb_sectors, id, private);
@@ -1074,7 +908,7 @@ int tdqcow_queue_read(struct disk_driver
512 * n);
rsp += cb(dd, 0, sector, n, id, private);
} else {
- async_read(s, n * 512,
+ tap_aio_read(&s->aio, s->fd, n * 512,
(cluster_offset + index_in_cluster * 512),
buf, cb, id, sector, private);
}
@@ -1099,7 +933,7 @@ int tdqcow_queue_write(struct disk_drive
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->aio, sector + i))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
/*We store a local record of the request*/
@@ -1109,7 +943,7 @@ int tdqcow_queue_write(struct disk_drive
if (n > nb_sectors)
n = nb_sectors;
- if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio,
sector))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
@@ -1117,7 +951,7 @@ int tdqcow_queue_write(struct disk_drive
index_in_cluster+n);
if (!cluster_offset) {
DPRINTF("Ooops, no write cluster offset!\n");
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->aio, sector);
return cb(dd, -EIO, sector, nb_sectors, id, private);
}
@@ -1125,12 +959,12 @@ int tdqcow_queue_write(struct disk_drive
encrypt_sectors(s, sector, s->cluster_data,
(unsigned char *)buf, n, 1,
&s->aes_encrypt_key);
- async_write(s, n * 512,
+ tap_aio_write(&s->aio, s->fd, n * 512,
(cluster_offset + index_in_cluster*512),
(char *)s->cluster_data, cb, id, sector,
private);
} else {
- async_write(s, n * 512,
+ tap_aio_write(&s->aio, s->fd, n * 512,
(cluster_offset + index_in_cluster*512),
buf, cb, id, sector, private);
}
@@ -1146,20 +980,9 @@ int tdqcow_queue_write(struct disk_drive
int tdqcow_submit(struct disk_driver *dd)
{
- int ret;
- struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
-
- if (!prv->iocb_queued)
- return 0;
-
- ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued,
prv->iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->iocb_queued = 0;
-
- return 0;
+ struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
+
+ return tap_aio_submit(&prv->aio);
}
int tdqcow_close(struct disk_driver *dd)
@@ -1180,7 +1003,7 @@ int tdqcow_close(struct disk_driver *dd)
close(fd);
}
- io_destroy(s->aio_ctx.aio_ctx);
+ io_destroy(s->aio.aio_ctx.aio_ctx);
free(s->name);
free(s->l1_table);
free(s->l2_cache);
@@ -1198,15 +1021,15 @@ int tdqcow_do_callbacks(struct disk_driv
if (sid > MAX_IOFD) return 1;
- nr_events = tap_aio_get_events(&prv->aio_ctx);
+ nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
repeat:
- for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
+ for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
- pio = &prv->pending_aio[(long)io->data];
-
- aio_unlock(prv, pio->sector);
+ pio = &prv->aio.pending_aio[(long)io->data];
+
+ tap_aio_unlock(&prv->aio, pio->sector);
if (prv->crypt_method)
encrypt_sectors(prv, pio->sector,
@@ -1219,15 +1042,15 @@ repeat:
pio->sector, pio->nb_sectors,
pio->id, pio->private);
- prv->iocb_free[prv->iocb_free_count++] = io;
+ prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
}
if (nr_events) {
- nr_events = tap_aio_more_events(&prv->aio_ctx);
+ nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
goto repeat;
}
- tap_aio_continue(&prv->aio_ctx);
+ tap_aio_continue(&prv->aio.aio_ctx);
return rsp;
}
diff -r 081d0accc0e1 tools/blktap/drivers/block-qcow2.c
--- a/tools/blktap/drivers/block-qcow2.c Fri Feb 22 11:31:06 2008 +0100
+++ b/tools/blktap/drivers/block-qcow2.c Fri Feb 22 11:31:39 2008 +0100
@@ -145,20 +145,7 @@ typedef struct BDRVQcowState {
int64_t total_sectors;
-
- struct {
- tap_aio_context_t aio_ctx;
- int max_aio_reqs;
- struct iocb *iocb_list;
- struct iocb **iocb_free;
- struct pending_aio *pending_aio;
- int iocb_free_count;
- struct iocb **iocb_queue;
- int iocb_queued;
- struct io_event *aio_events;
-
- uint8_t *sector_lock; /*Locking bitmap for AIO
reads/writes*/
- } async;
+ tap_aio_context_t async;
/* Original qemu variables */
int cluster_bits;
@@ -222,9 +209,6 @@ static void check_refcounts(struct disk_
static void check_refcounts(struct disk_driver *bs);
#endif
-static int init_aio_state(struct disk_driver *bs);
-static void free_aio_state(struct disk_driver *bs);
-
static int qcow_sync_read(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *prv);
@@ -309,7 +293,7 @@ static int qcow_open(struct disk_driver
static int qcow_open(struct disk_driver *bs, const char *filename, td_flag_t
flags)
{
BDRVQcowState *s = bs->private;
- int len, i, shift, ret;
+ int len, i, shift, ret, max_aio_reqs;
QCowHeader header;
int fd, o_flags;
@@ -475,9 +459,14 @@ static int qcow_open(struct disk_driver
#ifdef USE_AIO
/* Initialize AIO */
- if (init_aio_state(bs)!=0) {
+
+ /* A segment (i.e. a page) can span multiple clusters */
+ max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
+ MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
+
+ if (tap_aio_init(&s->async, bs->td_state->size, max_aio_reqs)) {
DPRINTF("Unable to initialise AIO state\n");
- free_aio_state(bs);
+ tap_aio_free(&s->async);
goto fail;
}
@@ -496,7 +485,7 @@ static int qcow_open(struct disk_driver
DPRINTF("qcow_open failed\n");
#ifdef USE_AIO
- free_aio_state(bs);
+ tap_aio_free(&s->async);
#endif
qcow_free_snapshots(bs);
@@ -1070,200 +1059,6 @@ static int qcow_write(struct disk_driver
#ifdef USE_AIO
/*
- * General AIO helper functions
- */
-
-#define IOCB_IDX(_s, _io) ((_io) - (_s)->async.iocb_list)
-
-struct pending_aio {
- td_callback_t cb;
- int id;
- void *private;
- int nb_sectors;
- char *buf;
- uint64_t sector;
-};
-
-
-static int init_aio_state(struct disk_driver *dd)
-{
- int i, ret;
- struct td_state *bs = dd->td_state;
- struct BDRVQcowState *s = (struct BDRVQcowState*) dd->private;
- long ioidx;
-
- s->async.iocb_list = NULL;
- s->async.pending_aio = NULL;
- s->async.aio_events = NULL;
- s->async.iocb_free = NULL;
- s->async.iocb_queue = NULL;
-
- /*Initialize Locking bitmap*/
- s->async.sector_lock = calloc(1, bs->size);
-
- if (!s->async.sector_lock) {
- DPRINTF("Failed to allocate sector lock\n");
- goto fail;
- }
-
- /* A segment (i.e. a page) can span multiple clusters */
- s->async.max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
- MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
-
- /* Initialize AIO */
- s->async.iocb_free_count = s->async.max_aio_reqs;
- s->async.iocb_queued = 0;
-
- if (!(s->async.iocb_list = malloc(sizeof(struct iocb) *
s->async.max_aio_reqs)) ||
- !(s->async.pending_aio = malloc(sizeof(struct pending_aio) *
s->async.max_aio_reqs)) ||
- !(s->async.aio_events = malloc(sizeof(struct io_event) *
s->async.max_aio_reqs)) ||
- !(s->async.iocb_free = malloc(sizeof(struct iocb *) *
s->async.max_aio_reqs)) ||
- !(s->async.iocb_queue = malloc(sizeof(struct iocb *) *
s->async.max_aio_reqs)))
- {
- DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
- s->async.max_aio_reqs);
- goto fail;
- }
-
- ret = tap_aio_setup(&s->async.aio_ctx, s->async.aio_events,
s->async.max_aio_reqs);
- if (ret < 0) {
- if (ret == -EAGAIN) {
- DPRINTF("Couldn't setup AIO context. If you are "
- "trying to concurrently use a large number "
- "of blktap-based disks, you may need to "
- "increase the system-wide aio request limit. "
- "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- } else {
- DPRINTF("Couldn't setup AIO context.\n");
- }
- goto fail;
- }
-
- for (i=0;i<s->async.max_aio_reqs;i++)
- s->async.iocb_free[i] = &s->async.iocb_list[i];
-
- DPRINTF("AIO state initialised\n");
-
- return 0;
-
-fail:
- return -1;
-}
-
-static void free_aio_state(struct disk_driver *dd)
-{
- struct BDRVQcowState *s = (struct BDRVQcowState*) dd->private;
-
- if (s->async.sector_lock)
- free(s->async.sector_lock);
- if (s->async.iocb_list)
- free(s->async.iocb_list);
- if (s->async.pending_aio)
- free(s->async.pending_aio);
- if (s->async.aio_events)
- free(s->async.aio_events);
- if (s->async.iocb_free)
- free(s->async.iocb_free);
- if (s->async.iocb_queue)
- free(s->async.iocb_queue);
-}
-
-static int async_read(struct BDRVQcowState *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->async.iocb_free[--s->async.iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->async.pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pread(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->async.iocb_queue[s->async.iocb_queued++] = io;
-
- return 1;
-}
-
-static int async_write(struct BDRVQcowState *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->async.iocb_free[--s->async.iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->async.pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pwrite(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->async.iocb_queue[s->async.iocb_queued++] = io;
-
- return 1;
-}
-
-static int async_submit(struct disk_driver *dd)
-{
- int ret;
- struct BDRVQcowState *prv = (struct BDRVQcowState*) dd->private;
-
- if (!prv->async.iocb_queued)
- return 0;
-
- ret = io_submit(prv->async.aio_ctx.aio_ctx, prv->async.iocb_queued,
prv->async.iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->async.iocb_queued = 0;
-
- return 0;
-}
-
-/*TODO: Fix sector span!*/
-static int aio_can_lock(struct BDRVQcowState *s, uint64_t sector)
-{
- return (s->async.sector_lock[sector] ? 0 : 1);
-}
-
-static int aio_lock(struct BDRVQcowState *s, uint64_t sector)
-{
- return ++s->async.sector_lock[sector];
-}
-
-static void aio_unlock(struct BDRVQcowState *s, uint64_t sector)
-{
- if (!s->async.sector_lock[sector]) return;
-
- --s->async.sector_lock[sector];
- return;
-}
-
-
-
-
-/*
* QCOW2 specific AIO functions
*/
@@ -1278,7 +1073,7 @@ static int qcow_queue_read(struct disk_d
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->async, sector + i))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
while (nb_sectors > 0) {
@@ -1290,13 +1085,13 @@ static int qcow_queue_read(struct disk_d
if (n > nb_sectors)
n = nb_sectors;
- if (s->async.iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->async.iocb_free_count == 0 || !tap_aio_lock(&s->async,
sector))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
if (!cluster_offset) {
/* The requested sector is not allocated */
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->async, sector);
ret = cb(bs, BLK_NOT_ALLOCATED,
sector, n, id, private);
if (ret == -EBUSY) {
@@ -1311,7 +1106,7 @@ static int qcow_queue_read(struct disk_d
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* sync read for compressed clusters */
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->async, sector);
if (decompress_cluster(s, cluster_offset) < 0) {
rsp += cb(bs, -EIO, sector, nb_sectors, id,
private);
goto done;
@@ -1323,7 +1118,7 @@ static int qcow_queue_read(struct disk_d
} else {
/* async read */
- async_read(s, n * 512,
+ tap_aio_read(&s->async, s->fd, n * 512,
(cluster_offset + index_in_cluster *
512),
buf, cb, id, sector, private);
}
@@ -1351,7 +1146,7 @@ static int qcow_queue_write(struct disk_
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->async, sector + i))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
@@ -1362,7 +1157,7 @@ static int qcow_queue_write(struct disk_
if (n > nb_sectors)
n = nb_sectors;
- if (s->async.iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->async.iocb_free_count == 0 || !tap_aio_lock(&s->async,
sector))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
@@ -1372,14 +1167,14 @@ static int qcow_queue_write(struct disk_
if (!cluster_offset) {
DPRINTF("Ooops, no write cluster offset!\n");
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->async, sector);
return cb(bs, -EIO, sector, nb_sectors, id, private);
}
// TODO Encryption
- async_write(s, n * 512,
+ tap_aio_write(&s->async, s->fd, n * 512,
(cluster_offset + index_in_cluster*512),
buf, cb, id, sector, private);
@@ -1402,9 +1197,14 @@ static int qcow_close(struct disk_driver
static int qcow_close(struct disk_driver *bs)
{
BDRVQcowState *s = bs->private;
-
+
+#ifdef USE_AIO
+ io_destroy(s->async.aio_ctx.aio_ctx);
+ tap_aio_free(&s->async);
+#else
close(s->poll_pipe[0]);
- close(s->poll_pipe[1]);
+ close(s->poll_pipe[1]);
+#endif
qemu_free(s->l1_table);
qemu_free(s->l2_cache);
@@ -1606,23 +1406,10 @@ static int qcow_write_compressed(struct
static int qcow_submit(struct disk_driver *bs)
{
- int ret;
- struct BDRVQcowState *prv = (struct BDRVQcowState*)bs->private;
-
-
- fsync(prv->fd);
-
- if (!prv->async.iocb_queued)
- return 0;
-
- ret = io_submit(prv->async.aio_ctx.aio_ctx, prv->async.iocb_queued,
prv->async.iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->async.iocb_queued = 0;
-
- return 0;
+ struct BDRVQcowState *s = (struct BDRVQcowState*) bs->private;
+
+ fsync(s->fd);
+ return tap_aio_submit(&s->async);
}
@@ -2246,7 +2033,7 @@ repeat:
pio = &prv->async.pending_aio[(long)io->data];
- aio_unlock(prv, pio->sector);
+ tap_aio_unlock(&prv->async, pio->sector);
if (prv->crypt_method)
encrypt_sectors(prv, pio->sector,
diff -r 081d0accc0e1 tools/blktap/drivers/tapaio.c
--- a/tools/blktap/drivers/tapaio.c Fri Feb 22 11:31:06 2008 +0100
+++ b/tools/blktap/drivers/tapaio.c Fri Feb 22 11:31:39 2008 +0100
@@ -32,6 +32,7 @@
#include <unistd.h>
#include <errno.h>
#include <string.h>
+#include <stdlib.h>
/**
* We used a kernel patch to return an fd associated with the AIO context
@@ -62,7 +63,7 @@ static void *
static void *
tap_aio_completion_thread(void *arg)
{
- tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+ tap_aio_internal_context_t *ctx = (tap_aio_internal_context_t *) arg;
int command;
int nr_events;
int rc;
@@ -84,7 +85,7 @@ tap_aio_completion_thread(void *arg)
}
void
-tap_aio_continue(tap_aio_context_t *ctx)
+tap_aio_continue(tap_aio_internal_context_t *ctx)
{
int cmd = 0;
@@ -95,8 +96,8 @@ tap_aio_continue(tap_aio_context_t *ctx)
DPRINTF("Cannot write to command pipe\n");
}
-int
-tap_aio_setup(tap_aio_context_t *ctx,
+static int
+tap_aio_setup(tap_aio_internal_context_t *ctx,
struct io_event *aio_events,
int max_aio_events)
{
@@ -144,7 +145,7 @@ tap_aio_setup(tap_aio_context_t *ctx,
}
int
-tap_aio_get_events(tap_aio_context_t *ctx)
+tap_aio_get_events(tap_aio_internal_context_t *ctx)
{
int nr_events = 0;
@@ -171,10 +172,185 @@ tap_aio_get_events(tap_aio_context_t *ct
return nr_events;
}
-int tap_aio_more_events(tap_aio_context_t *ctx)
+int tap_aio_more_events(tap_aio_internal_context_t *ctx)
{
return io_getevents(ctx->aio_ctx, 0,
ctx->max_aio_events, ctx->aio_events, NULL);
}
-
+int tap_aio_init(tap_aio_context_t *ctx, uint64_t sectors,
+ int max_aio_reqs)
+{
+ int i, ret;
+ long ioidx;
+
+ ctx->iocb_list = NULL;
+ ctx->pending_aio = NULL;
+ ctx->aio_events = NULL;
+ ctx->iocb_free = NULL;
+ ctx->iocb_queue = NULL;
+
+ /*Initialize Locking bitmap*/
+ ctx->sector_lock = calloc(1, sectors);
+
+ if (!ctx->sector_lock) {
+ DPRINTF("Failed to allocate sector lock\n");
+ goto fail;
+ }
+
+
+ /* Initialize AIO */
+ ctx->max_aio_reqs = max_aio_reqs;
+ ctx->iocb_free_count = ctx->max_aio_reqs;
+ ctx->iocb_queued = 0;
+
+ if (!(ctx->iocb_list = malloc(sizeof(struct iocb) * ctx->max_aio_reqs))
||
+ !(ctx->pending_aio = malloc(sizeof(struct pending_aio) *
ctx->max_aio_reqs)) ||
+ !(ctx->aio_events = malloc(sizeof(struct io_event) *
ctx->max_aio_reqs)) ||
+ !(ctx->iocb_free = malloc(sizeof(struct iocb *) *
ctx->max_aio_reqs)) ||
+ !(ctx->iocb_queue = malloc(sizeof(struct iocb *) *
ctx->max_aio_reqs)))
+ {
+ DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
+ ctx->max_aio_reqs);
+ goto fail;
+ }
+
+ ret = tap_aio_setup(&ctx->aio_ctx, ctx->aio_events, ctx->max_aio_reqs);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ DPRINTF("Couldn't setup AIO context. If you are "
+ "trying to concurrently use a large number "
+ "of blktap-based disks, you may need to "
+ "increase the system-wide aio request limit. "
+ "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
+ "aio-max-nr')\n");
+ } else {
+ DPRINTF("Couldn't setup AIO context.\n");
+ }
+ goto fail;
+ }
+
+ for (i=0;i<ctx->max_aio_reqs;i++)
+ ctx->iocb_free[i] = &ctx->iocb_list[i];
+
+ DPRINTF("AIO state initialised\n");
+
+ return 0;
+
+fail:
+ return -1;
+}
+
+void tap_aio_free(tap_aio_context_t *ctx)
+{
+ if (ctx->sector_lock)
+ free(ctx->sector_lock);
+ if (ctx->iocb_list)
+ free(ctx->iocb_list);
+ if (ctx->pending_aio)
+ free(ctx->pending_aio);
+ if (ctx->aio_events)
+ free(ctx->aio_events);
+ if (ctx->iocb_free)
+ free(ctx->iocb_free);
+ if (ctx->iocb_queue)
+ free(ctx->iocb_queue);
+}
+
+/*TODO: Fix sector span!*/
+int tap_aio_can_lock(tap_aio_context_t *ctx, uint64_t sector)
+{
+ return (ctx->sector_lock[sector] ? 0 : 1);
+}
+
+int tap_aio_lock(tap_aio_context_t *ctx, uint64_t sector)
+{
+ return ++ctx->sector_lock[sector];
+}
+
+void tap_aio_unlock(tap_aio_context_t *ctx, uint64_t sector)
+{
+ if (!ctx->sector_lock[sector]) return;
+
+ --ctx->sector_lock[sector];
+ return;
+}
+
+
+int tap_aio_read(tap_aio_context_t *ctx, int fd, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private)
+{
+ struct iocb *io;
+ struct pending_aio *pio;
+ long ioidx;
+
+ if (ctx->iocb_free_count == 0)
+ return -ENOMEM;
+
+ io = ctx->iocb_free[--ctx->iocb_free_count];
+
+ ioidx = IOCB_IDX(ctx, io);
+ pio = &ctx->pending_aio[ioidx];
+ pio->cb = cb;
+ pio->id = id;
+ pio->private = private;
+ pio->nb_sectors = size/512;
+ pio->buf = buf;
+ pio->sector = sector;
+
+ io_prep_pread(io, fd, buf, size, offset);
+ io->data = (void *)ioidx;
+
+ ctx->iocb_queue[ctx->iocb_queued++] = io;
+
+ return 0;
+}
+
+int tap_aio_write(tap_aio_context_t *ctx, int fd, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private)
+{
+ struct iocb *io;
+ struct pending_aio *pio;
+ long ioidx;
+
+ if (ctx->iocb_free_count == 0)
+ return -ENOMEM;
+
+ io = ctx->iocb_free[--ctx->iocb_free_count];
+
+ ioidx = IOCB_IDX(ctx, io);
+ pio = &ctx->pending_aio[ioidx];
+ pio->cb = cb;
+ pio->id = id;
+ pio->private = private;
+ pio->nb_sectors = size/512;
+ pio->buf = buf;
+ pio->sector = sector;
+
+ io_prep_pwrite(io, fd, buf, size, offset);
+ io->data = (void *)ioidx;
+
+ ctx->iocb_queue[ctx->iocb_queued++] = io;
+
+ return 0;
+}
+
+int tap_aio_submit(tap_aio_context_t *ctx)
+{
+ int ret;
+
+ if (!ctx->iocb_queued)
+ return 0;
+
+ ret = io_submit(ctx->aio_ctx.aio_ctx, ctx->iocb_queued,
ctx->iocb_queue);
+
+ /* XXX: TODO: Handle error conditions here. */
+
+ /* Success case: */
+ ctx->iocb_queued = 0;
+
+ return 0;
+}
+
diff -r 081d0accc0e1 tools/blktap/drivers/tapaio.h
--- a/tools/blktap/drivers/tapaio.h Fri Feb 22 11:31:06 2008 +0100
+++ b/tools/blktap/drivers/tapaio.h Fri Feb 22 11:31:39 2008 +0100
@@ -32,8 +32,13 @@
#include <pthread.h>
#include <libaio.h>
+#include <stdint.h>
-struct tap_aio_context {
+#include "tapdisk.h"
+
+#define IOCB_IDX(_ctx, _io) ((_io) - (_ctx)->iocb_list)
+
+struct tap_aio_internal_context {
io_context_t aio_ctx;
struct io_event *aio_events;
@@ -45,14 +50,59 @@ struct tap_aio_context {
int pollfd;
unsigned int poll_in_thread : 1;
};
+
+
+typedef struct tap_aio_internal_context tap_aio_internal_context_t;
+
+
+struct pending_aio {
+ td_callback_t cb;
+ int id;
+ void *private;
+ int nb_sectors;
+ char *buf;
+ uint64_t sector;
+};
+
+
+struct tap_aio_context {
+ tap_aio_internal_context_t aio_ctx;
+
+ int max_aio_reqs;
+ struct iocb *iocb_list;
+ struct iocb **iocb_free;
+ struct pending_aio *pending_aio;
+ int iocb_free_count;
+ struct iocb **iocb_queue;
+ int iocb_queued;
+ struct io_event *aio_events;
+
+ /* Locking bitmap for AIO reads/writes */
+ uint8_t *sector_lock;
+};
typedef struct tap_aio_context tap_aio_context_t;
-int tap_aio_setup (tap_aio_context_t *ctx,
- struct io_event *aio_events,
- int max_aio_events);
-void tap_aio_continue (tap_aio_context_t *ctx);
-int tap_aio_get_events (tap_aio_context_t *ctx);
-int tap_aio_more_events(tap_aio_context_t *ctx);
+void tap_aio_continue (tap_aio_internal_context_t *ctx);
+int tap_aio_get_events (tap_aio_internal_context_t *ctx);
+int tap_aio_more_events(tap_aio_internal_context_t *ctx);
+
+
+int tap_aio_init(tap_aio_context_t *ctx, uint64_t sectors,
+ int max_aio_reqs);
+void tap_aio_free(tap_aio_context_t *ctx);
+
+int tap_aio_can_lock(tap_aio_context_t *ctx, uint64_t sector);
+int tap_aio_lock(tap_aio_context_t *ctx, uint64_t sector);
+void tap_aio_unlock(tap_aio_context_t *ctx, uint64_t sector);
+
+
+int tap_aio_read(tap_aio_context_t *ctx, int fd, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private);
+int tap_aio_write(tap_aio_context_t *ctx, int fd, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private);
+int tap_aio_submit(tap_aio_context_t *ctx);
#endif /* __TAPAIO_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|