[patch 2/6] xenblk: Add O_DIRECT and O_SYNC support -- backend.
This patch adds the backend support.
If an I/O request is marked as a sync write, the backend waits for all of
its bios to complete before returning the response to the frontend.
Signed-off-by: Joe Jin <joe.jin@xxxxxxxxxx>
---
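Note: BLKIF_OP() and req_is_sync() used below are expected to be introduced
by an earlier patch in this series; this patch only uses them. As a reading
aid, here is a minimal sketch of the assumed encoding (the real definitions
may differ): one bit of the request's operation field carries the sync flag,
and BLKIF_OP() masks it off to recover the plain opcode.

    /* Assumed helpers, for illustration only -- not part of this patch. */
    #define BLKIF_OP_SYNC_FLAG  (1 << BIO_RW_SYNC)  /* assumed flag bit */
    #define BLKIF_OP(op)        ((op) & ~BLKIF_OP_SYNC_FLAG)
    #define req_is_sync(req)    ((req)->operation & BLKIF_OP_SYNC_FLAG)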
diff -r 2fb13b8cbe13 drivers/xen/blkback/blkback.c
--- a/drivers/xen/blkback/blkback.c Thu Oct 30 13:34:43 2008 +0000
+++ b/drivers/xen/blkback/blkback.c Mon Nov 03 10:31:41 2008 +0800
@@ -77,12 +77,15 @@
unsigned short operation;
int status;
struct list_head free_list;
+ wait_queue_head_t pr_bio_wait;
} pending_req_t;
static pending_req_t *pending_reqs;
static struct list_head pending_free;
static DEFINE_SPINLOCK(pending_free_lock);
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+static struct bio_set *_bios;
#define BLKBACK_INVALID_HANDLE (~0)
@@ -123,6 +126,7 @@
if (!list_empty(&pending_free)) {
req = list_entry(pending_free.next, pending_req_t, free_list);
list_del(&req->free_list);
+ init_waitqueue_head(&req->pr_bio_wait);
}
spin_unlock_irqrestore(&pending_free_lock, flags);
return req;
@@ -199,6 +203,15 @@
blkif->st_oo_req = 0;
}
+static void make_response_and_free_req(pending_req_t *pending_req)
+{
+ fast_flush_area(pending_req);
+ make_response(pending_req->blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ blkif_put(pending_req->blkif);
+ free_req(pending_req);
+}
+
int blkif_schedule(void *arg)
{
blkif_t *blkif = arg;
@@ -248,7 +261,7 @@
static void __end_block_io_op(pending_req_t *pending_req, int error)
{
/* An error fails the entire request. */
- if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+ if ((BLKIF_OP(pending_req->operation) == BLKIF_OP_WRITE_BARRIER) &&
(error == -EOPNOTSUPP)) {
DPRINTK("blkback: write barrier op failed, not supported\n");
blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
@@ -260,19 +273,20 @@
}
if (atomic_dec_and_test(&pending_req->pendcnt)) {
- fast_flush_area(pending_req);
- make_response(pending_req->blkif, pending_req->id,
- pending_req->operation, pending_req->status);
- blkif_put(pending_req->blkif);
- free_req(pending_req);
+ if (!req_is_sync(pending_req))
+ make_response_and_free_req(pending_req);
}
}
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
+ pending_req_t *req = bio->bi_private;
+
if (bio->bi_size != 0)
return 1;
- __end_block_io_op(bio->bi_private, error);
+ __end_block_io_op(req, error);
+ if (req_is_sync(req))
+ wake_up(&req->pr_bio_wait);
bio_put(bio);
return error;
}
@@ -378,6 +392,23 @@
return more_to_do;
}
+static void blkif_bio_destructor(struct bio *bio)
+{
+ bio_free(bio, _bios);
+}
+
+#define __wait_pending_bio(wq, condition) \
+do { \
+ DEFINE_WAIT(__pr_wait); \
+ for (;;) { \
+ prepare_to_wait((wq), &__pr_wait, TASK_UNINTERRUPTIBLE);\
+ if (condition) \
+ break; \
+ schedule(); \
+ } \
+ finish_wait((wq), &__pr_wait); \
+} while (0)
+
static void dispatch_rw_block_io(blkif_t *blkif,
blkif_request_t *req,
pending_req_t *pending_req)
@@ -392,8 +423,9 @@
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int ret, i, nbio = 0;
int operation;
+ int sync = req_is_sync(req);
- switch (req->operation) {
+ switch (BLKIF_OP(req->operation)) {
case BLKIF_OP_READ:
operation = READ;
break;
@@ -407,6 +439,8 @@
operation = 0; /* make gcc happy */
BUG();
}
+ if (sync)
+ operation |= (1 << BIO_RW_SYNC);
/* Check that number of segments is sane. */
nseg = req->nr_segments;
@@ -438,7 +472,7 @@
preq.nr_sects += seg[i].nsec;
flags = GNTMAP_host_map;
- if (operation != READ)
+ if (BLKIF_OP(operation) != READ)
flags |= GNTMAP_readonly;
gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
req->seg[i].gref, blkif->domid);
@@ -469,9 +503,9 @@
if (ret)
goto fail_flush;
- if (vbd_translate(&preq, blkif, operation) != 0) {
+ if (vbd_translate(&preq, blkif, BLKIF_OP(operation)) != 0) {
DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
- operation == READ ? "read" : "write",
+ BLKIF_OP(operation) == READ ? "read" : "write",
preq.sector_number,
preq.sector_number + preq.nr_sects, preq.dev);
goto fail_flush;
@@ -490,7 +524,16 @@
virt_to_page(vaddr(pending_req, i)),
seg[i].nsec << 9,
seg[i].buf & ~PAGE_MASK) == 0)) {
- bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
+ /* submit the bio before allocate new one */
+ if (sync && bio) {
+ plug_queue(blkif, bio);
+ if (nbio == 1)
+ blkif_get(blkif);
+ atomic_inc(&pending_req->pendcnt);
+ submit_bio(operation, bio);
+ }
+ bio = biolist[nbio++] = bio_alloc_bioset(GFP_KERNEL,
+ nseg-i, _bios);
if (unlikely(bio == NULL))
goto fail_put_bio;
@@ -498,6 +541,10 @@
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_sector = preq.sector_number;
+ bio->bi_rw = operation;
+ bio->bi_destructor = blkif_bio_destructor;
+ if (sync)
+ bio->bi_rw |= (1 << BIO_RW_SYNC);
}
preq.sector_number += seg[i].nsec;
@@ -516,15 +563,28 @@
}
plug_queue(blkif, bio);
- atomic_set(&pending_req->pendcnt, nbio);
- blkif_get(blkif);
- for (i = 0; i < nbio; i++)
- submit_bio(operation, biolist[i]);
+ if (sync) {
+ if (bio) {
+ if (nbio == 1)
+ blkif_get(blkif);
+ atomic_inc(&pending_req->pendcnt);
+ /* submit the last bio to request queue */
+ submit_bio(operation, bio);
+ }
+ __wait_pending_bio(&pending_req->pr_bio_wait,
+ atomic_read(&pending_req->pendcnt) == 0);
+ make_response_and_free_req(pending_req);
+ } else {
+ atomic_set(&pending_req->pendcnt, nbio);
+ blkif_get(blkif);
+ for (i = 0; i < nbio; i++)
+ submit_bio(operation, biolist[i]);
+ }
- if (operation == READ)
+ if (BLKIF_OP(operation) == READ)
blkif->st_rd_sect += preq.nr_sects;
- else if (operation == WRITE || operation == WRITE_BARRIER)
+ else if (BLKIF_OP(operation) == WRITE || BLKIF_OP(operation) == WRITE_BARRIER)
blkif->st_wr_sect += preq.nr_sects;
return;
@@ -618,6 +678,10 @@
if (!pending_reqs || !pending_grant_handles || !pending_pages)
goto out_of_memory;
+ _bios = bioset_create(16, 16, 4);
+ if (!_bios)
+ goto out_of_memory;
+
for (i = 0; i < mmap_pages; i++)
pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
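
One note on the sync path: __wait_pending_bio() above is an open-coded
uninterruptible wait. Assuming no extra semantics are intended, it behaves
like the stock wait_event() helper, so the wait in dispatch_rw_block_io()
could equivalently be written as:

    wait_event(pending_req->pr_bio_wait,
               atomic_read(&pending_req->pendcnt) == 0);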