[Xen-devel] Re: Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] Re: Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters
From: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
Date: Fri, 8 Feb 2008 14:14:47 +0000
Delivery-date: Fri, 08 Feb 2008 06:15:59 -0800
In-reply-to: <20080207161442.GJ4310@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Mail-followup-to: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
References: <20080207161442.GJ4310@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.12-2006-07-14
Samuel Thibault, on Thu 07 Feb 2008 16:14:42 +0000, wrote:
> Nothing is said about parameters to be given along with WRITE_BARRIER or
> FLUSH_DISKCACHE operations.  I guess they are both implicitly supposed
> to be write operations? (That is the case for WRITE_BARRIER in the Linux
> implementation.) If so, would it be fine to allow nr_segments to be 0?
> As part of IDE cache flush emulation, I need to issue WRITE_BARRIERs
> without issuing any actual write...
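
For concreteness, the zero-segment request being asked about would look
something like this on the frontend side (a sketch only, not part of the
patch below; "info" stands for a blkfront-style per-device structure, and
ring setup, shadow bookkeeping and completion handling are all elided):

	struct blkif_request *ring_req;
	int notify;

	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	ring_req->operation     = BLKIF_OP_WRITE_BARRIER;
	ring_req->nr_segments   = 0;	/* no data, barrier only */
	ring_req->handle        = info->handle;
	ring_req->id            = 0;	/* would normally index a shadow entry */
	ring_req->sector_number = 0;
	info->ring.req_prod_pvt++;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
	if (notify)
		notify_remote_via_irq(info->irq);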

The patch below implements it for Linux:



block: backport Jens Axboe's commit bf2de6f5a4faf0197268f18d08969b003b87b6e8
from Tue, 16 Oct 2007 09:03:56 +0000 (11:03 +0200):
"Initial support for data-less, or empty, barriers"

blkback: permit and implement empty barrier.

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>

diff -r 5c61cd349b20 block/elevator.c
--- a/block/elevator.c  Thu Feb 07 10:33:19 2008 +0000
+++ b/block/elevator.c  Fri Feb 08 11:27:12 2008 +0000
@@ -493,6 +493,16 @@
        int ret;
 
        while ((rq = __elv_next_request(q)) != NULL) {
+               /*
+                * Kill the empty barrier place holder, the driver must
+                * not ever see it.
+                */
+               if (blk_empty_barrier(rq)) {
+                       blkdev_dequeue_request(rq);
+                       end_that_request_chunk(rq, 1, 0);
+                       end_that_request_last(rq, 1);
+                       continue;
+               }
                if (!(rq->flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;
 
--- a/block/ll_rw_blk.c Thu Feb 07 10:33:19 2008 +0000
+++ b/block/ll_rw_blk.c Fri Feb 08 11:27:12 2008 +0000
@@ -483,9 +483,12 @@
         * Queue ordered sequence.  As we stack them at the head, we
         * need to queue in reverse order.  Note that we rely on that
         * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-        * request gets inbetween ordered sequence.
+        * request gets inbetween ordered sequence. If this request is
+        * an empty barrier, we don't need to do a postflush ever since
+        * there will be no data written between the pre and post flush.
+        * Hence a single flush will suffice.
         */
-       if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+       if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
                queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
        else
                q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -2967,7 +2970,7 @@
 {
        struct block_device *bdev = bio->bi_bdev;
 
-       if (bdev != bdev->bd_contains) {
+       if (bio_sectors(bio) && bdev != bdev->bd_contains) {
                struct hd_struct *p = bdev->bd_part;
                const int rw = bio_data_dir(bio);
 
@@ -3028,7 +3031,7 @@
        might_sleep();
        /* Test device or partition size, when known. */
        maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-       if (maxsector) {
+       if (maxsector && nr_sectors) {
                sector_t sector = bio->bi_sector;
 
                if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3094,7 +3097,7 @@
                old_dev = bio->bi_bdev->bd_dev;
 
                maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-               if (maxsector) {
+               if (maxsector && nr_sectors) {
                        sector_t sector = bio->bi_sector;
 
                        if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3128,21 +3131,25 @@
 {
        int count = bio_sectors(bio);
 
-       BIO_BUG_ON(!bio->bi_size);
-       BIO_BUG_ON(!bio->bi_io_vec);
        bio->bi_rw |= rw;
-       if (rw & WRITE)
-               count_vm_events(PGPGOUT, count);
-       else
-               count_vm_events(PGPGIN, count);
 
-       if (unlikely(block_dump)) {
-               char b[BDEVNAME_SIZE];
-               printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-                       current->comm, current->pid,
-                       (rw & WRITE) ? "WRITE" : "READ",
-                       (unsigned long long)bio->bi_sector,
-                       bdevname(bio->bi_bdev,b));
+       if (!bio_empty_barrier(bio)) {
+               BIO_BUG_ON(!bio->bi_size);
+               BIO_BUG_ON(!bio->bi_io_vec);
+
+               if (rw & WRITE)
+                       count_vm_events(PGPGOUT, count);
+               else
+                       count_vm_events(PGPGIN, count);
+
+               if (unlikely(block_dump)) {
+                       char b[BDEVNAME_SIZE];
+                       printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+                               current->comm, current->pid,
+                               (rw & WRITE) ? "WRITE" : "READ",
+                               (unsigned long long)bio->bi_sector,
+                               bdevname(bio->bi_bdev,b));
+               }
        }
 
        generic_make_request(bio);
@@ -3259,6 +3266,13 @@
        total_bytes = bio_nbytes = 0;
        while ((bio = req->bio) != NULL) {
                int nbytes;
+
+               /* For an empty barrier request, the low level driver must
+                * store a potential error location in ->sector. We pass
+                * that back up in ->bi_sector
+                */
+               if (blk_empty_barrier(req))
+                       bio->bi_sector = req->sector;
 
                if (nr_bytes >= bio->bi_size) {
                        req->bio = bio->bi_next;
--- a/drivers/xen/blkback/blkback.c     Thu Feb 07 10:33:19 2008 +0000
+++ b/drivers/xen/blkback/blkback.c     Fri Feb 08 11:27:13 2008 +0000
@@ -407,7 +407,7 @@
 
        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
-       if (unlikely(nseg == 0) || 
+       if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || 
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                DPRINTK("Bad number of segments in request (%d)\n", nseg);
                goto fail_response;
@@ -500,6 +500,18 @@
                preq.sector_number += seg[i].nsec;
        }
 
+       if (!bio) {
+               BUG_ON(operation != WRITE_BARRIER);
+               bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
+               if (unlikely(bio == NULL))
+                       goto fail_put_bio;
+
+               bio->bi_bdev    = preq.bdev;
+               bio->bi_private = pending_req;
+               bio->bi_end_io  = end_block_io_op;
+               bio->bi_sector  = -1;
+       }
+
        plug_queue(blkif, bio);
        atomic_set(&pending_req->pendcnt, nbio);
        blkif_get(blkif);
--- a/fs/bio.c  Thu Feb 07 10:33:19 2008 +0000
+++ b/fs/bio.c  Fri Feb 08 11:27:13 2008 +0000
@@ -112,7 +112,8 @@
 
        BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 
-       mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+       if (bio->bi_io_vec)
+               mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
        mempool_free(bio, bio_set->bio_pool);
 }
 
--- a/include/linux/bio.h       Thu Feb 07 10:33:19 2008 +0000
+++ b/include/linux/bio.h       Fri Feb 08 11:27:13 2008 +0000
@@ -172,12 +172,27 @@
 #define bio_offset(bio)                bio_iovec((bio))->bv_offset
 #define bio_segments(bio)      ((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)       ((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio)   (bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio)          (page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)       ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 #define bio_sync(bio)          ((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)      ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)      ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size)
+
+static inline unsigned int bio_cur_sectors(struct bio *bio)
+{
+       if (bio->bi_vcnt)
+               return bio_iovec(bio)->bv_len >> 9;
+
+       return 0;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+       if (bio->bi_vcnt)
+               return page_address(bio_page(bio)) + bio_offset(bio);
+
+       return NULL;
+}
 
 /*
  * will die
--- a/include/linux/blkdev.h    Thu Feb 07 10:33:19 2008 +0000
+++ b/include/linux/blkdev.h    Fri Feb 08 11:27:13 2008 +0000
@@ -506,6 +506,8 @@
 #define blk_barrier_rq(rq)     ((rq)->flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)         ((rq)->flags & REQ_FUA)
 
+#define blk_empty_barrier(rq)   (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
+
 #define list_entry_rq(ptr)     list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)                ((rq)->flags & 1)
--- a/mm/highmem.c      Thu Feb 07 10:33:19 2008 +0000
+++ b/mm/highmem.c      Fri Feb 08 11:27:13 2008 +0000
@@ -468,6 +468,12 @@
        mempool_t *pool;
 
        /*
+        * Data-less bio, nothing to bounce
+        */
+       if (bio_empty_barrier(*bio_orig))
+               return;
+
+       /*
         * for non-isa bounce case, just check if the bounce pfn is equal
         * to or bigger than the highest pfn in the system -- in that case,
         * don't waste time iterating over bio segments
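
For reference, once the backport above is applied, a driver can submit a
data-less barrier in the obvious way (again a sketch, not part of the
patch; bdev, my_end_io and my_private stand for the caller's own objects):

	struct bio *bio = bio_alloc(GFP_KERNEL, 0);	/* zero bvecs: no payload */

	bio->bi_bdev    = bdev;
	bio->bi_end_io  = my_end_io;	/* completion callback, sees any error */
	bio->bi_private = my_private;
	/* bi_size stays 0, so the block layer treats this as an empty barrier */
	submit_bio(WRITE_BARRIER, bio);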

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
