Now blkback driver can handle the discard request from guest, we will
forward the request to phy device if it really has discard support, or we'll
punch a hole on the image file.
Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
---
 drivers/block/xen-blkback/blkback.c |   87 +++++++++++++++++++++++++++-----
 drivers/block/xen-blkback/common.h  |   93 ++++++++++++++++++++++++++++------
 drivers/block/xen-blkback/xenbus.c  |   58 ++++++++++++++++++++++
 3 files changed, 207 insertions(+), 31 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c 
b/drivers/block/xen-blkback/blkback.c
index 2330a9a..a3b50d0 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
+       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
+                "  |  ds %4d\n",
                 current->comm, blkif->st_oo_req,
-                blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+                blkif->st_rd_req, blkif->st_wr_req,
+                blkif->st_f_req, blkif->st_ds_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
+       blkif->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
@@ -410,6 +416,43 @@ static int xen_blkbk_map(struct blkif_request *req,
        return ret;
 }
 
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+       int err = 0;
+       int status = BLKIF_RSP_OKAY;
+       struct block_device *bdev = blkif->vbd.bdev;
+
+       if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+               /* just forward the discard request */
+               err = blkdev_issue_discard(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors,
+                               GFP_KERNEL, 0);
+       else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+               /* punch a hole in the backing file */
+               struct loop_device *lo = bdev->bd_disk->private_data;
+               struct file *file = lo->lo_backing_file;
+
+               if (file->f_op->fallocate)
+                       err = file->f_op->fallocate(file,
+                               FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+                               req->u.discard.sector_number << 9,
+                               req->u.discard.nr_sectors << 9);
+               else
+                       err = -EOPNOTSUPP;
+       } else
+               err = -EOPNOTSUPP;
+
+       if (err == -EOPNOTSUPP) {
+               pr_debug(DRV_PFX "blkback: discard op failed, "
+                               "not supported\n");
+               status = BLKIF_RSP_EOPNOTSUPP;
+       } else if (err)
+               status = BLKIF_RSP_ERROR;
+
+       make_response(blkif, req->id, req->operation, status);
+}
+
 /*
  * Completion callback on the bio's. Called as bh->b_end_io()
  */
@@ -563,6 +606,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                blkif->st_f_req++;
                operation = WRITE_FLUSH;
                break;
+       case BLKIF_OP_DISCARD:
+               blkif->st_ds_req++;
+               operation = REQ_DISCARD;
+               break;
        case BLKIF_OP_WRITE_BARRIER:
        default:
                operation = 0; /* make gcc happy */
@@ -572,7 +619,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        /* Check that the number of segments is sane. */
        nseg = req->nr_segments;
-       if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+       if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+                               operation != REQ_DISCARD) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
@@ -627,10 +675,14 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg))
+       if (operation != BLKIF_OP_DISCARD &&
+                       xen_blkbk_map(req, pending_req, seg))
                goto fail_flush;
 
-       /* This corresponding xen_blkif_put is done in __end_block_io_op */
+       /*
+        * This corresponding xen_blkif_put is done in __end_block_io_op, or
+        * below if we are handling a BLKIF_OP_DISCARD.
+        */
        xen_blkif_get(blkif);
 
        for (i = 0; i < nseg; i++) {
@@ -654,18 +706,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                preq.sector_number += seg[i].nsec;
        }
 
-       /* This will be hit if the operation was a flush. */
+       /* This will be hit if the operation was a flush or discard. */
        if (!bio) {
-               BUG_ON(operation != WRITE_FLUSH);
+               BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
 
-               bio = bio_alloc(GFP_KERNEL, 0);
-               if (unlikely(bio == NULL))
-                       goto fail_put_bio;
+               if (operation == WRITE_FLUSH) {
+                       bio = bio_alloc(GFP_KERNEL, 0);
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
 
-               biolist[nbio++] = bio;
-               bio->bi_bdev    = preq.bdev;
-               bio->bi_private = pending_req;
-               bio->bi_end_io  = end_block_io_op;
+                       biolist[nbio++] = bio;
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+               } else if (operation == REQ_DISCARD) {
+                       xen_blk_discard(blkif, req);
+                       xen_blkif_put(blkif);
+                       free_req(pending_req);
+                       return 0;
+               }
        }
 
        /*
diff --git a/drivers/block/xen-blkback/common.h 
b/drivers/block/xen-blkback/common.h
index 9e40b28..bfb532e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -63,13 +63,26 @@ struct blkif_common_response {
 
 /* i386 protocol version */
 #pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t nr_sectors;
+};
+
 struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_32_request_rw rw;
+               struct blkif_x86_32_request_discard discard;
+       } u;
 };
 struct blkif_x86_32_response {
        uint64_t        id;              /* copied from request */
@@ -79,13 +92,26 @@ struct blkif_x86_32_response {
 #pragma pack(pop)
 
 /* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t nr_sectors;
+};
+
 struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       __attribute__((__aligned__(8))) id;
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_64_request_rw rw;
+               struct blkif_x86_64_request_discard discard;
+       } u;
 };
 struct blkif_x86_64_response {
        uint64_t       __attribute__((__aligned__(8))) id;
@@ -113,6 +139,11 @@ enum blkif_protocol {
        BLKIF_PROTOCOL_X86_64 = 3,
 };
 
+enum blkif_backend_type {
+       BLKIF_BACKEND_PHY  = 1,
+       BLKIF_BACKEND_FILE = 2,
+};
+
 struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
@@ -138,6 +169,7 @@ struct xen_blkif {
        unsigned int            irq;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
+       enum blkif_backend_type blk_backend_type;
        union blkif_back_rings  blk_rings;
        struct vm_struct        *blk_ring_area;
        /* The VBD attached to this interface. */
@@ -159,6 +191,7 @@ struct xen_blkif {
        int                     st_wr_req;
        int                     st_oo_req;
        int                     st_f_req;
+       int                     st_ds_req;
        int                     st_rd_sect;
        int                     st_wr_sect;
 
@@ -182,7 +215,7 @@ struct xen_blkif {
 
 struct phys_req {
        unsigned short          dev;
-       unsigned short          nr_sects;
+       blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
 };
@@ -206,12 +239,25 @@ static inline void blkif_get_x86_32_req(struct 
blkif_request *dst,
        dst->nr_segments = src->nr_segments;
        dst->handle = src->handle;
        dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
 }
 
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -222,12 +268,25 @@ static inline void blkif_get_x86_64_req(struct 
blkif_request *dst,
        dst->nr_segments = src->nr_segments;
        dst->handle = src->handle;
        dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
 }
 
 #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index 3f129b4..2b3aef0 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -272,6 +272,7 @@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
 VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -280,6 +281,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
        &dev_attr_rd_req.attr,
        &dev_attr_wr_req.attr,
        &dev_attr_f_req.attr,
+       &dev_attr_ds_req.attr,
        &dev_attr_rd_sect.attr,
        &dev_attr_wr_sect.attr,
        NULL
@@ -419,6 +421,60 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction 
xbt,
        return err;
 }
 
+int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+{
+       struct xenbus_device *dev = be->dev;
+       struct xen_blkif *blkif = be->blkif;
+       char *type;
+       int err;
+       int state = 0;
+
+       type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+       if (!IS_ERR(type)) {
+               if (strncmp(type, "file", 4) == 0) {
+                       state = 1;
+                       blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+               }
+               if (strncmp(type, "phy", 3) == 0) {
+                       struct block_device *bdev = be->blkif->vbd.bdev;
+                       struct request_queue *q = bdev_get_queue(bdev);
+                       if (blk_queue_discard(q)) {
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-granularity", "%u",
+                                       q->limits.discard_granularity);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-granularity");
+                                       goto kfree;
+                               }
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-alignment", "%u",
+                                       q->limits.discard_alignment);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-alignment");
+                                       goto kfree;
+                               }
+                               state = 1;
+                               blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+                       }
+               }
+       } else {
+               err = PTR_ERR(type);
+               xenbus_dev_fatal(dev, err, "reading type");
+               goto out;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-discard");
+kfree:
+       kfree(type);
+out:
+       return err;
+}
+
 /*
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -650,6 +706,8 @@ again:
        if (err)
                goto abort;
 
+       err = xen_blkbk_discard(xbt, be);
+
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            (unsigned long long)vbd_sz(&be->blkif->vbd));
        if (err) {
-- 
1.7.6
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
 |