>>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@xxxxxxxxxx> wrote:
> JBeulich@xxxxxxxxxx
> Subject: [PATCH V2 3/3] xen-blkback: handle trim request in backend driver
> Date: Thu, 18 Aug 2011 17:34:31 +0800
> Message-Id: <1313660071-25230-4-git-send-email-lidongyang@xxxxxxxxxx>
> X-Mailer: git-send-email 1.7.6
> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
> References: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>
> Now blkback driver can handle the trim request from guest, we will
> forward the request to phy device if it really has trim support, or we'll
> punch a hole on the image file.
>
> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
> ---
> drivers/block/xen-blkback/blkback.c | 85
> +++++++++++++++++++++++++++++------
> drivers/block/xen-blkback/common.h | 4 +-
> drivers/block/xen-blkback/xenbus.c | 61 +++++++++++++++++++++++++
> 3 files changed, 135 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/block/xen-blkback/blkback.c
> b/drivers/block/xen-blkback/blkback.c
> index 2330a9a..5acc37a 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -39,6 +39,9 @@
> #include <linux/list.h>
> #include <linux/delay.h>
> #include <linux/freezer.h>
> +#include <linux/loop.h>
> +#include <linux/falloc.h>
> +#include <linux/fs.h>
>
> #include <xen/events.h>
> #include <xen/page.h>
> @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
>
> static void print_stats(struct xen_blkif *blkif)
> {
> - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
> + pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
> + " | tr %4d\n",
> current->comm, blkif->st_oo_req,
> - blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
> + blkif->st_rd_req, blkif->st_wr_req,
> + blkif->st_f_req, blkif->st_tr_req);
> blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
> blkif->st_rd_req = 0;
> blkif->st_wr_req = 0;
> blkif->st_oo_req = 0;
> + blkif->st_tr_req = 0;
> }
>
> int xen_blkif_schedule(void *arg)
> @@ -563,6 +569,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
> blkif->st_f_req++;
> operation = WRITE_FLUSH;
> break;
> + case BLKIF_OP_TRIM:
> + blkif->st_tr_req++;
> + operation = REQ_DISCARD;
> + break;
> case BLKIF_OP_WRITE_BARRIER:
> default:
> operation = 0; /* make gcc happy */
> @@ -572,7 +582,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>
> /* Check that the number of segments is sane. */
> nseg = req->nr_segments;
> - if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
> + if (unlikely(nseg == 0 && operation != (WRITE_FLUSH | REQ_DISCARD)) ||
This will match neither WRITE_FLUSH nor REQ_DISCARD.
> unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
> pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
> nseg);
> @@ -627,10 +637,13 @@ static int dispatch_rw_block_io(struct xen_blkif
> *blkif,
> * the hypercall to unmap the grants - that is all done in
> * xen_blkbk_unmap.
> */
> - if (xen_blkbk_map(req, pending_req, seg))
> + if (operation != BLKIF_OP_TRIM && xen_blkbk_map(req, pending_req, seg))
> goto fail_flush;
>
> - /* This corresponding xen_blkif_put is done in __end_block_io_op */
> + /*
> + * This corresponding xen_blkif_put is done in __end_block_io_op, or
> + * below if we are handling a BLKIF_OP_TRIM.
> + */
> xen_blkif_get(blkif);
>
> for (i = 0; i < nseg; i++) {
> @@ -654,18 +667,62 @@ static int dispatch_rw_block_io(struct xen_blkif
> *blkif,
> preq.sector_number += seg[i].nsec;
> }
>
> - /* This will be hit if the operation was a flush. */
> + /* This will be hit if the operation was a flush or trim. */
> if (!bio) {
> - BUG_ON(operation != WRITE_FLUSH);
> + BUG_ON(operation != (WRITE_FLUSH | REQ_DISCARD));
Same here.
>
> - bio = bio_alloc(GFP_KERNEL, 0);
> - if (unlikely(bio == NULL))
> - goto fail_put_bio;
> + if (operation == WRITE_FLUSH) {
> + bio = bio_alloc(GFP_KERNEL, 0);
> + if (unlikely(bio == NULL))
> + goto fail_put_bio;
>
> - biolist[nbio++] = bio;
> - bio->bi_bdev = preq.bdev;
> - bio->bi_private = pending_req;
> - bio->bi_end_io = end_block_io_op;
> + biolist[nbio++] = bio;
> + bio->bi_bdev = preq.bdev;
> + bio->bi_private = pending_req;
> + bio->bi_end_io = end_block_io_op;
> + } else if (operation == REQ_DISCARD) {
> + int err = 0;
> + int status = BLKIF_RSP_OKAY;
> + struct block_device *bdev = blkif->vbd.bdev;
> +
> + preq.nr_sects = req->u.trim.nr_sectors;
> + if (blkif->vbd.type & VDISK_PHY_BACKEND)
> + /* just forward the trim request */
> + err = blkdev_issue_discard(bdev,
> + preq.sector_number,
> + preq.nr_sects,
> + GFP_KERNEL, 0);
> + else if (blkif->vbd.type & VDISK_FILE_BACKEND) {
> + /* punch a hole in the backing file */
> + struct loop_device *lo =
> + bdev->bd_disk->private_data;
> + struct file *file = lo->lo_backing_file;
> +
> + if (file->f_op->fallocate)
> + err = file->f_op->fallocate(file,
> + FALLOC_FL_KEEP_SIZE |
> + FALLOC_FL_PUNCH_HOLE,
> + preq.sector_number << 9,
> + preq.nr_sects << 9);
> + else
> + err = -EOPNOTSUPP;
> + } else
Are you not worried about doing this synchronously, i.e. blocking any
other I/O going on for the device?
> + status = BLKIF_RSP_EOPNOTSUPP;
> +
> + if (err == -EOPNOTSUPP) {
> + DPRINTK("blkback: discard op failed, "
> + "not supported\n");
> + status = BLKIF_RSP_EOPNOTSUPP;
> + } else if (err)
> + status = BLKIF_RSP_ERROR;
> +
> + if (status == BLKIF_RSP_OKAY)
> + blkif->st_tr_sect += preq.nr_sects;
I don't think this is a particularly useful statistic.
> + make_response(blkif, req->id, req->operation, status);
> + xen_blkif_put(blkif);
> + free_req(pending_req);
> + return 0;
> + }
> }
>
> /*
> diff --git a/drivers/block/xen-blkback/common.h
> b/drivers/block/xen-blkback/common.h
> index 9e40b28..1fef727 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -159,8 +159,10 @@ struct xen_blkif {
> int st_wr_req;
> int st_oo_req;
> int st_f_req;
> + int st_tr_req;
> int st_rd_sect;
> int st_wr_sect;
> + int st_tr_sect;
>
> wait_queue_head_t waiting_to_free;
>
> @@ -182,7 +184,7 @@ struct xen_blkif {
>
> struct phys_req {
> unsigned short dev;
> - unsigned short nr_sects;
> + blkif_sector_t nr_sects;
> struct block_device *bdev;
> blkif_sector_t sector_number;
> };
> diff --git a/drivers/block/xen-blkback/xenbus.c
> b/drivers/block/xen-blkback/xenbus.c
> index 3f129b4..05ea8e0 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -272,16 +272,20 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
> VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
> VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
> VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
> +VBD_SHOW(tr_req, "%d\n", be->blkif->st_tr_req);
> VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
> VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
> +VBD_SHOW(tr_sect, "%d\n", be->blkif->st_tr_sect);
>
> static struct attribute *xen_vbdstat_attrs[] = {
> &dev_attr_oo_req.attr,
> &dev_attr_rd_req.attr,
> &dev_attr_wr_req.attr,
> &dev_attr_f_req.attr,
> + &dev_attr_tr_req.attr,
> &dev_attr_rd_sect.attr,
> &dev_attr_wr_sect.attr,
> + &dev_attr_tr_sect.attr,
> NULL
> };
>
> @@ -419,6 +423,59 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction
> xbt,
> return err;
> }
>
> +int xen_blkbk_trim(struct xenbus_transaction xbt, struct backend_info *be)
> +{
> + struct xenbus_device *dev = be->dev;
> + struct xen_vbd *vbd = &be->blkif->vbd;
> + char *type;
> + int err;
> + int state = 0;
> +
> + type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
> + if (!IS_ERR(type)) {
> + if (strcmp(type, "file") == 0)
> + state = 1;
> + vbd->type |= VDISK_FILE_BACKEND;
Missing { and }.
Jan
> + if (strcmp(type, "phy") == 0) {
> + struct block_device *bdev = be->blkif->vbd.bdev;
> + struct request_queue *q = bdev_get_queue(bdev);
> + if (blk_queue_discard(q)) {
> + err = xenbus_printf(xbt, dev->nodename,
> + "discard_granularity", "%u",
> + q->limits.discard_granularity);
> + if (err) {
> + xenbus_dev_fatal(dev, err,
> + "writing discard_granularity");
> + goto kfree;
> + }
> + err = xenbus_printf(xbt, dev->nodename,
> + "discard_alignment", "%u",
> + q->limits.discard_alignment);
> + if (err) {
> + xenbus_dev_fatal(dev, err,
> + "writing discard_alignment");
> + goto kfree;
> + }
> + state = 1;
> + vbd->type |= VDISK_PHY_BACKEND;
> + }
> + }
> + } else {
> + err = PTR_ERR(type);
> + xenbus_dev_fatal(dev, err, "reading type");
> + goto out;
> + }
> +
> + err = xenbus_printf(xbt, dev->nodename, "feature-trim",
> + "%d", state);
> + if (err)
> + xenbus_dev_fatal(dev, err, "writing feature-trim");
> +kfree:
> + kfree(type);
> +out:
> + return err;
> +}
> +
> /*
> * Entry point to this code when a new device is created. Allocate the
> basic
> * structures, and watch the store waiting for the hotplug scripts to tell
> us
> @@ -650,6 +707,10 @@ again:
> if (err)
> goto abort;
>
> + err = xen_blkbk_trim(xbt, be);
> + if (err)
> + goto abort;
> +
> err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
> (unsigned long long)vbd_sz(&be->blkif->vbd));
> if (err) {
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|