On Thu, Aug 18, 2011 at 6:33 PM, Jan Beulich <JBeulich@xxxxxxxxxx> wrote:
> >>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@xxxxxxxxxx> wrote:
>> JBeulich@xxxxxxxxxx
>> Subject: [PATCH V2 3/3] xen-blkback: handle trim request in backend driver
>> Date: Thu, 18 Aug 2011 17:34:31 +0800
>> Message-Id: <1313660071-25230-4-git-send-email-lidongyang@xxxxxxxxxx>
>> X-Mailer: git-send-email 1.7.6
>> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>> References: <1313660071-25230-1-git-send-email-lidongyang@xxxxxxxxxx>
>>
>> Now blkback driver can handle the trim request from guest, we will
>> forward the request to phy device if it really has trim support, or we'll
>> punch a hole on the image file.
>>
>> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
>> ---
>> drivers/block/xen-blkback/blkback.c | 85
>> +++++++++++++++++++++++++++++------
>> drivers/block/xen-blkback/common.h | 4 +-
>> drivers/block/xen-blkback/xenbus.c | 61 +++++++++++++++++++++++++
>> 3 files changed, 135 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c
>> b/drivers/block/xen-blkback/blkback.c
>> index 2330a9a..5acc37a 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -39,6 +39,9 @@
>> #include <linux/list.h>
>> #include <linux/delay.h>
>> #include <linux/freezer.h>
>> +#include <linux/loop.h>
>> +#include <linux/falloc.h>
>> +#include <linux/fs.h>
>>
>> #include <xen/events.h>
>> #include <xen/page.h>
>> @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
>>
>> static void print_stats(struct xen_blkif *blkif)
>> {
>> - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
>> + pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
>> + " | tr %4d\n",
>> current->comm, blkif->st_oo_req,
>> - blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
>> + blkif->st_rd_req, blkif->st_wr_req,
>> + blkif->st_f_req, blkif->st_tr_req);
>> blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
>> blkif->st_rd_req = 0;
>> blkif->st_wr_req = 0;
>> blkif->st_oo_req = 0;
>> + blkif->st_tr_req = 0;
>> }
>>
>> int xen_blkif_schedule(void *arg)
>> @@ -563,6 +569,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>> blkif->st_f_req++;
>> operation = WRITE_FLUSH;
>> break;
>> + case BLKIF_OP_TRIM:
>> + blkif->st_tr_req++;
>> + operation = REQ_DISCARD;
>> + break;
>> case BLKIF_OP_WRITE_BARRIER:
>> default:
>> operation = 0; /* make gcc happy */
>> @@ -572,7 +582,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>
>> /* Check that the number of segments is sane. */
>> nseg = req->nr_segments;
>> - if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
>> + if (unlikely(nseg == 0 && operation != (WRITE_FLUSH | REQ_DISCARD)) ||
>
> This will match neither WRITE_FLUSH nor REQ_DISCARD.
Sorry for the stupid mistake.
>
>> unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
>> pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
>> nseg);
>> @@ -627,10 +637,13 @@ static int dispatch_rw_block_io(struct xen_blkif
>> *blkif,
>> * the hypercall to unmap the grants - that is all done in
>> * xen_blkbk_unmap.
>> */
>> - if (xen_blkbk_map(req, pending_req, seg))
>> + if (operation != BLKIF_OP_TRIM && xen_blkbk_map(req, pending_req, seg))
>> goto fail_flush;
>>
>> - /* This corresponding xen_blkif_put is done in __end_block_io_op */
>> + /*
>> + * This corresponding xen_blkif_put is done in __end_block_io_op, or
>> + * below if we are handling a BLKIF_OP_TRIM.
>> + */
>> xen_blkif_get(blkif);
>>
>> for (i = 0; i < nseg; i++) {
>> @@ -654,18 +667,62 @@ static int dispatch_rw_block_io(struct xen_blkif
>> *blkif,
>> preq.sector_number += seg[i].nsec;
>> }
>>
>> - /* This will be hit if the operation was a flush. */
>> + /* This will be hit if the operation was a flush or trim. */
>> if (!bio) {
>> - BUG_ON(operation != WRITE_FLUSH);
>> + BUG_ON(operation != (WRITE_FLUSH | REQ_DISCARD));
>
> Same here.
>
>>
>> - bio = bio_alloc(GFP_KERNEL, 0);
>> - if (unlikely(bio == NULL))
>> - goto fail_put_bio;
>> + if (operation == WRITE_FLUSH) {
>> + bio = bio_alloc(GFP_KERNEL, 0);
>> + if (unlikely(bio == NULL))
>> + goto fail_put_bio;
>>
>> - biolist[nbio++] = bio;
>> - bio->bi_bdev = preq.bdev;
>> - bio->bi_private = pending_req;
>> - bio->bi_end_io = end_block_io_op;
>> + biolist[nbio++] = bio;
>> + bio->bi_bdev = preq.bdev;
>> + bio->bi_private = pending_req;
>> + bio->bi_end_io = end_block_io_op;
>> + } else if (operation == REQ_DISCARD) {
>> + int err = 0;
>> + int status = BLKIF_RSP_OKAY;
>> + struct block_device *bdev = blkif->vbd.bdev;
>> +
>> + preq.nr_sects = req->u.trim.nr_sectors;
>> + if (blkif->vbd.type & VDISK_PHY_BACKEND)
>> + /* just forward the trim request */
>> + err = blkdev_issue_discard(bdev,
>> + preq.sector_number,
>> + preq.nr_sects,
>> + GFP_KERNEL, 0);
>> + else if (blkif->vbd.type & VDISK_FILE_BACKEND) {
>> + /* punch a hole in the backing file */
>> + struct loop_device *lo =
>> + bdev->bd_disk->private_data;
>> + struct file *file = lo->lo_backing_file;
>> +
>> + if (file->f_op->fallocate)
>> + err = file->f_op->fallocate(file,
>> + FALLOC_FL_KEEP_SIZE |
>> + FALLOC_FL_PUNCH_HOLE,
>> + preq.sector_number << 9,
>> + preq.nr_sects << 9);
>> + else
>> + err = -EOPNOTSUPP;
>> + } else
>
> Are you not worried about doing this synchronously, i.e. blocking any
> other I/O going on for the device?
If the backend is a phy device that supports trim, we simply forward the
trim request: blkdev_issue_discard() allocates a bio and waits for it to
finish. Sure, that will block other I/O, but trim is a non-queued,
non-mergeable operation, so it is going to stall the queue anyway.
If the backend is a file, we punch a hole in the file so that the
filesystem releases the underlying blocks; this leaves a "hole" inside
the file and reduces its disk usage.
As for hole punching, I don't think we can make it asynchronous;
correct me if I'm wrong.
>
>> + status = BLKIF_RSP_EOPNOTSUPP;
>> +
>> + if (err == -EOPNOTSUPP) {
>> + DPRINTK("blkback: discard op failed, "
>> + "not supported\n");
>> + status = BLKIF_RSP_EOPNOTSUPP;
>> + } else if (err)
>> + status = BLKIF_RSP_ERROR;
>> +
>> + if (status == BLKIF_RSP_OKAY)
>> + blkif->st_tr_sect += preq.nr_sects;
>
> I don't think this is a particularly useful statistic.
>
>> + make_response(blkif, req->id, req->operation, status);
>> + xen_blkif_put(blkif);
>> + free_req(pending_req);
>> + return 0;
>> + }
>> }
>>
>> /*
>> diff --git a/drivers/block/xen-blkback/common.h
>> b/drivers/block/xen-blkback/common.h
>> index 9e40b28..1fef727 100644
>> --- a/drivers/block/xen-blkback/common.h
>> +++ b/drivers/block/xen-blkback/common.h
>> @@ -159,8 +159,10 @@ struct xen_blkif {
>> int st_wr_req;
>> int st_oo_req;
>> int st_f_req;
>> + int st_tr_req;
>> int st_rd_sect;
>> int st_wr_sect;
>> + int st_tr_sect;
>>
>> wait_queue_head_t waiting_to_free;
>>
>> @@ -182,7 +184,7 @@ struct xen_blkif {
>>
>> struct phys_req {
>> unsigned short dev;
>> - unsigned short nr_sects;
>> + blkif_sector_t nr_sects;
>> struct block_device *bdev;
>> blkif_sector_t sector_number;
>> };
>> diff --git a/drivers/block/xen-blkback/xenbus.c
>> b/drivers/block/xen-blkback/xenbus.c
>> index 3f129b4..05ea8e0 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -272,16 +272,20 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
>> VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
>> VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
>> VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
>> +VBD_SHOW(tr_req, "%d\n", be->blkif->st_tr_req);
>> VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
>> VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
>> +VBD_SHOW(tr_sect, "%d\n", be->blkif->st_tr_sect);
>>
>> static struct attribute *xen_vbdstat_attrs[] = {
>> &dev_attr_oo_req.attr,
>> &dev_attr_rd_req.attr,
>> &dev_attr_wr_req.attr,
>> &dev_attr_f_req.attr,
>> + &dev_attr_tr_req.attr,
>> &dev_attr_rd_sect.attr,
>> &dev_attr_wr_sect.attr,
>> + &dev_attr_tr_sect.attr,
>> NULL
>> };
>>
>> @@ -419,6 +423,59 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction
>> xbt,
>> return err;
>> }
>>
>> +int xen_blkbk_trim(struct xenbus_transaction xbt, struct backend_info *be)
>> +{
>> + struct xenbus_device *dev = be->dev;
>> + struct xen_vbd *vbd = &be->blkif->vbd;
>> + char *type;
>> + int err;
>> + int state = 0;
>> +
>> + type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
>> + if (!IS_ERR(type)) {
>> + if (strcmp(type, "file") == 0)
>> + state = 1;
>> + vbd->type |= VDISK_FILE_BACKEND;
>
> Missing { and }.
>
> Jan
>
>> + if (strcmp(type, "phy") == 0) {
>> + struct block_device *bdev = be->blkif->vbd.bdev;
>> + struct request_queue *q = bdev_get_queue(bdev);
>> + if (blk_queue_discard(q)) {
>> + err = xenbus_printf(xbt, dev->nodename,
>> + "discard_granularity", "%u",
>> + q->limits.discard_granularity);
>> + if (err) {
>> + xenbus_dev_fatal(dev, err,
>> + "writing discard_granularity");
>> + goto kfree;
>> + }
>> + err = xenbus_printf(xbt, dev->nodename,
>> + "discard_alignment", "%u",
>> + q->limits.discard_alignment);
>> + if (err) {
>> + xenbus_dev_fatal(dev, err,
>> + "writing discard_alignment");
>> + goto kfree;
>> + }
>> + state = 1;
>> + vbd->type |= VDISK_PHY_BACKEND;
>> + }
>> + }
>> + } else {
>> + err = PTR_ERR(type);
>> + xenbus_dev_fatal(dev, err, "reading type");
>> + goto out;
>> + }
>> +
>> + err = xenbus_printf(xbt, dev->nodename, "feature-trim",
>> + "%d", state);
>> + if (err)
>> + xenbus_dev_fatal(dev, err, "writing feature-trim");
>> +kfree:
>> + kfree(type);
>> +out:
>> + return err;
>> +}
>> +
>> /*
>> * Entry point to this code when a new device is created. Allocate the
>> basic
>> * structures, and watch the store waiting for the hotplug scripts to tell
>> us
>> @@ -650,6 +707,10 @@ again:
>> if (err)
>> goto abort;
>>
>> + err = xen_blkbk_trim(xbt, be);
>> + if (err)
>> + goto abort;
>> +
>> err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
>> (unsigned long long)vbd_sz(&be->blkif->vbd));
>> if (err) {
>
>
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|