* Re: <subject missing #2>
[not found] <6688c4e4.427@victor.provo.novell.com>
@ 2011-08-18 10:18 ` Jan Beulich
[not found] ` <4E4D02F80200007800051CB6@victor.provo.novell.com>
1 sibling, 0 replies; 3+ messages in thread
From: Jan Beulich @ 2011-08-18 10:18 UTC (permalink / raw)
To: xen-devel, Dong Yang Li; +Cc: owen.smith
>>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@novell.com> wrote:
> JBeulich@novell.com
> Subject: [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim
> request
> Date: Thu, 18 Aug 2011 17:34:30 +0800
> Message-Id: <1313660071-25230-3-git-send-email-lidongyang@novell.com>
> X-Mailer: git-send-email 1.7.6
> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
> References: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
>
> The blkfront driver now will read feature-trim from xenstore,
> and set up the request queue with trim params, then we can forward the
> discard requests to backend driver.
>
> Signed-off-by: Li Dongyang <lidongyang@novell.com>
> ---
> drivers/block/xen-blkfront.c | 96 ++++++++++++++++++++++++++++++++----------
> 1 files changed, 73 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index b536a9c..c22fb07 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -98,6 +98,9 @@ struct blkfront_info
> unsigned long shadow_free;
> unsigned int feature_flush;
> unsigned int flush_op;
> + unsigned int feature_trim;
> + unsigned int discard_granularity;
> + unsigned int discard_alignment;
> int is_ready;
> };
>
> @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
> ring_req->operation = info->flush_op;
> }
>
> - ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
> - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
> + if (unlikely(req->cmd_flags & REQ_DISCARD)) {
> + /* id, sector_number and handle are set above. */
> + ring_req->operation = BLKIF_OP_TRIM;
> + ring_req->nr_segments = 0;
> + ring_req->u.trim.nr_sectors = blk_rq_sectors(req);
> + } else {
> + ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
> + BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
>
> - for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
> - buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
> - fsect = sg->offset >> 9;
> - lsect = fsect + (sg->length >> 9) - 1;
> - /* install a grant reference. */
> - ref = gnttab_claim_grant_reference(&gref_head);
> - BUG_ON(ref == -ENOSPC);
> + for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
> + buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
> + fsect = sg->offset >> 9;
> + lsect = fsect + (sg->length >> 9) - 1;
> + /* install a grant reference. */
> + ref = gnttab_claim_grant_reference(&gref_head);
> + BUG_ON(ref == -ENOSPC);
>
> - gnttab_grant_foreign_access_ref(
> - ref,
> - info->xbdev->otherend_id,
> - buffer_mfn,
> - rq_data_dir(req) );
> -
> - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
> - ring_req->u.rw.seg[i] =
> - (struct blkif_request_segment) {
> - .gref = ref,
> - .first_sect = fsect,
> - .last_sect = lsect };
> + gnttab_grant_foreign_access_ref(
> + ref,
> + info->xbdev->otherend_id,
> + buffer_mfn,
> + rq_data_dir(req));
> +
> + info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
> + ring_req->u.rw.seg[i] =
> + (struct blkif_request_segment) {
> + .gref = ref,
> + .first_sect = fsect,
> + .last_sect = lsect };
> + }
> }
>
> info->ring.req_prod_pvt++;
> @@ -399,6 +409,7 @@ wait:
> static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
> {
> struct request_queue *rq;
> + struct blkfront_info *info = gd->private_data;
>
> rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
> if (rq == NULL)
> @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16
> sector_size)
>
> queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
>
> + if (info->feature_trim) {
> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
> + blk_queue_max_discard_sectors(rq, get_capacity(gd));
> + rq->limits.discard_granularity = info->discard_granularity;
> + rq->limits.discard_alignment = info->discard_alignment;
> + }
> +
> /* Hard sector size and max sectors impersonate the equiv. hardware. */
> blk_queue_logical_block_size(rq, sector_size);
> blk_queue_max_hw_sectors(rq, 512);
> @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void
> *dev_id)
>
> error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
> switch (bret->operation) {
> + case BLKIF_OP_TRIM:
> + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> + struct request_queue *rq = info->rq;
> + printk(KERN_WARNING "blkfront: %s: trim op failed\n",
> + info->gd->disk_name);
> + error = -EOPNOTSUPP;
> + info->feature_trim = 0;
> + spin_lock_irq(rq->queue_lock);
> + queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
> + spin_unlock_irq(rq->queue_lock);
Are you sure you want to re-enable interrupts here unconditionally?
> + }
> + __blk_end_request_all(req, error);
> + break;
> case BLKIF_OP_FLUSH_DISKCACHE:
> case BLKIF_OP_WRITE_BARRIER:
> if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> @@ -1108,7 +1139,9 @@ static void blkfront_connect(struct blkfront_info
> *info)
> unsigned long sector_size;
> unsigned int binfo;
> int err;
> - int barrier, flush;
> + unsigned int discard_granularity;
> + unsigned int discard_alignment;
> + int barrier, flush, trim;
>
> switch (info->connected) {
> case BLKIF_STATE_CONNECTED:
> @@ -1178,7 +1211,24 @@ static void blkfront_connect(struct blkfront_info
> *info)
> info->feature_flush = REQ_FLUSH;
> info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
> }
> -
> +
> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
> + "feature-trim", "%d", &trim,
> + NULL);
So you switched this to use "trim", but ...
> +
> + if (!err && trim) {
> + info->feature_trim = 1;
> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
> + "discard_granularity", "%u", &discard_granularity,
> + "discard_alignment", "%u", &discard_alignment,
... you kept these using "discard" - quite inconsistent.
Also, I think (but I may be wrong) that dashes are preferred over
underscores in xenstore node names.
> + NULL);
> + if (!err) {
> + info->discard_granularity = discard_granularity;
> + info->discard_alignment = discard_alignment;
I think you should set info->feature_trim only here (and then you can
eliminate the local variables).
Jan
> + }
> + } else
> + info->feature_trim = 0;
> +
> err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
> if (err) {
> xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: <subject missing #2>
[not found] ` <4E4D02F80200007800051CB6@victor.provo.novell.com>
@ 2011-08-22 9:19 ` Li Dongyang
2011-08-22 9:55 ` [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim request Jan Beulich
0 siblings, 1 reply; 3+ messages in thread
From: Li Dongyang @ 2011-08-22 9:19 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel, owen.smith
On Thu, Aug 18, 2011 at 6:18 PM, Jan Beulich <JBeulich@novell.com> wrote:
> >>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@novell.com> wrote:
>> JBeulich@novell.com
>> Subject: [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim
>> request
>> Date: Thu, 18 Aug 2011 17:34:30 +0800
>> Message-Id: <1313660071-25230-3-git-send-email-lidongyang@novell.com>
>> X-Mailer: git-send-email 1.7.6
>> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
>> References: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
>>
>> The blkfront driver now will read feature-trim from xenstore,
>> and set up the request queue with trim params, then we can forward the
>> discard requests to backend driver.
>>
>> Signed-off-by: Li Dongyang <lidongyang@novell.com>
>> ---
>> drivers/block/xen-blkfront.c | 96 ++++++++++++++++++++++++++++++++----------
>> 1 files changed, 73 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index b536a9c..c22fb07 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -98,6 +98,9 @@ struct blkfront_info
>> unsigned long shadow_free;
>> unsigned int feature_flush;
>> unsigned int flush_op;
>> + unsigned int feature_trim;
>> + unsigned int discard_granularity;
>> + unsigned int discard_alignment;
>> int is_ready;
>> };
>>
>> @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
>> ring_req->operation = info->flush_op;
>> }
>>
>> - ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
>> - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
>> + if (unlikely(req->cmd_flags & REQ_DISCARD)) {
>> + /* id, sector_number and handle are set above. */
>> + ring_req->operation = BLKIF_OP_TRIM;
>> + ring_req->nr_segments = 0;
>> + ring_req->u.trim.nr_sectors = blk_rq_sectors(req);
>> + } else {
>> + ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
>> + BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
>>
>> - for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
>> - buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
>> - fsect = sg->offset >> 9;
>> - lsect = fsect + (sg->length >> 9) - 1;
>> - /* install a grant reference. */
>> - ref = gnttab_claim_grant_reference(&gref_head);
>> - BUG_ON(ref == -ENOSPC);
>> + for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
>> + buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
>> + fsect = sg->offset >> 9;
>> + lsect = fsect + (sg->length >> 9) - 1;
>> + /* install a grant reference. */
>> + ref = gnttab_claim_grant_reference(&gref_head);
>> + BUG_ON(ref == -ENOSPC);
>>
>> - gnttab_grant_foreign_access_ref(
>> - ref,
>> - info->xbdev->otherend_id,
>> - buffer_mfn,
>> - rq_data_dir(req) );
>> -
>> - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
>> - ring_req->u.rw.seg[i] =
>> - (struct blkif_request_segment) {
>> - .gref = ref,
>> - .first_sect = fsect,
>> - .last_sect = lsect };
>> + gnttab_grant_foreign_access_ref(
>> + ref,
>> + info->xbdev->otherend_id,
>> + buffer_mfn,
>> + rq_data_dir(req));
>> +
>> + info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
>> + ring_req->u.rw.seg[i] =
>> + (struct blkif_request_segment) {
>> + .gref = ref,
>> + .first_sect = fsect,
>> + .last_sect = lsect };
>> + }
>> }
>>
>> info->ring.req_prod_pvt++;
>> @@ -399,6 +409,7 @@ wait:
>> static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
>> {
>> struct request_queue *rq;
>> + struct blkfront_info *info = gd->private_data;
>>
>> rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
>> if (rq == NULL)
>> @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16
>> sector_size)
>>
>> queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
>>
>> + if (info->feature_trim) {
>> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
>> + blk_queue_max_discard_sectors(rq, get_capacity(gd));
>> + rq->limits.discard_granularity = info->discard_granularity;
>> + rq->limits.discard_alignment = info->discard_alignment;
>> + }
>> +
>> /* Hard sector size and max sectors impersonate the equiv. hardware. */
>> blk_queue_logical_block_size(rq, sector_size);
>> blk_queue_max_hw_sectors(rq, 512);
>> @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void
>> *dev_id)
>>
>> error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
>> switch (bret->operation) {
>> + case BLKIF_OP_TRIM:
>> + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
>> + struct request_queue *rq = info->rq;
>> + printk(KERN_WARNING "blkfront: %s: trim op failed\n",
>> + info->gd->disk_name);
>> + error = -EOPNOTSUPP;
>> + info->feature_trim = 0;
>> + spin_lock_irq(rq->queue_lock);
>> + queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
>> + spin_unlock_irq(rq->queue_lock);
>
> Are you sure you want to re-enable interrupts here unconditionally?
hm, you're right, as we called spin_lock_irqsave() above, I think it's
safe for us
to just use spin_lock and spin_unlock here.
>
>> + }
>> + __blk_end_request_all(req, error);
>> + break;
>> case BLKIF_OP_FLUSH_DISKCACHE:
>> case BLKIF_OP_WRITE_BARRIER:
>> if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
>> @@ -1108,7 +1139,9 @@ static void blkfront_connect(struct blkfront_info
>> *info)
>> unsigned long sector_size;
>> unsigned int binfo;
>> int err;
>> - int barrier, flush;
>> + unsigned int discard_granularity;
>> + unsigned int discard_alignment;
>> + int barrier, flush, trim;
>>
>> switch (info->connected) {
>> case BLKIF_STATE_CONNECTED:
>> @@ -1178,7 +1211,24 @@ static void blkfront_connect(struct blkfront_info
>> *info)
>> info->feature_flush = REQ_FLUSH;
>> info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
>> }
>> -
>> +
>> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>> + "feature-trim", "%d", &trim,
>> + NULL);
>
> So you switched this to use "trim", but ...
>
>> +
>> + if (!err && trim) {
>> + info->feature_trim = 1;
>> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>> + "discard_granularity", "%u", &discard_granularity,
>> + "discard_alignment", "%u", &discard_alignment,
>
> ... you kept these using "discard" - quite inconsistent.
the discard_granularity and discard_alignment are taken from struct queue_limits
they are needed to setup the queue in guest if we are using a phy
device has trim.
so I think we can keep the names here.
>
> Also, I think (but I may be wrong) that dashes are preferred over
> underscores in xenstore node names.
that could be done in xenstore, I used dashes because they are taken
from struct queue_limits
>
>> + NULL);
>> + if (!err) {
>> + info->discard_granularity = discard_granularity;
>> + info->discard_alignment = discard_alignment;
>
> I think you should set info->feature_trim only here (and then you can
> eliminate the local variables).
those discard_granularity and discard_alignment are only meaningful if
the backend is a phy device has trim,
and the 2 args are read from the queue limits from host. if the
backend is a file, we can go with no discard_granularity
and discard_alignment because we are about to punch a hole in the image file.
>
> Jan
>
>> + }
>> + } else
>> + info->feature_trim = 0;
>> +
>> err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
>> if (err) {
>> xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
>
>
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim request
2011-08-22 9:19 ` Li Dongyang
@ 2011-08-22 9:55 ` Jan Beulich
0 siblings, 0 replies; 3+ messages in thread
From: Jan Beulich @ 2011-08-22 9:55 UTC (permalink / raw)
To: Li Dongyang; +Cc: xen-devel, owen.smith
>>> On 22.08.11 at 11:19, Li Dongyang <jerry87905@gmail.com> wrote:
> On Thu, Aug 18, 2011 at 6:18 PM, Jan Beulich <JBeulich@novell.com> wrote:
>> >>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@novell.com> wrote:
>>> @@ -1178,7 +1211,24 @@ static void blkfront_connect(struct blkfront_info
>>> *info)
>>> info->feature_flush = REQ_FLUSH;
>>> info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
>>> }
>>> -
>>> +
>>> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>>> + "feature-trim", "%d", &trim,
>>> + NULL);
>>
>> So you switched this to use "trim", but ...
>>
>>> +
>>> + if (!err && trim) {
>>> + info->feature_trim = 1;
>>> + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>>> + "discard_granularity", "%u", &discard_granularity,
>>> + "discard_alignment", "%u", &discard_alignment,
>>
>> ... you kept these using "discard" - quite inconsistent.
> the discard_granularity and discard_alignment are taken from struct
> queue_limits
> they are needed to setup the queue in guest if we are using a phy
> device has trim.
> so I think we can keep the names here.
The way Linux names them doesn't matter for the xenstore interface.
I think they should be named so that the connection to the base
node's name is obvious.
>>
>> Also, I think (but I may be wrong) that dashes are preferred over
>> underscores in xenstore node names.
> that could be done in xenstore, I used dashes because they are taken
> from struct queue_limits
>>
>>> + NULL);
>>> + if (!err) {
>>> + info->discard_granularity = discard_granularity;
>>> + info->discard_alignment = discard_alignment;
>>
>> I think you should set info->feature_trim only here (and then you can
>> eliminate the local variables).
> those discard_granularity and discard_alignment are only meaningful if
> the backend is a phy device has trim,
> and the 2 args are read from the queue limits from host. if the
> backend is a file, we can go with no discard_granularity
> and discard_alignment because we are about to punch a hole in the image
> file.
Then you should update the condition accordingly. Setting
info->feature_trim prematurely is just calling for later problems.
Jan
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2011-08-22 9:55 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <6688c4e4.427@victor.provo.novell.com>
2011-08-18 10:18 ` <subject missing #2> Jan Beulich
[not found] ` <4E4D02F80200007800051CB6@victor.provo.novell.com>
2011-08-22 9:19 ` Li Dongyang
2011-08-22 9:55 ` [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim request Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).