public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
@ 2014-05-09 12:07 Shaohua Li
  2014-05-09 15:00 ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Shaohua Li @ 2014-05-09 12:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: axboe, hch


flush request is special, which borrows tag from other request. Need a special
handling to get it from tag.

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
 block/blk-flush.c                 |    4 +++-
 block/blk-mq.c                    |   13 +++++++++++--
 drivers/block/mtip32xx/mtip32xx.c |    2 +-
 include/linux/blk-mq.h            |    3 ++-
 include/linux/blkdev.h            |    1 +
 5 files changed, 18 insertions(+), 5 deletions(-)

Index: linux/block/blk-flush.c
===================================================================
--- linux.orig/block/blk-flush.c	2014-05-09 19:52:25.360415014 +0800
+++ linux/block/blk-flush.c	2014-05-09 19:53:59.795228450 +0800
@@ -231,8 +231,10 @@ static void flush_end_io(struct request
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
+		q->flush_rq_tag_valid = false;
+	}
 
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
Index: linux/block/blk-mq.c
===================================================================
--- linux.orig/block/blk-mq.c	2014-05-09 19:52:25.360415014 +0800
+++ linux/block/blk-mq.c	2014-05-09 19:57:58.432227506 +0800
@@ -336,6 +336,8 @@ void blk_mq_clone_flush_request(struct r
 
 	flush_rq->mq_ctx = orig_rq->mq_ctx;
 	flush_rq->tag = orig_rq->tag;
+	smp_wmb();
+	orig_rq->q->flush_rq_tag_valid = true;
 	memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
 		hctx->cmd_size);
 }
@@ -481,8 +483,14 @@ void blk_mq_requeue_request(struct reque
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+struct request *blk_mq_tag_to_rq(struct request_queue *q,
+	struct blk_mq_tags *tags, unsigned int tag)
 {
+	if (q->flush_rq_tag_valid) {
+		smp_rmb();
+		if (q->flush_rq->tag == tag)
+			return q->flush_rq;
+	}
 	return tags->rqs[tag];
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
@@ -512,7 +520,7 @@ static void blk_mq_timeout_check(void *_
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
+		rq = blk_mq_tag_to_rq(hctx->queue, hctx->tags, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
@@ -1510,6 +1518,7 @@ struct request_queue *blk_mq_init_queue(
 				GFP_KERNEL);
 	if (!q->flush_rq)
 		goto err_hw;
+	q->flush_rq_tag_valid = false;
 
 	if (blk_mq_init_hw_queues(q, set))
 		goto err_flush_rq;
Index: linux/include/linux/blk-mq.h
===================================================================
--- linux.orig/include/linux/blk-mq.h	2014-05-09 19:52:25.360415014 +0800
+++ linux/include/linux/blk-mq.h	2014-05-09 19:52:25.352415140 +0800
@@ -149,7 +149,8 @@ void blk_mq_free_request(struct request
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct request_queue *q,
+	struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int);
Index: linux/drivers/block/mtip32xx/mtip32xx.c
===================================================================
--- linux.orig/drivers/block/mtip32xx/mtip32xx.c	2014-05-09 19:52:25.360415014 +0800
+++ linux/drivers/block/mtip32xx/mtip32xx.c	2014-05-09 19:52:25.352415140 +0800
@@ -195,7 +195,7 @@ static struct request *mtip_rq_from_tag(
 {
 	struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
 
-	return blk_mq_tag_to_rq(hctx->tags, tag);
+	return blk_mq_tag_to_rq(dd->queue, hctx->tags, tag);
 }
 
 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
Index: linux/include/linux/blkdev.h
===================================================================
--- linux.orig/include/linux/blkdev.h	2014-05-09 19:42:28.000000000 +0800
+++ linux/include/linux/blkdev.h	2014-05-09 19:53:07.591883317 +0800
@@ -461,6 +461,7 @@ struct request_queue {
 	struct list_head	flush_queue[2];
 	struct list_head	flush_data_in_flight;
 	struct request		*flush_rq;
+	bool			flush_rq_tag_valid;
 	spinlock_t		mq_flush_lock;
 
 	struct mutex		sysfs_lock;

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-05-09 12:07 [patch]blk-mq: blk_mq_tag_to_rq should handle flush request Shaohua Li
@ 2014-05-09 15:00 ` Christoph Hellwig
  2014-05-10  4:00   ` Shaohua Li
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-05-09 15:00 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, axboe, hch

On Fri, May 09, 2014 at 08:07:33PM +0800, Shaohua Li wrote:
> 
> flush request is special, which borrows tag from other request. Need a special
> handling to get it from tag.

Thanks, we probably need this one.  But I think you can simply test
REQ_FLUSH_SEQ the passed in request instead of the flush_rq_tag_valid
flag/


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-05-09 15:00 ` Christoph Hellwig
@ 2014-05-10  4:00   ` Shaohua Li
  2014-05-11 17:40     ` Jens Axboe
  2014-05-30 14:09     ` Jens Axboe
  0 siblings, 2 replies; 35+ messages in thread
From: Shaohua Li @ 2014-05-10  4:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, axboe

On Fri, May 09, 2014 at 08:00:18AM -0700, Christoph Hellwig wrote:
> On Fri, May 09, 2014 at 08:07:33PM +0800, Shaohua Li wrote:
> > 
> > flush request is special, which borrows tag from other request. Need a special
> > handling to get it from tag.
> 
> Thanks, we probably need this one.  But I think you can simply test
> REQ_FLUSH_SEQ the passed in request instead of the flush_rq_tag_valid
> flag/

fair enough. updated one.

 
Subject: blk-mq: blk_mq_tag_to_rq should handle flush request

flush request is special, which borrows tag from other request. Need a special
handling to get it from tag.

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
 block/blk-flush.c                 |    4 +++-
 block/blk-mq.c                    |    8 ++++++--
 drivers/block/mtip32xx/mtip32xx.c |    2 +-
 include/linux/blk-mq.h            |    3 ++-
 4 files changed, 12 insertions(+), 5 deletions(-)

Index: linux/block/blk-flush.c
===================================================================
--- linux.orig/block/blk-flush.c	2014-05-10 11:49:21.646830000 +0800
+++ linux/block/blk-flush.c	2014-05-10 11:49:21.638830100 +0800
@@ -231,8 +231,10 @@ static void flush_end_io(struct request
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
+		q->flush_rq->cmd_flags = 0;
+	}
 
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
Index: linux/block/blk-mq.c
===================================================================
--- linux.orig/block/blk-mq.c	2014-05-10 11:49:21.646830000 +0800
+++ linux/block/blk-mq.c	2014-05-10 11:49:21.638830100 +0800
@@ -481,8 +481,12 @@ void blk_mq_requeue_request(struct reque
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+struct request *blk_mq_tag_to_rq(struct request_queue *q,
+	struct blk_mq_tags *tags, unsigned int tag)
 {
+	if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
+	    q->flush_rq->tag == tag)
+		return q->flush_rq;
 	return tags->rqs[tag];
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
@@ -512,7 +516,7 @@ static void blk_mq_timeout_check(void *_
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
+		rq = blk_mq_tag_to_rq(hctx->queue, hctx->tags, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
Index: linux/include/linux/blk-mq.h
===================================================================
--- linux.orig/include/linux/blk-mq.h	2014-05-10 11:49:21.646830000 +0800
+++ linux/include/linux/blk-mq.h	2014-05-10 11:49:21.638830100 +0800
@@ -149,7 +149,8 @@ void blk_mq_free_request(struct request
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct request_queue *q,
+	struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int);
Index: linux/drivers/block/mtip32xx/mtip32xx.c
===================================================================
--- linux.orig/drivers/block/mtip32xx/mtip32xx.c	2014-05-10 11:49:21.646830000 +0800
+++ linux/drivers/block/mtip32xx/mtip32xx.c	2014-05-10 11:49:21.638830100 +0800
@@ -195,7 +195,7 @@ static struct request *mtip_rq_from_tag(
 {
 	struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
 
-	return blk_mq_tag_to_rq(hctx->tags, tag);
+	return blk_mq_tag_to_rq(dd->queue, hctx->tags, tag);
 }
 
 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-05-10  4:00   ` Shaohua Li
@ 2014-05-11 17:40     ` Jens Axboe
  2014-05-30 14:09     ` Jens Axboe
  1 sibling, 0 replies; 35+ messages in thread
From: Jens Axboe @ 2014-05-11 17:40 UTC (permalink / raw)
  To: Shaohua Li, Christoph Hellwig; +Cc: linux-kernel

On 05/09/2014 10:00 PM, Shaohua Li wrote:
> On Fri, May 09, 2014 at 08:00:18AM -0700, Christoph Hellwig wrote:
>> On Fri, May 09, 2014 at 08:07:33PM +0800, Shaohua Li wrote:
>>>
>>> flush request is special, which borrows tag from other request. Need a special
>>> handling to get it from tag.
>>
>> Thanks, we probably need this one.  But I think you can simply test
>> REQ_FLUSH_SEQ the passed in request instead of the flush_rq_tag_valid
>> flag/
> 
> fair enough. updated one.
> 
>  
> Subject: blk-mq: blk_mq_tag_to_rq should handle flush request
> 
> flush request is special, which borrows tag from other request. Need a special
> handling to get it from tag.

This looks good, applied, thanks!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-05-10  4:00   ` Shaohua Li
  2014-05-11 17:40     ` Jens Axboe
@ 2014-05-30 14:09     ` Jens Axboe
  2014-06-04 11:11       ` Christoph Hellwig
  1 sibling, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-05-30 14:09 UTC (permalink / raw)
  To: Shaohua Li, Christoph Hellwig; +Cc: linux-kernel

On 2014-05-09 22:00, Shaohua Li wrote:
> On Fri, May 09, 2014 at 08:00:18AM -0700, Christoph Hellwig wrote:
>> On Fri, May 09, 2014 at 08:07:33PM +0800, Shaohua Li wrote:
>>>
>>> flush request is special, which borrows tag from other request. Need a special
>>> handling to get it from tag.
>>
>> Thanks, we probably need this one.  But I think you can simply test
>> REQ_FLUSH_SEQ the passed in request instead of the flush_rq_tag_valid
>> flag/
>
> fair enough. updated one.
>
>
> Subject: blk-mq: blk_mq_tag_to_rq should handle flush request
>
> flush request is special, which borrows tag from other request. Need a special
> handling to get it from tag.

I have applied this one, but it irks me a little bit since we have to 
touch q->flush_rq->stuff from a potential hot path. I haven't thought 
much about this yet, but it would be a lot better if we could fold in 
the flush tag somehow.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-05-30 14:09     ` Jens Axboe
@ 2014-06-04 11:11       ` Christoph Hellwig
  2014-06-04 14:15         ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 11:11 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Shaohua Li, Christoph Hellwig, linux-kernel

On Fri, May 30, 2014 at 08:09:51AM -0600, Jens Axboe wrote:
> I have applied this one, but it irks me a little bit since we have
> to touch q->flush_rq->stuff from a potential hot path. I haven't
> thought much about this yet, but it would be a lot better if we
> could fold in the flush tag somehow.

This one also breaks scsi-mq by changing the first parameter of
blk_mq_tag_to_rq.  With host-wide tags we won't have a hardware context
easily available when looking for a request by tag.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 11:11       ` Christoph Hellwig
@ 2014-06-04 14:15         ` Jens Axboe
  2014-06-04 14:20           ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 14:15 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 05:11, Christoph Hellwig wrote:
> On Fri, May 30, 2014 at 08:09:51AM -0600, Jens Axboe wrote:
>> I have applied this one, but it irks me a little bit since we have
>> to touch q->flush_rq->stuff from a potential hot path. I haven't
>> thought much about this yet, but it would be a lot better if we
>> could fold in the flush tag somehow.
>
> This one also breaks scsi-mq by changing the first parameter of
> blk_mq_tag_to_rq.  With host-wide tags we won't have a hardware context
> easily available when looking for a request by tag.

Alright, I'll revert it to pass in the tags instead.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 14:15         ` Jens Axboe
@ 2014-06-04 14:20           ` Christoph Hellwig
  2014-06-04 14:54             ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 14:20 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 08:15:45AM -0600, Jens Axboe wrote:
> Alright, I'll revert it to pass in the tags instead.

It's not as simple as the added code wants to get a queue from the
hwctx, which we can't get at.  I was planning to look into this, but
there are various other regressions in the recent block updates that I
need to fix before I can even test a tree with this one reverted.

If you can get to sorting this out soon I'd love you to handle it,
otherwise I'll look into it as soon as I can.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 14:20           ` Christoph Hellwig
@ 2014-06-04 14:54             ` Jens Axboe
  2014-06-04 14:58               ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 14:54 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 08:20, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 08:15:45AM -0600, Jens Axboe wrote:
>> Alright, I'll revert it to pass in the tags instead.
>
> It's not as simple as the added code wants to get a queue from the
> hwctx, which we can't get at.  I was planning to look into this, but
> there are various other regressions in the recent block updates that I
> need to fix before I can even test a tree with this one reverted.

Which regressions? Performance or crashes?

> If you can get to sorting this out soon I'd love you to handle it,
> otherwise I'll look into it as soon as I can.

Just took a look at it, but I don't see the problematic path. I'm 
looking at wip-9.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 14:54             ` Jens Axboe
@ 2014-06-04 14:58               ` Christoph Hellwig
  2014-06-04 15:02                 ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 14:58 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 08:54:23AM -0600, Jens Axboe wrote:
> >It's not as simple as the added code wants to get a queue from the
> >hwctx, which we can't get at.  I was planning to look into this, but
> >there are various other regressions in the recent block updates that I
> >need to fix before I can even test a tree with this one reverted.
> 
> Which regressions? Performance or crashes?

Both.  I've tracked down the SCSI boot crash and you'll have a patch for
that soon, still working on bisecting the performance crawl, but I'm
getting close.

> >If you can get to sorting this out soon I'd love you to handle it,
> >otherwise I'll look into it as soon as I can.
> 
> Just took a look at it, but I don't see the problematic path. I'm
> looking at wip-9.

scsi_mq_find_tag only gets the scsi host, which may have multiple
queues.  When called from scsi_find_tag we actually have a scsi device,
so that's not an issue, but when called from scsi_host_find_tag the
driver only provides the host.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 14:58               ` Christoph Hellwig
@ 2014-06-04 15:02                 ` Jens Axboe
  2014-06-04 15:05                   ` Christoph Hellwig
  2014-06-04 15:31                   ` Christoph Hellwig
  0 siblings, 2 replies; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:02 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 08:58, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 08:54:23AM -0600, Jens Axboe wrote:
>>> It's not as simple as the added code wants to get a queue from the
>>> hwctx, which we can't get at.  I was planning to look into this, but
>>> there are various other regressions in the recent block updates that I
>>> need to fix before I can even test a tree with this one reverted.
>>
>> Which regressions? Performance or crashes?
>
> Both.  I've tracked down the SCSI boot crash and you'll have a patch for
> that soon, still working on bisecting the performance crawl, but I'm
> getting close.

OK strange, there hasn't been that much churn since the last rebase. In 
my for-linus, there's a patch for a single queue crash, but that should 
just hit for the removal case. And then there's the atomic schedule 
patch, but that issue was actually in the code base for about a month, 
so not a new one either.

>>> If you can get to sorting this out soon I'd love you to handle it,
>>> otherwise I'll look into it as soon as I can.
>>
>> Just took a look at it, but I don't see the problematic path. I'm
>> looking at wip-9.
>
> scsi_mq_find_tag only gets the scsi host, which may have multiple
> queues.  When called from scsi_find_tag we actually have a scsi device,
> so that's not an issue, but when called from scsi_host_find_tag the
> driver only provides the host.

Only solution I see right now is to have the flush_rq in the shared 
tags, but that would potentially be a regression for multiple devices 
and heavy flush uses cases. I'll see if I can come up with something 
better, or maybe Shaohua has an idea.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:02                 ` Jens Axboe
@ 2014-06-04 15:05                   ` Christoph Hellwig
  2014-06-04 15:08                     ` Jens Axboe
  2014-06-04 15:31                   ` Christoph Hellwig
  1 sibling, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 15:05 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
> OK strange, there hasn't been that much churn since the last rebase.
> In my for-linus, there's a patch for a single queue crash, but that
> should just hit for the removal case. And then there's the atomic
> schedule patch, but that issue was actually in the code base for
> about a month, so not a new one either.

You're request initializaion optimization doesn't set up req->cmd and
thus causes all BLOCK_PC I/O (including the SCSI LUN scan) to crash and
burn.  The trivial fix is on your way.

The performance regression is caused by "blk-mq: avoid code
duplication", but I don't really understand why yet.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:05                   ` Christoph Hellwig
@ 2014-06-04 15:08                     ` Jens Axboe
  2014-06-04 15:10                       ` Christoph Hellwig
  2014-06-04 15:22                       ` Christoph Hellwig
  0 siblings, 2 replies; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 09:05, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>> OK strange, there hasn't been that much churn since the last rebase.
>> In my for-linus, there's a patch for a single queue crash, but that
>> should just hit for the removal case. And then there's the atomic
>> schedule patch, but that issue was actually in the code base for
>> about a month, so not a new one either.
>
> You're request initializaion optimization doesn't set up req->cmd and
> thus causes all BLOCK_PC I/O (including the SCSI LUN scan) to crash and
> burn.  The trivial fix is on your way.

OK. That'll arguably be a good cleanup as well, getting that initialized 
in the right place. I hate the 'lets clear all the memory' part of rq 
init, it's not super cheap to do.

> The performance regression is caused by "blk-mq: avoid code
> duplication", but I don't really understand why yet.

Gah, looks like this one dropped the tag idling. I bet that is why.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:08                     ` Jens Axboe
@ 2014-06-04 15:10                       ` Christoph Hellwig
  2014-06-04 15:11                         ` Jens Axboe
  2014-06-04 15:22                       ` Christoph Hellwig
  1 sibling, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 15:10 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 09:08:12AM -0600, Jens Axboe wrote:
> >The performance regression is caused by "blk-mq: avoid code
> >duplication", but I don't really understand why yet.
> 
> Gah, looks like this one dropped the tag idling. I bet that is why.

Ah, thanks.  It's indeed exactly the same sort of slow down I bisected
back then, this should have rung a bell instead of keeping me occupied
for hours..


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:10                       ` Christoph Hellwig
@ 2014-06-04 15:11                         ` Jens Axboe
  2014-06-04 15:16                           ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:11 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 09:10, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 09:08:12AM -0600, Jens Axboe wrote:
>>> The performance regression is caused by "blk-mq: avoid code
>>> duplication", but I don't really understand why yet.
>>
>> Gah, looks like this one dropped the tag idling. I bet that is why.
>
> Ah, thanks.  It's indeed exactly the same sort of slow down I bisected
> back then, this should have rung a bell instead of keeping me occupied
> for hours..

Patch is committed :-)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:11                         ` Jens Axboe
@ 2014-06-04 15:16                           ` Christoph Hellwig
  2014-06-04 15:19                             ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 15:16 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 09:11:47AM -0600, Jens Axboe wrote:
> On 2014-06-04 09:10, Christoph Hellwig wrote:
> >On Wed, Jun 04, 2014 at 09:08:12AM -0600, Jens Axboe wrote:
> >>>The performance regression is caused by "blk-mq: avoid code
> >>>duplication", but I don't really understand why yet.
> >>
> >>Gah, looks like this one dropped the tag idling. I bet that is why.
> >
> >Ah, thanks.  It's indeed exactly the same sort of slow down I bisected
> >back then, this should have rung a bell instead of keeping me occupied
> >for hours..
> 
> Patch is committed :-)

Oh well, I had just prepared the same one for you, but I want to at
least do basic sanity checking of my rebased tree with the fixes applied
now that I'm done bisecting.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:16                           ` Christoph Hellwig
@ 2014-06-04 15:19                             ` Jens Axboe
  0 siblings, 0 replies; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:19 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 09:16, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 09:11:47AM -0600, Jens Axboe wrote:
>> On 2014-06-04 09:10, Christoph Hellwig wrote:
>>> On Wed, Jun 04, 2014 at 09:08:12AM -0600, Jens Axboe wrote:
>>>>> The performance regression is caused by "blk-mq: avoid code
>>>>> duplication", but I don't really understand why yet.
>>>>
>>>> Gah, looks like this one dropped the tag idling. I bet that is why.
>>>
>>> Ah, thanks.  It's indeed exactly the same sort of slow down I bisected
>>> back then, this should have rung a bell instead of keeping me occupied
>>> for hours..
>>
>> Patch is committed :-)
>
> Oh well, I had just prepared the same one for you, but I want to at
> least do basic sanity checking of my rebased tree with the fixes applied
> now that I'm done bisecting.

Please do. If there's anything else I need, let me know. It wasn't clear 
to me if the cdb patch was in blk-mq or scsi-mq. I'll flush my queue out 
this afternoon, so that -rc1 will hopefully have everything in a sane state.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:08                     ` Jens Axboe
  2014-06-04 15:10                       ` Christoph Hellwig
@ 2014-06-04 15:22                       ` Christoph Hellwig
  2014-06-04 15:28                         ` Jens Axboe
  1 sibling, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 15:22 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 09:08:12AM -0600, Jens Axboe wrote:
> On 2014-06-04 09:05, Christoph Hellwig wrote:
> >On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
> >>OK strange, there hasn't been that much churn since the last rebase.
> >>In my for-linus, there's a patch for a single queue crash, but that
> >>should just hit for the removal case. And then there's the atomic
> >>schedule patch, but that issue was actually in the code base for
> >>about a month, so not a new one either.
> >
> >You're request initializaion optimization doesn't set up req->cmd and
> >thus causes all BLOCK_PC I/O (including the SCSI LUN scan) to crash and
> >burn.  The trivial fix is on your way.
> 
> OK. That'll arguably be a good cleanup as well, getting that
> initialized in the right place. I hate the 'lets clear all the
> memory' part of rq init, it's not super cheap to do.

What would the right place be?  We don't really know if a request is
going to be used for BLOCK_PC purposes until it has been returned to
the caller.

I also found another issue when just initializing req->cmd instead
of blindly reverting the patch due to the lack of req->biotail
initialization.  For now I'll got back to a revert of that patch
for my scsi-mq tree, and I'd prefer to keep that for mainline as well
until this has been throughoutly tested.

[    1.139357] scsi0 : scsi_debug, version 1.82 [20100324], dev_size_mb=8, opts=0x0
[    1.143630] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
[    1.145583] IP: [<ffffffff817242b4>] ll_back_merge_fn+0x84/0x170
[    1.146692] PGD 0 
[    1.146692] Oops: 0000 [#1] SMP 
[    1.146692] Modules linked in:
[    1.146692] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 3.15.0-rc1+ #138
[    1.146692] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[    1.146692] task: ffff88007d490010 ti: ffff88007d48e000 task.ti: ffff88007d48e000
[    1.146692] RIP: 0010:[<ffffffff817242b4>]  [<ffffffff817242b4>] ll_back_merge_fn+0x84/0x170
[    1.146692] RSP: 0000:ffff88007d48f818  EFLAGS: 00010287
[    1.146692] RAX: 0000000000000000 RBX: ffff88007b100000 RCX: 0000000000000000
[    1.146692] RDX: 0000000000000400 RSI: 0000000000000000 RDI: ffff88007d7bc290
[    1.146692] RBP: ffff88007d48f838 R08: 0000000000000500 R09: 0000000000000000
[    1.146692] R10: 000000000000ffff R11: 0000000000000000 R12: ffff88007d7dad00
[    1.146692] R13: ffff88007d7bc290 R14: ffff88007d7dad00 R15: ffff88007b100000
[    1.146692] FS:  0000000000000000(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000
[    1.146692] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[    1.146692] CR2: 0000000000000010 CR3: 0000000002347000 CR4: 00000000000006e0
[    1.146692] Stack:
[    1.146692]  ffff88007b100000 ffff88007d7dad00 0000000000000001 ffff88007d7dad00
[    1.146692]  ffff88007d48f858 ffffffff81722f92 ffff88007d7bc290 ffff88007b100000
[    1.146692]  ffff88007d48f898 ffffffff81723090 ffff88007d48f898 ffffffff811a195d
[    1.146692] Call Trace:
[    1.146692]  [<ffffffff81722f92>] blk_rq_append_bio+0x22/0x70
[    1.146692]  [<ffffffff81723090>] blk_rq_map_kern+0xb0/0x170
[    1.146692]  [<ffffffff811a195d>] ? cache_alloc_debugcheck_after.isra.63+0x9d/0x1b0
[    1.146692]  [<ffffffff818f5b68>] scsi_execute+0x148/0x170
[    1.146692]  [<ffffffff818f5c27>] scsi_execute_req_flags+0x97/0x110
[    1.146692]  [<ffffffff818f99c8>] scsi_probe_and_add_lun+0x208/0xd00
[    1.146692]  [<ffffffff810f61ba>] ? mark_held_locks+0x6a/0x90
[    1.146692]  [<ffffffff81d48e3a>] ? _raw_spin_unlock_irqrestore+0x3a/0x70
[    1.146692]  [<ffffffff818cfab5>] ? __pm_runtime_resume+0x55/0x70
[    1.146692]  [<ffffffff818fa7d8>] __scsi_scan_target+0xe8/0x6d0
[    1.146692]  [<ffffffff810f61ba>] ? mark_held_locks+0x6a/0x90
[    1.146692]  [<ffffffff81d48e3a>] ? _raw_spin_unlock_irqrestore+0x3a/0x70
[    1.146692]  [<ffffffff810f62ed>] ? trace_hardirqs_on_caller+0x10d/0x1d0
[    1.146692]  [<ffffffff818fae1e>] scsi_scan_channel.part.8+0x5e/0x80
[    1.146692]  [<ffffffff818fb1a9>] scsi_scan_host_selected+0x109/0x1d0
[    1.146692]  [<ffffffff818fb2f9>] do_scsi_scan_host+0x89/0x90
[    1.146692]  [<ffffffff818fb490>] scsi_scan_host+0x190/0x1c0
[    1.146692]  [<ffffffff819a0323>] sdebug_driver_probe+0x163/0x2d0
[    1.146692]  [<ffffffff818c7136>] driver_probe_device+0x76/0x240
[    1.146692]  [<ffffffff818c73b0>] ? __driver_attach+0xb0/0xb0
[    1.146692]  [<ffffffff818c73fb>] __device_attach+0x4b/0x60
[    1.146692]  [<ffffffff818c530e>] bus_for_each_drv+0x4e/0xa0
[    1.146692]  [<ffffffff818c7088>] device_attach+0x98/0xb0
[    1.146692]  [<ffffffff818c65b0>] bus_probe_device+0xb0/0xe0
[    1.146692]  [<ffffffff818c45c6>] device_add+0x466/0x590
[    1.146692]  [<ffffffff818d0306>] ? pm_runtime_init+0x106/0x110
[    1.146692]  [<ffffffff818c48a9>] device_register+0x19/0x20
[    1.146692]  [<ffffffff819a058c>] sdebug_add_adapter+0xfc/0x1c0
[    1.146692]  [<ffffffff824e404f>] scsi_debug_init+0x5e0/0x665
[    1.146692]  [<ffffffff824e3a6f>] ? osd_uld_init+0xc3/0xc3
[    1.146692]  [<ffffffff81000352>] do_one_initcall+0x102/0x160
[    1.146692]  [<ffffffff82492128>] kernel_init_freeable+0x108/0x19c
[    1.146692]  [<ffffffff824918ca>] ? do_early_param+0x8c/0x8c
[    1.146692]  [<ffffffff81d35510>] ? rest_init+0xc0/0xc0
[    1.146692]  [<ffffffff81d35519>] kernel_init+0x9/0xe0
[    1.146692]  [<ffffffff81d5180c>] ret_from_fork+0x7c/0xb0
[    1.146692]  [<ffffffff81d35510>] ? rest_init+0xc0/0xc0


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:22                       ` Christoph Hellwig
@ 2014-06-04 15:28                         ` Jens Axboe
  0 siblings, 0 replies; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:28 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 2014-06-04 09:22, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 09:08:12AM -0600, Jens Axboe wrote:
>> On 2014-06-04 09:05, Christoph Hellwig wrote:
>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>> OK strange, there hasn't been that much churn since the last rebase.
>>>> In my for-linus, there's a patch for a single queue crash, but that
>>>> should just hit for the removal case. And then there's the atomic
>>>> schedule patch, but that issue was actually in the code base for
>>>> about a month, so not a new one either.
>>>
>>> You're request initializaion optimization doesn't set up req->cmd and
>>> thus causes all BLOCK_PC I/O (including the SCSI LUN scan) to crash and
>>> burn.  The trivial fix is on your way.
>>
>> OK. That'll arguably be a good cleanup as well, getting that
>> initialized in the right place. I hate the 'lets clear all the
>> memory' part of rq init, it's not super cheap to do.
>
> What would the right place be?  We don't really know if a request is
> going to be used for BLOCK_PC purposes until it has been returned to
> the caller.

Probably split the init, so instead of directly assigning the type as 
BLOCK_PC (or similar), then have an init function for that that clears 
the parts.

> I also found another issue when just initializing req->cmd instead
> of blindly reverting the patch due to the lack of req->biotail
> initialization.  For now I'll got back to a revert of that patch
> for my scsi-mq tree, and I'd prefer to keep that for mainline as well
> until this has been throughoutly tested.

That's fine. I'll get this cleaned up, or consider a revert in the block 
tree as well.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:02                 ` Jens Axboe
  2014-06-04 15:05                   ` Christoph Hellwig
@ 2014-06-04 15:31                   ` Christoph Hellwig
  2014-06-04 15:39                     ` Jens Axboe
  2014-06-04 15:43                     ` Ming Lei
  1 sibling, 2 replies; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 15:31 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, linux-kernel

On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
> >scsi_mq_find_tag only gets the scsi host, which may have multiple
> >queues.  When called from scsi_find_tag we actually have a scsi device,
> >so that's not an issue, but when called from scsi_host_find_tag the
> >driver only provides the host.
> 
> Only solution I see right now is to have the flush_rq in the shared
> tags, but that would potentially be a regression for multiple
> devices and heavy flush uses cases. I'll see if I can come up with
> something better, or maybe Shaohua has an idea.

What about something like the following (untest, uncompiled, maybe
pseudo-code):

struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
	struct request *rq = tags->rqs[tag];

	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
		return rq->q->flush_rq;
	return rq;
}


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:31                   ` Christoph Hellwig
@ 2014-06-04 15:39                     ` Jens Axboe
  2014-06-04 15:47                       ` Jens Axboe
  2014-06-04 15:43                     ` Ming Lei
  1 sibling, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>> so that's not an issue, but when called from scsi_host_find_tag the
>>> driver only provides the host.
>>
>> Only solution I see right now is to have the flush_rq in the shared
>> tags, but that would potentially be a regression for multiple
>> devices and heavy flush uses cases. I'll see if I can come up with
>> something better, or maybe Shaohua has an idea.
> 
> What about something like the following (untest, uncompiled, maybe
> pseudo-code):
> 
> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
> {
> 	struct request *rq = tags->rqs[tag];
> 
> 	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
> 		return rq->q->flush_rq;
> 	return rq;

Ah yes, that'll work, the queue is always assigned. I'll make that change.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:31                   ` Christoph Hellwig
  2014-06-04 15:39                     ` Jens Axboe
@ 2014-06-04 15:43                     ` Ming Lei
  2014-06-04 15:48                       ` Jens Axboe
  1 sibling, 1 reply; 35+ messages in thread
From: Ming Lei @ 2014-06-04 15:43 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Shaohua Li, Linux Kernel Mailing List

On Wed, Jun 4, 2014 at 11:31 PM, Christoph Hellwig <hch@infradead.org> wrote:
> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>> >scsi_mq_find_tag only gets the scsi host, which may have multiple
>> >queues.  When called from scsi_find_tag we actually have a scsi device,
>> >so that's not an issue, but when called from scsi_host_find_tag the
>> >driver only provides the host.
>>
>> Only solution I see right now is to have the flush_rq in the shared
>> tags, but that would potentially be a regression for multiple
>> devices and heavy flush uses cases. I'll see if I can come up with
>> something better, or maybe Shaohua has an idea.
>
> What about something like the following (untest, uncompiled, maybe
> pseudo-code):
>
> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
> {
>         struct request *rq = tags->rqs[tag];
>
>         if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>                 return rq->q->flush_rq;
>         return rq;
> }

Looks we thought it together, :-)

Also maybe the flush_rq->tag need to be cleared in flush_end_io().


Thanks,
-- 
Ming Lei

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:39                     ` Jens Axboe
@ 2014-06-04 15:47                       ` Jens Axboe
  2014-06-04 16:25                         ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:47 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1312 bytes --]

On 06/04/2014 09:39 AM, Jens Axboe wrote:
> On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>> driver only provides the host.
>>>
>>> Only solution I see right now is to have the flush_rq in the shared
>>> tags, but that would potentially be a regression for multiple
>>> devices and heavy flush uses cases. I'll see if I can come up with
>>> something better, or maybe Shaohua has an idea.
>>
>> What about something like the following (untest, uncompiled, maybe
>> pseudo-code):
>>
>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>> {
>> 	struct request *rq = tags->rqs[tag];
>>
>> 	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>> 		return rq->q->flush_rq;
>> 	return rq;
> 
> Ah yes, that'll work, the queue is always assigned. I'll make that change.

Something like this in complete form. Compile tested only, I'll test it
on dev box. Probably doesn't matter too much, but I prefer to
potentially have the faster path (non-flush) just fall inline.


-- 
Jens Axboe


[-- Attachment #2: tag-to-request.patch --]
[-- Type: text/x-patch, Size: 2579 bytes --]

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e8e8cf00815..4e4cd6208052 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -529,15 +529,20 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static inline bool is_flush_request(struct request *rq, unsigned int tag)
 {
-	struct request_queue *q = hctx->queue;
+	return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
+			rq->q->flush_rq->tag == tag);
+}
 
-	if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
-	    q->flush_rq->tag == tag)
-		return q->flush_rq;
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+{
+	struct request *rq = tags->rqs[tag];
+
+	if (!is_flush_request(rq, tag))
+		return rq;
 
-	return hctx->tags->rqs[tag];
+	return rq->q->flush_rq;
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
@@ -566,7 +571,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx, tag++);
+		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index abc858b3528b..74abd49fabdc 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -193,7 +193,9 @@ static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd)
 static struct request *mtip_rq_from_tag(struct driver_data *dd,
 					unsigned int tag)
 {
-	return blk_mq_tag_to_rq(dd->queue->queue_hw_ctx[0], tag);
+	struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
+
+	return blk_mq_tag_to_rq(hctx->tags, tag);
 }
 
 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c15128833100..0feedebfde48 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -155,7 +155,7 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:43                     ` Ming Lei
@ 2014-06-04 15:48                       ` Jens Axboe
  2014-06-04 16:00                         ` Ming Lei
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 15:48 UTC (permalink / raw)
  To: Ming Lei, Christoph Hellwig; +Cc: Shaohua Li, Linux Kernel Mailing List

On 06/04/2014 09:43 AM, Ming Lei wrote:
> On Wed, Jun 4, 2014 at 11:31 PM, Christoph Hellwig <hch@infradead.org> wrote:
>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>> driver only provides the host.
>>>
>>> Only solution I see right now is to have the flush_rq in the shared
>>> tags, but that would potentially be a regression for multiple
>>> devices and heavy flush uses cases. I'll see if I can come up with
>>> something better, or maybe Shaohua has an idea.
>>
>> What about something like the following (untest, uncompiled, maybe
>> pseudo-code):
>>
>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>> {
>>         struct request *rq = tags->rqs[tag];
>>
>>         if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>                 return rq->q->flush_rq;
>>         return rq;
>> }
> 
> Looks we thought it together, :-)
> 
> Also maybe the flush_rq->tag need to be cleared in flush_end_io().

It clears the command flag, so that should be enough.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:48                       ` Jens Axboe
@ 2014-06-04 16:00                         ` Ming Lei
  2014-06-04 16:09                           ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Ming Lei @ 2014-06-04 16:00 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, Linux Kernel Mailing List

On Wed, Jun 4, 2014 at 11:48 PM, Jens Axboe <axboe@kernel.dk> wrote:
> On 06/04/2014 09:43 AM, Ming Lei wrote:
>> On Wed, Jun 4, 2014 at 11:31 PM, Christoph Hellwig <hch@infradead.org> wrote:
>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>> driver only provides the host.
>>>>
>>>> Only solution I see right now is to have the flush_rq in the shared
>>>> tags, but that would potentially be a regression for multiple
>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>> something better, or maybe Shaohua has an idea.
>>>
>>> What about something like the following (untest, uncompiled, maybe
>>> pseudo-code):
>>>
>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>> {
>>>         struct request *rq = tags->rqs[tag];
>>>
>>>         if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>>                 return rq->q->flush_rq;
>>>         return rq;
>>> }
>>
>> Looks we thought it together, :-)
>>
>> Also maybe the flush_rq->tag need to be cleared in flush_end_io().
>
> It clears the command flag, so that should be enough.

Only the flush_rq's command flag is cleared, and its parent request
flag isn't cleared.


Thanks,
-- 
Ming Lei

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 16:00                         ` Ming Lei
@ 2014-06-04 16:09                           ` Jens Axboe
  2014-06-04 16:26                             ` Ming Lei
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 16:09 UTC (permalink / raw)
  To: Ming Lei; +Cc: Christoph Hellwig, Shaohua Li, Linux Kernel Mailing List

On 06/04/2014 10:00 AM, Ming Lei wrote:
> On Wed, Jun 4, 2014 at 11:48 PM, Jens Axboe <axboe@kernel.dk> wrote:
>> On 06/04/2014 09:43 AM, Ming Lei wrote:
>>> On Wed, Jun 4, 2014 at 11:31 PM, Christoph Hellwig <hch@infradead.org> wrote:
>>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>>> driver only provides the host.
>>>>>
>>>>> Only solution I see right now is to have the flush_rq in the shared
>>>>> tags, but that would potentially be a regression for multiple
>>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>>> something better, or maybe Shaohua has an idea.
>>>>
>>>> What about something like the following (untest, uncompiled, maybe
>>>> pseudo-code):
>>>>
>>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>>> {
>>>>         struct request *rq = tags->rqs[tag];
>>>>
>>>>         if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>>>                 return rq->q->flush_rq;
>>>>         return rq;
>>>> }
>>>
>>> Looks we thought it together, :-)
>>>
>>> Also maybe the flush_rq->tag need to be cleared in flush_end_io().
>>
>> It clears the command flag, so that should be enough.
> 
> Only the flush_rq's command flag is cleared, and its parent request
> flag isn't cleared.

Good point. Care to send in a patch? We can just clear it to -1U, at
least in blk-mq that's defined as an invalid tag.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 15:47                       ` Jens Axboe
@ 2014-06-04 16:25                         ` Jens Axboe
  2014-06-05  1:27                           ` Shaohua Li
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 16:25 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel

On 06/04/2014 09:47 AM, Jens Axboe wrote:
> On 06/04/2014 09:39 AM, Jens Axboe wrote:
>> On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>> driver only provides the host.
>>>>
>>>> Only solution I see right now is to have the flush_rq in the shared
>>>> tags, but that would potentially be a regression for multiple
>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>> something better, or maybe Shaohua has an idea.
>>>
>>> What about something like the following (untest, uncompiled, maybe
>>> pseudo-code):
>>>
>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>> {
>>> 	struct request *rq = tags->rqs[tag];
>>>
>>> 	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>> 		return rq->q->flush_rq;
>>> 	return rq;
>>
>> Ah yes, that'll work, the queue is always assigned. I'll make that change.
> 
> Something like this in complete form. Compile tested only, I'll test it
> on dev box. Probably doesn't matter too much, but I prefer to
> potentially have the faster path (non-flush) just fall inline.

Works for me, committed.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 16:09                           ` Jens Axboe
@ 2014-06-04 16:26                             ` Ming Lei
  2014-06-04 16:28                               ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Ming Lei @ 2014-06-04 16:26 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, Shaohua Li, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 1802 bytes --]

On Thu, Jun 5, 2014 at 12:09 AM, Jens Axboe <axboe@kernel.dk> wrote:
> On 06/04/2014 10:00 AM, Ming Lei wrote:
>> On Wed, Jun 4, 2014 at 11:48 PM, Jens Axboe <axboe@kernel.dk> wrote:
>>> On 06/04/2014 09:43 AM, Ming Lei wrote:
>>>> On Wed, Jun 4, 2014 at 11:31 PM, Christoph Hellwig <hch@infradead.org> wrote:
>>>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>>>> driver only provides the host.
>>>>>>
>>>>>> Only solution I see right now is to have the flush_rq in the shared
>>>>>> tags, but that would potentially be a regression for multiple
>>>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>>>> something better, or maybe Shaohua has an idea.
>>>>>
>>>>> What about something like the following (untest, uncompiled, maybe
>>>>> pseudo-code):
>>>>>
>>>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>>>> {
>>>>>         struct request *rq = tags->rqs[tag];
>>>>>
>>>>>         if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>>>>                 return rq->q->flush_rq;
>>>>>         return rq;
>>>>> }
>>>>
>>>> Looks we thought it together, :-)
>>>>
>>>> Also maybe the flush_rq->tag need to be cleared in flush_end_io().
>>>
>>> It clears the command flag, so that should be enough.
>>
>> Only the flush_rq's command flag is cleared, and its parent request
>> flag isn't cleared.
>
> Good point. Care to send in a patch? We can just clear it to -1U, at
> least in blk-mq that's defined as an invalid tag.

Attachment patch should be enough.

Thanks,
-- 
Ming Lei

[-- Attachment #2: clear_flush_rq_tag.patch --]
[-- Type: text/x-patch, Size: 394 bytes --]

diff --git a/block/blk-flush.c b/block/blk-flush.c
index ff87c66..8ffee4b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -225,7 +225,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 
 	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-		q->flush_rq->cmd_flags = 0;
+		q->flush_rq->tag = -1;
 	}
 
 	running = &q->flush_queue[q->flush_running_idx];

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 16:26                             ` Ming Lei
@ 2014-06-04 16:28                               ` Jens Axboe
  2014-06-04 16:33                                 ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-04 16:28 UTC (permalink / raw)
  To: Ming Lei; +Cc: Christoph Hellwig, Shaohua Li, Linux Kernel Mailing List

On 06/04/2014 10:26 AM, Ming Lei wrote:
> On Thu, Jun 5, 2014 at 12:09 AM, Jens Axboe <axboe@kernel.dk> wrote:
>> On 06/04/2014 10:00 AM, Ming Lei wrote:
>>> On Wed, Jun 4, 2014 at 11:48 PM, Jens Axboe <axboe@kernel.dk> wrote:
>>>> On 06/04/2014 09:43 AM, Ming Lei wrote:
>>>>> On Wed, Jun 4, 2014 at 11:31 PM, Christoph Hellwig <hch@infradead.org> wrote:
>>>>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>>>>> driver only provides the host.
>>>>>>>
>>>>>>> Only solution I see right now is to have the flush_rq in the shared
>>>>>>> tags, but that would potentially be a regression for multiple
>>>>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>>>>> something better, or maybe Shaohua has an idea.
>>>>>>
>>>>>> What about something like the following (untest, uncompiled, maybe
>>>>>> pseudo-code):
>>>>>>
>>>>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>>>>> {
>>>>>>         struct request *rq = tags->rqs[tag];
>>>>>>
>>>>>>         if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>>>>>                 return rq->q->flush_rq;
>>>>>>         return rq;
>>>>>> }
>>>>>
>>>>> Looks we thought it together, :-)
>>>>>
>>>>> Also maybe the flush_rq->tag need to be cleared in flush_end_io().
>>>>
>>>> It clears the command flag, so that should be enough.
>>>
>>> Only the flush_rq's command flag is cleared, and its parent request
>>> flag isn't cleared.
>>
>> Good point. Care to send in a patch? We can just clear it to -1U, at
>> least in blk-mq that's defined as an invalid tag.
> 
> Attachment patch should be enough.

Yep, looks fine. Care to send as a proper patch, then I will include it?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 16:28                               ` Jens Axboe
@ 2014-06-04 16:33                                 ` Christoph Hellwig
  2014-06-04 16:36                                   ` Ming Lei
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2014-06-04 16:33 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Ming Lei, Christoph Hellwig, Shaohua Li,
	Linux Kernel Mailing List

On Wed, Jun 04, 2014 at 10:28:40AM -0600, Jens Axboe wrote:
> > Attachment patch should be enough.
> 
> Yep, looks fine. Care to send as a proper patch, then I will include it?

I think we an also drop the assignment to q->flush_rq->cmd_flags in
flush_end_io now.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 16:33                                 ` Christoph Hellwig
@ 2014-06-04 16:36                                   ` Ming Lei
  0 siblings, 0 replies; 35+ messages in thread
From: Ming Lei @ 2014-06-04 16:36 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Shaohua Li, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 452 bytes --]

On Thu, Jun 5, 2014 at 12:33 AM, Christoph Hellwig <hch@infradead.org> wrote:
> On Wed, Jun 04, 2014 at 10:28:40AM -0600, Jens Axboe wrote:
>> > Attachment patch should be enough.
>>
>> Yep, looks fine. Care to send as a proper patch, then I will include it?
>
> I think we an also drop the assignment to q->flush_rq->cmd_flags in
> flush_end_io now.
>

Yes, that is already done, :-)

Jens, attachment is the formal one, thanks.

Thanks,
-- 
Ming Lei

[-- Attachment #2: 0001-block-mq-flush-clear-flush_rq-s-tag-in-flush_end_io.patch --]
[-- Type: text/x-patch, Size: 845 bytes --]

From 77eb2e96942deed528b2e5892a7d9aa3735c0c67 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Thu, 5 Jun 2014 00:23:55 +0800
Subject: [PATCH] block: mq flush: clear flush_rq's tag in flush_end_io()

So that blk_mq_tag_to_rq() can decide if one request
is flush_rq by comparing the tag.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
---
 block/blk-flush.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index ff87c66..8ffee4b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -225,7 +225,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 
 	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-		q->flush_rq->cmd_flags = 0;
+		q->flush_rq->tag = -1;
 	}
 
 	running = &q->flush_queue[q->flush_running_idx];
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-04 16:25                         ` Jens Axboe
@ 2014-06-05  1:27                           ` Shaohua Li
  2014-06-05  2:05                             ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Shaohua Li @ 2014-06-05  1:27 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, linux-kernel

On Wed, Jun 04, 2014 at 10:25:22AM -0600, Jens Axboe wrote:
> On 06/04/2014 09:47 AM, Jens Axboe wrote:
> > On 06/04/2014 09:39 AM, Jens Axboe wrote:
> >> On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
> >>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
> >>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
> >>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
> >>>>> so that's not an issue, but when called from scsi_host_find_tag the
> >>>>> driver only provides the host.
> >>>>
> >>>> Only solution I see right now is to have the flush_rq in the shared
> >>>> tags, but that would potentially be a regression for multiple
> >>>> devices and heavy flush uses cases. I'll see if I can come up with
> >>>> something better, or maybe Shaohua has an idea.
> >>>
> >>> What about something like the following (untest, uncompiled, maybe
> >>> pseudo-code):
> >>>
> >>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
> >>> {
> >>> 	struct request *rq = tags->rqs[tag];
> >>>
> >>> 	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
> >>> 		return rq->q->flush_rq;
> >>> 	return rq;
> >>
> >> Ah yes, that'll work, the queue is always assigned. I'll make that change.
> > 
> > Something like this in complete form. Compile tested only, I'll test it
> > on dev box. Probably doesn't matter too much, but I prefer to
> > potentially have the faster path (non-flush) just fall inline.
> 
> Works for me, committed.

Sounds there is a small race here. FUA request has REQ_FLUSH_SEQ set too.
Assume its tag is 0. we initialize flush_rq.
blk_mq_rq_init->blk_rq_init->memset could set flush_rq tag to 0 in a short
time. In that short time, blk_mq_tag_to_rq will return wrong request for the
FUA request.

we can do (rq->cmd_flags & REQ_FLUSH_SEQ) && !(rq->cmd_flags & REQ_FUA) in
is_flush_request to avoid this issue.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-05  1:27                           ` Shaohua Li
@ 2014-06-05  2:05                             ` Jens Axboe
  2014-06-05  2:27                               ` Shaohua Li
  0 siblings, 1 reply; 35+ messages in thread
From: Jens Axboe @ 2014-06-05  2:05 UTC (permalink / raw)
  To: Shaohua Li; +Cc: Christoph Hellwig, linux-kernel

On 2014-06-04 19:27, Shaohua Li wrote:
> On Wed, Jun 04, 2014 at 10:25:22AM -0600, Jens Axboe wrote:
>> On 06/04/2014 09:47 AM, Jens Axboe wrote:
>>> On 06/04/2014 09:39 AM, Jens Axboe wrote:
>>>> On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
>>>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>>>> driver only provides the host.
>>>>>>
>>>>>> Only solution I see right now is to have the flush_rq in the shared
>>>>>> tags, but that would potentially be a regression for multiple
>>>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>>>> something better, or maybe Shaohua has an idea.
>>>>>
>>>>> What about something like the following (untest, uncompiled, maybe
>>>>> pseudo-code):
>>>>>
>>>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>>>> {
>>>>> 	struct request *rq = tags->rqs[tag];
>>>>>
>>>>> 	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>>>> 		return rq->q->flush_rq;
>>>>> 	return rq;
>>>>
>>>> Ah yes, that'll work, the queue is always assigned. I'll make that change.
>>>
>>> Something like this in complete form. Compile tested only, I'll test it
>>> on dev box. Probably doesn't matter too much, but I prefer to
>>> potentially have the faster path (non-flush) just fall inline.
>>
>> Works for me, committed.
>
> Sounds there is a small race here. FUA request has REQ_FLUSH_SEQ set too.
> Assume its tag is 0. we initialize flush_rq.
> blk_mq_rq_init->blk_rq_init->memset could set flush_rq tag to 0 in a short
> time. In that short time, blk_mq_tag_to_rq will return wrong request for the
> FUA request.
>
> we can do (rq->cmd_flags & REQ_FLUSH_SEQ) && !(rq->cmd_flags & REQ_FUA) in
> is_flush_request to avoid this issue.

We don't memset the entire request anymore from the rq alloc path.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-05  2:05                             ` Jens Axboe
@ 2014-06-05  2:27                               ` Shaohua Li
  2014-06-05  2:40                                 ` Jens Axboe
  0 siblings, 1 reply; 35+ messages in thread
From: Shaohua Li @ 2014-06-05  2:27 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, linux-kernel

On Wed, Jun 04, 2014 at 08:05:33PM -0600, Jens Axboe wrote:
> On 2014-06-04 19:27, Shaohua Li wrote:
> >On Wed, Jun 04, 2014 at 10:25:22AM -0600, Jens Axboe wrote:
> >>On 06/04/2014 09:47 AM, Jens Axboe wrote:
> >>>On 06/04/2014 09:39 AM, Jens Axboe wrote:
> >>>>On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
> >>>>>On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
> >>>>>>>scsi_mq_find_tag only gets the scsi host, which may have multiple
> >>>>>>>queues.  When called from scsi_find_tag we actually have a scsi device,
> >>>>>>>so that's not an issue, but when called from scsi_host_find_tag the
> >>>>>>>driver only provides the host.
> >>>>>>
> >>>>>>Only solution I see right now is to have the flush_rq in the shared
> >>>>>>tags, but that would potentially be a regression for multiple
> >>>>>>devices and heavy flush uses cases. I'll see if I can come up with
> >>>>>>something better, or maybe Shaohua has an idea.
> >>>>>
> >>>>>What about something like the following (untest, uncompiled, maybe
> >>>>>pseudo-code):
> >>>>>
> >>>>>struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
> >>>>>{
> >>>>>	struct request *rq = tags->rqs[tag];
> >>>>>
> >>>>>	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
> >>>>>		return rq->q->flush_rq;
> >>>>>	return rq;
> >>>>
> >>>>Ah yes, that'll work, the queue is always assigned. I'll make that change.
> >>>
> >>>Something like this in complete form. Compile tested only, I'll test it
> >>>on dev box. Probably doesn't matter too much, but I prefer to
> >>>potentially have the faster path (non-flush) just fall inline.
> >>
> >>Works for me, committed.
> >
> >Sounds there is a small race here. FUA request has REQ_FLUSH_SEQ set too.
> >Assume its tag is 0. we initialize flush_rq.
> >blk_mq_rq_init->blk_rq_init->memset could set flush_rq tag to 0 in a short
> >time. In that short time, blk_mq_tag_to_rq will return wrong request for the
> >FUA request.
> >
> >we can do (rq->cmd_flags & REQ_FLUSH_SEQ) && !(rq->cmd_flags & REQ_FUA) in
> >is_flush_request to avoid this issue.
> 
> We don't memset the entire request anymore from the rq alloc path.

blk_kick_flush() still calls blk_rq_init()?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch]blk-mq: blk_mq_tag_to_rq should handle flush request
  2014-06-05  2:27                               ` Shaohua Li
@ 2014-06-05  2:40                                 ` Jens Axboe
  0 siblings, 0 replies; 35+ messages in thread
From: Jens Axboe @ 2014-06-05  2:40 UTC (permalink / raw)
  To: Shaohua Li; +Cc: Christoph Hellwig, linux-kernel

On 2014-06-04 20:27, Shaohua Li wrote:
> On Wed, Jun 04, 2014 at 08:05:33PM -0600, Jens Axboe wrote:
>> On 2014-06-04 19:27, Shaohua Li wrote:
>>> On Wed, Jun 04, 2014 at 10:25:22AM -0600, Jens Axboe wrote:
>>>> On 06/04/2014 09:47 AM, Jens Axboe wrote:
>>>>> On 06/04/2014 09:39 AM, Jens Axboe wrote:
>>>>>> On 06/04/2014 09:31 AM, Christoph Hellwig wrote:
>>>>>>> On Wed, Jun 04, 2014 at 09:02:19AM -0600, Jens Axboe wrote:
>>>>>>>>> scsi_mq_find_tag only gets the scsi host, which may have multiple
>>>>>>>>> queues.  When called from scsi_find_tag we actually have a scsi device,
>>>>>>>>> so that's not an issue, but when called from scsi_host_find_tag the
>>>>>>>>> driver only provides the host.
>>>>>>>>
>>>>>>>> Only solution I see right now is to have the flush_rq in the shared
>>>>>>>> tags, but that would potentially be a regression for multiple
>>>>>>>> devices and heavy flush uses cases. I'll see if I can come up with
>>>>>>>> something better, or maybe Shaohua has an idea.
>>>>>>>
>>>>>>> What about something like the following (untest, uncompiled, maybe
>>>>>>> pseudo-code):
>>>>>>>
>>>>>>> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>>>>>>> {
>>>>>>> 	struct request *rq = tags->rqs[tag];
>>>>>>>
>>>>>>> 	if ((rq->cmd_flags & REQ_FLUSH_SEQ) && rq->q->flush_rq->tag == tag)
>>>>>>> 		return rq->q->flush_rq;
>>>>>>> 	return rq;
>>>>>>
>>>>>> Ah yes, that'll work, the queue is always assigned. I'll make that change.
>>>>>
>>>>> Something like this in complete form. Compile tested only, I'll test it
>>>>> on dev box. Probably doesn't matter too much, but I prefer to
>>>>> potentially have the faster path (non-flush) just fall inline.
>>>>
>>>> Works for me, committed.
>>>
>>> Sounds there is a small race here. FUA request has REQ_FLUSH_SEQ set too.
>>> Assume its tag is 0. we initialize flush_rq.
>>> blk_mq_rq_init->blk_rq_init->memset could set flush_rq tag to 0 in a short
>>> time. In that short time, blk_mq_tag_to_rq will return wrong request for the
>>> FUA request.
>>>
>>> we can do (rq->cmd_flags & REQ_FLUSH_SEQ) && !(rq->cmd_flags & REQ_FUA) in
>>> is_flush_request to avoid this issue.
>>
>> We don't memset the entire request anymore from the rq alloc path.
>
> blk_kick_flush() still calls blk_rq_init()?

OK, I see what you mean now. I was thinking about the normal uses cases 
of blk_mq_tag_to_rq(), which would be completion or issue time. If you 
are concerned about the "any point in time" validity, then yes, it could 
be an issue.

Might be better to fixup flush init, though.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2014-06-05  2:40 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-05-09 12:07 [patch]blk-mq: blk_mq_tag_to_rq should handle flush request Shaohua Li
2014-05-09 15:00 ` Christoph Hellwig
2014-05-10  4:00   ` Shaohua Li
2014-05-11 17:40     ` Jens Axboe
2014-05-30 14:09     ` Jens Axboe
2014-06-04 11:11       ` Christoph Hellwig
2014-06-04 14:15         ` Jens Axboe
2014-06-04 14:20           ` Christoph Hellwig
2014-06-04 14:54             ` Jens Axboe
2014-06-04 14:58               ` Christoph Hellwig
2014-06-04 15:02                 ` Jens Axboe
2014-06-04 15:05                   ` Christoph Hellwig
2014-06-04 15:08                     ` Jens Axboe
2014-06-04 15:10                       ` Christoph Hellwig
2014-06-04 15:11                         ` Jens Axboe
2014-06-04 15:16                           ` Christoph Hellwig
2014-06-04 15:19                             ` Jens Axboe
2014-06-04 15:22                       ` Christoph Hellwig
2014-06-04 15:28                         ` Jens Axboe
2014-06-04 15:31                   ` Christoph Hellwig
2014-06-04 15:39                     ` Jens Axboe
2014-06-04 15:47                       ` Jens Axboe
2014-06-04 16:25                         ` Jens Axboe
2014-06-05  1:27                           ` Shaohua Li
2014-06-05  2:05                             ` Jens Axboe
2014-06-05  2:27                               ` Shaohua Li
2014-06-05  2:40                                 ` Jens Axboe
2014-06-04 15:43                     ` Ming Lei
2014-06-04 15:48                       ` Jens Axboe
2014-06-04 16:00                         ` Ming Lei
2014-06-04 16:09                           ` Jens Axboe
2014-06-04 16:26                             ` Ming Lei
2014-06-04 16:28                               ` Jens Axboe
2014-06-04 16:33                                 ` Christoph Hellwig
2014-06-04 16:36                                   ` Ming Lei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox