[PATCHSET/RFC] Make legacy IO schedulers work with blk-mq

linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq
@ 2016-12-03  3:15 Jens Axboe
  2016-12-03  3:15 ` [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler Jens Axboe
                   ` (7 more replies)
  0 siblings, 8 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente

This is by no means done, but it seems to work well enough that
I thought I'd send it out for others to take a look at and play
with.

Basically this allows blk-mq managed devices to run the legacy
IO schedulers, unmodified. The only requirement is that the
blk-mq device has to be single queue for now, though that
limitation would be rather simple to lift.

Since this is a debug patch, the default scheduler is deadline.
You can switch that to the other configured schedulers, as you
would with non-mq devices. Here's an example of a scsi-mq device
that is running deadline, and being switched to CFQ online:

root@leopard:~# cat /sys/block/sda/mq/0/tags 
nr_tags=31, reserved_tags=0, bits_per_word=4
nr_free=31, nr_reserved=0
active_queues=0

root@leopard:~# cat /sys/block/sda/queue/scheduler 
noop [deadline] cfq 

root@leopard:~# echo cfq > /sys/block/sda/queue/scheduler 
root@leopard:~# cat /sys/block/sda/queue/scheduler 
noop deadline [cfq] 

Testing welcome. There's certainly room for improvement here, so
I'm mostly interested in grave performance issues or crashes, if any.

Can also be viewed/fetched via git:

git://git.kernel.dk/linux-block for-4.11/blk-mq-legacy-sched

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-05 13:05   ` Christoph Hellwig
  2016-12-05 17:00   ` Ming Lei
  2016-12-03  3:15 ` [PATCH 2/7] cfq-iosched: use appropriate run queue function Jens Axboe
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

No functional changes with this patch, it's just in preparation for
supporting legacy schedulers on blk-mq.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c  |  2 +-
 block/blk-exec.c  |  2 +-
 block/blk-flush.c | 26 ++++++++++++++------------
 block/blk.h       | 12 +++++++++++-
 4 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3f2eb8d80189..0e23589ab3bf 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1310,7 +1310,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
-	if (q->mq_ops)
+	if (blk_use_mq_path(q))
 		return blk_mq_alloc_request(q, rw,
 			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
 				0 : BLK_MQ_REQ_NOWAIT);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3ecb00a6cf45..73b8a701ae6d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -64,7 +64,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * don't check dying flag for MQ because the request won't
 	 * be reused after dying flag is set
 	 */
-	if (q->mq_ops) {
+	if (blk_use_mq_path(q)) {
 		blk_mq_insert_request(rq, at_head, true, false);
 		return;
 	}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 1bdbb3d3e5f5..0b68a1258bdd 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -133,14 +133,16 @@ static void blk_flush_restore_request(struct request *rq)
 
 static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
-	if (rq->q->mq_ops) {
+	struct request_queue *q = rq->q;
+
+	if (blk_use_mq_path(q)) {
 		blk_mq_add_to_requeue_list(rq, add_front, true);
 		return false;
 	} else {
 		if (add_front)
-			list_add(&rq->queuelist, &rq->q->queue_head);
+			list_add(&rq->queuelist, &q->queue_head);
 		else
-			list_add_tail(&rq->queuelist, &rq->q->queue_head);
+			list_add_tail(&rq->queuelist, &q->queue_head);
 		return true;
 	}
 }
@@ -201,7 +203,7 @@ static bool blk_flush_complete_seq(struct request *rq,
 		BUG_ON(!list_empty(&rq->queuelist));
 		list_del_init(&rq->flush.list);
 		blk_flush_restore_request(rq);
-		if (q->mq_ops)
+		if (blk_use_mq_path(q))
 			blk_mq_end_request(rq, error);
 		else
 			__blk_end_request_all(rq, error);
@@ -224,7 +226,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 	unsigned long flags = 0;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
 
-	if (q->mq_ops) {
+	if (blk_use_mq_path(q)) {
 		struct blk_mq_hw_ctx *hctx;
 
 		/* release the tag's ownership to the req cloned from */
@@ -240,7 +242,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 	/* account completion of the flush request */
 	fq->flush_running_idx ^= 1;
 
-	if (!q->mq_ops)
+	if (!blk_use_mq_path(q))
 		elv_completed_request(q, flush_rq);
 
 	/* and push the waiting requests to the next stage */
@@ -267,7 +269,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 		blk_run_queue_async(q);
 	}
 	fq->flush_queue_delayed = 0;
-	if (q->mq_ops)
+	if (blk_use_mq_path(q))
 		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
@@ -315,7 +317,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	 * be in flight at the same time. And acquire the tag's
 	 * ownership for flush req.
 	 */
-	if (q->mq_ops) {
+	if (blk_use_mq_path(q)) {
 		struct blk_mq_hw_ctx *hctx;
 
 		flush_rq->mq_ctx = first_rq->mq_ctx;
@@ -409,7 +411,7 @@ void blk_insert_flush(struct request *rq)
 	 * complete the request.
 	 */
 	if (!policy) {
-		if (q->mq_ops)
+		if (blk_use_mq_path(q))
 			blk_mq_end_request(rq, 0);
 		else
 			__blk_end_bidi_request(rq, 0, 0, 0);
@@ -425,9 +427,9 @@ void blk_insert_flush(struct request *rq)
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		if (q->mq_ops) {
+		if (blk_use_mq_path(q))
 			blk_mq_insert_request(rq, false, false, true);
-		} else
+		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
 	}
@@ -440,7 +442,7 @@ void blk_insert_flush(struct request *rq)
 	INIT_LIST_HEAD(&rq->flush.list);
 	rq->rq_flags |= RQF_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
-	if (q->mq_ops) {
+	if (blk_use_mq_path(q)) {
 		rq->end_io = mq_flush_data_end_io;
 
 		spin_lock_irq(&fq->mq_flush_lock);
diff --git a/block/blk.h b/block/blk.h
index 041185e5f129..094fc10429c3 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -36,10 +36,20 @@ extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
+/*
+ * Use the MQ path if we have mq_ops, but not if we are using an IO
+ * scheduler. For the scheduler, we should use the legacy path. Only
+ * for internal use in the block layer.
+ */
+static inline bool blk_use_mq_path(struct request_queue *q)
+{
+	return q->mq_ops && !q->elevator;
+}
+
 static inline struct blk_flush_queue *blk_get_flush_queue(
 		struct request_queue *q, struct blk_mq_ctx *ctx)
 {
-	if (q->mq_ops)
+	if (blk_use_mq_path(q))
 		return blk_mq_map_queue(q, ctx->cpu)->fq;
 	return q->fq;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-03  3:15 ` [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler Jens Axboe
@ 2016-12-05 13:05   ` Christoph Hellwig
  2016-12-05 15:07     ` Jens Axboe
  2016-12-05 17:00   ` Ming Lei
  1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2016-12-05 13:05 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
> No functional changes with this patch, it's just in preparation for
> supporting legacy schedulers on blk-mq.

Ewww.  I think without refactoring to clear what 'use_mq_path'
means here and better naming this is a total non-started.  Even with
that we'll now have yet another code path to worry about.  Is there
any chance to instead consolidate into a single path?

>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>  {
> -	if (q->mq_ops)
> +	if (blk_use_mq_path(q))
>  		return blk_mq_alloc_request(q, rw,
>  			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
>  				0 : BLK_MQ_REQ_NOWAIT);

So now with blk-mq and an elevator set we go into blk_old_get_request,
hich will simply allocate new requests.  How does this not break
every existing driver?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 13:05   ` Christoph Hellwig
@ 2016-12-05 15:07     ` Jens Axboe
  2016-12-05 15:49       ` Johannes Thumshirn
  2016-12-05 22:40       ` Mike Snitzer
  0 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 15:07 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: axboe, linux-block, paolo.valente

On 12/05/2016 06:05 AM, Christoph Hellwig wrote:
> On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
>> No functional changes with this patch, it's just in preparation for
>> supporting legacy schedulers on blk-mq.
> 
> Ewww.  I think without refactoring to clear what 'use_mq_path'
> means here and better naming this is a total non-started.  Even with
> that we'll now have yet another code path to worry about.  Is there
> any chance to instead consolidate into a single path?

It's not pretty at all. I should have prefaced this patchset with saying
that it's an experiment in seeing what it would take to simply use the
old IO schedulers, as a temporary measure, on blk/scsi-mq. I did clean
it up a bit after posting:

http://git.kernel.dk/cgit/linux-block/log/?h=blk-mq-legacy-sched

but I'm not going to claim this is anywhere near merge read, nor clean.

>>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>>  {
>> -	if (q->mq_ops)
>> +	if (blk_use_mq_path(q))
>>  		return blk_mq_alloc_request(q, rw,
>>  			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
>>  				0 : BLK_MQ_REQ_NOWAIT);
> 
> So now with blk-mq and an elevator set we go into blk_old_get_request,
> hich will simply allocate new requests.  How does this not break
> every existing driver?

Since Johannes found that confusion, maybe I should explain how it all
works.

Basically, if we are blk-mq (q->mq_ops) and now have an IO scheduler
attached (q->elevator), then the path from bio to request is essentially
the old one. We allocate a request through get_request() and friends,
and insert it with the elevator interface. When the queue is later run,
our blk_mq_sched_dispatch() asks the IO scheduler for a request, and if
successful, we allocate a real MQ request and dispatch that. So all the
driver ever sees is MQ requests, and it uses the MQ interface. Only the
internal bits now look at blk_use_mq_path() to tell whether they should
alloc+insert with that.

The above will break if we have drivers manually allocating a request
through blk_get_request(), and then using MQ functions to insert/run it.
I didn't audit all of that, and I think most (all?) of them just use the
MQ interfaces. That would also currently be broken, we either/or the
logic to run software queues or run through the elevator.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 15:07     ` Jens Axboe
@ 2016-12-05 15:49       ` Johannes Thumshirn
  2016-12-05 15:49         ` Jens Axboe
  2016-12-05 22:40       ` Mike Snitzer
  1 sibling, 1 reply; 33+ messages in thread
From: Johannes Thumshirn @ 2016-12-05 15:49 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, axboe, linux-block, paolo.valente

On Mon, Dec 05, 2016 at 08:07:10AM -0700, Jens Axboe wrote:
> On 12/05/2016 06:05 AM, Christoph Hellwig wrote:
> > On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
> >> No functional changes with this patch, it's just in preparation for
> >> supporting legacy schedulers on blk-mq.
> > 
> > Ewww.  I think without refactoring to clear what 'use_mq_path'
> > means here and better naming this is a total non-started.  Even with
> > that we'll now have yet another code path to worry about.  Is there
> > any chance to instead consolidate into a single path?
> 
> It's not pretty at all. I should have prefaced this patchset with saying
> that it's an experiment in seeing what it would take to simply use the
> old IO schedulers, as a temporary measure, on blk/scsi-mq. I did clean
> it up a bit after posting:
> 
> http://git.kernel.dk/cgit/linux-block/log/?h=blk-mq-legacy-sched
> 
> but I'm not going to claim this is anywhere near merge read, nor clean.
> 
> >>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
> >>  {
> >> -	if (q->mq_ops)
> >> +	if (blk_use_mq_path(q))
> >>  		return blk_mq_alloc_request(q, rw,
> >>  			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
> >>  				0 : BLK_MQ_REQ_NOWAIT);
> > 
> > So now with blk-mq and an elevator set we go into blk_old_get_request,
> > hich will simply allocate new requests.  How does this not break
> > every existing driver?
> 
> Since Johannes found that confusion, maybe I should explain how it all
> works.

To clarify the naming, how about sth. like blk_mq_use_sched() (to align
with blk_mq_sched_dispatch())?


-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nï¿½rnberg
GF: Felix Imendï¿½rffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nï¿½rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 15:49       ` Johannes Thumshirn
@ 2016-12-05 15:49         ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 15:49 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: Christoph Hellwig, axboe, linux-block, paolo.valente

On 12/05/2016 08:49 AM, Johannes Thumshirn wrote:
> On Mon, Dec 05, 2016 at 08:07:10AM -0700, Jens Axboe wrote:
>> On 12/05/2016 06:05 AM, Christoph Hellwig wrote:
>>> On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
>>>> No functional changes with this patch, it's just in preparation for
>>>> supporting legacy schedulers on blk-mq.
>>>
>>> Ewww.  I think without refactoring to clear what 'use_mq_path'
>>> means here and better naming this is a total non-started.  Even with
>>> that we'll now have yet another code path to worry about.  Is there
>>> any chance to instead consolidate into a single path?
>>
>> It's not pretty at all. I should have prefaced this patchset with saying
>> that it's an experiment in seeing what it would take to simply use the
>> old IO schedulers, as a temporary measure, on blk/scsi-mq. I did clean
>> it up a bit after posting:
>>
>> http://git.kernel.dk/cgit/linux-block/log/?h=blk-mq-legacy-sched
>>
>> but I'm not going to claim this is anywhere near merge read, nor clean.
>>
>>>>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>>>>  {
>>>> -	if (q->mq_ops)
>>>> +	if (blk_use_mq_path(q))
>>>>  		return blk_mq_alloc_request(q, rw,
>>>>  			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
>>>>  				0 : BLK_MQ_REQ_NOWAIT);
>>>
>>> So now with blk-mq and an elevator set we go into blk_old_get_request,
>>> hich will simply allocate new requests.  How does this not break
>>> every existing driver?
>>
>> Since Johannes found that confusion, maybe I should explain how it all
>> works.
> 
> To clarify the naming, how about sth. like blk_mq_use_sched() (to align
> with blk_mq_sched_dispatch())?

Yeah, that is a much better name indeed. I'll make that change.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 15:07     ` Jens Axboe
  2016-12-05 15:49       ` Johannes Thumshirn
@ 2016-12-05 22:40       ` Mike Snitzer
  2016-12-05 22:50         ` Jens Axboe
  1 sibling, 1 reply; 33+ messages in thread
From: Mike Snitzer @ 2016-12-05 22:40 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Jens Axboe, linux-block, paolo.valente,
	device-mapper development

On Mon, Dec 5, 2016 at 10:07 AM, Jens Axboe <axboe@fb.com> wrote:
>
> On 12/05/2016 06:05 AM, Christoph Hellwig wrote:
> > On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
> >> No functional changes with this patch, it's just in preparation for
> >> supporting legacy schedulers on blk-mq.
> >
> > Ewww.  I think without refactoring to clear what 'use_mq_path'
> > means here and better naming this is a total non-started.  Even with
> > that we'll now have yet another code path to worry about.  Is there
> > any chance to instead consolidate into a single path?
>
> It's not pretty at all. I should have prefaced this patchset with saying
> that it's an experiment in seeing what it would take to simply use the
> old IO schedulers, as a temporary measure, on blk/scsi-mq. I did clean
> it up a bit after posting:
>
> http://git.kernel.dk/cgit/linux-block/log/?h=blk-mq-legacy-sched
>
> but I'm not going to claim this is anywhere near merge read, nor clean.

Nice to see you've lowered your standards...

Maybe now we can revisit this line of work? ;)
http://git.kernel.org/cgit/linux/kernel/git/snitzer/linux.git/log/?h=wip

To fix: https://bugzilla.kernel.org/show_bug.cgi?id=119841

> >>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
> >>  {
> >> -    if (q->mq_ops)
> >> +    if (blk_use_mq_path(q))
> >>              return blk_mq_alloc_request(q, rw,
> >>                      (gfp_mask & __GFP_DIRECT_RECLAIM) ?
> >>                              0 : BLK_MQ_REQ_NOWAIT);
> >
> > So now with blk-mq and an elevator set we go into blk_old_get_request,
> > hich will simply allocate new requests.  How does this not break
> > every existing driver?
>
> Since Johannes found that confusion, maybe I should explain how it all
> works.
>
> Basically, if we are blk-mq (q->mq_ops) and now have an IO scheduler
> attached (q->elevator), then the path from bio to request is essentially
> the old one. We allocate a request through get_request() and friends,
> and insert it with the elevator interface. When the queue is later run,
> our blk_mq_sched_dispatch() asks the IO scheduler for a request, and if
> successful, we allocate a real MQ request and dispatch that. So all the
> driver ever sees is MQ requests, and it uses the MQ interface. Only the
> internal bits now look at blk_use_mq_path() to tell whether they should
> alloc+insert with that.
>
> The above will break if we have drivers manually allocating a request
> through blk_get_request(), and then using MQ functions to insert/run it.
> I didn't audit all of that, and I think most (all?) of them just use the
> MQ interfaces. That would also currently be broken, we either/or the
> logic to run software queues or run through the elevator.

I'm not seeing anything in elevator_switch() that would prevent an
elevator from attempting to be used on an mq device with > 1 hw queue.
But I could easily be missing it.

That aside, this patchset has all the makings of a _serious_ problem
for dm-mq multipath (e.g. if dm_mod.use_blk_mq=Y and
dm_mod.dm_mq_nr_hw_queues=1).  There is a bunch of code in
drivers/dm/dm-rq.c that looks at q->mq_ops vs not to determine if mq
is used vs old .request_fn.

I think we really need a way to force an allocated mq request_queue
(with a single hw_queue) to not support this technological terror
you've constructed. (*cough*
http://es.memegenerator.net/instance/58341711 )

In fact I'd prefer there be a mechanism to only allow this if some
opt-in interface is used... or the inverse: dm-mq can allocate a
blk-mq request_requeue that will _never_ support this.

I could be super dense on this line of work.  But what is the point of
all this?  Seems like a really unfortunate distraction that makes the
block code all the more encumbered with fiddley old vs new logic.  So
now we're opening old .request_fn users up to blk-mq-with-scheduler vs
non-blk-mq bugs.

Color me skeptical.  In time, maybe I'll warm to all this but for now
we need a big "OFF!" switch (aside from DEFAULT_MQ_SCHED_NONE, in
request_queue allocation interface).

FWIW, dm-multipath has supported a top-level .request_fn
requeue_queue, legacy elevator and all, stacked on blk-mq queue(s) for
quite a while.  If people _really_ want this you could easily give it
to them by forcing the use of the DM "multipath" target with
multipath's "queue_mode" feature set to "rq".

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 22:40       ` Mike Snitzer
@ 2016-12-05 22:50         ` Jens Axboe
  2016-12-06 19:50           ` Mike Snitzer
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 22:50 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Christoph Hellwig, Jens Axboe, linux-block, paolo.valente,
	device-mapper development

On 12/05/2016 03:40 PM, Mike Snitzer wrote:
> On Mon, Dec 5, 2016 at 10:07 AM, Jens Axboe <axboe@fb.com> wrote:
>>
>> On 12/05/2016 06:05 AM, Christoph Hellwig wrote:
>>> On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
>>>> No functional changes with this patch, it's just in preparation for
>>>> supporting legacy schedulers on blk-mq.
>>>
>>> Ewww.  I think without refactoring to clear what 'use_mq_path'
>>> means here and better naming this is a total non-started.  Even with
>>> that we'll now have yet another code path to worry about.  Is there
>>> any chance to instead consolidate into a single path?
>>
>> It's not pretty at all. I should have prefaced this patchset with saying
>> that it's an experiment in seeing what it would take to simply use the
>> old IO schedulers, as a temporary measure, on blk/scsi-mq. I did clean
>> it up a bit after posting:
>>
>> http://git.kernel.dk/cgit/linux-block/log/?h=blk-mq-legacy-sched
>>
>> but I'm not going to claim this is anywhere near merge read, nor clean.
> 
> Nice to see you've lowered your standards...
> 
> Maybe now we can revisit this line of work? ;)
> http://git.kernel.org/cgit/linux/kernel/git/snitzer/linux.git/log/?h=wip

I haven't lowered my standards. I thought this posting was pretty clear
- it's an experiment in what supporting legacy schedulers would look
like. As you quote me above, this is NOT proposed for merging, nor do I
consider it anywhere near clean.

> I'm not seeing anything in elevator_switch() that would prevent an
> elevator from attempting to be used on an mq device with > 1 hw queue.
> But I could easily be missing it.

You missed it, it's in blk_mq_sched_init(), called from
elv_iosched_store() when trying to switch (or setup a new) schedulers.

> That aside, this patchset has all the makings of a _serious_ problem
> for dm-mq multipath (e.g. if dm_mod.use_blk_mq=Y and
> dm_mod.dm_mq_nr_hw_queues=1).  There is a bunch of code in
> drivers/dm/dm-rq.c that looks at q->mq_ops vs not to determine if mq
> is used vs old .request_fn.
> 
> I think we really need a way to force an allocated mq request_queue
> (with a single hw_queue) to not support this technological terror
> you've constructed. (*cough*

See BLK_MQ_F_NO_SCHED.

> I could be super dense on this line of work.  But what is the point of
> all this?  Seems like a really unfortunate distraction that makes the
> block code all the more encumbered with fiddley old vs new logic.  So
> now we're opening old .request_fn users up to blk-mq-with-scheduler vs
> non-blk-mq bugs.

See above, it's just an experiment in seeing what this would look like,
how transparent (or not) we could make that.

Don't overthink any of this, and don't start making plans or coming up
with problems on why X or Y would not work with whatever interface
variant of dm. That's jumping the gun.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 22:50         ` Jens Axboe
@ 2016-12-06 19:50           ` Mike Snitzer
  0 siblings, 0 replies; 33+ messages in thread
From: Mike Snitzer @ 2016-12-06 19:50 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Jens Axboe, device-mapper development,
	paolo.valente, linux-block

On Mon, Dec 05 2016 at  5:50pm -0500,
Jens Axboe <axboe@fb.com> wrote:

> On 12/05/2016 03:40 PM, Mike Snitzer wrote:
> > On Mon, Dec 5, 2016 at 10:07 AM, Jens Axboe <axboe@fb.com> wrote:
> >>
> >> On 12/05/2016 06:05 AM, Christoph Hellwig wrote:
> >>> On Fri, Dec 02, 2016 at 08:15:15PM -0700, Jens Axboe wrote:
> >>>> No functional changes with this patch, it's just in preparation for
> >>>> supporting legacy schedulers on blk-mq.
> >>>
> >>> Ewww.  I think without refactoring to clear what 'use_mq_path'
> >>> means here and better naming this is a total non-started.  Even with
> >>> that we'll now have yet another code path to worry about.  Is there
> >>> any chance to instead consolidate into a single path?
> >>
> >> It's not pretty at all. I should have prefaced this patchset with saying
> >> that it's an experiment in seeing what it would take to simply use the
> >> old IO schedulers, as a temporary measure, on blk/scsi-mq. I did clean
> >> it up a bit after posting:
> >>
> >> http://git.kernel.dk/cgit/linux-block/log/?h=blk-mq-legacy-sched
> >>
> >> but I'm not going to claim this is anywhere near merge read, nor clean.
> > 
> > Nice to see you've lowered your standards...
> > 
> > Maybe now we can revisit this line of work? ;)
> > http://git.kernel.org/cgit/linux/kernel/git/snitzer/linux.git/log/?h=wip
> 
> I haven't lowered my standards. I thought this posting was pretty clear
> - it's an experiment in what supporting legacy schedulers would look
> like. As you quote me above, this is NOT proposed for merging, nor do I
> consider it anywhere near clean.

Where'd your sense of humor go?

> > I'm not seeing anything in elevator_switch() that would prevent an
> > elevator from attempting to be used on an mq device with > 1 hw queue.
> > But I could easily be missing it.
> 
> You missed it, it's in blk_mq_sched_init(), called from
> elv_iosched_store() when trying to switch (or setup a new) schedulers.
> 
> > That aside, this patchset has all the makings of a _serious_ problem
> > for dm-mq multipath (e.g. if dm_mod.use_blk_mq=Y and
> > dm_mod.dm_mq_nr_hw_queues=1).  There is a bunch of code in
> > drivers/dm/dm-rq.c that looks at q->mq_ops vs not to determine if mq
> > is used vs old .request_fn.
> > 
> > I think we really need a way to force an allocated mq request_queue
> > (with a single hw_queue) to not support this technological terror
> > you've constructed. (*cough*
> 
> See BLK_MQ_F_NO_SCHED.

Yeap, missed it, thanks.  Reviewing patches via gmail _sucks_ I
should've just looked at your git branch(es) from the start.
 
> > I could be super dense on this line of work.  But what is the point of
> > all this?  Seems like a really unfortunate distraction that makes the
> > block code all the more encumbered with fiddley old vs new logic.  So
> > now we're opening old .request_fn users up to blk-mq-with-scheduler vs
> > non-blk-mq bugs.
> 
> See above, it's just an experiment in seeing what this would look like,
> how transparent (or not) we could make that.

OK, seems not very transparent so far.  But that aside, I'm more curious
on what the goal(s) and/or benefit(s) might be?  I know that before you
were hopeful to eventually eliminate the old .request_fn path in block
core (in favor of blk-mq, once it grew IO scheduling capabilties).

But by tieing blk-mq through to the old request path (and associated IO
schedulers) it certainly complicates getting rid of all the legacy code.

Selfishly, I'm looking forward to eliminating the old .request_fn
request-based code in DM core.  This advance to supporting the old IO
schedulers make that less likely.

> Don't overthink any of this, and don't start making plans or coming up
> with problems on why X or Y would not work with whatever interface
> variant of dm. That's jumping the gun.

Not overthinking.. just thinking ;)  But if this does happen then maybe
I should look to invert the request-based DM core cleanup: remove all
the old .request_fn support and impose the same (namely IO scheduler
enabled DM multipath) via dm_mod.use_blk_mq=Y and
dm_mod.dm_mod.dm_mq_nr_hw_queues=1

Mike

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-03  3:15 ` [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler Jens Axboe
  2016-12-05 13:05   ` Christoph Hellwig
@ 2016-12-05 17:00   ` Ming Lei
  2016-12-05 17:09     ` Jens Axboe
  1 sibling, 1 reply; 33+ messages in thread
From: Ming Lei @ 2016-12-05 17:00 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jens Axboe, linux-block, paolo.valente

On Sat, Dec 3, 2016 at 11:15 AM, Jens Axboe <axboe@fb.com> wrote:
> No functional changes with this patch, it's just in preparation for
> supporting legacy schedulers on blk-mq.
>
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  block/blk-core.c  |  2 +-
>  block/blk-exec.c  |  2 +-
>  block/blk-flush.c | 26 ++++++++++++++------------
>  block/blk.h       | 12 +++++++++++-
>  4 files changed, 27 insertions(+), 15 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 3f2eb8d80189..0e23589ab3bf 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1310,7 +1310,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
>
>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>  {
> -       if (q->mq_ops)
> +       if (blk_use_mq_path(q))
>                 return blk_mq_alloc_request(q, rw,
>                         (gfp_mask & __GFP_DIRECT_RECLAIM) ?
>                                 0 : BLK_MQ_REQ_NOWAIT);

Another way might be to use mq allocator to allocate rq in case of mq_sched,
such as: just replace mempool_alloc in __get_request() with
blk_mq_alloc_request(), in this way, it should be possible to
avoid one extra rq allocation in blk_mq_sched_dispatch(), and keep mq's benefit
of rq preallocation, which can avoid to hold queue_lock during the
allocation too.

> diff --git a/block/blk-exec.c b/block/blk-exec.c
> index 3ecb00a6cf45..73b8a701ae6d 100644
> --- a/block/blk-exec.c
> +++ b/block/blk-exec.c
> @@ -64,7 +64,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
>          * don't check dying flag for MQ because the request won't
>          * be reused after dying flag is set
>          */
> -       if (q->mq_ops) {
> +       if (blk_use_mq_path(q)) {
>                 blk_mq_insert_request(rq, at_head, true, false);
>                 return;
>         }
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index 1bdbb3d3e5f5..0b68a1258bdd 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -133,14 +133,16 @@ static void blk_flush_restore_request(struct request *rq)
>
>  static bool blk_flush_queue_rq(struct request *rq, bool add_front)
>  {
> -       if (rq->q->mq_ops) {
> +       struct request_queue *q = rq->q;
> +
> +       if (blk_use_mq_path(q)) {
>                 blk_mq_add_to_requeue_list(rq, add_front, true);
>                 return false;
>         } else {
>                 if (add_front)
> -                       list_add(&rq->queuelist, &rq->q->queue_head);
> +                       list_add(&rq->queuelist, &q->queue_head);
>                 else
> -                       list_add_tail(&rq->queuelist, &rq->q->queue_head);
> +                       list_add_tail(&rq->queuelist, &q->queue_head);
>                 return true;
>         }
>  }
> @@ -201,7 +203,7 @@ static bool blk_flush_complete_seq(struct request *rq,
>                 BUG_ON(!list_empty(&rq->queuelist));
>                 list_del_init(&rq->flush.list);
>                 blk_flush_restore_request(rq);
> -               if (q->mq_ops)
> +               if (blk_use_mq_path(q))
>                         blk_mq_end_request(rq, error);
>                 else
>                         __blk_end_request_all(rq, error);
> @@ -224,7 +226,7 @@ static void flush_end_io(struct request *flush_rq, int error)
>         unsigned long flags = 0;
>         struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
>
> -       if (q->mq_ops) {
> +       if (blk_use_mq_path(q)) {
>                 struct blk_mq_hw_ctx *hctx;
>
>                 /* release the tag's ownership to the req cloned from */
> @@ -240,7 +242,7 @@ static void flush_end_io(struct request *flush_rq, int error)
>         /* account completion of the flush request */
>         fq->flush_running_idx ^= 1;
>
> -       if (!q->mq_ops)
> +       if (!blk_use_mq_path(q))
>                 elv_completed_request(q, flush_rq);
>
>         /* and push the waiting requests to the next stage */
> @@ -267,7 +269,7 @@ static void flush_end_io(struct request *flush_rq, int error)
>                 blk_run_queue_async(q);
>         }
>         fq->flush_queue_delayed = 0;
> -       if (q->mq_ops)
> +       if (blk_use_mq_path(q))
>                 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
>  }
>
> @@ -315,7 +317,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
>          * be in flight at the same time. And acquire the tag's
>          * ownership for flush req.
>          */
> -       if (q->mq_ops) {
> +       if (blk_use_mq_path(q)) {
>                 struct blk_mq_hw_ctx *hctx;
>
>                 flush_rq->mq_ctx = first_rq->mq_ctx;
> @@ -409,7 +411,7 @@ void blk_insert_flush(struct request *rq)
>          * complete the request.
>          */
>         if (!policy) {
> -               if (q->mq_ops)
> +               if (blk_use_mq_path(q))
>                         blk_mq_end_request(rq, 0);
>                 else
>                         __blk_end_bidi_request(rq, 0, 0, 0);
> @@ -425,9 +427,9 @@ void blk_insert_flush(struct request *rq)
>          */
>         if ((policy & REQ_FSEQ_DATA) &&
>             !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
> -               if (q->mq_ops) {
> +               if (blk_use_mq_path(q))
>                         blk_mq_insert_request(rq, false, false, true);
> -               } else
> +               else
>                         list_add_tail(&rq->queuelist, &q->queue_head);
>                 return;
>         }
> @@ -440,7 +442,7 @@ void blk_insert_flush(struct request *rq)
>         INIT_LIST_HEAD(&rq->flush.list);
>         rq->rq_flags |= RQF_FLUSH_SEQ;
>         rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
> -       if (q->mq_ops) {
> +       if (blk_use_mq_path(q)) {
>                 rq->end_io = mq_flush_data_end_io;
>
>                 spin_lock_irq(&fq->mq_flush_lock);
> diff --git a/block/blk.h b/block/blk.h
> index 041185e5f129..094fc10429c3 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -36,10 +36,20 @@ extern struct kmem_cache *request_cachep;
>  extern struct kobj_type blk_queue_ktype;
>  extern struct ida blk_queue_ida;
>
> +/*
> + * Use the MQ path if we have mq_ops, but not if we are using an IO
> + * scheduler. For the scheduler, we should use the legacy path. Only
> + * for internal use in the block layer.
> + */
> +static inline bool blk_use_mq_path(struct request_queue *q)
> +{
> +       return q->mq_ops && !q->elevator;
> +}
> +
>  static inline struct blk_flush_queue *blk_get_flush_queue(
>                 struct request_queue *q, struct blk_mq_ctx *ctx)
>  {
> -       if (q->mq_ops)
> +       if (blk_use_mq_path(q))
>                 return blk_mq_map_queue(q, ctx->cpu)->fq;
>         return q->fq;
>  }
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-block" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 17:00   ` Ming Lei
@ 2016-12-05 17:09     ` Jens Axboe
  2016-12-05 19:22       ` Ming Lei
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 17:09 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-block, paolo.valente

On 12/05/2016 10:00 AM, Ming Lei wrote:
> On Sat, Dec 3, 2016 at 11:15 AM, Jens Axboe <axboe@fb.com> wrote:
>> No functional changes with this patch, it's just in preparation for
>> supporting legacy schedulers on blk-mq.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>  block/blk-core.c  |  2 +-
>>  block/blk-exec.c  |  2 +-
>>  block/blk-flush.c | 26 ++++++++++++++------------
>>  block/blk.h       | 12 +++++++++++-
>>  4 files changed, 27 insertions(+), 15 deletions(-)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 3f2eb8d80189..0e23589ab3bf 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -1310,7 +1310,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
>>
>>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>>  {
>> -       if (q->mq_ops)
>> +       if (blk_use_mq_path(q))
>>                 return blk_mq_alloc_request(q, rw,
>>                         (gfp_mask & __GFP_DIRECT_RECLAIM) ?
>>                                 0 : BLK_MQ_REQ_NOWAIT);
> 
> Another way might be to use mq allocator to allocate rq in case of mq_sched,
> such as: just replace mempool_alloc in __get_request() with
> blk_mq_alloc_request(), in this way, it should be possible to
> avoid one extra rq allocation in blk_mq_sched_dispatch(), and keep mq's benefit
> of rq preallocation, which can avoid to hold queue_lock during the
> allocation too.

One problem with the MQ rq allocation is that it's tied to the device
queue depth. This is a problem for scheduling, since we want to have a
larger pool of requests that the IO scheduler can use, so that we
actually have something that we can schedule with. This is a non-starter
on QD=1 devices, but it's also a problem for SATA with 31 effectively
usable tags.

That's why I split it in two, so we have the "old" requests that we hand
to the scheduler. I know the 'rq' field copy isn't super pretty, though.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 17:09     ` Jens Axboe
@ 2016-12-05 19:22       ` Ming Lei
  2016-12-05 19:35         ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Ming Lei @ 2016-12-05 19:22 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jens Axboe, linux-block, paolo.valente

On Tue, Dec 6, 2016 at 1:09 AM, Jens Axboe <axboe@fb.com> wrote:
> On 12/05/2016 10:00 AM, Ming Lei wrote:
>> On Sat, Dec 3, 2016 at 11:15 AM, Jens Axboe <axboe@fb.com> wrote:
>>> No functional changes with this patch, it's just in preparation for
>>> supporting legacy schedulers on blk-mq.
>>>
>>> Signed-off-by: Jens Axboe <axboe@fb.com>
>>> ---
>>>  block/blk-core.c  |  2 +-
>>>  block/blk-exec.c  |  2 +-
>>>  block/blk-flush.c | 26 ++++++++++++++------------
>>>  block/blk.h       | 12 +++++++++++-
>>>  4 files changed, 27 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/block/blk-core.c b/block/blk-core.c
>>> index 3f2eb8d80189..0e23589ab3bf 100644
>>> --- a/block/blk-core.c
>>> +++ b/block/blk-core.c
>>> @@ -1310,7 +1310,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
>>>
>>>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>>>  {
>>> -       if (q->mq_ops)
>>> +       if (blk_use_mq_path(q))
>>>                 return blk_mq_alloc_request(q, rw,
>>>                         (gfp_mask & __GFP_DIRECT_RECLAIM) ?
>>>                                 0 : BLK_MQ_REQ_NOWAIT);
>>
>> Another way might be to use mq allocator to allocate rq in case of mq_sched,
>> such as: just replace mempool_alloc in __get_request() with
>> blk_mq_alloc_request(), in this way, it should be possible to
>> avoid one extra rq allocation in blk_mq_sched_dispatch(), and keep mq's benefit
>> of rq preallocation, which can avoid to hold queue_lock during the
>> allocation too.
>
> One problem with the MQ rq allocation is that it's tied to the device
> queue depth. This is a problem for scheduling, since we want to have a
> larger pool of requests that the IO scheduler can use, so that we
> actually have something that we can schedule with. This is a non-starter
> on QD=1 devices, but it's also a problem for SATA with 31 effectively
> usable tags.
>
> That's why I split it in two, so we have the "old" requests that we hand
> to the scheduler. I know the 'rq' field copy isn't super pretty, though.

OK, got it, thanks for your explanation.

So could we fall back to mempool_alloc() for allocating rq with mq
size if MQ rq allocator fails? Then in this way the extra rq allocation
in blk_mq_alloc_request() may be killed.


Thanks,
Ming

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler
  2016-12-05 19:22       ` Ming Lei
@ 2016-12-05 19:35         ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 19:35 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-block, paolo.valente

On 12/05/2016 12:22 PM, Ming Lei wrote:
> On Tue, Dec 6, 2016 at 1:09 AM, Jens Axboe <axboe@fb.com> wrote:
>> On 12/05/2016 10:00 AM, Ming Lei wrote:
>>> On Sat, Dec 3, 2016 at 11:15 AM, Jens Axboe <axboe@fb.com> wrote:
>>>> No functional changes with this patch, it's just in preparation for
>>>> supporting legacy schedulers on blk-mq.
>>>>
>>>> Signed-off-by: Jens Axboe <axboe@fb.com>
>>>> ---
>>>>  block/blk-core.c  |  2 +-
>>>>  block/blk-exec.c  |  2 +-
>>>>  block/blk-flush.c | 26 ++++++++++++++------------
>>>>  block/blk.h       | 12 +++++++++++-
>>>>  4 files changed, 27 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/block/blk-core.c b/block/blk-core.c
>>>> index 3f2eb8d80189..0e23589ab3bf 100644
>>>> --- a/block/blk-core.c
>>>> +++ b/block/blk-core.c
>>>> @@ -1310,7 +1310,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
>>>>
>>>>  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
>>>>  {
>>>> -       if (q->mq_ops)
>>>> +       if (blk_use_mq_path(q))
>>>>                 return blk_mq_alloc_request(q, rw,
>>>>                         (gfp_mask & __GFP_DIRECT_RECLAIM) ?
>>>>                                 0 : BLK_MQ_REQ_NOWAIT);
>>>
>>> Another way might be to use mq allocator to allocate rq in case of mq_sched,
>>> such as: just replace mempool_alloc in __get_request() with
>>> blk_mq_alloc_request(), in this way, it should be possible to
>>> avoid one extra rq allocation in blk_mq_sched_dispatch(), and keep mq's benefit
>>> of rq preallocation, which can avoid to hold queue_lock during the
>>> allocation too.
>>
>> One problem with the MQ rq allocation is that it's tied to the device
>> queue depth. This is a problem for scheduling, since we want to have a
>> larger pool of requests that the IO scheduler can use, so that we
>> actually have something that we can schedule with. This is a non-starter
>> on QD=1 devices, but it's also a problem for SATA with 31 effectively
>> usable tags.
>>
>> That's why I split it in two, so we have the "old" requests that we hand
>> to the scheduler. I know the 'rq' field copy isn't super pretty, though.
> 
> OK, got it, thanks for your explanation.
> 
> So could we fall back to mempool_alloc() for allocating rq with mq
> size if MQ rq allocator fails? Then in this way the extra rq allocation
> in blk_mq_alloc_request() may be killed.

We could, yes, though I'm not sure it's worth special casing that. The
copy is pretty damn cheap compared to the high costs of going through
the legacy path. And given that, I'd probably prefer to keep it all the
same, regardless or the depth of the device. I don't think the change
would be noticable.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/7] cfq-iosched: use appropriate run queue function
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
  2016-12-03  3:15 ` [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-05  8:48   ` Johannes Thumshirn
  2016-12-05 13:06   ` Christoph Hellwig
  2016-12-03  3:15 ` [PATCH 3/7] block: use appropriate queue running functions Jens Axboe
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

For MQ devices, we have to use other functions to run the queue.
No functional changes in this patch, just a prep patch for
support legacy schedulers on blk-mq.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/cfq-iosched.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c73a6fcaeb9d..d6d454a72bd4 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -919,8 +919,14 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
 	if (cfqd->busy_queues) {
+		struct request_queue *q = cfqd->queue;
+
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_work(&cfqd->unplug_work);
+
+		if (q->mq_ops)
+			blk_mq_run_hw_queues(q, true);
+		else
+			kblockd_schedule_work(&cfqd->unplug_work);
 	}
 }
 
@@ -4086,6 +4092,16 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_mark_cfqq_slice_new(cfqq);
 }
 
+static void cfq_run_queue(struct cfq_data *cfqd)
+{
+	struct request_queue *q = cfqd->queue;
+
+	if (q->mq_ops)
+		blk_mq_run_hw_queues(q, true);
+	else
+		__blk_run_queue(q);
+}
+
 /*
  * Called when a new fs request (rq) is added (to cfqq). Check if there's
  * something we should do about it
@@ -4122,7 +4138,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			    cfqd->busy_queues > 1) {
 				cfq_del_timer(cfqd, cfqq);
 				cfq_clear_cfqq_wait_request(cfqq);
-				__blk_run_queue(cfqd->queue);
+				cfq_run_queue(cfqd);
 			} else {
 				cfqg_stats_update_idle_time(cfqq->cfqg);
 				cfq_mark_cfqq_must_dispatch(cfqq);
@@ -4136,7 +4152,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		__blk_run_queue(cfqd->queue);
+		cfq_run_queue(cfqd);
 	}
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] cfq-iosched: use appropriate run queue function
  2016-12-03  3:15 ` [PATCH 2/7] cfq-iosched: use appropriate run queue function Jens Axboe
@ 2016-12-05  8:48   ` Johannes Thumshirn
  2016-12-05 15:12     ` Jens Axboe
  2016-12-05 13:06   ` Christoph Hellwig
  1 sibling, 1 reply; 33+ messages in thread
From: Johannes Thumshirn @ 2016-12-05  8:48 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Fri, Dec 02, 2016 at 08:15:16PM -0700, Jens Axboe wrote:
> For MQ devices, we have to use other functions to run the queue.
> No functional changes in this patch, just a prep patch for
> support legacy schedulers on blk-mq.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  block/cfq-iosched.c | 22 +++++++++++++++++++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index c73a6fcaeb9d..d6d454a72bd4 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -919,8 +919,14 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
>  static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
>  {
>  	if (cfqd->busy_queues) {
> +		struct request_queue *q = cfqd->queue;
> +
>  		cfq_log(cfqd, "schedule dispatch");
> -		kblockd_schedule_work(&cfqd->unplug_work);
> +
> +		if (q->mq_ops)

You've introduced blk_use_mq_path() in patch 1, so why don't you use it here
as well?

> +			blk_mq_run_hw_queues(q, true);
> +		else
> +			kblockd_schedule_work(&cfqd->unplug_work);
>  	}
>  }
>  
> @@ -4086,6 +4092,16 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>  	cfq_mark_cfqq_slice_new(cfqq);
>  }
>  
> +static void cfq_run_queue(struct cfq_data *cfqd)
> +{
> +	struct request_queue *q = cfqd->queue;
> +
> +	if (q->mq_ops)

Ditto.

> +		blk_mq_run_hw_queues(q, true);
> +	else
> +		__blk_run_queue(q);
> +}
> +
>  /*
>   * Called when a new fs request (rq) is added (to cfqq). Check if there's
>   * something we should do about it
> @@ -4122,7 +4138,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
>  			    cfqd->busy_queues > 1) {
>  				cfq_del_timer(cfqd, cfqq);
>  				cfq_clear_cfqq_wait_request(cfqq);
> -				__blk_run_queue(cfqd->queue);
> +				cfq_run_queue(cfqd);
>  			} else {
>  				cfqg_stats_update_idle_time(cfqq->cfqg);
>  				cfq_mark_cfqq_must_dispatch(cfqq);
> @@ -4136,7 +4152,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
>  		 * this new queue is RT and the current one is BE
>  		 */
>  		cfq_preempt_queue(cfqd, cfqq);
> -		__blk_run_queue(cfqd->queue);
> +		cfq_run_queue(cfqd);
>  	}
>  }
>  
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-block" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nï¿½rnberg
GF: Felix Imendï¿½rffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nï¿½rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] cfq-iosched: use appropriate run queue function
  2016-12-05  8:48   ` Johannes Thumshirn
@ 2016-12-05 15:12     ` Jens Axboe
  2016-12-05 15:18       ` Johannes Thumshirn
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 15:12 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: axboe, linux-block, paolo.valente

On 12/05/2016 01:48 AM, Johannes Thumshirn wrote:
> On Fri, Dec 02, 2016 at 08:15:16PM -0700, Jens Axboe wrote:
>> For MQ devices, we have to use other functions to run the queue.
>> No functional changes in this patch, just a prep patch for
>> support legacy schedulers on blk-mq.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>  block/cfq-iosched.c | 22 +++++++++++++++++++---
>>  1 file changed, 19 insertions(+), 3 deletions(-)
>>
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index c73a6fcaeb9d..d6d454a72bd4 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -919,8 +919,14 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
>>  static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
>>  {
>>  	if (cfqd->busy_queues) {
>> +		struct request_queue *q = cfqd->queue;
>> +
>>  		cfq_log(cfqd, "schedule dispatch");
>> -		kblockd_schedule_work(&cfqd->unplug_work);
>> +
>> +		if (q->mq_ops)
> 
> You've introduced blk_use_mq_path() in patch 1, so why don't you use it here
> as well?

There's a difference. q->mq_ops means "this driver is blk-mq",
blk_use_mq_path() checks for ->elevator as well, which means "for this
driver, allocate and insert requests via the legacy IO scheduler path".

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] cfq-iosched: use appropriate run queue function
  2016-12-05 15:12     ` Jens Axboe
@ 2016-12-05 15:18       ` Johannes Thumshirn
  0 siblings, 0 replies; 33+ messages in thread
From: Johannes Thumshirn @ 2016-12-05 15:18 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Mon, Dec 05, 2016 at 08:12:16AM -0700, Jens Axboe wrote:
> On 12/05/2016 01:48 AM, Johannes Thumshirn wrote:
> > On Fri, Dec 02, 2016 at 08:15:16PM -0700, Jens Axboe wrote:
> >> For MQ devices, we have to use other functions to run the queue.
> >> No functional changes in this patch, just a prep patch for
> >> support legacy schedulers on blk-mq.
> >>
> >> Signed-off-by: Jens Axboe <axboe@fb.com>
> >> ---
> >>  block/cfq-iosched.c | 22 +++++++++++++++++++---
> >>  1 file changed, 19 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> >> index c73a6fcaeb9d..d6d454a72bd4 100644
> >> --- a/block/cfq-iosched.c
> >> +++ b/block/cfq-iosched.c
> >> @@ -919,8 +919,14 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
> >>  static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
> >>  {
> >>  	if (cfqd->busy_queues) {
> >> +		struct request_queue *q = cfqd->queue;
> >> +
> >>  		cfq_log(cfqd, "schedule dispatch");
> >> -		kblockd_schedule_work(&cfqd->unplug_work);
> >> +
> >> +		if (q->mq_ops)
> > 
> > You've introduced blk_use_mq_path() in patch 1, so why don't you use it here
> > as well?
> 
> There's a difference. q->mq_ops means "this driver is blk-mq",
> blk_use_mq_path() checks for ->elevator as well, which means "for this
> driver, allocate and insert requests via the legacy IO scheduler path".

Ah OK, that clarifies it. thanks

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nï¿½rnberg
GF: Felix Imendï¿½rffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nï¿½rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] cfq-iosched: use appropriate run queue function
  2016-12-03  3:15 ` [PATCH 2/7] cfq-iosched: use appropriate run queue function Jens Axboe
  2016-12-05  8:48   ` Johannes Thumshirn
@ 2016-12-05 13:06   ` Christoph Hellwig
  2016-12-05 15:08     ` Jens Axboe
  1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2016-12-05 13:06 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Fri, Dec 02, 2016 at 08:15:16PM -0700, Jens Axboe wrote:
> For MQ devices, we have to use other functions to run the queue.
> No functional changes in this patch, just a prep patch for
> support legacy schedulers on blk-mq.

I don't think supporting CFQ on blk-mq makes any sense.  Even if we
for some reason reuse existing scheduler (something I'm not in favour
except as bringup vehicle) we should limit it to deadline and in the
(hopefully near) future BFQ.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/7] cfq-iosched: use appropriate run queue function
  2016-12-05 13:06   ` Christoph Hellwig
@ 2016-12-05 15:08     ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 15:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: axboe, linux-block, paolo.valente

On 12/05/2016 06:06 AM, Christoph Hellwig wrote:
> On Fri, Dec 02, 2016 at 08:15:16PM -0700, Jens Axboe wrote:
>> For MQ devices, we have to use other functions to run the queue.
>> No functional changes in this patch, just a prep patch for
>> support legacy schedulers on blk-mq.
> 
> I don't think supporting CFQ on blk-mq makes any sense.  Even if we
> for some reason reuse existing scheduler (something I'm not in favour
> except as bringup vehicle) we should limit it to deadline and in the
> (hopefully near) future BFQ.

Not disagreeing with that. It'd be synthetically limiting it, though, as
there's really nothing preventing CFQ from running under it, with the
slight queue run change here.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 3/7] block: use appropriate queue running functions
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
  2016-12-03  3:15 ` [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler Jens Axboe
  2016-12-03  3:15 ` [PATCH 2/7] cfq-iosched: use appropriate run queue function Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-05  9:01   ` Johannes Thumshirn
  2016-12-05 13:07   ` Christoph Hellwig
  2016-12-03  3:15 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe
                   ` (4 subsequent siblings)
  7 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

Use MQ variants for MQ, legacy ones for legacy.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c  |  5 ++++-
 block/blk-exec.c  | 10 ++++++++--
 block/blk-flush.c | 14 ++++++++++----
 block/elevator.c  |  5 ++++-
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 0e23589ab3bf..3591f5419509 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -340,7 +340,10 @@ void __blk_run_queue(struct request_queue *q)
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	__blk_run_queue_uncond(q);
+	if (WARN_ON_ONCE(q->mq_ops))
+		blk_mq_run_hw_queues(q, true);
+	else
+		__blk_run_queue_uncond(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 73b8a701ae6d..27e4d82564ed 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -80,8 +80,14 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	}
 
 	__elv_add_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irq(q->queue_lock);
+
+	if (q->mq_ops) {
+		spin_unlock_irq(q->queue_lock);
+		blk_mq_run_hw_queues(q, false);
+	} else {
+		__blk_run_queue(q);
+		spin_unlock_irq(q->queue_lock);
+	}
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 0b68a1258bdd..620d69909b8d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -265,8 +265,10 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * kblockd.
 	 */
 	if (queued || fq->flush_queue_delayed) {
-		WARN_ON(q->mq_ops);
-		blk_run_queue_async(q);
+		if (q->mq_ops)
+			blk_mq_run_hw_queues(q, true);
+		else
+			blk_run_queue_async(q);
 	}
 	fq->flush_queue_delayed = 0;
 	if (blk_use_mq_path(q))
@@ -346,8 +348,12 @@ static void flush_data_end_io(struct request *rq, int error)
 	 * After populating an empty queue, kick it to avoid stall.  Read
 	 * the comment in flush_end_io().
 	 */
-	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
-		blk_run_queue_async(q);
+	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) {
+		if (q->mq_ops)
+			blk_mq_run_hw_queues(q, true);
+		else
+			blk_run_queue_async(q);
+	}
 }
 
 static void mq_flush_data_end_io(struct request *rq, int error)
diff --git a/block/elevator.c b/block/elevator.c
index a18a5db274e4..11d2cfee2bc1 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -627,7 +627,10 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 *   with anything.  There's no point in delaying queue
 		 *   processing.
 		 */
-		__blk_run_queue(q);
+		if (q->mq_ops)
+			blk_mq_run_hw_queues(q, true);
+		else
+			__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT_MERGE:
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/7] block: use appropriate queue running functions
  2016-12-03  3:15 ` [PATCH 3/7] block: use appropriate queue running functions Jens Axboe
@ 2016-12-05  9:01   ` Johannes Thumshirn
  2016-12-05 13:07   ` Christoph Hellwig
  1 sibling, 0 replies; 33+ messages in thread
From: Johannes Thumshirn @ 2016-12-05  9:01 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Fri, Dec 02, 2016 at 08:15:17PM -0700, Jens Axboe wrote:
> Use MQ variants for MQ, legacy ones for legacy.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  block/blk-core.c  |  5 ++++-
>  block/blk-exec.c  | 10 ++++++++--
>  block/blk-flush.c | 14 ++++++++++----
>  block/elevator.c  |  5 ++++-
>  4 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 0e23589ab3bf..3591f5419509 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -340,7 +340,10 @@ void __blk_run_queue(struct request_queue *q)
>  	if (unlikely(blk_queue_stopped(q)))
>  		return;
>  
> -	__blk_run_queue_uncond(q);

I don't quite get the WARN_ON_ONCE() is it for debug purposes? And similarly
blk_use_mq_path() + the other occurences below as well?


-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nï¿½rnberg
GF: Felix Imendï¿½rffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nï¿½rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/7] block: use appropriate queue running functions
  2016-12-03  3:15 ` [PATCH 3/7] block: use appropriate queue running functions Jens Axboe
  2016-12-05  9:01   ` Johannes Thumshirn
@ 2016-12-05 13:07   ` Christoph Hellwig
  2016-12-05 15:10     ` Jens Axboe
  1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2016-12-05 13:07 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Fri, Dec 02, 2016 at 08:15:17PM -0700, Jens Axboe wrote:
> Use MQ variants for MQ, legacy ones for legacy.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  block/blk-core.c  |  5 ++++-
>  block/blk-exec.c  | 10 ++++++++--
>  block/blk-flush.c | 14 ++++++++++----
>  block/elevator.c  |  5 ++++-
>  4 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 0e23589ab3bf..3591f5419509 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -340,7 +340,10 @@ void __blk_run_queue(struct request_queue *q)
>  	if (unlikely(blk_queue_stopped(q)))
>  		return;
>  
> -	__blk_run_queue_uncond(q);
> +	if (WARN_ON_ONCE(q->mq_ops))
> +		blk_mq_run_hw_queues(q, true);
> +	else

This looks odd with the WARN_ON..

> +++ b/block/blk-exec.c
> @@ -80,8 +80,14 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
>  	}
>  
>  	__elv_add_request(q, rq, where);
> -	__blk_run_queue(q);
> -	spin_unlock_irq(q->queue_lock);
> +
> +	if (q->mq_ops) {
> +		spin_unlock_irq(q->queue_lock);
> +		blk_mq_run_hw_queues(q, false);
> +	} else {
> +		__blk_run_queue(q);
> +		spin_unlock_irq(q->queue_lock);
> +	}

We already branch out to the blk-mq path earlier in the function.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/7] block: use appropriate queue running functions
  2016-12-05 13:07   ` Christoph Hellwig
@ 2016-12-05 15:10     ` Jens Axboe
  2016-12-05 17:39       ` Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 15:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: axboe, linux-block, paolo.valente

On 12/05/2016 06:07 AM, Christoph Hellwig wrote:
> On Fri, Dec 02, 2016 at 08:15:17PM -0700, Jens Axboe wrote:
>> Use MQ variants for MQ, legacy ones for legacy.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>  block/blk-core.c  |  5 ++++-
>>  block/blk-exec.c  | 10 ++++++++--
>>  block/blk-flush.c | 14 ++++++++++----
>>  block/elevator.c  |  5 ++++-
>>  4 files changed, 26 insertions(+), 8 deletions(-)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 0e23589ab3bf..3591f5419509 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -340,7 +340,10 @@ void __blk_run_queue(struct request_queue *q)
>>  	if (unlikely(blk_queue_stopped(q)))
>>  		return;
>>  
>> -	__blk_run_queue_uncond(q);
>> +	if (WARN_ON_ONCE(q->mq_ops))
>> +		blk_mq_run_hw_queues(q, true);
>> +	else
> 
> This looks odd with the WARN_ON..

It's just a debug thing, should go away.

>> +++ b/block/blk-exec.c
>> @@ -80,8 +80,14 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
>>  	}
>>  
>>  	__elv_add_request(q, rq, where);
>> -	__blk_run_queue(q);
>> -	spin_unlock_irq(q->queue_lock);
>> +
>> +	if (q->mq_ops) {
>> +		spin_unlock_irq(q->queue_lock);
>> +		blk_mq_run_hw_queues(q, false);
>> +	} else {
>> +		__blk_run_queue(q);
>> +		spin_unlock_irq(q->queue_lock);
>> +	}
> 
> We already branch out to the blk-mq path earlier in the function.

Ah good point, this is a subset of an earlier branch that also checks
q->elevator.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/7] block: use appropriate queue running functions
  2016-12-05 15:10     ` Jens Axboe
@ 2016-12-05 17:39       ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 17:39 UTC (permalink / raw)
  To: Jens Axboe, Christoph Hellwig; +Cc: linux-block, paolo.valente

On 12/05/2016 08:10 AM, Jens Axboe wrote:
> On 12/05/2016 06:07 AM, Christoph Hellwig wrote:
>> On Fri, Dec 02, 2016 at 08:15:17PM -0700, Jens Axboe wrote:
>>> Use MQ variants for MQ, legacy ones for legacy.
>>>
>>> Signed-off-by: Jens Axboe <axboe@fb.com>
>>> ---
>>>  block/blk-core.c  |  5 ++++-
>>>  block/blk-exec.c  | 10 ++++++++--
>>>  block/blk-flush.c | 14 ++++++++++----
>>>  block/elevator.c  |  5 ++++-
>>>  4 files changed, 26 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/block/blk-core.c b/block/blk-core.c
>>> index 0e23589ab3bf..3591f5419509 100644
>>> --- a/block/blk-core.c
>>> +++ b/block/blk-core.c
>>> @@ -340,7 +340,10 @@ void __blk_run_queue(struct request_queue *q)
>>>  	if (unlikely(blk_queue_stopped(q)))
>>>  		return;
>>>  
>>> -	__blk_run_queue_uncond(q);
>>> +	if (WARN_ON_ONCE(q->mq_ops))
>>> +		blk_mq_run_hw_queues(q, true);
>>> +	else
>>
>> This looks odd with the WARN_ON..
> 
> It's just a debug thing, should go away.
> 
>>> +++ b/block/blk-exec.c
>>> @@ -80,8 +80,14 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
>>>  	}
>>>  
>>>  	__elv_add_request(q, rq, where);
>>> -	__blk_run_queue(q);
>>> -	spin_unlock_irq(q->queue_lock);
>>> +
>>> +	if (q->mq_ops) {
>>> +		spin_unlock_irq(q->queue_lock);
>>> +		blk_mq_run_hw_queues(q, false);
>>> +	} else {
>>> +		__blk_run_queue(q);
>>> +		spin_unlock_irq(q->queue_lock);
>>> +	}
>>
>> We already branch out to the blk-mq path earlier in the function.
> 
> Ah good point, this is a subset of an earlier branch that also checks
> q->elevator.

Actually, I take that back - if q->mq_ops && q->elevator, we still get
here, and we should run the queue with the MQ variant. The code was
correct as-is.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
                   ` (2 preceding siblings ...)
  2016-12-03  3:15 ` [PATCH 3/7] block: use appropriate queue running functions Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-05  9:01   ` Johannes Thumshirn
  2016-12-05 13:08   ` Christoph Hellwig
  2016-12-03  3:15 ` [PATCH 5/7] blk-mq: test patch to get legacy IO schedulers working Jens Axboe
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bac12caece06..90db5b490df9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1237,7 +1237,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 {
 	init_request_from_bio(rq, bio);
 
-	blk_account_io_start(rq, 1);
+	blk_account_io_start(rq, true);
 }
 
 static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool
  2016-12-03  3:15 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe
@ 2016-12-05  9:01   ` Johannes Thumshirn
  2016-12-05 13:08   ` Christoph Hellwig
  1 sibling, 0 replies; 33+ messages in thread
From: Johannes Thumshirn @ 2016-12-05  9:01 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

On Fri, Dec 02, 2016 at 08:15:18PM -0700, Jens Axboe wrote:
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---

Easy enough,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nï¿½rnberg
GF: Felix Imendï¿½rffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nï¿½rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool
  2016-12-03  3:15 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe
  2016-12-05  9:01   ` Johannes Thumshirn
@ 2016-12-05 13:08   ` Christoph Hellwig
  1 sibling, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2016-12-05 13:08 UTC (permalink / raw)
  To: Jens Axboe; +Cc: axboe, linux-block, paolo.valente

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 5/7] blk-mq: test patch to get legacy IO schedulers working
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
                   ` (3 preceding siblings ...)
  2016-12-03  3:15 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-03  3:15 ` [PATCH 6/7] block: drop irq+lock when flushing queue plugs Jens Axboe
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

With this applied, a single queue blk-mq manage device can use
any of the legacy IO schedulers. The driver has to set
BLK_MQ_F_SQ_SCHED for now, and we default to 'deadline'.

The scheduler defaults to deadline for now. Can be runtime switched,
like the non-mq devices, by echoing something else into
/sys/block/</dev>/queue/scheduler

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  38 ++++---
 block/blk-merge.c      |   5 +
 block/blk-mq.c         | 290 ++++++++++++++++++++++++++++++++++++++++++++++---
 block/blk-sysfs.c      |   2 +-
 block/blk.h            |   4 +
 block/elevator.c       |  11 +-
 include/linux/blk-mq.h |   1 +
 include/linux/blkdev.h |   2 +
 8 files changed, 316 insertions(+), 37 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3591f5419509..6c1063aab1a0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1159,6 +1159,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	blk_rq_set_rl(rq, rl);
 	rq->cmd_flags = op;
 	rq->rq_flags = rq_flags;
+	if (q->mq_ops)
+		rq->rq_flags |= RQF_MQ_RL;
 
 	/* init elvpriv */
 	if (rq_flags & RQF_ELVPRIV) {
@@ -1246,8 +1248,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
  * Returns ERR_PTR on failure, with @q->queue_lock held.
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
-static struct request *get_request(struct request_queue *q, unsigned int op,
-		struct bio *bio, gfp_t gfp_mask)
+struct request *get_request(struct request_queue *q, unsigned int op,
+			    struct bio *bio, gfp_t gfp_mask)
 {
 	const bool is_sync = op_is_sync(op);
 	DEFINE_WAIT(wait);
@@ -1430,7 +1432,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(!q))
 		return;
 
-	if (q->mq_ops) {
+	if (q->mq_ops && !(req->rq_flags & RQF_MQ_RL)) {
 		blk_mq_free_request(req);
 		return;
 	}
@@ -1466,7 +1468,7 @@ void blk_put_request(struct request *req)
 {
 	struct request_queue *q = req->q;
 
-	if (q->mq_ops)
+	if (q->mq_ops && !(req->rq_flags & RQF_MQ_RL))
 		blk_mq_free_request(req);
 	else {
 		unsigned long flags;
@@ -1556,6 +1558,15 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 	return true;
 }
 
+struct list_head *blk_get_plug_list(struct request_queue *q,
+				    struct blk_plug *plug)
+{
+	if (!q->mq_ops || q->elevator)
+		return &plug->list;
+
+	return &plug->mq_list;
+}
+
 /**
  * blk_attempt_plug_merge - try to merge with %current's plugged list
  * @q: request_queue new bio is being queued at
@@ -1592,10 +1603,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 		goto out;
 	*request_count = 0;
 
-	if (q->mq_ops)
-		plug_list = &plug->mq_list;
-	else
-		plug_list = &plug->list;
+	plug_list = blk_get_plug_list(q, plug);
 
 	list_for_each_entry_reverse(rq, plug_list, queuelist) {
 		int el_ret;
@@ -1640,10 +1648,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q)
 	if (!plug)
 		goto out;
 
-	if (q->mq_ops)
-		plug_list = &plug->mq_list;
-	else
-		plug_list = &plug->list;
+	plug_list = blk_get_plug_list(q, plug);
 
 	list_for_each_entry(rq, plug_list, queuelist) {
 		if (rq->q == q)
@@ -3197,7 +3202,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 {
 	trace_block_unplug(q, depth, !from_schedule);
 
-	if (from_schedule)
+	if (q->mq_ops)
+		blk_mq_run_hw_queues(q, true);
+	else if (from_schedule)
 		blk_run_queue_async(q);
 	else
 		__blk_run_queue(q);
@@ -3293,7 +3300,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		 * Short-circuit if @q is dead
 		 */
 		if (unlikely(blk_queue_dying(q))) {
-			__blk_end_request_all(rq, -ENODEV);
+			if (q->mq_ops)
+				blk_mq_end_request(rq, -ENODEV);
+			else
+				__blk_end_request_all(rq, -ENODEV);
 			continue;
 		}
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1002afdfee99..0952e0503aa4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -754,6 +754,11 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	/* owner-ship of bio passed from next to req */
 	next->bio = NULL;
 	__blk_put_request(q, next);
+
+	/* FIXME: MQ+sched holds a reference */
+	if (q->mq_ops && q->elevator)
+		blk_queue_exit(q);
+
 	return 1;
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 90db5b490df9..335c37787ac7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -821,6 +821,146 @@ static inline unsigned int queued_to_index(unsigned int queued)
 	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
+static void rq_copy(struct request *rq, struct request *src)
+{
+#define FIELD_COPY(dst, src, name)	((dst)->name = (src)->name)
+	FIELD_COPY(rq, src, cpu);
+	FIELD_COPY(rq, src, cmd_type);
+	FIELD_COPY(rq, src, cmd_flags);
+	rq->rq_flags |= (src->rq_flags & (RQF_PREEMPT | RQF_QUIET | RQF_PM | RQF_DONTPREP));
+	rq->rq_flags &= ~RQF_IO_STAT;
+	FIELD_COPY(rq, src, __data_len);
+	FIELD_COPY(rq, src, __sector);
+	FIELD_COPY(rq, src, bio);
+	FIELD_COPY(rq, src, biotail);
+	FIELD_COPY(rq, src, rq_disk);
+	FIELD_COPY(rq, src, part);
+	FIELD_COPY(rq, src, nr_phys_segments);
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+	FIELD_COPY(rq, src, nr_integrity_segments);
+#endif
+	FIELD_COPY(rq, src, ioprio);
+	FIELD_COPY(rq, src, timeout);
+
+	if (src->cmd_type == REQ_TYPE_BLOCK_PC) {
+		FIELD_COPY(rq, src, cmd);
+		FIELD_COPY(rq, src, cmd_len);
+		FIELD_COPY(rq, src, extra_len);
+		FIELD_COPY(rq, src, sense_len);
+		FIELD_COPY(rq, src, resid_len);
+		FIELD_COPY(rq, src, sense);
+		FIELD_COPY(rq, src, retries);
+	}
+
+	src->bio = src->biotail = NULL;
+}
+
+static void sched_rq_end_io(struct request *rq, int error)
+{
+	struct request *sched_rq = rq->end_io_data;
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	FIELD_COPY(sched_rq, rq, resid_len);
+	FIELD_COPY(sched_rq, rq, extra_len);
+	FIELD_COPY(sched_rq, rq, sense_len);
+	FIELD_COPY(sched_rq, rq, errors);
+	FIELD_COPY(sched_rq, rq, retries);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	blk_finish_request(sched_rq, error);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	blk_mq_free_request(rq);
+	blk_mq_start_stopped_hw_queues(q, true);
+}
+
+/*
+ * Pull off the elevator dispatch list and send it to the driver. Note that
+ * we have to transform the fake requests into real requests
+ */
+static void blk_mq_sched_dispatch(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct request *rq, *sched_rq;
+	struct blk_mq_alloc_data alloc_data;
+	struct blk_mq_queue_data bd;
+	int queued = 0, ret;
+
+	if (unlikely(blk_mq_hctx_stopped(hctx)))
+		return;
+
+	hctx->run++;
+
+again:
+	rq = NULL;
+	if (!list_empty(&hctx->dispatch)) {
+		spin_lock_irq(&hctx->lock);
+		if (!list_empty(&hctx->dispatch)) {
+			rq = list_first_entry(&hctx->dispatch, struct request, queuelist);
+			list_del_init(&rq->queuelist);
+		}
+		spin_unlock_irq(&hctx->lock);
+	}
+
+	if (!rq) {
+		alloc_data.q = q;
+		alloc_data.flags = BLK_MQ_REQ_NOWAIT;
+		alloc_data.ctx = blk_mq_get_ctx(q);
+		alloc_data.hctx = hctx;
+
+		rq = __blk_mq_alloc_request(&alloc_data, 0);
+		blk_mq_put_ctx(alloc_data.ctx);
+
+		if (!rq) {
+			blk_mq_stop_hw_queue(hctx);
+			return;
+		}
+
+		spin_lock_irq(q->queue_lock);
+		sched_rq = blk_fetch_request(q);
+		spin_unlock_irq(q->queue_lock);
+
+		if (!sched_rq) {
+			blk_queue_enter_live(q);
+			__blk_mq_free_request(hctx, alloc_data.ctx, rq);
+			return;
+		}
+
+		rq_copy(rq, sched_rq);
+		rq->end_io = sched_rq_end_io;
+		rq->end_io_data = sched_rq;
+	}
+
+	bd.rq = rq;
+	bd.list = NULL;
+	bd.last = true;
+
+	ret = q->mq_ops->queue_rq(hctx, &bd);
+	switch (ret) {
+	case BLK_MQ_RQ_QUEUE_OK:
+		queued++;
+		break;
+	case BLK_MQ_RQ_QUEUE_BUSY:
+		spin_lock_irq(&hctx->lock);
+		list_add_tail(&rq->queuelist, &hctx->dispatch);
+		spin_unlock_irq(&hctx->lock);
+		blk_mq_stop_hw_queue(hctx);
+		break;
+	default:
+		pr_err("blk-mq: bad return on queue: %d\n", ret);
+	case BLK_MQ_RQ_QUEUE_ERROR:
+		rq->errors = -EIO;
+		blk_mq_end_request(rq, rq->errors);
+		break;
+	}
+
+	if (ret != BLK_MQ_RQ_QUEUE_BUSY)
+		goto again;
+
+	hctx->dispatched[queued_to_index(queued)]++;
+}
+
 /*
  * Run this hardware queue, pulling any software queues mapped to it in.
  * Note that this function currently has various problems around ordering
@@ -938,11 +1078,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 
 	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
 		rcu_read_lock();
-		blk_mq_process_rq_list(hctx);
+		if (!hctx->queue->elevator)
+			blk_mq_process_rq_list(hctx);
+		else
+			blk_mq_sched_dispatch(hctx);
 		rcu_read_unlock();
 	} else {
 		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
-		blk_mq_process_rq_list(hctx);
+		if (!hctx->queue->elevator)
+			blk_mq_process_rq_list(hctx);
+		else
+			blk_mq_sched_dispatch(hctx);
 		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
 	}
 }
@@ -992,18 +1138,27 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
 }
 
+static inline bool hctx_pending_io(struct request_queue *q,
+				   struct blk_mq_hw_ctx *hctx)
+{
+	/*
+	 * For the pure MQ case, we have pending IO if any of the software
+	 * queues are loaded, or we have residual dispatch. If we have
+	 * an IO scheduler attached, we don't know for sure. So just say
+	 * yes, to ensure the queue runs.
+	 */
+	return blk_mq_hctx_has_pending(hctx) ||
+		!list_empty_careful(&hctx->dispatch) || q->elevator;
+}
+
 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if ((!blk_mq_hctx_has_pending(hctx) &&
-		    list_empty_careful(&hctx->dispatch)) ||
-		    blk_mq_hctx_stopped(hctx))
-			continue;
-
-		blk_mq_run_hw_queue(hctx, async);
+		if (hctx_pending_io(q, hctx))
+			blk_mq_run_hw_queue(hctx, async);
 	}
 }
 EXPORT_SYMBOL(blk_mq_run_hw_queues);
@@ -1448,12 +1603,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+	const bool can_merge = !blk_queue_nomerges(q) && bio_mergeable(bio);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
 	struct blk_mq_alloc_data data;
 	struct request *rq;
 	blk_qc_t cookie;
 	unsigned int wb_acct;
+	int where = ELEVATOR_INSERT_SORT;
 
 	blk_queue_bounce(q, &bio);
 
@@ -1464,18 +1621,64 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio, q->bio_split);
 
-	if (!is_flush_fua && !blk_queue_nomerges(q)) {
+	if (!is_flush_fua && can_merge) {
 		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
 			return BLK_QC_T_NONE;
 	} else
 		request_count = blk_plug_queued_count(q);
 
+	/*
+	 * Set some defaults - we have just one hardware queue, so
+	 * we don't have to explicitly map it.
+	 */
+	data.hctx = q->queue_hw_ctx[0];
+	data.ctx = NULL;
+
+	if (q->elevator && can_merge) {
+		int el_ret;
+
+		spin_lock_irq(q->queue_lock);
+
+		el_ret = elv_merge(q, &rq, bio);
+		if (el_ret == ELEVATOR_BACK_MERGE) {
+			if (bio_attempt_back_merge(q, rq, bio)) {
+				elv_bio_merged(q, rq, bio);
+				if (!attempt_back_merge(q, rq))
+					elv_merged_request(q, rq, el_ret);
+				goto elv_unlock;
+			}
+		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
+			if (bio_attempt_front_merge(q, rq, bio)) {
+				elv_bio_merged(q, rq, bio);
+				if (!attempt_front_merge(q, rq))
+					elv_merged_request(q, rq, el_ret);
+				goto elv_unlock;
+			}
+		}
+
+		spin_unlock_irq(q->queue_lock);
+	}
+
 	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
 
-	rq = blk_mq_map_request(q, bio, &data);
-	if (unlikely(!rq)) {
-		__wbt_done(q->rq_wb, wb_acct);
-		return BLK_QC_T_NONE;
+	if (!q->elevator) {
+		rq = blk_mq_map_request(q, bio, &data);
+		if (unlikely(!rq)) {
+			__wbt_done(q->rq_wb, wb_acct);
+			return BLK_QC_T_NONE;
+		}
+	} else {
+		blk_queue_enter_live(q);
+		spin_lock_irq(q->queue_lock);
+		rq = get_request(q, bio->bi_opf, bio, GFP_NOIO);
+		if (IS_ERR(rq)) {
+			spin_unlock_irq(q->queue_lock);
+			blk_queue_exit(q);
+			__wbt_done(q->rq_wb, wb_acct);
+			goto elv_unlock;
+		}
+
+		init_request_from_bio(rq, bio);
 	}
 
 	wbt_track(&rq->issue_stat, wb_acct);
@@ -1483,6 +1686,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
 
 	if (unlikely(is_flush_fua)) {
+		if (q->elevator) {
+			init_request_from_bio(rq, bio);
+			where = ELEVATOR_INSERT_FLUSH;
+			goto elv_insert;
+		}
 		blk_mq_bio_to_request(rq, bio);
 		blk_insert_flush(rq);
 		goto run_queue;
@@ -1495,6 +1703,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	 */
 	plug = current->plug;
 	if (plug) {
+		struct list_head *plug_list = blk_get_plug_list(q, plug);
 		struct request *last = NULL;
 
 		blk_mq_bio_to_request(rq, bio);
@@ -1503,14 +1712,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		 * @request_count may become stale because of schedule
 		 * out, so check the list again.
 		 */
-		if (list_empty(&plug->mq_list))
+		if (list_empty(plug_list))
 			request_count = 0;
 		if (!request_count)
 			trace_block_plug(q);
 		else
-			last = list_entry_rq(plug->mq_list.prev);
+			last = list_entry_rq(plug_list->prev);
 
-		blk_mq_put_ctx(data.ctx);
+		if (data.ctx)
+			blk_mq_put_ctx(data.ctx);
 
 		if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
 		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
@@ -1518,10 +1728,21 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 			trace_block_plug(q);
 		}
 
-		list_add_tail(&rq->queuelist, &plug->mq_list);
+		list_add_tail(&rq->queuelist, plug_list);
 		return cookie;
 	}
 
+	if (q->elevator) {
+elv_insert:
+		blk_account_io_start(rq, true);
+		spin_lock_irq(q->queue_lock);
+		__elv_add_request(q, rq, where);
+elv_unlock:
+		spin_unlock_irq(q->queue_lock);
+		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
+		return BLK_QC_T_NONE;
+	}
+
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
 		/*
 		 * For a SYNC request, send it to the hardware immediately. For
@@ -2085,6 +2306,35 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	blk_mq_sysfs_register(q);
 }
 
+static int blk_sq_sched_init(struct request_queue *q)
+{
+	int ret;
+
+	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
+	if (!q->fq)
+		goto fail;
+
+	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
+		goto fail;
+
+	mutex_lock(&q->sysfs_lock);
+	ret = elevator_init(q, "deadline");
+	mutex_unlock(&q->sysfs_lock);
+
+	if (ret) {
+		blk_exit_rl(&q->root_rl);
+		goto fail;
+	}
+
+	q->queue_lock = &q->queue_hw_ctx[0]->lock;
+	printk(KERN_ERR "blk-mq: sq sched init success\n");
+	return 0;
+fail:
+	printk(KERN_ERR "blk-mq: sq sched init failed\n");
+	blk_free_flush_queue(q->fq);
+	return 1;
+}
+
 struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 						  struct request_queue *q)
 {
@@ -2124,9 +2374,13 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	if (q->nr_hw_queues > 1)
 		blk_queue_make_request(q, blk_mq_make_request);
-	else
+	else {
 		blk_queue_make_request(q, blk_sq_make_request);
 
+		if (set->flags & BLK_MQ_F_SQ_SCHED)
+			blk_sq_sched_init(q);
+	}
+
 	/*
 	 * Do this after blk_queue_make_request() overrides it...
 	 */
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 706b27bd73a1..f3a11d4de4e6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -896,7 +896,7 @@ int blk_register_queue(struct gendisk *disk)
 
 	blk_wb_init(q);
 
-	if (!q->request_fn)
+	if (!q->elevator)
 		return 0;
 
 	ret = elv_register_queue(q);
diff --git a/block/blk.h b/block/blk.h
index 094fc10429c3..3137ff09725e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -77,6 +77,9 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 			    unsigned int nr_bytes, unsigned int bidi_bytes);
 void blk_freeze_queue(struct request_queue *q);
 
+struct request *get_request(struct request_queue *, unsigned int, struct bio *,
+				gfp_t);
+
 static inline void blk_queue_enter_live(struct request_queue *q)
 {
 	/*
@@ -110,6 +113,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 			    unsigned int *request_count,
 			    struct request **same_queue_rq);
 unsigned int blk_plug_queued_count(struct request_queue *q);
+struct list_head *blk_get_plug_list(struct request_queue *, struct blk_plug *);
 
 void blk_account_io_start(struct request *req, bool new_io);
 void blk_account_io_completion(struct request *req, unsigned int bytes);
diff --git a/block/elevator.c b/block/elevator.c
index 11d2cfee2bc1..c62974ef1052 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1002,18 +1002,21 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
 	struct elevator_queue *e = q->elevator;
-	struct elevator_type *elv;
+	struct elevator_type *elv = NULL;
 	struct elevator_type *__e;
 	int len = 0;
 
-	if (!q->elevator || !blk_queue_stackable(q))
+	if (!blk_queue_stackable(q))
 		return sprintf(name, "none\n");
 
-	elv = e->type;
+	if (!q->elevator)
+		len += sprintf(name+len, "[none] ");
+	else
+		elv = e->type;
 
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
-		if (!strcmp(elv->elevator_name, __e->elevator_name))
+		if (elv && !strcmp(elv->elevator_name, __e->elevator_name))
 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
 		else
 			len += sprintf(name+len, "%s ", __e->elevator_name);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 35a0af5ede6d..485d922b3fe6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -151,6 +151,7 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
 	BLK_MQ_F_BLOCKING	= 1 << 5,
+	BLK_MQ_F_SQ_SCHED	= 1 << 6,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebeef2b79c5a..a8c580f806cc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,6 +120,8 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_HASHED		((__force req_flags_t)(1 << 16))
 /* IO stats tracking on */
 #define RQF_STATS		((__force req_flags_t)(1 << 17))
+/* rl based request on MQ queue */
+#define RQF_MQ_RL		((__force req_flags_t)(1 << 18))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 6/7] block: drop irq+lock when flushing queue plugs
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
                   ` (4 preceding siblings ...)
  2016-12-03  3:15 ` [PATCH 5/7] blk-mq: test patch to get legacy IO schedulers working Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-03  3:15 ` [PATCH 7/7] null_blk: add parameters to ask for mq-sched and write cache Jens Axboe
  2016-12-03  4:02 ` [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
  7 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

Not convinced this is a faster approach, and it does look IRQs off
longer than otherwise. With mq+scheduling, it's a problem since
it forces us to offload the queue running. If we get rid of it,
we can run the queue without the queue lock held.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 6c1063aab1a0..80b5259080a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3197,18 +3197,21 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
  * plugger did not intend it.
  */
 static void queue_unplugged(struct request_queue *q, unsigned int depth,
-			    bool from_schedule)
+			    bool from_schedule, unsigned long flags)
 	__releases(q->queue_lock)
 {
 	trace_block_unplug(q, depth, !from_schedule);
 
-	if (q->mq_ops)
-		blk_mq_run_hw_queues(q, true);
-	else if (from_schedule)
-		blk_run_queue_async(q);
-	else
-		__blk_run_queue(q);
-	spin_unlock(q->queue_lock);
+	if (q->mq_ops) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
+		blk_mq_run_hw_queues(q, from_schedule);
+	} else {
+		if (from_schedule)
+			blk_run_queue_async(q);
+		else
+			__blk_run_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3276,11 +3279,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	q = NULL;
 	depth = 0;
 
-	/*
-	 * Save and disable interrupts here, to avoid doing it for every
-	 * queue lock we have to take.
-	 */
-	local_irq_save(flags);
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
@@ -3290,10 +3288,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			 * This drops the queue lock
 			 */
 			if (q)
-				queue_unplugged(q, depth, from_schedule);
+				queue_unplugged(q, depth, from_schedule, flags);
 			q = rq->q;
 			depth = 0;
-			spin_lock(q->queue_lock);
+			spin_lock_irqsave(q->queue_lock, flags);
 		}
 
 		/*
@@ -3322,9 +3320,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	 * This drops the queue lock
 	 */
 	if (q)
-		queue_unplugged(q, depth, from_schedule);
-
-	local_irq_restore(flags);
+		queue_unplugged(q, depth, from_schedule, flags);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 7/7] null_blk: add parameters to ask for mq-sched and write cache
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
                   ` (5 preceding siblings ...)
  2016-12-03  3:15 ` [PATCH 6/7] block: drop irq+lock when flushing queue plugs Jens Axboe
@ 2016-12-03  3:15 ` Jens Axboe
  2016-12-03  4:02 ` [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
  7 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  3:15 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: paolo.valente, Jens Axboe

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/null_blk.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 4943ee22716e..f2726936cd1a 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -117,6 +117,14 @@ static bool use_lightnvm;
 module_param(use_lightnvm, bool, S_IRUGO);
 MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
 
+static bool use_sched;
+module_param(use_sched, bool, S_IRUGO);
+MODULE_PARM_DESC(use_sched, "Ask for blk-mq scheduling");
+
+static bool wc;
+module_param(wc, bool, S_IRUGO);
+MODULE_PARM_DESC(use_sched, "Claim volatile write cache");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;
 
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -724,6 +732,9 @@ static int null_add_dev(void)
 		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 		nullb->tag_set.driver_data = nullb;
 
+		if (use_sched)
+			nullb->tag_set.flags |= BLK_MQ_F_SQ_SCHED;
+
 		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
 		if (rv)
 			goto out_cleanup_queues;
@@ -760,6 +771,9 @@ static int null_add_dev(void)
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
+	if (wc)
+		blk_queue_write_cache(nullb->q, true, false);
+
 	mutex_lock(&lock);
 	nullb->index = nullb_indexes++;
 	mutex_unlock(&lock);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq
  2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
                   ` (6 preceding siblings ...)
  2016-12-03  3:15 ` [PATCH 7/7] null_blk: add parameters to ask for mq-sched and write cache Jens Axboe
@ 2016-12-03  4:02 ` Jens Axboe
  2016-12-03  4:04   ` Jens Axboe
  7 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  4:02 UTC (permalink / raw)
  To: linux-block; +Cc: paolo.valente

On 12/02/2016 08:15 PM, Jens Axboe wrote:
> This is by no means done, but it seems to work well enough that
> I thought I'd send it out for others to take a look at and play
> with.
> 
> Basically this allows blk-mq managed devices to run the legacy
> IO schedulers, unmodified. The only requirement is that the
> blk-mq device has to be single queue for now, though that
> limitation would be rather simple to lift.
> 
> Since this is a debug patch, the default scheduler is deadline.
> You can switch that to the other configured schedulers, as you
> would with non-mq devices. Here's an example of a scsi-mq device
> that is running deadline, and being switched to CFQ online:
> 
> root@leopard:~# cat /sys/block/sda/mq/0/tags 
> nr_tags=31, reserved_tags=0, bits_per_word=4
> nr_free=31, nr_reserved=0
> active_queues=0
> 
> root@leopard:~# cat /sys/block/sda/queue/scheduler 
> noop [deadline] cfq 
> 
> root@leopard:~# echo cfq > /sys/block/sda/queue/scheduler 
> root@leopard:~# cat /sys/block/sda/queue/scheduler 
> noop deadline [cfq] 
> 
> Testing welcome. There's certainly room for improvement here, so
> I'm mostly interested in grave performance issues or crashes, if any.
> 
> Can also be viewed/fetched via git:
> 
> git://git.kernel.dk/linux-block for-4.11/blk-mq-legacy-sched

BTW, didn't include the patch for SCSI. You need the below to enable
scheduling on SCSI devices.

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aedcec3..47a5c87 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2121,7 +2121,7 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.queue_depth = shost->can_queue;
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
-	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | BLK_MQ_F_SQ_SCHED;
+	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
 	shost->tag_set.flags |=
 		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;

-- 
Jens Axboe

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq
  2016-12-03  4:02 ` [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
@ 2016-12-03  4:04   ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-03  4:04 UTC (permalink / raw)
  To: linux-block; +Cc: paolo.valente

On 12/02/2016 09:02 PM, Jens Axboe wrote:
> On 12/02/2016 08:15 PM, Jens Axboe wrote:
>> This is by no means done, but it seems to work well enough that
>> I thought I'd send it out for others to take a look at and play
>> with.
>>
>> Basically this allows blk-mq managed devices to run the legacy
>> IO schedulers, unmodified. The only requirement is that the
>> blk-mq device has to be single queue for now, though that
>> limitation would be rather simple to lift.
>>
>> Since this is a debug patch, the default scheduler is deadline.
>> You can switch that to the other configured schedulers, as you
>> would with non-mq devices. Here's an example of a scsi-mq device
>> that is running deadline, and being switched to CFQ online:
>>
>> root@leopard:~# cat /sys/block/sda/mq/0/tags 
>> nr_tags=31, reserved_tags=0, bits_per_word=4
>> nr_free=31, nr_reserved=0
>> active_queues=0
>>
>> root@leopard:~# cat /sys/block/sda/queue/scheduler 
>> noop [deadline] cfq 
>>
>> root@leopard:~# echo cfq > /sys/block/sda/queue/scheduler 
>> root@leopard:~# cat /sys/block/sda/queue/scheduler 
>> noop deadline [cfq] 
>>
>> Testing welcome. There's certainly room for improvement here, so
>> I'm mostly interested in grave performance issues or crashes, if any.
>>
>> Can also be viewed/fetched via git:
>>
>> git://git.kernel.dk/linux-block for-4.11/blk-mq-legacy-sched
> 
> BTW, didn't include the patch for SCSI. You need the below to enable
> scheduling on SCSI devices.

Gah, that was reversed, time to put the computer away for the night.
Below is correct.

commit 8eea81e0903fcde1c28044ea66acc4c5c578f553
Author: Jens Axboe <axboe@fb.com>
Date:   Fri Dec 2 21:03:43 2016 -0700

    scsi: enable IO scheduling for scsi-mq
    
    Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 47a5c8783b89..aedcec30ee5f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2121,7 +2121,7 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.queue_depth = shost->can_queue;
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
-	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | BLK_MQ_F_SQ_SCHED;
 	shost->tag_set.flags |=
 		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;

-- 
Jens Axboe

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCHSET/RFC v2] Make legacy IO schedulers work with blk-mq
@ 2016-12-05 18:26 Jens Axboe
  2016-12-05 18:27 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe
  0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 18:26 UTC (permalink / raw)
  To: axboe, linux-block, linux-kernel; +Cc: paolo.valente

Version 2 of the hack/patchset, that enables blk-mq to use the legacy
IO schedulers with single queue devices. Original posting is here:

https://marc.info/?l=linux-block&m=148073493203664&w=2

You can also found this version in the following git branch:

git://git.kernel.dk/linux-block blk-mq-legacy-sched.2

and new developments/fixes will happen in the 'blk-mq-legacy-sched'
branch.

Changes since v1:

- Remove the default 'deadline' hard wiring, and provide Kconfig
  entries to set the blk-mq scheduler. This now works like for legacy
  devices.

- Rename blk_use_mq_path() to blk_use_sched_path() to make it more
  clear. Suggested by Johannes Thumshirn.

- Fixup a spot where we did not use the accessor function to determine
  what path to use.

- Flush mq software queues, even if IO scheduler managed. This should
  make paths work that are MQ aware, and using only MQ interfaces.

- Cleanup free path of MQ request.

- Account when IO was queued to a hardware context, similarly to the
  regular MQ path.

- Add BLK_MQ_F_NO_SCHED flag, so that drivers can explicitly ask for
  no scheduling on a queue. Add this for NVMe admin queues.

- Kill BLK_MQ_F_SCHED, since we now have Kconfig entries for setting
  the desired IO scheduler.

- Fix issues with live scheduler switching through sysfs. Should now
  be solid, even with lots of IO running on the device.

- Drop null_blk and SCSI changes, not needed anymore.


 block/Kconfig.iosched   |   29 ++++
 block/blk-core.c        |   77 ++++++-----
 block/blk-exec.c        |   12 +
 block/blk-flush.c       |   40 +++--
 block/blk-merge.c       |    5 
 block/blk-mq.c          |  332 +++++++++++++++++++++++++++++++++++++++++++++---
 block/blk-mq.h          |    1 
 block/blk-sysfs.c       |    2 
 block/blk.h             |   16 ++
 block/cfq-iosched.c     |   22 ++-
 block/elevator.c        |  125 ++++++++++++------
 drivers/nvme/host/pci.c |    1 
 include/linux/blk-mq.h  |    1 
 include/linux/blkdev.h  |    2 
 14 files changed, 555 insertions(+), 110 deletions(-)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool
  2016-12-05 18:26 [PATCHSET/RFC v2] " Jens Axboe
@ 2016-12-05 18:27 ` Jens Axboe
  0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2016-12-05 18:27 UTC (permalink / raw)
  To: axboe, linux-block, linux-kernel; +Cc: paolo.valente, Jens Axboe

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bac12caece06..90db5b490df9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1237,7 +1237,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 {
 	init_request_from_bio(rq, bio);
 
-	blk_account_io_start(rq, 1);
+	blk_account_io_start(rq, true);
 }
 
 static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2016-12-06 19:50 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-12-03  3:15 [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
2016-12-03  3:15 ` [PATCH 1/7] block: use legacy path for flush requests for MQ with a scheduler Jens Axboe
2016-12-05 13:05   ` Christoph Hellwig
2016-12-05 15:07     ` Jens Axboe
2016-12-05 15:49       ` Johannes Thumshirn
2016-12-05 15:49         ` Jens Axboe
2016-12-05 22:40       ` Mike Snitzer
2016-12-05 22:50         ` Jens Axboe
2016-12-06 19:50           ` Mike Snitzer
2016-12-05 17:00   ` Ming Lei
2016-12-05 17:09     ` Jens Axboe
2016-12-05 19:22       ` Ming Lei
2016-12-05 19:35         ` Jens Axboe
2016-12-03  3:15 ` [PATCH 2/7] cfq-iosched: use appropriate run queue function Jens Axboe
2016-12-05  8:48   ` Johannes Thumshirn
2016-12-05 15:12     ` Jens Axboe
2016-12-05 15:18       ` Johannes Thumshirn
2016-12-05 13:06   ` Christoph Hellwig
2016-12-05 15:08     ` Jens Axboe
2016-12-03  3:15 ` [PATCH 3/7] block: use appropriate queue running functions Jens Axboe
2016-12-05  9:01   ` Johannes Thumshirn
2016-12-05 13:07   ` Christoph Hellwig
2016-12-05 15:10     ` Jens Axboe
2016-12-05 17:39       ` Jens Axboe
2016-12-03  3:15 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe
2016-12-05  9:01   ` Johannes Thumshirn
2016-12-05 13:08   ` Christoph Hellwig
2016-12-03  3:15 ` [PATCH 5/7] blk-mq: test patch to get legacy IO schedulers working Jens Axboe
2016-12-03  3:15 ` [PATCH 6/7] block: drop irq+lock when flushing queue plugs Jens Axboe
2016-12-03  3:15 ` [PATCH 7/7] null_blk: add parameters to ask for mq-sched and write cache Jens Axboe
2016-12-03  4:02 ` [PATCHSET/RFC] Make legacy IO schedulers work with blk-mq Jens Axboe
2016-12-03  4:04   ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2016-12-05 18:26 [PATCHSET/RFC v2] " Jens Axboe
2016-12-05 18:27 ` [PATCH 4/7] blk-mq: blk_account_io_start() takes a bool Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).