public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
From: Chao Leng <lengchao@huawei.com>
To: Sagi Grimberg <sagi@grimberg.me>,
	<linux-nvme@lists.infradead.org>, <linux-block@vger.kernel.org>
Cc: <hch@lst.de>, <kbusch@kernel.org>, <ming.lei@redhat.com>,
	<axboe@kernel.dk>
Subject: Re: [PATCH v2 1/2] blk-mq: add tagset quiesce interface
Date: Fri, 14 Oct 2022 10:09:47 +0800	[thread overview]
Message-ID: <1ffd3e10-eb5b-1edf-b2ab-16ca00643c81@huawei.com> (raw)
In-Reply-To: <99dac305-206c-4e1b-a1ec-50e107258b6b@grimberg.me>



On 2022/10/13 18:28, Sagi Grimberg wrote:
> 
> 
> On 10/13/22 12:44, Chao Leng wrote:
>> Drivers that have shared tagsets may need to quiesce potentially a lot
>> of request queues that all share a single tagset (e.g. nvme). Add an interface
>> to quiesce all the queues on a given tagset. This interface is useful because
>> it can speed up the quiesce by doing it in parallel.
>>
>> For tagsets that have BLK_MQ_F_BLOCKING set, we use call_srcu to all hctxs
>> in parallel such that all of them wait for the same rcu elapsed period with
>> a per-hctx heap allocated rcu_synchronize. For tagsets that don't have
>> BLK_MQ_F_BLOCKING set, we simply call a single synchronize_rcu as this is
>> sufficient.
>>
>> Some queues never need to be quiesced (e.g. nvme connect_q), so
>> introduce QUEUE_FLAG_NOQUIESCED to let the tagset quiesce interface
>> skip such queues.
> 
> I wouldn't say it is never quiesced, nor that it never will be; we just
> don't happen to quiesce it today...
> 
>>
>> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>> Signed-off-by: Chao Leng <lengchao@huawei.com>
>> ---
>>   block/blk-mq.c         | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   include/linux/blk-mq.h |  2 ++
>>   include/linux/blkdev.h |  2 ++
>>   3 files changed, 79 insertions(+)
>>
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 8070b6c10e8d..ebe25da08156 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -29,6 +29,7 @@
>>   #include <linux/prefetch.h>
>>   #include <linux/blk-crypto.h>
>>   #include <linux/part_stat.h>
>> +#include <linux/rcupdate_wait.h>
>>   #include <trace/events/block.h>
>> @@ -311,6 +312,80 @@ void blk_mq_unquiesce_queue(struct request_queue *q)
>>   }
>>   EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
>> +static void blk_mq_quiesce_blocking_tagset(struct blk_mq_tag_set *set)
>> +{
>> +    int i = 0;
>> +    int count = 0;
>> +    struct request_queue *q;
>> +    struct rcu_synchronize *rcu;
>> +
>> +    list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> +        if (blk_queue_noquiesced(q))
>> +            continue;
>> +
>> +        blk_mq_quiesce_queue_nowait(q);
>> +        count++;
>> +    }
>> +
>> +    rcu = kvmalloc(count * sizeof(*rcu), GFP_KERNEL);
>> +    if (rcu) {
>> +        list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> +            if (blk_queue_noquiesced(q))
>> +                continue;
>> +
>> +            init_rcu_head(&rcu[i].head);
>> +            init_completion(&rcu[i].completion);
>> +            call_srcu(q->srcu, &rcu[i].head, wakeme_after_rcu);
>> +            i++;
>> +        }
>> +
>> +        for (i = 0; i < count; i++) {
>> +            wait_for_completion(&rcu[i].completion);
>> +            destroy_rcu_head(&rcu[i].head);
>> +        }
>> +        kvfree(rcu);
>> +    } else {
>> +        list_for_each_entry(q, &set->tag_list, tag_set_list)
>> +            synchronize_srcu(q->srcu);
>> +    }
>> +}
>> +
>> +static void blk_mq_quiesce_nonblocking_tagset(struct blk_mq_tag_set *set)
>> +{
>> +    struct request_queue *q;
>> +
>> +    list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> +        if (blk_queue_noquiesced(q))
>> +            continue;
>> +
>> +        blk_mq_quiesce_queue_nowait(q);
>> +    }
>> +    synchronize_rcu();
>> +}
>> +
>> +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set)
>> +{
>> +    mutex_lock(&set->tag_list_lock);
>> +    if (set->flags & BLK_MQ_F_BLOCKING)
>> +        blk_mq_quiesce_blocking_tagset(set);
>> +    else
>> +        blk_mq_quiesce_nonblocking_tagset(set);
>> +
>> +    mutex_unlock(&set->tag_list_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset);
>> +
>> +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set)
>> +{
>> +    struct request_queue *q;
>> +
>> +    mutex_lock(&set->tag_list_lock);
>> +    list_for_each_entry(q, &set->tag_list, tag_set_list)
>> +        blk_mq_unquiesce_queue(q);
>> +    mutex_unlock(&set->tag_list_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset);
>> +
>>   void blk_mq_wake_waiters(struct request_queue *q)
>>   {
>>       struct blk_mq_hw_ctx *hctx;
>> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
>> index ba18e9bdb799..1df47606d0a7 100644
>> --- a/include/linux/blk-mq.h
>> +++ b/include/linux/blk-mq.h
>> @@ -877,6 +877,8 @@ void blk_mq_start_hw_queues(struct request_queue *q);
>>   void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
>>   void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
>>   void blk_mq_quiesce_queue(struct request_queue *q);
>> +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set);
>> +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set);
>>   void blk_mq_wait_quiesce_done(struct request_queue *q);
>>   void blk_mq_unquiesce_queue(struct request_queue *q);
>>   void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 50e358a19d98..f15544299a67 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -579,6 +579,7 @@ struct request_queue {
>>   #define QUEUE_FLAG_HCTX_ACTIVE    28    /* at least one blk-mq hctx is active */
>>   #define QUEUE_FLAG_NOWAIT       29    /* device supports NOWAIT */
>>   #define QUEUE_FLAG_SQ_SCHED     30    /* single queue style io dispatch */
>> +#define QUEUE_FLAG_NOQUIESCED    31    /* queue is never quiesced */
> 
> the comment is misleading. If this is truly a queue that is never
> quiesced then blk_mq_quiesce_queue() and friends need to skip it.
> 
> I'd call it self_quiesce or something that would reflect that it is
> black-listed from tagset-wide quiesce.
Yes, you are right. I will modify the comment in patch V3.
> .


  reply	other threads:[~2022-10-14  2:09 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-13  9:44 [PATCH v2 0/2] improve nvme quiesce time for large amount of namespaces Chao Leng
2022-10-13  9:44 ` [PATCH v2 1/2] blk-mq: add tagset quiesce interface Chao Leng
2022-10-13 10:28   ` Sagi Grimberg
2022-10-14  2:09     ` Chao Leng [this message]
2022-10-17 13:43     ` Christoph Hellwig
2022-10-18  9:51       ` Chao Leng
2022-10-17 13:39   ` Christoph Hellwig
2022-10-17 13:42     ` Christoph Hellwig
2022-10-18  8:39       ` Sagi Grimberg
2022-10-18  8:55         ` Christoph Hellwig
2022-10-18  9:06           ` Sagi Grimberg
2022-10-18 11:05             ` Christoph Hellwig
2022-10-18  9:52       ` Chao Leng
2022-10-17 15:21     ` Paul E. McKenney
2022-10-17 15:31       ` Christoph Hellwig
2022-10-17 22:41         ` Paul E. McKenney
2022-10-18  5:19           ` Christoph Hellwig
2022-10-19  0:35             ` Ming Lei
2022-10-19  7:15               ` Sagi Grimberg
2022-10-19  7:25                 ` Christoph Hellwig
2022-10-19  7:27                   ` Christoph Hellwig
2022-10-19  7:30                   ` Sagi Grimberg
2022-10-19  7:32                     ` Christoph Hellwig
2022-10-19  7:57                       ` Sagi Grimberg
2022-10-19  8:17                         ` Christoph Hellwig
2022-10-19  8:29                           ` Sagi Grimberg
2022-10-18  9:52       ` Chao Leng
2022-10-18 15:04         ` Paul E. McKenney
2022-10-19  2:39           ` Chao Leng
2022-10-18  9:52     ` Chao Leng
2022-10-13  9:44 ` [PATCH v2 2/2] nvme: use blk_mq_[un]quiesce_tagset Chao Leng
2022-10-13 10:22   ` Sagi Grimberg
2022-10-14  2:09     ` Chao Leng
2022-10-17 13:48     ` Christoph Hellwig
2022-10-13 14:32 ` [PATCH v2 0/2] improve nvme quiesce time for large amount of namespaces Chaitanya Kulkarni
2022-10-14  2:12   ` Chao Leng
2022-10-15  0:30     ` Chaitanya Kulkarni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1ffd3e10-eb5b-1edf-b2ab-16ca00643c81@huawei.com \
    --to=lengchao@huawei.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=ming.lei@redhat.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox