From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Ming Lei <ming.lei@redhat.com>,
Alan Stern <stern@rowland.harvard.edu>,
Christoph Hellwig <hch@lst.de>,
Bart Van Assche <bart.vanassche@wdc.com>,
Jianchao Wang <jianchao.w.wang@oracle.com>,
Hannes Reinecke <hare@suse.de>,
Johannes Thumshirn <jthumshirn@suse.de>,
Adrian Hunter <adrian.hunter@intel.com>,
"James E.J. Bottomley" <jejb@linux.vnet.ibm.com>,
"Martin K. Petersen" <martin.petersen@oracle.com>,
linux-scsi@vger.kernel.org
Subject: [RFC PATCH V2 16/17] block: simplify runtime PM support
Date: Sat, 11 Aug 2018 15:12:19 +0800 [thread overview]
Message-ID: <20180811071220.357-17-ming.lei@redhat.com> (raw)
In-Reply-To: <20180811071220.357-1-ming.lei@redhat.com>
This patch simplifies runtime PM support by the following approach:
1) resume device in blk_queue_enter() if this device is
runtime-suspended or runtime-suspending
2) freeze queue in blk_pre_runtime_suspend()
3) unfreeze queue in blk_post_runtime_resume()
4) remove checking on RQF_PM because now we require an out-of-band PM
request to resume the device
5) introduce blk_unfreeze_queue_lock() and blk_freeze_queue_lock()
so that both runtime-PM and system-PM can use them to freeze/unfreeze
queue and avoid freeze & unfreeze mismatch
Then we can remove blk_pm_allow_request(), and more importantly this way
can be applied to blk-mq path too.
Finally the IO queue associated with scsi_device is kept runtime
resumed in __scsi_execute() when sending non-PM requests, and this
way makes sure that the LUN is active for handling non-PM RQF_PREEMPT
requests.
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-core.c | 106 ++++++++++++++++++++++++++++--------------------
block/blk-mq.c | 22 ++++++++++
block/elevator.c | 25 ------------
drivers/scsi/scsi_lib.c | 14 +++++--
include/linux/blk-mq.h | 2 +
include/linux/blkdev.h | 3 ++
6 files changed, 101 insertions(+), 71 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 67d34a43359f..939e1dae4ea8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -890,6 +890,28 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
}
EXPORT_SYMBOL(blk_alloc_queue);
+#ifdef CONFIG_PM
+static void blk_resume_queue(struct request_queue *q)
+{
+ int rpm_status;
+
+ if (!q->dev)
+ return;
+
+ spin_lock_irq(q->queue_lock);
+ rpm_status = q->rpm_status;
+ spin_unlock_irq(q->queue_lock);
+
+ /* PM request needs to be dealt with out of band */
+ if (rpm_status == RPM_SUSPENDED || rpm_status == RPM_SUSPENDING)
+ pm_runtime_resume(q->dev);
+}
+#else
+static void blk_resume_queue(struct request_queue *q)
+{
+}
+#endif
+
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -913,11 +935,20 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
*/
smp_rmb();
+ blk_resume_queue(q);
+
wait_event(q->mq_freeze_wq,
atomic_read(&q->mq_freeze_depth) == 0 ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
+
+ /*
+ * This allocation may be blocked via queue freezing before
+ * the queue is suspended, so we have to resume queue again
+ * after waking up.
+ */
+ blk_resume_queue(q);
}
}
@@ -1024,6 +1055,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
q->bypass_depth = 1;
queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
+ mutex_init(&q->freeze_lock);
init_waitqueue_head(&q->mq_freeze_wq);
/*
@@ -1471,6 +1503,23 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
return ERR_PTR(-ENOMEM);
}
+#ifdef CONFIG_PM
+static void blk_pm_add_request(struct request_queue *q)
+{
+ if (q->dev)
+ q->nr_pending++;
+}
+static void blk_pm_put_request(struct request_queue *q)
+{
+ if (q->dev && !--q->nr_pending)
+ pm_runtime_mark_last_busy(q->dev);
+}
+#else
+static inline void blk_pm_put_request(struct request_queue *q) {}
+static inline void blk_pm_add_request(struct request_queue *q){}
+#endif
+
+
/**
* get_request - get a free request
* @q: request_queue to allocate request from
@@ -1499,16 +1548,19 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
+ blk_pm_add_request(q);
rq = __get_request(rl, op, bio, flags, gfp);
if (!IS_ERR(rq))
return rq;
if (op & REQ_NOWAIT) {
+ blk_pm_put_request(q);
blk_put_rl(rl);
return ERR_PTR(-EAGAIN);
}
if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
+ blk_pm_put_request(q);
blk_put_rl(rl);
return rq;
}
@@ -1519,6 +1571,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
trace_block_sleeprq(q, bio, op);
+ blk_pm_put_request(q);
spin_unlock_irq(q->queue_lock);
io_schedule();
@@ -1687,16 +1740,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
}
EXPORT_SYMBOL_GPL(part_round_stats);
-#ifdef CONFIG_PM
-static void blk_pm_put_request(struct request *rq)
-{
- if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
- pm_runtime_mark_last_busy(rq->q->dev);
-}
-#else
-static inline void blk_pm_put_request(struct request *rq) {}
-#endif
-
void __blk_put_request(struct request_queue *q, struct request *req)
{
req_flags_t rq_flags = req->rq_flags;
@@ -1712,7 +1755,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
lockdep_assert_held(q->queue_lock);
blk_req_zone_write_unlock(req);
- blk_pm_put_request(req);
+ blk_pm_put_request(q);
elv_completed_request(q, req);
@@ -2708,30 +2751,6 @@ void blk_account_io_done(struct request *req, u64 now)
}
}
-#ifdef CONFIG_PM
-/*
- * Don't process normal requests when queue is suspended
- * or in the process of suspending/resuming
- */
-static bool blk_pm_allow_request(struct request *rq)
-{
- switch (rq->q->rpm_status) {
- case RPM_RESUMING:
- case RPM_SUSPENDING:
- return rq->rq_flags & RQF_PM;
- case RPM_SUSPENDED:
- return false;
- default:
- return true;
- }
-}
-#else
-static bool blk_pm_allow_request(struct request *rq)
-{
- return true;
-}
-#endif
-
void blk_account_io_start(struct request *rq, bool new_io)
{
struct hd_struct *part;
@@ -2776,13 +2795,8 @@ static struct request *elv_next_request(struct request_queue *q)
WARN_ON_ONCE(q->mq_ops);
while (1) {
- list_for_each_entry(rq, &q->queue_head, queuelist) {
- if (blk_pm_allow_request(rq))
- return rq;
-
- if (rq->rq_flags & RQF_SOFTBARRIER)
- break;
- }
+ list_for_each_entry(rq, &q->queue_head, queuelist)
+ return rq;
/*
* Flush request is running and flush request isn't queueable
@@ -3786,6 +3800,10 @@ int blk_pre_runtime_suspend(struct request_queue *q)
q->rpm_status = RPM_SUSPENDING;
}
spin_unlock_irq(q->queue_lock);
+
+ if (!ret)
+ blk_freeze_queue_lock(q);
+
return ret;
}
EXPORT_SYMBOL(blk_pre_runtime_suspend);
@@ -3863,13 +3881,15 @@ void blk_post_runtime_resume(struct request_queue *q, int err)
spin_lock_irq(q->queue_lock);
if (!err) {
q->rpm_status = RPM_ACTIVE;
- __blk_run_queue(q);
pm_runtime_mark_last_busy(q->dev);
pm_request_autosuspend(q->dev);
} else {
q->rpm_status = RPM_SUSPENDED;
}
spin_unlock_irq(q->queue_lock);
+
+ if (!err)
+ blk_unfreeze_queue_lock(q);
}
EXPORT_SYMBOL(blk_post_runtime_resume);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5226fcf92cbe..aea121c41a30 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -204,6 +204,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+void blk_unfreeze_queue_lock(struct request_queue *q)
+{
+ mutex_lock(&q->freeze_lock);
+ if (q->q_frozen) {
+ blk_mq_unfreeze_queue(q);
+ q->q_frozen = false;
+ }
+ mutex_unlock(&q->freeze_lock);
+}
+EXPORT_SYMBOL(blk_unfreeze_queue_lock);
+
+void blk_freeze_queue_lock(struct request_queue *q)
+{
+ mutex_lock(&q->freeze_lock);
+ if (!q->q_frozen) {
+ blk_mq_freeze_queue(q);
+ q->q_frozen = true;
+ }
+ mutex_unlock(&q->freeze_lock);
+}
+EXPORT_SYMBOL(blk_freeze_queue_lock);
+
/*
* FIXME: replace the scsi_internal_device_*block_nowait() calls in the
* mpt3sas driver such that this function can be removed.
diff --git a/block/elevator.c b/block/elevator.c
index 7438cf285907..4abc424cd5fc 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -557,27 +557,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
}
-#ifdef CONFIG_PM
-static void blk_pm_requeue_request(struct request *rq)
-{
- if (rq->q->dev && !(rq->rq_flags & RQF_PM))
- rq->q->nr_pending--;
-}
-
-static void blk_pm_add_request(struct request_queue *q, struct request *rq)
-{
- if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
- (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
- pm_request_resume(q->dev);
-}
-#else
-static inline void blk_pm_requeue_request(struct request *rq) {}
-static inline void blk_pm_add_request(struct request_queue *q,
- struct request *rq)
-{
-}
-#endif
-
void elv_requeue_request(struct request_queue *q, struct request *rq)
{
/*
@@ -592,8 +571,6 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
rq->rq_flags &= ~RQF_STARTED;
- blk_pm_requeue_request(rq);
-
__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}
@@ -620,8 +597,6 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
trace_block_rq_insert(q, rq);
- blk_pm_add_request(q, rq);
-
rq->q = q;
if (rq->rq_flags & RQF_SOFTBARRIER) {
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 965781e2879c..6284b378a88d 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -278,12 +278,17 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
struct request *req;
struct scsi_request *rq;
int ret = DRIVER_ERROR << 24;
+ bool pm_rq = rq_flags & RQF_PM;
+
+ if (!pm_rq)
+ scsi_autopm_get_device(sdev);
req = blk_get_request(sdev->host->admin_q,
data_direction == DMA_TO_DEVICE ?
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
if (IS_ERR(req))
- return ret;
+ goto fail;
+
rq = scsi_req(req);
if (bufflen && blk_rq_map_kern(req->q, req,
@@ -327,6 +332,9 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
atomic_dec(&sdev->nr_admin_pending);
wake_up_all(&sdev->admin_wq);
+ fail:
+ if (!pm_rq)
+ scsi_autopm_put_device(sdev);
return ret;
}
@@ -3132,7 +3140,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
{
int err;
- blk_mq_freeze_queue(sdev->request_queue);
+ blk_freeze_queue_lock(sdev->request_queue);
mutex_lock(&sdev->state_mutex);
err = scsi_device_set_state(sdev, SDEV_QUIESCE);
@@ -3162,7 +3170,7 @@ void scsi_device_resume(struct scsi_device *sdev)
scsi_device_set_state(sdev, SDEV_RUNNING);
mutex_unlock(&sdev->state_mutex);
- blk_mq_unfreeze_queue(sdev->request_queue);
+ blk_unfreeze_queue_lock(sdev->request_queue);
}
EXPORT_SYMBOL(scsi_device_resume);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index afde18ac5b31..00970a0b4b06 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -295,6 +295,8 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
+void blk_freeze_queue_lock(struct request_queue *q);
+void blk_unfreeze_queue_lock(struct request_queue *q);
int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1bd4f02d11c0..4b2abdccec1f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -635,6 +635,9 @@ struct request_queue {
int bypass_depth;
atomic_t mq_freeze_depth;
+ bool q_frozen;
+ struct mutex freeze_lock;
+
#if defined(CONFIG_BLK_DEV_BSG)
bsg_job_fn *bsg_job_fn;
struct bsg_class_device bsg_dev;
--
2.9.5
next prev parent reply other threads:[~2018-08-11 7:12 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-11 7:12 [RFC PATCH V2 00/17] SCSI: introduce per-host admin queue & enable runtime PM Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 01/17] blk-mq: allow to pass default queue flags for creating & initializing queue Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 02/17] blk-mq: convert BLK_MQ_F_NO_SCHED into per-queue flag Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 03/17] block: rename QUEUE_FLAG_NO_SCHED as QUEUE_FLAG_ADMIN Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 04/17] blk-mq: don't reserve tags for admin queue Ming Lei
2018-08-13 10:02 ` jianchao.wang
2018-08-13 10:48 ` Ming Lei
2018-08-14 1:29 ` jianchao.wang
2018-08-14 2:10 ` Ming Lei
2018-08-14 2:47 ` jianchao.wang
2018-08-14 3:06 ` Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 05/17] SCSI: try to retrieve request_queue via 'scsi_cmnd' if possible Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 06/17] SCSI: pass 'scsi_device' instance from 'scsi_request' Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 07/17] SCSI: prepare for introducing admin queue for legacy path Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 08/17] SCSI: pass scsi_device to scsi_mq_prep_fn Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 09/17] SCSI: don't set .queuedata in scsi_mq_alloc_queue() Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 10/17] SCSI: deal with admin queue busy Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 11/17] SCSI: track pending admin commands Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 12/17] SCSI: create admin queue for each host Ming Lei
2018-08-14 5:56 ` jianchao.wang
2018-08-14 6:03 ` jianchao.wang
2018-08-14 11:34 ` Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 13/17] SCSI: use the dedicated admin queue to send admin commands Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 14/17] SCSI: transport_spi: resume a quiesced device Ming Lei
2018-08-11 7:12 ` [RFC PATCH V2 15/17] SCSI: use admin queue to implement queue QUIESCE Ming Lei
2018-08-11 7:12 ` Ming Lei [this message]
2018-08-15 6:39 ` [RFC PATCH V2 16/17] block: simplify runtime PM support jianchao.wang
2018-08-15 8:28 ` Ming Lei
2018-08-15 9:47 ` jianchao.wang
2018-08-15 11:23 ` Ming Lei
2018-08-16 8:26 ` jianchao.wang
2018-08-11 7:12 ` [RFC PATCH V2 17/17] block: enable runtime PM for blk-mq Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180811071220.357-17-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=axboe@kernel.dk \
--cc=bart.vanassche@wdc.com \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=jejb@linux.vnet.ibm.com \
--cc=jianchao.w.wang@oracle.com \
--cc=jthumshirn@suse.de \
--cc=linux-block@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=stern@rowland.harvard.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).