From: Jens Axboe <axboe@fb.com>
To: <axboe@kernel.dk>, <linux-kernel@vger.kernel.org>,
<linux-block@vger.kernel.org>
Cc: <hch@lst.de>, Jens Axboe <axboe@fb.com>
Subject: [PATCH 3/4] blk-mq: implement hybrid poll mode for sync O_DIRECT
Date: Tue, 1 Nov 2016 15:05:24 -0600 [thread overview]
Message-ID: <1478034325-28232-4-git-send-email-axboe@fb.com> (raw)
In-Reply-To: <1478034325-28232-1-git-send-email-axboe@fb.com>
This patch enables a hybrid polling mode. Instead of polling immediately
after IO submission, we can induce an artificial delay, and then poll after that.
For example, if the IO is presumed to complete in 8 usecs from now, we
can sleep for 4 usecs, wake up, and then do our polling. This still puts
a sleep/wakeup cycle in the IO path, but instead of the wakeup happening
after the IO has completed, it'll happen before. With this hybrid
scheme, we can achieve big latency reductions while still using the same
amount of CPU (or less).
Signed-off-by: Jens Axboe <axboe@fb.com>
---
block/blk-mq.c | 38 ++++++++++++++++++++++++++++++++++++++
block/blk-sysfs.c | 29 +++++++++++++++++++++++++++++
block/blk.h | 1 +
include/linux/blkdev.h | 1 +
4 files changed, 69 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4ef35588c299..caa55bec9411 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -302,6 +302,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
rq->rq_flags = 0;
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+ clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
blk_mq_put_tag(hctx, ctx, tag);
blk_queue_exit(q);
}
@@ -2352,11 +2353,48 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+static void blk_mq_poll_hybrid_sleep(struct request_queue *q,
+ struct request *rq)
+{
+ struct hrtimer_sleeper hs;
+ ktime_t kt;
+
+ if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+ return;
+
+ set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
+
+ /*
+ * This will be replaced with the stats tracking code, using
+ * 'avg_completion_time / 2' as the pre-sleep target.
+ */
+ kt = ktime_set(0, q->poll_nsec);
+
+ hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_set_expires(&hs.timer, kt);
+
+ hrtimer_init_sleeper(&hs, current);
+ do {
+ if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
+ break;
+ set_current_state(TASK_INTERRUPTIBLE);
+ hrtimer_start_expires(&hs.timer, HRTIMER_MODE_REL);
+ if (hs.task)
+ io_schedule();
+ hrtimer_cancel(&hs.timer);
+ } while (hs.task && !signal_pending(current));
+
+ __set_current_state(TASK_RUNNING);
+ destroy_hrtimer_on_stack(&hs.timer);
+}
+
bool blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
struct request_queue *q = hctx->queue;
long state;
+ blk_mq_poll_hybrid_sleep(q, rq);
+
hctx->poll_considered++;
state = current->state;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5bb4648f434a..467b81c6713c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -336,6 +336,28 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
+static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(q->poll_nsec / 1000, page);
+}
+
+static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ unsigned long poll_usec;
+ ssize_t ret;
+
+ if (!q->mq_ops || !q->mq_ops->poll)
+ return -EINVAL;
+
+ ret = queue_var_store(&poll_usec, page, count);
+ if (ret < 0)
+ return ret;
+
+ q->poll_nsec = poll_usec * 1000;
+ return ret;
+}
+
static ssize_t queue_poll_show(struct request_queue *q, char *page)
{
return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
@@ -562,6 +584,12 @@ static struct queue_sysfs_entry queue_poll_entry = {
.store = queue_poll_store,
};
+static struct queue_sysfs_entry queue_poll_delay_entry = {
+ .attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_poll_delay_show,
+ .store = queue_poll_delay_store,
+};
+
static struct queue_sysfs_entry queue_wc_entry = {
.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
.show = queue_wc_show,
@@ -608,6 +636,7 @@ static struct attribute *default_attrs[] = {
&queue_wc_entry.attr,
&queue_dax_entry.attr,
&queue_stats_entry.attr,
+ &queue_poll_delay_entry.attr,
NULL,
};
diff --git a/block/blk.h b/block/blk.h
index aa132dea598c..041185e5f129 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -111,6 +111,7 @@ void blk_account_io_done(struct request *req);
enum rq_atomic_flags {
REQ_ATOM_COMPLETE = 0,
REQ_ATOM_STARTED,
+ REQ_ATOM_POLL_SLEPT,
};
/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dcd8d6e8801f..6acd220dc3f3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -502,6 +502,7 @@ struct request_queue {
unsigned int request_fn_active;
unsigned int rq_timeout;
+ unsigned int poll_nsec;
struct timer_list timeout;
struct work_struct timeout_work;
struct list_head timeout_list;
--
2.7.4
next prev parent reply other threads:[~2016-11-01 21:06 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-01 21:05 [PATCHSET] block: IO polling improvements Jens Axboe
2016-11-01 21:05 ` [PATCH 1/4] block: add scalable completion tracking of requests Jens Axboe
2016-11-01 22:25 ` Johannes Thumshirn
2016-11-02 5:37 ` Jens Axboe
2016-11-02 14:52 ` Christoph Hellwig
2016-11-02 14:55 ` Jens Axboe
2016-11-02 14:59 ` Christoph Hellwig
2016-11-03 11:17 ` Ming Lei
2016-11-03 13:38 ` Jens Axboe
2016-11-03 14:57 ` Ming Lei
2016-11-03 16:55 ` Jens Axboe
2016-11-04 23:13 ` Ming Lei
2016-11-05 20:49 ` Jens Axboe
2016-11-05 20:59 ` Jens Axboe
2016-11-03 14:10 ` Bart Van Assche
2016-11-03 14:18 ` Jens Axboe
2016-11-01 21:05 ` [PATCH 2/4] block: move poll code to blk-mq Jens Axboe
2016-11-02 14:54 ` Christoph Hellwig
2016-11-01 21:05 ` Jens Axboe [this message]
2016-11-02 14:54 ` [PATCH 3/4] blk-mq: implement hybrid poll mode for sync O_DIRECT Christoph Hellwig
2016-11-03 12:27 ` Ming Lei
2016-11-03 13:41 ` Jens Axboe
2016-11-03 14:01 ` Bart Van Assche
2016-11-03 14:15 ` Jens Axboe
2016-11-01 21:05 ` [PATCH 4/4] blk-mq: make the polling code adaptive Jens Axboe
2016-11-02 14:51 ` [PATCHSET] block: IO polling improvements Christoph Hellwig
2016-11-02 14:54 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1478034325-28232-4-git-send-email-axboe@fb.com \
--to=axboe@fb.com \
--cc=axboe@kernel.dk \
--cc=hch@lst.de \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.