From: Omar Sandoval <osandov@osandov.com>
To: "Pavel Begunkov (Silence)" <asml.silence@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
osandov@fb.com, ming.lei@redhat.com, Hou Tao <houtao1@huawei.com>
Subject: Re: [PATCH 1/1] blk-mq: Fix disabled hybrid polling
Date: Tue, 28 May 2019 11:37:26 -0700 [thread overview]
Message-ID: <20190528183726.GA25022@vader> (raw)
In-Reply-To: <dd30f4d94aa19956ad4500b1177741fd071ec37f.1558791181.git.asml.silence@gmail.com>
On Sat, May 25, 2019 at 04:42:11PM +0300, Pavel Begunkov (Silence) wrote:
> From: Pavel Begunkov <asml.silence@gmail.com>
>
> Commit 4bc6339a583cec650b05 ("block: move blk_stat_add() to
> __blk_mq_end_request()") moved blk_stat_add(), so now it's called after
> blk_update_request(), which zeroes rq->__data_len. Without length,
> blk_stat_add() can't calculate stat bucket and returns error,
> effectively disabling hybrid polling.
I don't see how this patch fixes this problem, am I missing something?
The timing issue seems orthogonal.
> __blk_mq_end_request() is a right place to call blk_stat_add(), as it's
> guaranteed to be called for each request. Yet, calculating time there
> won't provide sufficient accuracy/precision for finer tuned hybrid
> polling, because a path from __blk_mq_complete_request() to
> __blk_mq_end_request() adds unpredictable overhead.
>
> Add io_end_time_ns field in struct request, save time as soon as
> possible (at __blk_mq_complete_request()) and reuse later.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
> block/blk-mq.c | 13 ++++++++++---
> block/blk-stat.c | 4 ++--
> block/blk-stat.h | 2 +-
> include/linux/blkdev.h | 11 +++++++++++
> 4 files changed, 24 insertions(+), 6 deletions(-)
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 32b8ad3d341b..8f6b6bfe0ccb 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -330,6 +330,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> else
> rq->start_time_ns = 0;
> rq->io_start_time_ns = 0;
> + rq->io_end_time_ns = 0;
> rq->nr_phys_segments = 0;
> #if defined(CONFIG_BLK_DEV_INTEGRITY)
> rq->nr_integrity_segments = 0;
> @@ -532,14 +533,17 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
>
> inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
> {
> - u64 now = 0;
> + u64 now = rq->io_end_time_ns;
Kyber expects the timestamp passed in to kyber_complete_request() to
include the software overhead. iostat should probably include the
software overhead, too. So, we probably won't be able to avoid calling
ktime_get() twice, once for I/O time and one for the end-to-end time.
> - if (blk_mq_need_time_stamp(rq))
> + /* called directly bypassing __blk_mq_complete_request */
> + if (blk_mq_need_time_stamp(rq) && !now) {
> now = ktime_get_ns();
> + rq->io_end_time_ns = now;
> + }
>
> if (rq->rq_flags & RQF_STATS) {
> blk_mq_poll_stats_start(rq->q);
> - blk_stat_add(rq, now);
> + blk_stat_add(rq);
> }
>
> if (rq->internal_tag != -1)
> @@ -579,6 +583,9 @@ static void __blk_mq_complete_request(struct request *rq)
> bool shared = false;
> int cpu;
>
> + if (blk_mq_need_time_stamp(rq))
> + rq->io_end_time_ns = ktime_get_ns();
> +
> WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
> /*
> * Most of single queue controllers, there is only one irq vector
> diff --git a/block/blk-stat.c b/block/blk-stat.c
> index 940f15d600f8..9b9b30927ea8 100644
> --- a/block/blk-stat.c
> +++ b/block/blk-stat.c
> @@ -48,7 +48,7 @@ void blk_rq_stat_add(struct blk_rq_stat *stat, u64 value)
> stat->nr_samples++;
> }
>
> -void blk_stat_add(struct request *rq, u64 now)
> +void blk_stat_add(struct request *rq)
> {
> struct request_queue *q = rq->q;
> struct blk_stat_callback *cb;
> @@ -56,7 +56,7 @@ void blk_stat_add(struct request *rq, u64 now)
> int bucket;
> u64 value;
>
> - value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
> + value = blk_rq_io_time(rq);
>
> blk_throtl_stat_add(rq, value);
>
> diff --git a/block/blk-stat.h b/block/blk-stat.h
> index 17b47a86eefb..2653818cee36 100644
> --- a/block/blk-stat.h
> +++ b/block/blk-stat.h
> @@ -65,7 +65,7 @@ struct blk_stat_callback {
> struct blk_queue_stats *blk_alloc_queue_stats(void);
> void blk_free_queue_stats(struct blk_queue_stats *);
>
> -void blk_stat_add(struct request *rq, u64 now);
> +void blk_stat_add(struct request *rq);
>
> /* record time/size info in request but not add a callback */
> void blk_stat_enable_accounting(struct request_queue *q);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 592669bcc536..2a8d4b68d707 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -198,6 +198,9 @@ struct request {
> u64 start_time_ns;
> /* Time that I/O was submitted to the device. */
> u64 io_start_time_ns;
> + /* Time that I/O was reported completed by the device. */
> + u64 io_end_time_ns;
> +
>
> #ifdef CONFIG_BLK_WBT
> unsigned short wbt_flags;
> @@ -385,6 +388,14 @@ static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
>
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> +static inline u64 blk_rq_io_time(struct request *rq)
> +{
> + u64 start = rq->io_start_time_ns;
> + u64 end = rq->io_end_time_ns;
> +
> + return (end - start) ? end - start : 0;
I think you meant:
return end >= start ? end - start : 0;
> +}
> +
> struct request_queue {
> /*
> * Together with queue_head for cacheline sharing
> --
> 2.21.0
>
next prev parent reply other threads:[~2019-05-28 18:37 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-25 13:42 [PATCH 1/1] blk-mq: Fix disabled hybrid polling Pavel Begunkov (Silence)
2019-05-28 18:37 ` Omar Sandoval [this message]
2019-05-30 9:19 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190528183726.GA25022@vader \
--to=osandov@osandov.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=houtao1@huawei.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ming.lei@redhat.com \
--cc=osandov@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.