All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Ming Lei <ming.lei@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	linux-block@vger.kernel.org, linux-nvme@lists.infradead.org,
	linux-scsi@vger.kernel.org
Subject: Re: [PATCH V2 05/13] block: only account passthrough IO from userspace
Date: Tue, 25 Jan 2022 08:19:06 +0100	[thread overview]
Message-ID: <20220125071906.GA27674@lst.de> (raw)
In-Reply-To: <20220125061634.GA26495@lst.de>

On Tue, Jan 25, 2022 at 07:16:34AM +0100, Christoph Hellwig wrote:
> So why not key accounting off "rq->bio && rq->bio->bi_bdev"
> and remove the need for the flag and the second half of the assignment
> above?  That is much less error prone and reduces code size.

Something like this, lightly tested:

---
From 5499d013341b492899d1fecde7680ff8ebd232e9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 Jan 2022 07:29:06 +0100
Subject: block: remove the part field from struct request

All file system I/O and most userspace passthrough bios have bi_bdev set.
Switch I/O accounting to directly use the bio and stop copying it into a
separate struct request field.

This changes behavior in that e.g. /dev/sgX requests are not accounted
to the gendisk for the SCSI disk any more, which is the correct thing to
do as they never went through that gendisk, and fixes a potential race
when the disk driver is unbound while /dev/sgX I/O is in progress.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-merge.c      | 12 ++++++------
 block/blk-mq.c         | 32 +++++++++++++-------------------
 block/blk.h            |  6 +++---
 include/linux/blk-mq.h |  1 -
 4 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4de34a332c9fd..43e46ea2f0152 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -739,11 +739,11 @@ void blk_rq_set_mixed_merge(struct request *rq)
 
 static void blk_account_io_merge_request(struct request *req)
 {
-	if (blk_do_io_stat(req)) {
-		part_stat_lock();
-		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
-		part_stat_unlock();
-	}
+	if (!blk_do_io_stat(req))
+		return;
+	part_stat_lock();
+	part_stat_inc(req->bio->bi_bdev, merges[op_stat_group(req_op(req))]);
+	part_stat_unlock();
 }
 
 static enum elv_merge blk_try_req_merge(struct request *req,
@@ -947,7 +947,7 @@ static void blk_account_io_merge_bio(struct request *req)
 		return;
 
 	part_stat_lock();
-	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+	part_stat_inc(req->bio->bi_bdev, merges[op_stat_group(req_op(req))]);
 	part_stat_unlock();
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f3bf3358a3bb2..01b3862347965 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -132,10 +132,12 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv,
 {
 	struct mq_inflight *mi = priv;
 
-	if ((!mi->part->bd_partno || rq->part == mi->part) &&
-	    blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
-		mi->inflight[rq_data_dir(rq)]++;
+	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+		return true;
+	if (mi->part->bd_partno && rq->bio && rq->bio->bi_bdev != mi->part)
+		return true;
 
+	mi->inflight[rq_data_dir(rq)]++;
 	return true;
 }
 
@@ -331,7 +333,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->tag = BLK_MQ_NO_TAG;
 	rq->internal_tag = BLK_MQ_NO_TAG;
 	rq->start_time_ns = ktime_get_ns();
-	rq->part = NULL;
 	blk_crypto_rq_set_defaults(rq);
 }
 EXPORT_SYMBOL(blk_rq_init);
@@ -368,7 +369,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 		rq->start_time_ns = ktime_get_ns();
 	else
 		rq->start_time_ns = 0;
-	rq->part = NULL;
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
 	rq->alloc_time_ns = alloc_time_ns;
 #endif
@@ -687,11 +687,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
-	if (req->part && blk_do_io_stat(req)) {
+	if (blk_do_io_stat(req)) {
 		const int sgrp = op_stat_group(req_op(req));
 
 		part_stat_lock();
-		part_stat_add(req->part, sectors[sgrp], bytes >> 9);
+		part_stat_add(req->bio->bi_bdev, sectors[sgrp], bytes >> 9);
 		part_stat_unlock();
 	}
 }
@@ -859,11 +859,12 @@ EXPORT_SYMBOL_GPL(blk_update_request);
 static void __blk_account_io_done(struct request *req, u64 now)
 {
 	const int sgrp = op_stat_group(req_op(req));
+	struct block_device *bdev = req->bio->bi_bdev;
 
 	part_stat_lock();
-	update_io_ticks(req->part, jiffies, true);
-	part_stat_inc(req->part, ios[sgrp]);
-	part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
+	update_io_ticks(bdev, jiffies, true);
+	part_stat_inc(bdev, ios[sgrp]);
+	part_stat_add(bdev, nsecs[sgrp], now - req->start_time_ns);
 	part_stat_unlock();
 }
 
@@ -874,21 +875,14 @@ static inline void blk_account_io_done(struct request *req, u64 now)
 	 * normal IO on queueing nor completion.  Accounting the
 	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && req->part &&
-	    !(req->rq_flags & RQF_FLUSH_SEQ))
+	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ))
 		__blk_account_io_done(req, now);
 }
 
 static void __blk_account_io_start(struct request *rq)
 {
-	/* passthrough requests can hold bios that do not have ->bi_bdev set */
-	if (rq->bio && rq->bio->bi_bdev)
-		rq->part = rq->bio->bi_bdev;
-	else if (rq->q->disk)
-		rq->part = rq->q->disk->part0;
-
 	part_stat_lock();
-	update_io_ticks(rq->part, jiffies, false);
+	update_io_ticks(rq->bio->bi_bdev, jiffies, false);
 	part_stat_unlock();
 }
 
diff --git a/block/blk.h b/block/blk.h
index 8bd43b3ad33d5..a7a5a5435e09d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -320,12 +320,12 @@ int blk_dev_init(void);
 /*
  * Contribute to IO statistics IFF:
  *
- *	a) it's attached to a gendisk, and
- *	b) the queue had IO stats enabled when this request was started
+ *	a) the queue had IO stats enabled when this request was started, and
+ *	b) it has an assigned block_device
  */
 static inline bool blk_do_io_stat(struct request *rq)
 {
-	return (rq->rq_flags & RQF_IO_STAT) && rq->q->disk;
+	return (rq->rq_flags & RQF_IO_STAT) && rq->bio && rq->bio->bi_bdev;
 }
 
 void update_io_ticks(struct block_device *part, unsigned long now, bool end);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d319ffa59354a..81769c01e6e4b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -99,7 +99,6 @@ struct request {
 		struct request *rq_next;
 	};
 
-	struct block_device *part;
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
 	/* Time that the first bio started allocating this request. */
 	u64 alloc_time_ns;
-- 
2.30.2


  reply	other threads:[~2022-01-25  7:22 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-22 11:10 [PATCH V2 00/13] block: don't drain file system I/O on del_gendisk Ming Lei
2022-01-22 11:10 ` [PATCH V2 01/13] block: declare blkcg_[init|exit]_queue in private header Ming Lei
2022-01-24 12:59   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 02/13] block: move initialization of q->blkg_list into blkcg_init_queue Ming Lei
2022-01-24 13:00   ` Christoph Hellwig
2022-01-24 18:32   ` Bart Van Assche
2022-01-22 11:10 ` [PATCH V2 03/13] block: move blkcg initialization/destroy into disk allocation/release handler Ming Lei
2022-01-24 13:02   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 04/13] block/wbt: fix negative inflight counter when remove scsi device Ming Lei
2022-02-17  7:45   ` Christoph Hellwig
2022-02-17 14:53     ` Jens Axboe
2022-01-22 11:10 ` [PATCH V2 05/13] block: only account passthrough IO from userspace Ming Lei
2022-01-24 13:05   ` Christoph Hellwig
2022-01-24 23:09     ` Ming Lei
2022-01-25  6:16       ` Christoph Hellwig
2022-01-25  7:19         ` Christoph Hellwig [this message]
2022-01-25  8:35           ` Ming Lei
2022-01-25  9:09           ` Ming Lei
2022-01-26  5:50             ` Christoph Hellwig
2022-01-26  7:21               ` Ming Lei
2022-01-26  8:10                 ` Christoph Hellwig
2022-01-26  8:33                   ` Ming Lei
2022-01-26  8:49                     ` Christoph Hellwig
2022-01-26  9:59                       ` Ming Lei
2022-01-26 16:37                         ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 06/13] block: don't remove hctx debugfs dir from blk_mq_exit_queue Ming Lei
2022-01-24 13:06   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 07/13] block: move q_usage_counter release into blk_queue_release Ming Lei
2022-01-24 13:09   ` Christoph Hellwig
2022-01-24 19:06   ` Bart Van Assche
2022-01-22 11:10 ` [PATCH V2 08/13] block: export __blk_mq_unfreeze_queue Ming Lei
2022-01-24 13:11   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 09/13] scsi: force unfreezing queue into atomic mode Ming Lei
2022-01-24 13:15   ` Christoph Hellwig
2022-01-24 23:21     ` Ming Lei
2022-01-25  7:27       ` Christoph Hellwig
2022-01-25  8:54         ` Ming Lei
2022-01-26  8:15           ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 10/13] block: add helper of disk_release_queue for release queue data for disk Ming Lei
2022-01-24 13:16   ` Christoph Hellwig
2022-01-24 23:27     ` Ming Lei
2022-01-25  6:17       ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 11/13] block: move blk_exit_queue into disk_release Ming Lei
2022-01-24 13:22   ` Christoph Hellwig
2022-01-24 23:38     ` Ming Lei
2022-01-22 11:10 ` [PATCH V2 12/13] block: move rq_qos_exit() into disk_release() Ming Lei
2022-01-24 13:28   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 13/13] block: don't drain file system I/O on del_gendisk Ming Lei
2022-01-24 13:39   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220125071906.GA27674@lst.de \
    --to=hch@lst.de \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.