public inbox for linux-raid@vger.kernel.org
 help / color / mirror / Atom feed
From: jeffm@suse.com
To: linux-raid@vger.kernel.org, song@kernel.org
Cc: nfbrown@suse.com, colyli@suse.com, Jeff Mahoney <jeffm@suse.com>
Subject: [PATCH] mdraid: fix read/write bytes accounting
Date: Fri,  5 Jun 2020 16:19:53 -0400	[thread overview]
Message-ID: <20200605201953.11098-1-jeffm@suse.com> (raw)

From: Jeff Mahoney <jeffm@suse.com>

The i/o accounting published in /proc/diskstats for mdraid is currently
broken.  md_make_request does the accounting for every bio passed but
when a bio needs to be split, all the split bios are also submitted
through md_make_request, resulting in multiple accounting.

As a result, a quick test on a RAID6 volume displayed the following
behavior:

131072+0 records in
131072+0 records out
67108864 bytes (67 MB, 64 MiB) copied, 13.9227 s, 4.8 MB/s

... shows 131072 sectors read -- 64 MiB.  Correct.

512+0 records in
512+0 records out
67108864 bytes (67 MB, 64 MiB) copied, 0.287365 s, 234 MB/s

... shows 196608 sectors read -- 96 MiB.  With a 64k stripe size, we're
seeing some splitting.

512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 0.705004 s, 762 MB/s

... shows 4718624 sectors read -- 2.25 GiB transferred.  Lots of splitting
and a stats explosion.

The fix is to push the accounting into the personality make_request
callbacks so that only the bios actually submitted for I/O will be
accounted.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 drivers/md/md-faulty.c    |  1 +
 drivers/md/md-linear.c    |  1 +
 drivers/md/md-multipath.c |  1 +
 drivers/md/md.c           | 21 +++++++++++++++------
 drivers/md/md.h           |  1 +
 drivers/md/raid0.c        |  1 +
 drivers/md/raid1.c        |  3 +++
 drivers/md/raid10.c       |  3 +++
 drivers/md/raid5.c        |  3 +++
 9 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index 50ad4ba86f0e..25e2c72ef920 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -214,6 +214,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
 	} else
 		bio_set_dev(bio, conf->rdev->bdev);
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	generic_make_request(bio);
 	return true;
 }
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 26c75c0199fa..ec0dbc0f1d76 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -271,6 +271,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		bio = split;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	bio_set_dev(bio, tmp_dev->rdev->bdev);
 	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
 		start_sector + data_offset;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 152f9e65a226..c3afe3f62a88 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -131,6 +131,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	mp_bh->bio.bi_private = mp_bh;
 	mddev_check_writesame(mddev, &mp_bh->bio);
 	mddev_check_write_zeroes(mddev, &mp_bh->bio);
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	generic_make_request(&mp_bh->bio);
 	return true;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f567f536b529..e8078a16419b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -463,10 +463,24 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_handle_request);
 
+/*
+ * This is generic_start_io_acct without the inflight tracking.  Since
+ * we can't reliably call generic_end_io_acct, the inflight counter
+ * would also be unreliable and it's better to keep it at 0.
+ */
+void md_io_acct(struct mddev *mddev, int sgrp, unsigned int sectors)
+{
+	part_stat_lock();
+	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
+	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
+	part_stat_unlock();
+
+}
+EXPORT_SYMBOL_GPL(md_io_acct);
+
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
-	const int sgrp = op_stat_group(bio_op(bio));
 	struct mddev *mddev = bio->bi_disk->private_data;
 	unsigned int sectors;
 
@@ -498,11 +512,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 
 	md_handle_request(mddev, bio);
 
-	part_stat_lock();
-	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
-	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
-	part_stat_unlock();
-
 	return BLK_QC_T_NONE;
 }
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 612814d07d35..5834427f7557 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -741,6 +741,7 @@ extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   struct mddev *mddev);
+extern void md_io_acct(struct mddev *mddev, int sgrp, unsigned int sectors);
 
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 322386ff5d22..ac1109d1d48d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -633,6 +633,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 				disk_devt(mddev->gendisk), bio_sector);
 	mddev_check_writesame(mddev, bio);
 	mddev_check_write_zeroes(mddev, bio);
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	generic_make_request(bio);
 	return true;
 }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index dcd27f3da84e..f0d620e2b90f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1318,6 +1318,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->sectors = max_sectors;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	r1_bio->read_disk = rdisk;
 
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
@@ -1489,6 +1490,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->sectors = max_sectors;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
+
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ec136e44aef7..b8e8d7f67f65 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1202,6 +1202,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	}
 	slot = r10_bio->read_slot;
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
+
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r10_bio->devs[slot].bio = read_bio;
@@ -1485,6 +1487,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 		r10_bio->master_bio = bio;
 	}
 
+	md_io_acct(mddev, bio_op(bio), bio_sectors(bio));
 	atomic_set(&r10_bio->remaining, 1);
 	md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ab8067f9ce8c..e57109e39fcd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5316,6 +5316,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 	if (!raid5_read_one_chunk(mddev, raid_bio))
 		return raid_bio;
 
+	md_io_acct(mddev, bio_op(raid_bio), bio_sectors(raid_bio));
 	return NULL;
 }
 
@@ -5634,6 +5635,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
+	md_io_acct(mddev, bio_op(bi), bio_sectors(bi));
+
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		int previous;
-- 
2.16.4

             reply	other threads:[~2020-06-05 20:19 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-05 20:19 jeffm [this message]
2020-06-08  7:13 ` [PATCH] mdraid: fix read/write bytes accounting Artur Paszkiewicz
2020-06-23 14:21   ` Guoqing Jiang
2020-06-23 16:48     ` Artur Paszkiewicz
2020-06-25  9:13       ` Guoqing Jiang
2020-06-23 14:24   ` Jeff Mahoney
2020-06-09  6:37 ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200605201953.11098-1-jeffm@suse.com \
    --to=jeffm@suse.com \
    --cc=colyli@suse.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=nfbrown@suse.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox