From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
To: linux-block@vger.kernel.org, Jens Axboe <axboe@kernel.dk>,
linux-kernel@vger.kernel.org
Cc: Mikulas Patocka <mpatocka@redhat.com>,
Mike Snitzer <snitzer@redhat.com>, Ming Lei <ming.lei@redhat.com>
Subject: [PATCH v3 2/3] block/diskstats: accumulate all per-cpu counters in one pass
Date: Tue, 24 Mar 2020 09:39:43 +0300 [thread overview]
Message-ID: <158503198306.1955.15150686320152629671.stgit@buzz> (raw)
In-Reply-To: <158503038812.1955.7827988255138056389.stgit@buzz>
Reading /proc/diskstats iterates over all CPUs separately for each field
being summed. It is faster to sum all fields in a single pass.
Hammering /proc/diskstats with fio shows a 2x performance improvement:
fio --name=test --numjobs=$JOBS --filename=/proc/diskstats \
--size=1k --bs=1k --fallocate=none --create_on_open=1 \
--time_based=1 --runtime=10 --invalidate=0 --group_report
         JOBS=1      JOBS=10
Before:  7k iops     64k iops
After:   18k iops    120k iops
This also makes the code more compact:
add/remove: 1/0 grow/shrink: 0/2 up/down: 194/-1540 (-1346)
Function old new delta
part_stat_read_all - 194 +194
diskstats_show 1344 631 -713
part_stat_show 1219 392 -827
Total: Before=14966947, After=14965601, chg -0.01%
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
block/genhd.c | 61 +++++++++++++++++++++++++++++++++------------
block/partition-generic.c | 35 ++++++++++++++------------
include/linux/genhd.h | 11 ++++++--
3 files changed, 72 insertions(+), 35 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c
index b210c12c4870..606e8755f6ed 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -69,6 +69,28 @@ void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify);
+#ifdef CONFIG_SMP
+void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ int cpu;
+
+ memset(stat, 0, sizeof(struct disk_stats));
+ for_each_possible_cpu(cpu) {
+ struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+ int group;
+
+ for (group = 0; group < NR_STAT_GROUPS; group++) {
+ stat->nsecs[group] += ptr->nsecs[group];
+ stat->sectors[group] += ptr->sectors[group];
+ stat->ios[group] += ptr->ios[group];
+ stat->merges[group] += ptr->merges[group];
+ }
+
+ stat->io_ticks += ptr->io_ticks;
+ stat->time_in_queue += ptr->time_in_queue;
+ }
+}
+#endif /* CONFIG_SMP */
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
{
@@ -1429,6 +1451,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
unsigned int inflight;
+ struct disk_stats stat;
/*
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1440,7 +1463,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
+ part_stat_read_all(hd, &stat);
inflight = part_in_flight(gp->queue, hd);
+
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
@@ -1450,23 +1475,27 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
- part_stat_read(hd, ios[STAT_READ]),
- part_stat_read(hd, merges[STAT_READ]),
- part_stat_read(hd, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(hd, STAT_READ),
- part_stat_read(hd, ios[STAT_WRITE]),
- part_stat_read(hd, merges[STAT_WRITE]),
- part_stat_read(hd, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+ NSEC_PER_MSEC),
inflight,
- jiffies_to_msecs(part_stat_read(hd, io_ticks)),
- jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
- part_stat_read(hd, ios[STAT_DISCARD]),
- part_stat_read(hd, merges[STAT_DISCARD]),
- part_stat_read(hd, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
- part_stat_read(hd, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+ jiffies_to_msecs(stat.io_ticks),
+ jiffies_to_msecs(stat.time_in_queue),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC)
);
}
disk_part_iter_exit(&piter);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 564fae77711d..6d3fcb5187cb 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -120,9 +120,12 @@ ssize_t part_stat_show(struct device *dev,
{
struct hd_struct *p = dev_to_part(dev);
struct request_queue *q = part_to_disk(p)->queue;
+ struct disk_stats stat;
unsigned int inflight;
+ part_stat_read_all(p, &stat);
inflight = part_in_flight(q, p);
+
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -130,23 +133,23 @@ ssize_t part_stat_show(struct device *dev,
"%8lu %8lu %8llu %8u "
"%8lu %8u"
"\n",
- part_stat_read(p, ios[STAT_READ]),
- part_stat_read(p, merges[STAT_READ]),
- (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(p, STAT_READ),
- part_stat_read(p, ios[STAT_WRITE]),
- part_stat_read(p, merges[STAT_WRITE]),
- (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ (unsigned long long)stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ (unsigned long long)stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
inflight,
- jiffies_to_msecs(part_stat_read(p, io_ticks)),
- jiffies_to_msecs(part_stat_read(p, time_in_queue)),
- part_stat_read(p, ios[STAT_DISCARD]),
- part_stat_read(p, merges[STAT_DISCARD]),
- (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
- part_stat_read(p, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
+ jiffies_to_msecs(stat.io_ticks),
+ jiffies_to_msecs(stat.time_in_queue),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ (unsigned long long)stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
}
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index f1066f10b062..ead3ffb7f327 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -375,6 +375,8 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
sizeof(struct disk_stats));
}
+void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat);
+
static inline int init_part_stats(struct hd_struct *part)
{
part->dkstats = alloc_percpu(struct disk_stats);
@@ -401,6 +403,12 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
memset(&part->dkstats, value, sizeof(struct disk_stats));
}
+static inline void part_stat_read_all(struct hd_struct *part,
+ struct disk_stats *stat)
+{
+ memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
+}
+
static inline int init_part_stats(struct hd_struct *part)
{
return 1;
@@ -412,9 +420,6 @@ static inline void free_part_stats(struct hd_struct *part)
#endif /* CONFIG_SMP */
-#define part_stat_read_msecs(part, which) \
- div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
-
#define part_stat_read_accum(part, field) \
(part_stat_read(part, field[STAT_READ]) + \
part_stat_read(part, field[STAT_WRITE]) + \
next prev parent reply other threads:[~2020-03-24 6:39 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-24 6:39 [PATCH v3 0/3] block/diskstats: more accurate io_ticks and optimization Konstantin Khlebnikov
2020-03-24 6:39 ` [PATCH v3 1/3] block/diskstats: more accurate approximation of io_ticks for slow disks Konstantin Khlebnikov
2020-03-24 14:06 ` Ming Lei
2020-03-25 3:40 ` Ming Lei
2020-03-25 6:28 ` Konstantin Khlebnikov
2020-03-25 8:02 ` Konstantin Khlebnikov
2020-03-25 8:54 ` Ming Lei
2020-03-25 13:02 ` Konstantin Khlebnikov
2020-03-26 7:53 ` Ming Lei
2020-03-24 6:39 ` Konstantin Khlebnikov [this message]
2020-03-24 6:39 ` [PATCH v3 3/3] block/diskstats: replace time_in_queue with sum of request times Konstantin Khlebnikov
2020-03-24 16:06 ` [PATCH v3 0/3] block/diskstats: more accurate io_ticks and optimization Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=158503198306.1955.15150686320152629671.stgit@buzz \
--to=khlebnikov@yandex-team.ru \
--cc=axboe@kernel.dk \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ming.lei@redhat.com \
--cc=mpatocka@redhat.com \
--cc=snitzer@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox