From: Coly Li <colyli@gmail.com>
To: dm-devel@redhat.com
Cc: Tao Ma <boyu.mt@taobao.com>, Robin Dong <sanbai@alibaba-inc.com>,
Laurence Oberman <loberman@redhat.com>,
Alasdair Kergon <agk@redhat.com>
Subject: [PATCH 2/4] dm-latency: add I/O latency measurement in device mapper
Date: Thu, 26 Feb 2015 14:42:33 +0800 [thread overview]
Message-ID: <54EEC059.7000303@gmail.com> (raw)
From: Coly Li <bosong.ly@alibaba-inc.com>
This patch adds I/O latency measurement code in dm.c. The basic idea is,
1) add timestamp in struct dm_io as unsigned long start_time_usec
2) set timestamp when dm_io issued to underlaying block layer.
3) when all sub-I/O completed, calculate time difference against
start_time_usec as I/O latency.
4) account the latency to corresponding counters like,
- latency_stats_us
- latency_stats_ms
- latency_stats_s
Signed-off-by: Coly Li <bosong.ly@alibaba-inc.com>
Reviewed-by: Robin Dong <sanbai@alibaba-inc.com>
Reviewed-by: Tao Ma <boyu.mt@taobao.com>
---
drivers/md/dm.c | 69
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/md/dm.h | 13 +++++++++++++
2 files changed, 82 insertions(+)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e2532c8..200954a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,6 +20,7 @@
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
+#include <linux/clocksource.h>
#include <trace/events/block.h>
@@ -67,6 +68,7 @@ struct dm_io {
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
+ unsigned long start_time_usec;
spinlock_t endio_lock;
struct dm_stats_aux stats_aux;
};
@@ -521,14 +523,72 @@ static int md_in_flight(struct mapped_device *md)
atomic_read(&md->pending[WRITE]);
}
+static unsigned long long us2msecs(unsigned long long usec)
+{
+ usec += 500;
+ do_div(usec, 1000);
+ return usec;
+}
+
+static unsigned long long us2secs(unsigned long long usec)
+{
+ usec += 500;
+ do_div(usec, 1000);
+ usec += 500;
+ do_div(usec, 1000);
+ return usec;
+}
+
+static void update_latency_stats(struct mapped_device *md, struct dm_io
*io)
+{
+ uint64_t now, latency;
+ int idx;
+ ktime_t ts;
+
+ ts = ktime_get();
+ now = (uint64_t)ktime_to_us(ts);
+
+ /*
+ * if now <= io->start_time_usec, it means counter
+ * in ktime_get() over flows, just ignore this I/O
+ */
+ if (unlikely(now < io->start_time_usec))
+ return;
+
+ latency = (uint64_t)(now - io->start_time_usec);
+ if (latency < 1000) {
+ /* microseconds */
+ idx = latency/DM_LATENCY_STATS_US_GRAINSIZE;
+ if (idx > (DM_LATENCY_STATS_US_NR - 1))
+ idx = DM_LATENCY_STATS_US_NR - 1;
+ atomic_inc(&(md->latency_stats_us[idx]));
+ } else if (latency < 1000000) {
+ /* milliseconds */
+ idx = us2msecs(latency)/DM_LATENCY_STATS_MS_GRAINSIZE;
+ if (idx > (DM_LATENCY_STATS_MS_NR - 1))
+ idx = DM_LATENCY_STATS_MS_NR - 1;
+ atomic_inc(&(md->latency_stats_ms[idx]));
+ } else {
+ /* seconds */
+ idx = us2secs(latency)/DM_LATENCY_STATS_S_GRAINSIZE;
+ if (idx > (DM_LATENCY_STATS_S_NR - 1))
+ idx = DM_LATENCY_STATS_S_NR - 1;
+ atomic_inc(&(md->latency_stats_s[idx]));
+ }
+}
+
static void start_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
struct bio *bio = io->bio;
+ ktime_t ts;
+
int cpu;
int rw = bio_data_dir(bio);
io->start_time = jiffies;
+ ts = ktime_get();
+ io->start_time_usec = (unsigned long)ktime_to_us(ts);
cpu = part_stat_lock();
part_round_stats(cpu, &dm_disk(md)->part0);
@@ -549,6 +609,7 @@ static void end_io_acct(struct dm_io *io)
int pending;
int rw = bio_data_dir(bio);
+ update_latency_stats(md, io);
generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
if (unlikely(dm_stats_used(&md->stats)))
@@ -2118,6 +2179,14 @@ static struct mapped_device *alloc_dev(int minor)
dm_stats_init(&md->stats);
+ /* initial latency stats buckets */
+ for (r = 0; r < DM_LATENCY_STATS_S_NR; r++)
+ atomic_set(&(md->latency_stats_s[r]), 0);
+ for (r = 0; r < DM_LATENCY_STATS_MS_NR; r++)
+ atomic_set(&(md->latency_stats_ms[r]), 0);
+ for (r = 0; r < DM_LATENCY_STATS_US_NR; r++)
+ atomic_set(&(md->latency_stats_us[r]), 0);
+
/* Populate the mapping, nobody knows we exist yet */
spin_lock(&_minor_lock);
old_md = idr_replace(&_minor_idr, md, minor);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 3014667..6c254f4 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -32,6 +32,14 @@
*/
#define DM_STATUS_NOFLUSH_FLAG (1 << 0)
+/* 100s is long enough to measure disk I/O latency */
+#define DM_LATENCY_STATS_S_NR 100
+#define DM_LATENCY_STATS_S_GRAINSIZE (1000/DM_LATENCY_STATS_S_NR)
+#define DM_LATENCY_STATS_MS_NR 100
+#define DM_LATENCY_STATS_MS_GRAINSIZE (1000/DM_LATENCY_STATS_MS_NR)
+#define DM_LATENCY_STATS_US_NR 100
+#define DM_LATENCY_STATS_US_GRAINSIZE (1000/DM_LATENCY_STATS_US_NR)
+
/*
* Type of table and mapped_device's mempool
*/
@@ -135,6 +143,11 @@ struct mapped_device {
struct kthread_worker kworker;
struct task_struct *kworker_task;
+
+ /* latency statistic buckets */
+ atomic_t latency_stats_s[DM_LATENCY_STATS_S_NR];
+ atomic_t latency_stats_ms[DM_LATENCY_STATS_MS_NR];
+ atomic_t latency_stats_us[DM_LATENCY_STATS_US_NR];
};
/*
reply other threads:[~2015-02-26 6:42 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=54EEC059.7000303@gmail.com \
--to=colyli@gmail.com \
--cc=agk@redhat.com \
--cc=boyu.mt@taobao.com \
--cc=dm-devel@redhat.com \
--cc=loberman@redhat.com \
--cc=sanbai@alibaba-inc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.