From: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
To: neilb@suse.de
Cc: linux-raid@vger.kernel.org,
Florian-Ewald Mueller <florian-ewald.mueller@profitbricks.com>,
Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Subject: [RFC PATCH 1/4] md: complete bio accounting and add io_latency extension
Date: Wed, 4 Jun 2014 19:09:59 +0200 [thread overview]
Message-ID: <1401901802-16296-2-git-send-email-sebastian.riemer@profitbricks.com> (raw)
In-Reply-To: <1401901802-16296-1-git-send-email-sebastian.riemer@profitbricks.com>
From: Florian-Ewald Mueller <florian-ewald.mueller@profitbricks.com>
The md layer currently accounts only the number of I/Os and sectors per
bio, so account in-flight I/Os and ticks as well. Also maintain an I/O
latency statistic by counting I/Os in power-of-two latency buckets,
starting at < 8 ms and ending at >= 65536 ms, and track the maximum
latency as well. This I/O latency statistic can be read from the md
sysfs file 'io_latency'; writing to that file resets it to 0.
Signed-off-by: Florian-Ewald Mueller <florian-ewald.mueller@profitbricks.com>
[spars: added a description, replaced gcc atomics with atomic64_t,
merged commits, fixed checkpatch warnings]
Signed-off-by: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
---
drivers/md/md.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/md/md.h | 18 ++++++
2 files changed, 193 insertions(+)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 237b7e0..8c653f9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -54,6 +54,32 @@
#include "md.h"
#include "bitmap.h"
+#ifdef BIO_ACCOUNTING_EXTENSION
+
+#include <linux/ratelimit.h>
+
+/*
+ * Per-bio tracking record for the accounting extension.
+ * md_make_request() fills one in and points bio->bi_private at it while
+ * md_bio_endio() is installed as the bio's completion callback;
+ * md_bio_endio() restores the saved values and frees the record.
+ */
+struct md_bio_private {
+ void (*orig_bio_endio)(struct bio *, int); /* saved bio->bi_end_io */
+ void *orig_bio_private; /* saved bio->bi_private */
+ struct mddev *mddev; /* array the bio was submitted to */
+ unsigned int sectors; /* bio_sectors() at submission time */
+ unsigned long ticks; /* jiffies at submission time */
+};
+
+/* Slab cache for struct md_bio_private; created in md_init(). */
+static struct kmem_cache *md_bio_private_cache __read_mostly;
+
+/* Rate limits the allocation-failure warning in md_make_request(). */
+static DEFINE_RATELIMIT_STATE(md_ratelimit_state,
+ DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+/*
+ * Return non-zero only if @p is a usable pointer, i.e. it is rejected by
+ * neither ZERO_OR_NULL_PTR() nor IS_ERR().
+ */
+static inline int __must_check md_valid_ptr(const void *p)
+{
+	if (ZERO_OR_NULL_PTR(p))
+		return 0;
+
+	return !IS_ERR(p);
+}
+#define VALID_PTR(p) md_valid_ptr(p)
+
+#endif /* BIO_ACCOUNTING_EXTENSION */
+
#ifndef MODULE
static void autostart_arrays(int part);
#endif
@@ -241,6 +267,64 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
_tmp = _tmp->next;}) \
)
+#ifdef BIO_ACCOUNTING_EXTENSION
+
+/*
+ * Raise *@v to @val if @val is larger than the value currently stored.
+ *
+ * Retries the compare-and-exchange for as long as another updater changes
+ * *@v underneath us and @val is still larger than what was observed.
+ *
+ * Returns the last value observed in *@v before our update took place
+ * (or the value that made the update unnecessary).
+ */
+static inline long atomic64_set_if_greater(atomic64_t *v, long val)
+{
+	long seen = atomic64_read(v);
+
+	while (val > seen) {
+		long prev = atomic64_cmpxchg(v, seen, val);
+
+		/* The exchange took effect iff nobody changed *v meanwhile. */
+		if (likely(prev == seen))
+			break;
+		seen = prev;
+	}
+
+	return seen;
+}
+
+/*
+ * Completion callback that md_make_request() installs on bios it tracks.
+ *
+ * Accounts the finished bio in the gendisk part0 statistics (ios, sectors,
+ * ticks, in-flight) and in the per-array latency statistic, then restores
+ * the bio's original bi_private/bi_end_io, frees the tracking record and
+ * passes the bio on via bio_endio_nodec().
+ *
+ * @bio: the completed bio; bio->bi_private points to its md_bio_private
+ * @err: completion status, handed through unchanged
+ */
+static void md_bio_endio(struct bio *bio, int err)
+{
+ struct md_bio_private *mbp = bio->bi_private;
+ struct mddev *mddev = mbp->mddev;
+ struct md_stats *sp = &mddev->stats;
+
+ unsigned int sectors = mbp->sectors;
+ int cpu, idx, rw = bio_data_dir(bio);
+ unsigned long ms, ticks;
+
+ /* rw is used as an index below, so both arrays must have 2 columns. */
+ BUILD_BUG_ON(ARRAY_SIZE(sp->latency_table[0]) != 2);
+ BUILD_BUG_ON(ARRAY_SIZE(sp->max_latency) != 2);
+
+ /* Jiffies elapsed since md_make_request() stamped mbp->ticks. */
+ ticks = (long)jiffies - (long)mbp->ticks;
+
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+ part_stat_add(cpu, &mddev->gendisk->part0, ticks[rw], ticks);
+ part_dec_in_flight(&mddev->gendisk->part0, rw);
+ part_round_stats(cpu, &mddev->gendisk->part0);
+ part_stat_unlock();
+
+ /*
+  * Pick the latency bucket: bucket 0 takes everything below
+  * 2^MD_LATENCY_LOGBASE ms, each following bucket covers one further
+  * power of two, and clamp() folds anything larger into the last one.
+  */
+ ms = jiffies_to_msecs(ticks);
+ if (likely(ticks > 0) && ms > 0) {
+ idx = ilog2(ms) - MD_LATENCY_LOGBASE + 1;
+ idx = clamp(idx, 0, (int)ARRAY_SIZE(sp->latency_table) - 1);
+ } else {
+ idx = 0;
+ }
+ /* The maximum is kept in jiffies; it is converted to ms when shown. */
+ atomic64_set_if_greater(&sp->max_latency[rw], ticks);
+ atomic64_inc(&sp->latency_table[idx][rw]);
+
+ /* Restore the submitter's values before freeing mbp, which holds them. */
+ bio->bi_private = mbp->orig_bio_private;
+ bio->bi_end_io = mbp->orig_bio_endio;
+ kmem_cache_free(md_bio_private_cache, mbp);
+ bio_endio_nodec(bio, err); /* >= 3.14, bio_endio() otherwise */
+}
+
+#endif /* BIO_ACCOUNTING_EXTENSION */
/* Rather than calling directly into the personality make_request function,
* IO requests come here first so that we can check if the device is
@@ -255,6 +339,9 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
struct mddev *mddev = q->queuedata;
int cpu;
unsigned int sectors;
+#ifdef BIO_ACCOUNTING_EXTENSION
+ struct md_bio_private *mbp;
+#endif /* BIO_ACCOUNTING_EXTENSION */
if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
@@ -288,12 +375,36 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
* go away inside make_request
*/
sectors = bio_sectors(bio);
+#ifdef BIO_ACCOUNTING_EXTENSION
+ mbp = kmem_cache_alloc(md_bio_private_cache, GFP_NOIO);
+ if (unlikely(!VALID_PTR(mbp))) {
+ if (__ratelimit(&md_ratelimit_state))
+ pr_warn("%s: [%s] kmem_cache_alloc failed\n",
+ __func__, mdname(mddev));
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+ sectors);
+ part_stat_unlock();
+ } else {
+ part_inc_in_flight(&mddev->gendisk->part0, rw);
+ mbp->orig_bio_private = bio->bi_private;
+ mbp->orig_bio_endio = bio->bi_end_io;
+ mbp->sectors = sectors;
+ mbp->ticks = jiffies;
+ mbp->mddev = mddev;
+ bio->bi_end_io = md_bio_endio;
+ bio->bi_private = mbp;
+ }
+#endif /* BIO_ACCOUNTING_EXTENSION */
mddev->pers->make_request(mddev, bio);
+#ifndef BIO_ACCOUNTING_EXTENSION
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
part_stat_unlock();
+#endif /* !BIO_ACCOUNTING_EXTENSION */
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
wake_up(&mddev->sb_wait);
@@ -4652,6 +4763,52 @@ static struct md_sysfs_entry md_array_size =
__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
array_size_store);
+#ifdef BIO_ACCOUNTING_EXTENSION
+
+/*
+ * Show handler for the 'io_latency' sysfs file.
+ *
+ * Emits one line per latency bucket ("< N ms" for all but the last one,
+ * ">= N ms" for the open-ended last one), followed by a line with the
+ * maximum latency in ms. Every line carries two values, one per
+ * bio_data_dir() index (index 0 first).
+ *
+ * The return type of atomic64_read() differs between architectures
+ * (long vs. long long), so the counters are converted to
+ * unsigned long long and printed with %llu instead of %lu.
+ *
+ * Returns the number of bytes written to @page.
+ */
+static ssize_t
+md_io_latency_show(struct mddev *mddev, char *page)
+{
+	struct md_stats *sp = &mddev->stats;
+	unsigned long long c0, c1;
+	ssize_t cnt = 0;
+	int i;
+
+	for (i = 0; i < (ARRAY_SIZE(sp->latency_table) - 1); i++) {
+		c0 = atomic64_read(&sp->latency_table[i][0]);
+		c1 = atomic64_read(&sp->latency_table[i][1]);
+		cnt += scnprintf(page + cnt, PAGE_SIZE - cnt,
+				 "< %5d ms: %llu %llu\n",
+				 (1 << (i + MD_LATENCY_LOGBASE)), c0, c1);
+	}
+
+	/*
+	 * i now indexes the last bucket; its lower bound equals the upper
+	 * bound of the bucket before it, hence (i - 1).
+	 */
+	c0 = atomic64_read(&sp->latency_table[i][0]);
+	c1 = atomic64_read(&sp->latency_table[i][1]);
+	cnt += scnprintf(page + cnt, PAGE_SIZE - cnt, ">= %5d ms: %llu %llu\n",
+			 (1 << ((i - 1) + MD_LATENCY_LOGBASE)), c0, c1);
+
+	/* max_latency is stored in jiffies. */
+	cnt += scnprintf(page + cnt, PAGE_SIZE - cnt, " maximum ms: %u %u\n",
+			 jiffies_to_msecs(atomic64_read(&sp->max_latency[0])),
+			 jiffies_to_msecs(atomic64_read(&sp->max_latency[1])));
+	return cnt;
+}
+
+/*
+ * Store handler for the 'io_latency' sysfs file.
+ *
+ * The written data is not inspected: any write zeroes the maximum
+ * latencies and every latency bucket counter. Returns @len so the whole
+ * write is reported as consumed.
+ */
+static ssize_t
+md_io_latency_store(struct mddev *mddev, const char *buf, size_t len)
+{
+	struct md_stats *st = &mddev->stats;
+	int row, rw;
+
+	for (rw = 0; rw < ARRAY_SIZE(st->max_latency); rw++)
+		atomic64_set(&st->max_latency[rw], 0);
+
+	for (row = 0; row < ARRAY_SIZE(st->latency_table); row++)
+		for (rw = 0; rw < ARRAY_SIZE(st->latency_table[row]); rw++)
+			atomic64_set(&st->latency_table[row][rw], 0);
+
+	return len;
+}
+
+/* sysfs attribute 'io_latency': readable by all, writable by the owner. */
+static struct md_sysfs_entry md_io_latency =
+__ATTR(io_latency, S_IRUGO|S_IWUSR, md_io_latency_show, md_io_latency_store);
+
+#endif /* BIO_ACCOUNTING_EXTENSION */
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_layout.attr,
@@ -4667,6 +4824,9 @@ static struct attribute *md_default_attrs[] = {
&md_reshape_direction.attr,
&md_array_size.attr,
&max_corr_read_errors.attr,
+#ifdef BIO_ACCOUNTING_EXTENSION
+ &md_io_latency.attr,
+#endif /* BIO_ACCOUNTING_EXTENSION */
NULL,
};
@@ -8551,6 +8711,14 @@ static int __init md_init(void)
{
int ret = -ENOMEM;
+#ifdef BIO_ACCOUNTING_EXTENSION
+ md_bio_private_cache = KMEM_CACHE(md_bio_private, 0);
+ if (unlikely(!VALID_PTR(md_bio_private_cache))) {
+ pr_err("%s: KMEM_CACHE failed\n", __func__);
+ return -ENOMEM;
+ }
+#endif /* BIO_ACCOUNTING_EXTENSION */
+
md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
if (!md_wq)
goto err_wq;
@@ -8687,6 +8855,13 @@ static __exit void md_exit(void)
}
destroy_workqueue(md_misc_wq);
destroy_workqueue(md_wq);
+
+#ifdef BIO_ACCOUNTING_EXTENSION
+ if (likely(VALID_PTR(md_bio_private_cache))) {
+ kmem_cache_destroy(md_bio_private_cache);
+ md_bio_private_cache = NULL;
+ }
+#endif /* BIO_ACCOUNTING_EXTENSION */
}
subsys_initcall(md_init);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index a49d991..f0e9171 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -24,6 +24,10 @@
#include <linux/wait.h>
#include <linux/workqueue.h>
+/*
+ * Compile-time switch for the bio accounting / I/O latency code.
+ * The condition is constant, so the extension is always built in;
+ * change it to 0 to compile the extension out.
+ */
+#if 1
+#define BIO_ACCOUNTING_EXTENSION
+#endif
+
#define MaxSector (~(sector_t)0)
/* Bad block numbers are stored sorted in a single page.
@@ -202,6 +206,17 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
extern void md_ack_all_badblocks(struct badblocks *bb);
+#ifdef BIO_ACCOUNTING_EXTENSION
+
+#define MD_LATENCY_LOGBASE 3
+
+/*
+ * Per-array I/O latency statistic, updated in md_bio_endio().
+ * The second index of both arrays is the bio_data_dir() value of the
+ * completed bio.
+ */
+struct md_stats {
+ /* bio count per latency bucket; bucket 0 is < 2^MD_LATENCY_LOGBASE ms */
+ atomic64_t latency_table[15][2];
+ /* largest completion time observed, in jiffies */
+ atomic64_t max_latency[2];
+};
+
+#endif /* BIO_ACCOUNTING_EXTENSION */
+
struct mddev {
void *private;
struct md_personality *pers;
@@ -437,6 +452,9 @@ struct mddev {
struct work_struct flush_work;
struct work_struct event_work; /* used by dm to report failure event */
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
+#ifdef BIO_ACCOUNTING_EXTENSION
+ struct md_stats stats;
+#endif /* BIO_ACCOUNTING_EXTENSION */
};
--
1.7.9.5
next prev parent reply other threads:[~2014-06-04 17:09 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-28 13:04 [RFC] Process requests instead of bios to use a scheduler Sebastian Parschauer
2014-06-01 23:32 ` NeilBrown
2014-06-02 9:51 ` Sebastian Parschauer
2014-06-02 10:20 ` NeilBrown
2014-06-02 11:12 ` Sebastian Parschauer
2014-06-04 17:09 ` [RFC PATCH 0/4] md/mdadm: introduce request function mode support Sebastian Parschauer
2014-06-04 17:09 ` Sebastian Parschauer [this message]
2014-06-04 17:10 ` [RFC PATCH 2/4] md: " Sebastian Parschauer
2014-06-04 17:10 ` [RFC PATCH 3/4] md: handle IO latency accounting in rqfn mode Sebastian Parschauer
2014-06-04 17:10 ` [RFC PATCH 4/4] mdadm: introduce '--use-requestfn' create/assembly option Sebastian Parschauer
2014-06-17 13:20 ` [RFC PATCH 0/4] md/mdadm: introduce request function mode support Sebastian Parschauer
[not found] ` <CAH3kUhEK26+4KryoReosMt654-vcrkkgkxaW5tKkFRDBqgX82w@mail.gmail.com>
[not found] ` <53A14513.20902@profitbricks.com>
2014-06-18 13:57 ` Roberto Spadim
2014-06-18 14:43 ` Sebastian Parschauer
2014-06-24 7:09 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1401901802-16296-2-git-send-email-sebastian.riemer@profitbricks.com \
--to=sebastian.riemer@profitbricks.com \
--cc=florian-ewald.mueller@profitbricks.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).