From: Jody McIntyre <scjody@sun.com>
To: linux-raid@vger.kernel.org, neilb@suse.de, dan.j.williams@intel.com
Subject: [PATCH] md: Track raid5/6 statistics
Date: Thu, 12 Mar 2009 16:57:55 -0400 [thread overview]
Message-ID: <20090312205754.GH8732@clouds> (raw)
This patch tracks various statistics related to the performance of a RAID 5
or 6 array. These have been useful to us in the past to help solve
performance issues. They are reported via the 'stat' file in each device's
'md' sysfs directory, e.g. /sys/class/block/md0/md/stat .
A slight amount of overhead is added by the atomic_inc() and atomic_dec()
calls used in this patch, but it's so low I've been unable to measure it.
Both calls are already used extensively in raid5.c to track internal
counters so I believe this is OK.
Signed-off-by: Jody McIntyre <scjody@sun.com>
Index: linux-2.6/drivers/md/raid5.c
===================================================================
--- linux-2.6.orig/drivers/md/raid5.c
+++ linux-2.6/drivers/md/raid5.c
@@ -136,7 +136,7 @@ static inline int raid6_next_disk(int di
return (disk < raid_disks) ? disk : 0;
}
-static void return_io(struct bio *return_bi)
+static void return_io(struct bio *return_bi, raid5_conf_t *conf)
{
struct bio *bi = return_bi;
while (bi) {
@@ -145,6 +145,7 @@ static void return_io(struct bio *return
bi->bi_next = NULL;
bi->bi_size = 0;
bio_endio(bi, 0);
+ atomic_dec(&conf->in_reqs_in_queue);
bi = return_bi;
}
}
@@ -167,10 +168,12 @@ static void __release_stripe(raid5_conf_
if (test_bit(STRIPE_DELAYED, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list);
blk_plug_device(conf->mddev->queue);
+ atomic_inc(&conf->delayed);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0) {
list_add_tail(&sh->lru, &conf->bitmap_list);
blk_plug_device(conf->mddev->queue);
+ atomic_inc(&conf->bit_delayed);
} else {
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
@@ -347,6 +350,7 @@ static struct stripe_head *get_active_st
if (noblock && sh == NULL)
break;
if (!sh) {
+ atomic_inc(&conf->out_of_stripes);
conf->inactive_blocked = 1;
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list) &&
@@ -406,10 +410,13 @@ static void ops_run_io(struct stripe_hea
bi = &sh->dev[i].req;
bi->bi_rw = rw;
- if (rw == WRITE)
+ if (rw == WRITE) {
+ atomic_inc(&conf->writes_out);
bi->bi_end_io = raid5_end_write_request;
- else
+ } else {
+ atomic_inc(&conf->reads_out);
bi->bi_end_io = raid5_end_read_request;
+ }
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
@@ -444,6 +451,7 @@ static void ops_run_io(struct stripe_hea
test_bit(R5_ReWrite, &sh->dev[i].flags))
atomic_add(STRIPE_SECTORS,
&rdev->corrected_errors);
+ atomic_inc(&conf->out_reqs_in_queue);
generic_make_request(bi);
} else {
if (rw == WRITE)
@@ -547,7 +555,7 @@ static void ops_complete_biofill(void *s
spin_unlock_irq(&conf->device_lock);
clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
- return_io(return_bi);
+ return_io(return_bi, conf);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
@@ -1074,6 +1082,8 @@ static void raid5_end_read_request(struc
mdk_rdev_t *rdev;
+ atomic_dec(&conf->out_reqs_in_queue);
+
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
break;
@@ -1153,6 +1163,8 @@ static void raid5_end_write_request(stru
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ atomic_dec(&conf->out_reqs_in_queue);
+
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
break;
@@ -2131,6 +2143,7 @@ static void handle_stripe_dirtying5(raid
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ atomic_inc(&conf->reads_for_rmw);
} else {
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -2154,6 +2167,7 @@ static void handle_stripe_dirtying5(raid
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ atomic_inc(&conf->reads_for_rcw);
} else {
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -2219,6 +2233,7 @@ static void handle_stripe_dirtying6(raid
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ atomic_inc(&conf->reads_for_rcw);
} else {
pr_debug("Request delayed stripe %llu "
"block %d for Reconstruct\n",
@@ -2789,7 +2804,7 @@ static bool handle_stripe5(struct stripe
ops_run_io(sh, &s);
- return_io(return_bi);
+ return_io(return_bi, conf);
return blocked_rdev == NULL;
}
@@ -3011,7 +3026,7 @@ static bool handle_stripe6(struct stripe
ops_run_io(sh, &s);
- return_io(return_bi);
+ return_io(return_bi, conf);
return blocked_rdev == NULL;
}
@@ -3217,6 +3232,7 @@ static void raid5_align_endio(struct bio
raid_bi->bi_next = NULL;
rdev_dec_pending(rdev, conf->mddev);
+ atomic_dec(&conf->out_reqs_in_queue);
if (!error && uptodate) {
bio_endio(raid_bi, 0);
@@ -3287,6 +3303,7 @@ static int chunk_aligned_read(struct req
&pd_idx,
conf);
+ atomic_dec(&conf->in_reqs_in_queue);
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
if (rdev && test_bit(In_sync, &rdev->flags)) {
@@ -3311,6 +3328,9 @@ static int chunk_aligned_read(struct req
atomic_inc(&conf->active_aligned_reads);
spin_unlock_irq(&conf->device_lock);
+ atomic_inc(&conf->out_reqs_in_queue);
+ atomic_inc(&conf->aligned_reads);
+ atomic_inc(&conf->reads_out);
generic_make_request(align_bi);
return 1;
} else {
@@ -3384,6 +3404,8 @@ static int make_request(struct request_q
const int rw = bio_data_dir(bi);
int cpu, remaining;
+ atomic_inc(&conf->in_reqs_in_queue);
+
if (unlikely(bio_barrier(bi))) {
bio_endio(bi, -EOPNOTSUPP);
return 0;
@@ -3397,6 +3419,11 @@ static int make_request(struct request_q
bio_sectors(bi));
part_stat_unlock();
+ if (rw == WRITE)
+ atomic_inc(&conf->writes_in);
+ else
+ atomic_inc(&conf->reads_in);
+
if (rw == READ &&
mddev->reshape_position == MaxSector &&
chunk_aligned_read(q,bi))
@@ -3508,6 +3535,7 @@ static int make_request(struct request_q
if ( rw == WRITE )
md_write_end(mddev);
+ atomic_dec(&conf->in_reqs_in_queue);
bio_endio(bi, 0);
}
@@ -3981,10 +4009,37 @@ stripe_cache_active_show(mddev_t *mddev,
static struct md_sysfs_entry
raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
+/* sysfs 'stat' show: print all raid5/6 counters on one line, md.txt order. */
+static ssize_t
+stat_show(mddev_t *mddev, char *page)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+	if (!conf)
+		return 0;	/* no conf attached to this mddev: emit nothing */
+	return sprintf(page, "%u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+		       atomic_read(&conf->reads_in),
+		       atomic_read(&conf->writes_in),
+		       atomic_read(&conf->reads_out),
+		       atomic_read(&conf->writes_out),
+		       atomic_read(&conf->reads_for_rmw),
+		       atomic_read(&conf->reads_for_rcw),
+		       atomic_read(&conf->aligned_reads),
+		       atomic_read(&conf->active_stripes),
+		       atomic_read(&conf->in_reqs_in_queue),
+		       atomic_read(&conf->out_reqs_in_queue),
+		       atomic_read(&conf->delayed),
+		       atomic_read(&conf->bit_delayed),
+		       atomic_read(&conf->out_of_stripes));
+}
+
+static struct md_sysfs_entry
+raid5_stats = __ATTR_RO(stat);
+
static struct attribute *raid5_attrs[] = {
&raid5_stripecache_size.attr,
&raid5_stripecache_active.attr,
&raid5_preread_bypass_threshold.attr,
+ &raid5_stats.attr,
NULL,
};
static struct attribute_group raid5_attrs_group = {
Index: linux-2.6/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.orig/include/linux/raid/raid5.h
+++ linux-2.6/include/linux/raid/raid5.h
@@ -385,6 +385,22 @@ struct raid5_private_data {
int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
+
+ /*
+ * Stats
+ */
+ atomic_t reads_in;
+ atomic_t writes_in;
+ atomic_t reads_out;
+ atomic_t writes_out;
+ atomic_t reads_for_rmw;
+ atomic_t reads_for_rcw;
+ atomic_t aligned_reads;
+ atomic_t in_reqs_in_queue;
+ atomic_t out_reqs_in_queue;
+ atomic_t delayed;
+ atomic_t bit_delayed;
+ atomic_t out_of_stripes;
};
typedef struct raid5_private_data raid5_conf_t;
Index: linux-2.6/Documentation/md.txt
===================================================================
--- linux-2.6.orig/Documentation/md.txt
+++ linux-2.6/Documentation/md.txt
@@ -484,3 +484,26 @@ These currently include
to 1. Setting this to 0 disables bypass accounting and
requires preread stripes to wait until all full-width stripe-
writes are complete. Valid values are 0 to stripe_cache_size.
+ stat (currently raid 5/6 only)
+ Reports various performance statistics related to the array. In
+ order, separated by spaces:
+ reads in: number of reads submitted to the array
+ writes in: number of writes submitted to the array
+ reads out: number of reads performed on the underlying devices
+ writes out: number of writes performed on the underlying devices
+ reads for rmw: number of reads for read-modify-write operations
+ reads for rcw: number of reads for reconstruct-write operations
+ aligned reads: number of reads via the aligned path
+
+ active stripes: number of stripes currently in use
+ in reqs in queue: current number of requests queued on the array
+ out reqs in queue: current number of requests queued for the underlying
+ devices
+
+ delayed: number of write requests that were delayed to perform reads
+ bit delayed: number of write requests that were delayed to update the
+ bitmap
+ out of stripes: number of times the array has run out of stripes;
+ if this value is high, increasing the stripe cache
+ may be useful.
+ More statistics may be added at the end of the line in the future.
next reply other threads:[~2009-03-12 20:57 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-03-12 20:57 Jody McIntyre [this message]
2009-03-14 17:07 ` [PATCH] md: Track raid5/6 statistics Dan Williams
2009-05-06 20:05 ` Jody McIntyre
2009-05-07 16:30 ` Dan Williams
2009-05-11 13:36 ` Jody McIntyre
2009-05-13 13:10 ` Bill Davidsen
2009-10-02 17:01 ` Jody McIntyre
2009-10-02 17:51 ` Bill Davidsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090312205754.GH8732@clouds \
--to=scjody@sun.com \
--cc=dan.j.williams@intel.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.