From: Jody McIntyre <scjody@sun.com>
To: linux-raid@vger.kernel.org, neilb@suse.de, dan.j.williams@intel.com
Subject: [PATCH] md: Track raid5/6 statistics
Date: Thu, 12 Mar 2009 16:57:55 -0400
Message-ID: <20090312205754.GH8732@clouds>
This patch tracks various statistics related to the performance of a RAID 5
or RAID 6 array. These have been useful to us in the past when tracking down
performance issues. They are reported via the 'stat' file in each device's
'md' sysfs directory, e.g. /sys/class/block/md0/md/stat.
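
For illustration only (this is not part of the patch), here is a minimal
user-space sketch that reads the proposed file and prints each counter with
its name. The md0 path is the example path above, and the field order
follows the Documentation/md.txt hunk below:

/* Illustrative sketch, not part of the patch: dump the raid5/6 stat file. */
#include <stdio.h>

int main(void)
{
	static const char *names[] = {
		"reads in", "writes in", "reads out", "writes out",
		"reads for rmw", "reads for rcw", "aligned reads",
		"active stripes", "in reqs in queue", "out reqs in queue",
		"delayed", "bit delayed", "out of stripes",
	};
	unsigned int v;
	int i;
	FILE *f = fopen("/sys/class/block/md0/md/stat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	for (i = 0; i < 13; i++) {
		if (fscanf(f, "%u", &v) != 1)
			break;
		printf("%-20s %u\n", names[i], v);
	}
	fclose(f);
	return 0;
}
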
A slight amount of overhead is added by the atomic_inc() and atomic_dec()
calls this patch introduces, but it's so low I've been unable to measure it.
Both calls are already used extensively in raid5.c to track internal
counters, so I believe this is OK.
Signed-off-by: Jody McIntyre <scjody@sun.com>
Index: linux-2.6/drivers/md/raid5.c
===================================================================
--- linux-2.6.orig/drivers/md/raid5.c
+++ linux-2.6/drivers/md/raid5.c
@@ -136,7 +136,7 @@ static inline int raid6_next_disk(int di
return (disk < raid_disks) ? disk : 0;
}
-static void return_io(struct bio *return_bi)
+static void return_io(struct bio *return_bi, raid5_conf_t *conf)
{
struct bio *bi = return_bi;
while (bi) {
@@ -145,6 +145,7 @@ static void return_io(struct bio *return
bi->bi_next = NULL;
bi->bi_size = 0;
bio_endio(bi, 0);
+ atomic_dec(&conf->in_reqs_in_queue);
bi = return_bi;
}
}
@@ -167,10 +168,12 @@ static void __release_stripe(raid5_conf_
if (test_bit(STRIPE_DELAYED, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list);
blk_plug_device(conf->mddev->queue);
+ atomic_inc(&conf->delayed);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0) {
list_add_tail(&sh->lru, &conf->bitmap_list);
blk_plug_device(conf->mddev->queue);
+ atomic_inc(&conf->bit_delayed);
} else {
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
@@ -347,6 +350,7 @@ static struct stripe_head *get_active_st
if (noblock && sh == NULL)
break;
if (!sh) {
+ atomic_inc(&conf->out_of_stripes);
conf->inactive_blocked = 1;
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list) &&
@@ -406,10 +410,13 @@ static void ops_run_io(struct stripe_hea
bi = &sh->dev[i].req;
bi->bi_rw = rw;
- if (rw == WRITE)
+ if (rw == WRITE) {
+ atomic_inc(&conf->writes_out);
bi->bi_end_io = raid5_end_write_request;
- else
+ } else {
+ atomic_inc(&conf->reads_out);
bi->bi_end_io = raid5_end_read_request;
+ }
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
@@ -444,6 +451,7 @@ static void ops_run_io(struct stripe_hea
test_bit(R5_ReWrite, &sh->dev[i].flags))
atomic_add(STRIPE_SECTORS,
&rdev->corrected_errors);
+ atomic_inc(&conf->out_reqs_in_queue);
generic_make_request(bi);
} else {
if (rw == WRITE)
@@ -547,7 +555,7 @@ static void ops_complete_biofill(void *s
spin_unlock_irq(&conf->device_lock);
clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
- return_io(return_bi);
+ return_io(return_bi, conf);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
@@ -1074,6 +1082,8 @@ static void raid5_end_read_request(struc
mdk_rdev_t *rdev;
+ atomic_dec(&conf->out_reqs_in_queue);
+
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
break;
@@ -1153,6 +1163,8 @@ static void raid5_end_write_request(stru
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ atomic_dec(&conf->out_reqs_in_queue);
+
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
break;
@@ -2131,6 +2143,7 @@ static void handle_stripe_dirtying5(raid
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ atomic_inc(&conf->reads_for_rmw);
} else {
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -2154,6 +2167,7 @@ static void handle_stripe_dirtying5(raid
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ atomic_inc(&conf->reads_for_rcw);
} else {
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -2219,6 +2233,7 @@ static void handle_stripe_dirtying6(raid
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ atomic_inc(&conf->reads_for_rcw);
} else {
pr_debug("Request delayed stripe %llu "
"block %d for Reconstruct\n",
@@ -2789,7 +2804,7 @@ static bool handle_stripe5(struct stripe
ops_run_io(sh, &s);
- return_io(return_bi);
+ return_io(return_bi, conf);
return blocked_rdev == NULL;
}
@@ -3011,7 +3026,7 @@ static bool handle_stripe6(struct stripe
ops_run_io(sh, &s);
- return_io(return_bi);
+ return_io(return_bi, conf);
return blocked_rdev == NULL;
}
@@ -3217,6 +3232,7 @@ static void raid5_align_endio(struct bio
raid_bi->bi_next = NULL;
rdev_dec_pending(rdev, conf->mddev);
+ atomic_dec(&conf->out_reqs_in_queue);
if (!error && uptodate) {
bio_endio(raid_bi, 0);
@@ -3287,6 +3303,7 @@ static int chunk_aligned_read(struct req
&pd_idx,
conf);
+ atomic_dec(&conf->in_reqs_in_queue);
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
if (rdev && test_bit(In_sync, &rdev->flags)) {
@@ -3311,6 +3328,9 @@ static int chunk_aligned_read(struct req
atomic_inc(&conf->active_aligned_reads);
spin_unlock_irq(&conf->device_lock);
+ atomic_inc(&conf->out_reqs_in_queue);
+ atomic_inc(&conf->aligned_reads);
+ atomic_inc(&conf->reads_out);
generic_make_request(align_bi);
return 1;
} else {
@@ -3384,6 +3404,8 @@ static int make_request(struct request_q
const int rw = bio_data_dir(bi);
int cpu, remaining;
+ atomic_inc(&conf->in_reqs_in_queue);
+
if (unlikely(bio_barrier(bi))) {
bio_endio(bi, -EOPNOTSUPP);
return 0;
@@ -3397,6 +3419,11 @@ static int make_request(struct request_q
bio_sectors(bi));
part_stat_unlock();
+ if (rw == WRITE)
+ atomic_inc(&conf->writes_in);
+ else
+ atomic_inc(&conf->reads_in);
+
if (rw == READ &&
mddev->reshape_position == MaxSector &&
chunk_aligned_read(q,bi))
@@ -3508,6 +3535,7 @@ static int make_request(struct request_q
if ( rw == WRITE )
md_write_end(mddev);
+ atomic_dec(&conf->in_reqs_in_queue);
bio_endio(bi, 0);
}
@@ -3981,10 +4009,37 @@ stripe_cache_active_show(mddev_t *mddev,
static struct md_sysfs_entry
raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
+static ssize_t
+stat_show(mddev_t *mddev, char *page)
+{
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+ if (conf)
+ return sprintf(page, "%u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+ atomic_read(&conf->reads_in),
+ atomic_read(&conf->writes_in),
+ atomic_read(&conf->reads_out),
+ atomic_read(&conf->writes_out),
+ atomic_read(&conf->reads_for_rmw),
+ atomic_read(&conf->reads_for_rcw),
+ atomic_read(&conf->aligned_reads),
+ atomic_read(&conf->active_stripes),
+ atomic_read(&conf->in_reqs_in_queue),
+ atomic_read(&conf->out_reqs_in_queue),
+ atomic_read(&conf->delayed),
+ atomic_read(&conf->bit_delayed),
+ atomic_read(&conf->out_of_stripes));
+ else
+ return 0;
+}
+
+static struct md_sysfs_entry
+raid5_stats = __ATTR_RO(stat);
+
static struct attribute *raid5_attrs[] = {
&raid5_stripecache_size.attr,
&raid5_stripecache_active.attr,
&raid5_preread_bypass_threshold.attr,
+ &raid5_stats.attr,
NULL,
};
static struct attribute_group raid5_attrs_group = {
Index: linux-2.6/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.orig/include/linux/raid/raid5.h
+++ linux-2.6/include/linux/raid/raid5.h
@@ -385,6 +385,22 @@ struct raid5_private_data {
int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
+
+ /*
+ * Stats
+ */
+ atomic_t reads_in;
+ atomic_t writes_in;
+ atomic_t reads_out;
+ atomic_t writes_out;
+ atomic_t reads_for_rmw;
+ atomic_t reads_for_rcw;
+ atomic_t aligned_reads;
+ atomic_t in_reqs_in_queue;
+ atomic_t out_reqs_in_queue;
+ atomic_t delayed;
+ atomic_t bit_delayed;
+ atomic_t out_of_stripes;
};
typedef struct raid5_private_data raid5_conf_t;
Index: linux-2.6/Documentation/md.txt
===================================================================
--- linux-2.6.orig/Documentation/md.txt
+++ linux-2.6/Documentation/md.txt
@@ -484,3 +484,26 @@ These currently include
to 1. Setting this to 0 disables bypass accounting and
requires preread stripes to wait until all full-width stripe-
writes are complete. Valid values are 0 to stripe_cache_size.
+ stat (currently raid 5/6 only)
+ Reports various performance statistics related to the array. In
+ order, separated by spaces:
+ reads in: number of reads submitted to the array
+ writes in: number of writes submitted to the array
+ reads out: number of reads performed on the underlying devices
+ writes out: number of writes performed on the underlying devices
+ reads for rmw: number of reads for read-modify-write operations
+ reads for rcw: number of reads for reconstruct-write operations
+ aligned reads: number of reads via the aligned path
+
+ active stripes: number of stripes currently in use
+ in reqs in queue: current number of requests queued on the array
+ out reqs in queue: current number of requests queued for the underlying
+ devices
+
+ delayed: number of write requests that were delayed to perform reads
+ bit delayed: number of write requests that were delayed to update the
+ bitmap
+ out of stripes: number of times the array ran out of free stripes and
+ a request had to wait; if this value is high, increasing
+ stripe_cache_size may help.
+ More statistics may be added at the end of the line in the future.
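
For illustration only and not part of the patch, here is a sketch of one way
these counters might be used when chasing a write-performance problem:
sample the stat file twice and compare how many reads were issued for rmw
versus rcw over the interval. The md0 path and the 10 second interval are
assumptions; the field positions follow the sprintf() in the raid5.c hunk
above.

/* Hypothetical usage sketch, not part of the patch. */
#include <stdio.h>
#include <unistd.h>

static int read_stats(unsigned int v[13])
{
	FILE *f = fopen("/sys/class/block/md0/md/stat", "r");
	int i, n = 0;

	if (!f)
		return -1;
	for (i = 0; i < 13; i++)
		n += (fscanf(f, "%u", &v[i]) == 1);
	fclose(f);
	return n == 13 ? 0 : -1;
}

int main(void)
{
	unsigned int a[13], b[13];

	if (read_stats(a))
		return 1;
	sleep(10);
	if (read_stats(b))
		return 1;
	/* Fields 4 and 5 (0-based) are reads_for_rmw and reads_for_rcw. */
	printf("reads for rmw: %u, reads for rcw: %u over 10s\n",
	       b[4] - a[4], b[5] - a[5]);
	return 0;
}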