* [PATCH] md: Track raid5/6 statistics
@ 2009-03-12 20:57 Jody McIntyre
  2009-03-14 17:07 ` Dan Williams
From: Jody McIntyre @ 2009-03-12 20:57 UTC (permalink / raw)
  To: linux-raid, neilb, dan.j.williams

This patch tracks various statistics related to the performance of a RAID 5
or 6 array.  These have been useful to us in the past to help solve
performance issues.  They are reported via the 'stat' file in each device's
'md' sysfs directory, e.g. /sys/class/block/md0/md/stat .

A slight amount of overhead is added by the atomic_inc() and atomic_dec()
calls used in these patches, but it's so low I've been unable to measure it.
Both calls are already used extensively in raid5.c to track internal
counters so I believe this is OK.

Signed-off-by: Jody McIntyre <scjody@sun.com>

Index: linux-2.6/drivers/md/raid5.c
===================================================================
--- linux-2.6.orig/drivers/md/raid5.c
+++ linux-2.6/drivers/md/raid5.c
@@ -136,7 +136,7 @@ static inline int raid6_next_disk(int di
 	return (disk < raid_disks) ? disk : 0;
 }
 
-static void return_io(struct bio *return_bi)
+static void return_io(struct bio *return_bi, raid5_conf_t *conf)
 {
 	struct bio *bi = return_bi;
 	while (bi) {
@@ -145,6 +145,7 @@ static void return_io(struct bio *return
 		bi->bi_next = NULL;
 		bi->bi_size = 0;
 		bio_endio(bi, 0);
+		atomic_dec(&conf->in_reqs_in_queue);
 		bi = return_bi;
 	}
 }
@@ -167,10 +168,12 @@ static void __release_stripe(raid5_conf_
 			if (test_bit(STRIPE_DELAYED, &sh->state)) {
 				list_add_tail(&sh->lru, &conf->delayed_list);
 				blk_plug_device(conf->mddev->queue);
+				atomic_inc(&conf->delayed);
 			} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 				   sh->bm_seq - conf->seq_write > 0) {
 				list_add_tail(&sh->lru, &conf->bitmap_list);
 				blk_plug_device(conf->mddev->queue);
+				atomic_inc(&conf->bit_delayed);
 			} else {
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
 				list_add_tail(&sh->lru, &conf->handle_list);
@@ -347,6 +350,7 @@ static struct stripe_head *get_active_st
 			if (noblock && sh == NULL)
 				break;
 			if (!sh) {
+				atomic_inc(&conf->out_of_stripes);
 				conf->inactive_blocked = 1;
 				wait_event_lock_irq(conf->wait_for_stripe,
 						    !list_empty(&conf->inactive_list) &&
@@ -406,10 +410,13 @@ static void ops_run_io(struct stripe_hea
 		bi = &sh->dev[i].req;
 
 		bi->bi_rw = rw;
-		if (rw == WRITE)
+		if (rw == WRITE) {
+			atomic_inc(&conf->writes_out);
 			bi->bi_end_io = raid5_end_write_request;
-		else
+		} else {
+			atomic_inc(&conf->reads_out);
 			bi->bi_end_io = raid5_end_read_request;
+		}
 
 		rcu_read_lock();
 		rdev = rcu_dereference(conf->disks[i].rdev);
@@ -444,6 +451,7 @@ static void ops_run_io(struct stripe_hea
 			    test_bit(R5_ReWrite, &sh->dev[i].flags))
 				atomic_add(STRIPE_SECTORS,
 					&rdev->corrected_errors);
+			atomic_inc(&conf->out_reqs_in_queue);
 			generic_make_request(bi);
 		} else {
 			if (rw == WRITE)
@@ -547,7 +555,7 @@ static void ops_complete_biofill(void *s
 	spin_unlock_irq(&conf->device_lock);
 	clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
-	return_io(return_bi);
+	return_io(return_bi, conf);
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 	release_stripe(sh);
@@ -1074,6 +1082,8 @@ static void raid5_end_read_request(struc
 	mdk_rdev_t *rdev;
 
 
+	atomic_dec(&conf->out_reqs_in_queue);
+
 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
 			break;
@@ -1153,6 +1163,8 @@ static void raid5_end_write_request(stru
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 
+	atomic_dec(&conf->out_reqs_in_queue);
+
 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
 			break;
@@ -2131,6 +2143,7 @@ static void handle_stripe_dirtying5(raid
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					atomic_inc(&conf->reads_for_rmw);
 				} else {
 					set_bit(STRIPE_DELAYED, &sh->state);
 					set_bit(STRIPE_HANDLE, &sh->state);
@@ -2154,6 +2167,7 @@ static void handle_stripe_dirtying5(raid
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					atomic_inc(&conf->reads_for_rcw);
 				} else {
 					set_bit(STRIPE_DELAYED, &sh->state);
 					set_bit(STRIPE_HANDLE, &sh->state);
@@ -2219,6 +2233,7 @@ static void handle_stripe_dirtying6(raid
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					atomic_inc(&conf->reads_for_rcw);
 				} else {
 					pr_debug("Request delayed stripe %llu "
 						"block %d for Reconstruct\n",
@@ -2789,7 +2804,7 @@ static bool handle_stripe5(struct stripe
 
 	ops_run_io(sh, &s);
 
-	return_io(return_bi);
+	return_io(return_bi, conf);
 
 	return blocked_rdev == NULL;
 }
@@ -3011,7 +3026,7 @@ static bool handle_stripe6(struct stripe
 
 	ops_run_io(sh, &s);
 
-	return_io(return_bi);
+	return_io(return_bi, conf);
 
 	return blocked_rdev == NULL;
 }
@@ -3217,6 +3232,7 @@ static void raid5_align_endio(struct bio
 	raid_bi->bi_next = NULL;
 
 	rdev_dec_pending(rdev, conf->mddev);
+	atomic_dec(&conf->out_reqs_in_queue);
 
 	if (!error && uptodate) {
 		bio_endio(raid_bi, 0);
@@ -3287,6 +3303,7 @@ static int chunk_aligned_read(struct req
 					&pd_idx,
 					conf);
 
+	atomic_dec(&conf->in_reqs_in_queue);
 	rcu_read_lock();
 	rdev = rcu_dereference(conf->disks[dd_idx].rdev);
 	if (rdev && test_bit(In_sync, &rdev->flags)) {
@@ -3311,6 +3328,9 @@ static int chunk_aligned_read(struct req
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
+		atomic_inc(&conf->out_reqs_in_queue);
+		atomic_inc(&conf->aligned_reads);
+		atomic_inc(&conf->reads_out);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -3384,6 +3404,8 @@ static int make_request(struct request_q
 	const int rw = bio_data_dir(bi);
 	int cpu, remaining;
 
+	atomic_inc(&conf->in_reqs_in_queue);
+
 	if (unlikely(bio_barrier(bi))) {
 		bio_endio(bi, -EOPNOTSUPP);
 		return 0;
@@ -3397,6 +3419,11 @@ static int make_request(struct request_q
 		      bio_sectors(bi));
 	part_stat_unlock();
 
+	if (rw == WRITE)
+		atomic_inc(&conf->writes_in);
+	else
+		atomic_inc(&conf->reads_in);
+
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&
 	     chunk_aligned_read(q,bi))
@@ -3508,6 +3535,7 @@ static int make_request(struct request_q
 
 		if ( rw == WRITE )
 			md_write_end(mddev);
+		atomic_dec(&conf->in_reqs_in_queue);
 
 		bio_endio(bi, 0);
 	}
@@ -3981,10 +4009,37 @@ stripe_cache_active_show(mddev_t *mddev,
 static struct md_sysfs_entry
 raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
 
+static ssize_t
+stat_show(mddev_t *mddev, char *page)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+	if (conf)
+		return sprintf(page, "%u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+			       atomic_read(&conf->reads_in),
+			       atomic_read(&conf->writes_in),
+			       atomic_read(&conf->reads_out),
+			       atomic_read(&conf->writes_out),
+			       atomic_read(&conf->reads_for_rmw),
+			       atomic_read(&conf->reads_for_rcw),
+			       atomic_read(&conf->aligned_reads),
+			       atomic_read(&conf->active_stripes),
+			       atomic_read(&conf->in_reqs_in_queue),
+			       atomic_read(&conf->out_reqs_in_queue),
+			       atomic_read(&conf->delayed),
+			       atomic_read(&conf->bit_delayed),
+			       atomic_read(&conf->out_of_stripes));
+	else
+		return 0;
+}
+
+static struct md_sysfs_entry
+raid5_stats = __ATTR_RO(stat);
+
 static struct attribute *raid5_attrs[] =  {
 	&raid5_stripecache_size.attr,
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
+	&raid5_stats.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {
Index: linux-2.6/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.orig/include/linux/raid/raid5.h
+++ linux-2.6/include/linux/raid/raid5.h
@@ -385,6 +385,22 @@ struct raid5_private_data {
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
+
+	/*
+	 * Stats
+	 */
+	atomic_t		reads_in;
+	atomic_t		writes_in;
+	atomic_t		reads_out;
+	atomic_t		writes_out;
+	atomic_t		reads_for_rmw;
+	atomic_t		reads_for_rcw;
+	atomic_t		aligned_reads;
+	atomic_t		in_reqs_in_queue;
+	atomic_t		out_reqs_in_queue;
+	atomic_t		delayed;
+	atomic_t		bit_delayed;
+	atomic_t		out_of_stripes;
 };
 
 typedef struct raid5_private_data raid5_conf_t;
Index: linux-2.6/Documentation/md.txt
===================================================================
--- linux-2.6.orig/Documentation/md.txt
+++ linux-2.6/Documentation/md.txt
@@ -484,3 +484,26 @@ These currently include
       to 1.  Setting this to 0 disables bypass accounting and
       requires preread stripes to wait until all full-width stripe-
       writes are complete.  Valid values are 0 to stripe_cache_size.
+  stat (currently raid 5/6 only)
+      Reports various performance statistics related to the array.  In
+      order, separated by spaces:
+	reads in: number of reads submitted to the array
+	writes in: number of writes submitted to the array
+	reads out: number of reads performed on the underlying devices
+	writes out: number of writes performed on the underlying devices
+	reads for rmw: number of reads for read-modify-write operations
+	reads for rcw: number of reads for reconstruct-write (rcw) operations
+	aligned reads: number of reads via the aligned path
+
+	active stripes: number of stripes currently in use
+	in reqs in queue: current number of requests queued on the array
+	out reqs in queue: current number of requests queued for the underlying
+			   devices
+
+	delayed: number of write requests that were delayed to perform reads
+	bit delayed: number of write requests that were delayed to update the
+		     bitmap
+	out of stripes: number of times the array has run out of stripes;
+			if this value is high, increasing the stripe cache
+			may be useful.
+      More statistics may be added at the end of the line in the future.
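
Not part of the patch itself, but as a reference, a minimal userspace sketch
of sampling the proposed file.  The field order follows the documentation
above; the md0 path is just an example:

	#include <stdio.h>

	int main(void)
	{
		static const char *names[] = {
			"reads_in", "writes_in", "reads_out", "writes_out",
			"reads_for_rmw", "reads_for_rcw", "aligned_reads",
			"active_stripes", "in_reqs_in_queue",
			"out_reqs_in_queue", "delayed", "bit_delayed",
			"out_of_stripes",
		};
		unsigned int v[13];
		FILE *f = fopen("/sys/class/block/md0/md/stat", "r");
		int i, n;

		if (!f)
			return 1;
		n = fscanf(f, "%u %u %u %u %u %u %u %u %u %u %u %u %u",
			   &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6],
			   &v[7], &v[8], &v[9], &v[10], &v[11], &v[12]);
		fclose(f);
		for (i = 0; i < n && i < 13; i++)
			printf("%s: %u\n", names[i], v[i]);
		return 0;
	}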


* Re: [PATCH] md: Track raid5/6 statistics
  2009-03-12 20:57 [PATCH] md: Track raid5/6 statistics Jody McIntyre
@ 2009-03-14 17:07 ` Dan Williams
  2009-05-06 20:05   ` Jody McIntyre
From: Dan Williams @ 2009-03-14 17:07 UTC (permalink / raw)
  To: Jody McIntyre; +Cc: linux-raid, neilb

On Thu, Mar 12, 2009 at 1:57 PM, Jody McIntyre <scjody@sun.com> wrote:
> This patch tracks various statistics related to the performance of a RAID 5
> or 6 array.  These have been useful to us in the past to help solve
> performance issues.  They are reported via the 'stat' file in each device's
> 'md' sysfs directory, e.g. /sys/class/block/md0/md/stat .
>
> A slight amount of overhead is added by the atomic_inc() and atomic_dec()
> calls used in these patches, but it's so low I've been unable to measure it.
> Both calls are already used extensively in raid5.c to track internal
> counters so I believe this is OK.
>

Hi Jody,

I am curious, can you say a bit more about the performance problems
you solved with this data?  Is there a corresponding userspace tool
that interprets these numbers?  My earlier recommendation to use some
type of tracepoint had more to do with the fact that the data could be
interpreted by existing tools rather than the overhead.  Looking a bit
closer at the statistics it seems most are currently available via
/proc/diskstats and blktrace...

[..]
> +static ssize_t
> +stat_show(mddev_t *mddev, char *page)
> +{
> +       raid5_conf_t *conf = mddev_to_conf(mddev);
> +       if (conf)
> +               return sprintf(page, "%u %u %u %u %u %u %u %u %u %u %u %u %u\n",
> +                              atomic_read(&conf->reads_in),
> +                              atomic_read(&conf->writes_in),
> +                              atomic_read(&conf->reads_out),
> +                              atomic_read(&conf->writes_out),

...available in /proc/diskstats and blktrace.

> +                              atomic_read(&conf->reads_for_rmw),
> +                              atomic_read(&conf->reads_for_rcw),
> +                              atomic_read(&conf->aligned_reads),

...these could be useful as there is no other way to retrieve this information.

> +                              atomic_read(&conf->active_stripes),

...duplicate of stripe_cache_active attribute

> +                              atomic_read(&conf->in_reqs_in_queue),
> +                              atomic_read(&conf->out_reqs_in_queue),

...available via blktrace

> +                              atomic_read(&conf->delayed),
> +                              atomic_read(&conf->bit_delayed),
> +                              atomic_read(&conf->out_of_stripes));

...these could be useful

So, my original suggestion/question should have been why not extend
blktrace to understand these incremental MD events?
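
A minimal sketch of one way such an event could be fed into the existing
blktrace stream, assuming blk_add_trace_msg() from <linux/blktrace_api.h>
(the hook point and message text are illustrative only):

	/* e.g. where the posted patch increments conf->out_of_stripes */
	blk_add_trace_msg(conf->mddev->queue, "md: %s out of stripes",
			  mdname(conf->mddev));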

Regards,
Dan


* Re: [PATCH] md: Track raid5/6 statistics
  2009-03-14 17:07 ` Dan Williams
@ 2009-05-06 20:05   ` Jody McIntyre
  2009-05-07 16:30     ` Dan Williams
From: Jody McIntyre @ 2009-05-06 20:05 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-raid, neilb

Hi Dan,

On Sat, Mar 14, 2009 at 10:07:49AM -0700, Dan Williams wrote:

> I am curious, can you say a bit more about the performance problems
> you solved with this data?  Is there a corresponding userspace tool
> that interprets these numbers?  

With the original patch there was no need for a tool - statistics were
in /proc/mdstat and were fairly easy to understand.  The patch I
recently submitted would need a small tool, but one has not been
written.

I've looked into how we've used this data in the past, and while our
support team often requests /proc/mdstat from customers experiencing
RAID performance problems, they rarely receive it.  The original
statistics patch (which has been shipping with Lustre for about 3 years)
seems to have been useful for 2 things:

1. Analyzing RAID IO patterns when developing our RAID performance
improvements (which seem to be completely obsolete now thanks to the
more extensive improvements you and Neil have done, so I won't be
submitting them.)  Of course, this is not a good reason to merge the
patch - if anyone (including us) wants to do similar studies, they can
develop their own internal patch.

2. The out_of_stripes tracking is useful - we've found several cases
where stripe_cache_size was set too low and performance suffered as a
result.  Monitoring stripe_cache_active during IO is difficult so it's
far better to have a counter like this.

So if we can solve the second problem somehow - maybe just introduce a
read-only counter under /sys/block/md*/md/out_of_stripes - the need for
the rest of the patch goes away IMO.
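
A minimal sketch of what that standalone attribute could look like, following
the conventions already used in raid5.c (the counter field is the one from
the posted patch; the attribute name is illustrative):

	static ssize_t
	out_of_stripes_show(mddev_t *mddev, char *page)
	{
		raid5_conf_t *conf = mddev_to_conf(mddev);

		if (!conf)
			return 0;
		return sprintf(page, "%u\n",
			       atomic_read(&conf->out_of_stripes));
	}

	static struct md_sysfs_entry
	raid5_out_of_stripes = __ATTR_RO(out_of_stripes);

plus an entry in raid5_attrs[], as the patch already does for 'stat'.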

> [...]

> So, my original suggestion/question should have been why not extend
> blktrace to understand these incremental MD events?

Regarding blktrace specifically, it's really geared towards developers.
I played with it a bit and it looks like it might be useful to me at
some point, but I wouldn't expect a customer to use it.  It would need a
much better frontend tool and a more supported kernel interface than
debugfs.  But as I said, our customers aren't using our existing
/proc/mdstat information very much anyway so I don't think this problem
needs to be solved.

Cheers,
Jody

> Regards,
> Dan


* Re: [PATCH] md: Track raid5/6 statistics
  2009-05-06 20:05   ` Jody McIntyre
@ 2009-05-07 16:30     ` Dan Williams
  2009-05-11 13:36       ` Jody McIntyre
From: Dan Williams @ 2009-05-07 16:30 UTC (permalink / raw)
  To: Jody McIntyre; +Cc: linux-raid, neilb

On Wed, May 6, 2009 at 1:05 PM, Jody McIntyre <scjody@sun.com> wrote:
> 2. The out_of_stripes tracking is useful - we've found several cases
> where stripe_cache_size was set too low and performance suffered as a
> result.  Monitoring stripe_cache_active during IO is difficult so it's
> far better to have a counter like this.
>
> So if we can solve the second problem somehow - maybe just introduce a
> read-only counter under /sys/block/md*/md/out_of_stripes - the need for
> the rest of the patch goes away IMO.

It would be nice if the kernel could auto-tune stripe_cache_size, but
I think modifying it in a reactive fashion may do more harm than good.
 The times when we want write-out to be faster are usually the times
when the system has too much dirty memory lying around so there is no
room to increase the cache.  If we are under utilizing the stripe
cache then there is a good chance the memory could be put to better
use in the page cache, but then we are putting ourselves in a
compromised state when a write burst appears.

In the end I agree that having some kind of out_of_stripes
notification would be useful.  However, I think it would make more
sense to implement it as a "stripe_cache_active load average".  Then
for a given workload the operator can see if there are spikes or
sustained cache saturation.  What do you think?
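
As a rough sketch of the idea, reusing the loadavg fixed-point helpers from
<linux/sched.h> and sampling from a periodic timer (the cache_load field and
the sampling hook are invented for illustration):

	static void raid5_calc_cache_load(raid5_conf_t *conf)
	{
		unsigned long active = atomic_read(&conf->active_stripes);

		/* decaying average, same math as the system load average;
		 * CALC_LOAD() expects the new sample in fixed point */
		CALC_LOAD(conf->cache_load, EXP_1, active * FIXED_1);
	}

The result could then be exported read-only through sysfs, the same way the
other raid5 attributes are.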

Thanks,
Dan


* Re: [PATCH] md: Track raid5/6 statistics
  2009-05-07 16:30     ` Dan Williams
@ 2009-05-11 13:36       ` Jody McIntyre
  2009-05-13 13:10         ` Bill Davidsen
From: Jody McIntyre @ 2009-05-11 13:36 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-raid, neilb

On Thu, May 07, 2009 at 09:30:33AM -0700, Dan Williams wrote:

> It would be nice if the kernel could auto-tune stripe_cache_size, but
> I think modifying it in a reactive fashion may do more harm than good.
>  The times when we want write-out to be faster are usually the times
> when the system has too much dirty memory lying around so there is no
> room to increase the cache.  If we are under utilizing the stripe
> cache then there is a good chance the memory could be put to better
> use in the page cache, but then we are putting ourselves in a
> compromised state when a write burst appears.

Yes - it's really too bad that we have this tunable, but I can't think
of a good way to get rid of it.  In some customer issues I've seen,
performance really suffers when the array is out of stripes - enough to
make single IOs take _minutes_ in the worst cases.  This is especially
easy to reproduce during a resync or rebuild, for obvious reasons.

On a related note, there seems to be some confusion surrounding how much
memory is used by the stripe cache.  I've seen users who believed the
value was in kilobytes of memory, whereas the truth is a bit more
complicated.  We could add a stripe_cache_kb entry (writeable even) to
make this clearer, and/or improve Documentation/md.txt.  Also, we
helpfully print the amount allocated when the array is first run():

		printk(KERN_INFO "raid5: allocated %dkB for %s\n",
			memory, mdname(mddev));

but we don't ever provide an update when it changes.  I don't think we
want to printk() every time someone changes the sysfs tunable though -
perhaps we should get rid of the message in run()?
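
As a rough worked example of why the sysfs value is not simply kilobytes
(assuming the dominant cost is one PAGE_SIZE buffer per member device per
cached stripe): with stripe_cache_size=256 on an 8-disk array and 4 KiB
pages, the cache pins roughly 256 * 8 * 4 KiB = 8 MiB of page data, plus the
per-stripe struct overhead that run() folds into the printed kB figure.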

> In the end I agree that having some kind of out_of_stripes
> notification would be useful.  However, I think it would make more
> sense to implement it as a "stripe_cache_active load average".  Then
> for a given workload the operator can see if there are spikes or
> sustained cache saturation.  What do you think?

That makes sense.  It would be a more meaningful number than our current
statistic, which is "at some point since you started the array, we had
to wait for a stripe N times."

I'll come up with a patch when I get the chance.

Cheers,
Jody


* Re: [PATCH] md: Track raid5/6 statistics
  2009-05-11 13:36       ` Jody McIntyre
@ 2009-05-13 13:10         ` Bill Davidsen
  2009-10-02 17:01           ` Jody McIntyre
From: Bill Davidsen @ 2009-05-13 13:10 UTC (permalink / raw)
  To: Jody McIntyre; +Cc: Dan Williams, linux-raid, neilb

Jody McIntyre wrote:
> On Thu, May 07, 2009 at 09:30:33AM -0700, Dan Williams wrote:
>
>   
>> It would be nice if the kernel could auto-tune stripe_cache_size, but
>> I think modifying it in a reactive fashion may do more harm than good.
>>  The times when we want write-out to be faster are usually the times
>> when the system has too much dirty memory lying around so there is no
>> room to increase the cache.  If we are under utilizing the stripe
>> cache then there is a good chance the memory could be put to better
>> use in the page cache, but then we are putting ourselves in a
>> compromised state when a write burst appears.
>>     
>
> Yes - it's really too bad that we have this tunable, but I can't think
> of a good way to get rid of it.  In some customer issues I've seen,
> performance really suffers when the array is out of stripes - enough to
> make single IOs take _minutes_ in the worst cases.  This is especially
> easy to reproduce during a resync or rebuild, for obvious reasons.
>
> On a related note, there seems to be some confusion surrounding how much
> memory is used by the stripe cache.  I've seen users who believed the
> value was in kilobytes of memory, whereas the truth is a bit more
> complicated.  We could add a stripe_cache_kb entry (writeable even) to
> make this clearer, and/or improve Documentation/md.txt.  Also, we
> helpfully print the amount allocated when the array is first run():
>
> 		printk(KERN_INFO "raid5: allocated %dkB for %s\n",
> 			memory, mdname(mddev));
>
> but we don't ever provide an update when it changes.  I don't think we
> want to printk() every time someone changes the sysfs tunable though -
> perhaps we should get rid of the message in run()?
>
>   
I think the opposite: when it changes, log the new value. This is not
something likely to be done repeatedly - usually just when tuning right
after boot, or bumping the size for a resync or such; in any case it's
infrequent.
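
Something as small as a one-liner in the existing sysfs store path would do;
as a sketch (placement and wording illustrative, using names already present
in raid5.c):

	printk(KERN_INFO "raid5: %s: stripe_cache_size set to %d\n",
	       mdname(mddev), conf->max_nr_stripes);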
>> In the end I agree that having some kind of out_of_stripes
>> notification would be useful.  However, I think it would make more
>> sense to implement it as a "stripe_cache_active load average".  Then
>> for a given workload the operator can see if there are spikes or
>> sustained cache saturation.  What do you think?
>>     
>
> That makes sense.  It would be a more meaningful number than our current
> statistic, which is "at some point since you started the array, we had
> to wait for a stripe N times."
>
> I'll come up with a patch when I get the chance.
>   


-- 
bill davidsen <davidsen@tmr.com>
  CTO TMR Associates, Inc

"You are disgraced professional losers. And by the way, give us our money back."
    - Representative Earl Pomeroy,  Democrat of North Dakota
on the A.I.G. executives who were paid bonuses  after a federal bailout.




* Re: [PATCH] md: Track raid5/6 statistics
  2009-05-13 13:10         ` Bill Davidsen
@ 2009-10-02 17:01           ` Jody McIntyre
  2009-10-02 17:51             ` Bill Davidsen
From: Jody McIntyre @ 2009-10-02 17:01 UTC (permalink / raw)
  To: Bill Davidsen; +Cc: Dan Williams, linux-raid, neilb

I finally got around to looking at the load average code and thinking how
it could be applied to tracking stripe cache usage, and unfortunately I
don't have any great ideas.

What's useful to know is:

1. The current stripe_cache_active value, which can be sampled by a script
during heavy IO/resync/etc.  This is already available.

2. How often (relative to the amount of IO) we've had to block waiting for
a free stripe recently.  The "recently" part is hard to define and is not
implemented by the current patch - it just reports the number of
events since the array was started, but we can collect statistics from
before and after a run and compare.

3. We've had a few customers using write-intent bitmaps lately, and our
"bit delayed" counter (the number of stripes currently on bitmap_list) has
been useful in assessing the impact of bitmaps / changes to bitmap chunk
size.  But it's not really a great measure of anything so I'm open to
suggestions.  I think "average amount of time an IO is delayed due to
bitmaps" would be nice and probably not too hard to implement, but I'm
worried about the performance impact of this.
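
For what it's worth, the measurement itself could be cheap - a rough sketch,
assuming both list manipulations run under conf->device_lock as the existing
code suggests (the fields are invented for illustration):

	/* when parking the stripe for a bitmap update, next to the
	 * existing atomic_inc(&conf->bit_delayed): */
	sh->bitmap_delay_start = jiffies;
	list_add_tail(&sh->lru, &conf->bitmap_list);

	/* when the stripe is taken back off bitmap_list for handling: */
	conf->bitmap_delay_total += jiffies - sh->bitmap_delay_start;
	conf->bitmap_delay_samples++;

The average delay is then bitmap_delay_total / bitmap_delay_samples, at the
cost of a jiffies read and an addition per delayed stripe.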

Also, there's still the open question of where we report these values other
than /proc/mdstat and I'm really open to suggestions.  If nobody has any
ideas, we'll just continue to patch raid5.c ourselves to extend
/proc/mdstat.

Cheers,
Jody


* Re: [PATCH] md: Track raid5/6 statistics
  2009-10-02 17:01           ` Jody McIntyre
@ 2009-10-02 17:51             ` Bill Davidsen
From: Bill Davidsen @ 2009-10-02 17:51 UTC (permalink / raw)
  To: Jody McIntyre; +Cc: Dan Williams, linux-raid, neilb

Jody McIntyre wrote:
> I finally got around to looking at the load average code and thinking how
> it could be applied to tracking stripe cache usage, and unfortunately I
> don't have any great ideas.
>
> What's useful to know is:
>
> 1. The current stripe_cache_active value, which can be sampled by a script
> during heavy IO/resync/etc.  This is already available.
>
> 2. How often (relative to the amount of IO) we've had to block waiting for
> a free stripe recently.  The "recently" part is hard to define and is not
> implemented by the current patch - it just reports the number of
> events since the array was started, but we can collect statistics from
> before and after a run and compare.
>
> 3. We've had a few customers using write-intent bitmaps lately, and our
> "bit delayed" counter (the number of stripes currently on bitmap_list) has
> been useful in assessing the impact of bitmaps / changes to bitmap chunk
> size.  But it's not really a great measure of anything so I'm open to
> suggestions.  I think "average amount of time an IO is delayed due to
> bitmaps" would be nice and probably not too hard to implement, but I'm
> worried about the performance impact of this.
>
> Also, there's still the open question of where we report these values other
> than /proc/mdstat and I'm really open to suggestions.  If nobody has any
> ideas, we'll just continue to patch raid5.c ourselves to extend
> /proc/mdstat.
>   

I would think /sys would be a better place: anything which changes 
/proc/mdstat is likely to break some script, and therefore be less 
useful (and less adoptable to mainline), whereas another file in /sys 
would be really unlikely to cause an issue. People tend to look at what 
is in known files, but ignore files which were not known at the time of 
script creation. It tends toward a messy tree of files, but rarely 
breaks existing code.

-- 
Bill Davidsen <davidsen@tmr.com>
  Unintended results are the well-earned reward for incompetence.

