linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: scjody@sun.com
To: linux-raid@vger.kernel.org, neilb@suse.de
Subject: [RFC patch 1/1] Track raid5/6 statistics.
Date: Wed, 26 Nov 2008 14:04:53 -0500	[thread overview]
Message-ID: <20081126190854.813151413@sun.com> (raw)
In-Reply-To: 20081126190452.775333692@sun.com

[-- Attachment #1: raid5-stats.patch --]
[-- Type: TEXT/PLAIN, Size: 11035 bytes --]

This patch tracks various statistics related to the performance of a RAID 5
or 6 array.  These have been useful to us in the past to help solve
performance issues.  They are reported via /proc/mdstat.

I realize that the format of the statistics may not be the best, and
there may be a better location than /proc/mdstat, so I welcome suggestions
on where to put them.

I will add documentation once we've decided on the format and location (or
if nobody objects to the current format and location.)

Signed-off-by: Jody McIntyre <scjody@sun.com>

Index: linux-2.6/drivers/md/raid5.c
===================================================================
--- linux-2.6.orig/drivers/md/raid5.c
+++ linux-2.6/drivers/md/raid5.c
@@ -136,7 +136,7 @@ static inline int raid6_next_disk(int di
 	return (disk < raid_disks) ? disk : 0;
 }
 
-static void return_io(struct bio *return_bi)
+static void return_io(struct bio *return_bi, raid5_conf_t *conf)
 {
 	struct bio *bi = return_bi;
 	while (bi) {
@@ -145,6 +145,7 @@ static void return_io(struct bio *return
 		bi->bi_next = NULL;
 		bi->bi_size = 0;
 		bio_endio(bi, 0);
+		atomic_dec(&conf->in_reqs_in_queue);
 		bi = return_bi;
 	}
 }
@@ -167,10 +168,12 @@ static void __release_stripe(raid5_conf_
 			if (test_bit(STRIPE_DELAYED, &sh->state)) {
 				list_add_tail(&sh->lru, &conf->delayed_list);
 				blk_plug_device(conf->mddev->queue);
+				atomic_inc(&conf->delayed);
 			} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 				   sh->bm_seq - conf->seq_write > 0) {
 				list_add_tail(&sh->lru, &conf->bitmap_list);
 				blk_plug_device(conf->mddev->queue);
+				atomic_inc(&conf->bit_delayed);
 			} else {
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
 				list_add_tail(&sh->lru, &conf->handle_list);
@@ -347,6 +350,7 @@ static struct stripe_head *get_active_st
 			if (noblock && sh == NULL)
 				break;
 			if (!sh) {
+				atomic_inc(&conf->out_of_stripes);
 				conf->inactive_blocked = 1;
 				wait_event_lock_irq(conf->wait_for_stripe,
 						    !list_empty(&conf->inactive_list) &&
@@ -369,6 +373,10 @@ static struct stripe_head *get_active_st
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
+				if (test_bit(STRIPE_DELAYED, &sh->state))
+					atomic_dec(&conf->delayed);
+				if (test_bit(STRIPE_BIT_DELAY, &sh->state))
+					atomic_dec(&conf->bit_delayed);
 			}
 		}
 	} while (sh == NULL);
@@ -406,10 +414,13 @@ static void ops_run_io(struct stripe_hea
 		bi = &sh->dev[i].req;
 
 		bi->bi_rw = rw;
-		if (rw == WRITE)
+		if (rw == WRITE) {
+			atomic_inc(&conf->writes_out);
 			bi->bi_end_io = raid5_end_write_request;
-		else
+		} else {
+			atomic_inc(&conf->reads_out);
 			bi->bi_end_io = raid5_end_read_request;
+		}
 
 		rcu_read_lock();
 		rdev = rcu_dereference(conf->disks[i].rdev);
@@ -444,6 +455,7 @@ static void ops_run_io(struct stripe_hea
 			    test_bit(R5_ReWrite, &sh->dev[i].flags))
 				atomic_add(STRIPE_SECTORS,
 					&rdev->corrected_errors);
+			atomic_inc(&conf->out_reqs_in_queue);
 			generic_make_request(bi);
 		} else {
 			if (rw == WRITE)
@@ -547,7 +559,7 @@ static void ops_complete_biofill(void *s
 	spin_unlock_irq(&conf->device_lock);
 	clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
-	return_io(return_bi);
+	return_io(return_bi, conf);
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 	release_stripe(sh);
@@ -1074,6 +1086,8 @@ static void raid5_end_read_request(struc
 	mdk_rdev_t *rdev;
 
 
+	atomic_dec(&conf->out_reqs_in_queue);
+
 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
 			break;
@@ -1153,6 +1167,8 @@ static void raid5_end_write_request(stru
 	int disks = sh->disks, i;
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 
+	atomic_dec(&conf->out_reqs_in_queue);
+
 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
 			break;
@@ -2131,6 +2147,7 @@ static void handle_stripe_dirtying5(raid
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					atomic_inc(&conf->reads_for_rmw);
 				} else {
 					set_bit(STRIPE_DELAYED, &sh->state);
 					set_bit(STRIPE_HANDLE, &sh->state);
@@ -2154,6 +2171,7 @@ static void handle_stripe_dirtying5(raid
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					atomic_inc(&conf->reads_for_rcw);
 				} else {
 					set_bit(STRIPE_DELAYED, &sh->state);
 					set_bit(STRIPE_HANDLE, &sh->state);
@@ -2219,6 +2237,7 @@ static void handle_stripe_dirtying6(raid
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
+					atomic_inc(&conf->reads_for_rcw);
 				} else {
 					pr_debug("Request delayed stripe %llu "
 						"block %d for Reconstruct\n",
@@ -2556,6 +2575,8 @@ static bool handle_stripe5(struct stripe
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
+	atomic_inc(&conf->handle_called);
+
 	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
 	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
@@ -2789,7 +2810,7 @@ static bool handle_stripe5(struct stripe
 
 	ops_run_io(sh, &s);
 
-	return_io(return_bi);
+	return_io(return_bi, conf);
 
 	return blocked_rdev == NULL;
 }
@@ -2816,6 +2837,8 @@ static bool handle_stripe6(struct stripe
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
+	atomic_inc(&conf->handle_called);
+
 	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
 	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
@@ -3011,7 +3034,7 @@ static bool handle_stripe6(struct stripe
 
 	ops_run_io(sh, &s);
 
-	return_io(return_bi);
+	return_io(return_bi, conf);
 
 	return blocked_rdev == NULL;
 }
@@ -3039,6 +3062,7 @@ static void raid5_activate_delayed(raid5
 			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
 			list_add_tail(&sh->lru, &conf->hold_list);
+			atomic_dec(&conf->delayed);
 		}
 	} else
 		blk_plug_device(conf->mddev->queue);
@@ -3217,6 +3241,7 @@ static void raid5_align_endio(struct bio
 	raid_bi->bi_next = NULL;
 
 	rdev_dec_pending(rdev, conf->mddev);
+	atomic_dec(&conf->out_reqs_in_queue);
 
 	if (!error && uptodate) {
 		bio_endio(raid_bi, 0);
@@ -3265,6 +3290,7 @@ static int chunk_aligned_read(struct req
 		pr_debug("chunk_aligned_read : non aligned\n");
 		return 0;
 	}
+	atomic_inc(&conf->aligned_reads);
 	/*
  	 * use bio_clone to make a copy of the bio
 	 */
@@ -3287,11 +3313,13 @@ static int chunk_aligned_read(struct req
 					&pd_idx,
 					conf);
 
+	atomic_dec(&conf->in_reqs_in_queue);
 	rcu_read_lock();
 	rdev = rcu_dereference(conf->disks[dd_idx].rdev);
 	if (rdev && test_bit(In_sync, &rdev->flags)) {
 		atomic_inc(&rdev->nr_pending);
 		rcu_read_unlock();
+		atomic_inc(&conf->reads_out);
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev =  rdev->bdev;
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
@@ -3311,6 +3339,7 @@ static int chunk_aligned_read(struct req
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
+		atomic_inc(&conf->out_reqs_in_queue);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -3384,6 +3413,8 @@ static int make_request(struct request_q
 	const int rw = bio_data_dir(bi);
 	int cpu, remaining;
 
+	atomic_inc(&conf->in_reqs_in_queue);
+
 	if (unlikely(bio_barrier(bi))) {
 		bio_endio(bi, -EOPNOTSUPP);
 		return 0;
@@ -3397,6 +3428,11 @@ static int make_request(struct request_q
 		      bio_sectors(bi));
 	part_stat_unlock();
 
+	if (rw == WRITE)
+		atomic_inc(&conf->writes_in);
+	else
+		atomic_inc(&conf->reads_in);
+
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&
 	     chunk_aligned_read(q,bi))
@@ -3508,6 +3544,7 @@ static int make_request(struct request_q
 
 		if ( rw == WRITE )
 			md_write_end(mddev);
+		atomic_dec(&conf->in_reqs_in_queue);
 
 		bio_endio(bi, 0);
 	}
@@ -3862,6 +3899,7 @@ static void raid5d(mddev_t *mddev)
 			if (!ok)
 				break;
 			handled++;
+			atomic_inc(&conf->handled_in_raid5d);
 		}
 
 		sh = __get_priority_stripe(conf);
@@ -3871,6 +3909,7 @@ static void raid5d(mddev_t *mddev)
 		spin_unlock_irq(&conf->device_lock);
 		
 		handled++;
+		atomic_inc(&conf->handled_in_raid5d);
 		handle_stripe(sh, conf->spare_page);
 		release_stripe(sh);
 
@@ -4330,15 +4369,37 @@ static void status(struct seq_file *seq,
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	int i;
 
-	seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
-	seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
+	seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
+				mddev->chunk_size >> 10, mddev->layout);
+	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
+				conf->raid_disks - mddev->degraded);
 	for (i = 0; i < conf->raid_disks; i++)
-		seq_printf (seq, "%s",
+		seq_printf(seq, "%s",
 			       conf->disks[i].rdev &&
 			       test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
-	seq_printf (seq, "]");
+	seq_printf(seq, "]\n");
+	seq_printf(seq, "\tin: %u reads, %u writes; out: %u reads, %u writes\n",
+			atomic_read(&conf->reads_in),
+			atomic_read(&conf->writes_in),
+			atomic_read(&conf->reads_out),
+			atomic_read(&conf->writes_out));
+	seq_printf(seq, "\t%u in raid5d, %u out of stripes, %u handle called\n",
+			atomic_read(&conf->handled_in_raid5d),
+			atomic_read(&conf->out_of_stripes),
+			atomic_read(&conf->handle_called));
+	seq_printf(seq, "\treads: %u for rmw, %u for rcw, %u aligned,\n",
+			atomic_read(&conf->reads_for_rmw),
+			atomic_read(&conf->reads_for_rcw),
+			atomic_read(&conf->aligned_reads));
+	seq_printf(seq, "\t%u delayed, %u bit delayed, %u active, ",
+			atomic_read(&conf->delayed),
+			atomic_read(&conf->bit_delayed),
+			atomic_read(&conf->active_stripes));
+	seq_printf(seq, "queues: %u in, %u out\n",
+			atomic_read(&conf->in_reqs_in_queue),
+			atomic_read(&conf->out_reqs_in_queue));
 #ifdef DEBUG
-	seq_printf (seq, "\n");
+	seq_printf(seq, "\n");
 	printall(seq, conf);
 #endif
 }
Index: linux-2.6/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.orig/include/linux/raid/raid5.h
+++ linux-2.6/include/linux/raid/raid5.h
@@ -385,6 +385,26 @@ struct raid5_private_data {
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
+
+	/*
+	 * Stats
+	 */
+	atomic_t		reads_in;
+	atomic_t		writes_in;
+	atomic_t		reads_out;
+	atomic_t		writes_out;
+	atomic_t		handled_in_raid5d;
+	atomic_t		out_of_stripes;
+	atomic_t		reads_for_rmw;
+	atomic_t		reads_for_rcw;
+	atomic_t		aligned_reads;
+	atomic_t		writes_zcopy;
+	atomic_t		writes_copied;
+	atomic_t		handle_called;
+	atomic_t		delayed;
+	atomic_t		bit_delayed;
+	atomic_t		in_reqs_in_queue;
+	atomic_t		out_reqs_in_queue;
 };
 
 typedef struct raid5_private_data raid5_conf_t;

-- 

  reply	other threads:[~2008-11-26 19:04 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-26 19:04 [RFC patch 0/1] Lustre RAID 5/6 patches scjody
2008-11-26 19:04 ` scjody [this message]
2008-11-26 21:50   ` [RFC patch 1/1] Track raid5/6 statistics Dan Williams
2008-11-27 13:45     ` Jody McIntyre
2008-11-28 21:14       ` Dan Williams
2008-11-27 11:47   ` Gabor Gombas
2008-11-27 13:52     ` Jody McIntyre
2008-11-27 17:15       ` Gabor Gombas
2008-11-27 18:29         ` Jody McIntyre
2008-11-27 19:21           ` Mr. James W. Laferriere
2008-11-28 17:21           ` Bill Davidsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081126190854.813151413@sun.com \
    --to=scjody@sun.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).