[md PATCH 05/16] md/raid1: clean up read_balance.

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 05/16] md/raid1: clean up read_balance.
Date: Wed, 11 May 2011 16:30:31 +1000	[thread overview]
Message-ID: <20110511063030.21263.65786.stgit@notabene.brown> (raw)
In-Reply-To: <20110511062743.21263.72802.stgit@notabene.brown>

read_balance has two loops which both look for a 'best'
device based on slightly different criteria.
This is clumsy and makes is hard to add extra criteria.

So replace it all with a single loop that combines everything.

Signed-off-by: NeilBrown <neilb@suse.de>
---

 drivers/md/raid1.c |   83 +++++++++++++++++++++-------------------------------
 1 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2b7a7ff..f0b0c79 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 {
 	const sector_t this_sector = r1_bio->sector;
 	const int sectors = r1_bio->sectors;
-	int new_disk = -1;
 	int start_disk;
+	int best_disk;
 	int i;
-	sector_t new_distance, current_distance;
+	sector_t best_dist;
 	mdk_rdev_t *rdev;
 	int choose_first;
 
@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 	 * We take the first readable disk when above the resync window.
 	 */
  retry:
+	best_disk = -1;
+	best_dist = MaxSector;
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync)) {
 		choose_first = 1;
@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 		start_disk = conf->last_used;
 	}
 
-	/* make sure the disk is operational */
 	for (i = 0 ; i < conf->raid_disks ; i++) {
+		sector_t dist;
 		int disk = start_disk + i;
 		if (disk >= conf->raid_disks)
 			disk -= conf->raid_disks;
@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
-		    || !test_bit(In_sync, &rdev->flags))
+		    || test_bit(Faulty, &rdev->flags))
 			continue;
-
-		new_disk = disk;
-		if (!test_bit(WriteMostly, &rdev->flags))
-			break;
-	}
-
-	if (new_disk < 0 || choose_first)
-		goto rb_out;
-
-	/*
-	 * Don't change to another disk for sequential reads:
-	 */
-	if (conf->next_seq_sect == this_sector)
-		goto rb_out;
-	if (this_sector == conf->mirrors[new_disk].head_position)
-		goto rb_out;
-
-	current_distance = abs(this_sector 
-			       - conf->mirrors[new_disk].head_position);
-
-	/* look for a better disk - i.e. head is closer */
-	start_disk = new_disk;
-	for (i = 1; i < conf->raid_disks; i++) {
-		int disk = start_disk + 1;
-		if (disk >= conf->raid_disks)
-			disk -= conf->raid_disks;
-
-		rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		if (r1_bio->bios[disk] == IO_BLOCKED
-		    || rdev == NULL
-		    || !test_bit(In_sync, &rdev->flags)
-		    || test_bit(WriteMostly, &rdev->flags))
+		if (!test_bit(In_sync, &rdev->flags) &&
+		    rdev->recovery_offset < this_sector + sectors)
 			continue;
-
-		if (!atomic_read(&rdev->nr_pending)) {
-			new_disk = disk;
+		if (test_bit(WriteMostly, &rdev->flags)) {
+			/* Don't balance among write-mostly, just
+			 * use the first as a last resort */
+			if (best_disk < 0)
+				best_disk = disk;
+			continue;
+		}
+		/* This is a reasonable device to use.  It might
+		 * even be best.
+		 */
+		dist = abs(this_sector - conf->mirrors[disk].head_position);
+		if (choose_first
+		    /* Don't change to another disk for sequential reads */
+		    || conf->next_seq_sect == this_sector
+		    || dist == 0
+		    /* If device is idle, use it */
+		    || atomic_read(&rdev->nr_pending) == 0) {
+			best_disk = disk;
 			break;
 		}
-		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
-		if (new_distance < current_distance) {
-			current_distance = new_distance;
-			new_disk = disk;
+		if (dist < best_dist) {
+			best_dist = dist;
+			best_disk = disk;
 		}
 	}
 
- rb_out:
-	if (new_disk >= 0) {
-		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+	if (best_disk >= 0) {
+		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
 		if (!rdev)
 			goto retry;
 		atomic_inc(&rdev->nr_pending);
-		if (!test_bit(In_sync, &rdev->flags)) {
+		if (test_bit(Faulty, &rdev->flags)) {
 			/* cannot risk returning a device that failed
 			 * before we inc'ed nr_pending
 			 */
@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 			goto retry;
 		}
 		conf->next_seq_sect = this_sector + sectors;
-		conf->last_used = new_disk;
+		conf->last_used = best_disk;
 	}
 	rcu_read_unlock();
 
-	return new_disk;
+	return best_disk;
 }
 
 static int raid1_congested(void *data, int bits)

next prev parent reply	other threads:[~2011-05-11  6:30 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-11  6:30 [md PATCH 00/16] md patches for 2.6.40 NeilBrown
2011-05-11  6:30 ` [md PATCH 03/16] md/bitmap: fix saving of events_cleared and other state NeilBrown
2011-05-11  6:30 ` [md PATCH 04/16] md: simplify raid10 read_balance NeilBrown
2011-05-11  6:30 ` [md PATCH 01/16] md: Fix race when creating a new md device NeilBrown
2011-05-11  6:30 ` [md PATCH 02/16] md: reject a re-add request that cannot be honoured NeilBrown
2011-05-11  6:30 ` [md PATCH 12/16] md/raid10: some tidying up in fix_read_error NeilBrown
2011-05-11  6:30 ` [md PATCH 16/16] md: allow resync_start to be set while an array is active NeilBrown
2011-05-11  6:30 ` [md PATCH 07/16] md: make error_handler functions more uniform and correct NeilBrown
2011-05-11  6:30 ` [md PATCH 11/16] md/raid1: improve handling of pages allocated for write-behind NeilBrown
2011-05-11  6:30 ` [md PATCH 15/16] md/raid10: reformat some loops with less indenting NeilBrown
2011-05-11  6:30 ` [md PATCH 09/16] md/raid1: tidy up new functions: process_checks and fix_sync_read_error NeilBrown
2011-05-11  6:30 ` [md PATCH 06/16] md/multipath: discard ->working_disks in favour of ->degraded NeilBrown
2011-05-11  6:30 ` [md PATCH 08/16] md/raid1: split out two sub-functions from sync_request_write NeilBrown
2011-05-11  6:30 ` NeilBrown [this message]
2011-05-11  6:30 ` [md PATCH 10/16] md/raid1: try fix_sync_read_error before process_checks NeilBrown
2011-05-11  6:30 ` [md PATCH 14/16] md/raid10: remove unused variable NeilBrown
2011-05-11  6:30 ` [md PATCH 13/16] md/raid10: make more use of 'slot' in raid10d NeilBrown

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:2b7a7ff dfblob:f0b0c79 )
 OR (
bs:"[md PATCH 05/16] md/raid1: clean up read_balance." )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110511063030.21263.65786.stgit@notabene.brown \
    --to=neilb@suse.de \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).