From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 06/16] md/raid1: clean up read_balance.
Date: Mon, 07 Jun 2010 10:07:54 +1000 [thread overview]
Message-ID: <20100607000754.13302.77379.stgit@notabene.brown> (raw)
In-Reply-To: <20100606235833.13302.60932.stgit@notabene.brown>
read_balance has three loops which all look for a 'best'
device based on slightly different criteria.
This is clumsy and makes is hard to add extra criteria.
So replace it all with a single loop that combines everything.
Signed-off-by: NeilBrown <neilb@suse.de>
---
drivers/md/raid1.c | 144 ++++++++++++++++++++++------------------------------
1 files changed, 60 insertions(+), 84 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 82440a7..fa62c7b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -420,10 +420,13 @@ static void raid1_end_write_request(struct bio *bio, int error)
static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
const sector_t this_sector = r1_bio->sector;
- int new_disk = conf->last_used, disk = new_disk;
- int wonly_disk = -1;
const int sectors = r1_bio->sectors;
- sector_t new_distance, current_distance;
+ int do_balance;
+ int disk;
+ int start_disk;
+ int best_disk;
+ int i;
+ sector_t best_dist;
mdk_rdev_t *rdev;
rcu_read_lock();
@@ -433,100 +436,73 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
* We take the first readable disk when above the resync window.
*/
retry:
+ disk = -1;
+ best_disk = -1;
+ best_dist = MaxSector;
if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) {
- /* Choose the first operational device, for consistancy */
- new_disk = 0;
-
- for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
- r1_bio->bios[new_disk] == IO_BLOCKED ||
- !rdev || !test_bit(In_sync, &rdev->flags)
- || test_bit(WriteMostly, &rdev->flags);
- rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
-
- if (rdev && test_bit(In_sync, &rdev->flags) &&
- r1_bio->bios[new_disk] != IO_BLOCKED)
- wonly_disk = new_disk;
-
- if (new_disk == conf->raid_disks - 1) {
- new_disk = wonly_disk;
- break;
- }
- }
- goto rb_out;
+ /* just choose the first */
+ start_disk = 0;
+ do_balance = 0;
+ } else {
+ /* Else start from last used */
+ start_disk = conf->last_used;
+ do_balance = 1;
}
+ for (i = 0; i < conf->raid_disks; i++) {
+ sector_t dist;
+ disk = (start_disk + i) % conf->raid_disks;
+ if (r1_bio->bios[disk] == IO_BLOCKED)
+ continue;
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ if (!rdev)
+ continue;
+ if (test_bit(Faulty, &rdev->flags))
+ continue;
+ if (!test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < this_sector + sectors)
+ continue;
- /* make sure the disk is operational */
- for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
- r1_bio->bios[new_disk] == IO_BLOCKED ||
- !rdev || !test_bit(In_sync, &rdev->flags) ||
- test_bit(WriteMostly, &rdev->flags);
- rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
+ if (test_bit(WriteMostly, &rdev->flags)) {
+ /* don't balance among write-mostly, just
+ * use first as a last resort */
+ if (best_disk < 0)
+ best_disk = disk;
+ continue;
+ }
+ /* This is a reasonable device to use. It might
+ * even be best.
+ */
+ if (!do_balance)
+ break;
- if (rdev && test_bit(In_sync, &rdev->flags) &&
- r1_bio->bios[new_disk] != IO_BLOCKED)
- wonly_disk = new_disk;
+ /*
+ * Don't change to another disk for sequential reads:
+ */
+ if (conf->next_seq_sect == this_sector)
+ break;
- if (new_disk <= 0)
- new_disk = conf->raid_disks;
- new_disk--;
- if (new_disk == disk) {
- new_disk = wonly_disk;
+ dist = abs(this_sector - conf->mirrors[disk].head_position);
+ if (dist == 0)
+ break;
+ if (!atomic_read(&rdev->nr_pending))
+ /* Device is idle, so use it */
break;
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_disk = disk;
}
}
+ if (i == conf->raid_disks)
+ disk = best_disk;
- if (new_disk < 0)
- goto rb_out;
-
- disk = new_disk;
- /* now disk == new_disk == starting point for search */
-
- /*
- * Don't change to another disk for sequential reads:
- */
- if (conf->next_seq_sect == this_sector)
- goto rb_out;
- if (this_sector == conf->mirrors[new_disk].head_position)
- goto rb_out;
-
- current_distance = abs(this_sector - conf->mirrors[disk].head_position);
-
- /* Find the disk whose head is closest */
-
- do {
- if (disk <= 0)
- disk = conf->raid_disks;
- disk--;
-
+ if (disk >= 0) {
rdev = rcu_dereference(conf->mirrors[disk].rdev);
-
- if (!rdev || r1_bio->bios[disk] == IO_BLOCKED ||
- !test_bit(In_sync, &rdev->flags) ||
- test_bit(WriteMostly, &rdev->flags))
- continue;
-
- if (!atomic_read(&rdev->nr_pending)) {
- new_disk = disk;
- break;
- }
- new_distance = abs(this_sector - conf->mirrors[disk].head_position);
- if (new_distance < current_distance) {
- current_distance = new_distance;
- new_disk = disk;
- }
- } while (disk != conf->last_used);
-
- rb_out:
-
-
- if (new_disk >= 0) {
- rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
if (!rdev)
goto retry;
atomic_inc(&rdev->nr_pending);
- if (!test_bit(In_sync, &rdev->flags)) {
+ if (test_bit(Faulty, &rdev->flags)) {
/* cannot risk returning a device that failed
* before we inc'ed nr_pending
*/
@@ -534,11 +510,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
goto retry;
}
conf->next_seq_sect = this_sector + sectors;
- conf->last_used = new_disk;
+ conf->last_used = disk;
}
rcu_read_unlock();
- return new_disk;
+ return disk;
}
static void unplug_slaves(mddev_t *mddev)
next prev parent reply other threads:[~2010-06-07 0:07 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-06-07 0:07 [md PATCH 00/16] bad block list management for md and RAID1 NeilBrown
2010-06-07 0:07 ` [md PATCH 02/16] md/bad-block-log: add sysfs interface for accessing bad-block-log NeilBrown
2010-06-07 0:07 ` [md PATCH 01/16] md: beginnings of bad block management NeilBrown
2010-06-07 0:07 ` [md PATCH 04/16] md: load/store badblock list from v1.x metadata NeilBrown
2010-06-07 0:07 ` [md PATCH 03/16] md: don't allow arrays to contain devices with bad blocks NeilBrown
2010-06-07 0:07 ` NeilBrown [this message]
2010-06-07 0:07 ` [md PATCH 07/16] md: simplify raid10 read_balance NeilBrown
2010-06-07 0:07 ` [md PATCH 05/16] md: reject devices with bad blocks and v0.90 metadata NeilBrown
2010-06-07 0:07 ` [md PATCH 08/16] md/raid1: avoid reading from known bad blocks NeilBrown
2010-06-07 0:07 ` [md PATCH 09/16] md/raid1: avoid reading known bad blocks during resync NeilBrown
2010-06-07 0:07 ` [md PATCH 11/16] md/multipath: discard ->working_disks in favour of ->degraded NeilBrown
2010-06-07 0:07 ` [md PATCH 12/16] md: make error_handler functions more uniform and correct NeilBrown
2010-06-07 0:07 ` [md PATCH 10/16] md: add 'write_error' flag to component devices NeilBrown
2010-06-07 0:07 ` [md PATCH 15/16] md/raid1: clear bad-block record when write succeeds NeilBrown
2010-06-07 0:07 ` [md PATCH 13/16] md: make it easier to wait for bad blocks to be acknowledged NeilBrown
2010-06-07 0:07 ` [md PATCH 14/16] md/raid1: avoid writing to known-bad blocks on known-bad drives NeilBrown
2010-06-07 0:07 ` [md PATCH 16/16] md/raid1: Handle write errors by updating badblock log NeilBrown
2010-06-07 0:28 ` [md PATCH 00/16] bad block list management for md and RAID1 Berkey B Walker
2010-06-07 22:18 ` Stefan /*St0fF*/ Hübner
2010-06-17 12:48 ` Brett Russ
2010-06-17 15:53 ` Graham Mitchell
2010-06-18 3:58 ` Neil Brown
2010-06-18 4:30 ` Graham Mitchell
2010-06-18 3:23 ` Neil Brown
[not found] ` <4C1BABC4.3020008@tmr.com>
2010-06-29 5:06 ` Neil Brown
2010-06-29 16:54 ` Bill Davidsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100607000754.13302.77379.stgit@notabene.brown \
--to=neilb@suse.de \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).