[md PATCH 09/16] md/raid1: avoid reading known bad blocks during resync

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 09/16] md/raid1: avoid reading known bad blocks during resync
Date: Mon, 07 Jun 2010 10:07:55 +1000	[thread overview]
Message-ID: <20100607000755.13302.11589.stgit@notabene.brown> (raw)
In-Reply-To: <20100606235833.13302.60932.stgit@notabene.brown>

When performing resync/etc, keep the size of the request
small enough that it doesn't overlap any known bad blocks.
Devices with badblocks at the start of the request are completely
excluded.
If there is nowhere to read from due to bad blocks, record
a bad block on each target device.

Now that we never read from known-bad-blocks we can allow devices with
known-bad-blocks into a RAID1.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid1.c |   97 +++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a5a3f01..8429c63 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1249,9 +1249,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 	int first = 0;
 	int last = mddev->raid_disks - 1;
 
-	if (rdev->badblocks.count)
-		return -EINVAL;
-
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
@@ -1488,6 +1485,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		 * We don't need to freeze the array, because being in an
 		 * active sync request, there is no normal IO, and
 		 * no overlapping syncs.
+		 * We don't need to check is_bad_block again as we
+		 * made sure and anything with a bad block in range
+		 * will have bi_end_io clear.
 		 */
 		sector_t sect = r1_bio->sector;
 		int sectors = r1_bio->sectors;
@@ -1901,6 +1901,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	int write_targets = 0, read_targets = 0;
 	int sync_blocks;
 	int still_degraded = 0;
+	int good_sectors = RESYNC_SECTORS;
+	int min_bad = 0; /* number of sectors that are bad in all devices */
 
 	if (!conf->r1buf_pool)
 		if (init_resync(conf))
@@ -1988,34 +1990,89 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		if (rdev == NULL ||
 			   test_bit(Faulty, &rdev->flags)) {
 			still_degraded = 1;
-			continue;
 		} else if (!test_bit(In_sync, &rdev->flags)) {
 			bio->bi_rw = WRITE;
 			bio->bi_end_io = end_sync_write;
 			write_targets ++;
 		} else {
 			/* may need to read from here */
-			bio->bi_rw = READ;
-			bio->bi_end_io = end_sync_read;
-			if (test_bit(WriteMostly, &rdev->flags)) {
-				if (wonly < 0)
-					wonly = i;
-			} else {
-				if (disk < 0)
-					disk = i;
+			sector_t first_bad = MaxSector;
+			int bad_sectors;
+
+			if (is_badblock(rdev, sector_nr, good_sectors,
+					&first_bad, &bad_sectors)) {
+				if (first_bad > sector_nr)
+					good_sectors = first_bad - sector_nr;
+				else {
+					bad_sectors -= (sector_nr - first_bad);
+					if (min_bad == 0 ||
+					    min_bad > bad_sectors)
+						min_bad = bad_sectors;
+				}
+			}
+			if (sector_nr < first_bad) {
+				if (test_bit(WriteMostly, &rdev->flags)) {
+					if (wonly < 0)
+						wonly = i;
+				} else {
+					if (disk < 0)
+						disk = i;
+				}
+				bio->bi_rw = READ;
+				bio->bi_end_io = end_sync_read;
+				read_targets++;
 			}
-			read_targets++;
 		}
-		atomic_inc(&rdev->nr_pending);
-		bio->bi_sector = sector_nr + rdev->data_offset;
-		bio->bi_bdev = rdev->bdev;
-		bio->bi_private = r1_bio;
+		if (bio->bi_end_io) {
+			atomic_inc(&rdev->nr_pending);
+			bio->bi_sector = sector_nr + rdev->data_offset;
+			bio->bi_bdev = rdev->bdev;
+			bio->bi_private = r1_bio;
+		}
 	}
 	rcu_read_unlock();
 	if (disk < 0)
 		disk = wonly;
 	r1_bio->read_disk = disk;
 
+	if (read_targets == 0 && min_bad > 0) {
+		/* These sectors are bad on all InSync devices, so we
+		 * need to mark them bad on all write targets
+		 */
+		int ok = 1;
+		for (i = 0 ; i < conf->raid_disks ; i++)
+			if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
+				mdk_rdev_t *rdev =
+					rcu_dereference(conf->mirrors[i].rdev);
+				ok = md_set_badblocks(
+					&rdev->badblocks,
+					sector_nr + rdev->data_offset,
+					min_bad, 0
+					) && ok;
+			}
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		*skipped = 1;
+		put_buf(r1_bio);
+
+		if (!ok) {
+			/* Cannot record the badblocks, so need to
+			 * abort the resync.
+			 * If there are multiple read targets, could just
+			 * fail the really bad ones ???
+			 */
+			mddev->recovery_disabled = 1;
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+			return 0;
+		} else
+			return min_bad;
+
+	}
+	if (min_bad > 0 && min_bad < good_sectors) {
+		/* only resync enough to read the next bad->good
+		 * transition */
+		good_sectors = min_bad;
+	}
+
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
 		/* extra read targets are also write targets */
 		write_targets += read_targets-1;
@@ -2032,6 +2089,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	if (max_sector > mddev->resync_max)
 		max_sector = mddev->resync_max; /* Don't do IO beyond here */
+	if (max_sector > sector_nr + good_sectors)
+		max_sector = sector_nr + good_sectors;
 	nr_sectors = 0;
 	sync_blocks = 0;
 	do {
@@ -2254,10 +2313,6 @@ static int run(mddev_t *mddev)
 			blk_queue_segment_boundary(mddev->queue,
 						   PAGE_CACHE_SIZE - 1);
 		}
-		if (rdev->badblocks.count) {
-			printk(KERN_ERR "md/raid1: Cannot handle bad blocks yet\n");
-			return -EINVAL;
-		}
 	}
 
 	mddev->degraded = 0;

next prev parent reply	other threads:[~2010-06-07  0:07 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-07  0:07 [md PATCH 00/16] bad block list management for md and RAID1 NeilBrown
2010-06-07  0:07 ` [md PATCH 01/16] md: beginnings of bad block management NeilBrown
2010-06-07  0:07 ` [md PATCH 02/16] md/bad-block-log: add sysfs interface for accessing bad-block-log NeilBrown
2010-06-07  0:07 ` [md PATCH 05/16] md: reject devices with bad blocks and v0.90 metadata NeilBrown
2010-06-07  0:07 ` [md PATCH 07/16] md: simplify raid10 read_balance NeilBrown
2010-06-07  0:07 ` [md PATCH 06/16] md/raid1: clean up read_balance NeilBrown
2010-06-07  0:07 ` [md PATCH 03/16] md: don't allow arrays to contain devices with bad blocks NeilBrown
2010-06-07  0:07 ` [md PATCH 04/16] md: load/store badblock list from v1.x metadata NeilBrown
2010-06-07  0:07 ` [md PATCH 10/16] md: add 'write_error' flag to component devices NeilBrown
2010-06-07  0:07 ` [md PATCH 12/16] md: make error_handler functions more uniform and correct NeilBrown
2010-06-07  0:07 ` [md PATCH 11/16] md/multipath: discard ->working_disks in favour of ->degraded NeilBrown
2010-06-07  0:07 ` [md PATCH 08/16] md/raid1: avoid reading from known bad blocks NeilBrown
2010-06-07  0:07 ` NeilBrown [this message]
2010-06-07  0:07 ` [md PATCH 16/16] md/raid1: Handle write errors by updating badblock log NeilBrown
2010-06-07  0:07 ` [md PATCH 14/16] md/raid1: avoid writing to known-bad blocks on known-bad drives NeilBrown
2010-06-07  0:07 ` [md PATCH 15/16] md/raid1: clear bad-block record when write succeeds NeilBrown
2010-06-07  0:07 ` [md PATCH 13/16] md: make it easier to wait for bad blocks to be acknowledged NeilBrown
2010-06-07  0:28 ` [md PATCH 00/16] bad block list management for md and RAID1 Berkey B Walker
2010-06-07 22:18   ` Stefan /*St0fF*/ Hübner
2010-06-17 12:48 ` Brett Russ
2010-06-17 15:53   ` Graham Mitchell
2010-06-18  3:58     ` Neil Brown
2010-06-18  4:30       ` Graham Mitchell
2010-06-18  3:23   ` Neil Brown
     [not found]     ` <4C1BABC4.3020008@tmr.com>
2010-06-29  5:06       ` Neil Brown
2010-06-29 16:54         ` Bill Davidsen

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:a5a3f01 dfblob:8429c63 )
 OR (
bs:"[md PATCH 09/16] md/raid1: avoid reading known bad blocks during resync" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100607000755.13302.11589.stgit@notabene.brown \
    --to=neilb@suse.de \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).