* [PATCH 001 of 5] md: Avoid oops when attempting to fix read errors on raid10
2006-04-28 2:51 [PATCH 000 of 5] md: Introduction - assorted raid10/raid1 fixes NeilBrown
@ 2006-04-28 2:50 ` NeilBrown
2006-05-01 21:52 ` [stable] " Greg KH
2006-04-28 2:51 ` [PATCH 002 of 5] md: Fixed refcounting/locking when attempting read error correction in raid10 NeilBrown
` (3 subsequent siblings)
4 siblings, 1 reply; 7+ messages in thread
From: NeilBrown @ 2006-04-28 2:50 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid, linux-kernel, stable
We should add to the counter for the rdev *after* checking
if the rdev is NULL !!!
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid10.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~ 2006-04-28 12:13:20.000000000 +1000
+++ ./drivers/md/raid10.c 2006-04-28 12:13:20.000000000 +1000
@@ -1435,9 +1435,9 @@ static void raid10d(mddev_t *mddev)
sl--;
d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
- atomic_add(s, &rdev->corrected_errors);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
+ atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 002 of 5] md: Fixed refcounting/locking when attempting read error correction in raid10
2006-04-28 2:51 [PATCH 000 of 5] md: Introduction - assorted raid10/raid1 fixes NeilBrown
2006-04-28 2:50 ` [PATCH 001 of 5] md: Avoid oops when attempting to fix read errors on raid10 NeilBrown
@ 2006-04-28 2:51 ` NeilBrown
2006-04-28 2:51 ` [PATCH 003 of 5] md: Change ENOTSUPP to EOPNOTSUPP NeilBrown
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: NeilBrown @ 2006-04-28 2:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid, linux-kernel
We need to hold a reference to rdevs while reading
and writing to attempt to correct read errors. This
reference must be taken under an rcu lock.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid10.c | 44 ++++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~ 2006-04-28 12:13:20.000000000 +1000
+++ ./drivers/md/raid10.c 2006-04-28 12:16:54.000000000 +1000
@@ -1407,36 +1407,45 @@ static void raid10d(mddev_t *mddev)
if (s > (PAGE_SIZE>>9))
s = PAGE_SIZE >> 9;
+ rcu_read_lock();
do {
int d = r10_bio->devs[sl].devnum;
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
- test_bit(In_sync, &rdev->flags) &&
- sync_page_io(rdev->bdev,
- r10_bio->devs[sl].addr +
- sect + rdev->data_offset,
- s<<9,
- conf->tmppage, READ))
- success = 1;
- else {
- sl++;
- if (sl == conf->copies)
- sl = 0;
+ test_bit(In_sync, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ success = sync_page_io(rdev->bdev,
+ r10_bio->devs[sl].addr +
+ sect + rdev->data_offset,
+ s<<9,
+ conf->tmppage, READ);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
+ if (success)
+ break;
}
+ sl++;
+ if (sl == conf->copies)
+ sl = 0;
} while (!success && sl != r10_bio->read_slot);
+ rcu_read_unlock();
if (success) {
int start = sl;
/* write it back and re-read */
+ rcu_read_lock();
while (sl != r10_bio->read_slot) {
int d;
if (sl==0)
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
@@ -1444,6 +1453,8 @@ static void raid10d(mddev_t *mddev)
s<<9, conf->tmppage, WRITE) == 0)
/* Well, this device is dead */
md_error(mddev, rdev);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
}
sl = start;
@@ -1453,17 +1464,22 @@ static void raid10d(mddev_t *mddev)
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9, conf->tmppage, READ) == 0)
/* Well, this device is dead */
md_error(mddev, rdev);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
}
+ rcu_read_unlock();
} else {
/* Cannot read from anywhere -- bye bye array */
md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 003 of 5] md: Change ENOTSUPP to EOPNOTSUPP
2006-04-28 2:51 [PATCH 000 of 5] md: Introduction - assorted raid10/raid1 fixes NeilBrown
2006-04-28 2:50 ` [PATCH 001 of 5] md: Avoid oops when attempting to fix read errors on raid10 NeilBrown
2006-04-28 2:51 ` [PATCH 002 of 5] md: Fixed refcounting/locking when attempting read error correction in raid10 NeilBrown
@ 2006-04-28 2:51 ` NeilBrown
2006-04-28 2:51 ` [PATCH 004 of 5] md: Improve detection of lack of barrier support in raid1 NeilBrown
2006-04-28 2:51 ` [PATCH 005 of 5] md: Fix 'rdev->nr_pending' count when retrying barrier requests NeilBrown
4 siblings, 0 replies; 7+ messages in thread
From: NeilBrown @ 2006-04-28 2:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid, linux-kernel
Because that is what you get if a BIO_RW_BARRIER isn't supported !
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid1.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~ 2006-04-28 12:17:27.000000000 +1000
+++ ./drivers/md/raid1.c 2006-04-28 12:17:27.000000000 +1000
@@ -315,7 +315,7 @@ static int raid1_end_write_request(struc
if (r1_bio->bios[mirror] == bio)
break;
- if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
+ if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
set_bit(R1BIO_BarrierRetry, &r1_bio->state);
r1_bio->mddev->barriers_work = 0;
@@ -1404,7 +1404,7 @@ static void raid1d(mddev_t *mddev)
unplug = 1;
} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
/* some requests in the r1bio were BIO_RW_BARRIER
- * requests which failed with -ENOTSUPP. Hohumm..
+ * requests which failed with -EOPNOTSUPP. Hohumm..
* Better resubmit without the barrier.
* We know which devices to resubmit for, because
* all others have had their bios[] entry cleared.
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 004 of 5] md: Improve detection of lack of barrier support in raid1
2006-04-28 2:51 [PATCH 000 of 5] md: Introduction - assorted raid10/raid1 fixes NeilBrown
` (2 preceding siblings ...)
2006-04-28 2:51 ` [PATCH 003 of 5] md: Change ENOTSUPP to EOPNOTSUPP NeilBrown
@ 2006-04-28 2:51 ` NeilBrown
2006-04-28 2:51 ` [PATCH 005 of 5] md: Fix 'rdev->nr_pending' count when retrying barrier requests NeilBrown
4 siblings, 0 replies; 7+ messages in thread
From: NeilBrown @ 2006-04-28 2:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid, linux-kernel
Move the test for 'do barrier work' down a bit so that if the first
write to a raid1 is a BIO_RW_BARRIER write, the checking done by
superblock writes will cause the right thing to happen.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid1.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~ 2006-04-28 12:17:27.000000000 +1000
+++ ./drivers/md/raid1.c 2006-04-28 12:17:40.000000000 +1000
@@ -753,18 +753,24 @@ static int make_request(request_queue_t
const int rw = bio_data_dir(bio);
int do_barriers;
- if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
- bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
- return 0;
- }
-
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
+ * We test barriers_work *after* md_write_start as md_write_start
+ * may cause the first superblock write, and that will check out
+ * if barriers work.
*/
+
md_write_start(mddev, bio); /* wait on superblock update early */
+ if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
+ if (rw == WRITE)
+ md_write_end(mddev);
+ bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ return 0;
+ }
+
wait_barrier(conf);
disk_stat_inc(mddev->gendisk, ios[rw]);
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 005 of 5] md: Fix 'rdev->nr_pending' count when retrying barrier requests.
2006-04-28 2:51 [PATCH 000 of 5] md: Introduction - assorted raid10/raid1 fixes NeilBrown
` (3 preceding siblings ...)
2006-04-28 2:51 ` [PATCH 004 of 5] md: Improve detection of lack of barrier support in raid1 NeilBrown
@ 2006-04-28 2:51 ` NeilBrown
4 siblings, 0 replies; 7+ messages in thread
From: NeilBrown @ 2006-04-28 2:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid, linux-kernel
When retrying a failed BIO_RW_BARRIER request, we need to
keep the reference in ->nr_pending over the whole retry.
Currently, we only hold the reference if the failed request is
the *last* one to finish - which is silly, because it would normally
be the first to finish.
So move the rdev_dec_pending call up into the didn't-fail branch.
As the rdev isn't used in the later code, calling rdev_dec_pending
earlier doesn't hurt.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid1.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~ 2006-04-28 12:17:40.000000000 +1000
+++ ./drivers/md/raid1.c 2006-04-28 12:17:58.000000000 +1000
@@ -319,6 +319,7 @@ static int raid1_end_write_request(struc
set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
set_bit(R1BIO_BarrierRetry, &r1_bio->state);
r1_bio->mddev->barriers_work = 0;
+ /* Don't rdev_dec_pending in this branch - keep it for the retry */
} else {
/*
* this branch is our 'one mirror IO has finished' event handler:
@@ -365,6 +366,7 @@ static int raid1_end_write_request(struc
}
}
}
+ rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}
/*
*
@@ -374,11 +376,9 @@ static int raid1_end_write_request(struc
if (atomic_dec_and_test(&r1_bio->remaining)) {
if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
reschedule_retry(r1_bio);
- /* Don't dec_pending yet, we want to hold
- * the reference over the retry
- */
goto out;
}
+ /* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
/* free extra copy of the data pages */
int i = bio->bi_vcnt;
@@ -393,8 +393,6 @@ static int raid1_end_write_request(struc
md_write_end(r1_bio->mddev);
raid_end_bio_io(r1_bio);
}
-
- rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
out:
if (to_put)
bio_put(to_put);
@@ -1414,6 +1412,7 @@ static void raid1d(mddev_t *mddev)
* Better resubmit without the barrier.
* We know which devices to resubmit for, because
* all others have had their bios[] entry cleared.
+ * We already have a nr_pending reference on these rdevs.
*/
int i;
clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
^ permalink raw reply [flat|nested] 7+ messages in thread