From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 004 of 4] md: Improve locking around error handling
Date: Mon, 28 Aug 2006 09:49:33 +1000 [thread overview]
Message-ID: <1060827234933.32501@suse.de> (raw)
In-Reply-To: 20060828092849.21292.patches@notabene
From: NeilBrown <neilb@suse.de>
The error handling routines don't use proper locking, and so two concurrent
errors could trigger a problem.
So:
- use test-and-set and test-and-clear to synchonise
the In_sync bits with the ->degraded count
- use the spinlock to protect updates to the
degraded count (could use an atomic_t but that
would be a bigger change in code, and isn't
really justified)
- remove un-necessary locking in raid5
Signed-off-by: Neil Brown <neilb@suse.de>
---
drivers/md/raid1.c | 16 +++++++++++-----
drivers/md/raid10.c | 12 ++++++++----
drivers/md/raid5.c | 20 ++++++++++++--------
3 files changed, 31 insertions(+), 17 deletions(-)
Index: mm-quilt/drivers/md/raid1.c
===================================================================
--- mm-quilt.orig/drivers/md/raid1.c 2006-08-28 09:43:39.000000000 +1000
+++ mm-quilt/drivers/md/raid1.c 2006-08-28 09:43:44.000000000 +1000
@@ -959,14 +959,16 @@ static void error(mddev_t *mddev, mdk_rd
* normal single drive
*/
return;
- if (test_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
}
- clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
@@ -1022,9 +1024,11 @@ static int raid1_spare_active(mddev_t *m
mdk_rdev_t *rdev = conf->mirrors[i].rdev;
if (rdev
&& !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)) {
+ && !test_and_set_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--;
- set_bit(In_sync, &rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
}
@@ -2048,7 +2052,7 @@ static int raid1_reshape(mddev_t *mddev)
mirror_info_t *newmirrors;
conf_t *conf = mddev_to_conf(mddev);
int cnt, raid_disks;
-
+ unsigned long flags;
int d, d2;
/* Cannot change chunk_size, layout, or level */
@@ -2107,7 +2111,9 @@ static int raid1_reshape(mddev_t *mddev)
kfree(conf->poolinfo);
conf->poolinfo = newpoolinfo;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (raid_disks - conf->raid_disks);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;
Index: mm-quilt/drivers/md/raid10.c
===================================================================
--- mm-quilt.orig/drivers/md/raid10.c 2006-08-28 09:43:37.000000000 +1000
+++ mm-quilt/drivers/md/raid10.c 2006-08-28 09:43:44.000000000 +1000
@@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rd
* really dead" tests...
*/
return;
- if (test_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
}
- clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
@@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t *
tmp = conf->mirrors + i;
if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
- && !test_bit(In_sync, &tmp->rdev->flags)) {
+ && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--;
- set_bit(In_sync, &tmp->rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
}
Index: mm-quilt/drivers/md/raid5.c
===================================================================
--- mm-quilt.orig/drivers/md/raid5.c 2006-08-28 09:43:27.000000000 +1000
+++ mm-quilt/drivers/md/raid5.c 2006-08-28 09:43:44.000000000 +1000
@@ -636,7 +636,6 @@ static int raid5_end_write_request (stru
struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
- unsigned long flags;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
if (bi->bi_size)
@@ -654,7 +653,6 @@ static int raid5_end_write_request (stru
return 0;
}
- spin_lock_irqsave(&conf->device_lock, flags);
if (!uptodate)
md_error(conf->mddev, conf->disks[i].rdev);
@@ -662,8 +660,7 @@ static int raid5_end_write_request (stru
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
- __release_stripe(conf, sh);
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ release_stripe(sh);
return 0;
}
@@ -697,9 +694,11 @@ static void error(mddev_t *mddev, mdk_rd
if (!test_bit(Faulty, &rdev->flags)) {
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- if (test_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
- clear_bit(In_sync, &rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery was running, make sure it aborts.
*/
@@ -3419,9 +3418,11 @@ static int raid5_spare_active(mddev_t *m
tmp = conf->disks + i;
if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
- && !test_bit(In_sync, &tmp->rdev->flags)) {
+ && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--;
- set_bit(In_sync, &tmp->rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
}
print_raid5_conf(conf);
@@ -3557,6 +3558,7 @@ static int raid5_start_reshape(mddev_t *
struct list_head *rtmp;
int spares = 0;
int added_devices = 0;
+ unsigned long flags;
if (mddev->degraded ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -3598,7 +3600,9 @@ static int raid5_start_reshape(mddev_t *
break;
}
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
mddev->raid_disks = conf->raid_disks;
mddev->reshape_position = 0;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
next prev parent reply other threads:[~2006-08-27 23:49 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-08-27 23:49 [PATCH 000 of 4] md: Introduction - some revised/reordered patches NeilBrown
2006-08-27 23:49 ` [PATCH 001 of 4] md: Fix issues with referencing rdev in md/raid1 NeilBrown
2006-08-27 23:49 ` [PATCH 002 of 4] md: Factor out part of raid1d into a separate function NeilBrown
2006-08-27 23:49 ` [PATCH 003 of 4] md: Remove working_disks from raid1 state data NeilBrown
2006-08-27 23:49 ` NeilBrown [this message]
2006-08-28 0:06 ` Recommended kernel? Jeff Breidenbach
2006-08-28 0:11 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1060827234933.32501@suse.de \
--to=neilb@suse.de \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.