Linux RAID subsystem development
 help / color / mirror / Atom feed
* [PATCH md 0 of 7] Introduction
@ 2004-09-08  2:48 NeilBrown
  2004-09-08  2:48 ` [PATCH md 4 of 7] Rationalise unplug functions in md NeilBrown
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid

Following are 7 patches for md in 2.6.8-rc1-mm2

They all grew out of a desire to redo the locking in unplug_slave.
Getting and dropping a spinlock so often for very little gain (it
would be nearly impossible to lose the relevant race) really bothered
me.

I finally figured that I could replace it with rcu locking which is very
lightweight, and quite up to the task.

On the way I found a number of inconsistencies that needed cleaning
up and even a few bugs to fix.
The first 6 patches deal with these inconsistencies and bugs.  The
last redoes the locking for adding/removing/accessing devices within
md personalities.



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 1 of 7] Remove md_flush_all
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
  2004-09-08  2:48 ` [PATCH md 4 of 7] Rationalise unplug functions in md NeilBrown
  2004-09-08  2:48 ` [PATCH md 2 of 7] Make retry_list non-global in raid1 and multipath NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  2004-09-08  2:48 ` [PATCH md 3 of 7] Rationalise issue_flush function in md personalities NeilBrown
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


This isn't needed as each personality defines its own issue_flush_fn

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/md.c |   33 ---------------------------------
 1 files changed, 33 deletions(-)

diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~	2004-09-08 11:03:11.000000000 +1000
+++ ./drivers/md/md.c	2004-09-08 11:03:16.000000000 +1000
@@ -197,38 +197,6 @@ static spinlock_t all_mddevs_lock = SPIN
 		tmp = tmp->next;})					\
 		)
 
-int md_flush_mddev(mddev_t *mddev, sector_t *error_sector)
-{
-	struct list_head *tmp;
-	mdk_rdev_t *rdev;
-	int ret = 0;
-
-	/*
-	 * this list iteration is done without any locking in md?!
-	 */
-	ITERATE_RDEV(mddev, rdev, tmp) {
-		request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
-		int err;
-
-		if (!r_queue->issue_flush_fn)
-			err = -EOPNOTSUPP;
-		else
-			err = r_queue->issue_flush_fn(r_queue, rdev->bdev->bd_disk, error_sector);
-
-		if (!ret)
-			ret = err;
-	}
-
-	return ret;
-}
-
-static int md_flush_all(request_queue_t *q, struct gendisk *disk,
-			 sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-
-	return md_flush_mddev(mddev, error_sector);
-}
 
 static int md_fail_request (request_queue_t *q, struct bio *bio)
 {
@@ -1757,7 +1725,6 @@ static int do_md_run(mddev_t * mddev)
 	 */
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
-	mddev->queue->issue_flush_fn = md_flush_all;
 
 	mddev->changed = 1;
 	md_new_event();

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 3 of 7] Rationalise issue_flush function in md personalities
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
                   ` (2 preceding siblings ...)
  2004-09-08  2:48 ` [PATCH md 1 of 7] Remove md_flush_all NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  2004-09-08  2:48 ` [PATCH md 6 of 7] Fix two little bugs in raid10 NeilBrown
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


The functions are all subtly different.  This patch makes them all
much the same.  In particular, EOPNOTSUPP gets returned by all as
appropriate.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/linear.c    |   11 ++++-------
 ./drivers/md/multipath.c |   13 +++++--------
 ./drivers/md/raid0.c     |   12 ++++--------
 ./drivers/md/raid1.c     |   12 ++++++------
 ./drivers/md/raid10.c    |   12 ++++++------
 ./drivers/md/raid5.c     |   22 ++++++----------------
 ./drivers/md/raid6main.c |   22 ++++++----------------
 7 files changed, 37 insertions(+), 67 deletions(-)

diff ./drivers/md/linear.c~current~ ./drivers/md/linear.c
--- ./drivers/md/linear.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/linear.c	2004-09-08 11:56:55.000000000 +1000
@@ -99,17 +99,14 @@ static int linear_issue_flush(request_qu
 	linear_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
-	for (i=0; i < mddev->raid_disks; i++) {
+	for (i=0; i < mddev->raid_disks && ret == 0; i++) {
 		struct block_device *bdev = conf->disks[i].rdev->bdev;
 		request_queue_t *r_queue = bdev_get_queue(bdev);
 
-		if (!r_queue->issue_flush_fn) {
+		if (!r_queue->issue_flush_fn)
 			ret = -EOPNOTSUPP;
-			break;
-		}
-		ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-		if (ret)
-			break;
+		else
+			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
 	}
 	return ret;
 }

diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
--- ./drivers/md/multipath.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/multipath.c	2004-09-08 11:56:55.000000000 +1000
@@ -238,20 +238,17 @@ static int multipath_issue_flush(request
 	multipath_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
 
-			if (!r_queue->issue_flush_fn) {
+			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-				break;
-			}
-
-			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-			if (ret)
-				break;
+			else
+				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+							      error_sector);
 		}
 	}
 	return ret;

diff ./drivers/md/raid0.c~current~ ./drivers/md/raid0.c
--- ./drivers/md/raid0.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/raid0.c	2004-09-08 11:56:55.000000000 +1000
@@ -48,18 +48,14 @@ static int raid0_issue_flush(request_que
 	mdk_rdev_t **devlist = conf->strip_zone[0].dev;
 	int i, ret = 0;
 
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		struct block_device *bdev = devlist[i]->bdev;
 		request_queue_t *r_queue = bdev_get_queue(bdev);
 
-		if (!r_queue->issue_flush_fn) {
+		if (!r_queue->issue_flush_fn)
 			ret = -EOPNOTSUPP;
-			break;
-		}
-
-		ret =r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-		if (ret)
-			break;
+		else
+			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
 	}
 	return ret;
 }

diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/raid1.c	2004-09-08 11:57:08.000000000 +1000
@@ -459,17 +459,17 @@ static int raid1_issue_flush(request_que
 	int i, ret = 0;
 
 	spin_lock_irqsave(&conf->device_lock, flags);
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
 
-			if (r_queue->issue_flush_fn) {
-				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-				if (ret)
-					break;
-			}
+			if (!r_queue->issue_flush_fn)
+				ret = -EOPNOTSUPP;
+			else
+				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+							      error_sector);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/raid10.c	2004-09-08 11:56:55.000000000 +1000
@@ -613,17 +613,17 @@ static int raid10_issue_flush(request_qu
 	int i, ret = 0;
 
 	spin_lock_irqsave(&conf->device_lock, flags);
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
 
-			if (r_queue->issue_flush_fn) {
-				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-				if (ret)
-					break;
-			}
+			if (!r_queue->issue_flush_fn)
+				ret = -EOPNOTSUPP;
+			else
+				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+							      error_sector);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/raid5.c	2004-09-08 11:56:55.000000000 +1000
@@ -1347,27 +1347,17 @@ static int raid5_issue_flush(request_que
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
-			request_queue_t *r_queue;
+			request_queue_t *r_queue = bdev_get_queue(bdev);
 
-			if (!bdev)
-				continue;
-
-			r_queue = bdev_get_queue(bdev);
-			if (!r_queue)
-				continue;
-
-			if (!r_queue->issue_flush_fn) {
+			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-				break;
-			}
-
-			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-			if (ret)
-				break;
+			else
+				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+							      error_sector);
 		}
 	}
 	return ret;

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~	2004-09-08 11:56:53.000000000 +1000
+++ ./drivers/md/raid6main.c	2004-09-08 11:56:55.000000000 +1000
@@ -1509,27 +1509,17 @@ static int raid6_issue_flush(request_que
 	raid6_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
-			request_queue_t *r_queue;
+			request_queue_t *r_queue = bdev_get_queue(bdev);
 
-			if (!bdev)
-				continue;
-
-			r_queue = bdev_get_queue(bdev);
-			if (!r_queue)
-				continue;
-
-			if (!r_queue->issue_flush_fn) {
+			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-				break;
-			}
-
-			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-			if (ret)
-				break;
+			else
+				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+							      error_sector);
 		}
 	}
 	return ret;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 2 of 7] Make retry_list non-global in raid1 and multipath
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
  2004-09-08  2:48 ` [PATCH md 4 of 7] Rationalise unplug functions in md NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  2004-09-08  2:48 ` [PATCH md 1 of 7] Remove md_flush_all NeilBrown
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


Both raid1 and multipath have a "retry_list" which is global,
so all raid1 arrays (for example) use the same list.
This is rather ugly, and it is simple enough to make it per-array,
so this patch does that.

It also changes the multipath code to use list.h lists instead of
roll-your-own.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/multipath.c         |   28 ++++++++++++----------------
 ./drivers/md/raid1.c             |   18 +++++++++---------
 ./include/linux/raid/multipath.h |    3 ++-
 ./include/linux/raid/raid1.h     |    1 +
 4 files changed, 24 insertions(+), 26 deletions(-)

diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
--- ./drivers/md/multipath.c~current~	2004-09-08 11:03:13.000000000 +1000
+++ ./drivers/md/multipath.c	2004-09-08 11:03:22.000000000 +1000
@@ -36,8 +36,6 @@
 
 
 static mdk_personality_t multipath_personality;
-static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
-struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
 
 
 static void *mp_pool_alloc(int gfp_flags, void *data)
@@ -101,14 +99,11 @@ static void multipath_reschedule_retry (
 {
 	unsigned long flags;
 	mddev_t *mddev = mp_bh->mddev;
+	multipath_conf_t *conf = mddev_to_conf(mddev);
 
-	spin_lock_irqsave(&retry_list_lock, flags);
-	if (multipath_retry_list == NULL)
-		multipath_retry_tail = &multipath_retry_list;
-	*multipath_retry_tail = mp_bh;
-	multipath_retry_tail = &mp_bh->next_mp;
-	mp_bh->next_mp = NULL;
-	spin_unlock_irqrestore(&retry_list_lock, flags);
+	spin_lock_irqsave(&conf->device_lock, flags);
+	list_add(&mp_bh->retry_list, &conf->retry_list);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	md_wakeup_thread(mddev->thread);
 }
 
@@ -399,18 +394,18 @@ static void multipathd (mddev_t *mddev)
 	struct bio *bio;
 	unsigned long flags;
 	multipath_conf_t *conf = mddev_to_conf(mddev);
+	struct list_head *head = &conf->retry_list;
 
 	md_check_recovery(mddev);
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&retry_list_lock, flags);
-		mp_bh = multipath_retry_list;
-		if (!mp_bh)
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head))
 			break;
-		multipath_retry_list = mp_bh->next_mp;
-		spin_unlock_irqrestore(&retry_list_lock, flags);
+		mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
+		list_del(head->prev);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 
-		mddev = mp_bh->mddev;
 		bio = &mp_bh->bio;
 		bio->bi_sector = mp_bh->master_bio->bi_sector;
 		
@@ -433,7 +428,7 @@ static void multipathd (mddev_t *mddev)
 			generic_make_request(bio);
 		}
 	}
-	spin_unlock_irqrestore(&retry_list_lock, flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
 static int multipath_run (mddev_t *mddev)
@@ -506,6 +501,7 @@ static int multipath_run (mddev_t *mddev
 	mddev->sb_dirty = 1;
 	conf->mddev = mddev;
 	conf->device_lock = SPIN_LOCK_UNLOCKED;
+	INIT_LIST_HEAD(&conf->retry_list);
 
 	if (!conf->working_disks) {
 		printk(KERN_ERR "multipath: no operational IO paths for %s\n",

diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~	2004-09-08 11:03:13.000000000 +1000
+++ ./drivers/md/raid1.c	2004-09-08 11:03:22.000000000 +1000
@@ -30,8 +30,6 @@
 #define	NR_RAID1_BIOS 256
 
 static mdk_personality_t raid1_personality;
-static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(retry_list_head);
 
 static void unplug_slaves(mddev_t *mddev);
 
@@ -188,10 +186,11 @@ static void reschedule_retry(r1bio_t *r1
 {
 	unsigned long flags;
 	mddev_t *mddev = r1_bio->mddev;
+	conf_t *conf = mddev_to_conf(mddev);
 
-	spin_lock_irqsave(&retry_list_lock, flags);
-	list_add(&r1_bio->retry_list, &retry_list_head);
-	spin_unlock_irqrestore(&retry_list_lock, flags);
+	spin_lock_irqsave(&conf->device_lock, flags);
+	list_add(&r1_bio->retry_list, &conf->retry_list);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	md_wakeup_thread(mddev->thread);
 }
@@ -904,11 +903,11 @@ static void sync_request_write(mddev_t *
 
 static void raid1d(mddev_t *mddev)
 {
-	struct list_head *head = &retry_list_head;
 	r1bio_t *r1_bio;
 	struct bio *bio;
 	unsigned long flags;
 	conf_t *conf = mddev_to_conf(mddev);
+	struct list_head *head = &conf->retry_list;
 	int unplug=0;
 	mdk_rdev_t *rdev;
 
@@ -917,12 +916,12 @@ static void raid1d(mddev_t *mddev)
 	
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&retry_list_lock, flags);
+		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head))
 			break;
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 		list_del(head->prev);
-		spin_unlock_irqrestore(&retry_list_lock, flags);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		mddev = r1_bio->mddev;
 		conf = mddev_to_conf(mddev);
@@ -956,7 +955,7 @@ static void raid1d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&retry_list_lock, flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }
@@ -1205,6 +1204,7 @@ static int run(mddev_t *mddev)
 	conf->raid_disks = mddev->raid_disks;
 	conf->mddev = mddev;
 	conf->device_lock = SPIN_LOCK_UNLOCKED;
+	INIT_LIST_HEAD(&conf->retry_list);
 	if (conf->working_disks == 1)
 		mddev->recovery_cp = MaxSector;
 

diff ./include/linux/raid/multipath.h~current~ ./include/linux/raid/multipath.h
--- ./include/linux/raid/multipath.h~current~	2004-09-08 11:03:13.000000000 +1000
+++ ./include/linux/raid/multipath.h	2004-09-08 11:03:22.000000000 +1000
@@ -14,6 +14,7 @@ struct multipath_private_data {
 	int			working_disks;
 	spinlock_t		device_lock;
 	unsigned long		last_fail_event;
+	struct list_head	retry_list;
 
 	mempool_t		*pool;
 };
@@ -37,6 +38,6 @@ struct multipath_bh {
 	struct bio		*master_bio;
 	struct bio		bio;
 	int			path;
-	struct multipath_bh	*next_mp; /* next for retry */
+	struct list_head	retry_list;
 };
 #endif

diff ./include/linux/raid/raid1.h~current~ ./include/linux/raid/raid1.h
--- ./include/linux/raid/raid1.h~current~	2004-09-08 11:03:13.000000000 +1000
+++ ./include/linux/raid/raid1.h	2004-09-08 11:03:22.000000000 +1000
@@ -35,6 +35,7 @@ struct r1_private_data_s {
 	sector_t		next_seq_sect;
 	spinlock_t		device_lock;
 
+	struct list_head	retry_list;
 	/* for use when syncing mirrors: */
 
 	spinlock_t		resync_lock;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 4 of 7] Rationalise unplug functions in md
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  2004-09-08  2:48 ` [PATCH md 2 of 7] Make retry_list non-global in raid1 and multipath NeilBrown
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


There are currently subtle differences in the different personalities
concerning when subdevices are unplugged (faulty? nr_pending?).
This patch makes them all uniform.


Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/multipath.c |    2 +-
 ./drivers/md/raid1.c     |    2 +-
 ./drivers/md/raid10.c    |    2 +-
 ./drivers/md/raid5.c     |    4 ++--
 ./drivers/md/raid6main.c |    4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
--- ./drivers/md/multipath.c~current~	2004-09-08 11:56:55.000000000 +1000
+++ ./drivers/md/multipath.c	2004-09-08 11:57:14.000000000 +1000
@@ -159,7 +159,7 @@ static void unplug_slaves(mddev_t *mddev
 	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
-		if (rdev && !rdev->faulty) {
+		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);

diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~	2004-09-08 11:57:08.000000000 +1000
+++ ./drivers/md/raid1.c	2004-09-08 11:57:14.000000000 +1000
@@ -430,7 +430,7 @@ static void unplug_slaves(mddev_t *mddev
 	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
-		if (rdev && atomic_read(&rdev->nr_pending)) {
+		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2004-09-08 11:56:55.000000000 +1000
+++ ./drivers/md/raid10.c	2004-09-08 11:57:14.000000000 +1000
@@ -584,7 +584,7 @@ static void unplug_slaves(mddev_t *mddev
 	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
-		if (rdev && atomic_read(&rdev->nr_pending)) {
+		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2004-09-08 11:56:55.000000000 +1000
+++ ./drivers/md/raid5.c	2004-09-08 11:57:14.000000000 +1000
@@ -1307,13 +1307,13 @@ static void unplug_slaves(mddev_t *mddev
 	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
-		if (rdev && atomic_read(&rdev->nr_pending)) {
+		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
 			spin_unlock_irqrestore(&conf->device_lock, flags);
 
-			if (r_queue && r_queue->unplug_fn)
+			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~	2004-09-08 11:56:55.000000000 +1000
+++ ./drivers/md/raid6main.c	2004-09-08 11:57:14.000000000 +1000
@@ -1469,13 +1469,13 @@ static void unplug_slaves(mddev_t *mddev
 	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
-		if (rdev && atomic_read(&rdev->nr_pending)) {
+		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
 			spin_unlock_irqrestore(&conf->device_lock, flags);
 
-			if (r_queue && r_queue->unplug_fn)
+			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 5 of 7] Make sure md always uses rdev_dec_pending properly
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
                   ` (5 preceding siblings ...)
  2004-09-08  2:48 ` [PATCH md 7 of 7] Modify locking when accessing subdevices in md NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


The ->nr_pending counter should always be decremented with
rdev_dec_pending, as this needs to do things when the count hits zero.
There were a few places where it was being decremented directly.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/multipath.c |    2 +-
 ./drivers/md/raid1.c     |    4 ++--
 ./drivers/md/raid10.c    |    4 ++--
 ./drivers/md/raid5.c     |    2 +-
 ./drivers/md/raid6main.c |    2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
--- ./drivers/md/multipath.c~current~	2004-09-08 11:57:14.000000000 +1000
+++ ./drivers/md/multipath.c	2004-09-08 11:57:21.000000000 +1000
@@ -169,7 +169,7 @@ static void unplug_slaves(mddev_t *mddev
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);
-			atomic_dec(&rdev->nr_pending);
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);

diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~	2004-09-08 11:57:14.000000000 +1000
+++ ./drivers/md/raid1.c	2004-09-08 11:57:21.000000000 +1000
@@ -440,7 +440,7 @@ static void unplug_slaves(mddev_t *mddev
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);
-			atomic_dec(&rdev->nr_pending);
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);
@@ -1086,7 +1086,7 @@ static int sync_request(mddev_t *mddev, 
 		int rv = max_sector - sector_nr;
 		md_done_sync(mddev, rv, 1);
 		put_buf(r1_bio);
-		atomic_dec(&conf->mirrors[disk].rdev->nr_pending);
+		rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
 		return rv;
 	}
 

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2004-09-08 11:57:14.000000000 +1000
+++ ./drivers/md/raid10.c	2004-09-08 11:57:21.000000000 +1000
@@ -594,7 +594,7 @@ static void unplug_slaves(mddev_t *mddev
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);
-			atomic_dec(&rdev->nr_pending);
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);
@@ -1493,7 +1493,7 @@ static int sync_request(mddev_t *mddev, 
 			for (i=0; i<conf->copies; i++) {
 				int d = r10_bio->devs[i].devnum;
 				if (r10_bio->devs[i].bio->bi_end_io)
-					atomic_dec(&conf->mirrors[d].rdev->nr_pending);
+					rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 			}
 			put_buf(r10_bio);
 			goto giveup;

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2004-09-08 11:57:14.000000000 +1000
+++ ./drivers/md/raid5.c	2004-09-08 11:57:21.000000000 +1000
@@ -1317,7 +1317,7 @@ static void unplug_slaves(mddev_t *mddev
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);
-			atomic_dec(&rdev->nr_pending);
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~	2004-09-08 11:57:14.000000000 +1000
+++ ./drivers/md/raid6main.c	2004-09-08 11:57:21.000000000 +1000
@@ -1479,7 +1479,7 @@ static void unplug_slaves(mddev_t *mddev
 				r_queue->unplug_fn(r_queue);
 
 			spin_lock_irqsave(&conf->device_lock, flags);
-			atomic_dec(&rdev->nr_pending);
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 6 of 7] Fix two little bugs in raid10
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
                   ` (3 preceding siblings ...)
  2004-09-08  2:48 ` [PATCH md 3 of 7] Rationalise issue_flush function in md personalities NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  2004-09-08  2:48 ` [PATCH md 7 of 7] Modify locking when accessing subdevices in md NeilBrown
  2004-09-08  2:48 ` [PATCH md 5 of 7] Make sure md always uses rdev_dec_pending properly NeilBrown
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


These can cause resync to spin when there is a faulty drive.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/raid10.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletion(-)

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2004-09-08 11:57:21.000000000 +1000
+++ ./drivers/md/raid10.c	2004-09-08 11:58:02.000000000 +1000
@@ -1496,6 +1496,7 @@ static int sync_request(mddev_t *mddev, 
 					rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 			}
 			put_buf(r10_bio);
+			biolist = NULL;
 			goto giveup;
 		}
 	}
@@ -1557,7 +1558,7 @@ static int sync_request(mddev_t *mddev, 
 		}
 	}
 
-	return nr_sectors;
+	return sectors_skipped + nr_sectors;
  giveup:
 	/* There is nowhere to write, so all non-sync
 	 * drives must be failed, so try the next chunk...

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH md 7 of 7] Modify locking when accessing subdevices in md
  2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
                   ` (4 preceding siblings ...)
  2004-09-08  2:48 ` [PATCH md 6 of 7] Fix two little bugs in raid10 NeilBrown
@ 2004-09-08  2:48 ` NeilBrown
  2004-09-08  2:48 ` [PATCH md 5 of 7] Make sure md always uses rdev_dec_pending properly NeilBrown
  6 siblings, 0 replies; 8+ messages in thread
From: NeilBrown @ 2004-09-08  2:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-raid


Each md personality keeps a list of devices that are currently
active in the array (mdk_rdev_t).  As these can potentially
be removed at any time, some locking is needed when accessing
entries in the list.  Currently this involves a spinlock,
but all the spinlocking this imposed in unplug_slaves bothered
me.

So, I have changed it to use rcu locking.  This is more appropriate
as objects are removed only very rarely, and there is no
cost in waiting a little while for a remove to succeed.

Also, all changes to the list of devices are performed by the
per-array thread (calling md_check_recovery) and so are completely
single threaded, so no locking between writers is needed at all.

Finally, devices are never added or removed while resync is happening,
so resync doesn't need to worry about locking at all.

So with this patch, the spinlocking is replaced with rcu read locking
and synchronize_kernel.
The rcu_read_lock is held while dereferencing a possible device,
and the nr_pending count is atomically incremented if the device is to
be held outside of the rcu_read_lock.

When removing a device, if the nr_pending count appears to be zero, we
set the list entry to NULL and call synchronize_kernel().  If the 
count is still zero after this, we have a safe removal.  If it is non-zero,
then someone has just started using it so we put the pointer back and 
fail the removal.  When the new user finally drops its reference,
that will cause the per-array thread to wake up again and retry
the removal.


Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>

### Diffstat output
 ./drivers/md/multipath.c |   54 ++++++++++++++++++++++++----------------
 ./drivers/md/raid1.c     |   61 ++++++++++++++++++++++++----------------------
 ./drivers/md/raid10.c    |   62 +++++++++++++++++++++++++----------------------
 ./drivers/md/raid5.c     |   51 +++++++++++++++++++++-----------------
 ./drivers/md/raid6main.c |   52 +++++++++++++++++++++------------------
 5 files changed, 154 insertions(+), 126 deletions(-)

diff ./drivers/md/multipath.c~current~ ./drivers/md/multipath.c
--- ./drivers/md/multipath.c~current~	2004-09-08 11:57:21.000000000 +1000
+++ ./drivers/md/multipath.c	2004-09-08 11:59:00.000000000 +1000
@@ -66,12 +66,12 @@ static int multipath_map (multipath_conf
 	 * but this does not happen for the last device.
 	 */
  retry:
-	spin_lock_irq(&conf->device_lock);
+	rcu_read_lock();
 	for (i = 0; i < disks; i++) {
 		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
 		if (rdev && rdev->in_sync) {
 			atomic_inc(&rdev->nr_pending);
-			spin_unlock_irq(&conf->device_lock);
+			rcu_read_unlock();
 			return i;
 		}
 		if (rdev) failed = i;
@@ -79,10 +79,10 @@ static int multipath_map (multipath_conf
 	if (!retry && failed >= 0) {
 		mdk_rdev_t *rdev = conf->multipaths[failed].rdev;
 		atomic_inc(&rdev->nr_pending);
-		spin_unlock_irq(&conf->device_lock);
+		rcu_read_unlock();
 		return failed;
 	}
-	spin_unlock_irq(&conf->device_lock);
+	rcu_read_unlock();
 
 	/* sync with any monitoring daemon */
 	wait_event(md_event_waiters,
@@ -154,26 +154,26 @@ static void unplug_slaves(mddev_t *mddev
 {
 	multipath_conf_t *conf = mddev_to_conf(mddev);
 	int i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
+			rcu_read_unlock();
 
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			spin_lock_irqsave(&conf->device_lock, flags);
 			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 }
+
 static void multipath_unplug(request_queue_t *q)
 {
 	unplug_slaves(q->queuedata);
@@ -238,6 +238,7 @@ static int multipath_issue_flush(request
 	multipath_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
 		if (rdev && !rdev->faulty) {
@@ -246,11 +247,17 @@ static int multipath_issue_flush(request
 
 			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-			else
+			else {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
+				rdev_dec_pending(rdev, mddev);
+				rcu_read_lock();
+			}
 		}
 	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -312,10 +319,9 @@ static int multipath_add_disk(mddev_t *m
 	struct multipath_info *p;
 
 	print_multipath_conf(conf);
-	spin_lock_irq(&conf->device_lock);
+
 	for (path=0; path<mddev->raid_disks; path++) 
 		if ((p=conf->multipaths+path)->rdev == NULL) {
-			p->rdev = rdev;
 			blk_queue_stack_limits(mddev->queue,
 					       rdev->bdev->bd_disk->queue);
 
@@ -332,9 +338,9 @@ static int multipath_add_disk(mddev_t *m
 			conf->working_disks++;
 			rdev->raid_disk = path;
 			rdev->in_sync = 1;
+			p->rdev = rdev;
 			found = 1;
 		}
-	spin_unlock_irq(&conf->device_lock);
 
 	print_multipath_conf(conf);
 	return found;
@@ -343,11 +349,11 @@ static int multipath_add_disk(mddev_t *m
 static int multipath_remove_disk(mddev_t *mddev, int number)
 {
 	multipath_conf_t *conf = mddev->private;
-	int err = 1;
+	int err = 0;
+	mdk_rdev_t *rdev;
 	struct multipath_info *p = conf->multipaths + number;
 
 	print_multipath_conf(conf);
-	spin_lock_irq(&conf->device_lock);
 
 	if (conf->working_disks == 0)
 		/* don't remove devices when none seem to be working.
@@ -355,21 +361,25 @@ static int multipath_remove_disk(mddev_t
 		 * is really a device error
 		 */
 		goto abort;
-	if (p->rdev) {
-		if (p->rdev->in_sync ||
-		    atomic_read(&p->rdev->nr_pending)) {
+
+	rdev = p->rdev;
+	if (rdev) {
+		if (rdev->in_sync ||
+		    atomic_read(&rdev->nr_pending)) {
 			printk(KERN_ERR "hot-remove-disk, slot %d is identified"
 			       " but is still operational!\n", number);
 			err = -EBUSY;
 			goto abort;
 		}
 		p->rdev = NULL;
-		err = 0;
+		synchronize_kernel();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			p->rdev = rdev;
+		}
 	}
-	if (err)
-		MD_BUG();
 abort:
-	spin_unlock_irq(&conf->device_lock);
 
 	print_multipath_conf(conf);
 	return err;

diff ./drivers/md/raid1.c~current~ ./drivers/md/raid1.c
--- ./drivers/md/raid1.c~current~	2004-09-08 11:57:21.000000000 +1000
+++ ./drivers/md/raid1.c	2004-09-08 11:59:00.000000000 +1000
@@ -339,7 +339,7 @@ static int read_balance(conf_t *conf, r1
 	const int sectors = r1_bio->sectors;
 	sector_t new_distance, current_distance;
 
-	spin_lock_irq(&conf->device_lock);
+	rcu_read_lock();
 	/*
 	 * Check if it if we can balance. We can balance on the whole
 	 * device if no resync is going on, or below the resync window.
@@ -416,7 +416,7 @@ rb_out:
 		conf->last_used = new_disk;
 		atomic_inc(&conf->mirrors[new_disk].rdev->nr_pending);
 	}
-	spin_unlock_irq(&conf->device_lock);
+	rcu_read_unlock();
 
 	return new_disk;
 }
@@ -425,26 +425,26 @@ static void unplug_slaves(mddev_t *mddev
 {
 	conf_t *conf = mddev_to_conf(mddev);
 	int i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
+			rcu_read_unlock();
 
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			spin_lock_irqsave(&conf->device_lock, flags);
 			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 }
+
 static void raid1_unplug(request_queue_t *q)
 {
 	unplug_slaves(q->queuedata);
@@ -455,10 +455,9 @@ static int raid1_issue_flush(request_que
 {
 	mddev_t *mddev = q->queuedata;
 	conf_t *conf = mddev_to_conf(mddev);
-	unsigned long flags;
 	int i, ret = 0;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
 		if (rdev && !rdev->faulty) {
@@ -467,12 +466,17 @@ static int raid1_issue_flush(request_que
 
 			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-			else
+			else {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
+				rdev_dec_pending(rdev, mddev);
+				rcu_read_lock();
+			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -579,7 +583,7 @@ static int make_request(request_queue_t 
 	 * bios[x] to bio
 	 */
 	disks = conf->raid_disks;
-	spin_lock_irq(&conf->device_lock);
+	rcu_read_lock();
 	for (i = 0;  i < disks; i++) {
 		if (conf->mirrors[i].rdev &&
 		    !conf->mirrors[i].rdev->faulty) {
@@ -588,7 +592,7 @@ static int make_request(request_queue_t 
 		} else
 			r1_bio->bios[i] = NULL;
 	}
-	spin_unlock_irq(&conf->device_lock);
+	rcu_read_unlock();
 
 	atomic_set(&r1_bio->remaining, 1);
 	md_write_start(mddev);
@@ -710,7 +714,6 @@ static int raid1_spare_active(mddev_t *m
 	conf_t *conf = mddev->private;
 	mirror_info_t *tmp;
 
-	spin_lock_irq(&conf->device_lock);
 	/*
 	 * Find all failed disks within the RAID1 configuration 
 	 * and mark them readable
@@ -725,7 +728,6 @@ static int raid1_spare_active(mddev_t *m
 			tmp->rdev->in_sync = 1;
 		}
 	}
-	spin_unlock_irq(&conf->device_lock);
 
 	print_conf(conf);
 	return 0;
@@ -739,10 +741,8 @@ static int raid1_add_disk(mddev_t *mddev
 	int mirror;
 	mirror_info_t *p;
 
-	spin_lock_irq(&conf->device_lock);
 	for (mirror=0; mirror < mddev->raid_disks; mirror++)
 		if ( !(p=conf->mirrors+mirror)->rdev) {
-			p->rdev = rdev;
 
 			blk_queue_stack_limits(mddev->queue,
 					       rdev->bdev->bd_disk->queue);
@@ -757,9 +757,9 @@ static int raid1_add_disk(mddev_t *mddev
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
 			found = 1;
+			p->rdev = rdev;
 			break;
 		}
-	spin_unlock_irq(&conf->device_lock);
 
 	print_conf(conf);
 	return found;
@@ -768,24 +768,27 @@ static int raid1_add_disk(mddev_t *mddev
 static int raid1_remove_disk(mddev_t *mddev, int number)
 {
 	conf_t *conf = mddev->private;
-	int err = 1;
+	int err = 0;
+	mdk_rdev_t *rdev;
 	mirror_info_t *p = conf->mirrors+ number;
 
 	print_conf(conf);
-	spin_lock_irq(&conf->device_lock);
-	if (p->rdev) {
-		if (p->rdev->in_sync ||
-		    atomic_read(&p->rdev->nr_pending)) {
+	rdev = p->rdev;
+	if (rdev) {
+		if (rdev->in_sync ||
+		    atomic_read(&rdev->nr_pending)) {
 			err = -EBUSY;
 			goto abort;
 		}
 		p->rdev = NULL;
-		err = 0;
+		synchronize_kernel();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			p->rdev = rdev;
+		}
 	}
-	if (err)
-		MD_BUG();
 abort:
-	spin_unlock_irq(&conf->device_lock);
 
 	print_conf(conf);
 	return err;
@@ -1022,7 +1025,7 @@ static int sync_request(mddev_t *mddev, 
 	 */
 	disk = conf->last_used;
 	/* make sure disk is operational */
-	spin_lock_irq(&conf->device_lock);
+
 	while (conf->mirrors[disk].rdev == NULL ||
 	       !conf->mirrors[disk].rdev->in_sync) {
 		if (disk <= 0)
@@ -1033,7 +1036,7 @@ static int sync_request(mddev_t *mddev, 
 	}
 	conf->last_used = disk;
 	atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
-	spin_unlock_irq(&conf->device_lock);
+
 
 	mirror = conf->mirrors + disk;
 

diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~	2004-09-08 11:58:02.000000000 +1000
+++ ./drivers/md/raid10.c	2004-09-08 11:59:00.000000000 +1000
@@ -498,7 +498,7 @@ static int read_balance(conf_t *conf, r1
 	sector_t new_distance, current_distance;
 
 	raid10_find_phys(conf, r10_bio);
-	spin_lock_irq(&conf->device_lock);
+	rcu_read_lock();
 	/*
 	 * Check if we can balance. We can balance on the whole
 	 * device if no resync is going on, or below the resync window.
@@ -570,7 +570,7 @@ rb_out:
 
 	if (disk >= 0 && conf->mirrors[disk].rdev)
 		atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
-	spin_unlock_irq(&conf->device_lock);
+	rcu_read_unlock();
 
 	return disk;
 }
@@ -579,26 +579,26 @@ static void unplug_slaves(mddev_t *mddev
 {
 	conf_t *conf = mddev_to_conf(mddev);
 	int i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
+			rcu_read_unlock();
 
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			spin_lock_irqsave(&conf->device_lock, flags);
 			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 }
+
 static void raid10_unplug(request_queue_t *q)
 {
 	unplug_slaves(q->queuedata);
@@ -609,10 +609,9 @@ static int raid10_issue_flush(request_qu
 {
 	mddev_t *mddev = q->queuedata;
 	conf_t *conf = mddev_to_conf(mddev);
-	unsigned long flags;
 	int i, ret = 0;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
 		if (rdev && !rdev->faulty) {
@@ -621,12 +620,17 @@ static int raid10_issue_flush(request_qu
 
 			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-			else
+			else {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
+				rdev_dec_pending(rdev, mddev);
+				rcu_read_lock();
+			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -757,7 +761,7 @@ static int make_request(request_queue_t 
 	 * bios[x] to bio
 	 */
 	raid10_find_phys(conf, r10_bio);
-	spin_lock_irq(&conf->device_lock);
+	rcu_read_lock();
 	for (i = 0;  i < conf->copies; i++) {
 		int d = r10_bio->devs[i].devnum;
 		if (conf->mirrors[d].rdev &&
@@ -767,7 +771,7 @@ static int make_request(request_queue_t 
 		} else
 			r10_bio->devs[i].bio = NULL;
 	}
-	spin_unlock_irq(&conf->device_lock);
+	rcu_read_unlock();
 
 	atomic_set(&r10_bio->remaining, 1);
 	md_write_start(mddev);
@@ -900,7 +904,6 @@ static int raid10_spare_active(mddev_t *
 	conf_t *conf = mddev->private;
 	mirror_info_t *tmp;
 
-	spin_lock_irq(&conf->device_lock);
 	/*
 	 * Find all non-in_sync disks within the RAID10 configuration
 	 * and mark them in_sync
@@ -915,7 +918,6 @@ static int raid10_spare_active(mddev_t *
 			tmp->rdev->in_sync = 1;
 		}
 	}
-	spin_unlock_irq(&conf->device_lock);
 
 	print_conf(conf);
 	return 0;
@@ -934,10 +936,9 @@ static int raid10_add_disk(mddev_t *mdde
 		 * very different from resync
 		 */
 		return 0;
-	spin_lock_irq(&conf->device_lock);
+
 	for (mirror=0; mirror < mddev->raid_disks; mirror++)
 		if ( !(p=conf->mirrors+mirror)->rdev) {
-			p->rdev = rdev;
 
 			blk_queue_stack_limits(mddev->queue,
 					       rdev->bdev->bd_disk->queue);
@@ -952,9 +953,9 @@ static int raid10_add_disk(mddev_t *mdde
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
 			found = 1;
+			p->rdev = rdev;
 			break;
 		}
-	spin_unlock_irq(&conf->device_lock);
 
 	print_conf(conf);
 	return found;
@@ -963,24 +964,27 @@ static int raid10_add_disk(mddev_t *mdde
 static int raid10_remove_disk(mddev_t *mddev, int number)
 {
 	conf_t *conf = mddev->private;
-	int err = 1;
+	int err = 0;
+	mdk_rdev_t *rdev;
 	mirror_info_t *p = conf->mirrors+ number;
 
 	print_conf(conf);
-	spin_lock_irq(&conf->device_lock);
-	if (p->rdev) {
-		if (p->rdev->in_sync ||
-		    atomic_read(&p->rdev->nr_pending)) {
+	rdev = p->rdev;
+	if (rdev) {
+		if (rdev->in_sync ||
+		    atomic_read(&rdev->nr_pending)) {
 			err = -EBUSY;
 			goto abort;
 		}
 		p->rdev = NULL;
-		err = 0;
+		synchronize_kernel();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			p->rdev = rdev;
+		}
 	}
-	if (err)
-		MD_BUG();
 abort:
-	spin_unlock_irq(&conf->device_lock);
 
 	print_conf(conf);
 	return err;
@@ -1468,7 +1472,7 @@ static int sync_request(mddev_t *mddev, 
 		set_bit(R10BIO_IsSync, &r10_bio->state);
 		raid10_find_phys(conf, r10_bio);
 		r10_bio->sectors = (sector_nr | conf->chunk_mask) - sector_nr +1;
-		spin_lock_irq(&conf->device_lock);
+
 		for (i=0; i<conf->copies; i++) {
 			int d = r10_bio->devs[i].devnum;
 			bio = r10_bio->devs[i].bio;
@@ -1488,7 +1492,7 @@ static int sync_request(mddev_t *mddev, 
 			bio->bi_bdev = conf->mirrors[d].rdev->bdev;
 			count++;
 		}
-		spin_unlock_irq(&conf->device_lock);
+
 		if (count < 2) {
 			for (i=0; i<conf->copies; i++) {
 				int d = r10_bio->devs[i].devnum;

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2004-09-08 11:57:21.000000000 +1000
+++ ./drivers/md/raid5.c	2004-09-08 11:59:00.000000000 +1000
@@ -1247,13 +1247,13 @@ static void handle_stripe(struct stripe_
 		else
 			bi->bi_end_io = raid5_end_read_request;
  
-		spin_lock_irq(&conf->device_lock);
+		rcu_read_lock();
 		rdev = conf->disks[i].rdev;
 		if (rdev && rdev->faulty)
 			rdev = NULL;
 		if (rdev)
 			atomic_inc(&rdev->nr_pending);
-		spin_unlock_irq(&conf->device_lock);
+		rcu_read_unlock();
  
 		if (rdev) {
 			if (test_bit(R5_Syncio, &sh->dev[i].flags))
@@ -1302,25 +1302,24 @@ static void unplug_slaves(mddev_t *mddev
 {
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	int i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
+			rcu_read_unlock();
 
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			spin_lock_irqsave(&conf->device_lock, flags);
 			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 }
 
 static void raid5_unplug_device(request_queue_t *q)
@@ -1347,6 +1346,7 @@ static int raid5_issue_flush(request_que
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
 		if (rdev && !rdev->faulty) {
@@ -1355,11 +1355,17 @@ static int raid5_issue_flush(request_que
 
 			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-			else
+			else {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
+				rdev_dec_pending(rdev, mddev);
+				rcu_read_lock();
+			}
 		}
 	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -1789,7 +1795,6 @@ static int raid5_spare_active(mddev_t *m
 	raid5_conf_t *conf = mddev->private;
 	struct disk_info *tmp;
 
-	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < conf->raid_disks; i++) {
 		tmp = conf->disks + i;
 		if (tmp->rdev
@@ -1801,7 +1806,6 @@ static int raid5_spare_active(mddev_t *m
 			tmp->rdev->in_sync = 1;
 		}
 	}
-	spin_unlock_irq(&conf->device_lock);
 	print_raid5_conf(conf);
 	return 0;
 }
@@ -1809,25 +1813,28 @@ static int raid5_spare_active(mddev_t *m
 static int raid5_remove_disk(mddev_t *mddev, int number)
 {
 	raid5_conf_t *conf = mddev->private;
-	int err = 1;
+	int err = 0;
+	mdk_rdev_t *rdev;
 	struct disk_info *p = conf->disks + number;
 
 	print_raid5_conf(conf);
-	spin_lock_irq(&conf->device_lock);
-
-	if (p->rdev) {
-		if (p->rdev->in_sync || 
-		    atomic_read(&p->rdev->nr_pending)) {
+	rdev = p->rdev;
+	if (rdev) {
+		if (rdev->in_sync || 
+		    atomic_read(&rdev->nr_pending)) {
 			err = -EBUSY;
 			goto abort;
 		}
 		p->rdev = NULL;
-		err = 0;
+		synchronize_kernel();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			p->rdev = rdev;
+		}
 	}
-	if (err)
-		MD_BUG();
 abort:
-	spin_unlock_irq(&conf->device_lock);
+
 	print_raid5_conf(conf);
 	return err;
 }
@@ -1839,19 +1846,17 @@ static int raid5_add_disk(mddev_t *mddev
 	int disk;
 	struct disk_info *p;
 
-	spin_lock_irq(&conf->device_lock);
 	/*
 	 * find the disk ...
 	 */
 	for (disk=0; disk < mddev->raid_disks; disk++)
 		if ((p=conf->disks + disk)->rdev == NULL) {
-			p->rdev = rdev;
 			rdev->in_sync = 0;
 			rdev->raid_disk = disk;
 			found = 1;
+			p->rdev = rdev;
 			break;
 		}
-	spin_unlock_irq(&conf->device_lock);
 	print_raid5_conf(conf);
 	return found;
 }

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~	2004-09-08 11:57:21.000000000 +1000
+++ ./drivers/md/raid6main.c	2004-09-08 11:59:00.000000000 +1000
@@ -1409,13 +1409,13 @@ static void handle_stripe(struct stripe_
 		else
 			bi->bi_end_io = raid6_end_read_request;
 
-		spin_lock_irq(&conf->device_lock);
+		rcu_read_lock();
 		rdev = conf->disks[i].rdev;
 		if (rdev && rdev->faulty)
 			rdev = NULL;
 		if (rdev)
 			atomic_inc(&rdev->nr_pending);
-		spin_unlock_irq(&conf->device_lock);
+		rcu_read_unlock();
 
 		if (rdev) {
 			if (test_bit(R5_Syncio, &sh->dev[i].flags))
@@ -1464,25 +1464,24 @@ static void unplug_slaves(mddev_t *mddev
 {
 	raid6_conf_t *conf = mddev_to_conf(mddev);
 	int i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
 			atomic_inc(&rdev->nr_pending);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
+			rcu_read_unlock();
 
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			spin_lock_irqsave(&conf->device_lock, flags);
 			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	rcu_read_unlock();
 }
 
 static void raid6_unplug_device(request_queue_t *q)
@@ -1509,6 +1508,7 @@ static int raid6_issue_flush(request_que
 	raid6_conf_t *conf = mddev_to_conf(mddev);
 	int i, ret = 0;
 
+	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
 		if (rdev && !rdev->faulty) {
@@ -1517,11 +1517,17 @@ static int raid6_issue_flush(request_que
 
 			if (!r_queue->issue_flush_fn)
 				ret = -EOPNOTSUPP;
-			else
+			else {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
+				rdev_dec_pending(rdev, mddev);
+				rcu_read_lock();
+			}
 		}
 	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -1958,7 +1964,6 @@ static int raid6_spare_active(mddev_t *m
 	raid6_conf_t *conf = mddev->private;
 	struct disk_info *tmp;
 
-	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < conf->raid_disks; i++) {
 		tmp = conf->disks + i;
 		if (tmp->rdev
@@ -1970,7 +1975,6 @@ static int raid6_spare_active(mddev_t *m
 			tmp->rdev->in_sync = 1;
 		}
 	}
-	spin_unlock_irq(&conf->device_lock);
 	print_raid6_conf(conf);
 	return 0;
 }
@@ -1978,25 +1982,29 @@ static int raid6_spare_active(mddev_t *m
 static int raid6_remove_disk(mddev_t *mddev, int number)
 {
 	raid6_conf_t *conf = mddev->private;
-	int err = 1;
+	int err = 0;
+	mdk_rdev_t *rdev;
 	struct disk_info *p = conf->disks + number;
 
 	print_raid6_conf(conf);
-	spin_lock_irq(&conf->device_lock);
-
-	if (p->rdev) {
-		if (p->rdev->in_sync ||
-		    atomic_read(&p->rdev->nr_pending)) {
+	rdev = p->rdev;
+	if (rdev) {
+		if (rdev->in_sync ||
+		    atomic_read(&rdev->nr_pending)) {
 			err = -EBUSY;
 			goto abort;
 		}
 		p->rdev = NULL;
-		err = 0;
+		synchronize_kernel();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			p->rdev = rdev;
+		}
 	}
-	if (err)
-		MD_BUG();
+
 abort:
-	spin_unlock_irq(&conf->device_lock);
+
 	print_raid6_conf(conf);
 	return err;
 }
@@ -2008,19 +2016,17 @@ static int raid6_add_disk(mddev_t *mddev
 	int disk;
 	struct disk_info *p;
 
-	spin_lock_irq(&conf->device_lock);
 	/*
 	 * find the disk ...
 	 */
 	for (disk=0; disk < mddev->raid_disks; disk++)
 		if ((p=conf->disks + disk)->rdev == NULL) {
-			p->rdev = rdev;
 			rdev->in_sync = 0;
 			rdev->raid_disk = disk;
 			found = 1;
+			p->rdev = rdev;
 			break;
 		}
-	spin_unlock_irq(&conf->device_lock);
 	print_raid6_conf(conf);
 	return found;
 }

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2004-09-08  2:48 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-09-08  2:48 [PATCH md 0 of 7] Introduction NeilBrown
2004-09-08  2:48 ` [PATCH md 4 of 7] Rationalise unplug functions in md NeilBrown
2004-09-08  2:48 ` [PATCH md 2 of 7] Make retry_list non-global in raid1 and multipath NeilBrown
2004-09-08  2:48 ` [PATCH md 1 of 7] Remove md_flush_all NeilBrown
2004-09-08  2:48 ` [PATCH md 3 of 7] Rationalise issue_flush function in md personalities NeilBrown
2004-09-08  2:48 ` [PATCH md 6 of 7] Fix two little bugs in raid10 NeilBrown
2004-09-08  2:48 ` [PATCH md 7 of 7] Modify locking when accessing subdevices in md NeilBrown
2004-09-08  2:48 ` [PATCH md 5 of 7] Make sure md always uses rdev_dec_pending properly NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox