[PATCH 3/4] re-add a failed disk

linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 3/4] re-add a failed disk
@ 2015-04-08 19:23 Goldwyn Rodrigues
  2015-04-08 23:32 ` NeilBrown
  0 siblings, 1 reply; 2+ messages in thread
From: Goldwyn Rodrigues @ 2015-04-08 19:23 UTC (permalink / raw)
  To: neilb; +Cc: linux-raid, GQJiang

This adds the capability of re-adding a failed disk by
writing "re-add" to /sys/block/mdXX/md/dev-YYY/state.

This works for generic md, and is not (completely)
related to clustering.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 drivers/md/md.c | 56 ++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0c65e51..83a8e91 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -79,6 +79,7 @@ static struct workqueue_struct *md_misc_wq;
 
 static int remove_and_add_spares(struct mddev *mddev,
 				 struct md_rdev *this);
+static int add_bound_rdev(struct md_rdev *);
 
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
@@ -2845,6 +2846,9 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 			clear_bit(Replacement, &rdev->flags);
 			err = 0;
 		}
+	} else if (cmd_match(buf, "re-add")) {
+		clear_bit(Faulty, &rdev->flags);
+		err = add_bound_rdev(rdev);
 	}
 	if (!err)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -5861,6 +5865,37 @@ static int get_disk_info(struct mddev * mddev, void __user * arg)
 	return 0;
 }
 
+static int add_bound_rdev(struct md_rdev *rdev)
+{
+	struct mddev *mddev = rdev->mddev;
+	int err = 0;
+
+	if (!mddev->pers->hot_remove_disk) {
+		/* If there is hot_add_disk but no hot_remove_disk
+		 * then added disks for geometry changes,
+		 * and should be added immediately.
+		 */
+		super_types[mddev->major_version].
+			validate_super(mddev, rdev);
+		err = mddev->pers->hot_add_disk(mddev, rdev);
+		if (err)
+			unbind_rdev_from_array(rdev);
+	}
+	if (err)
+		export_rdev(rdev);
+	else
+		sysfs_notify_dirent_safe(rdev->sysfs_state);
+
+	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	if (mddev->degraded)
+		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	if (!err)
+		md_new_event(mddev);
+	md_wakeup_thread(mddev->thread);
+	return err;
+}
+
 static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
 {
 	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
@@ -5989,29 +6024,10 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
 
 		rdev->raid_disk = -1;
 		err = bind_rdev_to_array(rdev, mddev);
-		if (!err && !mddev->pers->hot_remove_disk) {
-			/* If there is hot_add_disk but no hot_remove_disk
-			 * then added disks for geometry changes,
-			 * and should be added immediately.
-			 */
-			super_types[mddev->major_version].
-				validate_super(mddev, rdev);
-			err = mddev->pers->hot_add_disk(mddev, rdev);
-			if (err)
-				unbind_rdev_from_array(rdev);
-		}
 		if (err)
 			export_rdev(rdev);
 		else
-			sysfs_notify_dirent_safe(rdev->sysfs_state);
-
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		if (mddev->degraded)
-			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-		if (!err)
-			md_new_event(mddev);
-		md_wakeup_thread(mddev->thread);
+			err = add_bound_rdev(rdev);
 		if (mddev_is_clustered(mddev) &&
 				(info->state & (1 << MD_DISK_CLUSTER_ADD)))
 			md_cluster_ops->add_new_disk_finish(mddev);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH 3/4] re-add a failed disk
  2015-04-08 19:23 [PATCH 3/4] re-add a failed disk Goldwyn Rodrigues
@ 2015-04-08 23:32 ` NeilBrown
  0 siblings, 0 replies; 2+ messages in thread
From: NeilBrown @ 2015-04-08 23:32 UTC (permalink / raw)
  To: Goldwyn Rodrigues; +Cc: linux-raid, GQJiang

[-- Attachment #1: Type: text/plain, Size: 4147 bytes --]

On Wed, 8 Apr 2015 14:23:37 -0500 Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:

> This adds the capability of re-adding a failed disk by
> writing "re-add" to /sys/block/mdXX/md/dev-YYY/state.
> 
> This works for generic md, and is not (completely)
> related to clustering.

Again, more words here would help.
Explain why "remove, then add" isn't sufficient.

> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  drivers/md/md.c | 56 ++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 36 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 0c65e51..83a8e91 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -79,6 +79,7 @@ static struct workqueue_struct *md_misc_wq;
>  
>  static int remove_and_add_spares(struct mddev *mddev,
>  				 struct md_rdev *this);
> +static int add_bound_rdev(struct md_rdev *);
>  
>  #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
>  
> @@ -2845,6 +2846,9 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
>  			clear_bit(Replacement, &rdev->flags);
>  			err = 0;
>  		}
> +	} else if (cmd_match(buf, "re-add")) {
> +		clear_bit(Faulty, &rdev->flags);
> +		err = add_bound_rdev(rdev);

Shouldn't this fail if the device isn't Faulty, or if raid_disk isn't '-1' ??


>  	}
>  	if (!err)
>  		sysfs_notify_dirent_safe(rdev->sysfs_state);
> @@ -5861,6 +5865,37 @@ static int get_disk_info(struct mddev * mddev, void __user * arg)
>  	return 0;
>  }
>  
> +static int add_bound_rdev(struct md_rdev *rdev)
> +{
> +	struct mddev *mddev = rdev->mddev;
> +	int err = 0;
> +
> +	if (!mddev->pers->hot_remove_disk) {
> +		/* If there is hot_add_disk but no hot_remove_disk
> +		 * then added disks for geometry changes,
> +		 * and should be added immediately.
> +		 */
> +		super_types[mddev->major_version].
> +			validate_super(mddev, rdev);
> +		err = mddev->pers->hot_add_disk(mddev, rdev);
> +		if (err)
> +			unbind_rdev_from_array(rdev);
> +	}
> +	if (err)
> +		export_rdev(rdev);
> +	else
> +		sysfs_notify_dirent_safe(rdev->sysfs_state);
> +
> +	set_bit(MD_CHANGE_DEVS, &mddev->flags);
> +	if (mddev->degraded)
> +		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
> +	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +	if (!err)
> +		md_new_event(mddev);
> +	md_wakeup_thread(mddev->thread);
> +	return err;
> +}
> +
>  static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
>  {
>  	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
> @@ -5989,29 +6024,10 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
>  
>  		rdev->raid_disk = -1;
>  		err = bind_rdev_to_array(rdev, mddev);
> -		if (!err && !mddev->pers->hot_remove_disk) {
> -			/* If there is hot_add_disk but no hot_remove_disk
> -			 * then added disks for geometry changes,
> -			 * and should be added immediately.
> -			 */
> -			super_types[mddev->major_version].
> -				validate_super(mddev, rdev);
> -			err = mddev->pers->hot_add_disk(mddev, rdev);
> -			if (err)
> -				unbind_rdev_from_array(rdev);
> -		}
>  		if (err)
>  			export_rdev(rdev);
>  		else
> -			sysfs_notify_dirent_safe(rdev->sysfs_state);
> -
> -		set_bit(MD_CHANGE_DEVS, &mddev->flags);
> -		if (mddev->degraded)
> -			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
> -		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> -		if (!err)
> -			md_new_event(mddev);
> -		md_wakeup_thread(mddev->thread);
> +			err = add_bound_rdev(rdev);
>  		if (mddev_is_clustered(mddev) &&
>  				(info->state & (1 << MD_DISK_CLUSTER_ADD)))
>  			md_cluster_ops->add_new_disk_finish(mddev);

This is a rather nice clean-up.  Possibly it could go in a separate patch -
or maybe not.

Also I think there is more room for improvement.
There is only one place in add_bound_rdev() that can set 'err', and I think
that when that happens we can just clean up and return. There is no real
need for the 'set_bit's or the wakeup.

Would you mind cleaning up a bit more?

Thanks,
NeilBrown


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 811 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-04-08 23:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-08 19:23 [PATCH 3/4] re-add a failed disk Goldwyn Rodrigues
2015-04-08 23:32 ` NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).