linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 6/6] md-cluster: re-add capabilities
@ 2015-04-14 15:45 Goldwyn Rodrigues
  2015-04-20  2:01 ` NeilBrown
  0 siblings, 1 reply; 2+ messages in thread
From: Goldwyn Rodrigues @ 2015-04-14 15:45 UTC (permalink / raw)
  To: neilb; +Cc: GQJiang, linux-raid

When "re-add" is writted to /sys/block/mdXX/md/dev-YYY/state,
the clustered md:

1. Sends RE_ADD message with the desc_nr. Nodes receiving the message
   clear the Faulty bit in their respective rdev->flags.
2. The node initiating re-add, gathers the bitmaps of all nodes
   and copies them into the local bitmap. It does not clear the bitmap
   from which it is copying.
3. Initiating node schedules a md recovery to sync the devices.

Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 drivers/md/bitmap.c     | 20 +++++++++++---------
 drivers/md/bitmap.h     |  2 +-
 drivers/md/md-cluster.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/md-cluster.h |  1 +
 drivers/md/md.c         | 12 ++++++++++++
 5 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5ff67c3..956cfb9 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1852,7 +1852,7 @@ EXPORT_SYMBOL_GPL(bitmap_load);
  * to our bitmap
  */
 int bitmap_copy_from_slot(struct mddev *mddev, int slot,
-		sector_t *low, sector_t *high)
+		sector_t *low, sector_t *high, bool clear_bits)
 {
 	int rv = 0, i, j;
 	sector_t block, lo = 0, hi = 0;
@@ -1879,14 +1879,16 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
 		}
 	}
 
-	bitmap_update_sb(bitmap);
-	/* Setting this for the ev_page should be enough.
-	 * And we do not require both write_all and PAGE_DIRT either
-	 */
-	for (i = 0; i < bitmap->storage.file_pages; i++)
-		set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
-	bitmap_write_all(bitmap);
-	bitmap_unplug(bitmap);
+	if (clear_bits) {
+		bitmap_update_sb(bitmap);
+		/* Setting this for the ev_page should be enough.
+		 * And we do not require both write_all and PAGE_DIRT either
+		 */
+		for (i = 0; i < bitmap->storage.file_pages; i++)
+			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
+		bitmap_write_all(bitmap);
+		bitmap_unplug(bitmap);
+	}
 	*low = lo;
 	*high = hi;
 err:
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 4aabc74..f1f4dd0 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -263,7 +263,7 @@ void bitmap_daemon_work(struct mddev *mddev);
 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 		  int chunksize, int init);
 int bitmap_copy_from_slot(struct mddev *mddev, int slot,
-				sector_t *lo, sector_t *hi);
+				sector_t *lo, sector_t *hi, bool clear_bits);
 #endif
 
 #endif
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 82f1b7b..ad2b5b7 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -73,6 +73,7 @@ enum msg_type {
 	RESYNCING,
 	NEWDISK,
 	REMOVE,
+	RE_ADD,
 };
 
 struct cluster_msg {
@@ -253,7 +254,7 @@ void recover_bitmaps(struct md_thread *thread)
 					str, ret);
 			goto clear_bit;
 		}
-		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
+		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
 		if (ret) {
 			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
 			goto dlm_unlock;
@@ -412,6 +413,16 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
 		pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
 }
 
+static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
+{
+	struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
+
+	if (rdev && test_bit(Faulty, &rdev->flags))
+		clear_bit(Faulty, &rdev->flags);
+	else
+		pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot);
+}
+
 static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 {
 	switch (msg->type) {
@@ -436,6 +447,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 			__func__, __LINE__, msg->slot);
 		process_remove_disk(mddev, msg);
 		break;
+	case RE_ADD:
+		pr_info("%s: %d Received RE_ADD from %d\n",
+			__func__, __LINE__, msg->slot);
+		process_readd_disk(mddev, msg);
+		break;
 	default:
 		pr_warn("%s:%d Received unknown message from %d\n",
 			__func__, __LINE__, msg->slot);
@@ -883,6 +899,35 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	return __sendmsg(cinfo, &cmsg);
 }
 
+static int gather_bitmaps(struct md_rdev *rdev)
+{
+	int sn, err;
+	sector_t lo, hi;
+	struct cluster_msg cmsg;
+	struct mddev *mddev = rdev->mddev;
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	cmsg.type = RE_ADD;
+	cmsg.raid_slot = rdev->desc_nr;
+	err = sendmsg(cinfo, &cmsg);
+	if (err)
+		goto out;
+
+	for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
+		if (sn == (cinfo->slot_number - 1))
+			continue;
+		err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
+		if (err) {
+			pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
+			goto out;
+		}
+		if ((hi > 0) && (lo < mddev->recovery_cp))
+			mddev->recovery_cp = lo;
+	}
+out:
+	return err;
+}
+
 static struct md_cluster_operations cluster_ops = {
 	.join   = join,
 	.leave  = leave,
@@ -898,6 +943,7 @@ static struct md_cluster_operations cluster_ops = {
 	.add_new_disk_finish = add_new_disk_finish,
 	.new_disk_ack = new_disk_ack,
 	.remove_disk = remove_disk,
+	.gather_bitmaps = gather_bitmaps,
 };
 
 static int __init cluster_init(void)
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 71e5143..6817ee0 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -23,6 +23,7 @@ struct md_cluster_operations {
 	int (*add_new_disk_finish)(struct mddev *mddev);
 	int (*new_disk_ack)(struct mddev *mddev, bool ack);
 	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
+	int (*gather_bitmaps)(struct md_rdev *rdev);
 };
 
 #endif /* _MD_CLUSTER_H */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ba01605..8c37bbf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2599,11 +2599,23 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 			err = 0;
 		}
 	} else if (cmd_match(buf, "re-add") && (test_bit(Faulty, &rdev->flags) || (rdev->raid_disk == -1))) {
+			/* clear_bit is performed _after_ all the devices
+			 * have their local Faulty bit cleared. If any writes
+			 * happen in the meantime in the local node, they
+			 * will land in the local bitmap, which will be synced
+			 * by this node eventually
+			 */
+			if (mddev_is_clustered(rdev->mddev)) {
+				err = md_cluster_ops->gather_bitmaps(rdev);
+				if (err)
+					goto out;
+			}
 			clear_bit(Faulty, &rdev->flags);
 			err = add_bound_rdev(rdev);
 		}
 	if (!err)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
+out:
 	return err ? err : len;
 }
 static struct rdev_sysfs_entry rdev_state =
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH 6/6] md-cluster: re-add capabilities
  2015-04-14 15:45 [PATCH 6/6] md-cluster: re-add capabilities Goldwyn Rodrigues
@ 2015-04-20  2:01 ` NeilBrown
  0 siblings, 0 replies; 2+ messages in thread
From: NeilBrown @ 2015-04-20  2:01 UTC (permalink / raw)
  To: Goldwyn Rodrigues; +Cc: GQJiang, linux-raid

[-- Attachment #1: Type: text/plain, Size: 7679 bytes --]

On Tue, 14 Apr 2015 10:45:42 -0500 Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:

> When "re-add" is writted to /sys/block/mdXX/md/dev-YYY/state,
> the clustered md:
> 
> 1. Sends RE_ADD message with the desc_nr. Nodes receiving the message
>    clear the Faulty bit in their respective rdev->flags.
> 2. The node initiating re-add, gathers the bitmaps of all nodes
>    and copies them into the local bitmap. It does not clear the bitmap
>    from which it is copying.
> 3. Initiating node schedules a md recovery to sync the devices.
> 
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  drivers/md/bitmap.c     | 20 +++++++++++---------
>  drivers/md/bitmap.h     |  2 +-
>  drivers/md/md-cluster.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/md/md-cluster.h |  1 +
>  drivers/md/md.c         | 12 ++++++++++++
>  5 files changed, 72 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
> index 5ff67c3..956cfb9 100644
> --- a/drivers/md/bitmap.c
> +++ b/drivers/md/bitmap.c
> @@ -1852,7 +1852,7 @@ EXPORT_SYMBOL_GPL(bitmap_load);
>   * to our bitmap
>   */
>  int bitmap_copy_from_slot(struct mddev *mddev, int slot,
> -		sector_t *low, sector_t *high)
> +		sector_t *low, sector_t *high, bool clear_bits)
>  {
>  	int rv = 0, i, j;
>  	sector_t block, lo = 0, hi = 0;
> @@ -1879,14 +1879,16 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
>  		}
>  	}
>  
> -	bitmap_update_sb(bitmap);
> -	/* Setting this for the ev_page should be enough.
> -	 * And we do not require both write_all and PAGE_DIRT either
> -	 */
> -	for (i = 0; i < bitmap->storage.file_pages; i++)
> -		set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
> -	bitmap_write_all(bitmap);
> -	bitmap_unplug(bitmap);
> +	if (clear_bits) {
> +		bitmap_update_sb(bitmap);
> +		/* Setting this for the ev_page should be enough.
> +		 * And we do not require both write_all and PAGE_DIRT either
> +		 */
> +		for (i = 0; i < bitmap->storage.file_pages; i++)
> +			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
> +		bitmap_write_all(bitmap);
> +		bitmap_unplug(bitmap);
> +	}
>  	*low = lo;
>  	*high = hi;
>  err:
> diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
> index 4aabc74..f1f4dd0 100644
> --- a/drivers/md/bitmap.h
> +++ b/drivers/md/bitmap.h
> @@ -263,7 +263,7 @@ void bitmap_daemon_work(struct mddev *mddev);
>  int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
>  		  int chunksize, int init);
>  int bitmap_copy_from_slot(struct mddev *mddev, int slot,
> -				sector_t *lo, sector_t *hi);
> +				sector_t *lo, sector_t *hi, bool clear_bits);
>  #endif
>  
>  #endif
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 82f1b7b..ad2b5b7 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -73,6 +73,7 @@ enum msg_type {
>  	RESYNCING,
>  	NEWDISK,
>  	REMOVE,
> +	RE_ADD,
>  };
>  
>  struct cluster_msg {
> @@ -253,7 +254,7 @@ void recover_bitmaps(struct md_thread *thread)
>  					str, ret);
>  			goto clear_bit;
>  		}
> -		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
> +		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
>  		if (ret) {
>  			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
>  			goto dlm_unlock;
> @@ -412,6 +413,16 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
>  		pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
>  }
>  
> +static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
> +{
> +	struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
> +
> +	if (rdev && test_bit(Faulty, &rdev->flags))
> +		clear_bit(Faulty, &rdev->flags);
> +	else
> +		pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot);
> +}
> +
>  static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
>  {
>  	switch (msg->type) {
> @@ -436,6 +447,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
>  			__func__, __LINE__, msg->slot);
>  		process_remove_disk(mddev, msg);
>  		break;
> +	case RE_ADD:
> +		pr_info("%s: %d Received RE_ADD from %d\n",
> +			__func__, __LINE__, msg->slot);
> +		process_readd_disk(mddev, msg);
> +		break;
>  	default:
>  		pr_warn("%s:%d Received unknown message from %d\n",
>  			__func__, __LINE__, msg->slot);
> @@ -883,6 +899,35 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
>  	return __sendmsg(cinfo, &cmsg);
>  }
>  
> +static int gather_bitmaps(struct md_rdev *rdev)
> +{
> +	int sn, err;
> +	sector_t lo, hi;
> +	struct cluster_msg cmsg;
> +	struct mddev *mddev = rdev->mddev;
> +	struct md_cluster_info *cinfo = mddev->cluster_info;
> +
> +	cmsg.type = RE_ADD;
> +	cmsg.raid_slot = rdev->desc_nr;
> +	err = sendmsg(cinfo, &cmsg);
> +	if (err)
> +		goto out;
> +
> +	for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
> +		if (sn == (cinfo->slot_number - 1))
> +			continue;
> +		err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
> +		if (err) {
> +			pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
> +			goto out;
> +		}
> +		if ((hi > 0) && (lo < mddev->recovery_cp))
> +			mddev->recovery_cp = lo;
> +	}
> +out:
> +	return err;
> +}
> +
>  static struct md_cluster_operations cluster_ops = {
>  	.join   = join,
>  	.leave  = leave,
> @@ -898,6 +943,7 @@ static struct md_cluster_operations cluster_ops = {
>  	.add_new_disk_finish = add_new_disk_finish,
>  	.new_disk_ack = new_disk_ack,
>  	.remove_disk = remove_disk,
> +	.gather_bitmaps = gather_bitmaps,
>  };
>  
>  static int __init cluster_init(void)
> diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
> index 71e5143..6817ee0 100644
> --- a/drivers/md/md-cluster.h
> +++ b/drivers/md/md-cluster.h
> @@ -23,6 +23,7 @@ struct md_cluster_operations {
>  	int (*add_new_disk_finish)(struct mddev *mddev);
>  	int (*new_disk_ack)(struct mddev *mddev, bool ack);
>  	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
> +	int (*gather_bitmaps)(struct md_rdev *rdev);
>  };
>  
>  #endif /* _MD_CLUSTER_H */
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index ba01605..8c37bbf 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2599,11 +2599,23 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
>  			err = 0;
>  		}
>  	} else if (cmd_match(buf, "re-add") && (test_bit(Faulty, &rdev->flags) || (rdev->raid_disk == -1))) {
> +			/* clear_bit is performed _after_ all the devices
> +			 * have their local Faulty bit cleared. If any writes
> +			 * happen in the meantime in the local node, they
> +			 * will land in the local bitmap, which will be synced
> +			 * by this node eventually
> +			 */
> +			if (mddev_is_clustered(rdev->mddev)) {
> +				err = md_cluster_ops->gather_bitmaps(rdev);
> +				if (err)
> +					goto out;
> +			}
>  			clear_bit(Faulty, &rdev->flags);
>  			err = add_bound_rdev(rdev);
>  		}
>  	if (!err)
>  		sysfs_notify_dirent_safe(rdev->sysfs_state);
> +out:
>  	return err ? err : len;
>  }
>  static struct rdev_sysfs_entry rdev_state =

I changed this to:

			if (!mddev_is_clustered(rdev->mddev) ||
			    (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
				clear_bit(Faulty, &rdev->flags);
				err = add_bound_rdev(rdev);
			}

because I think it makes the code a bit clearer.

otherwise, applied.
Thanks,
NeilBrown



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 811 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-04-20  2:01 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-04-14 15:45 [PATCH 6/6] md-cluster: re-add capabilities Goldwyn Rodrigues
2015-04-20  2:01 ` NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).