[PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region
@ 2016-05-04  2:22 Guoqing Jiang
  2016-05-04  2:22 ` [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready Guoqing Jiang
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Guoqing Jiang @ 2016-05-04  2:22 UTC (permalink / raw)
  To: shli
  Cc: neilb, linux-raid, Guoqing Jiang, Martin Kepplinger,
	Andrew Morton, Denys Vlasenko, Sasha Levin, linux-kernel

Some code waits for a metadata update by:

1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN)
2. setting MD_CHANGE_PENDING and waking the management thread
3. waiting for MD_CHANGE_PENDING to be cleared

If the first two are done without locking, the code in md_update_sb()
which checks if it needs to repeat might test if an update is needed
before step 1, then clear MD_CHANGE_PENDING after step 2, resulting
in the wait returning early.

So make sure every place that sets MD_CHANGE_PENDING does so atomically
with the flag that requests the update; bit_clear_unless (suggested by
Neil) is introduced for this purpose.

Cc: Martin Kepplinger <martink@posteo.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: <linux-kernel@vger.kernel.org>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md.c          | 27 ++++++++++++++-------------
 drivers/md/raid1.c       |  4 ++--
 drivers/md/raid10.c      |  8 ++++----
 drivers/md/raid5-cache.c |  4 ++--
 drivers/md/raid5.c       |  4 ++--
 include/linux/bitops.h   | 16 ++++++++++++++++
 6 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 06f6e81..892d639 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2295,12 +2295,16 @@ repeat:
 	if (mddev_is_clustered(mddev)) {
 		if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
 			force_change = 1;
+		if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+			nospares = 1;
 		ret = md_cluster_ops->metadata_update_start(mddev);
 		/* Has someone else has updated the sb */
 		if (!does_sb_need_changing(mddev)) {
 			if (ret == 0)
 				md_cluster_ops->metadata_update_cancel(mddev);
-			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+			bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+							 BIT(MD_CHANGE_DEVS) |
+							 BIT(MD_CHANGE_CLEAN));
 			return;
 		}
 	}
@@ -2434,15 +2438,11 @@ repeat:
 	if (mddev_is_clustered(mddev) && ret == 0)
 		md_cluster_ops->metadata_update_finish(mddev);
 
-	spin_lock(&mddev->lock);
 	if (mddev->in_sync != sync_req ||
-	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
+	    !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+			       BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
 		/* have to write it out again */
-		spin_unlock(&mddev->lock);
 		goto repeat;
-	}
-	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-	spin_unlock(&mddev->lock);
 	wake_up(&mddev->sb_wait);
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -8147,18 +8147,18 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-
 	if (mddev_is_clustered(mddev) &&
 	    ret == 0) {
 		/* set CHANGE_PENDING here since maybe another
 		 * update is needed, so other nodes are informed */
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 		md_cluster_ops->resync_finish(mddev);
-	}
+	} else
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8550,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape);
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		       int is_new)
 {
+	struct mddev *mddev = rdev->mddev;
 	int rv;
 	if (is_new)
 		s += rdev->new_data_offset;
@@ -8559,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 	if (rv == 0) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
-		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(rdev->mddev->thread);
 		return 1;
 	} else
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a7f2b9c..c7c8cde 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1474,8 +1474,8 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 	 * if recovery is running, make sure it aborts.
 	 */
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	printk(KERN_ALERT
 	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid1:%s: Operation continuing on %d devices.\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e3fd725..f21c4bb 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1102,8 +1102,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		bio->bi_iter.bi_sector < conf->reshape_progress))) {
 		/* Need to update reshape_position in metadata */
 		mddev->reshape_position = conf->reshape_progress;
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -1591,8 +1591,8 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 9531f5f..ac51bc5 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -712,8 +712,8 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	 * in_teardown check workaround this issue.
 	 */
 	if (!log->in_teardown) {
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e48c262..cd110ce 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2514,8 +2514,8 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_CHANGE_PENDING, &mddev->flags);
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
 	printk(KERN_ALERT
 	       "md/raid:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid:%s: Operation continuing on %d devices.\n",
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index defeaac..299e76b 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -227,6 +227,22 @@ static inline unsigned long __ffs64(u64 word)
 })
 #endif
 
+#ifndef bit_clear_unless
+#define bit_clear_unless(ptr, _clear, _test)	\
+({								\
+	const typeof(*ptr) clear = (_clear), test = (_test);	\
+	typeof(*ptr) old, new;					\
+								\
+	do {							\
+		old = ACCESS_ONCE(*ptr);			\
+		new = old & ~clear;				\
+	} while (!(old & test) &&				\
+		 cmpxchg(ptr, old, new) != old);		\
+								\
+	!(old & test);						\
+})
+#endif
+
 #ifndef find_last_bit
 /**
  * find_last_bit - find the last set bit in a memory region
-- 
2.6.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready
  2016-05-04  2:22 [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Guoqing Jiang
@ 2016-05-04  2:22 ` Guoqing Jiang
  2016-05-04  5:52   ` Guoqing Jiang
  2016-05-04  6:17   ` [Update PATCH] " Guoqing Jiang
  2016-05-04  2:22 ` [PATCH 3/3] md-cluster: check the return value of process_recvd_msg Guoqing Jiang
  2016-05-08 22:12 ` [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Shaohua Li
  2 siblings, 2 replies; 6+ messages in thread
From: Guoqing Jiang @ 2016-05-04  2:22 UTC (permalink / raw)
  To: shli; +Cc: neilb, linux-raid, Guoqing Jiang

The in-memory bitmap is not ready when a node joins the cluster,
so it doesn't make sense to call gather_all_resync_info() that
early; we need to call it after the node's bitmap is set up.
Also, recv_thread could be woken up right after the node joins
the cluster, which could cause problems if the node receives a
RESYNCING message without a personality, since
mddev->pers->quiesce is called in process_suspend_info.

This commit introduces a new cluster interface, load_bitmaps,
to fix the above problems. load_bitmaps is called in bitmap_load,
where the bitmap and personality are ready, and it performs
the following tasks:

1. call gather_all_resync_info to load all the nodes'
   bitmap info.
2. set the MD_CLUSTER_ALREADY_IN_CLUSTER bit so that
   recv_thread can be woken up, and wake up recv_thread if
   there is a pending recv event.

With this change, ack_bast only wakes up recv_thread once the
ALREADY_IN_CLUSTER bit is set; otherwise
MD_CLUSTER_PENDING_RESYNC_EVENT is set.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/bitmap.c     |  3 +++
 drivers/md/md-cluster.c | 27 ++++++++++++++++++++++-----
 drivers/md/md-cluster.h |  1 +
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ad5a858..d8129ec 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1848,6 +1848,9 @@ int bitmap_load(struct mddev *mddev)
 	if (!bitmap)
 		goto out;
 
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
+
 	/* Clear out old bitmap info first:  Either there is none, or we
 	 * are resuming after someone else has possibly changed things,
 	 * so we should forget old cached info.
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index a55b5f4..bee4085 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -61,6 +61,10 @@ struct resync_info {
  * the lock.
  */
 #define		MD_CLUSTER_SEND_LOCKED_ALREADY		5
+/* We should receive message after node joined cluster and
+ * set up all the related infos such as bitmap and personality */
+#define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
+#define		MD_CLUSTER_PENDING_RESYNC_EVENT		7
 
 
 struct md_cluster_info {
@@ -376,8 +380,11 @@ static void ack_bast(void *arg, int mode)
 	struct dlm_lock_resource *res = arg;
 	struct md_cluster_info *cinfo = res->mddev->cluster_info;
 
-	if (mode == DLM_LOCK_EX)
+	if (mode == DLM_LOCK_EX &&
+	    test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
 		md_wakeup_thread(cinfo->recv_thread);
+	else
+		set_bit(MD_CLUSTER_PENDING_RESYNC_EVENT, &cinfo->state);
 }
 
 static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
@@ -846,10 +853,6 @@ static int join(struct mddev *mddev, int nodes)
 	if (!cinfo->resync_lockres)
 		goto err;
 
-	ret = gather_all_resync_info(mddev, nodes);
-	if (ret)
-		goto err;
-
 	return 0;
 err:
 	md_unregister_thread(&cinfo->recovery_thread);
@@ -867,6 +870,19 @@ err:
 	return ret;
 }
 
+static void load_bitmaps(struct mddev *mddev, int total_slots)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	/* load all the node's bitmap info for resync */
+	if (gather_all_resync_info(mddev, total_slots))
+		pr_err("md-cluster: failed to gather all resyn infos\n");
+	set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
+	/* wake up recv thread in case something need to be handled */
+	if (test_and_clear_bit(MD_CLUSTER_PENDING_RESYNC_EVENT, &cinfo->state))
+		md_wakeup_thread(cinfo->recv_thread);
+}
+
 static void resync_bitmap(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1208,6 +1224,7 @@ static struct md_cluster_operations cluster_ops = {
 	.add_new_disk_cancel = add_new_disk_cancel,
 	.new_disk_ack = new_disk_ack,
 	.remove_disk = remove_disk,
+	.load_bitmaps = load_bitmaps,
 	.gather_bitmaps = gather_bitmaps,
 	.lock_all_bitmaps = lock_all_bitmaps,
 	.unlock_all_bitmaps = unlock_all_bitmaps,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 45ce6c9..e765499 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -23,6 +23,7 @@ struct md_cluster_operations {
 	void (*add_new_disk_cancel)(struct mddev *mddev);
 	int (*new_disk_ack)(struct mddev *mddev, bool ack);
 	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
+	void (*load_bitmaps)(struct mddev *mddev, int total_slots);
 	int (*gather_bitmaps)(struct md_rdev *rdev);
 	int (*lock_all_bitmaps)(struct mddev *mddev);
 	void (*unlock_all_bitmaps)(struct mddev *mddev);
-- 
2.6.6


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready
  2016-05-04  2:22 ` [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready Guoqing Jiang
@ 2016-05-04  5:52   ` Guoqing Jiang
  2016-05-04  6:17   ` [Update PATCH] " Guoqing Jiang
  1 sibling, 0 replies; 6+ messages in thread
From: Guoqing Jiang @ 2016-05-04  5:52 UTC (permalink / raw)
  To: shli; +Cc: neilb, linux-raid



On 05/03/2016 10:22 PM, Guoqing Jiang wrote:
> The in-memory bitmap is not ready when node joins cluster,
> so it doesn't make sense to make gather_all_resync_info()
> called so earlier, we need to call it after the node's
> bitmap is setup. Also, recv_thread could be wake up after
> node joins cluster, but it could cause problem if node
> receives RESYNCING message without persionality since
> mddev->pers->quiesce is called in process_suspend_info.
>
> This commit introduces a new cluster interface load_bitmaps
> to fix above problems, load_bitmaps is called in bitmap_load
> where bitmap and persionality are ready, and load_bitmaps
> does the following tasks:
>
> 1. call gather_all_resync_info to load all the node's
>     bitmap info.
> 2. set MD_CLUSTER_ALREADY_IN_CLUSTER bit to recv_thread
>     could be wake up, and wake up recv_thread if there is
>     pending recv event.
>
> Then ack_bast only wakes up recv_thread after IN_CLUSTER
> bit is ready otherwise MD_CLUSTER_PENDING_RESYNC_EVENT is
> set.
>
> Reviewed-by: NeilBrown <neilb@suse.com>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
> ---
>   drivers/md/bitmap.c     |  3 +++
>   drivers/md/md-cluster.c | 27 ++++++++++++++++++++++-----
>   drivers/md/md-cluster.h |  1 +
>   3 files changed, 26 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
> index ad5a858..d8129ec 100644
> --- a/drivers/md/bitmap.c
> +++ b/drivers/md/bitmap.c
> @@ -1848,6 +1848,9 @@ int bitmap_load(struct mddev *mddev)
>   	if (!bitmap)
>   		goto out;
>   
> +	if (mddev_is_clustered(mddev))
> +		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
> +
>   	/* Clear out old bitmap info first:  Either there is none, or we
>   	 * are resuming after someone else has possibly changed things,
>   	 * so we should forget old cached info.
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index a55b5f4..bee4085 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -61,6 +61,10 @@ struct resync_info {
>    * the lock.
>    */
>   #define		MD_CLUSTER_SEND_LOCKED_ALREADY		5
> +/* We should receive message after node joined cluster and
> + * set up all the related infos such as bitmap and personality */
> +#define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
> +#define		MD_CLUSTER_PENDING_RESYNC_EVENT		7
>   
>   
>   struct md_cluster_info {
> @@ -376,8 +380,11 @@ static void ack_bast(void *arg, int mode)
>   	struct dlm_lock_resource *res = arg;
>   	struct md_cluster_info *cinfo = res->mddev->cluster_info;
>   
> -	if (mode == DLM_LOCK_EX)
> +	if (mode == DLM_LOCK_EX &&
> +	    test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
>   		md_wakeup_thread(cinfo->recv_thread);
> +	else
> +		set_bit(MD_CLUSTER_PENDING_RESYNC_EVENT, &cinfo->state);
>   }

Hmm, the above should be:

-       if (mode == DLM_LOCK_EX)
-               md_wakeup_thread(cinfo->recv_thread);
+       if (mode == DLM_LOCK_EX) {
+               if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
+                       md_wakeup_thread(cinfo->recv_thread);
+               else
+                       set_bit(MD_CLUSTER_PENDING_RESYNC_EVENT, 
&cinfo->state);
+       }

I will update the patch.

Thanks,
Guoqing

>   
>   static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
> @@ -846,10 +853,6 @@ static int join(struct mddev *mddev, int nodes)
>   	if (!cinfo->resync_lockres)
>   		goto err;
>   
> -	ret = gather_all_resync_info(mddev, nodes);
> -	if (ret)
> -		goto err;
> -
>   	return 0;
>   err:
>   	md_unregister_thread(&cinfo->recovery_thread);
> @@ -867,6 +870,19 @@ err:
>   	return ret;
>   }
>   
> +static void load_bitmaps(struct mddev *mddev, int total_slots)
> +{
> +	struct md_cluster_info *cinfo = mddev->cluster_info;
> +
> +	/* load all the node's bitmap info for resync */
> +	if (gather_all_resync_info(mddev, total_slots))
> +		pr_err("md-cluster: failed to gather all resyn infos\n");
> +	set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
> +	/* wake up recv thread in case something need to be handled */
> +	if (test_and_clear_bit(MD_CLUSTER_PENDING_RESYNC_EVENT, &cinfo->state))
> +		md_wakeup_thread(cinfo->recv_thread);
> +}
> +
>   static void resync_bitmap(struct mddev *mddev)
>   {
>   	struct md_cluster_info *cinfo = mddev->cluster_info;
> @@ -1208,6 +1224,7 @@ static struct md_cluster_operations cluster_ops = {
>   	.add_new_disk_cancel = add_new_disk_cancel,
>   	.new_disk_ack = new_disk_ack,
>   	.remove_disk = remove_disk,
> +	.load_bitmaps = load_bitmaps,
>   	.gather_bitmaps = gather_bitmaps,
>   	.lock_all_bitmaps = lock_all_bitmaps,
>   	.unlock_all_bitmaps = unlock_all_bitmaps,
> diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
> index 45ce6c9..e765499 100644
> --- a/drivers/md/md-cluster.h
> +++ b/drivers/md/md-cluster.h
> @@ -23,6 +23,7 @@ struct md_cluster_operations {
>   	void (*add_new_disk_cancel)(struct mddev *mddev);
>   	int (*new_disk_ack)(struct mddev *mddev, bool ack);
>   	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
> +	void (*load_bitmaps)(struct mddev *mddev, int total_slots);
>   	int (*gather_bitmaps)(struct md_rdev *rdev);
>   	int (*lock_all_bitmaps)(struct mddev *mddev);
>   	void (*unlock_all_bitmaps)(struct mddev *mddev);


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Update PATCH] md-cluster: gather resync infos and enable recv_thread after bitmap is ready
  2016-05-04  2:22 ` [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready Guoqing Jiang
  2016-05-04  5:52   ` Guoqing Jiang
@ 2016-05-04  6:17   ` Guoqing Jiang
  1 sibling, 0 replies; 6+ messages in thread
From: Guoqing Jiang @ 2016-05-04  6:17 UTC (permalink / raw)
  To: shli; +Cc: neilb, linux-raid, Guoqing Jiang

The in-memory bitmap is not ready when a node joins the cluster,
so it doesn't make sense to call gather_all_resync_info() that
early; we need to call it after the node's bitmap is set up.
Also, recv_thread could be woken up right after the node joins
the cluster, which could cause problems if the node receives a
RESYNCING message without a personality, since
mddev->pers->quiesce is called in process_suspend_info.

This commit introduces a new cluster interface, load_bitmaps,
to fix the above problems. load_bitmaps is called in bitmap_load,
where the bitmap and personality are ready, and it performs
the following tasks:

1. call gather_all_resync_info to load all the nodes'
   bitmap info.
2. set the MD_CLUSTER_ALREADY_IN_CLUSTER bit so that
   recv_thread can be woken up, and wake up recv_thread if
   there is a pending recv event.

With this change, ack_bast only wakes up recv_thread once the
ALREADY_IN_CLUSTER bit is set; before that, an EX request
just sets MD_CLUSTER_PENDING_RECV_EVENT.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
Changes:
1. s/MD_CLUSTER_PENDING_RESYNC_EVENT/MD_CLUSTER_PENDING_RECV_EVENT
2. adjust the logic in ack_bast

 drivers/md/bitmap.c     |  3 +++
 drivers/md/md-cluster.c | 30 ++++++++++++++++++++++++------
 drivers/md/md-cluster.h |  1 +
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ad5a858..d8129ec 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1848,6 +1848,9 @@ int bitmap_load(struct mddev *mddev)
 	if (!bitmap)
 		goto out;
 
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
+
 	/* Clear out old bitmap info first:  Either there is none, or we
 	 * are resuming after someone else has possibly changed things,
 	 * so we should forget old cached info.
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index a55b5f4..bef6a47 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -61,6 +61,10 @@ struct resync_info {
  * the lock.
  */
 #define		MD_CLUSTER_SEND_LOCKED_ALREADY		5
+/* We should receive message after node joined cluster and
+ * set up all the related infos such as bitmap and personality */
+#define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
+#define		MD_CLUSTER_PENDING_RECV_EVENT		7
 
 
 struct md_cluster_info {
@@ -376,8 +380,12 @@ static void ack_bast(void *arg, int mode)
 	struct dlm_lock_resource *res = arg;
 	struct md_cluster_info *cinfo = res->mddev->cluster_info;
 
-	if (mode == DLM_LOCK_EX)
-		md_wakeup_thread(cinfo->recv_thread);
+	if (mode == DLM_LOCK_EX) {
+		if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
+			md_wakeup_thread(cinfo->recv_thread);
+		else
+			set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
+	}
 }
 
 static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
@@ -846,10 +854,6 @@ static int join(struct mddev *mddev, int nodes)
 	if (!cinfo->resync_lockres)
 		goto err;
 
-	ret = gather_all_resync_info(mddev, nodes);
-	if (ret)
-		goto err;
-
 	return 0;
 err:
 	md_unregister_thread(&cinfo->recovery_thread);
@@ -867,6 +871,19 @@ err:
 	return ret;
 }
 
+static void load_bitmaps(struct mddev *mddev, int total_slots)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	/* load all the node's bitmap info for resync */
+	if (gather_all_resync_info(mddev, total_slots))
+		pr_err("md-cluster: failed to gather all resyn infos\n");
+	set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
+	/* wake up recv thread in case something need to be handled */
+	if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
+		md_wakeup_thread(cinfo->recv_thread);
+}
+
 static void resync_bitmap(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1208,6 +1225,7 @@ static struct md_cluster_operations cluster_ops = {
 	.add_new_disk_cancel = add_new_disk_cancel,
 	.new_disk_ack = new_disk_ack,
 	.remove_disk = remove_disk,
+	.load_bitmaps = load_bitmaps,
 	.gather_bitmaps = gather_bitmaps,
 	.lock_all_bitmaps = lock_all_bitmaps,
 	.unlock_all_bitmaps = unlock_all_bitmaps,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 45ce6c9..e765499 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -23,6 +23,7 @@ struct md_cluster_operations {
 	void (*add_new_disk_cancel)(struct mddev *mddev);
 	int (*new_disk_ack)(struct mddev *mddev, bool ack);
 	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
+	void (*load_bitmaps)(struct mddev *mddev, int total_slots);
 	int (*gather_bitmaps)(struct md_rdev *rdev);
 	int (*lock_all_bitmaps)(struct mddev *mddev);
 	void (*unlock_all_bitmaps)(struct mddev *mddev);
-- 
2.6.6


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] md-cluster: check the return value of process_recvd_msg
  2016-05-04  2:22 [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Guoqing Jiang
  2016-05-04  2:22 ` [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready Guoqing Jiang
@ 2016-05-04  2:22 ` Guoqing Jiang
  2016-05-08 22:12 ` [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Shaohua Li
  2 siblings, 0 replies; 6+ messages in thread
From: Guoqing Jiang @ 2016-05-04  2:22 UTC (permalink / raw)
  To: shli; +Cc: neilb, linux-raid, Guoqing Jiang

We don't need to run the full path of recv_daemon
if process_recvd_msg doesn't return 0.

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index bee4085..301207d 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -519,11 +519,13 @@ static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
 			__func__, __LINE__, le32_to_cpu(msg->raid_slot));
 }
 
-static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
+static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 {
+	int ret = 0;
+
 	if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
 		"node %d received it's own msg\n", le32_to_cpu(msg->slot)))
-		return;
+		return -1;
 	switch (le32_to_cpu(msg->type)) {
 	case METADATA_UPDATED:
 		process_metadata_update(mddev, msg);
@@ -546,9 +548,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 		__recover_slot(mddev, le32_to_cpu(msg->slot));
 		break;
 	default:
+		ret = -1;
 		pr_warn("%s:%d Received unknown message from %d\n",
 			__func__, __LINE__, msg->slot);
 	}
+	return ret;
 }
 
 /*
@@ -572,7 +576,9 @@ static void recv_daemon(struct md_thread *thread)
 
 	/* read lvb and wake up thread to process this message_lockres */
 	memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
-	process_recvd_msg(thread->mddev, &msg);
+	ret = process_recvd_msg(thread->mddev, &msg);
+	if (ret)
+		goto out;
 
 	/*release CR on ack_lockres*/
 	ret = dlm_unlock_sync(ack_lockres);
@@ -586,6 +592,7 @@ static void recv_daemon(struct md_thread *thread)
 	ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
 	if (unlikely(ret != 0))
 		pr_info("lock CR on ack failed return %d\n", ret);
+out:
 	/*release CR on message_lockres*/
 	ret = dlm_unlock_sync(message_lockres);
 	if (unlikely(ret != 0))
-- 
2.6.6


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region
  2016-05-04  2:22 [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Guoqing Jiang
  2016-05-04  2:22 ` [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready Guoqing Jiang
  2016-05-04  2:22 ` [PATCH 3/3] md-cluster: check the return value of process_recvd_msg Guoqing Jiang
@ 2016-05-08 22:12 ` Shaohua Li
  2 siblings, 0 replies; 6+ messages in thread
From: Shaohua Li @ 2016-05-08 22:12 UTC (permalink / raw)
  To: Guoqing Jiang
  Cc: neilb, linux-raid, Martin Kepplinger, Andrew Morton,
	Denys Vlasenko, Sasha Levin, linux-kernel

On Tue, May 03, 2016 at 10:22:13PM -0400, Guoqing Jiang wrote:
> Some code waits for a metadata update by:
> 
> 1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN)
> 2. setting MD_CHANGE_PENDING and waking the management thread
> 3. waiting for MD_CHANGE_PENDING to be cleared
> 
> If the first two are done without locking, the code in md_update_sb()
> which checks if it needs to repeat might test if an update is needed
> before step 1, then clear MD_CHANGE_PENDING after step 2, resulting
> in the wait returning early.
> 
> So make sure all places that set MD_CHANGE_PENDING are atomicial, and
> bit_clear_unless (suggested by Neil) is introduced for the purpose.

Applied the 3, thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-05-08 22:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-05-04  2:22 [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Guoqing Jiang
2016-05-04  2:22 ` [PATCH 2/3] md-cluster: gather resync infos and enable recv_thread after bitmap is ready Guoqing Jiang
2016-05-04  5:52   ` Guoqing Jiang
2016-05-04  6:17   ` [Update PATCH] " Guoqing Jiang
2016-05-04  2:22 ` [PATCH 3/3] md-cluster: check the return value of process_recvd_msg Guoqing Jiang
2016-05-08 22:12 ` [PATCH 1/3] md: set MD_CHANGE_PENDING in a atomic region Shaohua Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.