- * [PATCH 05/12] md-cluster: init completion within lockres_init
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
We should initialize the completion within lockres_init; otherwise
the completion could be initialized more than once during
its life cycle.
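To see the hazard this avoids, here is a minimal sketch (simplified
types, not the md-cluster code itself): init_completion() re-initializes
the completion's internal wait-queue, so running it again while another
thread is still blocked in wait_for_completion() can corrupt the
wait-queue state and lose the wake-up from the ast callback. Arming it
exactly once, at allocation, sidesteps that:

	#include <linux/completion.h>
	#include <linux/slab.h>

	/* simplified stand-in for struct dlm_lock_resource */
	struct demo_lockres {
		struct completion completion;
	};

	static struct demo_lockres *demo_lockres_init(void)
	{
		struct demo_lockres *res = kzalloc(sizeof(*res), GFP_KERNEL);

		if (!res)
			return NULL;
		/* one-time setup; the same completion is then reused by
		 * every complete()/wait_for_completion() pair */
		init_completion(&res->completion);
		return res;
	}

(If a completion ever genuinely needs re-arming between uses,
reinit_completion() is the helper meant for that; re-running
init_completion() on a live completion is not.)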
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 85b7836..2a57f19 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -100,7 +100,6 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
 {
 	int ret = 0;
 
-	init_completion(&res->completion);
 	ret = dlm_lock(res->ls, mode, &res->lksb,
 			res->flags, res->name, strlen(res->name),
 			0, sync_ast, res, res->bast);
@@ -125,6 +124,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
 	res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
 	if (!res)
 		return NULL;
+	init_completion(&res->completion);
 	res->ls = cinfo->lockspace;
 	res->mddev = mddev;
 	namelen = strlen(name);
@@ -169,7 +169,6 @@ static void lockres_free(struct dlm_lock_resource *res)
 	if (!res)
 		return;
 
-	init_completion(&res->completion);
 	dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
 	wait_for_completion(&res->completion);
 
-- 
1.7.12.4
- * Re: [PATCH 05/12] md-cluster: init completion within lockres_init
From: Goldwyn Rodrigues @ 2015-07-27 16:44 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> We should initialize the completion within lockres_init; otherwise
> the completion could be initialized more than once during
> its life cycle.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   drivers/md/md-cluster.c | 3 +--
>   1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 85b7836..2a57f19 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -100,7 +100,6 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
>   {
>   	int ret = 0;
>
> -	init_completion(&res->completion);
>   	ret = dlm_lock(res->ls, mode, &res->lksb,
>   			res->flags, res->name, strlen(res->name),
>   			0, sync_ast, res, res->bast);
> @@ -125,6 +124,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
>   	res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
>   	if (!res)
>   		return NULL;
> +	init_completion(&res->completion);
>   	res->ls = cinfo->lockspace;
>   	res->mddev = mddev;
>   	namelen = strlen(name);
> @@ -169,7 +169,6 @@ static void lockres_free(struct dlm_lock_resource *res)
>   	if (!res)
>   		return;
>
> -	init_completion(&res->completion);
>   	dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
>   	wait_for_completion(&res->completion);
>
>
-- 
Goldwyn
 
- * [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
In a complicated cluster environment, it is possible that a
dlm lock cannot be granted or converted as intended; the related
error information is added to make it easier to debug potential issues.
For lockres_free, if the lock is blocked by a lock request or
conversion request, then dlm_unlock just puts it back on the grant
queue, so we need to ensure the lock is eventually freed.
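For readers of the hunks below: dlm_lock_sync()/dlm_unlock_sync() are
this file's synchronous wrappers around the async dlm API. Roughly (a
sketch reconstructed from the hunk in patch 05 plus the completion
usage; not a verbatim copy):

	/* issue the request, then sleep until sync_ast() completes it */
	static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
	{
		int ret;

		ret = dlm_lock(res->ls, mode, &res->lksb,
				res->flags, res->name, strlen(res->name),
				0, sync_ast, res, res->bast);
		if (ret)
			return ret;
		wait_for_completion(&res->completion);
		/* final status of the request, e.g. 0 or -DLM_ECANCEL */
		return res->lksb.sb_status;
	}

-DLM_ECANCEL indicates that a queued request or conversion was cancelled
and the lock fell back to its previously granted mode on the grant
queue, which is why lockres_free() below retries the unlock in that
case.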
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2a57f19..b80a689 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -166,10 +166,24 @@ out_err:
 
 static void lockres_free(struct dlm_lock_resource *res)
 {
+	int ret;
+
 	if (!res)
 		return;
 
-	dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+	/* cancel a lock request or a conversion request that is blocked */
+	res->flags |= DLM_LKF_CANCEL;
+retry:
+	ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+	if (unlikely(ret != 0)) {
+		pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
+
+		/* if a lock conversion is cancelled, then the lock is put
+		 * back to grant queue, need to ensure it is unlocked */
+		if (ret == -DLM_ECANCEL)
+			goto retry;
+	}
+	res->flags &= ~DLM_LKF_CANCEL;
 	wait_for_completion(&res->completion);
 
 	kfree(res->name);
@@ -474,6 +488,7 @@ static void recv_daemon(struct md_thread *thread)
 	struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
 	struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
 	struct cluster_msg msg;
+	int ret;
 
 	/*get CR on Message*/
 	if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
@@ -486,13 +501,21 @@ static void recv_daemon(struct md_thread *thread)
 	process_recvd_msg(thread->mddev, &msg);
 
 	/*release CR on ack_lockres*/
-	dlm_unlock_sync(ack_lockres);
+	ret = dlm_unlock_sync(ack_lockres);
+	if (unlikely(ret != 0))
+		pr_info("unlock ack failed return %d\n", ret);
 	/*up-convert to PR on message_lockres*/
-	dlm_lock_sync(message_lockres, DLM_LOCK_PR);
+	ret = dlm_lock_sync(message_lockres, DLM_LOCK_PR);
+	if (unlikely(ret != 0))
+		pr_info("lock PR on msg failed return %d\n", ret);
 	/*get CR on ack_lockres again*/
-	dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
+	ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
+	if (unlikely(ret != 0))
+		pr_info("lock CR on ack failed return %d\n", ret);
 	/*release CR on message_lockres*/
-	dlm_unlock_sync(message_lockres);
+	ret = dlm_unlock_sync(message_lockres);
+	if (unlikely(ret != 0))
+		pr_info("unlock msg failed return %d\n", ret);
 }
 
 /* lock_comm()
@@ -567,7 +590,13 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	}
 
 failed_ack:
-	dlm_unlock_sync(cinfo->message_lockres);
+	error = dlm_unlock_sync(cinfo->message_lockres);
+	if (unlikely(error != 0)) {
+		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
+			error);
+		/* in case the message can't be released due to some reason */
+		goto failed_ack;
+	}
 failed_message:
 	return error;
 }
-- 
1.7.12.4
- * Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
From: Goldwyn Rodrigues @ 2015-07-27 16:48 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
Hi Guoqing,
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> In a complicated cluster environment, it is possible that a
> dlm lock cannot be granted or converted as intended; the related
> error information is added to make it easier to debug potential issues.
>
> For lockres_free, if the lock is blocked by a lock request or
> conversion request, then dlm_unlock just puts it back on the grant
> queue, so we need to ensure the lock is eventually freed.
I cannot think of a scenario where a DLM_CANCEL will be returned. Could 
you explain the situation a bit more?
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
> ---
>   drivers/md/md-cluster.c | 41 +++++++++++++++++++++++++++++++++++------
>   1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 2a57f19..b80a689 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -166,10 +166,24 @@ out_err:
>
>   static void lockres_free(struct dlm_lock_resource *res)
>   {
> +	int ret;
> +
>   	if (!res)
>   		return;
>
> -	dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> +	/* cancel a lock request or a conversion request that is blocked */
> +	res->flags |= DLM_LKF_CANCEL;
> +retry:
> +	ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> +	if (unlikely(ret != 0)) {
> +		pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
> +
> +		/* if a lock conversion is cancelled, then the lock is put
> +		 * back to grant queue, need to ensure it is unlocked */
> +		if (ret == -DLM_ECANCEL)
> +			goto retry;
> +	}
> +	res->flags &= ~DLM_LKF_CANCEL;
>   	wait_for_completion(&res->completion);
>
>   	kfree(res->name);
> @@ -474,6 +488,7 @@ static void recv_daemon(struct md_thread *thread)
>   	struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
>   	struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
>   	struct cluster_msg msg;
> +	int ret;
>
>   	/*get CR on Message*/
>   	if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
> @@ -486,13 +501,21 @@ static void recv_daemon(struct md_thread *thread)
>   	process_recvd_msg(thread->mddev, &msg);
>
>   	/*release CR on ack_lockres*/
> -	dlm_unlock_sync(ack_lockres);
> +	ret = dlm_unlock_sync(ack_lockres);
> +	if (unlikely(ret != 0))
> +		pr_info("unlock ack failed return %d\n", ret);
>   	/*up-convert to PR on message_lockres*/
> -	dlm_lock_sync(message_lockres, DLM_LOCK_PR);
> +	ret = dlm_lock_sync(message_lockres, DLM_LOCK_PR);
> +	if (unlikely(ret != 0))
> +		pr_info("lock PR on msg failed return %d\n", ret);
>   	/*get CR on ack_lockres again*/
> -	dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
> +	ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
> +	if (unlikely(ret != 0))
> +		pr_info("lock CR on ack failed return %d\n", ret);
>   	/*release CR on message_lockres*/
> -	dlm_unlock_sync(message_lockres);
> +	ret = dlm_unlock_sync(message_lockres);
> +	if (unlikely(ret != 0))
> +		pr_info("unlock msg failed return %d\n", ret);
>   }
>
>   /* lock_comm()
> @@ -567,7 +590,13 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
>   	}
>
>   failed_ack:
> -	dlm_unlock_sync(cinfo->message_lockres);
> +	error = dlm_unlock_sync(cinfo->message_lockres);
> +	if (unlikely(error != 0)) {
> +		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
> +			error);
> +		/* in case the message can't be released due to some reason */
> +		goto failed_ack;
> +	}
>   failed_message:
>   	return error;
>   }
>
-- 
Goldwyn
- * Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
From: Guoqing Jiang @ 2015-07-28  3:04 UTC (permalink / raw)
  To: Goldwyn Rodrigues; +Cc: neilb, linux-raid
Hi Goldwyn,
Goldwyn Rodrigues wrote:
> Hi Guoqing,
>
> On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
>> In a complicated cluster environment, it is possible that a
>> dlm lock cannot be granted or converted as intended; the related
>> error information is added to make it easier to debug potential issues.
>>
>> For lockres_free, if the lock is blocked by a lock request or
>> conversion request, then dlm_unlock just puts it back on the grant
>> queue, so we need to ensure the lock is eventually freed.
>
>
> I cannot think of a scenario where a DLM_CANCEL will be returned.
> Could you explain the situation a bit more?
>
Thanks for the review. When the node is receiving a message and needs
to convert the message lock, and lockres_free is invoked because the
user stops the array meanwhile, then the message lock is put back on
the grant queue in CR mode and the message lock is never released.
Have I misunderstood the case?
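Spelled out as a timeline (a hypothetical interleaving, as I understand
the case):

	/*
	 *  recv_daemon                    lockres_free (array stop)
	 *  -----------                    -------------------------
	 *  holds MESSAGE in CR
	 *  requests convert CR -> PR
	 *  (conversion queued, blocked
	 *   by the sender's CW)
	 *                                 sets DLM_LKF_CANCEL and calls
	 *                                 dlm_unlock()
	 *                                 -> conversion cancelled, lock put
	 *                                    back on the grant queue in CR,
	 *                                    reported as -DLM_ECANCEL
	 *                                 retry dlm_unlock() to release the
	 *                                    still-granted CR lock
	 */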
Thanks,
Guoqing
- * Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
From: NeilBrown @ 2015-07-29  0:22 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: Goldwyn Rodrigues, linux-raid
On Tue, 28 Jul 2015 11:04:31 +0800 Guoqing Jiang <gqJiang@suse.com>
wrote:
> Hi Goldwyn,
> 
> Goldwyn Rodrigues wrote:
> > Hi Guoqing,
> >
> > On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> >> In a complicated cluster environment, it is possible that a
> >> dlm lock cannot be granted or converted as intended; the related
> >> error information is added to make it easier to debug potential issues.
> >>
> >> For lockres_free, if the lock is blocked by a lock request or
> >> conversion request, then dlm_unlock just puts it back on the grant
> >> queue, so we need to ensure the lock is eventually freed.
> >
> >
> > I cannot think of a scenario where a DLM_CANCEL will be returned.
> > Could you explain the situation a bit more?
> >
> Thanks for the review. When the node is receiving a message and needs
> to convert the message lock, and lockres_free is invoked because the
> user stops the array meanwhile, then the message lock is put back on
> the grant queue in CR mode and the message lock is never released.
> Have I misunderstood the case?
> 
> Thanks,
> Guoqing
Thanks for these patches Guoqing and for the review Goldwyn.
I've queued all but the last one (which you both agree we don't need).
I have queued this one too for now, though if Goldwyn has further
comments I'd be very happy to see them.
I'm a bit bothered by:
+retry:
+	ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+	if (unlikely(ret != 0)) {
+		pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
+
+		/* if a lock conversion is cancelled, then the lock is put
+		 * back to grant queue, need to ensure it is unlocked */
+		if (ret == -DLM_ECANCEL)
+			goto retry;
+	}
which looks like it could loop repeatedly.  Maybe if there was
documentation for DLM_LKF_CANCEL or DLM_ECANCEL somewhere, that might
make me feel more comfortable, but the only documentation that I can
find is the code ... is there real documentation that google doesn't
find for me?
Thanks,
NeilBrown
- * Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
From: Guoqing Jiang @ 2015-07-29  2:03 UTC (permalink / raw)
  To: NeilBrown; +Cc: Goldwyn Rodrigues, linux-raid
NeilBrown wrote:
> On Tue, 28 Jul 2015 11:04:31 +0800 Guoqing Jiang <gqJiang@suse.com>
> wrote:
>
>   
>> Hi Goldwyn,
>>
>> Goldwyn Rodrigues wrote:
>>     
>>> Hi Guoqing,
>>>
>>> On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
>>>       
>>>> In a complicated cluster environment, it is possible that a
>>>> dlm lock cannot be granted or converted as intended; the related
>>>> error information is added to make it easier to debug potential issues.
>>>>
>>>> For lockres_free, if the lock is blocked by a lock request or
>>>> conversion request, then dlm_unlock just puts it back on the grant
>>>> queue, so we need to ensure the lock is eventually freed.
>>>>         
>>> I cannot think of a scenario where a DLM_CANCEL will be returned.
>>> Could you explain the situation a bit more?
>>>
>>>       
>> Thanks for the review. When the node is receiving a message and needs
>> to convert the message lock, and lockres_free is invoked because the
>> user stops the array meanwhile, then the message lock is put back on
>> the grant queue in CR mode and the message lock is never released.
>> Have I misunderstood the case?
>>
>> Thanks,
>> Guoqing
>>     
>
> Thanks for these patches Guoqing and for the review Goldwyn.
>
> I've queued all but the last one (which you both agree we don't need).
>
> I have queued this one too for now, though it Goldwyn has further
> comments I'd be very happy to see them.
>
> I'm a bit bothered by:
> +retry:
> +	ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> +	if (unlikely(ret != 0)) {
> +		pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
> +
> +		/* if a lock conversion is cancelled, then the lock is put
> +		 * back to grant queue, need to ensure it is unlocked */
> +		if (ret == -DLM_ECANCEL)
> +			goto retry;
> +	}
>
> which looks like it could loop repeatedly.  Maybe if there was
> documentation for DLM_LKF_CANCEL or DLM_ECANCEL somewhere, that might
> make me feel more comfortable, but the only documentation that I can
> find is the code ... is there real documentation that google doesn't
> find for me?
>
>   
Hi Neil,
Please see people.redhat.com/ccaulfie/docs/rhdlmbook.pdf; I guess it is
the only official book about DLM. For the above code, I mostly referred
to pages 37-38, which describe the dlm_unlock API.
BTW: do you have time to review the following three other patches for
mdadm? Thanks.
    http://www.spinics.net/lists/raid/msg49356.html
    http://www.spinics.net/lists/raid/msg49357.html
    http://www.spinics.net/lists/raid/msg49358.html
Best Regards,
Guoqing
 
- * Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
From: Goldwyn Rodrigues @ 2015-07-29 23:39 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: neilb, linux-raid
On 07/27/2015 10:04 PM, Guoqing Jiang wrote:
> Hi Goldwyn,
>
> Goldwyn Rodrigues wrote:
>> Hi Guoqing,
>>
>> On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
>>> In a complicated cluster environment, it is possible that a
>>> dlm lock cannot be granted or converted as intended; the related
>>> error information is added to make it easier to debug potential issues.
>>>
>>> For lockres_free, if the lock is blocked by a lock request or
>>> conversion request, then dlm_unlock just puts it back on the grant
>>> queue, so we need to ensure the lock is eventually freed.
>>
>>
>> I cannot think of a scenario where a DLM_CANCEL will be returned.
>> Could you explain the situation a bit more?
>>
> Thanks for the review. When the node is receiving a message and needs
> to convert the message lock, and lockres_free is invoked because the
> user stops the array meanwhile, then the message lock is put back on
> the grant queue in CR mode and the message lock is never released.
> Have I misunderstood the case?
>
In that case, this looks correct.
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
-- 
Goldwyn
 
 
 
- * [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
If the node has just joined the cluster and receives a message from
other nodes before suspend_list is initialized, it will cause a kernel
crash due to a NULL pointer dereference, so move the initializations
earlier to fix the bug.
md-cluster: Joined cluster 3578507b-e0cb-6d4f-6322-696cd7b1b10c slot 3
BUG: unable to handle kernel NULL pointer dereference at           (null)
... ... ...
Call Trace:
[<ffffffffa0444924>] process_recvd_msg+0x2e4/0x330 [md_cluster]
[<ffffffffa0444a06>] recv_daemon+0x96/0x170 [md_cluster]
[<ffffffffa045189d>] md_thread+0x11d/0x170 [md_mod]
[<ffffffff810768c4>] kthread+0xb4/0xc0
[<ffffffff8151927c>] ret_from_fork+0x7c/0xb0
... ... ...
RIP  [<ffffffffa0443581>] __remove_suspend_info+0x11/0xa0 [md_cluster]
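A plausible interleaving behind the oops (reconstructed from the trace
above; the timing is hypothetical, the names are from md-cluster.c):

	/*
	 *  join()                           recv_daemon (msg from a peer)
	 *  ------                           -----------------------------
	 *  cinfo = kzalloc(...)
	 *  dlm_new_lockspace(...)
	 *  md_register_thread(recv_daemon)
	 *                                   process_recvd_msg()
	 *                                     __remove_suspend_info()
	 *                                       walks cinfo->suspend_list,
	 *                                       still all-zero from kzalloc
	 *                                       (->next == NULL), and crashes
	 *  INIT_LIST_HEAD(&cinfo->suspend_list)   <- too late
	 */

Moving the INIT_LIST_HEAD()/spin_lock_init() calls before the lockspace
and the receive thread are set up closes this window.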
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index b80a689..6f1ea3c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -671,6 +671,8 @@ static int join(struct mddev *mddev, int nodes)
 	if (!cinfo)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&cinfo->suspend_list);
+	spin_lock_init(&cinfo->suspend_lock);
 	init_completion(&cinfo->completion);
 
 	mutex_init(&cinfo->sb_mutex);
@@ -736,9 +738,6 @@ static int join(struct mddev *mddev, int nodes)
 		goto err;
 	}
 
-	INIT_LIST_HEAD(&cinfo->suspend_list);
-	spin_lock_init(&cinfo->suspend_lock);
-
 	ret = gather_all_resync_info(mddev, nodes);
 	if (ret)
 		goto err;
-- 
1.7.12.4
- * Re: [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join
From: Goldwyn Rodrigues @ 2015-07-27 16:29 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> If the node has just joined the cluster and receives a message from
> other nodes before suspend_list is initialized, it will cause a kernel
> crash due to a NULL pointer dereference, so move the initializations
> earlier to fix the bug.
>
> md-cluster: Joined cluster 3578507b-e0cb-6d4f-6322-696cd7b1b10c slot 3
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> ... ... ...
> Call Trace:
> [<ffffffffa0444924>] process_recvd_msg+0x2e4/0x330 [md_cluster]
> [<ffffffffa0444a06>] recv_daemon+0x96/0x170 [md_cluster]
> [<ffffffffa045189d>] md_thread+0x11d/0x170 [md_mod]
> [<ffffffff810768c4>] kthread+0xb4/0xc0
> [<ffffffff8151927c>] ret_from_fork+0x7c/0xb0
> ... ... ...
> RIP  [<ffffffffa0443581>] __remove_suspend_info+0x11/0xa0 [md_cluster]
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   drivers/md/md-cluster.c | 5 ++---
>   1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index b80a689..6f1ea3c 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -671,6 +671,8 @@ static int join(struct mddev *mddev, int nodes)
>   	if (!cinfo)
>   		return -ENOMEM;
>
> +	INIT_LIST_HEAD(&cinfo->suspend_list);
> +	spin_lock_init(&cinfo->suspend_lock);
>   	init_completion(&cinfo->completion);
>
>   	mutex_init(&cinfo->sb_mutex);
> @@ -736,9 +738,6 @@ static int join(struct mddev *mddev, int nodes)
>   		goto err;
>   	}
>
> -	INIT_LIST_HEAD(&cinfo->suspend_list);
> -	spin_lock_init(&cinfo->suspend_lock);
> -
>   	ret = gather_all_resync_info(mddev, nodes);
>   	if (ret)
>   		goto err;
>
-- 
Goldwyn
 
- * [PATCH 08/12] md-cluster: remove the unused sb_lock
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
The sb_lock is not used anywhere, so let's remove it.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 9 ---------
 1 file changed, 9 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 6f1ea3c..057a973 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -52,7 +52,6 @@ struct md_cluster_info {
 	dlm_lockspace_t *lockspace;
 	int slot_number;
 	struct completion completion;
-	struct dlm_lock_resource *sb_lock;
 	struct mutex sb_mutex;
 	struct dlm_lock_resource *bitmap_lockres;
 	struct list_head suspend_list;
@@ -692,12 +691,6 @@ static int join(struct mddev *mddev, int nodes)
 		ret = -ERANGE;
 		goto err;
 	}
-	cinfo->sb_lock = lockres_init(mddev, "cmd-super",
-					NULL, 0);
-	if (!cinfo->sb_lock) {
-		ret = -ENOMEM;
-		goto err;
-	}
 	/* Initiate the communication resources */
 	ret = -ENOMEM;
 	cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
@@ -749,7 +742,6 @@ err:
 	lockres_free(cinfo->ack_lockres);
 	lockres_free(cinfo->no_new_dev_lockres);
 	lockres_free(cinfo->bitmap_lockres);
-	lockres_free(cinfo->sb_lock);
 	if (cinfo->lockspace)
 		dlm_release_lockspace(cinfo->lockspace, 2);
 	mddev->cluster_info = NULL;
@@ -770,7 +762,6 @@ static int leave(struct mddev *mddev)
 	lockres_free(cinfo->token_lockres);
 	lockres_free(cinfo->ack_lockres);
 	lockres_free(cinfo->no_new_dev_lockres);
-	lockres_free(cinfo->sb_lock);
 	lockres_free(cinfo->bitmap_lockres);
 	dlm_release_lockspace(cinfo->lockspace, 2);
 	return 0;
-- 
1.7.12.4
- * Re: [PATCH 08/12] md-cluster: remove the unused sb_lock
From: Goldwyn Rodrigues @ 2015-07-27 16:29 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> The sb_lock is not used anywhere, so let's remove it.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Code cleanup.
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   drivers/md/md-cluster.c | 9 ---------
>   1 file changed, 9 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 6f1ea3c..057a973 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -52,7 +52,6 @@ struct md_cluster_info {
>   	dlm_lockspace_t *lockspace;
>   	int slot_number;
>   	struct completion completion;
> -	struct dlm_lock_resource *sb_lock;
>   	struct mutex sb_mutex;
>   	struct dlm_lock_resource *bitmap_lockres;
>   	struct list_head suspend_list;
> @@ -692,12 +691,6 @@ static int join(struct mddev *mddev, int nodes)
>   		ret = -ERANGE;
>   		goto err;
>   	}
> -	cinfo->sb_lock = lockres_init(mddev, "cmd-super",
> -					NULL, 0);
> -	if (!cinfo->sb_lock) {
> -		ret = -ENOMEM;
> -		goto err;
> -	}
>   	/* Initiate the communication resources */
>   	ret = -ENOMEM;
>   	cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
> @@ -749,7 +742,6 @@ err:
>   	lockres_free(cinfo->ack_lockres);
>   	lockres_free(cinfo->no_new_dev_lockres);
>   	lockres_free(cinfo->bitmap_lockres);
> -	lockres_free(cinfo->sb_lock);
>   	if (cinfo->lockspace)
>   		dlm_release_lockspace(cinfo->lockspace, 2);
>   	mddev->cluster_info = NULL;
> @@ -770,7 +762,6 @@ static int leave(struct mddev *mddev)
>   	lockres_free(cinfo->token_lockres);
>   	lockres_free(cinfo->ack_lockres);
>   	lockres_free(cinfo->no_new_dev_lockres);
> -	lockres_free(cinfo->sb_lock);
>   	lockres_free(cinfo->bitmap_lockres);
>   	dlm_release_lockspace(cinfo->lockspace, 2);
>   	return 0;
>
-- 
Goldwyn
 
- * [PATCH 09/12] md-cluster: add missed lockres_free
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
We also need to free the lock resource before goto out.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 057a973..411b430 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -647,8 +647,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
 			lockres_free(bm_lockres);
 			continue;
 		}
-		if (ret)
+		if (ret) {
+			lockres_free(bm_lockres);
 			goto out;
+		}
 		/* TODO: Read the disk bitmap sb and check if it needs recovery */
 		dlm_unlock_sync(bm_lockres);
 		lockres_free(bm_lockres);
-- 
1.7.12.4
- * Re: [PATCH 09/12] md-cluster: add missed lockres_free
From: Goldwyn Rodrigues @ 2015-07-27 16:30 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> We also need to free the lock resource before goto out.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   drivers/md/md-cluster.c | 4 +++-
>   1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 057a973..411b430 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -647,8 +647,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
>   			lockres_free(bm_lockres);
>   			continue;
>   		}
> -		if (ret)
> +		if (ret) {
> +			lockres_free(bm_lockres);
>   			goto out;
> +		}
>   		/* TODO: Read the disk bitmap sb and check if it needs recovery */
>   		dlm_unlock_sync(bm_lockres);
>   		lockres_free(bm_lockres);
>
-- 
Goldwyn
 
- * [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
Introduce the MD_CLUSTER_BEGIN_JOIN_CLUSTER flag to make sure
complete(&cinfo->completion) is only invoked when a node
joins the cluster. Otherwise a node failure could also trigger
the complete, and it doesn't make sense to do it then.
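As an aside, the test_bit()+clear_bit() pair in the hunk below could be
folded into a single atomic step; a hypothetical equivalent (not what
this patch does, and only interchangeable because recover_done() is the
sole consumer of the flag):

	if (test_and_clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state))
		complete(&cinfo->completion);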
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 411b430..29f65e2 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -45,6 +45,7 @@ struct resync_info {
 /* md_cluster_info flags */
 #define		MD_CLUSTER_WAITING_FOR_NEWDISK		1
 #define		MD_CLUSTER_SUSPEND_READ_BALANCING	2
+#define		MD_CLUSTER_BEGIN_JOIN_CLUSTER		3
 
 
 struct md_cluster_info {
@@ -320,10 +321,17 @@ static void recover_done(void *arg, struct dlm_slot *slots,
 	struct md_cluster_info *cinfo = mddev->cluster_info;
 
 	cinfo->slot_number = our_slot;
-	complete(&cinfo->completion);
+	/* completion is only need to be complete when node join cluster,
+	 * it doesn't need to run during another node's failure */
+	if (test_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state)) {
+		complete(&cinfo->completion);
+		clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
+	}
 	clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
 }
 
+/* the ops is called when node join the cluster, and do lock recovery
+ * if node failure occurs */
 static const struct dlm_lockspace_ops md_ls_ops = {
 	.recover_prep = recover_prep,
 	.recover_slot = recover_slot,
@@ -675,6 +683,7 @@ static int join(struct mddev *mddev, int nodes)
 	INIT_LIST_HEAD(&cinfo->suspend_list);
 	spin_lock_init(&cinfo->suspend_lock);
 	init_completion(&cinfo->completion);
+	set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
 
 	mutex_init(&cinfo->sb_mutex);
 	mddev->cluster_info = cinfo;
-- 
1.7.12.4
- * Re: [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster
From: Goldwyn Rodrigues @ 2015-07-27 16:49 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> Introduce the MD_CLUSTER_BEGIN_JOIN_CLUSTER flag to make sure
> complete(&cinfo->completion) is only invoked when a node
> joins the cluster. Otherwise a node failure could also trigger
> the complete, and it doesn't make sense to do it then.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   drivers/md/md-cluster.c | 11 ++++++++++-
>   1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 411b430..29f65e2 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -45,6 +45,7 @@ struct resync_info {
>   /* md_cluster_info flags */
>   #define		MD_CLUSTER_WAITING_FOR_NEWDISK		1
>   #define		MD_CLUSTER_SUSPEND_READ_BALANCING	2
> +#define		MD_CLUSTER_BEGIN_JOIN_CLUSTER		3
>
>
>   struct md_cluster_info {
> @@ -320,10 +321,17 @@ static void recover_done(void *arg, struct dlm_slot *slots,
>   	struct md_cluster_info *cinfo = mddev->cluster_info;
>
>   	cinfo->slot_number = our_slot;
> -	complete(&cinfo->completion);
> +	/* completion is only need to be complete when node join cluster,
> +	 * it doesn't need to run during another node's failure */
> +	if (test_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state)) {
> +		complete(&cinfo->completion);
> +		clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
> +	}
>   	clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
>   }
>
> +/* the ops is called when node join the cluster, and do lock recovery
> + * if node failure occurs */
>   static const struct dlm_lockspace_ops md_ls_ops = {
>   	.recover_prep = recover_prep,
>   	.recover_slot = recover_slot,
> @@ -675,6 +683,7 @@ static int join(struct mddev *mddev, int nodes)
>   	INIT_LIST_HEAD(&cinfo->suspend_list);
>   	spin_lock_init(&cinfo->suspend_lock);
>   	init_completion(&cinfo->completion);
> +	set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
>
>   	mutex_init(&cinfo->sb_mutex);
>   	mddev->cluster_info = cinfo;
>
-- 
Goldwyn
 
- * [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
In gather_all_resync_info, we need to read the disk bitmap sb and
check if it needs recovery.
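The hunk below leans on the lo/hi range reported by
bitmap_copy_from_slot(); its contract, as inferred from this usage (an
approximation, not quoted from the bitmap code):

	sector_t lo, hi;
	int ret;

	/* copy slot i's on-disk bitmap into this node's bitmap and report
	 * the dirty range: lo = first sector covered by a set bit,
	 * hi = last such sector (hi == 0 means slot i's bitmap was clean) */
	ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);

So "hi > 0 && lo < mddev->recovery_cp" reads: slot i left dirty regions
that begin before our current resync checkpoint, so pull recovery_cp
back to lo and kick off recovery.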
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 29f65e2..c35a03a 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -625,6 +625,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
 	struct dlm_lock_resource *bm_lockres;
 	struct suspend_info *s;
 	char str[64];
+	sector_t lo, hi;
 
 
 	for (i = 0; i < total_slots; i++) {
@@ -659,7 +660,20 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
 			lockres_free(bm_lockres);
 			goto out;
 		}
-		/* TODO: Read the disk bitmap sb and check if it needs recovery */
+
+		/* Read the disk bitmap sb and check if it needs recovery */
+		ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
+		if (ret) {
+			pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
+			lockres_free(bm_lockres);
+			continue;
+		}
+		if ((hi > 0) && (lo < mddev->recovery_cp)) {
+			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+			mddev->recovery_cp = lo;
+			md_check_recovery(mddev);
+		}
+
 		dlm_unlock_sync(bm_lockres);
 		lockres_free(bm_lockres);
 	}
-- 
1.7.12.4
- * Re: [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery
From: Goldwyn Rodrigues @ 2015-07-27 16:31 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> In gather_all_resync_info, we need to read the disk bitmap sb and
> check if it needs recovery.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   drivers/md/md-cluster.c | 16 +++++++++++++++-
>   1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 29f65e2..c35a03a 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -625,6 +625,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
>   	struct dlm_lock_resource *bm_lockres;
>   	struct suspend_info *s;
>   	char str[64];
> +	sector_t lo, hi;
>
>
>   	for (i = 0; i < total_slots; i++) {
> @@ -659,7 +660,20 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
>   			lockres_free(bm_lockres);
>   			goto out;
>   		}
> -		/* TODO: Read the disk bitmap sb and check if it needs recovery */
> +
> +		/* Read the disk bitmap sb and check if it needs recovery */
> +		ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
> +		if (ret) {
> +			pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
> +			lockres_free(bm_lockres);
> +			continue;
> +		}
> +		if ((hi > 0) && (lo < mddev->recovery_cp)) {
> +			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +			mddev->recovery_cp = lo;
> +			md_check_recovery(mddev);
> +		}
> +
>   		dlm_unlock_sync(bm_lockres);
>   		lockres_free(bm_lockres);
>   	}
>
-- 
Goldwyn
 
- * [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init
From: Guoqing Jiang @ 2015-07-10  9:01 UTC (permalink / raw)
  To: neilb; +Cc: rgoldwyn, linux-raid
In lockres_init, it's better to distinguish between the different error conditions.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
 drivers/md/md-cluster.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index c35a03a..54d225c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -131,14 +131,14 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
 	res->name = kzalloc(namelen + 1, GFP_KERNEL);
 	if (!res->name) {
 		pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
-		goto out_err;
+		goto out_err_name;
 	}
 	strlcpy(res->name, name, namelen + 1);
 	if (with_lvb) {
 		res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
 		if (!res->lksb.sb_lvbptr) {
 			pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
-			goto out_err;
+			goto out_err_lvb;
 		}
 		res->flags = DLM_LKF_VALBLK;
 	}
@@ -159,7 +159,9 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
 	return res;
 out_err:
 	kfree(res->lksb.sb_lvbptr);
+out_err_lvb:
 	kfree(res->name);
+out_err_name:
 	kfree(res);
 	return NULL;
 }
@@ -627,7 +629,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
 	char str[64];
 	sector_t lo, hi;
 
-
 	for (i = 0; i < total_slots; i++) {
 		memset(str, '\0', 64);
 		snprintf(str, 64, "bitmap%04d", i);
-- 
1.7.12.4
- * Re: [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init
From: Goldwyn Rodrigues @ 2015-07-27 16:34 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> In lockres_init, it's better to distinguish between the different error conditions.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
This is not required. kfree() is capable of ignoring null pointers.
> ---
>   drivers/md/md-cluster.c | 7 ++++---
>   1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index c35a03a..54d225c 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -131,14 +131,14 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
>   	res->name = kzalloc(namelen + 1, GFP_KERNEL);
>   	if (!res->name) {
>   		pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
> -		goto out_err;
> +		goto out_err_name;
>   	}
>   	strlcpy(res->name, name, namelen + 1);
>   	if (with_lvb) {
>   		res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
>   		if (!res->lksb.sb_lvbptr) {
>   			pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
> -			goto out_err;
> +			goto out_err_lvb;
>   		}
>   		res->flags = DLM_LKF_VALBLK;
>   	}
> @@ -159,7 +159,9 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
>   	return res;
>   out_err:
>   	kfree(res->lksb.sb_lvbptr);
> +out_err_lvb:
>   	kfree(res->name);
> +out_err_name:
>   	kfree(res);
>   	return NULL;
>   }
> @@ -627,7 +629,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
>   	char str[64];
>   	sector_t lo, hi;
>
> -
>   	for (i = 0; i < total_slots; i++) {
>   		memset(str, '\0', 64);
>   		snprintf(str, 64, "bitmap%04d", i);
>
-- 
Goldwyn
 
- * Re: [PATCH 04/12] md-cluster: fix deadlock issue on message lock
From: Goldwyn Rodrigues @ 2015-07-27 16:25 UTC (permalink / raw)
  To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> There is a problem with the previous communication mechanism, and we got
> the deadlock scenario below with a cluster that has 3 nodes.
>
> 	Sender                	    Receiver        		Receiver
>
> 	token(EX)
>         message(EX)
>        writes message
>     downconverts message(CR)
>        requests ack(EX)
> 		                  get message(CR)            gets message(CR)
>                  		  reads message                reads message
> 		               requests EX on message    requests EX on message
>
> To fix this problem, we do the following changes:
>
> 1. the sender downconverts MESSAGE to CW rather than CR.
> 2. and the receiver request PR lock not EX lock on message.
>
> And in case we fail to down-convert EX to CW on message, it is better to
> unlock message rather than keep holding the lock.
>
> Signed-off-by: Lidong Zhong <ldzhong@suse.com>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   Documentation/md-cluster.txt |  4 ++--
>   drivers/md/md-cluster.c      | 14 +++++++-------
>   2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
> index de1af7d..1b79436 100644
> --- a/Documentation/md-cluster.txt
> +++ b/Documentation/md-cluster.txt
> @@ -91,7 +91,7 @@ The algorithm is:
>       this message inappropriate or redundant.
>
>    3. sender write LVB.
> -    sender down-convert MESSAGE from EX to CR
> +    sender down-convert MESSAGE from EX to CW
>       sender try to get EX of ACK
>       [ wait until all receiver has *processed* the MESSAGE ]
>
> @@ -112,7 +112,7 @@ The algorithm is:
>       sender down-convert ACK from EX to CR
>       sender release MESSAGE
>       sender release TOKEN
> -                               receiver upconvert to EX of MESSAGE
> +                               receiver upconvert to PR of MESSAGE
>                                  receiver get CR of ACK
>                                  receiver release MESSAGE
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 47199ad..85b7836 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -488,8 +488,8 @@ static void recv_daemon(struct md_thread *thread)
>
>   	/*release CR on ack_lockres*/
>   	dlm_unlock_sync(ack_lockres);
> -	/*up-convert to EX on message_lockres*/
> -	dlm_lock_sync(message_lockres, DLM_LOCK_EX);
> +	/*up-convert to PR on message_lockres*/
> +	dlm_lock_sync(message_lockres, DLM_LOCK_PR);
>   	/*get CR on ack_lockres again*/
>   	dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
>   	/*release CR on message_lockres*/
> @@ -522,7 +522,7 @@ static void unlock_comm(struct md_cluster_info *cinfo)
>    * The function:
>    * 1. Grabs the message lockresource in EX mode
>    * 2. Copies the message to the message LVB
> - * 3. Downconverts message lockresource to CR
> + * 3. Downconverts message lockresource to CW
>    * 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
>    *    and the other nodes read the message. The thread will wait here until all other
>    *    nodes have released ack lock resource.
> @@ -543,12 +543,12 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
>
>   	memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
>   			sizeof(struct cluster_msg));
> -	/*down-convert EX to CR on Message*/
> -	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CR);
> +	/*down-convert EX to CW on Message*/
> +	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CW);
>   	if (error) {
> -		pr_err("md-cluster: failed to convert EX to CR on MESSAGE(%d)\n",
> +		pr_err("md-cluster: failed to convert EX to CW on MESSAGE(%d)\n",
>   				error);
> -		goto failed_message;
> +		goto failed_ack;
>   	}
>
>   	/*up-convert CR to EX on Ack*/
>
-- 
Goldwyn
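For reference, the CR/CW/PR/EX reasoning above follows the standard DLM
lock-mode compatibility matrix (as given in the rhdlmbook.pdf cited
earlier in the thread); "y" means a request in the row's mode can be
granted alongside an existing lock in the column's mode:

	requested \ granted    NL   CR   CW   PR   PW   EX
	NL                     y    y    y    y    y    y
	CR                     y    y    y    y    y    -
	CW                     y    y    y    -    -    -
	PR                     y    y    -    y    -    -
	PW                     y    y    -    -    -    -
	EX                     y    -    -    -    -    -

As I read the fix: the sender's CW on MESSAGE stays compatible with the
receivers' CR (so they can read the LVB), while the receivers' PR
requests conflict with CW (so they wait for the sender to release) yet
remain compatible with each other, removing the EX-vs-EX contention
that deadlocked before.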