* [PATCH 05/12] md-cluster: init completion within lockres_init
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:44 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock Guoqing Jiang
` (7 subsequent siblings)
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
We should init the completion within lockres_init, otherwise
the completion could be initialized more than once during
its life cycle.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
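A minimal sketch of the lifecycle this change enforces (illustrative
names, not the driver's real symbols): the completion is initialized
exactly once when the resource is created, and afterwards is only ever
completed by the ast and waited on, so waiters can never race with a
re-initialization.

#include <linux/completion.h>
#include <linux/slab.h>

struct lock_resource {
	struct completion completion;	/* signalled from the DLM ast */
};

static struct lock_resource *resource_create(void)
{
	struct lock_resource *res = kzalloc(sizeof(*res), GFP_KERNEL);

	if (!res)
		return NULL;
	init_completion(&res->completion);	/* init once, at allocation */
	return res;
}

static void resource_ast(void *arg)		/* runs asynchronously */
{
	struct lock_resource *res = arg;

	complete(&res->completion);		/* only complete()/wait after init */
}

static void resource_wait(struct lock_resource *res)
{
	wait_for_completion(&res->completion);	/* safe across lock/unlock cycles */
}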
drivers/md/md-cluster.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 85b7836..2a57f19 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -100,7 +100,6 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
{
int ret = 0;
- init_completion(&res->completion);
ret = dlm_lock(res->ls, mode, &res->lksb,
res->flags, res->name, strlen(res->name),
0, sync_ast, res, res->bast);
@@ -125,6 +124,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
if (!res)
return NULL;
+ init_completion(&res->completion);
res->ls = cinfo->lockspace;
res->mddev = mddev;
namelen = strlen(name);
@@ -169,7 +169,6 @@ static void lockres_free(struct dlm_lock_resource *res)
if (!res)
return;
- init_completion(&res->completion);
dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
wait_for_completion(&res->completion);
--
1.7.12.4
* Re: [PATCH 05/12] md-cluster: init completion within lockres_init
2015-07-10 9:01 ` [PATCH 05/12] md-cluster: init completion within lockres_init Guoqing Jiang
@ 2015-07-27 16:44 ` Goldwyn Rodrigues
0 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:44 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> We should init the completion within lockres_init, otherwise
> the completion could be initialized more than once during
> its life cycle.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> drivers/md/md-cluster.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 85b7836..2a57f19 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -100,7 +100,6 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
> {
> int ret = 0;
>
> - init_completion(&res->completion);
> ret = dlm_lock(res->ls, mode, &res->lksb,
> res->flags, res->name, strlen(res->name),
> 0, sync_ast, res, res->bast);
> @@ -125,6 +124,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
> res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
> if (!res)
> return NULL;
> + init_completion(&res->completion);
> res->ls = cinfo->lockspace;
> res->mddev = mddev;
> namelen = strlen(name);
> @@ -169,7 +169,6 @@ static void lockres_free(struct dlm_lock_resource *res)
> if (!res)
> return;
>
> - init_completion(&res->completion);
> dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> wait_for_completion(&res->completion);
>
>
--
Goldwyn
* [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
2015-07-10 9:01 ` [PATCH 05/12] md-cluster: init completion within lockres_init Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:48 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join Guoqing Jiang
` (6 subsequent siblings)
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
In a complicated cluster environment, it is possible that the
dlm lock/convert request fails; the related error info is added
to make potential issues easier to debug.
For lockres_free, if the lock is blocked by a lock request or
conversion request, then dlm_unlock just puts it back on the
grant queue, so we need to ensure the lock is finally released.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
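For context on the retry below, a hedged sketch of the cancel-then-retry
teardown (illustrative, not the patch hunk verbatim; it assumes the
dlm_unlock()/DLM_LKF_CANCEL behaviour described in the DLM book cited
later in this thread, where cancelling a blocked conversion completes
with -DLM_ECANCEL and leaves the lock granted in its previous mode):

#include <linux/dlm.h>
#include <linux/completion.h>

static void lockres_teardown(struct dlm_lock_resource *res)
{
	uint32_t flags = DLM_LKF_CANCEL; /* first try: cancel a blocked request */
	int ret;

	do {
		ret = dlm_unlock(res->ls, res->lksb.sb_lkid, flags,
				 &res->lksb, res);
		/* -DLM_ECANCEL: only the pending conversion was cancelled
		 * and the lock fell back to the grant queue, so issue a
		 * plain unlock to really release it */
		flags = 0;
	} while (ret == -DLM_ECANCEL);

	wait_for_completion(&res->completion);
}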
drivers/md/md-cluster.c | 41 +++++++++++++++++++++++++++++++++++------
1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2a57f19..b80a689 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -166,10 +166,24 @@ out_err:
static void lockres_free(struct dlm_lock_resource *res)
{
+ int ret;
+
if (!res)
return;
- dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+ /* cancel a lock request or a conversion request that is blocked */
+ res->flags |= DLM_LKF_CANCEL;
+retry:
+ ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+ if (unlikely(ret != 0)) {
+ pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
+
+ /* if a lock conversion is cancelled, then the lock is put
+ * back to grant queue, need to ensure it is unlocked */
+ if (ret == -DLM_ECANCEL)
+ goto retry;
+ }
+ res->flags &= ~DLM_LKF_CANCEL;
wait_for_completion(&res->completion);
kfree(res->name);
@@ -474,6 +488,7 @@ static void recv_daemon(struct md_thread *thread)
struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
struct cluster_msg msg;
+ int ret;
/*get CR on Message*/
if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
@@ -486,13 +501,21 @@ static void recv_daemon(struct md_thread *thread)
process_recvd_msg(thread->mddev, &msg);
/*release CR on ack_lockres*/
- dlm_unlock_sync(ack_lockres);
+ ret = dlm_unlock_sync(ack_lockres);
+ if (unlikely(ret != 0))
+ pr_info("unlock ack failed return %d\n", ret);
/*up-convert to PR on message_lockres*/
- dlm_lock_sync(message_lockres, DLM_LOCK_PR);
+ ret = dlm_lock_sync(message_lockres, DLM_LOCK_PR);
+ if (unlikely(ret != 0))
+ pr_info("lock PR on msg failed return %d\n", ret);
/*get CR on ack_lockres again*/
- dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
+ ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
+ if (unlikely(ret != 0))
+ pr_info("lock CR on ack failed return %d\n", ret);
/*release CR on message_lockres*/
- dlm_unlock_sync(message_lockres);
+ ret = dlm_unlock_sync(message_lockres);
+ if (unlikely(ret != 0))
+ pr_info("unlock msg failed return %d\n", ret);
}
/* lock_comm()
@@ -567,7 +590,13 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
}
failed_ack:
- dlm_unlock_sync(cinfo->message_lockres);
+ error = dlm_unlock_sync(cinfo->message_lockres);
+ if (unlikely(error != 0)) {
+ pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
+ error);
+ /* in case the message can't be released due to some reason */
+ goto failed_ack;
+ }
failed_message:
return error;
}
--
1.7.12.4
* Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
2015-07-10 9:01 ` [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock Guoqing Jiang
@ 2015-07-27 16:48 ` Goldwyn Rodrigues
2015-07-28 3:04 ` Guoqing Jiang
0 siblings, 1 reply; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:48 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
Hi Guoqing,
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> In a complicated cluster environment, it is possible that the
> dlm lock/convert request fails; the related error info is added
> to make potential issues easier to debug.
>
> For lockres_free, if the lock is blocked by a lock request or
> conversion request, then dlm_unlock just puts it back on the
> grant queue, so we need to ensure the lock is finally released.
I cannot think of a scenario where a DLM_CANCEL will be returned. Could
you explain the situation a bit more?
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
> ---
> drivers/md/md-cluster.c | 41 +++++++++++++++++++++++++++++++++++------
> 1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 2a57f19..b80a689 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -166,10 +166,24 @@ out_err:
>
> static void lockres_free(struct dlm_lock_resource *res)
> {
> + int ret;
> +
> if (!res)
> return;
>
> - dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> + /* cancel a lock request or a conversion request that is blocked */
> + res->flags |= DLM_LKF_CANCEL;
> +retry:
> + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> + if (unlikely(ret != 0)) {
> + pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
> +
> + /* if a lock conversion is cancelled, then the lock is put
> + * back to grant queue, need to ensure it is unlocked */
> + if (ret == -DLM_ECANCEL)
> + goto retry;
> + }
> + res->flags &= ~DLM_LKF_CANCEL;
> wait_for_completion(&res->completion);
>
> kfree(res->name);
> @@ -474,6 +488,7 @@ static void recv_daemon(struct md_thread *thread)
> struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
> struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
> struct cluster_msg msg;
> + int ret;
>
> /*get CR on Message*/
> if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
> @@ -486,13 +501,21 @@ static void recv_daemon(struct md_thread *thread)
> process_recvd_msg(thread->mddev, &msg);
>
> /*release CR on ack_lockres*/
> - dlm_unlock_sync(ack_lockres);
> + ret = dlm_unlock_sync(ack_lockres);
> + if (unlikely(ret != 0))
> + pr_info("unlock ack failed return %d\n", ret);
> /*up-convert to PR on message_lockres*/
> - dlm_lock_sync(message_lockres, DLM_LOCK_PR);
> + ret = dlm_lock_sync(message_lockres, DLM_LOCK_PR);
> + if (unlikely(ret != 0))
> + pr_info("lock PR on msg failed return %d\n", ret);
> /*get CR on ack_lockres again*/
> - dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
> + ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
> + if (unlikely(ret != 0))
> + pr_info("lock CR on ack failed return %d\n", ret);
> /*release CR on message_lockres*/
> - dlm_unlock_sync(message_lockres);
> + ret = dlm_unlock_sync(message_lockres);
> + if (unlikely(ret != 0))
> + pr_info("unlock msg failed return %d\n", ret);
> }
>
> /* lock_comm()
> @@ -567,7 +590,13 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
> }
>
> failed_ack:
> - dlm_unlock_sync(cinfo->message_lockres);
> + error = dlm_unlock_sync(cinfo->message_lockres);
> + if (unlikely(error != 0)) {
> + pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
> + error);
> + /* in case the message can't be released due to some reason */
> + goto failed_ack;
> + }
> failed_message:
> return error;
> }
>
--
Goldwyn
* Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
2015-07-27 16:48 ` Goldwyn Rodrigues
@ 2015-07-28 3:04 ` Guoqing Jiang
2015-07-29 0:22 ` NeilBrown
2015-07-29 23:39 ` Goldwyn Rodrigues
0 siblings, 2 replies; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-28 3:04 UTC (permalink / raw)
To: Goldwyn Rodrigues; +Cc: neilb, linux-raid
Hi Goldwyn,
Goldwyn Rodrigues wrote:
> Hi Guoqing,
>
> On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
>> In a complicated cluster environment, it is possible that the
>> dlm lock/convert request fails; the related error info is added
>> to make potential issues easier to debug.
>>
>> For lockres_free, if the lock is blocked by a lock request or
>> conversion request, then dlm_unlock just puts it back on the
>> grant queue, so we need to ensure the lock is finally released.
>
>
> I cannot think of a scenario where a DLM_CANCEL will be returned.
> Could you explain the situation a bit more?
>
Thanks for the review. When the node is receiving a message and
needs to convert the message lock, and lockres_free is invoked
because the user stops the array meanwhile, then the message lock
is put back on the grant queue in CR mode and is not released.
Have I misunderstood the case?
Thanks,
Guoqing
* Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
2015-07-28 3:04 ` Guoqing Jiang
@ 2015-07-29 0:22 ` NeilBrown
2015-07-29 2:03 ` Guoqing Jiang
2015-07-29 23:39 ` Goldwyn Rodrigues
1 sibling, 1 reply; 29+ messages in thread
From: NeilBrown @ 2015-07-29 0:22 UTC (permalink / raw)
To: Guoqing Jiang; +Cc: Goldwyn Rodrigues, linux-raid
On Tue, 28 Jul 2015 11:04:31 +0800 Guoqing Jiang <gqJiang@suse.com>
wrote:
> Hi Goldwyn,
>
> Goldwyn Rodrigues wrote:
> > Hi Guoqing,
> >
> > On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> >> In a complicated cluster environment, it is possible that the
> >> dlm lock/convert request fails; the related error info is added
> >> to make potential issues easier to debug.
> >>
> >> For lockres_free, if the lock is blocked by a lock request or
> >> conversion request, then dlm_unlock just puts it back on the
> >> grant queue, so we need to ensure the lock is finally released.
> >
> >
> > I cannot think of a scenario where a DLM_CANCEL will be returned.
> > Could you explain the situation a bit more?
> >
> Thanks for the review. When the node is receiving a message and
> needs to convert the message lock, and lockres_free is invoked
> because the user stops the array meanwhile, then the message lock
> is put back on the grant queue in CR mode and is not released.
> Have I misunderstood the case?
>
> Thanks,
> Guoqing
Thanks for these patches Guoqing and for the review Goldwyn.
I've queued all but the last one (which you both agree we don't need).
I have queued this one too for now, though if Goldwyn has further
comments I'd be very happy to see them.
I'm a bit bothered by:
+retry:
+ ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+ if (unlikely(ret != 0)) {
+ pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
+
+ /* if a lock conversion is cancelled, then the lock is put
+ * back to grant queue, need to ensure it is unlocked */
+ if (ret == -DLM_ECANCEL)
+ goto retry;
+ }
which looks like it could loop repeatedly. Maybe if there was
documentation for DLM_LKF_CANCEL or DLM_ECANCEL somewhere, that might
make me feel more comfortable, but the only documentation that I can
find is the code ... is there real documentation that google doesn't
find for me?
Thanks,
NeilBrown
* Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
2015-07-29 0:22 ` NeilBrown
@ 2015-07-29 2:03 ` Guoqing Jiang
0 siblings, 0 replies; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-29 2:03 UTC (permalink / raw)
To: NeilBrown; +Cc: Goldwyn Rodrigues, linux-raid
NeilBrown wrote:
> On Tue, 28 Jul 2015 11:04:31 +0800 Guoqing Jiang <gqJiang@suse.com>
> wrote:
>
>
>> Hi Goldwyn,
>>
>> Goldwyn Rodrigues wrote:
>>
>>> Hi Guoqing,
>>>
>>> On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
>>>
>>>> In a complicated cluster environment, it is possible that the
>>>> dlm lock/convert request fails; the related error info is added
>>>> to make potential issues easier to debug.
>>>>
>>>> For lockres_free, if the lock is blocked by a lock request or
>>>> conversion request, then dlm_unlock just puts it back on the
>>>> grant queue, so we need to ensure the lock is finally released.
>>>>
>>> I cannot think of a scenario where a DLM_CANCEL will be returned.
>>> Could you explain the situation a bit more?
>>>
>>>
>> Thanks for the review. When the node is receiving a message and
>> needs to convert the message lock, and lockres_free is invoked
>> because the user stops the array meanwhile, then the message lock
>> is put back on the grant queue in CR mode and is not released.
>> Have I misunderstood the case?
>>
>> Thanks,
>> Guoqing
>>
>
> Thanks for these patches Guoqing and for the review Goldwyn.
>
> I've queued all but the last one (which you both agree we don't need).
>
> I have queued this one too for now, though if Goldwyn has further
> comments I'd be very happy to see them.
>
> I'm a bit bothered by:
> +retry:
> + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
> + if (unlikely(ret != 0)) {
> + pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
> +
> + /* if a lock conversion is cancelled, then the lock is put
> + * back to grant queue, need to ensure it is unlocked */
> + if (ret == -DLM_ECANCEL)
> + goto retry;
> + }
>
> which looks like it could loop repeatedly. Maybe if there was
> documentation for DLM_LKF_CANCEL or DLM_ECANCEL somewhere, that might
> make me feel more comfortable, but the only documentation that I can
> find is the code ... is there real documentation that google doesn't
> find for me?
>
>
Hi Neil,
Please see people.redhat.com/ccaulfie/docs/rhdlmbook.pdf; I guess
it is the only official book about DLM. For the above code, I
mostly referred to pages 37-38, which describe the dlm_unlock API.
BTW: do you have time to review the following three patches for
mdadm? Thanks.
http://www.spinics.net/lists/raid/msg49356.html
http://www.spinics.net/lists/raid/msg49357.html
http://www.spinics.net/lists/raid/msg49358.html
Best Regards,
Guoqing
* Re: [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock
2015-07-28 3:04 ` Guoqing Jiang
2015-07-29 0:22 ` NeilBrown
@ 2015-07-29 23:39 ` Goldwyn Rodrigues
1 sibling, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-29 23:39 UTC (permalink / raw)
To: Guoqing Jiang; +Cc: neilb, linux-raid
On 07/27/2015 10:04 PM, Guoqing Jiang wrote:
> Hi Goldwyn,
>
> Goldwyn Rodrigues wrote:
>> Hi Guoqing,
>>
>> On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
>>> In a complicated cluster environment, it is possible that the
>>> dlm lock/convert request fails; the related error info is added
>>> to make potential issues easier to debug.
>>>
>>> For lockres_free, if the lock is blocked by a lock request or
>>> conversion request, then dlm_unlock just puts it back on the
>>> grant queue, so we need to ensure the lock is finally released.
>>
>>
>> I cannot think of a scenario where a DLM_CANCEL will be returned.
>> Could you explain the situation a bit more?
>>
> Thanks for the review. When the node is receiving a message and
> needs to convert the message lock, and lockres_free is invoked
> because the user stops the array meanwhile, then the message lock
> is put back on the grant queue in CR mode and is not released.
> Have I misunderstood the case?
>
In that case, this looks correct.
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
--
Goldwyn
* [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
2015-07-10 9:01 ` [PATCH 05/12] md-cluster: init completion within lockres_init Guoqing Jiang
2015-07-10 9:01 ` [PATCH 06/12] md-cluster: add the error check if failed to get dlm lock Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:29 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 08/12] md-cluster: remove the unused sb_lock Guoqing Jiang
` (5 subsequent siblings)
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
If a node has just joined the cluster and receives a msg from other nodes
before suspend_list is initialized, it can crash the kernel with a NULL
pointer dereference, so move the initializations earlier to fix the bug.
md-cluster: Joined cluster 3578507b-e0cb-6d4f-6322-696cd7b1b10c slot 3
BUG: unable to handle kernel NULL pointer dereference at (null)
... ... ...
Call Trace:
[<ffffffffa0444924>] process_recvd_msg+0x2e4/0x330 [md_cluster]
[<ffffffffa0444a06>] recv_daemon+0x96/0x170 [md_cluster]
[<ffffffffa045189d>] md_thread+0x11d/0x170 [md_mod]
[<ffffffff810768c4>] kthread+0xb4/0xc0
[<ffffffff8151927c>] ret_from_fork+0x7c/0xb0
... ... ...
RIP [<ffffffffa0443581>] __remove_suspend_info+0x11/0xa0 [md_cluster]
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
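The rule the fix follows, sketched with illustrative names (the
register_with_cluster() helper is a hypothetical stand-in for joining
the lockspace and starting the recv thread): initialize every field a
callback may touch before the callback can possibly run.

#include <linux/list.h>
#include <linux/spinlock.h>

struct node_info {
	struct list_head suspend_list;
	spinlock_t suspend_lock;
};

static int node_join(struct node_info *cinfo)
{
	/* 1. set up state first: messages may arrive immediately after
	 *    we become visible to the cluster */
	INIT_LIST_HEAD(&cinfo->suspend_list);
	spin_lock_init(&cinfo->suspend_lock);

	/* 2. only now expose ourselves; from here on the receive path
	 *    may walk suspend_list under suspend_lock */
	return register_with_cluster(cinfo);	/* hypothetical helper */
}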
drivers/md/md-cluster.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index b80a689..6f1ea3c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -671,6 +671,8 @@ static int join(struct mddev *mddev, int nodes)
if (!cinfo)
return -ENOMEM;
+ INIT_LIST_HEAD(&cinfo->suspend_list);
+ spin_lock_init(&cinfo->suspend_lock);
init_completion(&cinfo->completion);
mutex_init(&cinfo->sb_mutex);
@@ -736,9 +738,6 @@ static int join(struct mddev *mddev, int nodes)
goto err;
}
- INIT_LIST_HEAD(&cinfo->suspend_list);
- spin_lock_init(&cinfo->suspend_lock);
-
ret = gather_all_resync_info(mddev, nodes);
if (ret)
goto err;
--
1.7.12.4
* Re: [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join
2015-07-10 9:01 ` [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join Guoqing Jiang
@ 2015-07-27 16:29 ` Goldwyn Rodrigues
0 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:29 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> If a node has just joined the cluster and receives a msg from other nodes
> before suspend_list is initialized, it can crash the kernel with a NULL
> pointer dereference, so move the initializations earlier to fix the bug.
>
> md-cluster: Joined cluster 3578507b-e0cb-6d4f-6322-696cd7b1b10c slot 3
> BUG: unable to handle kernel NULL pointer dereference at (null)
> ... ... ...
> Call Trace:
> [<ffffffffa0444924>] process_recvd_msg+0x2e4/0x330 [md_cluster]
> [<ffffffffa0444a06>] recv_daemon+0x96/0x170 [md_cluster]
> [<ffffffffa045189d>] md_thread+0x11d/0x170 [md_mod]
> [<ffffffff810768c4>] kthread+0xb4/0xc0
> [<ffffffff8151927c>] ret_from_fork+0x7c/0xb0
> ... ... ...
> RIP [<ffffffffa0443581>] __remove_suspend_info+0x11/0xa0 [md_cluster]
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> drivers/md/md-cluster.c | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index b80a689..6f1ea3c 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -671,6 +671,8 @@ static int join(struct mddev *mddev, int nodes)
> if (!cinfo)
> return -ENOMEM;
>
> + INIT_LIST_HEAD(&cinfo->suspend_list);
> + spin_lock_init(&cinfo->suspend_lock);
> init_completion(&cinfo->completion);
>
> mutex_init(&cinfo->sb_mutex);
> @@ -736,9 +738,6 @@ static int join(struct mddev *mddev, int nodes)
> goto err;
> }
>
> - INIT_LIST_HEAD(&cinfo->suspend_list);
> - spin_lock_init(&cinfo->suspend_lock);
> -
> ret = gather_all_resync_info(mddev, nodes);
> if (ret)
> goto err;
>
--
Goldwyn
* [PATCH 08/12] md-cluster: remove the unused sb_lock
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
` (2 preceding siblings ...)
2015-07-10 9:01 ` [PATCH 07/12] md-cluster: init suspend_list and suspend_lock early in join Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:29 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 09/12] md-cluster: add missed lockres_free Guoqing Jiang
` (4 subsequent siblings)
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
The sb_lock is not used anywhere, so let's remove it.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
drivers/md/md-cluster.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 6f1ea3c..057a973 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -52,7 +52,6 @@ struct md_cluster_info {
dlm_lockspace_t *lockspace;
int slot_number;
struct completion completion;
- struct dlm_lock_resource *sb_lock;
struct mutex sb_mutex;
struct dlm_lock_resource *bitmap_lockres;
struct list_head suspend_list;
@@ -692,12 +691,6 @@ static int join(struct mddev *mddev, int nodes)
ret = -ERANGE;
goto err;
}
- cinfo->sb_lock = lockres_init(mddev, "cmd-super",
- NULL, 0);
- if (!cinfo->sb_lock) {
- ret = -ENOMEM;
- goto err;
- }
/* Initiate the communication resources */
ret = -ENOMEM;
cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
@@ -749,7 +742,6 @@ err:
lockres_free(cinfo->ack_lockres);
lockres_free(cinfo->no_new_dev_lockres);
lockres_free(cinfo->bitmap_lockres);
- lockres_free(cinfo->sb_lock);
if (cinfo->lockspace)
dlm_release_lockspace(cinfo->lockspace, 2);
mddev->cluster_info = NULL;
@@ -770,7 +762,6 @@ static int leave(struct mddev *mddev)
lockres_free(cinfo->token_lockres);
lockres_free(cinfo->ack_lockres);
lockres_free(cinfo->no_new_dev_lockres);
- lockres_free(cinfo->sb_lock);
lockres_free(cinfo->bitmap_lockres);
dlm_release_lockspace(cinfo->lockspace, 2);
return 0;
--
1.7.12.4
* Re: [PATCH 08/12] md-cluster: remove the unused sb_lock
2015-07-10 9:01 ` [PATCH 08/12] md-cluster: remove the unused sb_lock Guoqing Jiang
@ 2015-07-27 16:29 ` Goldwyn Rodrigues
0 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:29 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> The sb_lock is not used anywhere, so let's remove it.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Code cleanup.
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> drivers/md/md-cluster.c | 9 ---------
> 1 file changed, 9 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 6f1ea3c..057a973 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -52,7 +52,6 @@ struct md_cluster_info {
> dlm_lockspace_t *lockspace;
> int slot_number;
> struct completion completion;
> - struct dlm_lock_resource *sb_lock;
> struct mutex sb_mutex;
> struct dlm_lock_resource *bitmap_lockres;
> struct list_head suspend_list;
> @@ -692,12 +691,6 @@ static int join(struct mddev *mddev, int nodes)
> ret = -ERANGE;
> goto err;
> }
> - cinfo->sb_lock = lockres_init(mddev, "cmd-super",
> - NULL, 0);
> - if (!cinfo->sb_lock) {
> - ret = -ENOMEM;
> - goto err;
> - }
> /* Initiate the communication resources */
> ret = -ENOMEM;
> cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
> @@ -749,7 +742,6 @@ err:
> lockres_free(cinfo->ack_lockres);
> lockres_free(cinfo->no_new_dev_lockres);
> lockres_free(cinfo->bitmap_lockres);
> - lockres_free(cinfo->sb_lock);
> if (cinfo->lockspace)
> dlm_release_lockspace(cinfo->lockspace, 2);
> mddev->cluster_info = NULL;
> @@ -770,7 +762,6 @@ static int leave(struct mddev *mddev)
> lockres_free(cinfo->token_lockres);
> lockres_free(cinfo->ack_lockres);
> lockres_free(cinfo->no_new_dev_lockres);
> - lockres_free(cinfo->sb_lock);
> lockres_free(cinfo->bitmap_lockres);
> dlm_release_lockspace(cinfo->lockspace, 2);
> return 0;
>
--
Goldwyn
* [PATCH 09/12] md-cluster: add missed lockres_free
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
` (3 preceding siblings ...)
2015-07-10 9:01 ` [PATCH 08/12] md-cluster: remove the unused sb_lock Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:30 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster Guoqing Jiang
` (3 subsequent siblings)
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
We also need to free the lock resource before goto out.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
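A sketch of the leak this plugs (illustrative loop shape, with
use_slot() as a hypothetical stand-in for the per-slot work): a
resource created inside the loop must be released on every exit
path, the early error goto included, not only on continue.

	for (i = 0; i < total_slots; i++) {
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres)
			continue;
		ret = use_slot(bm_lockres);	/* hypothetical stand-in */
		if (ret) {
			lockres_free(bm_lockres);	/* the missing free */
			goto out;
		}
		lockres_free(bm_lockres);
	}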
drivers/md/md-cluster.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 057a973..411b430 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -647,8 +647,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
lockres_free(bm_lockres);
continue;
}
- if (ret)
+ if (ret) {
+ lockres_free(bm_lockres);
goto out;
+ }
/* TODO: Read the disk bitmap sb and check if it needs recovery */
dlm_unlock_sync(bm_lockres);
lockres_free(bm_lockres);
--
1.7.12.4
* Re: [PATCH 09/12] md-cluster: add missed lockres_free
2015-07-10 9:01 ` [PATCH 09/12] md-cluster: add missed lockres_free Guoqing Jiang
@ 2015-07-27 16:30 ` Goldwyn Rodrigues
0 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:30 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> We also need to free the lock resource before goto out.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> drivers/md/md-cluster.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 057a973..411b430 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -647,8 +647,10 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
> lockres_free(bm_lockres);
> continue;
> }
> - if (ret)
> + if (ret) {
> + lockres_free(bm_lockres);
> goto out;
> + }
> /* TODO: Read the disk bitmap sb and check if it needs recovery */
> dlm_unlock_sync(bm_lockres);
> lockres_free(bm_lockres);
>
--
Goldwyn
* [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
` (4 preceding siblings ...)
2015-07-10 9:01 ` [PATCH 09/12] md-cluster: add missed lockres_free Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:49 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery Guoqing Jiang
` (2 subsequent siblings)
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
Introduce the MD_CLUSTER_BEGIN_JOIN_CLUSTER flag to make sure
complete(&cinfo->completion) is only invoked when a node joins
the cluster. Otherwise a node failure could also trigger the
complete, and it doesn't make sense to do it then.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
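A condensed sketch of the one-shot guard (using test_and_clear_bit()
as an equivalent atomic variation, not the patch hunk verbatim):
recover_done() fires both when this node joins and when another
node's failure is recovered, but join() only waits once, so the
completion must be signalled for the join case alone.

static void on_recover_done(struct md_cluster_info *cinfo)
{
	/* one-shot: only wake the waiter that join() parked */
	if (test_and_clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state))
		complete(&cinfo->completion);
}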
drivers/md/md-cluster.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 411b430..29f65e2 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -45,6 +45,7 @@ struct resync_info {
/* md_cluster_info flags */
#define MD_CLUSTER_WAITING_FOR_NEWDISK 1
#define MD_CLUSTER_SUSPEND_READ_BALANCING 2
+#define MD_CLUSTER_BEGIN_JOIN_CLUSTER 3
struct md_cluster_info {
@@ -320,10 +321,17 @@ static void recover_done(void *arg, struct dlm_slot *slots,
struct md_cluster_info *cinfo = mddev->cluster_info;
cinfo->slot_number = our_slot;
- complete(&cinfo->completion);
+ /* completion is only need to be complete when node join cluster,
+ * it doesn't need to run during another node's failure */
+ if (test_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state)) {
+ complete(&cinfo->completion);
+ clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
+ }
clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
}
+/* the ops is called when node join the cluster, and do lock recovery
+ * if node failure occurs */
static const struct dlm_lockspace_ops md_ls_ops = {
.recover_prep = recover_prep,
.recover_slot = recover_slot,
@@ -675,6 +683,7 @@ static int join(struct mddev *mddev, int nodes)
INIT_LIST_HEAD(&cinfo->suspend_list);
spin_lock_init(&cinfo->suspend_lock);
init_completion(&cinfo->completion);
+ set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
mutex_init(&cinfo->sb_mutex);
mddev->cluster_info = cinfo;
--
1.7.12.4
* Re: [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster
2015-07-10 9:01 ` [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster Guoqing Jiang
@ 2015-07-27 16:49 ` Goldwyn Rodrigues
0 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:49 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> Introduce the MD_CLUSTER_BEGIN_JOIN_CLUSTER flag to make sure
> complete(&cinfo->completion) is only invoked when a node joins
> the cluster. Otherwise a node failure could also trigger the
> complete, and it doesn't make sense to do it then.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> drivers/md/md-cluster.c | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 411b430..29f65e2 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -45,6 +45,7 @@ struct resync_info {
> /* md_cluster_info flags */
> #define MD_CLUSTER_WAITING_FOR_NEWDISK 1
> #define MD_CLUSTER_SUSPEND_READ_BALANCING 2
> +#define MD_CLUSTER_BEGIN_JOIN_CLUSTER 3
>
>
> struct md_cluster_info {
> @@ -320,10 +321,17 @@ static void recover_done(void *arg, struct dlm_slot *slots,
> struct md_cluster_info *cinfo = mddev->cluster_info;
>
> cinfo->slot_number = our_slot;
> - complete(&cinfo->completion);
> + /* completion is only need to be complete when node join cluster,
> + * it doesn't need to run during another node's failure */
> + if (test_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state)) {
> + complete(&cinfo->completion);
> + clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
> + }
> clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
> }
>
> +/* the ops is called when node join the cluster, and do lock recovery
> + * if node failure occurs */
> static const struct dlm_lockspace_ops md_ls_ops = {
> .recover_prep = recover_prep,
> .recover_slot = recover_slot,
> @@ -675,6 +683,7 @@ static int join(struct mddev *mddev, int nodes)
> INIT_LIST_HEAD(&cinfo->suspend_list);
> spin_lock_init(&cinfo->suspend_lock);
> init_completion(&cinfo->completion);
> + set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
>
> mutex_init(&cinfo->sb_mutex);
> mddev->cluster_info = cinfo;
>
--
Goldwyn
* [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
` (5 preceding siblings ...)
2015-07-10 9:01 ` [PATCH 10/12] md-cluster: only call complete(&cinfo->completion) when node join cluster Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:31 ` Goldwyn Rodrigues
2015-07-10 9:01 ` [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init Guoqing Jiang
2015-07-27 16:25 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Goldwyn Rodrigues
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
In gather_all_resync_info, we need to read the disk bitmap sb and
check if it needs recovery.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
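A hedged reading of the new per-slot check, as a standalone sketch
(it assumes, as the code suggests, that bitmap_copy_from_slot()
returns in lo/hi the first and last dirty sectors recorded in that
slot's bitmap):

static void check_slot_needs_recovery(struct mddev *mddev, int slot)
{
	sector_t lo, hi;

	if (bitmap_copy_from_slot(mddev, slot, &lo, &hi, false))
		return;		/* could not read this slot's bitmap */

	/* a dirty range exists and starts below our checkpoint:
	 * rewind recovery_cp and let md pick up the resync */
	if (hi > 0 && lo < mddev->recovery_cp) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		mddev->recovery_cp = lo;
		md_check_recovery(mddev);
	}
}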
drivers/md/md-cluster.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 29f65e2..c35a03a 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -625,6 +625,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
struct dlm_lock_resource *bm_lockres;
struct suspend_info *s;
char str[64];
+ sector_t lo, hi;
for (i = 0; i < total_slots; i++) {
@@ -659,7 +660,20 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
lockres_free(bm_lockres);
goto out;
}
- /* TODO: Read the disk bitmap sb and check if it needs recovery */
+
+ /* Read the disk bitmap sb and check if it needs recovery */
+ ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
+ if (ret) {
+ pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
+ lockres_free(bm_lockres);
+ continue;
+ }
+ if ((hi > 0) && (lo < mddev->recovery_cp)) {
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ mddev->recovery_cp = lo;
+ md_check_recovery(mddev);
+ }
+
dlm_unlock_sync(bm_lockres);
lockres_free(bm_lockres);
}
--
1.7.12.4
* Re: [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery
2015-07-10 9:01 ` [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery Guoqing Jiang
@ 2015-07-27 16:31 ` Goldwyn Rodrigues
0 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:31 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> In gather_all_resync_info, we need to read the disk bitmap sb and
> check if it needs recovery.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> drivers/md/md-cluster.c | 16 +++++++++++++++-
> 1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 29f65e2..c35a03a 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -625,6 +625,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
> struct dlm_lock_resource *bm_lockres;
> struct suspend_info *s;
> char str[64];
> + sector_t lo, hi;
>
>
> for (i = 0; i < total_slots; i++) {
> @@ -659,7 +660,20 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
> lockres_free(bm_lockres);
> goto out;
> }
> - /* TODO: Read the disk bitmap sb and check if it needs recovery */
> +
> + /* Read the disk bitmap sb and check if it needs recovery */
> + ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
> + if (ret) {
> + pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
> + lockres_free(bm_lockres);
> + continue;
> + }
> + if ((hi > 0) && (lo < mddev->recovery_cp)) {
> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> + mddev->recovery_cp = lo;
> + md_check_recovery(mddev);
> + }
> +
> dlm_unlock_sync(bm_lockres);
> lockres_free(bm_lockres);
> }
>
--
Goldwyn
* [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
` (6 preceding siblings ...)
2015-07-10 9:01 ` [PATCH 11/12] md-cluster: Read the disk bitmap sb and check if it needs recovery Guoqing Jiang
@ 2015-07-10 9:01 ` Guoqing Jiang
2015-07-27 16:34 ` Goldwyn Rodrigues
2015-07-27 16:25 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Goldwyn Rodrigues
8 siblings, 1 reply; 29+ messages in thread
From: Guoqing Jiang @ 2015-07-10 9:01 UTC (permalink / raw)
To: neilb; +Cc: rgoldwyn, linux-raid
In lockres_init, it's better to distinguish the different error conditions.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
---
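The staged-unwind idiom the patch adopts, as a generic sketch (not
md-cluster code): each label frees exactly what was successfully
allocated before the failure point, so nothing is freed twice and
nothing is leaked.

#include <linux/slab.h>

struct three_bufs { void *a, *b, *c; };

static struct three_bufs *three_bufs_alloc(void)
{
	struct three_bufs *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return NULL;
	t->a = kzalloc(16, GFP_KERNEL);
	if (!t->a)
		goto out_free_t;
	t->b = kzalloc(16, GFP_KERNEL);
	if (!t->b)
		goto out_free_a;
	t->c = kzalloc(16, GFP_KERNEL);
	if (!t->c)
		goto out_free_b;
	return t;

out_free_b:
	kfree(t->b);
out_free_a:
	kfree(t->a);
out_free_t:
	kfree(t);
	return NULL;
}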
drivers/md/md-cluster.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index c35a03a..54d225c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -131,14 +131,14 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
res->name = kzalloc(namelen + 1, GFP_KERNEL);
if (!res->name) {
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
- goto out_err;
+ goto out_err_name;
}
strlcpy(res->name, name, namelen + 1);
if (with_lvb) {
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
if (!res->lksb.sb_lvbptr) {
pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
- goto out_err;
+ goto out_err_lvb;
}
res->flags = DLM_LKF_VALBLK;
}
@@ -159,7 +159,9 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
return res;
out_err:
kfree(res->lksb.sb_lvbptr);
+out_err_lvb:
kfree(res->name);
+out_err_name:
kfree(res);
return NULL;
}
@@ -627,7 +629,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
char str[64];
sector_t lo, hi;
-
for (i = 0; i < total_slots; i++) {
memset(str, '\0', 64);
snprintf(str, 64, "bitmap%04d", i);
--
1.7.12.4
* Re: [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init
2015-07-10 9:01 ` [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init Guoqing Jiang
@ 2015-07-27 16:34 ` Goldwyn Rodrigues
2015-07-28 3:05 ` Guoqing Jiang
0 siblings, 1 reply; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:34 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> In lockres_init, it's better to distinguish the different error conditions.
>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
This is not required. kfree() is capable of ignoring null pointers.
> ---
> drivers/md/md-cluster.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index c35a03a..54d225c 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -131,14 +131,14 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
> res->name = kzalloc(namelen + 1, GFP_KERNEL);
> if (!res->name) {
> pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
> - goto out_err;
> + goto out_err_name;
> }
> strlcpy(res->name, name, namelen + 1);
> if (with_lvb) {
> res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
> if (!res->lksb.sb_lvbptr) {
> pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
> - goto out_err;
> + goto out_err_lvb;
> }
> res->flags = DLM_LKF_VALBLK;
> }
> @@ -159,7 +159,9 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
> return res;
> out_err:
> kfree(res->lksb.sb_lvbptr);
> +out_err_lvb:
> kfree(res->name);
> +out_err_name:
> kfree(res);
> return NULL;
> }
> @@ -627,7 +629,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
> char str[64];
> sector_t lo, hi;
>
> -
> for (i = 0; i < total_slots; i++) {
> memset(str, '\0', 64);
> snprintf(str, 64, "bitmap%04d", i);
>
--
Goldwyn
* Re: [PATCH 04/12] md-cluster: fix deadlock issue on message lock
2015-07-10 9:01 ` [PATCH 04/12] md-cluster: fix deadlock issue on message lock Guoqing Jiang
` (7 preceding siblings ...)
2015-07-10 9:01 ` [PATCH 12/12] md-cluster: handle error situations more precisely in lockres_init Guoqing Jiang
@ 2015-07-27 16:25 ` Goldwyn Rodrigues
8 siblings, 0 replies; 29+ messages in thread
From: Goldwyn Rodrigues @ 2015-07-27 16:25 UTC (permalink / raw)
To: Guoqing Jiang, neilb; +Cc: linux-raid
On 07/10/2015 04:01 AM, Guoqing Jiang wrote:
> There is a problem with the previous communication mechanism, and we hit
> the deadlock scenario below in a cluster with 3 nodes.
>
> Sender                     Receiver                   Receiver
>
> token(EX)
> message(EX)
> writes message
> downconverts message(CR)
> requests ack(EX)
>                            get message(CR)            gets message(CR)
>                            reads message              reads message
>                            requests EX on message     requests EX on message
>
> To fix this problem, we do the following changes:
>
> 1. the sender downconverts MESSAGE to CW rather than CR.
> 2. and the receiver request PR lock not EX lock on message.
>
> And in case we fail to down-convert EX to CW on message, it is better to
> unlock the message lock rather than keep holding it.
>
> Signed-off-by: Lidong Zhong <ldzhong@suse.com>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> Documentation/md-cluster.txt | 4 ++--
> drivers/md/md-cluster.c | 14 +++++++-------
> 2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
> index de1af7d..1b79436 100644
> --- a/Documentation/md-cluster.txt
> +++ b/Documentation/md-cluster.txt
> @@ -91,7 +91,7 @@ The algorithm is:
> this message inappropriate or redundant.
>
> 3. sender write LVB.
> - sender down-convert MESSAGE from EX to CR
> + sender down-convert MESSAGE from EX to CW
> sender try to get EX of ACK
> [ wait until all receiver has *processed* the MESSAGE ]
>
> @@ -112,7 +112,7 @@ The algorithm is:
> sender down-convert ACK from EX to CR
> sender release MESSAGE
> sender release TOKEN
> - receiver upconvert to EX of MESSAGE
> + receiver upconvert to PR of MESSAGE
> receiver get CR of ACK
> receiver release MESSAGE
>
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 47199ad..85b7836 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -488,8 +488,8 @@ static void recv_daemon(struct md_thread *thread)
>
> /*release CR on ack_lockres*/
> dlm_unlock_sync(ack_lockres);
> - /*up-convert to EX on message_lockres*/
> - dlm_lock_sync(message_lockres, DLM_LOCK_EX);
> + /*up-convert to PR on message_lockres*/
> + dlm_lock_sync(message_lockres, DLM_LOCK_PR);
> /*get CR on ack_lockres again*/
> dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
> /*release CR on message_lockres*/
> @@ -522,7 +522,7 @@ static void unlock_comm(struct md_cluster_info *cinfo)
> * The function:
> * 1. Grabs the message lockresource in EX mode
> * 2. Copies the message to the message LVB
> - * 3. Downconverts message lockresource to CR
> + * 3. Downconverts message lockresource to CW
> * 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
> * and the other nodes read the message. The thread will wait here until all other
> * nodes have released ack lock resource.
> @@ -543,12 +543,12 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
>
> memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
> sizeof(struct cluster_msg));
> - /*down-convert EX to CR on Message*/
> - error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CR);
> + /*down-convert EX to CW on Message*/
> + error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CW);
> if (error) {
> - pr_err("md-cluster: failed to convert EX to CR on MESSAGE(%d)\n",
> + pr_err("md-cluster: failed to convert EX to CW on MESSAGE(%d)\n",
> error);
> - goto failed_message;
> + goto failed_ack;
> }
>
> /*up-convert CR to EX on Ack*/
>
--
Goldwyn