* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
@ 2013-10-18 14:45 Goldwyn Rodrigues
2013-11-03 22:16 ` Mark Fasheh
0 siblings, 1 reply; 8+ messages in thread
From: Goldwyn Rodrigues @ 2013-10-18 14:45 UTC (permalink / raw)
To: ocfs2-devel
These are the callbacks called by the fs/dlm code in case the membership
changes. If there is a failure while/during calling any of these, the
DLM creates a new membership and relays to the rest of the nodes.
recover_prep() is called when DLM understands a node is down.
recover_slot() is called once all nodes have acknowledged recover_prep
and recovery can begin.
recover_done() is called once the recovery is complete. It returns the
new membership.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/ocfs2/stack_user.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 286edf1..4111855 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -110,6 +110,8 @@
struct ocfs2_live_connection {
struct list_head oc_list;
struct ocfs2_cluster_connection *oc_conn;
+ atomic_t oc_this_node;
+ int oc_our_slot;
};
struct ocfs2_control_private {
@@ -799,6 +801,42 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
return 0;
}
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+ slot->nodeid, slot->slot);
+ conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+ int num_slots, int our_slot,
+ uint32_t generation)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ int i;
+
+ for (i = 0; i < num_slots; i++)
+ if (slots[i].slot == our_slot) {
+ atomic_set(&lc->oc_this_node, slots[i].nodeid);
+ break;
+ }
+
+ lc->oc_our_slot = our_slot;
+}
+
+const struct dlm_lockspace_ops ocfs2_ls_ops = {
+ .recover_prep = user_recover_prep,
+ .recover_slot = user_recover_slot,
+ .recover_done = user_recover_done,
+};
+
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
dlm_lockspace_t *fsdlm;
--
1.8.1.4
--
Goldwyn
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
2013-10-18 14:45 [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks Goldwyn Rodrigues
@ 2013-11-03 22:16 ` Mark Fasheh
2013-11-04 3:45 ` Goldwyn Rodrigues
0 siblings, 1 reply; 8+ messages in thread
From: Mark Fasheh @ 2013-11-03 22:16 UTC (permalink / raw)
To: ocfs2-devel
This also looks good in that I don't see anything wrong with the way the
actual code functions. The extra description you put about the callbacks
helps in understanding this too. Question - can you explain why oc_this_node
is an atomic? That is to say I'd like to understand that race that you're
protecting against :)
--Mark
On Fri, Oct 18, 2013 at 09:45:25AM -0500, Goldwyn Rodrigues wrote:
> These are the callbacks called by the fs/dlm code in case the membership
> changes. If there is a failure while/during calling any of these, the
> DLM creates a new membership and relays to the rest of the nodes.
>
> recover_prep() is called when DLM understands a node is down.
> recover_slot() is called once all nodes have acknowledged recover_prep
> and recovery can begin.
> recover_done() is called once the recovery is complete. It returns the
> new membership.
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
> fs/ocfs2/stack_user.c | 38 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 38 insertions(+)
>
> diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
> index 286edf1..4111855 100644
> --- a/fs/ocfs2/stack_user.c
> +++ b/fs/ocfs2/stack_user.c
> @@ -110,6 +110,8 @@
> struct ocfs2_live_connection {
> struct list_head oc_list;
> struct ocfs2_cluster_connection *oc_conn;
> + atomic_t oc_this_node;
> + int oc_our_slot;
> };
>
> struct ocfs2_control_private {
> @@ -799,6 +801,42 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
> return 0;
> }
>
> +static void user_recover_prep(void *arg)
> +{
> +}
> +
> +static void user_recover_slot(void *arg, struct dlm_slot *slot)
> +{
> + struct ocfs2_cluster_connection *conn = arg;
> + printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
> + slot->nodeid, slot->slot);
> + conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
> +
> +}
> +
> +static void user_recover_done(void *arg, struct dlm_slot *slots,
> + int num_slots, int our_slot,
> + uint32_t generation)
> +{
> + struct ocfs2_cluster_connection *conn = arg;
> + struct ocfs2_live_connection *lc = conn->cc_private;
> + int i;
> +
> + for (i = 0; i < num_slots; i++)
> + if (slots[i].slot == our_slot) {
> + atomic_set(&lc->oc_this_node, slots[i].nodeid);
> + break;
> + }
> +
> + lc->oc_our_slot = our_slot;
> +}
> +
> +const struct dlm_lockspace_ops ocfs2_ls_ops = {
> + .recover_prep = user_recover_prep,
> + .recover_slot = user_recover_slot,
> + .recover_done = user_recover_done,
> +};
> +
> static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
> {
> dlm_lockspace_t *fsdlm;
> --
> 1.8.1.4
>
>
> --
> Goldwyn
--
Mark Fasheh
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
2013-11-03 22:16 ` Mark Fasheh
@ 2013-11-04 3:45 ` Goldwyn Rodrigues
2013-11-04 22:09 ` Mark Fasheh
0 siblings, 1 reply; 8+ messages in thread
From: Goldwyn Rodrigues @ 2013-11-04 3:45 UTC (permalink / raw)
To: ocfs2-devel
On 11/03/2013 04:16 PM, Mark Fasheh wrote:
> This also looks good in that I don't see anything wrong with the way the
> actual code functions. The extra description you put about the callbacks
> helps in understanding this too. Question - can you explain why oc_this_node
> is an atomic? That is to say I'd like to understand that race that you're
> protecting against :)
> --Mark
Ok, we may not require this to be atomic. The dlm thread sets up
oc_this_node via .recover_done(), but it should not matter because
this_node() functions are called after .cluster_connect().
>
> On Fri, Oct 18, 2013 at 09:45:25AM -0500, Goldwyn Rodrigues wrote:
>> These are the callbacks called by the fs/dlm code in case the membership
>> changes. If there is a failure while/during calling any of these, the
>> DLM creates a new membership and relays to the rest of the nodes.
>>
>> recover_prep() is called when DLM understands a node is down.
>> recover_slot() is called once all nodes have acknowledged recover_prep
>> and recovery can begin.
>> recover_done() is called once the recovery is complete. It returns the
>> new membership.
>>
>> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
>> ---
>> fs/ocfs2/stack_user.c | 38 ++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 38 insertions(+)
>>
>> diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
>> index 286edf1..4111855 100644
>> --- a/fs/ocfs2/stack_user.c
>> +++ b/fs/ocfs2/stack_user.c
>> @@ -110,6 +110,8 @@
>> struct ocfs2_live_connection {
>> struct list_head oc_list;
>> struct ocfs2_cluster_connection *oc_conn;
>> + atomic_t oc_this_node;
>> + int oc_our_slot;
>> };
>>
>> struct ocfs2_control_private {
>> @@ -799,6 +801,42 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
>> return 0;
>> }
>>
>> +static void user_recover_prep(void *arg)
>> +{
>> +}
>> +
>> +static void user_recover_slot(void *arg, struct dlm_slot *slot)
>> +{
>> + struct ocfs2_cluster_connection *conn = arg;
>> + printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
>> + slot->nodeid, slot->slot);
>> + conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
>> +
>> +}
>> +
>> +static void user_recover_done(void *arg, struct dlm_slot *slots,
>> + int num_slots, int our_slot,
>> + uint32_t generation)
>> +{
>> + struct ocfs2_cluster_connection *conn = arg;
>> + struct ocfs2_live_connection *lc = conn->cc_private;
>> + int i;
>> +
>> + for (i = 0; i < num_slots; i++)
>> + if (slots[i].slot == our_slot) {
>> + atomic_set(&lc->oc_this_node, slots[i].nodeid);
>> + break;
>> + }
>> +
>> + lc->oc_our_slot = our_slot;
>> +}
>> +
>> +const struct dlm_lockspace_ops ocfs2_ls_ops = {
>> + .recover_prep = user_recover_prep,
>> + .recover_slot = user_recover_slot,
>> + .recover_done = user_recover_done,
>> +};
>> +
>> static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
>> {
>> dlm_lockspace_t *fsdlm;
>> --
>> 1.8.1.4
>>
>>
>> --
>> Goldwyn
> --
> Mark Fasheh
>
--
Goldwyn
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
2013-11-04 3:45 ` Goldwyn Rodrigues
@ 2013-11-04 22:09 ` Mark Fasheh
2013-11-05 20:34 ` Goldwyn Rodrigues
0 siblings, 1 reply; 8+ messages in thread
From: Mark Fasheh @ 2013-11-04 22:09 UTC (permalink / raw)
To: ocfs2-devel
On Sun, Nov 03, 2013 at 09:45:09PM -0600, Goldwyn Rodrigues wrote:
> On 11/03/2013 04:16 PM, Mark Fasheh wrote:
> > This also looks good in that I don't see anything wrong with the way the
> > actual code functions. The extra description you put about the callbacks
> > helps in understanding this too. Question - can you explain why oc_this_node
> > is an atomic? That is to say I'd like to understand that race that you're
> > protecting against :)
> > --Mark
>
> Ok, we may not require this to be atomic. The dlm thread sets up
> oc_this_node via .recover_done(), but it should not matter because
> this_node() functions are called after .cluster_connect().
Ahh alright. And this is probably a silly question, but we know the dlm will
never call this code concurently?
--Mark
--
Mark Fasheh
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
2013-11-04 22:09 ` Mark Fasheh
@ 2013-11-05 20:34 ` Goldwyn Rodrigues
0 siblings, 0 replies; 8+ messages in thread
From: Goldwyn Rodrigues @ 2013-11-05 20:34 UTC (permalink / raw)
To: ocfs2-devel
On 11/04/2013 04:09 PM, Mark Fasheh wrote:
> On Sun, Nov 03, 2013 at 09:45:09PM -0600, Goldwyn Rodrigues wrote:
>> On 11/03/2013 04:16 PM, Mark Fasheh wrote:
>>> This also looks good in that I don't see anything wrong with the way the
>>> actual code functions. The extra description you put about the callbacks
>>> helps in understanding this too. Question - can you explain why oc_this_node
>>> is an atomic? That is to say I'd like to understand that race that you're
>>> protecting against :)
>>> --Mark
>>
>> Ok, we may not require this to be atomic. The dlm thread sets up
>> oc_this_node via .recover_done(), but it should not matter because
>> this_node() functions are called after .cluster_connect().
>
> Ahh alright. And this is probably a silly question, but we know the dlm will
> never call this code concurently?
It may.. in case of failure. However, the node number will not change
unless the user has forced it. So, I suppose it is best to keep it
atomic. I think thats the reason I had kept it atomic initially.
--
Goldwyn
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
@ 2013-11-12 14:06 Goldwyn Rodrigues
2013-11-25 23:00 ` Mark Fasheh
0 siblings, 1 reply; 8+ messages in thread
From: Goldwyn Rodrigues @ 2013-11-12 14:06 UTC (permalink / raw)
To: ocfs2-devel
These are the callbacks called by the fs/dlm code in case the membership
changes. If there is a failure while/during calling any of these, the
DLM creates a new membership and relays to the rest of the nodes.
recover_prep() is called when DLM understands a node is down.
recover_slot() is called once all nodes have acknowledged recover_prep
and recovery can begin.
recover_done() is called once the recovery is complete. It returns the
new membership.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/ocfs2/stack_user.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 286edf1..4111855 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -110,6 +110,8 @@
struct ocfs2_live_connection {
struct list_head oc_list;
struct ocfs2_cluster_connection *oc_conn;
+ atomic_t oc_this_node;
+ int oc_our_slot;
};
struct ocfs2_control_private {
@@ -799,6 +801,42 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
return 0;
}
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+ slot->nodeid, slot->slot);
+ conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+ int num_slots, int our_slot,
+ uint32_t generation)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ int i;
+
+ for (i = 0; i < num_slots; i++)
+ if (slots[i].slot == our_slot) {
+ atomic_set(&lc->oc_this_node, slots[i].nodeid);
+ break;
+ }
+
+ lc->oc_our_slot = our_slot;
+}
+
+const struct dlm_lockspace_ops ocfs2_ls_ops = {
+ .recover_prep = user_recover_prep,
+ .recover_slot = user_recover_slot,
+ .recover_done = user_recover_done,
+};
+
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
dlm_lockspace_t *fsdlm;
--
1.8.1.4
--
Goldwyn
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
2013-11-12 14:06 Goldwyn Rodrigues
@ 2013-11-25 23:00 ` Mark Fasheh
0 siblings, 0 replies; 8+ messages in thread
From: Mark Fasheh @ 2013-11-25 23:00 UTC (permalink / raw)
To: ocfs2-devel
On Tue, Nov 12, 2013 at 08:06:59AM -0600, Goldwyn Rodrigues wrote:
> These are the callbacks called by the fs/dlm code in case the membership
> changes. If there is a failure while/during calling any of these, the
> DLM creates a new membership and relays to the rest of the nodes.
>
> recover_prep() is called when DLM understands a node is down.
> recover_slot() is called once all nodes have acknowledged recover_prep
> and recovery can begin.
> recover_done() is called once the recovery is complete. It returns the
> new membership.
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Looks good, I didn't have anything last time, nothing still this time so:
Reviewed-by: Mark Fasheh <mfasheh@suse.com>
--Mark
--
Mark Fasheh
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks
@ 2013-12-09 19:41 Goldwyn Rodrigues
0 siblings, 0 replies; 8+ messages in thread
From: Goldwyn Rodrigues @ 2013-12-09 19:41 UTC (permalink / raw)
To: ocfs2-devel
These are the callbacks called by the fs/dlm code in case the membership
changes. If there is a failure while/during calling any of these, the
DLM creates a new membership and relays to the rest of the nodes.
recover_prep() is called when DLM understands a node is down.
recover_slot() is called once all nodes have acknowledged recover_prep
and recovery can begin.
recover_done() is called once the recovery is complete. It returns the
new membership.
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/ocfs2/stack_user.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 286edf1..4111855 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -110,6 +110,8 @@
struct ocfs2_live_connection {
struct list_head oc_list;
struct ocfs2_cluster_connection *oc_conn;
+ atomic_t oc_this_node;
+ int oc_our_slot;
};
struct ocfs2_control_private {
@@ -799,6 +801,42 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
return 0;
}
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+ slot->nodeid, slot->slot);
+ conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+ int num_slots, int our_slot,
+ uint32_t generation)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ int i;
+
+ for (i = 0; i < num_slots; i++)
+ if (slots[i].slot == our_slot) {
+ atomic_set(&lc->oc_this_node, slots[i].nodeid);
+ break;
+ }
+
+ lc->oc_our_slot = our_slot;
+}
+
+const struct dlm_lockspace_ops ocfs2_ls_ops = {
+ .recover_prep = user_recover_prep,
+ .recover_slot = user_recover_slot,
+ .recover_done = user_recover_done,
+};
+
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
dlm_lockspace_t *fsdlm;
--
1.8.4
--
Goldwyn
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2013-12-09 19:41 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-10-18 14:45 [Ocfs2-devel] [PATCH 2/6] Add DLM recovery callbacks Goldwyn Rodrigues
2013-11-03 22:16 ` Mark Fasheh
2013-11-04 3:45 ` Goldwyn Rodrigues
2013-11-04 22:09 ` Mark Fasheh
2013-11-05 20:34 ` Goldwyn Rodrigues
-- strict thread matches above, loose matches on Subject: below --
2013-11-12 14:06 Goldwyn Rodrigues
2013-11-25 23:00 ` Mark Fasheh
2013-12-09 19:41 Goldwyn Rodrigues
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.