From: teigland@sourceware.org <teigland@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/group/daemon app.c cpg.c gd_internal.h
Date: 21 Jun 2006 18:10:24 -0000 [thread overview]
Message-ID: <20060621181024.5387.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: teigland at sourceware.org 2006-06-21 18:10:23
Modified files:
group/daemon : app.c cpg.c gd_internal.h
Log message:
Don't finalize/terminate a local group leave until we see that all
remaining group members have stopped.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.29&r2=1.30
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/cpg.c.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/gd_internal.h.diff?cvsroot=cluster&r1=1.32&r2=1.33
--- cluster/group/daemon/app.c 2006/06/20 20:26:08 1.29
+++ cluster/group/daemon/app.c 2006/06/21 18:10:23 1.30
@@ -592,9 +592,11 @@
return (ev->nodeid == our_nodeid);
}
-/* called after the local app has acked that it is stopped as part
- of our own leave. We've gotten the final confchg for our leave
- so we can't send anything out to the group at this point. */
+/* Called after all nodes have acked that they're stopped for our
+ leave. We get their stopped messages even though we've left the
+ cpg because the messages are sent through the groupd cpg.
+ groupd_down() will fill in stops for us for nodes that fail before
+ sending stopped for our leave. */
void finalize_our_leave(group_t *g)
{
@@ -620,15 +622,6 @@
msg_t msg;
event_t *ev = g->app->current_event;
- /* FIXME: see other fixme that mentions that leaving nodes
- should also send a stopped message to be counted by the
- remaining nodes before they move on to restarted */
-
- if (ev && ev->state == EST_LEAVE_STOP_WAIT && is_our_leave(ev)) {
- finalize_our_leave(g);
- return 0;
- }
-
memset(&msg, 0, sizeof(msg));
msg.ms_type = MSG_APP_STOPPED;
msg.ms_global_id = g->global_id;
@@ -855,18 +848,6 @@
case EST_LEAVE_BEGIN:
ev->state = EST_LEAVE_STOP_WAIT;
app_stop(a);
-
- /* FIXME: have leaving node send a stopped message after
- the app acks that it's stopped, and then make the
- other nodes wait for this stopped message instead of
- just setting the leaving node as stopped here */
-
- if (!is_our_leave(ev)) {
- node = find_app_node(a, ev->nodeid);
- ASSERT(node);
- node->stopped = 1;
- }
-
break;
case EST_LEAVE_STOP_WAIT:
@@ -877,6 +858,12 @@
break;
case EST_LEAVE_ALL_STOPPED:
+ if (is_our_leave(ev)) {
+ /* frees group structure */
+ finalize_our_leave(g);
+ rv = -1;
+ break;
+ }
ev->state = EST_LEAVE_START_WAIT;
node = find_app_node(a, ev->nodeid);
@@ -1358,14 +1345,16 @@
{
app_t *a = g->app;
event_t *ev = NULL;
- int rv = 0;
+ int rv = 0, ret;
if (a->current_event) {
- /* this assumes that we never remove/free the group in
- process_current_event */
-
rv += process_app_messages(g);
- rv += process_current_event(g);
+
+ ret = process_current_event(g);
+ if (ret < 0)
+ goto out;
+ rv += ret;
+
rv += recover_current_event(g);
} else {
/* We only take on a new non-recovery event if there are
@@ -1407,3 +1396,30 @@
return rv;
}
+/* This is a bit of a hack that may not be entirely necessary. The problem
+ we're solving with this function is when a node leaves a group and is
+ collecting all the "stopped" messages from the remaining members, some
+ of those members may fail, so we wouldn't get a stopped message from
+ them and never finalize_our_leave (terminate the group). I'm not entirely
+ sure that we _need_ to wait for stopped messages from remaining members
+ before we do the finalize_our_leave/terminate... The reasoning at this
+ point is that when gfs is withdrawing, we want to be sure gfs is
+ suspended everywhere before we leave the lockspace (which happens at
+ terminate for the withdraw/leave) */
+
+void groupd_down(int nodeid)
+{
+ group_t *g;
+
+ list_for_each_entry(g, &gd_groups, list) {
+ if (g->app &&
+ g->app->current_event &&
+ g->app->current_event->state == EST_LEAVE_STOP_WAIT &&
+ is_our_leave(g->app->current_event)) {
+ log_group(g, "groupd down on %d, push our leave",
+ nodeid);
+ mark_node_stopped(g->app, nodeid);
+ }
+ }
+}
+
--- cluster/group/daemon/cpg.c 2006/06/20 20:26:08 1.24
+++ cluster/group/daemon/cpg.c 2006/06/21 18:10:23 1.25
@@ -171,8 +171,10 @@
where groupd exits but cman is still running. */
for (i = 0; i < saved_left_count; i++) {
- if (saved_left[i].reason != CPG_REASON_LEAVE)
+ if (saved_left[i].reason != CPG_REASON_LEAVE) {
add_recovery_set(saved_left[i].nodeId);
+ groupd_down(saved_left[i].nodeId);
+ }
}
}
--- cluster/group/daemon/gd_internal.h 2006/06/20 20:26:08 1.32
+++ cluster/group/daemon/gd_internal.h 2006/06/21 18:10:23 1.33
@@ -248,6 +248,7 @@
void msg_bswap_out(msg_t *msg);
void msg_bswap_in(msg_t *msg);
struct recovery_set *get_recovery_set(int nodeid);
+void groupd_down(int nodeid);
/* main.c */
void app_stop(app_t *a);
next reply other threads:[~2006-06-21 18:10 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-21 18:10 teigland [this message]
-- strict thread matches above, loose matches on Subject: below --
2006-06-22 18:39 [Cluster-devel] cluster/group/daemon app.c cpg.c gd_internal.h teigland
2007-01-05 18:49 teigland
2007-01-05 18:50 teigland
2007-01-05 19:56 teigland
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060621181024.5387.qmail@sourceware.org \
--to=teigland@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).