From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 31 Aug 2006 18:20:51 -0000 Subject: [Cluster-devel] cluster/group/daemon app.c Message-ID: <20060831182051.27530.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2006-08-31 18:20:51 Modified files: group/daemon : app.c Log message: When we set a recovery event back to the FAIL_BEGIN state, make sure that we process the event once before processing any new messages. This is probably a better fix for bz 202635 than the one I added previously, where we accept messages more liberally, i.e. in X_BEGIN states. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.41&r2=1.42 --- cluster/group/daemon/app.c 2006/08/22 14:36:20 1.41 +++ cluster/group/daemon/app.c 2006/08/31 18:20:51 1.42 @@ -1316,6 +1316,7 @@ event_t *ev, *rev; node_t *node; struct nodeid *id, *safe; + int rv = 0; ev = a->current_event; if (!ev) @@ -1361,7 +1362,7 @@ list_del(&rev->list); free_event(rev); - return 0; + return 1; } /* Before starting the rev we need to apply the node addition/removal @@ -1383,7 +1384,7 @@ list_del(&rev->list); a->current_event = rev; free_event(ev); - + rv = 1; } else if (event_state_stopping(a)) { /* We'll come back through here multiple times until all the @@ -1397,9 +1398,7 @@ mark_node_stopped(a, rev->nodeid); list_for_each_entry(id, &rev->extended, list) mark_node_stopped(a, id->nodeid); - - process_current_event(g); - + rv = 1; } else { log_group(g, "rev for %d delayed for ev %d %s", rev->nodeid, ev->nodeid, ev_state_str(ev)); @@ -1411,7 +1410,7 @@ /* FIXME: if the current event is a leave and the leaving node has failed, then replace the current event with the rev */ - return 0; + return rv; } static int process_app(group_t *g) @@ -1428,7 +1427,18 @@ goto out; rv += ret; - 
rv += recover_current_event(g); + ret = recover_current_event(g); + if (ret > 0) { + rv += ret; + + /* it's important that we call process_current_event() + when recover_current_event() returns 1 */ + + ret = process_current_event(g); + if (ret < 0) + goto out; + rv += ret; + } } else { /* We only take on a new non-recovery event if there are no recovery sets outstanding. The new event may be