From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 28 Jun 2006 22:16:40 -0000 Subject: [Cluster-devel] cluster/group/daemon joinleave.c gd_internal.h ... Message-ID: <20060628221640.10023.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2006-06-28 22:16:37 Modified files: group/daemon : joinleave.c gd_internal.h app.c Log message: - extra checking and debugging when events get backlogged - prevent joins while we're still leaving and leaves while we're still joining Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/joinleave.c.diff?cvsroot=cluster&r1=1.16&r2=1.17 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/gd_internal.h.diff?cvsroot=cluster&r1=1.35&r2=1.36 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.33&r2=1.34 --- cluster/group/daemon/joinleave.c 2006/06/21 20:43:54 1.16 +++ cluster/group/daemon/joinleave.c 2006/06/28 22:16:36 1.17 @@ -93,7 +93,7 @@ a->client = ci; log_debug("%d:%s got join", level, name); - + g->joining = 1; rv = do_cpg_join(g); out: return rv; @@ -102,6 +102,7 @@ int do_leave(char *name, int level) { group_t *g; + event_t *ev; int rv; g = find_group_level(name, level); @@ -113,8 +114,30 @@ return -EINVAL; } - log_debug("%d:%s got leave", level, name); + if (g->joining) { + log_group(g, "leave: still joining"); + return -EAGAIN; + } + + if (g->leaving) { + log_group(g, "leave: already leaving"); + return -EBUSY; + } + if (g->app->current_event && + g->app->current_event->nodeid == our_nodeid) { + log_group(g, "leave: busy event %llx state %s", + ev->id, ev_state_str(g->app->current_event)); + return -EAGAIN; + } + + list_for_each_entry(ev, &g->app->events, list) { + ASSERT(ev->nodeid != our_nodeid); + log_group(g, "event id %llx", ev->id); + } + + log_debug("%d:%s got leave", level, name); + g->leaving = 1; rv = do_cpg_leave(g); return rv; } --- cluster/group/daemon/gd_internal.h 2006/06/22 18:39:26 1.35 +++ cluster/group/daemon/gd_internal.h 2006/06/28 22:16:36 1.36 @@ -169,6 +169,8 @@ int cpg_fd; int cpg_client; int have_set_id; + int joining; + int leaving; }; struct app { --- cluster/group/daemon/app.c 2006/06/22 21:12:33 1.33 +++ cluster/group/daemon/app.c 2006/06/28 22:16:36 1.34 @@ -525,16 +525,52 @@ } } +event_t *search_event(group_t *g, int nodeid) +{ + event_t *ev; + + list_for_each_entry(ev, &g->app->events, list) { + if (ev->nodeid == nodeid) + return ev; + } + return NULL; +} + +void dump_queued_events(group_t *g) +{ + event_t *ev; + + list_for_each_entry(ev, &g->app->events, list) { + log_group(g, " queued ev %d %llx %s", + ev->nodeid, ev->id, ev_state_str(ev)); + } +} + int queue_app_join(group_t *g, int nodeid) { event_t *ev; + /* sanity check */ + ev = g->app->current_event; + if (ev && ev->nodeid == nodeid) { + log_group(g, "queue_app_join: current event %d %llx %s", + nodeid, ev->id, ev_state_str(ev)); + } + + /* sanity check */ + ev = search_event(g, nodeid); + if (ev) { + log_group(g, "queue_app_join: queued event %d %llx %s", + nodeid, ev->id, ev_state_str(ev)); + } + ev = create_event(g); ev->nodeid = nodeid; ev->state = EST_JOIN_BEGIN; ev->id = make_event_id(g, EST_JOIN_BEGIN, nodeid); log_group(g, "queue join event for nodeid %d", nodeid); + dump_queued_events(g); if (nodeid == our_nodeid) add_event_nodes(g, ev); @@ -547,12 +583,27 @@ { event_t *ev; + /* sanity check */ + ev = g->app->current_event; + if (ev && ev->nodeid == nodeid) { + log_group(g, "queue_app_leave: current event %d %llx %s", + nodeid, ev->id, ev_state_str(ev)); + } + + /* sanity check */ + ev = search_event(g, nodeid); + if (ev) { + log_group(g, "queue_app_leave: queued event %d %llx %s", + nodeid, ev->id, ev_state_str(ev)); + } + ev = create_event(g); ev->nodeid = nodeid; ev->state = EST_LEAVE_BEGIN; ev->id = make_event_id(g, EST_LEAVE_BEGIN, nodeid); log_group(g, "queue leave event for nodeid %d", nodeid); + dump_queued_events(g); list_add_tail(&ev->list, &g->app->events); return 0; @@ -845,8 +896,10 @@ case EST_JOIN_ALL_STARTED: app_finish(a); - if (is_our_join(ev)) + if (is_our_join(ev)) { purge_messages(g); + g->joining = 0; + } free_event(ev); a->current_event = NULL; rv = 1;