From: teigland@sourceware.org <teigland@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/group/daemon app.c cpg.c gd_internal.h ...
Date: 26 Sep 2006 19:17:22 -0000
Message-ID: <20060926191722.21656.qmail@sourceware.org>
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: teigland at sourceware.org 2006-09-26 19:17:21
Modified files:
group/daemon : app.c cpg.c gd_internal.h joinleave.c main.c
Log message:
Add debugging in four areas to help us know more quickly when something
might be wrong at the cpg level:
- log when cpg flow control goes on (a minimal sketch of this check
  follows the patch links below)
- log when we're waiting to receive a cpg event for our own join
- when we're in a FOO_STOP_WAIT or FOO_START_WAIT state, log how
  many more cpg messages we're waiting to receive before moving on
  to the next state
- save the event id of the last cpg message we sent, and clear that
  value when we receive that message back (this value is printed to
  the debug log when someone runs group_tool, not shown in the
  group_tool output)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/app.c.diff?cvsroot=cluster&r1=1.45&r2=1.46
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/cpg.c.diff?cvsroot=cluster&r1=1.31&r2=1.32
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/gd_internal.h.diff?cvsroot=cluster&r1=1.40&r2=1.41
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/joinleave.c.diff?cvsroot=cluster&r1=1.17&r2=1.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/daemon/main.c.diff?cvsroot=cluster&r1=1.44&r2=1.45
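Of the four items above, the flow control check is the only one built on a
new cpg call, cpg_flow_control_state_get().  A minimal standalone sketch of
the same pattern follows; it is not part of the patch, the <openais/cpg.h>
header path is an assumption about the openais-era API the cpg.c hunk below
uses, and check_flow_control() with its printf() logging is only a stand-in
for groupd's log_debug():

#include <stdio.h>
#include <openais/cpg.h>

static int flow_control_on;

/* Call after each cpg_dispatch(); mirrors the check added to cpg.c below.
   The "on" message repeats on every poll while flow control stays enabled;
   the "off" message is printed only once, on the transition back. */
static void check_flow_control(cpg_handle_t handle)
{
	cpg_flow_control_state_t state;
	cpg_error_t error;

	error = cpg_flow_control_state_get(handle, &state);
	if (error != CPG_OK) {
		fprintf(stderr, "cpg_flow_control_state_get %d\n", error);
		return;
	}

	if (state == CPG_FLOW_CONTROL_ENABLED) {
		flow_control_on = 1;
		printf("flow control on\n");
	} else {
		if (flow_control_on)
			printf("flow control off\n");
		flow_control_on = 0;
	}
}

In the patch itself the equivalent check sits in cpg.c's dispatch path,
just before process_confchg() is called.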
--- cluster/group/daemon/app.c 2006/09/15 20:07:15 1.45
+++ cluster/group/daemon/app.c 2006/09/26 19:17:20 1.46
@@ -692,6 +692,7 @@
msg_bswap_out(&msg);
log_group(g, "send stopped");
+ g->app->sent_event_id = ev->id;
return send_message_groupd(g, &msg, sizeof(msg));
}
@@ -710,6 +711,7 @@
msg_bswap_out(&msg);
log_group(g, "send started");
+ g->app->sent_event_id = ev->id;
return send_message_groupd(g, &msg, sizeof(msg));
}
@@ -788,7 +790,6 @@
}
}
-#if 0
static int count_nodes_not_stopped(app_t *a)
{
node_t *node;
@@ -800,7 +801,6 @@
}
return i;
}
-#endif
int event_state_begin(app_t *a)
{
@@ -853,7 +853,7 @@
event_t *ev = a->current_event;
node_t *node, *n;
struct nodeid *id;
- int rv = 0, do_start = 0;
+ int rv = 0, do_start = 0, count;
if (!(event_state_stopping(a) || event_state_starting(a)))
log_group(g, "process_current_event %llx %d %s",
@@ -904,10 +904,9 @@
break;
case EST_JOIN_STOP_WAIT:
- /*
count = count_nodes_not_stopped(a);
- log_group(g, "waiting for %d more nodes to be stopped", count);
- */
+ log_group(g, "waiting for %d more stopped messages "
+ "before JOIN_ALL_STOPPED", count);
break;
case EST_JOIN_ALL_STOPPED:
@@ -939,10 +938,9 @@
break;
case EST_LEAVE_STOP_WAIT:
- /*
count = count_nodes_not_stopped(a);
- log_group(g, "waiting for %d more nodes to be stopped", count);
- */
+ log_group(g, "waiting for %d more stopped messages "
+ "before LEAVE_ALL_STOPPED", count);
break;
case EST_LEAVE_ALL_STOPPED:
@@ -993,10 +991,9 @@
break;
case EST_FAIL_STOP_WAIT:
- /*
count = count_nodes_not_stopped(a);
- log_group(g, "waiting for %d more nodes to be stopped", count);
- */
+ log_group(g, "waiting for %d more stopped messages "
+ "before FAIL_ALL_STOPPED", count);
break;
case EST_FAIL_ALL_STOPPED:
@@ -1470,8 +1467,11 @@
}
if (ev) {
+ a->need_first_event = 0;
a->current_event = ev;
rv = process_current_event(g);
+ } else if (a->need_first_event) {
+ log_group(g, "waiting for our own cpg join event");
}
}
out:
--- cluster/group/daemon/cpg.c 2006/09/08 23:14:56 1.31
+++ cluster/group/daemon/cpg.c 2006/09/26 19:17:20 1.32
@@ -20,6 +20,7 @@
static int saved_left_count;
static cpg_handle_t saved_handle;
static struct cpg_name saved_name;
+static int message_flow_control_on;
static node_t *find_group_node(group_t *g, int nodeid)
@@ -246,6 +247,9 @@
msg_type(msg->ms_type));
*/
+ if (nodeid == our_nodeid && g->app->sent_event_id == msg->ms_event_id)
+ g->app->sent_event_id = 0;
+
save = malloc(sizeof(struct save_msg));
memset(save, 0, sizeof(struct save_msg));
save->nodeid = nodeid;
@@ -375,6 +379,7 @@
cpg_error_t error;
cpg_handle_t handle;
int found = 0;
+ cpg_flow_control_state_t flow_control_state;
if (ci == groupd_ci) {
handle = groupd_handle;
@@ -404,6 +409,18 @@
return;
}
+ error = cpg_flow_control_state_get(handle, &flow_control_state);
+ if (error != CPG_OK)
+ log_error(g, "cpg_flow_control_state_get %d", error);
+ else if (flow_control_state == CPG_FLOW_CONTROL_ENABLED) {
+ message_flow_control_on = 1;
+ log_debug("flow control on");
+ } else {
+ if (message_flow_control_on)
+ log_debug("flow control off");
+ message_flow_control_on = 0;
+ }
+
if (got_confchg)
process_confchg();
}
--- cluster/group/daemon/gd_internal.h 2006/09/15 18:20:36 1.40
+++ cluster/group/daemon/gd_internal.h 2006/09/26 19:17:21 1.41
@@ -180,6 +180,8 @@
struct list_head events;
event_t *current_event;
group_t *g;
+ uint64_t sent_event_id; /* for debugging */
+ int need_first_event; /* for debugging */
};
#define MSG_APP_STOPPED 1
--- cluster/group/daemon/joinleave.c 2006/06/28 22:16:36 1.17
+++ cluster/group/daemon/joinleave.c 2006/09/26 19:17:21 1.18
@@ -63,6 +63,7 @@
a = malloc(sizeof(app_t));
memset(a, 0, sizeof(app_t));
+ a->need_first_event = 1;
INIT_LIST_HEAD(&a->nodes);
INIT_LIST_HEAD(&a->events);
a->g = g;
--- cluster/group/daemon/main.c 2006/09/15 18:20:36 1.44
+++ cluster/group/daemon/main.c 2006/09/26 19:17:21 1.45
@@ -462,6 +462,10 @@
else
data->event_local_status = -1;
}
+
+ if (g->app->sent_event_id)
+ log_group(g, "sent_event_id %llx",
+ g->app->sent_event_id);
}
data->member_count = g->app->node_count;
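For reference, the debug-log lines added by these changes look roughly like
the following; the values are made up for illustration and the per-group
prefix that log_group() normally adds is omitted:

flow control on
flow control off
waiting for our own cpg join event
waiting for 2 more stopped messages before JOIN_ALL_STOPPED
waiting for 3 more stopped messages before FAIL_ALL_STOPPED
sent_event_id 200020002

As the log message notes, the sent_event_id line shows up in the debug log
gathered when someone runs group_tool, and it stops being printed once the
message comes back to the sender and the value is cleared.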