From mboxrd@z Thu Jan 1 00:00:00 1970 From: pcaulfield@sourceware.org Date: 2 Aug 2006 11:54:38 -0000 Subject: [Cluster-devel] cluster/cman/daemon ais.c cmanccs.c commands.c Message-ID: <20060802115438.8571.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: pcaulfield at sourceware.org 2006-08-02 11:54:37 Modified files: cman/daemon : ais.c cmanccs.c commands.c Log message: if we can't get the latest config from CCS, poll it until we do. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.33&r2=1.34 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cmanccs.c.diff?cvsroot=cluster&r1=1.17&r2=1.18 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.45&r2=1.46 --- cluster/cman/daemon/ais.c 2006/07/21 12:25:21 1.33 +++ cluster/cman/daemon/ais.c 2006/08/02 11:54:36 1.34 @@ -46,6 +46,7 @@ extern char cluster_name[MAX_CLUSTER_NAME_LEN+1]; extern char *key_filename; extern unsigned int quorumdev_poll; +extern unsigned int ccsd_poll_interval; extern unsigned int shutdown_timeout; extern int init_config(struct objdb_iface_ver0 *objdb); @@ -239,6 +240,7 @@ { objdb_get_int(objdb, object_handle, "quorum_dev_poll", &quorumdev_poll); objdb_get_int(objdb, object_handle, "shutdown_timeout", &shutdown_timeout); + objdb_get_int(objdb, object_handle, "ccsd_poll", &ccsd_poll_interval); /* Only use the CCS version of this if it was not overridden on the command-line */ if (!getenv("CMAN_DEBUGLOG")) --- cluster/cman/daemon/cmanccs.c 2006/07/03 08:51:10 1.17 +++ cluster/cman/daemon/cmanccs.c 2006/08/02 11:54:36 1.18 @@ -107,14 +107,6 @@ if (!ccs_get(ctree, CONFIG_VERSION_PATH, &str)) { config = atoi(str); free(str); - - /* config_version is zero at startup when we read initial config */ - if (*config_version && config != *config_version) { - ccs_disconnect(ctree); - log_msg(LOG_ERR, "CCS version is %d, we expected %d. config not updated\n", - config, *config_version); - return -1; - } *config_version = config; } --- cluster/cman/daemon/commands.c 2006/07/21 12:25:21 1.45 +++ cluster/cman/daemon/commands.c 2006/08/02 11:54:36 1.46 @@ -63,6 +63,7 @@ static int two_node; unsigned int quorumdev_poll=10000; unsigned int shutdown_timeout=5000; + unsigned int ccsd_poll_interval=1000; static int cluster_is_quorate; char cluster_name[MAX_CLUSTER_NAME_LEN+1]; static char nodename[MAX_CLUSTER_MEMBER_NAME_LEN+1]; @@ -73,6 +74,11 @@ static int ais_running; static poll_timer_handle quorum_device_timer; +/* If CCS gets out of sync, we poll it until it isn't */ +static poll_timer_handle ccsd_timer; +static unsigned int wanted_config_version; +static int config_error; + static poll_timer_handle shutdown_timer; static struct connection *shutdown_con; static uint32_t shutdown_flags; @@ -128,7 +134,7 @@ { int quorate; - if (quorum > total_votes) { + if (quorum > total_votes || config_error) { quorate = 0; } else { @@ -457,7 +463,7 @@ einfo->flags = 0; if (two_node) einfo->flags |= CMAN_EXTRA_FLAG_2NODE; - if (us->expected_votes == INT_MAX) + if (config_error) einfo->flags |= CMAN_EXTRA_FLAG_ERROR; if (shutdown_con) einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN; @@ -962,6 +968,27 @@ return 0; } +static void ccsd_timer_fn(void *arg) +{ + int ccs_err; + + log_msg(LOG_DEBUG, "Polling ccsd for updated information\n"); + ccs_err = read_ccs_nodes(&config_version); + if (ccs_err || config_version < wanted_config_version) { + log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n", + wanted_config_version); + + poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL, + ccsd_timer_fn, &ccsd_timer); + } + else { + log_msg(LOG_ERR, "Now got CCS information version %d, continuing\n", config_version); + config_error = 0; + recalculate_quorum(0); + } +} + + static void quorum_device_timer_fn(void *arg) { struct timeval now; @@ -1352,20 +1379,20 @@ static int valid_transition_msg(int nodeid, struct cl_transmsg *msg) { if (strcmp(msg->clustername, cluster_name) != 0) { - log_msg(LOG_ERR, "Node %d refused, remote cluster name='%s', local='%s'\n", + log_msg(LOG_ERR, "Node %d conflict, remote cluster name='%s', local='%s'\n", nodeid, msg->clustername, cluster_name); return -1; } if (msg->cluster_id != cluster_id) { - log_msg(LOG_ERR, "Node %d refused, remote cluster id=%d, local=%d\n", + log_msg(LOG_ERR, "Node %d conflict, remote cluster id=%d, local=%d\n", nodeid, msg->cluster_id, cluster_id); return -1; } if (msg->major_version != CNXMAN_MAJOR_VERSION) { - log_msg(LOG_ERR, "Node %d refused, remote version id=%d, local=%d\n", + log_msg(LOG_ERR, "Node %d conflict, remote version id=%d, local=%d\n", nodeid, msg->major_version, CNXMAN_MAJOR_VERSION); return -1; } @@ -1376,9 +1403,13 @@ ccs_err = read_ccs_nodes(&config_version); if (ccs_err || config_version < msg->config_version) { - us->expected_votes = INT_MAX; /* Force us to stop */ + config_error = 1; log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n", msg->config_version); + + wanted_config_version = msg->config_version; + poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL, + ccsd_timer_fn, &ccsd_timer); } if (config_version > msg->config_version) { // TODO tell everyone else to update... @@ -1388,7 +1419,7 @@ if (msg->config_version != config_version) { - log_msg(LOG_ERR, "Node %d refused, remote config version id=%d, local=%d\n", + log_msg(LOG_ERR, "Node %d conflict, remote config version id=%d, local=%d\n", nodeid, msg->config_version, config_version); return -1; } @@ -1528,8 +1559,12 @@ log_msg(LOG_ERR, "Can't read CCS to get updated config version %d. Activity suspended on this node\n", msg->value); - us->expected_votes = INT_MAX; /* Force us to stop */ + config_error = 1; recalculate_quorum(0); + + wanted_config_version = config_version; + poll_timer_add(ais_poll_handle, ccsd_poll_interval, NULL, + ccsd_timer_fn, &ccsd_timer); } break; }