From mboxrd@z Thu Jan 1 00:00:00 1970 From: pcaulfield@sourceware.org Date: 5 Oct 2006 07:48:34 -0000 Subject: [Cluster-devel] cluster/cman cman_tool/main.c daemon/ais.c dae ... Message-ID: <20061005074834.577.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: pcaulfield at sourceware.org 2006-10-05 07:48:33 Modified files: cman/cman_tool : main.c cman/daemon : ais.c ais.h cnxman-private.h cnxman-socket.h commands.c cman/lib : libcman.c libcman.h Log message: Add some extra semantics to CMAN to cope with openAIS rejoins. Basically, this adds an extra state to a node: AISONLY, which is only cleared when cman receives a valid TRANSITION message from the node. A TRANSITION message is deemed to be invalid if the join_time of the node has not been changed (this is the timestamp at which the daemon was started) and the node has since been down and is rejoining. cman_tool will show if this is the case for a cluster by displaying the DisallowedNodes flag in the "cman_tool status" command. If there are disallowed nodes in the cluster then the "cman_tool expected" command is disabled until those nodes have been removed. 
Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/main.c.diff?cvsroot=cluster&r1=1.48&r2=1.49 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.41&r2=1.42 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.h.diff?cvsroot=cluster&r1=1.10&r2=1.11 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-socket.h.diff?cvsroot=cluster&r1=1.16&r2=1.17 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.50&r2=1.51 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.c.diff?cvsroot=cluster&r1=1.27&r2=1.28 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.h.diff?cvsroot=cluster&r1=1.28&r2=1.29 --- cluster/cman/cman_tool/main.c 2006/08/11 12:34:18 1.48 +++ cluster/cman/cman_tool/main.c 2006/10/05 07:48:33 1.49 @@ -232,6 +232,8 @@ printf(" Shutdown"); if (einfo->ei_flags & CMAN_EXTRA_FLAG_ERROR) printf(" Error"); + if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED) + printf(" DisallowedNodes"); printf(" \n"); printf("Ports Bound: "); --- cluster/cman/daemon/ais.c 2006/10/02 08:50:02 1.41 +++ cluster/cman/daemon/ais.c 2006/10/05 07:48:33 1.42 @@ -57,6 +57,7 @@ struct totem_ip_address ifaddrs[MAX_INTERFACES]; int num_interfaces; uint64_t incarnation; +int num_ais_nodes; static int config_run; static char errorstring[512]; @@ -411,12 +412,15 @@ P_AIS("confchg_fn called type = %d, seq=%lld\n", configuration_type, ring_id->seq); incarnation = ring_id->seq; + num_ais_nodes = member_list_entries; /* Tell the cman membership layer */ for (i=0; i #include #include +#include #include #include "list.h" #include "cnxman-socket.h" @@ -72,6 +73,7 @@ static struct cluster_node *quorum_device; static uint16_t cluster_id; static int ais_running; +static time_t join_time; static poll_timer_handle quorum_device_timer; 
/* If CCS gets out of sync, we poll it until it isn't */ @@ -94,6 +96,7 @@ static int send_port_enquire(int nodeid); static void process_internal_message(char *data, int len, int nodeid, int byteswap); static void recalculate_quorum(int allow_decrease); +static void send_kill(int nodeid, uint16_t reason); static void set_port_bit(struct cluster_node *node, uint8_t port) { @@ -146,6 +149,18 @@ if (!cluster_is_quorate && quorate) log_msg(LOG_INFO, "quorum regained, resuming activity\n"); + /* If we are newly quorate, then kill any AISONLY nodes */ + if (!cluster_is_quorate && quorate) { + struct cluster_node *node = NULL; + struct list *tmp; + + list_iterate(tmp, &cluster_members_list) { + node = list_item(tmp, struct cluster_node); + if (node->state == NODESTATE_AISONLY) + send_kill(node->node_id, CLUSTER_KILL_REJOIN); + } + } + cluster_is_quorate = quorate; } @@ -386,6 +401,7 @@ strcpy(nodename, un.nodename); } + time(&join_time); us = add_new_node(nodename, wanted_nodeid, -1, expected_votes, NODESTATE_MEMBER); set_port_bit(us, 0); @@ -424,6 +440,7 @@ int total_votes = 0; int max_expected = 0; int addrlen; + int uncounted = 0; struct cluster_node *node; struct sockaddr_storage *ss; char *ptr; @@ -437,6 +454,8 @@ total_votes += node->votes; max_expected = max(max_expected, node->expected_votes); } + if (node->state == NODESTATE_AISONLY) + uncounted = 1; } if (quorum_device && quorum_device->state == NODESTATE_MEMBER) total_votes += quorum_device->votes; @@ -467,6 +486,8 @@ einfo->flags |= CMAN_EXTRA_FLAG_ERROR; if (shutdown_con) einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN; + if (uncounted) + einfo->flags |= CMAN_EXTRA_FLAG_UNCOUNTED; ptr = einfo->addresses; for (i=0; istate == NODESTATE_AISONLY) { + log_printf(LOG_NOTICE, "Attempt to set expected votes when cluster has AISONLY nodes in it."); + return -EINVAL; + } + } + memcpy(&newexp, cmdbuf, sizeof(int)); newquorum = calculate_quorum(1, newexp, &total_votes); @@ -647,7 +681,7 @@ if ((node = 
find_node_by_nodeid(nodeid)) == NULL) return -EINVAL; - if (node->state != NODESTATE_MEMBER) + if (node->state != NODESTATE_MEMBER && node->state != NODESTATE_AISONLY) return -EINVAL; node->leave_reason = CLUSTER_LEAVEFLAG_KILLED; @@ -1485,6 +1519,7 @@ msg->config_version = config_version; msg->flags = us->flags; msg->fence_time = us->fence_time; + msg->join_time = join_time; strcpy(msg->clustername, cluster_name); if (us->fence_agent) { @@ -1644,10 +1679,27 @@ node = find_node_by_nodeid(nodeid); assert(node); - if (node->flags & NODE_FLAGS_GOTTRANSITION) { - + /* This is the killer. If the join_time of the node matches that already stored AND + the node has been down, then we kill it as this must be a rejoin */ + if (msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) { + if (cluster_is_quorate) { + P_MEMB("Killing node %s because it has rejoined the cluster without cman_tool join", node->name); + log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster without cman_tool join", node->name); + send_kill(nodeid, CLUSTER_KILL_REJOIN); + } + else { + P_MEMB("Node %s not joined to cman because it has rejoined an inquorate cluster", node->name); + log_printf(LOG_CRIT, "Node %s not joined to cman because it has rejoined an inquorate cluster", node->name); + node->state = NODESTATE_AISONLY; + } + return; } - node->flags = msg->flags; + else { + node->cman_join_time = msg->join_time; + add_ais_node(nodeid, incarnation, num_ais_nodes); + } + + node->flags = msg->flags; /* This will clear the BEENDOWN flag of course */ if (node->fence_agent && msg->fence_agent[0] && strcmp(node->fence_agent, msg->fence_agent)) { free(node->fence_agent); @@ -1748,7 +1800,7 @@ node->leave_reason = leavemsg->reason; /* Mark it as leaving, and remove it when we get an AIS node down event for it */ - if (node && node->state == NODESTATE_MEMBER) + if (node && (node->state == NODESTATE_MEMBER || node->state == NODESTATE_AISONLY)) node->state = 
NODESTATE_LEAVING; break; @@ -1843,11 +1895,9 @@ node->name = strdup(tempname); } - node->incarnation = incarnation; - - gettimeofday(&node->join_time, NULL); - if (node->state == NODESTATE_DEAD) { + gettimeofday(&node->join_time, NULL); + node->incarnation = incarnation; node->state = NODESTATE_MEMBER; cluster_members++; recalculate_quorum(0); @@ -1874,6 +1924,7 @@ node->flags &= ~NODE_FLAGS_FENCED; node->flags &= ~NODE_FLAGS_FENCEDWHILEUP; + node->flags |= NODE_FLAGS_BEENDOWN; if (node->state == NODESTATE_MEMBER) { node->state = NODESTATE_DEAD; --- cluster/cman/lib/libcman.c 2006/08/24 10:40:57 1.27 +++ cluster/cman/lib/libcman.c 2006/10/05 07:48:33 1.28 @@ -45,7 +45,7 @@ int magic; int fd; int zero_fd; - void *private; + void *privdata; int want_reply; cman_callback_t event_callback; cman_datacallback_t data_callback; @@ -161,7 +161,7 @@ else { if (h->data_callback) - h->data_callback(h, h->private, + h->data_callback(h, h->privdata, buf+sizeof(*dmsg), msg->length-sizeof(*dmsg), dmsg->port, dmsg->nodeid); } @@ -203,14 +203,14 @@ { if (msg->command == CMAN_CMD_EVENT && h->event_callback) { struct sock_event_message *emsg = (struct sock_event_message *)msg; - h->event_callback(h, h->private, emsg->reason, emsg->arg); + h->event_callback(h, h->privdata, emsg->reason, emsg->arg); } if (msg->command == CMAN_CMD_CONFCHG && h->confchg_callback) { struct sock_confchg_message *cmsg = (struct sock_confchg_message *)msg; - h->confchg_callback(h, h->private, + h->confchg_callback(h, h->privdata, cmsg->entries,cmsg->member_entries, &cmsg->entries[cmsg->member_entries], cmsg->left_entries, &cmsg->entries[cmsg->member_entries+cmsg->left_entries], cmsg->joined_entries); @@ -287,7 +287,7 @@ return wait_for_reply(h, outbuf, outlen); } -static cman_handle_t open_socket(const char *name, int namelen, void *private) +static cman_handle_t open_socket(const char *name, int namelen, void *privdata) { struct cman_handle *h; struct sockaddr_un sockaddr; @@ -297,7 +297,7 @@ return NULL; 
h->magic = CMAN_MAGIC; - h->private = private; + h->privdata = privdata; h->event_callback = NULL; h->data_callback = NULL; h->confchg_callback = NULL; @@ -344,14 +344,14 @@ return (cman_handle_t)h; } -cman_handle_t cman_admin_init(void *private) +cman_handle_t cman_admin_init(void *privdata) { - return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), private); + return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), privdata); } -cman_handle_t cman_init(void *private) +cman_handle_t cman_init(void *privdata) { - return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), private); + return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), privdata); } int cman_finish(cman_handle_t handle) @@ -367,21 +367,21 @@ return 0; } -int cman_set_private(cman_handle_t *handle, void *private) +int cman_setprivdata(cman_handle_t *handle, void *privdata) { struct cman_handle *h = (struct cman_handle *)handle; VALIDATE_HANDLE(h); - h->private = private; + h->privdata = privdata; return 0; } -int cman_get_private(cman_handle_t *handle, void **private) +int cman_getprivdata(cman_handle_t *handle, void **privdata) { struct cman_handle *h = (struct cman_handle *)handle; VALIDATE_HANDLE(h); - *private = h->private; + *privdata = h->privdata; return 0; } @@ -631,6 +631,53 @@ return 0; } +int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes) +{ + struct cman_handle *h = (struct cman_handle *)handle; + struct cl_cluster_node *cman_nodes; + int status; + int buflen; + int count = 0; + int out_count = 0; + VALIDATE_HANDLE(h); + + if (!retnodes || !nodes || maxnodes < 1) + { + errno = EINVAL; + return -1; + } + + buflen = sizeof(struct cl_cluster_node) * maxnodes; + cman_nodes = malloc(buflen); + if (!cman_nodes) + return -1; + + status = info_call(h, CMAN_CMD_GETALLMEMBERS, NULL, 0, cman_nodes, buflen); + if (status < 0) + { + int saved_errno = errno; + free(cman_nodes); + errno = saved_errno; + return -1; + } + + if 
(cman_nodes[0].size != sizeof(struct cl_cluster_node)) + { + free(cman_nodes); + errno = EINVAL; + return -1; + } + + for (count = 0; count < status; count++) + { + if (cman_nodes[count].state == NODESTATE_AISONLY && out_count < maxnodes) + copy_node(&nodes[out_count++], &cman_nodes[count]); + } + free(cman_nodes); + *retnodes = out_count; + return 0; +} + int cman_get_node(cman_handle_t handle, int nodeid, cman_node_t *node) { struct cman_handle *h = (struct cman_handle *)handle; --- cluster/cman/lib/libcman.h 2006/09/01 08:47:40 1.28 +++ cluster/cman/lib/libcman.h 2006/10/05 07:48:33 1.29 @@ -150,9 +150,10 @@ */ /* Flags in ei_flags */ -#define CMAN_EXTRA_FLAG_2NODE 1 -#define CMAN_EXTRA_FLAG_ERROR 2 -#define CMAN_EXTRA_FLAG_SHUTDOWN 4 +#define CMAN_EXTRA_FLAG_2NODE 1 +#define CMAN_EXTRA_FLAG_ERROR 2 +#define CMAN_EXTRA_FLAG_SHUTDOWN 4 +#define CMAN_EXTRA_FLAG_DISALLOWED 8 typedef struct cman_extra_info { int ei_node_state; @@ -198,8 +199,8 @@ int cman_finish(cman_handle_t handle); /* Update/retrieve the private data */ -int cman_set_private(cman_handle_t *h, void *privdata); -int cman_get_private(cman_handle_t *h, void **privdata); +int cman_setprivdata(cman_handle_t *h, void *privdata); +int cman_getprivdata(cman_handle_t *h, void **privdata); /* * Notification of membership change events. Note that these are sent after @@ -259,6 +260,10 @@ to determine how big your array needs to be */ int cman_get_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes); +/* Returns a list of nodes that are known to AIS but blocked from joining the CMAN + cluster because they rejoined with cluster without a cman_tool join */ +int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes); + /* * cman_get_node() can get node info by nodeid OR by name. If the first * char of node->cn_name is zero then the nodeid will be used, otherwise