All of lore.kernel.org
 help / color / mirror / Atom feed
From: pcaulfield@sourceware.org <pcaulfield@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cman cman_tool/main.c daemon/ais.c dae ...
Date: 5 Oct 2006 07:48:34 -0000	[thread overview]
Message-ID: <20061005074834.577.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	pcaulfield at sourceware.org	2006-10-05 07:48:33

Modified files:
	cman/cman_tool : main.c 
	cman/daemon    : ais.c ais.h cnxman-private.h cnxman-socket.h 
	                 commands.c 
	cman/lib       : libcman.c libcman.h 

Log message:
	Add some extra semantics to CMAN to cope with openAIS rejoins.
	Basically, this adds an extra node state, AISONLY, which is only cleared
	when cman receives a valid TRANSITION message from the node.
	
	A TRANSITION message is deemed to be invalid if the join_time of the node
	(the timestamp at which the daemon was started) has not changed and the
	node has been down since then and is now rejoining. cman_tool shows that
	this is the case for a cluster by displaying the DisallowedNodes flag in
	the output of the "cman_tool status" command.
	
	If there are disallowed nodes in the cluster then the "cman_tool expected"
	command is disabled until those nodes have been removed.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/main.c.diff?cvsroot=cluster&r1=1.48&r2=1.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.41&r2=1.42
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.h.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-socket.h.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.50&r2=1.51
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.c.diff?cvsroot=cluster&r1=1.27&r2=1.28
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.h.diff?cvsroot=cluster&r1=1.28&r2=1.29

--- cluster/cman/cman_tool/main.c	2006/08/11 12:34:18	1.48
+++ cluster/cman/cman_tool/main.c	2006/10/05 07:48:33	1.49
@@ -232,6 +232,8 @@
 		printf(" Shutdown");
 	if (einfo->ei_flags & CMAN_EXTRA_FLAG_ERROR)
 		printf(" Error");
+	if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED)
+		printf(" DisallowedNodes");
 	printf(" \n");
 
 	printf("Ports Bound: ");
--- cluster/cman/daemon/ais.c	2006/10/02 08:50:02	1.41
+++ cluster/cman/daemon/ais.c	2006/10/05 07:48:33	1.42
@@ -57,6 +57,7 @@
 struct totem_ip_address ifaddrs[MAX_INTERFACES];
 int num_interfaces;
 uint64_t incarnation;
+int num_ais_nodes;
 
 static int config_run;
 static char errorstring[512];
@@ -411,12 +412,15 @@
 	P_AIS("confchg_fn called type = %d, seq=%lld\n", configuration_type, ring_id->seq);
 
 	incarnation = ring_id->seq;
+	num_ais_nodes = member_list_entries;
 
 	/* Tell the cman membership layer */
 	for (i=0; i<left_list_entries; i++)
 		del_ais_node(left_list[i]);
-	for (i=0; i<joined_list_entries; i++)
-		add_ais_node(joined_list[i], incarnation, member_list_entries);
+
+	/* Joining nodes are only added after a valid TRANSITION message
+	 * is received.
+	 */
 
 	/* Save the left list for later so we can do a consolidated confchg message */
 	if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
--- cluster/cman/daemon/ais.h	2006/08/11 12:34:18	1.10
+++ cluster/cman/daemon/ais.h	2006/10/05 07:48:33	1.11
@@ -25,3 +25,4 @@
 extern struct totem_ip_address mcast_addr[MAX_INTERFACES];
 extern struct totem_ip_address ifaddrs[MAX_INTERFACES];
 extern int num_interfaces;
+extern int num_ais_nodes;
--- cluster/cman/daemon/cnxman-private.h	2006/09/22 12:35:42	1.24
+++ cluster/cman/daemon/cnxman-private.h	2006/10/05 07:48:33	1.25
@@ -81,6 +81,7 @@
 	unsigned int   config_version;
 	unsigned int   flags;
 	uint64_t       fence_time;
+	uint64_t       join_time;
         char           clustername[16];
 	char           fence_agent[];
 };
@@ -142,7 +143,10 @@
 #define RECONFIG_PARAM_CONFIG_VERSION 3
 #define RECONFIG_PARAM_CCS            4
 
-#define NODE_FLAGS_GOTTRANSITION      1
+/* NODE_FLAGS_BEENDOWN   - this node has been down.
+   NODE_FLAGS_FENCED     - This node has been fenced since it last went down.
+*/
+#define NODE_FLAGS_BEENDOWN           1
 #define NODE_FLAGS_FENCED             2
 #define NODE_FLAGS_FENCEDWHILEUP      4
 
@@ -161,6 +165,8 @@
 	uint64_t fence_time; /* A time_t */
 	char    *fence_agent;
 
+	uint64_t cman_join_time; /* A time_t */
+
 	struct timeval last_hello; /* Only used for quorum devices */
 
 	unsigned int votes;
@@ -189,6 +195,7 @@
 /* Kill reasons */
 #define CLUSTER_KILL_REJECTED   1
 #define CLUSTER_KILL_CMANTOOL   2
+#define CLUSTER_KILL_REJOIN     3
 
 #define MAX_ADDR_PRINTED_LEN (address_length*3 + 1)
 
--- cluster/cman/daemon/cnxman-socket.h	2006/08/17 13:22:39	1.16
+++ cluster/cman/daemon/cnxman-socket.h	2006/10/05 07:48:33	1.17
@@ -113,7 +113,7 @@
 #define MSG_BCASTSELF    0x4000000
 
 typedef enum { NODESTATE_JOINING=1, NODESTATE_MEMBER,
-	       NODESTATE_DEAD, NODESTATE_LEAVING } nodestate_t;
+	       NODESTATE_DEAD, NODESTATE_LEAVING, NODESTATE_AISONLY } nodestate_t;
 
 static const char CLIENT_SOCKNAME[]= "/var/run/cman_client";
 static const char ADMIN_SOCKNAME[]=  "/var/run/cman_admin";
@@ -163,9 +163,10 @@
 };
 
 /* Flags */
-#define CMAN_EXTRA_FLAG_2NODE    1
-#define CMAN_EXTRA_FLAG_ERROR    2
-#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_2NODE     1
+#define CMAN_EXTRA_FLAG_ERROR     2
+#define CMAN_EXTRA_FLAG_SHUTDOWN  4
+#define CMAN_EXTRA_FLAG_UNCOUNTED 8
 
 struct cl_extra_info {
 	int           node_state;
--- cluster/cman/daemon/commands.c	2006/09/22 12:35:42	1.50
+++ cluster/cman/daemon/commands.c	2006/10/05 07:48:33	1.51
@@ -33,6 +33,7 @@
 #include <openais/totem/totemip.h>
 #include <openais/totem/totempg.h>
 #include <openais/service/swab.h>
+#include <openais/service/print.h>
 #include <openais/totem/aispoll.h>
 #include "list.h"
 #include "cnxman-socket.h"
@@ -72,6 +73,7 @@
 static struct cluster_node *quorum_device;
 static uint16_t cluster_id;
 static int ais_running;
+static time_t join_time;
 static poll_timer_handle quorum_device_timer;
 
 /* If CCS gets out of sync, we poll it until it isn't */
@@ -94,6 +96,7 @@
 static int send_port_enquire(int nodeid);
 static void process_internal_message(char *data, int len, int nodeid, int byteswap);
 static void recalculate_quorum(int allow_decrease);
+static void send_kill(int nodeid, uint16_t reason);
 
 static void set_port_bit(struct cluster_node *node, uint8_t port)
 {
@@ -146,6 +149,18 @@
 	if (!cluster_is_quorate && quorate)
 		log_msg(LOG_INFO, "quorum regained, resuming activity\n");
 
+	/* If we are newly quorate, then kill any AISONLY nodes */
+	if (!cluster_is_quorate && quorate) {
+		struct cluster_node *node = NULL;
+		struct list *tmp;
+
+		list_iterate(tmp, &cluster_members_list) {
+			node = list_item(tmp, struct cluster_node);
+			if (node->state == NODESTATE_AISONLY)
+				send_kill(node->node_id, CLUSTER_KILL_REJOIN);
+		}
+	}
+
 	cluster_is_quorate = quorate;
 
 }
@@ -386,6 +401,7 @@
 		strcpy(nodename, un.nodename);
 	}
 
+	time(&join_time);
 	us = add_new_node(nodename, wanted_nodeid, -1, expected_votes,
 			  NODESTATE_MEMBER);
 	set_port_bit(us, 0);
@@ -424,6 +440,7 @@
 	int total_votes = 0;
 	int max_expected = 0;
 	int addrlen;
+	int uncounted = 0;
 	struct cluster_node *node;
 	struct sockaddr_storage *ss;
 	char *ptr;
@@ -437,6 +454,8 @@
 			total_votes += node->votes;
 			max_expected = max(max_expected, node->expected_votes);
 		}
+		if (node->state == NODESTATE_AISONLY)
+			uncounted = 1;
 	}
 	if (quorum_device && quorum_device->state == NODESTATE_MEMBER)
 		total_votes += quorum_device->votes;
@@ -467,6 +486,8 @@
 		einfo->flags |= CMAN_EXTRA_FLAG_ERROR;
 	if (shutdown_con)
 		einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN;
+	if (uncounted)
+		einfo->flags |= CMAN_EXTRA_FLAG_UNCOUNTED;
 
 	ptr = einfo->addresses;
 	for (i=0; i<num_interfaces; i++) {
@@ -585,9 +606,22 @@
 	unsigned int total_votes;
 	unsigned int newquorum;
 	unsigned int newexp;
+	struct cluster_node *node = NULL;
+	struct list *tmp;
 
 	if (!we_are_a_cluster_member)
 		return -ENOENT;
+
+	/* If there are any AISONLY nodes then we can't allow
+	   the user to set expected votes as it may destroy data */
+	list_iterate(tmp, &cluster_members_list) {
+		node = list_item(tmp, struct cluster_node);
+		if (node->state == NODESTATE_AISONLY) {
+			log_printf(LOG_NOTICE, "Attempt to set expected votes when cluster has AISONLY nodes in it.");
+			return -EINVAL;
+		}
+	}
+
 	memcpy(&newexp, cmdbuf, sizeof(int));
 	newquorum = calculate_quorum(1, newexp, &total_votes);
 
@@ -647,7 +681,7 @@
 	if ((node = find_node_by_nodeid(nodeid)) == NULL)
 		return -EINVAL;
 
-	if (node->state != NODESTATE_MEMBER)
+	if (node->state != NODESTATE_MEMBER && node->state != NODESTATE_AISONLY)
 		return -EINVAL;
 
 	node->leave_reason = CLUSTER_LEAVEFLAG_KILLED;
@@ -1485,6 +1519,7 @@
 	msg->config_version = config_version;
 	msg->flags = us->flags;
 	msg->fence_time = us->fence_time;
+	msg->join_time = join_time;
 	strcpy(msg->clustername, cluster_name);
 	if (us->fence_agent)
 	{
@@ -1644,10 +1679,27 @@
 	node = find_node_by_nodeid(nodeid);
 	assert(node);
 
-	if (node->flags & NODE_FLAGS_GOTTRANSITION) {
-
+        /* This is the killer. If the join_time of the node matches that already stored AND
+	   the node has been down, then we kill it as this must be a rejoin */
+	if (msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) {
+		if (cluster_is_quorate) {
+			P_MEMB("Killing node %s because it has rejoined the cluster without cman_tool join", node->name);
+			log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster without cman_tool join", node->name);
+			send_kill(nodeid, CLUSTER_KILL_REJOIN);
+		}
+		else {
+			P_MEMB("Node %s not joined to cman because it has rejoined an inquorate cluster", node->name);
+			log_printf(LOG_CRIT, "Node %s not joined to cman because it has rejoined an inquorate cluster", node->name);
+			node->state = NODESTATE_AISONLY;
+		}
+		return;
 	}
-	node->flags = msg->flags;
+	else {
+		node->cman_join_time = msg->join_time;
+		add_ais_node(nodeid, incarnation, num_ais_nodes);
+	}
+
+	node->flags = msg->flags; /* This will clear the BEENDOWN flag of course */
 	if (node->fence_agent && msg->fence_agent[0] && strcmp(node->fence_agent, msg->fence_agent))
 	{
 		free(node->fence_agent);
@@ -1748,7 +1800,7 @@
 			node->leave_reason = leavemsg->reason;
 
 		/* Mark it as leaving, and remove it when we get an AIS node down event for it */
-		if (node && node->state == NODESTATE_MEMBER)
+		if (node && (node->state == NODESTATE_MEMBER || node->state == NODESTATE_AISONLY))
 			node->state = NODESTATE_LEAVING;
 		break;
 
@@ -1843,11 +1895,9 @@
 		node->name = strdup(tempname);
 	}
 
-	node->incarnation = incarnation;
-
-	gettimeofday(&node->join_time, NULL);
-
 	if (node->state == NODESTATE_DEAD) {
+		gettimeofday(&node->join_time, NULL);
+		node->incarnation = incarnation;
 		node->state = NODESTATE_MEMBER;
 		cluster_members++;
 		recalculate_quorum(0);
@@ -1874,6 +1924,7 @@
 		node->flags &= ~NODE_FLAGS_FENCED;
 
 	node->flags &= ~NODE_FLAGS_FENCEDWHILEUP;
+	node->flags |= NODE_FLAGS_BEENDOWN;
 
 	if (node->state == NODESTATE_MEMBER) {
 		node->state = NODESTATE_DEAD;
--- cluster/cman/lib/libcman.c	2006/08/24 10:40:57	1.27
+++ cluster/cman/lib/libcman.c	2006/10/05 07:48:33	1.28
@@ -45,7 +45,7 @@
 	int magic;
 	int fd;
 	int zero_fd;
-	void *private;
+	void *privdata;
 	int want_reply;
 	cman_callback_t event_callback;
 	cman_datacallback_t data_callback;
@@ -161,7 +161,7 @@
 		else
 		{
 			if (h->data_callback)
-				h->data_callback(h, h->private,
+				h->data_callback(h, h->privdata,
 						 buf+sizeof(*dmsg), msg->length-sizeof(*dmsg),
 						 dmsg->port, dmsg->nodeid);
 		}
@@ -203,14 +203,14 @@
 		{
 			if (msg->command == CMAN_CMD_EVENT && h->event_callback) {
 				struct sock_event_message *emsg = (struct sock_event_message *)msg;
-				h->event_callback(h, h->private, emsg->reason, emsg->arg);
+				h->event_callback(h, h->privdata, emsg->reason, emsg->arg);
 			}
 
 			if (msg->command == CMAN_CMD_CONFCHG && h->confchg_callback)
 			{
 				struct sock_confchg_message *cmsg = (struct sock_confchg_message *)msg;
 
-				h->confchg_callback(h, h->private,
+				h->confchg_callback(h, h->privdata,
 						    cmsg->entries,cmsg->member_entries, 
 						    &cmsg->entries[cmsg->member_entries], cmsg->left_entries, 
 						    &cmsg->entries[cmsg->member_entries+cmsg->left_entries], cmsg->joined_entries);
@@ -287,7 +287,7 @@
 	return wait_for_reply(h, outbuf, outlen);
 }
 
-static cman_handle_t open_socket(const char *name, int namelen, void *private)
+static cman_handle_t open_socket(const char *name, int namelen, void *privdata)
 {
 	struct cman_handle *h;
 	struct sockaddr_un sockaddr;
@@ -297,7 +297,7 @@
 		return NULL;
 
 	h->magic = CMAN_MAGIC;
-	h->private = private;
+	h->privdata = privdata;
 	h->event_callback = NULL;
 	h->data_callback = NULL;
 	h->confchg_callback = NULL;
@@ -344,14 +344,14 @@
 	return (cman_handle_t)h;
 }
 
-cman_handle_t cman_admin_init(void *private)
+cman_handle_t cman_admin_init(void *privdata)
 {
-	return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), private);
+	return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), privdata);
 }
 
-cman_handle_t cman_init(void *private)
+cman_handle_t cman_init(void *privdata)
 {
-	return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), private);
+	return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), privdata);
 }
 
 int cman_finish(cman_handle_t handle)
@@ -367,21 +367,21 @@
 	return 0;
 }
 
-int cman_set_private(cman_handle_t *handle, void *private)
+int cman_setprivdata(cman_handle_t *handle, void *privdata)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
 	VALIDATE_HANDLE(h);
 
-	h->private = private;
+	h->privdata = privdata;
 	return 0;
 }
 
-int cman_get_private(cman_handle_t *handle, void **private)
+int cman_getprivdata(cman_handle_t *handle, void **privdata)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
 	VALIDATE_HANDLE(h);
 
-	*private = h->private;
+	*privdata = h->privdata;
 
 	return 0;
 }
@@ -631,6 +631,53 @@
 	return 0;
 }
 
+int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes)
+{
+	struct cman_handle *h = (struct cman_handle *)handle;
+	struct cl_cluster_node *cman_nodes;
+	int status;
+	int buflen;
+	int count = 0;
+	int out_count = 0;
+	VALIDATE_HANDLE(h);
+
+	if (!retnodes || !nodes || maxnodes < 1)
+	{
+		errno = EINVAL;
+		return -1;
+	}
+
+	buflen = sizeof(struct cl_cluster_node) * maxnodes;
+	cman_nodes = malloc(buflen);
+	if (!cman_nodes)
+		return -1;
+
+	status = info_call(h, CMAN_CMD_GETALLMEMBERS, NULL, 0, cman_nodes, buflen);
+	if (status < 0)
+	{
+		int saved_errno = errno;
+		free(cman_nodes);
+		errno = saved_errno;
+		return -1;
+	}
+
+	if (cman_nodes[0].size != sizeof(struct cl_cluster_node))
+	{
+		free(cman_nodes);
+		errno = EINVAL;
+		return -1;
+	}
+
+	for (count = 0; count < status; count++)
+	{
+		if (cman_nodes[count].state == NODESTATE_AISONLY && out_count < maxnodes)
+			copy_node(&nodes[out_count++], &cman_nodes[count]);
+	}
+	free(cman_nodes);
+	*retnodes = out_count;
+	return 0;
+}
+
 int cman_get_node(cman_handle_t handle, int nodeid, cman_node_t *node)
 {
 	struct cman_handle *h = (struct cman_handle *)handle;
--- cluster/cman/lib/libcman.h	2006/09/01 08:47:40	1.28
+++ cluster/cman/lib/libcman.h	2006/10/05 07:48:33	1.29
@@ -150,9 +150,10 @@
  */
 
 /* Flags in ei_flags */
-#define CMAN_EXTRA_FLAG_2NODE    1
-#define CMAN_EXTRA_FLAG_ERROR    2
-#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_2NODE      1
+#define CMAN_EXTRA_FLAG_ERROR      2
+#define CMAN_EXTRA_FLAG_SHUTDOWN   4
+#define CMAN_EXTRA_FLAG_DISALLOWED 8
 
 typedef struct cman_extra_info {
 	int           ei_node_state;
@@ -198,8 +199,8 @@
 int cman_finish(cman_handle_t handle);
 
 /* Update/retrieve the private data */
-int cman_set_private(cman_handle_t *h, void *privdata);
-int cman_get_private(cman_handle_t *h, void **privdata);
+int cman_setprivdata(cman_handle_t *h, void *privdata);
+int cman_getprivdata(cman_handle_t *h, void **privdata);
 
 /*
  * Notification of membership change events. Note that these are sent after
@@ -259,6 +260,10 @@
    to determine how big your array needs to be */
 int cman_get_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes);
 
+/* Returns a list of nodes that are known to AIS but blocked from joining the CMAN
+   cluster because they rejoined the cluster without a cman_tool join */
+int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes);
+
 /*
  * cman_get_node() can get node info by nodeid OR by name. If the first
  * char of node->cn_name is zero then the nodeid will be used, otherwise



                 reply	other threads:[~2006-10-05  7:48 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061005074834.577.qmail@sourceware.org \
    --to=pcaulfield@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.