cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: pcaulfield@sourceware.org <pcaulfield@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cman cman_tool/main.c daemon/cnxman-pr ...
Date: 3 Nov 2006 15:07:53 -0000	[thread overview]
Message-ID: <20061103150753.2922.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	pcaulfield at sourceware.org	2006-11-03 15:07:53

Modified files:
	cman/cman_tool : main.c 
	cman/daemon    : cnxman-private.h commands.c 

Log message:
	fix bz#213747
	Basically, we don't let a node join a cluster that already has "Disallowed" nodes
	in it, as we don't consistently know the state of the cluster in that case (it
	could be two inquorate halves, for example).
	
	Sorry, Steven, this is yet another instance where cman has to exit() the aisexec
	process for the greater good of the cluster.
	
	I've also enhanced "cman_tool nodes" to show the disallowed nodes and a warning
	message that the cluster is in a bit of a mess.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/main.c.diff?cvsroot=cluster&r1=1.50&r2=1.51
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.25&r2=1.26
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.53&r2=1.54

--- cluster/cman/cman_tool/main.c	2006/10/09 15:54:31	1.50
+++ cluster/cman/cman_tool/main.c	2006/11/03 15:07:51	1.51
@@ -302,6 +302,8 @@
 	int count;
 	int i;
 	int numnodes;
+	int dis_count;
+	cman_node_t *dis_nodes;
 	cman_node_t *nodes;
 	struct tm *jtime;
 	struct tm *ftime;
@@ -322,11 +324,46 @@
 	if (cman_get_nodes(h, count, &numnodes, nodes) < 0)
 		die("cman_get_nodes failed: %s", cman_error(errno));
 
+
+	/* Get Disallowed nodes, so we can show them as such */
+	dis_nodes = malloc(sizeof(cman_node_t) * count);
+
+	if (cman_get_disallowed_nodes(h, count, &dis_count, dis_nodes) == 0) {
+		int i,j;
+		for (i=0; i<numnodes; i++) {
+			for (j=0; j<dis_count; j++) {
+				if (dis_nodes[j].cn_nodeid == nodes[i].cn_nodeid)
+					nodes[i].cn_member = 2;
+			}
+		}
+	}
+
 	/* Sort by nodeid to be friendly */
 	qsort(nodes, numnodes, sizeof(cman_node_t), node_compare);
 
+	if (dis_count) {
+		printf("NOTE: There are %d disallowed nodes,\n", dis_count);
+		printf("      members list may seem inconsistent across the cluster\n");
+	}
+
 	printf("Node  Sts   Inc   Joined               Name\n");
 	for (i=0; i<numnodes; i++) {
+		char member_type;
+
+		switch (nodes[i].cn_member) {
+		case 0:
+			member_type = 'X';
+			break;
+		case 1:
+			member_type = 'M';
+			break;
+		case 2:
+			member_type = 'd';
+			break;
+		default:
+			member_type = '?';
+			break;
+		}
 
 		jtime = localtime(&nodes[i].cn_jointime.tv_sec);
 		if (nodes[i].cn_jointime.tv_sec && nodes[i].cn_member)
@@ -335,7 +372,7 @@
 			strcpy(jstring, "                   ");
 
 		printf("%4d   %c  %5d   %s  %s\n",
-		       nodes[i].cn_nodeid, nodes[i].cn_member?'M':'X',
+		       nodes[i].cn_nodeid, member_type,
 		       nodes[i].cn_incarnation, jstring, nodes[i].cn_name);
 
 		if (comline->fence_opt) {
--- cluster/cman/daemon/cnxman-private.h	2006/10/05 07:48:33	1.25
+++ cluster/cman/daemon/cnxman-private.h	2006/11/03 15:07:52	1.26
@@ -143,12 +143,15 @@
 #define RECONFIG_PARAM_CONFIG_VERSION 3
 #define RECONFIG_PARAM_CCS            4
 
-/* NODE_FLAGS_BEENDOWN   - this node has been down.
-   NODE_FLAGS_FENCED     - This node has been fenced since it last went down.
+/* NODE_FLAGS_BEENDOWN       - This node has been down.
+   NODE_FLAGS_FENCED         - This node has been fenced since it last went down.
+   NODE_FLAGS_FENCEDWHILEUP  - This node was fenced manually (probably).
+   NODE_FLAGS_SEESDISALLOWED - Only set in a transition message
 */
 #define NODE_FLAGS_BEENDOWN           1
 #define NODE_FLAGS_FENCED             2
 #define NODE_FLAGS_FENCEDWHILEUP      4
+#define NODE_FLAGS_SEESDISALLOWED     8
 
 /* There's one of these for each node in the cluster */
 struct cluster_node {
--- cluster/cman/daemon/commands.c	2006/10/16 14:10:21	1.53
+++ cluster/cman/daemon/commands.c	2006/11/03 15:07:52	1.54
@@ -131,6 +131,18 @@
 	return ((node->port_bits[byte] & (1<<bit)) != 0);
 }
 
+static int have_disallowed(void)
+{
+	struct cluster_node *node;
+
+	list_iterate_items(node, &cluster_members_list) {
+		if (node->state == NODESTATE_AISONLY)
+			return 1;
+	}
+
+	return 0;
+}
+
 /* If "cluster_is_quorate" is 0 then all activity apart from protected ports is
  * blocked. */
 static void set_quorate(int total_votes)
@@ -1532,6 +1544,9 @@
 		len += 1;
 	}
 
+	if (have_disallowed())
+		msg->flags |= NODE_FLAGS_SEESDISALLOWED;
+
 	comms_send_message(msg, len,
 			   0,0,
 			   0,  /* multicast */
@@ -1676,6 +1691,16 @@
 		P_MEMB("Transition message from %d does not match current config - should quit ?\n", nodeid);
 		return; // PJC ???
 	}
+
+	/* If the remote node can see AISONLY nodes then we can't join as we don't
+	   know the full state */
+	if (msg->flags & NODE_FLAGS_SEESDISALLOWED && !have_disallowed()) {
+		/* Must use syslog directly here or the message will never arrive */
+		syslog(LOG_CRIT, "CMAN: Joined a cluster with disallowed nodes. must die");
+		exit(2);
+	}
+	msg->flags &= ~NODE_FLAGS_SEESDISALLOWED;
+
 	node = find_node_by_nodeid(nodeid);
 	assert(node);
 
@@ -1703,6 +1728,12 @@
 		add_ais_node(nodeid, incarnation, num_ais_nodes);
 	}
 
+	/* If the cluster already has some AISONLY nodes then we can't make
+	   sense of the membership. So the new node has to also be AISONLY
+	   until we are consistent again */
+	if (have_disallowed() && !node->us)
+		node->state = NODESTATE_AISONLY;
+
 	node->flags = msg->flags; /* This will clear the BEENDOWN flag of course */
 	if (node->fence_agent && msg->fence_agent[0] && strcmp(node->fence_agent, msg->fence_agent))
 	{



             reply	other threads:[~2006-11-03 15:07 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-11-03 15:07 pcaulfield [this message]
  -- strict thread matches above, loose matches on Subject: below --
2007-08-20 14:37 [Cluster-devel] cluster/cman cman_tool/main.c daemon/cnxman-pr pcaulfield
2007-09-17 13:22 pcaulfield

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061103150753.2922.qmail@sourceware.org \
    --to=pcaulfield@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).