From mboxrd@z Thu Jan 1 00:00:00 1970 From: pcaulfield@sourceware.org Date: 20 Aug 2007 14:37:29 -0000 Subject: [Cluster-devel] cluster/cman cman_tool/main.c daemon/cnxman-pr ... Message-ID: <20070820143729.18052.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: pcaulfield at sourceware.org 2007-08-20 14:37:26 Modified files: cman/cman_tool : main.c cman/daemon : cnxman-private.h cnxman-socket.h commands.c cman/lib : libcman.c libcman.h Log message: Add a "dirty" flag to cman to prevent active clusters merging with one-another. bz#251966 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/main.c.diff?cvsroot=cluster&r1=1.53&r2=1.54 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.26&r2=1.27 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-socket.h.diff?cvsroot=cluster&r1=1.17&r2=1.18 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.65&r2=1.66 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.c.diff?cvsroot=cluster&r1=1.34&r2=1.35 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.h.diff?cvsroot=cluster&r1=1.30&r2=1.31 --- cluster/cman/cman_tool/main.c 2007/08/15 19:39:54 1.53 +++ cluster/cman/cman_tool/main.c 2007/08/20 14:37:26 1.54 @@ -235,6 +235,8 @@ printf(" Error"); if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED) printf(" DisallowedNodes"); + if (einfo->ei_flags & CMAN_EXTRA_FLAG_DIRTY) + printf(" Dirty"); printf(" \n"); printf("Ports Bound: "); --- cluster/cman/daemon/cnxman-private.h 2006/11/03 15:07:52 1.26 +++ cluster/cman/daemon/cnxman-private.h 2007/08/20 14:37:26 1.27 @@ -18,8 +18,8 @@ /* Protocol Version triplet */ #define CNXMAN_MAJOR_VERSION 6 -#define CNXMAN_MINOR_VERSION 0 -#define CNXMAN_PATCH_VERSION 1 +#define CNXMAN_MINOR_VERSION 1 +#define CNXMAN_PATCH_VERSION 0 /* How we announce ourself in console events */ #define CMAN_NAME "CMAN" @@ -147,11 +147,14 @@ NODE_FLAGS_FENCED - This node has been fenced since it last went down. NODE_FLAGS_FENCEDWHILEUP - This node was fenced manually (probably). NODE_FLAGS_SEESDISALLOWED - Only set in a transition message + NODE_FLAGS_DIRTY - This node has internal state and must not join + a cluster that also has state. */ #define NODE_FLAGS_BEENDOWN 1 #define NODE_FLAGS_FENCED 2 #define NODE_FLAGS_FENCEDWHILEUP 4 #define NODE_FLAGS_SEESDISALLOWED 8 +#define NODE_FLAGS_DIRTY 16 /* There's one of these for each node in the cluster */ struct cluster_node { --- cluster/cman/daemon/cnxman-socket.h 2006/10/05 07:48:33 1.17 +++ cluster/cman/daemon/cnxman-socket.h 2007/08/20 14:37:26 1.18 @@ -52,6 +52,7 @@ #define CMAN_CMD_GET_NODEADDRS 0x000000bf #define CMAN_CMD_START_CONFCHG 0x000000c0 #define CMAN_CMD_STOP_CONFCHG 0x000000c1 +#define CMAN_CMD_SET_DIRTY 0x800000c2 #define CMAN_CMD_DATA 0x00000100 #define CMAN_CMD_BIND 0x00000101 @@ -167,6 +168,7 @@ #define CMAN_EXTRA_FLAG_ERROR 2 #define CMAN_EXTRA_FLAG_SHUTDOWN 4 #define CMAN_EXTRA_FLAG_UNCOUNTED 8 +#define CMAN_EXTRA_FLAG_DIRTY 16 struct cl_extra_info { int node_state; --- cluster/cman/daemon/commands.c 2007/06/26 09:09:13 1.65 +++ cluster/cman/daemon/commands.c 2007/08/20 14:37:26 1.66 @@ -502,6 +502,8 @@ einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN; if (uncounted) einfo->flags |= CMAN_EXTRA_FLAG_UNCOUNTED; + if (us->flags & NODE_FLAGS_DIRTY) + einfo->flags |= CMAN_EXTRA_FLAG_DIRTY; ptr = einfo->addresses; for (i=0; iflags |= NODE_FLAGS_DIRTY; + break; + case CMAN_CMD_START_CONFCHG: con->confchg = 1; err = 0; @@ -1706,7 +1712,27 @@ node = find_node_by_nodeid(nodeid); assert(node); - /* This is the killer. If the join_time of the node matches that already stored AND + /* Newer nodes 6.1.0 onwards, set the DIRTY flag if they have state. If the new node has been down + and has state then we mark it disallowed because we cannot merge stateful nodes */ + if (msg->flags & NODE_FLAGS_DIRTY && node->flags & NODE_FLAGS_BEENDOWN) { + /* Don't duplicate messages */ + if (node->state != NODESTATE_AISONLY) { + if (cluster_is_quorate) { + P_MEMB("Killing node %s because it has rejoined the cluster with existing state", node->name); + log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with existing state", node->name); + node->state = NODESTATE_AISONLY; + send_kill(nodeid, CLUSTER_KILL_REJOIN); + } + else { + P_MEMB("Node %s not joined to cman because it has existing state", node->name); + log_printf(LOG_CRIT, "Node %s not joined to cman because it has existing state", node->name); + node->state = NODESTATE_AISONLY; + } + } + return; + } + + /* This is for older nodes. If the join_time of the node matches that already stored AND the node has been down, then we kill it as this must be a rejoin */ if (msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) { /* Don't duplicate messages */ --- cluster/cman/lib/libcman.c 2007/05/02 10:27:07 1.34 +++ cluster/cman/lib/libcman.c 2007/08/20 14:37:26 1.35 @@ -978,6 +978,14 @@ return info_call(h, CMAN_CMD_TRY_SHUTDOWN, &flags, sizeof(int), NULL, 0); } +int cman_set_dirty(cman_handle_t handle) +{ + struct cman_handle *h = (struct cman_handle *)handle; + VALIDATE_HANDLE(h); + + return info_call(h, CMAN_CMD_SET_DIRTY, NULL, 0, NULL, 0); +} + int cman_replyto_shutdown(cman_handle_t handle, int yesno) { struct cman_handle *h = (struct cman_handle *)handle; --- cluster/cman/lib/libcman.h 2007/05/02 10:27:07 1.30 +++ cluster/cman/lib/libcman.h 2007/08/20 14:37:26 1.31 @@ -154,6 +154,7 @@ #define CMAN_EXTRA_FLAG_ERROR 2 #define CMAN_EXTRA_FLAG_SHUTDOWN 4 #define CMAN_EXTRA_FLAG_DISALLOWED 8 +#define CMAN_EXTRA_FLAG_DIRTY 16 typedef struct cman_extra_info { int ei_node_state; @@ -388,4 +389,12 @@ int cman_unregister_quorum_device(cman_handle_t handle); int cman_poll_quorum_device(cman_handle_t handle, int isavailable); +/* + * Sets the dirty bit inside cman. This indicates that the node has + * some internal 'state' (eg in a daemon, filesystem or lock manager) + * and cannot merge with another cluster that already has state. + * This cannot be reset. + */ +int cman_set_dirty(cman_handle_t handle); + #endif