From: pcaulfield@sourceware.org <pcaulfield@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cman cman_tool/main.c daemon/ais.c dae ...
Date: 5 Oct 2006 07:48:34 -0000 [thread overview]
Message-ID: <20061005074834.577.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: pcaulfield at sourceware.org 2006-10-05 07:48:33
Modified files:
cman/cman_tool : main.c
cman/daemon : ais.c ais.h cnxman-private.h cnxman-socket.h
commands.c
cman/lib : libcman.c libcman.h
Log message:
Add some extra semantics to CMAN to cope with openAIS rejoins.
Basically, this adds an extra state to a node: AISONLY which is only cleared
when cman receives a valid TRANSITION message from the node.
A TRANSITION message is deemed to be invalid if the join_time of the node
has not been changed (this is the timestamp the daemon was started) and
the node has since been down and is rejoining. cman_tool will show if this
is the case for a cluster by displaying the DisallowedNodes flag in the
output of the "cman_tool status" command.
If there are disallowed nodes in the cluster then the "cman_tool expected"
command is disabled until those nodes have been removed.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/main.c.diff?cvsroot=cluster&r1=1.48&r2=1.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.41&r2=1.42
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.h.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-private.h.diff?cvsroot=cluster&r1=1.24&r2=1.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/cnxman-socket.h.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/commands.c.diff?cvsroot=cluster&r1=1.50&r2=1.51
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.c.diff?cvsroot=cluster&r1=1.27&r2=1.28
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/lib/libcman.h.diff?cvsroot=cluster&r1=1.28&r2=1.29
--- cluster/cman/cman_tool/main.c 2006/08/11 12:34:18 1.48
+++ cluster/cman/cman_tool/main.c 2006/10/05 07:48:33 1.49
@@ -232,6 +232,8 @@
printf(" Shutdown");
if (einfo->ei_flags & CMAN_EXTRA_FLAG_ERROR)
printf(" Error");
+ if (einfo->ei_flags & CMAN_EXTRA_FLAG_DISALLOWED)
+ printf(" DisallowedNodes");
printf(" \n");
printf("Ports Bound: ");
--- cluster/cman/daemon/ais.c 2006/10/02 08:50:02 1.41
+++ cluster/cman/daemon/ais.c 2006/10/05 07:48:33 1.42
@@ -57,6 +57,7 @@
struct totem_ip_address ifaddrs[MAX_INTERFACES];
int num_interfaces;
uint64_t incarnation;
+int num_ais_nodes;
static int config_run;
static char errorstring[512];
@@ -411,12 +412,15 @@
P_AIS("confchg_fn called type = %d, seq=%lld\n", configuration_type, ring_id->seq);
incarnation = ring_id->seq;
+ num_ais_nodes = member_list_entries;
/* Tell the cman membership layer */
for (i=0; i<left_list_entries; i++)
del_ais_node(left_list[i]);
- for (i=0; i<joined_list_entries; i++)
- add_ais_node(joined_list[i], incarnation, member_list_entries);
+
+ /* Joining nodes are only added after a valid TRANSITION message
+ * is received.
+ */
/* Save the left list for later so we can do a consolidated confchg message */
if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
--- cluster/cman/daemon/ais.h 2006/08/11 12:34:18 1.10
+++ cluster/cman/daemon/ais.h 2006/10/05 07:48:33 1.11
@@ -25,3 +25,4 @@
extern struct totem_ip_address mcast_addr[MAX_INTERFACES];
extern struct totem_ip_address ifaddrs[MAX_INTERFACES];
extern int num_interfaces;
+extern int num_ais_nodes;
--- cluster/cman/daemon/cnxman-private.h 2006/09/22 12:35:42 1.24
+++ cluster/cman/daemon/cnxman-private.h 2006/10/05 07:48:33 1.25
@@ -81,6 +81,7 @@
unsigned int config_version;
unsigned int flags;
uint64_t fence_time;
+ uint64_t join_time;
char clustername[16];
char fence_agent[];
};
@@ -142,7 +143,10 @@
#define RECONFIG_PARAM_CONFIG_VERSION 3
#define RECONFIG_PARAM_CCS 4
-#define NODE_FLAGS_GOTTRANSITION 1
+/* NODE_FLAGS_BEENDOWN - this node has been down.
+ NODE_FLAGS_FENCED - This node has been fenced since it last went down.
+*/
+#define NODE_FLAGS_BEENDOWN 1
#define NODE_FLAGS_FENCED 2
#define NODE_FLAGS_FENCEDWHILEUP 4
@@ -161,6 +165,8 @@
uint64_t fence_time; /* A time_t */
char *fence_agent;
+ uint64_t cman_join_time; /* A time_t */
+
struct timeval last_hello; /* Only used for quorum devices */
unsigned int votes;
@@ -189,6 +195,7 @@
/* Kill reasons */
#define CLUSTER_KILL_REJECTED 1
#define CLUSTER_KILL_CMANTOOL 2
+#define CLUSTER_KILL_REJOIN 3
#define MAX_ADDR_PRINTED_LEN (address_length*3 + 1)
--- cluster/cman/daemon/cnxman-socket.h 2006/08/17 13:22:39 1.16
+++ cluster/cman/daemon/cnxman-socket.h 2006/10/05 07:48:33 1.17
@@ -113,7 +113,7 @@
#define MSG_BCASTSELF 0x4000000
typedef enum { NODESTATE_JOINING=1, NODESTATE_MEMBER,
- NODESTATE_DEAD, NODESTATE_LEAVING } nodestate_t;
+ NODESTATE_DEAD, NODESTATE_LEAVING, NODESTATE_AISONLY } nodestate_t;
static const char CLIENT_SOCKNAME[]= "/var/run/cman_client";
static const char ADMIN_SOCKNAME[]= "/var/run/cman_admin";
@@ -163,9 +163,10 @@
};
/* Flags */
-#define CMAN_EXTRA_FLAG_2NODE 1
-#define CMAN_EXTRA_FLAG_ERROR 2
-#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_2NODE 1
+#define CMAN_EXTRA_FLAG_ERROR 2
+#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_UNCOUNTED 8
struct cl_extra_info {
int node_state;
--- cluster/cman/daemon/commands.c 2006/09/22 12:35:42 1.50
+++ cluster/cman/daemon/commands.c 2006/10/05 07:48:33 1.51
@@ -33,6 +33,7 @@
#include <openais/totem/totemip.h>
#include <openais/totem/totempg.h>
#include <openais/service/swab.h>
+#include <openais/service/print.h>
#include <openais/totem/aispoll.h>
#include "list.h"
#include "cnxman-socket.h"
@@ -72,6 +73,7 @@
static struct cluster_node *quorum_device;
static uint16_t cluster_id;
static int ais_running;
+static time_t join_time;
static poll_timer_handle quorum_device_timer;
/* If CCS gets out of sync, we poll it until it isn't */
@@ -94,6 +96,7 @@
static int send_port_enquire(int nodeid);
static void process_internal_message(char *data, int len, int nodeid, int byteswap);
static void recalculate_quorum(int allow_decrease);
+static void send_kill(int nodeid, uint16_t reason);
static void set_port_bit(struct cluster_node *node, uint8_t port)
{
@@ -146,6 +149,18 @@
if (!cluster_is_quorate && quorate)
log_msg(LOG_INFO, "quorum regained, resuming activity\n");
+ /* If we are newly quorate, then kill any AISONLY nodes */
+ if (!cluster_is_quorate && quorate) {
+ struct cluster_node *node = NULL;
+ struct list *tmp;
+
+ list_iterate(tmp, &cluster_members_list) {
+ node = list_item(tmp, struct cluster_node);
+ if (node->state == NODESTATE_AISONLY)
+ send_kill(node->node_id, CLUSTER_KILL_REJOIN);
+ }
+ }
+
cluster_is_quorate = quorate;
}
@@ -386,6 +401,7 @@
strcpy(nodename, un.nodename);
}
+ time(&join_time);
us = add_new_node(nodename, wanted_nodeid, -1, expected_votes,
NODESTATE_MEMBER);
set_port_bit(us, 0);
@@ -424,6 +440,7 @@
int total_votes = 0;
int max_expected = 0;
int addrlen;
+ int uncounted = 0;
struct cluster_node *node;
struct sockaddr_storage *ss;
char *ptr;
@@ -437,6 +454,8 @@
total_votes += node->votes;
max_expected = max(max_expected, node->expected_votes);
}
+ if (node->state == NODESTATE_AISONLY)
+ uncounted = 1;
}
if (quorum_device && quorum_device->state == NODESTATE_MEMBER)
total_votes += quorum_device->votes;
@@ -467,6 +486,8 @@
einfo->flags |= CMAN_EXTRA_FLAG_ERROR;
if (shutdown_con)
einfo->flags |= CMAN_EXTRA_FLAG_SHUTDOWN;
+ if (uncounted)
+ einfo->flags |= CMAN_EXTRA_FLAG_UNCOUNTED;
ptr = einfo->addresses;
for (i=0; i<num_interfaces; i++) {
@@ -585,9 +606,22 @@
unsigned int total_votes;
unsigned int newquorum;
unsigned int newexp;
+ struct cluster_node *node = NULL;
+ struct list *tmp;
if (!we_are_a_cluster_member)
return -ENOENT;
+
+ /* If there are any AISONLY nodes then we can't allow
+ the user to set expected votes as it may destroy data */
+ list_iterate(tmp, &cluster_members_list) {
+ node = list_item(tmp, struct cluster_node);
+ if (node->state == NODESTATE_AISONLY) {
+ log_printf(LOG_NOTICE, "Attempt to set expected votes when cluster has AISONLY nodes in it.");
+ return -EINVAL;
+ }
+ }
+
memcpy(&newexp, cmdbuf, sizeof(int));
newquorum = calculate_quorum(1, newexp, &total_votes);
@@ -647,7 +681,7 @@
if ((node = find_node_by_nodeid(nodeid)) == NULL)
return -EINVAL;
- if (node->state != NODESTATE_MEMBER)
+ if (node->state != NODESTATE_MEMBER && node->state != NODESTATE_AISONLY)
return -EINVAL;
node->leave_reason = CLUSTER_LEAVEFLAG_KILLED;
@@ -1485,6 +1519,7 @@
msg->config_version = config_version;
msg->flags = us->flags;
msg->fence_time = us->fence_time;
+ msg->join_time = join_time;
strcpy(msg->clustername, cluster_name);
if (us->fence_agent)
{
@@ -1644,10 +1679,27 @@
node = find_node_by_nodeid(nodeid);
assert(node);
- if (node->flags & NODE_FLAGS_GOTTRANSITION) {
-
+ /* This is the killer. If the join_time of the node matches that already stored AND
+ the node has been down, then we kill it as this must be a rejoin */
+ if (msg->join_time == node->cman_join_time && node->flags & NODE_FLAGS_BEENDOWN) {
+ if (cluster_is_quorate) {
+ P_MEMB("Killing node %s because it has rejoined the cluster without cman_tool join", node->name);
+ log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster without cman_tool join", node->name);
+ send_kill(nodeid, CLUSTER_KILL_REJOIN);
+ }
+ else {
+ P_MEMB("Node %s not joined to cman because it has rejoined an inquorate cluster", node->name);
+ log_printf(LOG_CRIT, "Node %s not joined to cman because it has rejoined an inquorate cluster", node->name);
+ node->state = NODESTATE_AISONLY;
+ }
+ return;
}
- node->flags = msg->flags;
+ else {
+ node->cman_join_time = msg->join_time;
+ add_ais_node(nodeid, incarnation, num_ais_nodes);
+ }
+
+ node->flags = msg->flags; /* This will clear the BEENDOWN flag of course */
if (node->fence_agent && msg->fence_agent[0] && strcmp(node->fence_agent, msg->fence_agent))
{
free(node->fence_agent);
@@ -1748,7 +1800,7 @@
node->leave_reason = leavemsg->reason;
/* Mark it as leaving, and remove it when we get an AIS node down event for it */
- if (node && node->state == NODESTATE_MEMBER)
+ if (node && (node->state == NODESTATE_MEMBER || node->state == NODESTATE_AISONLY))
node->state = NODESTATE_LEAVING;
break;
@@ -1843,11 +1895,9 @@
node->name = strdup(tempname);
}
- node->incarnation = incarnation;
-
- gettimeofday(&node->join_time, NULL);
-
if (node->state == NODESTATE_DEAD) {
+ gettimeofday(&node->join_time, NULL);
+ node->incarnation = incarnation;
node->state = NODESTATE_MEMBER;
cluster_members++;
recalculate_quorum(0);
@@ -1874,6 +1924,7 @@
node->flags &= ~NODE_FLAGS_FENCED;
node->flags &= ~NODE_FLAGS_FENCEDWHILEUP;
+ node->flags |= NODE_FLAGS_BEENDOWN;
if (node->state == NODESTATE_MEMBER) {
node->state = NODESTATE_DEAD;
--- cluster/cman/lib/libcman.c 2006/08/24 10:40:57 1.27
+++ cluster/cman/lib/libcman.c 2006/10/05 07:48:33 1.28
@@ -45,7 +45,7 @@
int magic;
int fd;
int zero_fd;
- void *private;
+ void *privdata;
int want_reply;
cman_callback_t event_callback;
cman_datacallback_t data_callback;
@@ -161,7 +161,7 @@
else
{
if (h->data_callback)
- h->data_callback(h, h->private,
+ h->data_callback(h, h->privdata,
buf+sizeof(*dmsg), msg->length-sizeof(*dmsg),
dmsg->port, dmsg->nodeid);
}
@@ -203,14 +203,14 @@
{
if (msg->command == CMAN_CMD_EVENT && h->event_callback) {
struct sock_event_message *emsg = (struct sock_event_message *)msg;
- h->event_callback(h, h->private, emsg->reason, emsg->arg);
+ h->event_callback(h, h->privdata, emsg->reason, emsg->arg);
}
if (msg->command == CMAN_CMD_CONFCHG && h->confchg_callback)
{
struct sock_confchg_message *cmsg = (struct sock_confchg_message *)msg;
- h->confchg_callback(h, h->private,
+ h->confchg_callback(h, h->privdata,
cmsg->entries,cmsg->member_entries,
&cmsg->entries[cmsg->member_entries], cmsg->left_entries,
&cmsg->entries[cmsg->member_entries+cmsg->left_entries], cmsg->joined_entries);
@@ -287,7 +287,7 @@
return wait_for_reply(h, outbuf, outlen);
}
-static cman_handle_t open_socket(const char *name, int namelen, void *private)
+static cman_handle_t open_socket(const char *name, int namelen, void *privdata)
{
struct cman_handle *h;
struct sockaddr_un sockaddr;
@@ -297,7 +297,7 @@
return NULL;
h->magic = CMAN_MAGIC;
- h->private = private;
+ h->privdata = privdata;
h->event_callback = NULL;
h->data_callback = NULL;
h->confchg_callback = NULL;
@@ -344,14 +344,14 @@
return (cman_handle_t)h;
}
-cman_handle_t cman_admin_init(void *private)
+cman_handle_t cman_admin_init(void *privdata)
{
- return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), private);
+ return open_socket(ADMIN_SOCKNAME, sizeof(ADMIN_SOCKNAME), privdata);
}
-cman_handle_t cman_init(void *private)
+cman_handle_t cman_init(void *privdata)
{
- return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), private);
+ return open_socket(CLIENT_SOCKNAME, sizeof(CLIENT_SOCKNAME), privdata);
}
int cman_finish(cman_handle_t handle)
@@ -367,21 +367,21 @@
return 0;
}
-int cman_set_private(cman_handle_t *handle, void *private)
+int cman_setprivdata(cman_handle_t *handle, void *privdata)
{
struct cman_handle *h = (struct cman_handle *)handle;
VALIDATE_HANDLE(h);
- h->private = private;
+ h->privdata = privdata;
return 0;
}
-int cman_get_private(cman_handle_t *handle, void **private)
+int cman_getprivdata(cman_handle_t *handle, void **privdata)
{
struct cman_handle *h = (struct cman_handle *)handle;
VALIDATE_HANDLE(h);
- *private = h->private;
+ *privdata = h->privdata;
return 0;
}
@@ -631,6 +631,53 @@
return 0;
}
+int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes)
+{
+ struct cman_handle *h = (struct cman_handle *)handle;
+ struct cl_cluster_node *cman_nodes;
+ int status;
+ int buflen;
+ int count = 0;
+ int out_count = 0;
+ VALIDATE_HANDLE(h);
+
+ if (!retnodes || !nodes || maxnodes < 1)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ buflen = sizeof(struct cl_cluster_node) * maxnodes;
+ cman_nodes = malloc(buflen);
+ if (!cman_nodes)
+ return -1;
+
+ status = info_call(h, CMAN_CMD_GETALLMEMBERS, NULL, 0, cman_nodes, buflen);
+ if (status < 0)
+ {
+ int saved_errno = errno;
+ free(cman_nodes);
+ errno = saved_errno;
+ return -1;
+ }
+
+ if (cman_nodes[0].size != sizeof(struct cl_cluster_node))
+ {
+ free(cman_nodes);
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (count = 0; count < status; count++)
+ {
+ if (cman_nodes[count].state == NODESTATE_AISONLY && out_count < maxnodes)
+ copy_node(&nodes[out_count++], &cman_nodes[count]);
+ }
+ free(cman_nodes);
+ *retnodes = out_count;
+ return 0;
+}
+
int cman_get_node(cman_handle_t handle, int nodeid, cman_node_t *node)
{
struct cman_handle *h = (struct cman_handle *)handle;
--- cluster/cman/lib/libcman.h 2006/09/01 08:47:40 1.28
+++ cluster/cman/lib/libcman.h 2006/10/05 07:48:33 1.29
@@ -150,9 +150,10 @@
*/
/* Flags in ei_flags */
-#define CMAN_EXTRA_FLAG_2NODE 1
-#define CMAN_EXTRA_FLAG_ERROR 2
-#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_2NODE 1
+#define CMAN_EXTRA_FLAG_ERROR 2
+#define CMAN_EXTRA_FLAG_SHUTDOWN 4
+#define CMAN_EXTRA_FLAG_DISALLOWED 8
typedef struct cman_extra_info {
int ei_node_state;
@@ -198,8 +199,8 @@
int cman_finish(cman_handle_t handle);
/* Update/retrieve the private data */
-int cman_set_private(cman_handle_t *h, void *privdata);
-int cman_get_private(cman_handle_t *h, void **privdata);
+int cman_setprivdata(cman_handle_t *h, void *privdata);
+int cman_getprivdata(cman_handle_t *h, void **privdata);
/*
* Notification of membership change events. Note that these are sent after
@@ -259,6 +260,10 @@
to determine how big your array needs to be */
int cman_get_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes);
+/* Returns a list of nodes that are known to AIS but blocked from joining the CMAN
+ cluster because they rejoined the cluster without a cman_tool join */
+int cman_get_disallowed_nodes(cman_handle_t handle, int maxnodes, int *retnodes, cman_node_t *nodes);
+
/*
* cman_get_node() can get node info by nodeid OR by name. If the first
* char of node->cn_name is zero then the nodeid will be used, otherwise
reply other threads:[~2006-10-05 7:48 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061005074834.577.qmail@sourceware.org \
--to=pcaulfield@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).