From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 22 Jan 2007 23:15:55 -0000 Subject: [Cluster-devel] cluster/fence agents/manual/Makefile fenced/fd ... Message-ID: <20070122231555.28150.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2007-01-22 23:15:54 Modified files: fence/agents/manual: Makefile fence/fenced : fd.h main.c recover.c Log message: Simple manual override for fenced & example replacement for fence_ack_manual Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/manual/Makefile.diff?cvsroot=cluster&r1=1.7&r2=1.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&r1=1.40&r2=1.41 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&r1=1.25&r2=1.26 --- cluster/fence/agents/manual/Makefile 2006/08/11 15:18:08 1.7 +++ cluster/fence/agents/manual/Makefile 2007/01/22 23:15:54 1.8 @@ -36,6 +36,12 @@ fence_ack_manual: ack.o ${CC} -o $@ $^ +# Using manual override instead of the binary version. 
+# (this version does not require any configuration params +# in cluster.conf, but fencing must fail first) +#fence_ack_manual: fence_ack_manual.sh +# cp $@ $^ + ack.o: ack.c $(CC) $(CFLAGS) -c -o $@ $< --- cluster/fence/fenced/fd.h 2006/10/09 10:52:50 1.24 +++ cluster/fence/fenced/fd.h 2007/01/22 23:15:54 1.25 @@ -53,6 +53,7 @@ #define DEFAULT_POST_JOIN_DELAY 6 #define DEFAULT_POST_FAIL_DELAY 0 #define DEFAULT_CLEAN_START 0 +#define DEFAULT_OVERRIDE_PATH "/var/run/cluster/fenced_override" #define FENCED_SOCK_PATH "fenced_socket" extern char *prog_name; @@ -131,10 +132,12 @@ { int post_join_delay; int post_fail_delay; + char *override_path; int8_t clean_start; int8_t post_join_delay_opt; int8_t post_fail_delay_opt; int8_t clean_start_opt; + int8_t override_path_opt; }; #define FDFL_RUN (0) --- cluster/fence/fenced/main.c 2006/12/01 15:28:11 1.40 +++ cluster/fence/fenced/main.c 2007/01/22 23:15:54 1.41 @@ -15,7 +15,7 @@ #include "ccs.h" #include "copyright.cf" -#define OPTION_STRING ("cj:f:Dn:hVSw") +#define OPTION_STRING ("cj:f:Dn:O:hVSw") #define LOCKFILE_NAME "/var/run/fenced.pid" struct client { @@ -145,6 +145,23 @@ free(str); } + if (comline.override_path_opt == FALSE) { + str = NULL; + memset(path, 0, 256); + sprintf(path, "/cluster/fence_daemon/@override_path"); + + error = ccs_get(cd, path, &str); + if (!error) + /* XXX These are not explicitly freed on exit; if + we decide to make fenced handle SIGHUP at a later + time, we will need to free this. 
*/ + comline.override_path = strdup(str); + else + comline.override_path = strdup(DEFAULT_OVERRIDE_PATH); + if (str) + free(str); + } + log_debug("delay post_join %ds post_fail %ds", comline.post_join_delay, comline.post_fail_delay); @@ -500,6 +517,8 @@ DEFAULT_POST_JOIN_DELAY); printf(" -f Post-fail fencing delay (default %d)\n", DEFAULT_POST_FAIL_DELAY); + printf(" -O Override path (default %s)\n", + DEFAULT_OVERRIDE_PATH); printf(" -D Enable debugging code and don't fork\n"); printf(" -h Print this help, then exit\n"); printf(" -V Print program version information, then exit\n"); @@ -547,6 +566,8 @@ int cont = TRUE; int optchar; + comline->override_path_opt = FALSE; + comline->override_path = NULL; comline->post_join_delay_opt = FALSE; comline->post_fail_delay_opt = FALSE; comline->clean_start_opt = FALSE; @@ -571,6 +592,11 @@ comline->post_fail_delay_opt = TRUE; break; + case 'O': + comline->override_path = strdup(optarg); + comline->override_path_opt = TRUE; + break; + case 'D': daemon_debug_opt = TRUE; break; --- cluster/fence/fenced/recover.c 2006/10/09 10:52:50 1.25 +++ cluster/fence/fenced/recover.c 2007/01/22 23:15:54 1.26 @@ -13,6 +13,9 @@ #include "fd.h" #include "ccs.h" +#include +#include +#include extern int our_nodeid; extern commandline_t comline; @@ -213,6 +216,79 @@ return num_victims; } +static inline void close_override(int *fd, char *path) +{ + unlink(path); + if (fd && *fd >= 0) + close(*fd); + *fd = -1; +} + +static int open_override(char *path) +{ + int ret; + mode_t om; + + om = umask(077); + ret = mkfifo(path, (S_IRUSR | S_IWUSR)); + umask(om); + + if (ret < 0) + return -1; + return open(path, O_RDONLY | O_NONBLOCK); +} + +static int check_override(int ofd, char *nodename, int timeout) +{ + char buf[128]; + fd_set rfds; + struct timeval tv = {0, 0}; + int ret, x; + + if (ofd < 0 || !nodename || !strlen(nodename)) { + sleep(timeout); + return 0; + } + + FD_ZERO(&rfds); + FD_SET(ofd, &rfds); + tv.tv_usec = 0; + tv.tv_sec = timeout; + + ret 
= select(ofd + 1, &rfds, NULL, NULL, &tv); + if (ret < 0) { + syslog(LOG_ERR, "select: %s\n", strerror(errno)); + return -1; + } + + if (ret == 0) + return 0; + + memset(buf, 0, sizeof(buf)); + ret = read(ofd, buf, sizeof(buf) - 1); + if (ret < 0) { + syslog(LOG_ERR, "read: %s\n", strerror(errno)); + return -1; + } + + /* chop off control characters */ + for (x = 0; x < ret; x++) { + if (buf[x] < 0x20) { + buf[x] = 0; + break; + } + } + + if (!strcasecmp(nodename, buf)) { + /* Case insensitive, but not as nice as, say, name_equal + in the other file... */ + return 1; + } + + return 0; +} + + /* If there are victims after a node has joined, it's a good indication that they may be joining the cluster shortly. If we delay a bit they might become members and we can avoid fencing them. This is only really an issue @@ -283,6 +359,7 @@ fd_node_t *node; char *master_name; int master, error, cd; + int override = -1; master = find_master_nodeid(fd, &master_name); @@ -319,7 +396,22 @@ list_del(&node->list); free(node); } - sleep(5); + + if (!comline.override_path) { + sleep(5); + continue; + } + + /* Check for manual intervention */ + override = open_override(comline.override_path); + if (check_override(override, node->name, 5) > 0) { + syslog(LOG_WARNING, "fence \"%s\" overridden by " + "administrator intervention", node->name); + + list_del(&node->list); + free(node); + } + close_override(&override, comline.override_path); } ccs_disconnect(cd);