* [Cluster-devel] cluster/fence/fenced fd.h main.c recover.c
@ 2007-01-29 20:04 lhh
0 siblings, 0 replies; 3+ messages in thread
From: lhh @ 2007-01-29 20:04 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: lhh at sourceware.org 2007-01-29 20:04:41
Modified files:
fence/fenced : fd.h main.c recover.c
Log message:
Add manual override for fenced to STABLE branch; patch is a merge from RHEL4 branch; fixes 223060
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.4&r2=1.7.2.4.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.16.2.9.6.2&r2=1.16.2.9.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.10.2.6.6.1&r2=1.10.2.6.6.2
--- cluster/fence/fenced/fd.h 2005/02/24 07:06:09 1.7.2.4
+++ cluster/fence/fenced/fd.h 2007/01/29 20:04:41 1.7.2.4.6.1
@@ -52,6 +52,7 @@
#define FENCED_SOCK_PATH "fenced_socket"
+#define DEFAULT_OVERRIDE_PATH "/var/run/cluster/fenced_override"
#define DEFAULT_POST_JOIN_DELAY 6
#define DEFAULT_POST_FAIL_DELAY 0
@@ -129,10 +130,12 @@
int debug;
int post_join_delay;
int post_fail_delay;
+ char *override_path;
int8_t clean_start;
int8_t post_join_delay_opt;
int8_t post_fail_delay_opt;
int8_t clean_start_opt;
+ int8_t override_path_opt;
};
#define FDFL_RUN (0)
--- cluster/fence/fenced/main.c 2006/01/23 19:24:10 1.16.2.9.6.2
+++ cluster/fence/fenced/main.c 2007/01/29 20:04:41 1.16.2.9.6.3
@@ -23,7 +23,7 @@
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
-#define OPTION_STRING ("cj:f:t:Dn:hVSwQ")
+#define OPTION_STRING ("cj:f:t:Dn:O:hVSwQ")
#define LOCKFILE_NAME "/var/run/fenced.pid"
@@ -40,6 +40,8 @@
DEFAULT_POST_JOIN_DELAY);
printf(" -f <secs> Post-fail fencing delay (default %d)\n",
DEFAULT_POST_FAIL_DELAY);
+ printf(" -O <path> Override path (default %s)\n",
+ DEFAULT_OVERRIDE_PATH);
printf(" -D Enable debugging code and don't fork\n");
printf(" -h Print this help, then exit\n");
printf(" -n <name> Name of the fence domain, \"default\" if none\n");
@@ -434,6 +436,23 @@
free(str);
}
+ if (fd->comline->override_path_opt == FALSE) {
+ str = NULL;
+ memset(path, 0, 256);
+ sprintf(path, "/cluster/fence_daemon/@override_path");
+
+ error = ccs_get(cd, path, &str);
+ if (!error)
+ /* XXX These are not explicitly freed on exit; if
+ we decide to make fenced handle SIGHUP at a later
+ time, we will need to free this. */
+ fd->comline->override_path = strdup(str);
+ else
+ fd->comline->override_path = strdup(DEFAULT_OVERRIDE_PATH);
+ if (str)
+ free(str);
+ }
+
log_debug("delay post_join %ds post_fail %ds",
fd->comline->post_join_delay, fd->comline->post_fail_delay);
@@ -527,6 +546,8 @@
int cont = TRUE;
int optchar;
+ comline->override_path_opt = FALSE;
+ comline->override_path = NULL;
comline->post_join_delay_opt = FALSE;
comline->post_fail_delay_opt = FALSE;
comline->clean_start_opt = FALSE;
@@ -551,6 +572,11 @@
comline->post_fail_delay_opt = TRUE;
break;
+ case 'O':
+ comline->override_path = strdup(optarg);
+ comline->override_path_opt = TRUE;
+ break;
+
case 'D':
comline->debug = TRUE;
fenced_debug = TRUE;
--- cluster/fence/fenced/recover.c 2005/06/21 18:07:31 1.10.2.6.6.1
+++ cluster/fence/fenced/recover.c 2007/01/29 20:04:41 1.10.2.6.6.2
@@ -13,6 +13,9 @@
#include "fd.h"
#include "ccs.h"
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/select.h>
/* Fencing recovery algorithm
@@ -359,6 +362,79 @@
return num_victims;
}
+static inline void close_override(int *fd, char *path) /* tear down the override FIFO: unlink 'path', close *fd if it is open, reset *fd to -1 */
+{
+ unlink(path); /* return value ignored; runs whether or not the FIFO was ever opened */
+ if (fd && *fd >= 0)
+ close(*fd);
+ *fd = -1; /* NOTE(review): 'fd' is dereferenced here without the NULL guard used two lines up */
+}
+
+static int open_override(char *path) /* create a FIFO at 'path' and open it for non-blocking reads */
+{ /* returns the result of open(), or -1 when mkfifo() fails */
+ int ret;
+ mode_t om;
+
+ om = umask(077); /* mask group/other permission bits while the FIFO is created */
+ ret = mkfifo(path, (S_IRUSR | S_IWUSR)); /* owner read/write only */
+ umask(om); /* put the previous umask back */
+
+ if (ret < 0) /* NOTE(review): this includes EEXIST, so a FIFO already present at 'path' yields -1 */
+ return -1;
+ return open(path, O_RDONLY | O_NONBLOCK); /* O_NONBLOCK: do not wait for a writer to appear */
+}
+
+static int check_override(int ofd, char *nodename, int timeout) /* wait up to 'timeout' seconds for 'nodename' to be written to the override FIFO 'ofd' */
+{ /* returns 1 on a name match, 0 on no match or timeout, -1 if select() or read() fails */
+ char buf[128];
+ fd_set rfds;
+ struct timeval tv = {0, 0};
+ int ret, x;
+
+ if (ofd < 0 || !nodename || !strlen(nodename)) { /* nothing to poll: just sleep for the interval */
+ sleep(timeout);
+ return 0;
+ }
+
+ FD_ZERO(&rfds);
+ FD_SET(ofd, &rfds);
+ tv.tv_usec = 0;
+ tv.tv_sec = timeout;
+
+ ret = select(ofd + 1, &rfds, NULL, NULL, &tv);
+ if (ret < 0) { /* NOTE(review): EINTR is logged and returned as -1 like any other failure */
+ syslog(LOG_ERR, "select: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (ret == 0) /* timed out with nothing to read */
+ return 0;
+
+ memset(buf, 0, sizeof(buf));
+ ret = read(ofd, buf, sizeof(buf) - 1); /* last byte is left zero so buf stays NUL-terminated */
+ if (ret < 0) {
+ syslog(LOG_ERR, "read: %s\n", strerror(errno));
+ return -1;
+ }
+
+ /* chop off control characters */
+ for (x = 0; x < ret; x++) {
+ if (buf[x] < 0x20) { /* NOTE(review): where plain char is signed, bytes >= 0x80 also satisfy this test */
+ buf[x] = 0; /* truncate at the first such byte (e.g. a trailing newline) */
+ break;
+ }
+ }
+
+ if (!strcasecmp(nodename, buf)) {
+ /* Case insensitive, but not as nice as, say, name_equal
+ in the other file... */
+ return 1;
+ }
+
+ return 0; /* no match; NOTE(review): returns without waiting out the rest of 'timeout' */
+}
+
+
/* If there are victims after a node has joined, it's a good indication that
they may be joining the cluster shortly. If we delay a bit they might
become members and we can avoid fencing them. This is only really an issue
@@ -429,7 +505,7 @@
fd_node_t *node;
char *master_name;
uint32_t master;
- int error, cd;
+ int error, cd, override = -1;
master = find_master_nodeid(fd, &master_name);
@@ -466,7 +542,22 @@
list_del(&node->list);
free(node);
}
- sleep(5);
+
+ if (!fd->comline->override_path) {
+ sleep(5);
+ continue;
+ }
+
+ /* Check for manual intervention */
+ override = open_override(fd->comline->override_path);
+ if (check_override(override, node->name, 5) > 0) {
+ syslog(LOG_WARNING, "fence \"%s\" overridden by "
+ "administrator intervention", node->name);
+
+ list_del(&node->list);
+ free(node);
+ }
+ close_override(&override, fd->comline->override_path);
}
ccs_disconnect(cd);
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Cluster-devel] cluster/fence/fenced fd.h main.c recover.c
@ 2007-01-29 20:30 lhh
0 siblings, 0 replies; 3+ messages in thread
From: lhh @ 2007-01-29 20:30 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-01-29 20:30:25
Modified files:
fence/fenced : fd.h main.c recover.c
Log message:
Add manual override for fenced to RHEL5 branch; patch is a merge from HEAD branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.1&r2=1.24.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.38.2.2&r2=1.38.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.1&r2=1.25.2.2
--- cluster/fence/fenced/fd.h 2006/12/01 19:17:46 1.24.2.1
+++ cluster/fence/fenced/fd.h 2007/01/29 20:30:25 1.24.2.2
@@ -53,6 +53,7 @@
#define DEFAULT_POST_JOIN_DELAY 6
#define DEFAULT_POST_FAIL_DELAY 0
#define DEFAULT_CLEAN_START 0
+#define DEFAULT_OVERRIDE_PATH "/var/run/cluster/fenced_override"
#define FENCED_SOCK_PATH "fenced_socket"
extern char *prog_name;
@@ -131,10 +132,12 @@
{
int post_join_delay;
int post_fail_delay;
+ char *override_path;
int8_t clean_start;
int8_t post_join_delay_opt;
int8_t post_fail_delay_opt;
int8_t clean_start_opt;
+ int8_t override_path_opt;
};
#define FDFL_RUN (0)
--- cluster/fence/fenced/main.c 2006/12/01 15:27:50 1.38.2.2
+++ cluster/fence/fenced/main.c 2007/01/29 20:30:25 1.38.2.3
@@ -15,7 +15,7 @@
#include "ccs.h"
#include "copyright.cf"
-#define OPTION_STRING ("cj:f:Dn:hVSw")
+#define OPTION_STRING ("cj:f:Dn:O:hVSw")
#define LOCKFILE_NAME "/var/run/fenced.pid"
struct client {
@@ -145,6 +145,23 @@
free(str);
}
+ if (comline.override_path_opt == FALSE) {
+ str = NULL;
+ memset(path, 0, 256);
+ sprintf(path, "/cluster/fence_daemon/@override_path");
+
+ error = ccs_get(cd, path, &str);
+ if (!error)
+ /* XXX These are not explicitly freed on exit; if
+ we decide to make fenced handle SIGHUP at a later
+ time, we will need to free this. */
+ comline.override_path = strdup(str);
+ else
+ comline.override_path = strdup(DEFAULT_OVERRIDE_PATH);
+ if (str)
+ free(str);
+ }
+
log_debug("delay post_join %ds post_fail %ds",
comline.post_join_delay, comline.post_fail_delay);
@@ -500,6 +517,8 @@
DEFAULT_POST_JOIN_DELAY);
printf(" -f <secs> Post-fail fencing delay (default %d)\n",
DEFAULT_POST_FAIL_DELAY);
+ printf(" -O <path> Override path (default %s)\n",
+ DEFAULT_OVERRIDE_PATH);
printf(" -D Enable debugging code and don't fork\n");
printf(" -h Print this help, then exit\n");
printf(" -V Print program version information, then exit\n");
@@ -547,6 +566,8 @@
int cont = TRUE;
int optchar;
+ comline->override_path_opt = FALSE;
+ comline->override_path = NULL;
comline->post_join_delay_opt = FALSE;
comline->post_fail_delay_opt = FALSE;
comline->clean_start_opt = FALSE;
@@ -571,6 +592,11 @@
comline->post_fail_delay_opt = TRUE;
break;
+ case 'O':
+ comline->override_path = strdup(optarg);
+ comline->override_path_opt = TRUE;
+ break;
+
case 'D':
daemon_debug_opt = TRUE;
break;
--- cluster/fence/fenced/recover.c 2006/12/01 19:17:46 1.25.2.1
+++ cluster/fence/fenced/recover.c 2007/01/29 20:30:25 1.25.2.2
@@ -13,6 +13,9 @@
#include "fd.h"
#include "ccs.h"
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/select.h>
extern int our_nodeid;
extern commandline_t comline;
@@ -212,6 +215,79 @@
return num_victims;
}
+static inline void close_override(int *fd, char *path) /* tear down the override FIFO: unlink 'path', close *fd if it is open, reset *fd to -1 */
+{
+ unlink(path); /* return value ignored; runs whether or not the FIFO was ever opened */
+ if (fd && *fd >= 0)
+ close(*fd);
+ *fd = -1; /* NOTE(review): 'fd' is dereferenced here without the NULL guard used two lines up */
+}
+
+static int open_override(char *path) /* create a FIFO at 'path' and open it for non-blocking reads */
+{ /* returns the result of open(), or -1 when mkfifo() fails */
+ int ret;
+ mode_t om;
+
+ om = umask(077); /* mask group/other permission bits while the FIFO is created */
+ ret = mkfifo(path, (S_IRUSR | S_IWUSR)); /* owner read/write only */
+ umask(om); /* put the previous umask back */
+
+ if (ret < 0) /* NOTE(review): this includes EEXIST, so a FIFO already present at 'path' yields -1 */
+ return -1;
+ return open(path, O_RDONLY | O_NONBLOCK); /* O_NONBLOCK: do not wait for a writer to appear */
+}
+
+static int check_override(int ofd, char *nodename, int timeout) /* wait up to 'timeout' seconds for 'nodename' to be written to the override FIFO 'ofd' */
+{ /* returns 1 on a name match, 0 on no match or timeout, -1 if select() or read() fails */
+ char buf[128];
+ fd_set rfds;
+ struct timeval tv = {0, 0};
+ int ret, x;
+
+ if (ofd < 0 || !nodename || !strlen(nodename)) { /* nothing to poll: just sleep for the interval */
+ sleep(timeout);
+ return 0;
+ }
+
+ FD_ZERO(&rfds);
+ FD_SET(ofd, &rfds);
+ tv.tv_usec = 0;
+ tv.tv_sec = timeout;
+
+ ret = select(ofd + 1, &rfds, NULL, NULL, &tv);
+ if (ret < 0) { /* NOTE(review): EINTR is logged and returned as -1 like any other failure */
+ syslog(LOG_ERR, "select: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (ret == 0) /* timed out with nothing to read */
+ return 0;
+
+ memset(buf, 0, sizeof(buf));
+ ret = read(ofd, buf, sizeof(buf) - 1); /* last byte is left zero so buf stays NUL-terminated */
+ if (ret < 0) {
+ syslog(LOG_ERR, "read: %s\n", strerror(errno));
+ return -1;
+ }
+
+ /* chop off control characters */
+ for (x = 0; x < ret; x++) {
+ if (buf[x] < 0x20) { /* NOTE(review): where plain char is signed, bytes >= 0x80 also satisfy this test */
+ buf[x] = 0; /* truncate at the first such byte (e.g. a trailing newline) */
+ break;
+ }
+ }
+
+ if (!strcasecmp(nodename, buf)) {
+ /* Case insensitive, but not as nice as, say, name_equal
+ in the other file... */
+ return 1;
+ }
+
+ return 0; /* no match; NOTE(review): returns without waiting out the rest of 'timeout' */
+}
+
+
/* If there are victims after a node has joined, it's a good indication that
they may be joining the cluster shortly. If we delay a bit they might
become members and we can avoid fencing them. This is only really an issue
@@ -282,6 +358,7 @@
fd_node_t *node;
char *master_name;
int master, error, cd;
+ int override = -1;
master = find_master_nodeid(fd, &master_name);
@@ -318,7 +395,22 @@
list_del(&node->list);
free(node);
}
- sleep(5);
+
+ if (!comline.override_path) {
+ sleep(5);
+ continue;
+ }
+
+ /* Check for manual intervention */
+ override = open_override(comline.override_path);
+ if (check_override(override, node->name, 5) > 0) {
+ syslog(LOG_WARNING, "fence \"%s\" overridden by "
+ "administrator intervention", node->name);
+
+ list_del(&node->list);
+ free(node);
+ }
+ close_override(&override, comline.override_path);
}
ccs_disconnect(cd);
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Cluster-devel] cluster/fence/fenced fd.h main.c recover.c
@ 2007-01-29 19:55 lhh
0 siblings, 0 replies; 3+ messages in thread
From: lhh @ 2007-01-29 19:55 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2007-01-29 19:55:06
Modified files:
fence/fenced : fd.h main.c recover.c
Log message:
Add manual override for fenced to RHEL4 branch; patch is a backport from HEAD branch; fixes 223060
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.5&r2=1.7.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.16.2.11&r2=1.16.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.10.2.8&r2=1.10.2.9
--- cluster/fence/fenced/fd.h 2006/12/20 18:14:29 1.7.2.5
+++ cluster/fence/fenced/fd.h 2007/01/29 19:55:06 1.7.2.6
@@ -52,6 +52,7 @@
#define FENCED_SOCK_PATH "fenced_socket"
+#define DEFAULT_OVERRIDE_PATH "/var/run/cluster/fenced_override"
#define DEFAULT_POST_JOIN_DELAY 6
#define DEFAULT_POST_FAIL_DELAY 0
@@ -129,10 +130,12 @@
int debug;
int post_join_delay;
int post_fail_delay;
+ char *override_path;
int8_t clean_start;
int8_t post_join_delay_opt;
int8_t post_fail_delay_opt;
int8_t clean_start_opt;
+ int8_t override_path_opt;
};
#define FDFL_RUN (0)
--- cluster/fence/fenced/main.c 2005/12/20 16:03:58 1.16.2.11
+++ cluster/fence/fenced/main.c 2007/01/29 19:55:06 1.16.2.12
@@ -23,7 +23,7 @@
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
-#define OPTION_STRING ("cj:f:t:Dn:hVSwQ")
+#define OPTION_STRING ("cj:f:t:Dn:O:hVSwQ")
#define LOCKFILE_NAME "/var/run/fenced.pid"
@@ -40,6 +40,8 @@
DEFAULT_POST_JOIN_DELAY);
printf(" -f <secs> Post-fail fencing delay (default %d)\n",
DEFAULT_POST_FAIL_DELAY);
+ printf(" -O <path> Override path (default %s)\n",
+ DEFAULT_OVERRIDE_PATH);
printf(" -D Enable debugging code and don't fork\n");
printf(" -h Print this help, then exit\n");
printf(" -n <name> Name of the fence domain, \"default\" if none\n");
@@ -434,6 +436,23 @@
free(str);
}
+ if (fd->comline->override_path_opt == FALSE) {
+ str = NULL;
+ memset(path, 0, 256);
+ sprintf(path, "/cluster/fence_daemon/@override_path");
+
+ error = ccs_get(cd, path, &str);
+ if (!error)
+ /* XXX These are not explicitly freed on exit; if
+ we decide to make fenced handle SIGHUP at a later
+ time, we will need to free this. */
+ fd->comline->override_path = strdup(str);
+ else
+ fd->comline->override_path = strdup(DEFAULT_OVERRIDE_PATH);
+ if (str)
+ free(str);
+ }
+
log_debug("delay post_join %ds post_fail %ds",
fd->comline->post_join_delay, fd->comline->post_fail_delay);
@@ -527,6 +546,8 @@
int cont = TRUE;
int optchar;
+ comline->override_path_opt = FALSE;
+ comline->override_path = NULL;
comline->post_join_delay_opt = FALSE;
comline->post_fail_delay_opt = FALSE;
comline->clean_start_opt = FALSE;
@@ -551,6 +572,11 @@
comline->post_fail_delay_opt = TRUE;
break;
+ case 'O':
+ comline->override_path = strdup(optarg);
+ comline->override_path_opt = TRUE;
+ break;
+
case 'D':
comline->debug = TRUE;
fenced_debug = TRUE;
--- cluster/fence/fenced/recover.c 2006/12/20 18:14:29 1.10.2.8
+++ cluster/fence/fenced/recover.c 2007/01/29 19:55:06 1.10.2.9
@@ -12,6 +12,9 @@
******************************************************************************/
#include "fd.h"
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/select.h>
/* Fencing recovery algorithm
@@ -358,6 +361,79 @@
return num_victims;
}
+static inline void close_override(int *fd, char *path) /* tear down the override FIFO: unlink 'path', close *fd if it is open, reset *fd to -1 */
+{
+ unlink(path); /* return value ignored; runs whether or not the FIFO was ever opened */
+ if (fd && *fd >= 0)
+ close(*fd);
+ *fd = -1; /* NOTE(review): 'fd' is dereferenced here without the NULL guard used two lines up */
+}
+
+static int open_override(char *path) /* create a FIFO at 'path' and open it for non-blocking reads */
+{ /* returns the result of open(), or -1 when mkfifo() fails */
+ int ret;
+ mode_t om;
+
+ om = umask(077); /* mask group/other permission bits while the FIFO is created */
+ ret = mkfifo(path, (S_IRUSR | S_IWUSR)); /* owner read/write only */
+ umask(om); /* put the previous umask back */
+
+ if (ret < 0) /* NOTE(review): this includes EEXIST, so a FIFO already present at 'path' yields -1 */
+ return -1;
+ return open(path, O_RDONLY | O_NONBLOCK); /* O_NONBLOCK: do not wait for a writer to appear */
+}
+
+static int check_override(int ofd, char *nodename, int timeout) /* wait up to 'timeout' seconds for 'nodename' to be written to the override FIFO 'ofd' */
+{ /* returns 1 on a name match, 0 on no match or timeout, -1 if select() or read() fails */
+ char buf[128];
+ fd_set rfds;
+ struct timeval tv = {0, 0};
+ int ret, x;
+
+ if (ofd < 0 || !nodename || !strlen(nodename)) { /* nothing to poll: just sleep for the interval */
+ sleep(timeout);
+ return 0;
+ }
+
+ FD_ZERO(&rfds);
+ FD_SET(ofd, &rfds);
+ tv.tv_usec = 0;
+ tv.tv_sec = timeout;
+
+ ret = select(ofd + 1, &rfds, NULL, NULL, &tv);
+ if (ret < 0) { /* NOTE(review): EINTR is logged and returned as -1 like any other failure */
+ syslog(LOG_ERR, "select: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (ret == 0) /* timed out with nothing to read */
+ return 0;
+
+ memset(buf, 0, sizeof(buf));
+ ret = read(ofd, buf, sizeof(buf) - 1); /* last byte is left zero so buf stays NUL-terminated */
+ if (ret < 0) {
+ syslog(LOG_ERR, "read: %s\n", strerror(errno));
+ return -1;
+ }
+
+ /* chop off control characters */
+ for (x = 0; x < ret; x++) {
+ if (buf[x] < 0x20) { /* NOTE(review): where plain char is signed, bytes >= 0x80 also satisfy this test */
+ buf[x] = 0; /* truncate at the first such byte (e.g. a trailing newline) */
+ break;
+ }
+ }
+
+ if (!strcasecmp(nodename, buf)) {
+ /* Case insensitive, but not as nice as, say, name_equal
+ in the other file... */
+ return 1;
+ }
+
+ return 0; /* no match; NOTE(review): returns without waiting out the rest of 'timeout' */
+}
+
+
/* If there are victims after a node has joined, it's a good indication that
they may be joining the cluster shortly. If we delay a bit they might
become members and we can avoid fencing them. This is only really an issue
@@ -428,7 +504,7 @@
fd_node_t *node;
char *master_name;
uint32_t master;
- int error;
+ int error, override = -1;
master = find_master_nodeid(fd, &master_name);
@@ -462,7 +538,22 @@
list_del(&node->list);
free(node);
}
- sleep(5);
+
+ if (!fd->comline->override_path) {
+ sleep(5);
+ continue;
+ }
+
+ /* Check for manual intervention */
+ override = open_override(fd->comline->override_path);
+ if (check_override(override, node->name, 5) > 0) {
+ syslog(LOG_WARNING, "fence \"%s\" overridden by "
+ "administrator intervention", node->name);
+
+ list_del(&node->list);
+ free(node);
+ }
+ close_override(&override, fd->comline->override_path);
}
}
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2007-01-29 20:30 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-29 20:04 [Cluster-devel] cluster/fence/fenced fd.h main.c recover.c lhh
-- strict thread matches above, loose matches on Subject: below --
2007-01-29 20:30 lhh
2007-01-29 19:55 lhh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).