* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-01-05 16:40 rohara
0 siblings, 0 replies; 10+ messages in thread
From: rohara @ 2007-01-05 16:40 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: rohara at sourceware.org 2007-01-05 16:40:24
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Add timeout option for fence_tool leave.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.6.6.3&r2=1.5.2.6.6.4
--- cluster/fence/fence_tool/fence_tool.c 2006/02/09 15:38:09 1.5.2.6.6.3
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/05 16:40:24 1.5.2.6.6.4
@@ -62,7 +62,7 @@
int operation;
int child_wait;
int quorum_wait = TRUE;
-int fenced_start_timeout = 0;
+int fenced_timeout = 0;
int signalled = 0;
int cl_sock;
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -282,9 +282,9 @@
setup_sock();
- if (fenced_start_timeout) {
+ if (fenced_timeout) {
signal(SIGALRM, sigalarm_handler);
- alarm(fenced_start_timeout);
+ alarm(fenced_timeout);
}
if (!check_quorum()) {
@@ -373,8 +373,17 @@
check_mounted();
setup_sock();
- if (!check_quorum())
+ if (fenced_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
return EXIT_FAILURE;
+ }
close(cl_sock);
@@ -487,7 +496,7 @@
break;
case 't':
- fenced_start_timeout = get_int_arg(optchar, optarg);
+ fenced_timeout = get_int_arg(optchar, optarg);
break;
case 'c':
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-11-29 14:46 teigland
0 siblings, 0 replies; 10+ messages in thread
From: teigland @ 2007-11-29 14:46 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: teigland at sourceware.org 2007-11-29 14:46:41
Modified files:
fence/fence_tool: fence_tool.c
Log message:
[sync from HEAD]
clean out some options that were only relevant to rhel4
remove the monitor option which didn't do anything
add the dump option to dump the fenced debug buffer
(group_tool can still do this, but fence_tool wasn't oddly enough)
bz 404451
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.1&r2=1.23.2.2
--- cluster/fence/fence_tool/fence_tool.c 2007/01/23 16:54:09 1.23.2.1
+++ cluster/fence/fence_tool/fence_tool.c 2007/11/29 14:46:41 1.23.2.2
@@ -37,14 +37,18 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:wQ")
+#define OPTION_STRING ("Vht:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
#define OP_JOIN 1
#define OP_LEAVE 2
-#define OP_MONITOR 3
-#define OP_WAIT 4
+#define OP_WAIT 3
+#define OP_DUMP 4
+
+/* needs to match the same in cluster/group/daemon/gd_internal.h and
+ cluster/group/gfs_controld/lock_dlm.h and cluster/fence/fenced/fd.h */
+#define DUMP_SIZE (1024 * 1024)
#define die(fmt, args...) \
do \
@@ -63,10 +67,47 @@
int signalled = 0;
cman_handle_t ch;
+static int do_write(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ retry:
+ rv = write(fd, buf + off, count);
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+ if (rv < 0)
+ return rv;
+
+ if (rv != count) {
+ count -= rv;
+ off += rv;
+ goto retry;
+ }
+ return 0;
+}
+
+static int do_read(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ while (off < count) {
+ rv = read(fd, buf + off, count - off);
+ if (rv == 0)
+ return -1;
+ if (rv == -1 && errno == EINTR)
+ continue;
+ if (rv == -1)
+ return -1;
+ off += rv;
+ }
+ return 0;
+}
+
static int get_int_arg(char argopt, char *arg)
{
char *tmp;
- int val;
+ int val;
+
val = strtol(arg, &tmp, 10);
if (tmp == arg || tmp != arg + strlen(arg))
die("argument to %c (%s) is not an integer", argopt, arg);
@@ -195,7 +236,7 @@
for (i=0; !fenced_start_timeout || i < fenced_start_timeout; i++) {
if (we_are_in_fence_domain() == joining)
return 0;
- if (!(i % 5))
+ if (i && !(i % 5))
printf("Waiting for fenced to %s the fence group.\n",
(joining?"join":"leave"));
sleep(1);
@@ -281,41 +322,43 @@
return EXIT_SUCCESS;
}
-static int do_monitor(void)
+static int do_dump(void)
{
+ char inbuf[DUMP_SIZE];
+ char outbuf[MAXLINE];
int fd, rv;
- char *out, buf[256];
fd = fenced_connect();
- if (!fd)
- die("fenced not running");
- out = "monitor";
+ memset(inbuf, 0, sizeof(inbuf));
+ memset(outbuf, 0, sizeof(outbuf));
+
+ sprintf(outbuf, "dump");
- rv = write(fd, out, sizeof(out));
+ rv = do_write(fd, outbuf, sizeof(outbuf));
if (rv < 0)
die("can't communicate with fenced");
- while (1) {
- memset(buf, 0, sizeof(buf));
- rv = read(fd, buf, sizeof(buf));
- printf("%s", buf);
- }
+ rv = do_read(fd, inbuf, sizeof(inbuf));
+ if (rv < 0)
+ printf("dump read: %s\n", strerror(errno));
+
+ do_write(STDOUT_FILENO, inbuf, sizeof(inbuf));
close(fd);
- return EXIT_SUCCESS;
+ return 0;
}
static void print_usage(void)
{
printf("Usage:\n");
printf("\n");
- printf("%s <join|leave|wait> [options]\n", prog_name);
+ printf("%s <join|leave|dump> [options]\n", prog_name);
printf("\n");
printf("Actions:\n");
printf(" join Join the default fence domain\n");
printf(" leave Leave default fence domain\n");
- printf(" wait Wait for node to be member of default fence domain\n");
+ printf(" dump Dump debug buffer from fenced\n");
printf("\n");
printf("Options:\n");
printf(" -w Wait for join to complete\n");
@@ -324,12 +367,6 @@
printf(" -t Maximum time in seconds to wait\n");
printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
- printf("Fenced options:\n");
- printf(" these are passed on to fenced when it's started\n");
- printf(" -c All nodes are in a clean state to start\n");
- printf(" -j <secs> Post-join fencing delay\n");
- printf(" -f <secs> Post-fail fencing delay\n");
- printf("\n");
}
static void decode_arguments(int argc, char *argv[])
@@ -376,12 +413,6 @@
fenced_start_timeout = get_int_arg(optchar, optarg);
break;
- case 'c':
- case 'j':
- case 'f':
- /* Do nothing, just pass these options on to fenced */
- break;
-
default:
die("unknown option: %c\n", optchar);
break;
@@ -393,8 +424,8 @@
operation = OP_JOIN;
} else if (strcmp(argv[optind], "leave") == 0) {
operation = OP_LEAVE;
- } else if (strcmp(argv[optind], "monitor") == 0) {
- operation = OP_MONITOR;
+ } else if (strcmp(argv[optind], "dump") == 0) {
+ operation = OP_DUMP;
} else
die("unknown option %s\n", argv[optind]);
optind++;
@@ -415,8 +446,8 @@
return do_join(argc, argv);
case OP_LEAVE:
return do_leave();
- case OP_MONITOR:
- return do_monitor();
+ case OP_DUMP:
+ return do_dump();
case OP_WAIT:
return -1;
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-08-15 20:57 teigland
0 siblings, 0 replies; 10+ messages in thread
From: teigland @ 2007-08-15 20:57 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: teigland at sourceware.org 2007-08-15 20:57:28
Modified files:
fence/fence_tool: fence_tool.c
Log message:
clean out some options that were only relevant to rhel4
remove the monitor option which didn't do anything
add the dump option to dump the fenced debug buffer
(group_tool can still do this, but fence_tool wasn't oddly enough)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&r1=1.24&r2=1.25
--- cluster/fence/fence_tool/fence_tool.c 2007/01/23 16:53:28 1.24
+++ cluster/fence/fence_tool/fence_tool.c 2007/08/15 20:57:28 1.25
@@ -37,14 +37,18 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:wQ")
+#define OPTION_STRING ("Vht:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
#define OP_JOIN 1
#define OP_LEAVE 2
-#define OP_MONITOR 3
-#define OP_WAIT 4
+#define OP_WAIT 3
+#define OP_DUMP 4
+
+/* needs to match the same in cluster/group/daemon/gd_internal.h and
+ cluster/group/gfs_controld/lock_dlm.h and cluster/fence/fenced/fd.h */
+#define DUMP_SIZE (1024 * 1024)
#define die(fmt, args...) \
do \
@@ -63,10 +67,47 @@
int signalled = 0;
cman_handle_t ch;
+static int do_write(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ retry:
+ rv = write(fd, buf + off, count);
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+ if (rv < 0)
+ return rv;
+
+ if (rv != count) {
+ count -= rv;
+ off += rv;
+ goto retry;
+ }
+ return 0;
+}
+
+static int do_read(int fd, void *buf, size_t count)
+{
+ int rv, off = 0;
+
+ while (off < count) {
+ rv = read(fd, buf + off, count - off);
+ if (rv == 0)
+ return -1;
+ if (rv == -1 && errno == EINTR)
+ continue;
+ if (rv == -1)
+ return -1;
+ off += rv;
+ }
+ return 0;
+}
+
static int get_int_arg(char argopt, char *arg)
{
char *tmp;
- int val;
+ int val;
+
val = strtol(arg, &tmp, 10);
if (tmp == arg || tmp != arg + strlen(arg))
die("argument to %c (%s) is not an integer", argopt, arg);
@@ -195,7 +236,7 @@
for (i=0; !fenced_start_timeout || i < fenced_start_timeout; i++) {
if (we_are_in_fence_domain() == joining)
return 0;
- if (!(i % 5))
+ if (i && !(i % 5))
printf("Waiting for fenced to %s the fence group.\n",
(joining?"join":"leave"));
sleep(1);
@@ -281,41 +322,43 @@
return EXIT_SUCCESS;
}
-static int do_monitor(void)
+static int do_dump(void)
{
+ char inbuf[DUMP_SIZE];
+ char outbuf[MAXLINE];
int fd, rv;
- char *out, buf[256];
fd = fenced_connect();
- if (!fd)
- die("fenced not running");
- out = "monitor";
+ memset(inbuf, 0, sizeof(inbuf));
+ memset(outbuf, 0, sizeof(outbuf));
+
+ sprintf(outbuf, "dump");
- rv = write(fd, out, sizeof(out));
+ rv = do_write(fd, outbuf, sizeof(outbuf));
if (rv < 0)
die("can't communicate with fenced");
- while (1) {
- memset(buf, 0, sizeof(buf));
- rv = read(fd, buf, sizeof(buf));
- printf("%s", buf);
- }
+ rv = do_read(fd, inbuf, sizeof(inbuf));
+ if (rv < 0)
+ printf("dump read: %s\n", strerror(errno));
+
+ do_write(STDOUT_FILENO, inbuf, sizeof(inbuf));
close(fd);
- return EXIT_SUCCESS;
+ return 0;
}
static void print_usage(void)
{
printf("Usage:\n");
printf("\n");
- printf("%s <join|leave|wait> [options]\n", prog_name);
+ printf("%s <join|leave|dump> [options]\n", prog_name);
printf("\n");
printf("Actions:\n");
printf(" join Join the default fence domain\n");
printf(" leave Leave default fence domain\n");
- printf(" wait Wait for node to be member of default fence domain\n");
+ printf(" dump Dump debug buffer from fenced\n");
printf("\n");
printf("Options:\n");
printf(" -w Wait for join to complete\n");
@@ -324,12 +367,6 @@
printf(" -t Maximum time in seconds to wait\n");
printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
- printf("Fenced options:\n");
- printf(" these are passed on to fenced when it's started\n");
- printf(" -c All nodes are in a clean state to start\n");
- printf(" -j <secs> Post-join fencing delay\n");
- printf(" -f <secs> Post-fail fencing delay\n");
- printf("\n");
}
static void decode_arguments(int argc, char *argv[])
@@ -376,12 +413,6 @@
fenced_start_timeout = get_int_arg(optchar, optarg);
break;
- case 'c':
- case 'j':
- case 'f':
- /* Do nothing, just pass these options on to fenced */
- break;
-
default:
die("unknown option: %c\n", optchar);
break;
@@ -393,8 +424,8 @@
operation = OP_JOIN;
} else if (strcmp(argv[optind], "leave") == 0) {
operation = OP_LEAVE;
- } else if (strcmp(argv[optind], "monitor") == 0) {
- operation = OP_MONITOR;
+ } else if (strcmp(argv[optind], "dump") == 0) {
+ operation = OP_DUMP;
} else
die("unknown option %s\n", argv[optind]);
optind++;
@@ -415,8 +446,8 @@
return do_join(argc, argv);
case OP_LEAVE:
return do_leave();
- case OP_MONITOR:
- return do_monitor();
+ case OP_DUMP:
+ return do_dump();
case OP_WAIT:
return -1;
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-01-23 17:21 rpeterso
0 siblings, 0 replies; 10+ messages in thread
From: rpeterso @ 2007-01-23 17:21 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL50
Changes by: rpeterso at sourceware.org 2007-01-23 17:21:11
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Resolves: bz 222933: regression: fence_tool no longer times out
after 300 seconds
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.23&r2=1.23.4.1
--- cluster/fence/fence_tool/fence_tool.c 2006/10/13 14:57:55 1.23
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/23 17:21:11 1.23.4.1
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -29,6 +29,7 @@
#include "ccs.h"
#include "copyright.cf"
+#include "libcman.h"
#include "libgroup.h"
#ifndef TRUE
@@ -36,7 +37,7 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:w")
+#define OPTION_STRING ("Vhcj:f:t:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
@@ -57,7 +58,10 @@
char *prog_name;
int operation;
int child_wait = FALSE;
+int quorum_wait = TRUE;
int fenced_start_timeout = 300; /* five minutes */
+int signalled = 0;
+cman_handle_t ch;
static int get_int_arg(char argopt, char *arg)
{
@@ -97,6 +101,11 @@
return 0;
}
+static void sigalarm_handler(int sig)
+{
+ signalled = 1;
+}
+
int fenced_connect(void)
{
struct sockaddr_un sun;
@@ -135,6 +144,50 @@
return gdata.member;
}
+/*
+ * We wait for the cluster to be quorate in this program because it's easy to
+ * kill this program if we want to quit waiting. If we just started fenced
+ * without waiting for quorum, fenced's join would then wait for quorum in SM
+ * but we can't kill/cancel it at that point -- we have to wait for it to
+ * complete.
+ *
+ * A second reason to wait for quorum is that the unfencing step involves
+ * cluster.conf lookups through ccs, but ccsd may wait for the cluster to be
+ * quorate before responding to the lookups. There wouldn't be a problem
+ * blocking there per se, but it's cleaner I think to just wait here first.
+ *
+ * In the case where we're leaving, we want to wait for quorum because if we go
+ * ahead and shut down fenced, the fence domain leave will block in SM where it
+ * will wait for quorum before the leave can be processed. We can't
+ * kill/cancel the leave at that point, but we can if we're waiting here.
+ *
+ * Waiting here doesn't guarantee we won't end up blocking in SM on the join or
+ * leave, but it avoids it in some common cases which can be helpful. (Quorum
+ * could easily be lost between the time we wait for it here and then begin the
+ * join/leave process.)
+ */
+
+static int check_quorum(void)
+{
+ int rv = 0, i = 0;
+
+ while (!signalled) {
+ rv = cman_is_quorate(ch);
+ if (rv)
+ return TRUE;
+ else if (!quorum_wait)
+ return FALSE;
+
+ sleep(1);
+
+ if (!signalled && ++i > 9 && !(i % 10))
+ printf("%s: waiting for cluster quorum\n", prog_name);
+ }
+
+ errno = ETIMEDOUT;
+ return FALSE;
+}
+
static int do_wait(int joining)
{
int i;
@@ -156,6 +209,22 @@
int i, fd, rv;
char buf[MAXLINE];
+ ch = cman_init(NULL);
+
+ if (fenced_start_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_start_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
+ cman_finish(ch);
+ return EXIT_FAILURE;
+ }
+ cman_finish(ch);
+
i = 0;
do {
sleep(1);
@@ -253,6 +322,7 @@
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
printf(" -t Maximum time in seconds to wait\n");
+ printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
printf("Fenced options:\n");
printf(" these are passed on to fenced when it's started\n");
@@ -284,6 +354,10 @@
exit(EXIT_SUCCESS);
break;
+ case 'Q':
+ quorum_wait = FALSE;
+ break;
+
case 'w':
child_wait = TRUE;
break;
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-01-23 16:54 rpeterso
0 siblings, 0 replies; 10+ messages in thread
From: rpeterso @ 2007-01-23 16:54 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: rpeterso at sourceware.org 2007-01-23 16:54:09
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Resolves: bz 222933: regression: fence_tool no longer times out
after 300 seconds
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23&r2=1.23.2.1
--- cluster/fence/fence_tool/fence_tool.c 2006/10/13 14:57:55 1.23
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/23 16:54:09 1.23.2.1
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -29,6 +29,7 @@
#include "ccs.h"
#include "copyright.cf"
+#include "libcman.h"
#include "libgroup.h"
#ifndef TRUE
@@ -36,7 +37,7 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:w")
+#define OPTION_STRING ("Vhcj:f:t:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
@@ -57,7 +58,10 @@
char *prog_name;
int operation;
int child_wait = FALSE;
+int quorum_wait = TRUE;
int fenced_start_timeout = 300; /* five minutes */
+int signalled = 0;
+cman_handle_t ch;
static int get_int_arg(char argopt, char *arg)
{
@@ -97,6 +101,11 @@
return 0;
}
+static void sigalarm_handler(int sig)
+{
+ signalled = 1;
+}
+
int fenced_connect(void)
{
struct sockaddr_un sun;
@@ -135,6 +144,50 @@
return gdata.member;
}
+/*
+ * We wait for the cluster to be quorate in this program because it's easy to
+ * kill this program if we want to quit waiting. If we just started fenced
+ * without waiting for quorum, fenced's join would then wait for quorum in SM
+ * but we can't kill/cancel it at that point -- we have to wait for it to
+ * complete.
+ *
+ * A second reason to wait for quorum is that the unfencing step involves
+ * cluster.conf lookups through ccs, but ccsd may wait for the cluster to be
+ * quorate before responding to the lookups. There wouldn't be a problem
+ * blocking there per se, but it's cleaner I think to just wait here first.
+ *
+ * In the case where we're leaving, we want to wait for quorum because if we go
+ * ahead and shut down fenced, the fence domain leave will block in SM where it
+ * will wait for quorum before the leave can be processed. We can't
+ * kill/cancel the leave at that point, but we can if we're waiting here.
+ *
+ * Waiting here doesn't guarantee we won't end up blocking in SM on the join or
+ * leave, but it avoids it in some common cases which can be helpful. (Quorum
+ * could easily be lost between the time we wait for it here and then begin the
+ * join/leave process.)
+ */
+
+static int check_quorum(void)
+{
+ int rv = 0, i = 0;
+
+ while (!signalled) {
+ rv = cman_is_quorate(ch);
+ if (rv)
+ return TRUE;
+ else if (!quorum_wait)
+ return FALSE;
+
+ sleep(1);
+
+ if (!signalled && ++i > 9 && !(i % 10))
+ printf("%s: waiting for cluster quorum\n", prog_name);
+ }
+
+ errno = ETIMEDOUT;
+ return FALSE;
+}
+
static int do_wait(int joining)
{
int i;
@@ -156,6 +209,22 @@
int i, fd, rv;
char buf[MAXLINE];
+ ch = cman_init(NULL);
+
+ if (fenced_start_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_start_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
+ cman_finish(ch);
+ return EXIT_FAILURE;
+ }
+ cman_finish(ch);
+
i = 0;
do {
sleep(1);
@@ -253,6 +322,7 @@
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
printf(" -t Maximum time in seconds to wait\n");
+ printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
printf("Fenced options:\n");
printf(" these are passed on to fenced when it's started\n");
@@ -284,6 +354,10 @@
exit(EXIT_SUCCESS);
break;
+ case 'Q':
+ quorum_wait = FALSE;
+ break;
+
case 'w':
child_wait = TRUE;
break;
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-01-23 16:53 rpeterso
0 siblings, 0 replies; 10+ messages in thread
From: rpeterso @ 2007-01-23 16:53 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: rpeterso at sourceware.org 2007-01-23 16:53:29
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Resolves: bz 222933: regression: fence_tool no longer times out
after 300 seconds
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&r1=1.23&r2=1.24
--- cluster/fence/fence_tool/fence_tool.c 2006/10/13 14:57:55 1.23
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/23 16:53:28 1.24
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -29,6 +29,7 @@
#include "ccs.h"
#include "copyright.cf"
+#include "libcman.h"
#include "libgroup.h"
#ifndef TRUE
@@ -36,7 +37,7 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:w")
+#define OPTION_STRING ("Vhcj:f:t:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
@@ -57,7 +58,10 @@
char *prog_name;
int operation;
int child_wait = FALSE;
+int quorum_wait = TRUE;
int fenced_start_timeout = 300; /* five minutes */
+int signalled = 0;
+cman_handle_t ch;
static int get_int_arg(char argopt, char *arg)
{
@@ -97,6 +101,11 @@
return 0;
}
+static void sigalarm_handler(int sig)
+{
+ signalled = 1;
+}
+
int fenced_connect(void)
{
struct sockaddr_un sun;
@@ -135,6 +144,50 @@
return gdata.member;
}
+/*
+ * We wait for the cluster to be quorate in this program because it's easy to
+ * kill this program if we want to quit waiting. If we just started fenced
+ * without waiting for quorum, fenced's join would then wait for quorum in SM
+ * but we can't kill/cancel it at that point -- we have to wait for it to
+ * complete.
+ *
+ * A second reason to wait for quorum is that the unfencing step involves
+ * cluster.conf lookups through ccs, but ccsd may wait for the cluster to be
+ * quorate before responding to the lookups. There wouldn't be a problem
+ * blocking there per se, but it's cleaner I think to just wait here first.
+ *
+ * In the case where we're leaving, we want to wait for quorum because if we go
+ * ahead and shut down fenced, the fence domain leave will block in SM where it
+ * will wait for quorum before the leave can be processed. We can't
+ * kill/cancel the leave at that point, but we can if we're waiting here.
+ *
+ * Waiting here doesn't guarantee we won't end up blocking in SM on the join or
+ * leave, but it avoids it in some common cases which can be helpful. (Quorum
+ * could easily be lost between the time we wait for it here and then begin the
+ * join/leave process.)
+ */
+
+static int check_quorum(void)
+{
+ int rv = 0, i = 0;
+
+ while (!signalled) {
+ rv = cman_is_quorate(ch);
+ if (rv)
+ return TRUE;
+ else if (!quorum_wait)
+ return FALSE;
+
+ sleep(1);
+
+ if (!signalled && ++i > 9 && !(i % 10))
+ printf("%s: waiting for cluster quorum\n", prog_name);
+ }
+
+ errno = ETIMEDOUT;
+ return FALSE;
+}
+
static int do_wait(int joining)
{
int i;
@@ -156,6 +209,22 @@
int i, fd, rv;
char buf[MAXLINE];
+ ch = cman_init(NULL);
+
+ if (fenced_start_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_start_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
+ cman_finish(ch);
+ return EXIT_FAILURE;
+ }
+ cman_finish(ch);
+
i = 0;
do {
sleep(1);
@@ -253,6 +322,7 @@
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
printf(" -t Maximum time in seconds to wait\n");
+ printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
printf("Fenced options:\n");
printf(" these are passed on to fenced when it's started\n");
@@ -284,6 +354,10 @@
exit(EXIT_SUCCESS);
break;
+ case 'Q':
+ quorum_wait = FALSE;
+ break;
+
case 'w':
child_wait = TRUE;
break;
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-01-05 16:44 rohara
0 siblings, 0 replies; 10+ messages in thread
From: rohara @ 2007-01-05 16:44 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: rohara at sourceware.org 2007-01-05 16:44:53
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Remove extra check_quorum call.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.15&r2=1.5.2.16
--- cluster/fence/fence_tool/fence_tool.c 2007/01/05 16:24:29 1.5.2.15
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/05 16:44:53 1.5.2.16
@@ -385,9 +385,6 @@
return EXIT_FAILURE;
}
- if (!check_quorum())
- return EXIT_FAILURE;
-
close(cl_sock);
kill(pid, SIGTERM);
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2007-01-05 16:24 rohara
0 siblings, 0 replies; 10+ messages in thread
From: rohara @ 2007-01-05 16:24 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: rohara at sourceware.org 2007-01-05 16:24:30
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Add timeout option for fence_tool leave.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.14&r2=1.5.2.15
--- cluster/fence/fence_tool/fence_tool.c 2006/12/20 18:14:29 1.5.2.14
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/05 16:24:29 1.5.2.15
@@ -62,7 +62,7 @@
int operation;
int child_wait;
int quorum_wait = TRUE;
-int fenced_start_timeout = 0;
+int fenced_timeout = 0;
int signalled = 0;
int cl_sock;
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
@@ -282,9 +282,9 @@
setup_sock();
- if (fenced_start_timeout) {
+ if (fenced_timeout) {
signal(SIGALRM, sigalarm_handler);
- alarm(fenced_start_timeout);
+ alarm(fenced_timeout);
}
if (!check_quorum()) {
@@ -373,6 +373,18 @@
check_mounted();
setup_sock();
+ if (fenced_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
+ return EXIT_FAILURE;
+ }
+
if (!check_quorum())
return EXIT_FAILURE;
@@ -490,7 +502,7 @@
break;
case 't':
- fenced_start_timeout = get_int_arg(optchar, optarg);
+ fenced_timeout = get_int_arg(optchar, optarg);
break;
case 'c':
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2006-10-23 16:23 jparsons
0 siblings, 0 replies; 10+ messages in thread
From: jparsons @ 2006-10-23 16:23 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jparsons at sourceware.org 2006-10-23 16:23:56
Modified files:
fence/fence_tool: fence_tool.c
Log message:
This fixes bz 203804, wherein pid files are left around. Thx jlayton.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.12&r2=1.5.2.13
--- cluster/fence/fence_tool/fence_tool.c 2006/02/09 15:25:11 1.5.2.12
+++ cluster/fence/fence_tool/fence_tool.c 2006/10/23 16:23:56 1.5.2.13
@@ -357,7 +357,7 @@
{
FILE *f;
char buf[33] = "";
- int pid = 0;
+ int pid = 0, error;
lockfile();
@@ -379,7 +379,10 @@
close(cl_sock);
kill(pid, SIGTERM);
-
+ error = unlink(LOCKFILE_NAME);
+ if (error)
+ die("signal sent, but unable to unlink %s: %s", LOCKFILE_NAME,
+ strerror(error));
return EXIT_SUCCESS;
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Cluster-devel] cluster/fence/fence_tool fence_tool.c
@ 2006-07-10 17:04 rohara
0 siblings, 0 replies; 10+ messages in thread
From: rohara @ 2006-07-10 17:04 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: rohara at sourceware.org 2006-07-10 17:04:08
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Added "self" parameter to dispatch_fence_agent.
Needed for SCSI persistent reservation (fence_scsi).
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&r1=1.20&r2=1.21
--- cluster/fence/fence_tool/fence_tool.c 2006/02/15 22:00:41 1.20
+++ cluster/fence/fence_tool/fence_tool.c 2006/07/10 17:04:08 1.21
@@ -56,7 +56,7 @@
char *prog_name;
int operation;
-int dispatch_fence_agent(int cd, char *victim);
+int dispatch_fence_agent(int cd, char *victim, char *self);
static int check_mounted(void)
{
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2007-11-29 14:46 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-05 16:40 [Cluster-devel] cluster/fence/fence_tool fence_tool.c rohara
-- strict thread matches above, loose matches on Subject: below --
2007-11-29 14:46 teigland
2007-08-15 20:57 teigland
2007-01-23 17:21 rpeterso
2007-01-23 16:54 rpeterso
2007-01-23 16:53 rpeterso
2007-01-05 16:44 rohara
2007-01-05 16:24 rohara
2006-10-23 16:23 jparsons
2006-07-10 17:04 rohara
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).