From: rpeterso@sourceware.org <rpeterso@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/fence/fence_tool fence_tool.c
Date: 23 Jan 2007 16:53:29 -0000 [thread overview]
Message-ID: <20070123165329.21339.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: rpeterso at sourceware.org 2007-01-23 16:53:29
Modified files:
fence/fence_tool: fence_tool.c
Log message:
Resolves: bz 222933: regression: fence_tool no longer times out
after 300 seconds
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&r1=1.23&r2=1.24
--- cluster/fence/fence_tool/fence_tool.c 2006/10/13 14:57:55 1.23
+++ cluster/fence/fence_tool/fence_tool.c 2007/01/23 16:53:28 1.24
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -29,6 +29,7 @@
#include "ccs.h"
#include "copyright.cf"
+#include "libcman.h"
#include "libgroup.h"
#ifndef TRUE
@@ -36,7 +37,7 @@
#define FALSE 0
#endif
-#define OPTION_STRING ("Vhcj:f:t:w")
+#define OPTION_STRING ("Vhcj:f:t:wQ")
#define FENCED_SOCK_PATH "fenced_socket"
#define MAXLINE 256
@@ -57,7 +58,10 @@
char *prog_name;
int operation;
int child_wait = FALSE;
+int quorum_wait = TRUE;
int fenced_start_timeout = 300; /* five minutes */
+int signalled = 0;
+cman_handle_t ch;
static int get_int_arg(char argopt, char *arg)
{
@@ -97,6 +101,11 @@
return 0;
}
+static void sigalarm_handler(int sig) /* installed for SIGALRM; sig itself is not used */
+{
+ signalled = 1; /* only raises the flag polled by check_quorum(); NOTE(review): the flag is declared as plain int - volatile sig_atomic_t is the portable type for this */
+}
+
int fenced_connect(void)
{
struct sockaddr_un sun;
@@ -135,6 +144,50 @@
return gdata.member;
}
+/*
+ * We wait for the cluster to be quorate in this program because it's easy to
+ * kill this program if we want to quit waiting. If we just started fenced
+ * without waiting for quorum, fenced's join would then wait for quorum in SM
+ * but we can't kill/cancel it at that point -- we have to wait for it to
+ * complete.
+ *
+ * A second reason to wait for quorum is that the unfencing step involves
+ * cluster.conf lookups through ccs, but ccsd may wait for the cluster to be
+ * quorate before responding to the lookups. There wouldn't be a problem
+ * blocking there per se, but it's cleaner I think to just wait here first.
+ *
+ * In the case where we're leaving, we want to wait for quorum because if we go
+ * ahead and shut down fenced, the fence domain leave will block in SM where it
+ * will wait for quorum before the leave can be processed. We can't
+ * kill/cancel the leave at that point, but we can if we're waiting here.
+ *
+ * Waiting here doesn't guarantee we won't end up blocking in SM on the join or
+ * leave, but it avoids it in some common cases which can be helpful. (Quorum
+ * could easily be lost between the time we wait for it here and then begin the
+ * join/leave process.)
+ */
+
+static int check_quorum(void) /* TRUE once cman reports quorum; FALSE on -Q without quorum or on timeout */
+{
+ int rv = 0, i = 0; /* rv: last cman_is_quorate() result; i: number of one-second polls so far */
+
+ while (!signalled) { /* signalled is set by sigalarm_handler() when the alarm fires */
+ rv = cman_is_quorate(ch);
+ if (rv) /* any non-zero result is treated as quorate - NOTE(review): confirm how error returns should be handled */
+ return TRUE;
+ else if (!quorum_wait) /* -Q given: fail right away instead of waiting; errno is not set on this path */
+ return FALSE;
+
+ sleep(1);
+
+ if (!signalled && ++i > 9 && !(i % 10)) /* progress message on every 10th poll */
+ printf("%s: waiting for cluster quorum\n", prog_name);
+ }
+
+ errno = ETIMEDOUT; /* loop ended because the alarm flag was set; the caller tests errno for this value */
+ return FALSE;
+}
+
static int do_wait(int joining)
{
int i;
@@ -156,6 +209,22 @@
int i, fd, rv;
char buf[MAXLINE];
+ ch = cman_init(NULL);
+
+ if (fenced_start_timeout) {
+ signal(SIGALRM, sigalarm_handler);
+ alarm(fenced_start_timeout);
+ }
+
+ if (!check_quorum()) {
+ if (errno == ETIMEDOUT)
+ printf("%s: Timed out waiting for cluster "
+ "quorum to form.\n", prog_name);
+ cman_finish(ch);
+ return EXIT_FAILURE;
+ }
+ cman_finish(ch);
+
i = 0;
do {
sleep(1);
@@ -253,6 +322,7 @@
printf(" -V Print program version information, then exit\n");
printf(" -h Print this help, then exit\n");
printf(" -t Maximum time in seconds to wait\n");
+ printf(" -Q Fail if cluster is not quorate, don't wait\n");
printf("\n");
printf("Fenced options:\n");
printf(" these are passed on to fenced when it's started\n");
@@ -284,6 +354,10 @@
exit(EXIT_SUCCESS);
break;
+ case 'Q':
+ quorum_wait = FALSE;
+ break;
+
case 'w':
child_wait = TRUE;
break;
next reply other threads:[~2007-01-23 16:53 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-23 16:53 rpeterso [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-11-29 14:46 [Cluster-devel] cluster/fence/fence_tool fence_tool.c teigland
2007-08-15 20:57 teigland
2007-01-23 17:21 rpeterso
2007-01-23 16:54 rpeterso
2007-01-05 16:44 rohara
2007-01-05 16:40 rohara
2007-01-05 16:24 rohara
2006-10-23 16:23 jparsons
2006-07-10 17:04 rohara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070123165329.21339.qmail@sourceware.org \
--to=rpeterso@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.