From mboxrd@z Thu Jan 1 00:00:00 1970 From: cfeist@sourceware.org Date: 20 Dec 2006 18:14:30 -0000 Subject: [Cluster-devel] cluster/fence fence_node/fence_node.c fence_to ... Message-ID: <20061220181430.10347.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: cfeist at sourceware.org 2006-12-20 18:14:29 Modified files: fence/fence_node: fence_node.c fence/fence_tool: fence_tool.c fence/fenced : agent.c fd.h recover.c Log message: - Fixed a bug which would cause fenced to fail to execute secondary fence actions if ccs connection times out. (#219633) From jwhiter at redhat.com Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_node/fence_node.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.4&r2=1.2.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.13&r2=1.5.2.14 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/agent.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.6&r2=1.7.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.4&r2=1.7.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.10.2.7&r2=1.10.2.8 --- cluster/fence/fence_node/fence_node.c 2006/07/07 19:40:22 1.2.2.4 +++ cluster/fence/fence_node/fence_node.c 2006/12/20 18:14:29 1.2.2.5 @@ -34,7 +34,7 @@ static char *prog_name; static int force; -int dispatch_fence_agent(int cd, char *victim); +int dispatch_fence_agent(char *victim, int force); static void print_usage(void) { @@ -104,28 +104,15 @@ if (!victim) die("no node name specified"); - if (force) - cd = ccs_force_connect(NULL, 0); - else - cd = ccs_connect(); - openlog("fence_node", LOG_PID, LOG_USER); - if (cd < 0) { - syslog(LOG_ERR, "cannot connect to ccs %d\n", cd); - goto fail; - } - - error = dispatch_fence_agent(cd, victim); + error = dispatch_fence_agent(victim, force); if (error) - goto fail_ccs; + goto fail; syslog(LOG_NOTICE, "Fence of \"%s\" was successful\n", victim); - ccs_disconnect(cd); exit(EXIT_SUCCESS); - fail_ccs: - ccs_disconnect(cd); fail: syslog(LOG_ERR, "Fence of \"%s\" was unsuccessful\n", victim); exit(EXIT_FAILURE); --- cluster/fence/fence_tool/fence_tool.c 2006/10/23 16:23:56 1.5.2.13 +++ cluster/fence/fence_tool/fence_tool.c 2006/12/20 18:14:29 1.5.2.14 @@ -67,7 +67,7 @@ int cl_sock; char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1]; -int dispatch_fence_agent(int cd, char *victim, int in); +int dispatch_fence_agent(char *victim, int force); static int check_mounted(void) --- cluster/fence/fenced/agent.c 2006/05/09 19:42:52 1.7.2.6 +++ cluster/fence/fenced/agent.c 2006/12/20 18:14:29 1.7.2.7 @@ -274,17 +274,44 @@ return error; } -int dispatch_fence_agent(int cd, char *victim) +int dispatch_fence_agent(char *victim, int force) { char *method = NULL, *device = NULL; - int num_methods, num_devices, m, d, error = -1; + int num_methods, num_devices, m, d, error = -1, cd; + + if (force) + cd = ccs_force_connect(NULL, 0); + else { + while ((cd = ccs_connect()) < 0) + sleep(1); + } + + if (cd < 0) { + syslog(LOG_ERR, "cannot connect to ccs %d\n", cd); + return -1; + } num_methods = count_methods(cd, victim); for (m = 0; m < num_methods; m++) { error = get_method(cd, victim, m, &method); - if (error) + + /* if the connection timed out while we were trying + * to fence, try to open the connection again + */ + if (error == -EBADR) { + syslog(LOG_INFO, "ccs connection timed out, " + "retrying\n"); + + while ((cd = ccs_connect()) < 0) + sleep(1); + + error = get_method(cd, victim, m, &method); + + if (error) + continue; + } else if (error) continue; /* if num_devices is zero we should return an error */ @@ -313,6 +340,8 @@ break; } + ccs_disconnect(cd); + return error; } --- cluster/fence/fenced/fd.h 2005/02/24 07:06:09 1.7.2.4 +++ cluster/fence/fenced/fd.h 2006/12/20 18:14:29 1.7.2.5 @@ -173,6 +173,6 @@ void do_recovery(fd_t *fd, struct cl_service_event *ev, struct cl_cluster_node *cl_nodes); void do_recovery_done(fd_t *fd); -int dispatch_fence_agent(int cd, char *victim); +int dispatch_fence_agent(char *victim, int force); #endif /* __FD_DOT_H__ */ --- cluster/fence/fenced/recover.c 2005/04/20 05:51:15 1.10.2.7 +++ cluster/fence/fenced/recover.c 2006/12/20 18:14:29 1.10.2.8 @@ -12,7 +12,6 @@ ******************************************************************************/ #include "fd.h" -#include "ccs.h" /* Fencing recovery algorithm @@ -429,7 +428,7 @@ fd_node_t *node; char *master_name; uint32_t master; - int error, cd; + int error; master = find_master_nodeid(fd, &master_name); @@ -441,9 +440,6 @@ delay_fencing(fd, ev); - while ((cd = ccs_connect()) < 0) - sleep(1); - while (!list_empty(&fd->victims)) { node = list_entry(fd->victims.next, fd_node_t, list); @@ -457,7 +453,7 @@ log_debug("fencing node %s", node->name); syslog(LOG_INFO, "fencing node \"%s\"", node->name); - error = dispatch_fence_agent(cd, node->name); + error = dispatch_fence_agent(node->name, 0); syslog(LOG_INFO, "fence \"%s\" %s", node->name, error ? "failed" : "success"); @@ -468,8 +464,6 @@ } sleep(5); } - - ccs_disconnect(cd); } static void add_victims(fd_t *fd, struct cl_service_event *ev,