From mboxrd@z Thu Jan 1 00:00:00 1970 From: pcaulfield@sourceware.org Date: 30 Jan 2008 15:46:42 -0000 Subject: [Cluster-devel] cluster/cman cman_tool/cman_tool.h cman_tool/j ... Message-ID: <20080130154642.19548.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: pcaulfield at sourceware.org 2008-01-30 15:46:41 Modified files: cman/cman_tool : cman_tool.h join.c cman/daemon : ais.c cman/man : cman_tool.8 Log message: Improve startup error checking and logging. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/cman_tool.h.diff?cvsroot=cluster&r1=1.14&r2=1.15 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/cman_tool/join.c.diff?cvsroot=cluster&r1=1.53&r2=1.54 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/daemon/ais.c.diff?cvsroot=cluster&r1=1.59&r2=1.60 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/cman_tool.8.diff?cvsroot=cluster&r1=1.15&r2=1.16 --- cluster/cman/cman_tool/cman_tool.h 2007/11/29 11:19:12 1.14 +++ cluster/cman/cman_tool/cman_tool.h 2008/01/30 15:46:41 1.15 @@ -56,6 +56,8 @@ #define MAX_MCAST_NAME_LEN 256 #define MAX_PATH_LEN 256 +#define DEBUG_STARTUP_ONLY 32 + enum format_opt { FMT_NONE, --- cluster/cman/cman_tool/join.c 2008/01/10 10:39:16 1.53 +++ cluster/cman/cman_tool/join.c 2008/01/30 15:46:41 1.54 @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -51,22 +51,21 @@ setsid(); } - int join(commandline_t *comline) { int i; int envptr = 0; + int argvptr = 0; char scratch[1024]; cman_handle_t h; + int status; pid_t aisexec_pid; int ctree; int p[2]; - if (!comline->noccs_opt) - { + if (!comline->noccs_opt) { ctree = ccs_force_connect(NULL, 1); - if (ctree < 0) - { + if (ctree < 0) { die("ccsd is not running\n"); } ccs_disconnect(ctree); @@ -79,7 +78,6 @@ if (h) die("Node is already active"); - /* Set up environment variables for override */ if (comline->multicast_addr) { snprintf(scratch, sizeof(scratch), "CMAN_MCAST_ADDR=%s", comline->multicast_addr); @@ -117,27 +115,29 @@ snprintf(scratch, sizeof(scratch), "CMAN_2NODE=true"); envp[envptr++] = strdup(scratch); } - if (comline->verbose) { + if (comline->verbose ^ DEBUG_STARTUP_ONLY) { snprintf(scratch, sizeof(scratch), "CMAN_DEBUGLOG=%d", comline->verbose); envp[envptr++] = strdup(scratch); } if (comline->noccs_opt) { - snprintf(scratch, sizeof(scratch), "CMAN_NOCCS=TRUE"); - envp[envptr++] = strdup(scratch); + envp[envptr++] = strdup("CMAN_NOCCS=true"); + envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanpreconfig"); + } + else { + envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanconfig"); } - - /* Use cman to configure services */ - envp[envptr++] = strdup("OPENAIS_DEFAULT_CONFIG_IFACE=cmanconfig"); /* Create a pipe to monitor cman startup progress */ pipe(p); fcntl(p[1], F_SETFD, 0); /* Don't close on exec */ snprintf(scratch, sizeof(scratch), "CMAN_PIPE=%d", p[1]); envp[envptr++] = strdup(scratch); - envp[envptr++] = NULL; argv[0] = "aisexec"; + if (comline->verbose & ~DEBUG_STARTUP_ONLY) + argv[++argvptr] = "-f"; + argv[++argvptr] = NULL; /* Fork/exec cman */ switch ( (aisexec_pid = fork()) ) @@ -145,18 +145,28 @@ case -1: die("fork of aisexec daemon failed: %s", strerror(errno)); - case 0: // 
child + case 0: /* child */ close(p[0]); - be_daemon(!comline->verbose); + if (comline->verbose & DEBUG_STARTUP_ONLY) { + fprintf(stderr, "Starting %s", AISEXECBIN); + for (i=0; i< argvptr; i++) { + fprintf(stderr, " %s", argv[i]); + } + fprintf(stderr, "\n"); + for (i=0; i<envptr-1; i++) { + fprintf(stderr, "%s\n", envp[i]); + } + } + be_daemon(!(comline->verbose & ~DEBUG_STARTUP_ONLY)); execve(AISEXECBIN, argv, envp); - // exec failed - tell the parent process */ + /* exec failed - tell the parent process */ sprintf(scratch, "execve of " AISEXECBIN " failed: %s", strerror(errno)); write(p[1], scratch, strlen(scratch)); exit(1); break; - default: //parent + default: /* parent */ break; } @@ -164,10 +174,12 @@ /* Give the daemon a chance to start up, and monitor the pipe FD for messages */ i = 0; close(p[1]); + + /* Wait for the process to start or die */ + sleep(1); do { fd_set fds; struct timeval tv={1, 0}; - int status; char message[1024]; FD_ZERO(&fds); @@ -177,31 +189,69 @@ /* Did we get an error? */ if (status == 1) { - if (read(p[0], message, sizeof(message)) != 0) { - fprintf(stderr, "cman not started: %s\n", message); + int len; + if ((len = read(p[0], message, sizeof(message)) > 0)) { + + /* Success! 
get the new PID of double-forked aisexec */ + if (sscanf(message, "SUCCESS: %d", &aisexec_pid) == 1) { + if (comline->verbose & DEBUG_STARTUP_ONLY) + fprintf(stderr, "aisexec running, process ID is %d\n", aisexec_pid); + status = 0; + } + else { + fprintf(stderr, "cman not started: %s\n", message); + } break; } - else { + else if (len < 0 && errno == EINTR) { + continue; + } + else { /* Error or EOF - check the child status */ int pidstatus; - if (waitpid(aisexec_pid, &pidstatus, WNOHANG) == 0 && pidstatus != 0) - fprintf(stderr, "cman died with status: %d\n", WEXITSTATUS(pidstatus)); - else + status = waitpid(aisexec_pid, &pidstatus, WNOHANG); + if (status == -1 && errno == ECHILD) { + fprintf(stderr, "cman not started\n"); + break; + } + if (status == 0 && pidstatus != 0) { + if (WIFEXITED(pidstatus)) + fprintf(stderr, "aisexec died with status: %d\n", WEXITSTATUS(pidstatus)); + if (WIFSIGNALED(pidstatus)) + fprintf(stderr, "aisexec died with signal: %d\n", WTERMSIG(pidstatus)); + status = -1; + break; + } + else { status = 0; /* Try to connect */ + } } } - if (status == 0) { - h = cman_admin_init(NULL); - if (!h && comline->verbose) - { - fprintf(stderr, "waiting for aisexec to start\n"); + + } while (status != 0); + close(p[0]); + + /* If aisexec has started, try to connect to cman ... 
if it's still there */ + if (status == 0) { + do { + if (status == 0) { + if (kill(aisexec_pid, 0) < 0) { + die("aisexec died during startup\n"); + } + + h = cman_admin_init(NULL); + if (!h && comline->verbose & DEBUG_STARTUP_ONLY) + { + fprintf(stderr, "waiting for aisexec to start\n"); + } } - } - } while (!h && ++i < 100); + sleep (1); + } while (!h && ++i < 100); + } if (!h) die("aisexec daemon didn't start"); - if (comline->verbose && !cman_is_active(h)) + if ((comline->verbose & DEBUG_STARTUP_ONLY) && !cman_is_active(h)) fprintf(stderr, "aisexec started, but not joined the cluster yet.\n"); cman_finish(h); --- cluster/cman/daemon/ais.c 2008/01/02 16:35:44 1.59 +++ cluster/cman/daemon/ais.c 2008/01/30 15:46:41 1.60 @@ -249,6 +249,7 @@ static int cman_exec_init_fn(struct objdb_iface_ver0 *objdb) { unsigned int object_handle; + char pipe_msg[256]; /* We can only work if our config interface was run first */ if (!config_run) @@ -273,7 +274,9 @@ /* Open local sockets and initialise I/O queues */ cman_init(); - /* Let cman_tool know we are running */ + /* Let cman_tool know we are running and our PID */ + sprintf(pipe_msg,"SUCCESS: %d", getpid()); + write_cman_pipe(pipe_msg); close(startup_pipe); startup_pipe = 0; --- cluster/cman/man/cman_tool.8 2007/11/29 11:19:12 1.15 +++ cluster/cman/man/cman_tool.8 2008/01/30 15:46:41 1.16 @@ -290,6 +290,8 @@ .br 16 Interaction with OpenAIS .br +32 Startup debugging (cman_tool join operations only) +.br .SH NOTES .br the