* [Cluster-devel] cluster/rgmanager ChangeLog errors.txt init.d/ ...
@ 2006-09-07 18:39 lhh
0 siblings, 0 replies; 2+ messages in thread
From: lhh @ 2006-09-07 18:39 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2006-09-07 18:39:45
Modified files:
rgmanager : ChangeLog errors.txt
rgmanager/init.d: rgmanager
rgmanager/src/daemons: main.c
rgmanager/src/utils: clustat.c
Log message:
2006-09-07 Lon Hohberger <lhh@redhat.com>
* src/daemons/main.c, init.d/rgmanager: Make rgmanager init script
report failure correctly in most cases. (#193603)
* src/utils/clustat.c: Fix #146924 - segfault if cman is not
in a state to give out member lists
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.20&r2=1.5.2.21
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/errors.txt.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.2&r2=1.3.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.9.2.17&r2=1.9.2.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.13&r2=1.5.2.14
--- cluster/rgmanager/ChangeLog 2006/06/21 18:34:19 1.5.2.20
+++ cluster/rgmanager/ChangeLog 2006/09/07 18:39:45 1.5.2.21
@@ -1,3 +1,9 @@
+2006-09-07 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/main.c, init.d/rgmanager: Make rgmanager init script
+ report failure correctly in most cases. (#193603)
+ * src/utils/clustat.c: Fix #146924 - segfault if cman is not
+ in a state to give out member lists
+
2006-06-21 Lon Hohberger <lhh@redhat.com>
* src/daemons/nodeevent.c: Don't use the rg thread refcount in
node event handling (#194491)
--- cluster/rgmanager/errors.txt 2005/03/21 22:01:30 1.1.2.4
+++ cluster/rgmanager/errors.txt 2006/09/07 18:39:45 1.1.2.5
@@ -76,7 +76,9 @@
The resource group manager was unable to find a plugin which was able to
talk to the cluster infrastructure. Generally, this occurs when no cluster
infrastruture is running. Try starting the preferred cluster infrastructure
-for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager.
+for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager. This
+can also occur if CMAN is loaded, while DLM is not. Rgmanager (really,
+the SM magma plugin) requires that the DLM be loaded prior to starting.
#10: Couldn't set up listen socket
--- cluster/rgmanager/init.d/rgmanager 2006/05/12 21:28:30 1.3.2.2
+++ cluster/rgmanager/init.d/rgmanager 2006/09/07 18:39:45 1.3.2.3
@@ -93,6 +93,7 @@
case $1 in
start)
+ [ -z "$RGMGR_OPTS" ] && RGMGR_OPTS="-t 30"
echo -n $"Starting $ID: "
daemon $RGMGRD $RGMGR_OPTS
echo
--- cluster/rgmanager/src/daemons/main.c 2006/05/26 17:39:32 1.9.2.17
+++ cluster/rgmanager/src/daemons/main.c 2006/09/07 18:39:45 1.9.2.18
@@ -706,22 +706,60 @@
}
+void
+wait_for_status(int pid, int fd, int timeout)
+{
+ struct timeval tv;
+ fd_set rfds;
+ int err;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+
+ if (select(fd + 1, &rfds, NULL, NULL, &tv) == 1) {
+ err = 0;
+ read(fd, &err, sizeof(err));
+ exit(!!err);
+ /* could put in messages for waiting */
+ }
+ exit(1);
+}
+
+
+#define notify_status(value) \
+do { \
+ if (waittime) { \
+ waiter = value; \
+ write(waitpipe[1], &waiter, sizeof(waiter)); \
+ close(waitpipe[0]); \
+ close(waitpipe[1]); \
+ } \
+} while(0)
+
+
int
main(int argc, char **argv)
{
int cluster_fd, rv;
char foreground = 0;
int quorate;
- int listen_fds[2], listeners;
+ int listen_fds[2], listeners, waittime = 0, waitpipe[2];
+ int waiter;
uint64_t myNodeID;
- while ((rv = getopt(argc, argv, "fd")) != EOF) {
+ while ((rv = getopt(argc, argv, "fdt:")) != EOF) {
switch (rv) {
case 'd':
debug = 1;
break;
case 'f':
foreground = 1;
+ case 't':
+ waittime = atoi(optarg);
+ if (waittime < 0)
+ waittime = 0;
default:
break;
}
@@ -736,6 +774,16 @@
clu_log_console(1);
if (!foreground && (geteuid() == 0)) {
+ if (waittime) {
+ waitpipe[0] = -1;
+ waitpipe[1] = -1;
+ pipe(waitpipe);
+ waiter = fork();
+ if (waiter > 0)
+ wait_for_status(waiter, waitpipe[0], waittime);
+ /* notreached by parent */
+ }
+
daemon_init(argv[0]);
if (!debug && !watchdog_init())
clulog(LOG_NOTICE, "Failed to start watchdog\n");
@@ -756,6 +804,7 @@
if (init_resource_groups(0) != 0) {
clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
+ notify_status(1);
return -1;
}
@@ -766,6 +815,7 @@
if (cluster_fd < 0) {
clu_log_console(1);
clulog(LOG_CRIT, "#9: Couldn't connect to cluster\n");
+ notify_status(2);
return -1;
}
msg_set_purpose(cluster_fd, MSGP_CLUSTER);
@@ -786,6 +836,7 @@
if ((listeners = msg_listen(RG_PORT, RG_PURPOSE,
listen_fds, 2)) <= 0) {
clulog(LOG_CRIT, "#10: Couldn't set up listen socket\n");
+ notify_status(3);
return -1;
}
@@ -810,6 +861,7 @@
*/
if (vf_init(myNodeID, RG_VF_PORT, NULL, NULL) != 0) {
clulog(LOG_CRIT, "#11: Couldn't set up VF listen socket\n");
+ notify_status(4);
return -1;
}
@@ -839,6 +891,8 @@
/*
Do everything useful
*/
+ notify_status(0);
+
while (running)
event_loop(cluster_fd);
--- cluster/rgmanager/src/utils/clustat.c 2006/05/26 15:32:00 1.5.2.13
+++ cluster/rgmanager/src/utils/clustat.c 2006/09/07 18:39:45 1.5.2.14
@@ -486,6 +486,11 @@
{
int x;
+ if (!membership) {
+ printf("Membership information not available\n");
+ return;
+ }
+
printf(" %-40.40s %s\n", "Member Name", "Status");
printf(" %-40.40s %s\n", "------ ----", "------");
@@ -504,8 +509,10 @@
{
int x;
- if (!membership)
+ if (!membership) {
+ printf(" <nodes/>\n");
return;
+ }
printf(" <nodes>\n");
for (x = 0; x < membership->cml_count; x++) {
@@ -608,6 +615,10 @@
/* Grab the local node ID and flag it from the list of reported
online nodes */
clu_local_nodeid(NULL, lid);
+
+ if (!all)
+ return NULL;
+
for (x=0; x<all->cml_count; x++) {
if (all->cml_members[x].cm_id == *lid) {
m = &all->cml_members[x];
^ permalink raw reply [flat|nested] 2+ messages in thread

* [Cluster-devel] cluster/rgmanager ChangeLog errors.txt init.d/ ...
@ 2006-11-03 16:30 lhh
0 siblings, 0 replies; 2+ messages in thread
From: lhh @ 2006-11-03 16:30 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: lhh at sourceware.org 2006-11-03 16:29:59
Modified files:
rgmanager : ChangeLog errors.txt
rgmanager/init.d: rgmanager
rgmanager/man : clusvcadm.8
rgmanager/src/clulib: clulog.c
rgmanager/src/daemons: main.c resrules.c restree.c rg_state.c
rg_thread.c
rgmanager/src/resources: clusterfs.sh fs.sh nfsclient.sh
ocf-shellfuncs script.sh
rgmanager/src/utils: clustat.c
Log message:
Merge fixes from RHEL4 branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.13.6.3&r2=1.5.2.13.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/errors.txt.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4&r2=1.1.2.4.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.1&r2=1.3.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/man/clusvcadm.8.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1.6.1&r2=1.1.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.6.2&r2=1.2.2.1.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.8.6.7&r2=1.9.2.8.6.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.2&r2=1.9.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.10.2.2.6.8&r2=1.10.2.2.6.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.8.6.4&r2=1.4.2.8.6.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.3&r2=1.7.2.3.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.4.5&r2=1.1.2.3.4.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.6.4.6&r2=1.4.2.6.4.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.6&r2=1.3.2.2.6.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ocf-shellfuncs.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.2&r2=1.2.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.2&r2=1.3.2.2.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.3.6.8&r2=1.5.2.3.6.9
--- cluster/rgmanager/ChangeLog 2006/06/21 18:34:39 1.5.2.13.6.3
+++ cluster/rgmanager/ChangeLog 2006/11/03 16:29:58 1.5.2.13.6.4
@@ -1,3 +1,27 @@
+2006-11-03 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/restree.c: Merge patch from Jeff Layton to instrument
+ res_exec more closely. (#213246)
+ * src/daemons/rg_thread.c: Fix segfault caused by improper loop
+ semantics (#213312)
+
+2006-10-05 Lon Hohberger <lhh@redhat.com>
+ * src/clulib/clulog.c: Change stdout/stderr to nonblocking (#207144
+ part 1)
+ * src/daemons/main.c: Give a timeout for a message to be received
+ after msg_accept() so that if we are processing a message from a
+ client which died, we don't wait forever, thereby preventing
+ group updates (#207144 part 2).
+
+2006-09-27 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/rg_state.c: Fix fail->disable state transitions,
+ bugzilla #208011
+
+2006-09-07 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/main.c, init.d/rgmanager: Make rgmanager init script
+ report failure correctly in most cases. (#193603)
+ * src/utils/clustat.c: Fix #146924 - segfault if cman is not
+ in a state to give out member lists
+
2006-06-21 Lon Hohberger <lhh@redhat.com>
* src/daemons/nodeevent.c: Don't use the rg thread refcount in
node event handling (#194491)
--- cluster/rgmanager/errors.txt 2005/03/21 22:01:30 1.1.2.4
+++ cluster/rgmanager/errors.txt 2006/11/03 16:29:58 1.1.2.4.6.1
@@ -76,7 +76,9 @@
The resource group manager was unable to find a plugin which was able to
talk to the cluster infrastructure. Generally, this occurs when no cluster
infrastruture is running. Try starting the preferred cluster infrastructure
-for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager.
+for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager. This
+can also occur if CMAN is loaded, while DLM is not. Rgmanager (really,
+the SM magma plugin) requires that the DLM be loaded prior to starting.
#10: Couldn't set up listen socket
--- cluster/rgmanager/init.d/rgmanager 2006/06/16 20:07:46 1.3.2.1.6.1
+++ cluster/rgmanager/init.d/rgmanager 2006/11/03 16:29:58 1.3.2.1.6.2
@@ -93,6 +93,7 @@
case $1 in
start)
+ [ -z "$RGMGR_OPTS" ] && RGMGR_OPTS="-t 30"
echo -n $"Starting $ID: "
daemon $RGMGRD $RGMGR_OPTS
echo
--- cluster/rgmanager/man/clusvcadm.8 2006/06/16 20:07:46 1.1.2.1.6.1
+++ cluster/rgmanager/man/clusvcadm.8 2006/11/03 16:29:58 1.1.2.1.6.2
@@ -67,6 +67,11 @@
preferred
target member on which to start the
service.
+.IP "\-r <service>"
+Relocates the user service named
+.I
+service
+to another cluster member.
.IP "\-R <service>"
Restarts the user service named
.I
--- cluster/rgmanager/src/clulib/clulog.c 2006/06/16 20:07:46 1.2.2.1.6.2
+++ cluster/rgmanager/src/clulib/clulog.c 2006/11/03 16:29:58 1.2.2.1.6.3
@@ -20,7 +20,7 @@
/** @file
* Library routines for communicating with the logging daemon.
*
- * $Id: clulog.c,v 1.2.2.1.6.2 2006/06/16 20:07:46 lhh Exp $
+ * $Id: clulog.c,v 1.2.2.1.6.3 2006/11/03 16:29:58 lhh Exp $
*
* Author: Jeff Moyer <moyer@missioncriticallinux.com>
*/
@@ -50,7 +50,7 @@
#include <string.h>
-static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.2 $";
+static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.3 $";
#ifdef DEBUG
#include <assert.h>
@@ -183,6 +183,14 @@
clu_log_console(int onoff)
{
int ret = useconsole;
+ int val;
+
+ if (onoff) {
+ val = fcntl(STDERR_FILENO, F_GETFL, 0);
+ fcntl(STDERR_FILENO, F_SETFL, val|O_NONBLOCK);
+ val = fcntl(STDOUT_FILENO, F_GETFL, 0);
+ fcntl(STDOUT_FILENO, F_SETFL, val|O_NONBLOCK);
+ }
useconsole = !!onoff;
return ret;
@@ -204,6 +212,7 @@
char logmsg[MAX_LOGMSG_LEN]; /* message to go to the log */
char printmsg[MAX_LOGMSG_LEN]; /* message to go to stdout */
int syslog_flags = LOG_NDELAY;
+ int val;
pthread_mutex_lock(&log_mutex);
if (severity > loglevel) {
@@ -275,9 +284,19 @@
MAX_LOGMSG_LEN - strlen(printmsg), fmt, args);
va_end(args);
- fprintf(stdout, "%s", printmsg);
+ if (useconsole && !write_to_cons) {
+ val = fcntl(STDOUT_FILENO, F_GETFL, 0);
+ fcntl(STDOUT_FILENO, F_SETFL, val | O_NONBLOCK);
+ }
+
+ /* Ignore error return code */
+ write(STDOUT_FILENO, printmsg, strlen(printmsg));
+
+ if (useconsole && !write_to_cons)
+ fcntl(STDOUT_FILENO, F_SETFL, val);
}
+ /* TODO make this non-blocking */
syslog(severity, logmsg);
pthread_mutex_unlock(&log_mutex);
--- cluster/rgmanager/src/daemons/main.c 2006/06/16 20:07:46 1.9.2.8.6.7
+++ cluster/rgmanager/src/daemons/main.c 2006/11/03 16:29:58 1.9.2.8.6.8
@@ -342,8 +342,18 @@
int ret;
generic_msg_hdr msg_hdr;
SmMessageSt msg_sm;
+ fd_set rfds;
+ struct timeval tv = { 0, 500000 };
/* Peek-a-boo */
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ if (select(fd+1, &rfds, NULL, NULL, &tv) <= 0) {
+ clulog(LOG_WARNING, "Client timeout after new connection.\n");
+ msg_close(fd);
+ return -1;
+ }
+
ret = msg_peek(fd, &msg_hdr, sizeof(msg_hdr));
if (ret != sizeof (generic_msg_hdr)) {
clulog(LOG_ERR, "#37: Error receiving message header\n");
@@ -706,22 +716,62 @@
}
+void
+wait_for_status(int pid, int fd, int timeout)
+{
+ struct timeval tv;
+ fd_set rfds;
+ int err;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+
+ if (select(fd + 1, &rfds, NULL, NULL, &tv) == 1) {
+ err = 0;
+ read(fd, &err, sizeof(err));
+ exit(!!err);
+ /* could put in messages for waiting */
+ }
+ exit(1);
+}
+
+
+#define notify_status(value) \
+do { \
+ if (waittime) { \
+ waiter = value; \
+ write(waitpipe[1], &waiter, sizeof(waiter)); \
+ close(waitpipe[0]); \
+ close(waitpipe[1]); \
+ } \
+} while(0)
+
+
int
main(int argc, char **argv)
{
int cluster_fd, rv;
char foreground = 0;
int quorate;
- int listen_fds[2], listeners;
+ int listen_fds[2], listeners, waittime = 0, waitpipe[2];
+ int waiter;
uint64_t myNodeID;
- while ((rv = getopt(argc, argv, "fd")) != EOF) {
+ while ((rv = getopt(argc, argv, "fdt:")) != EOF) {
switch (rv) {
case 'd':
debug = 1;
break;
case 'f':
foreground = 1;
+ break;
+ case 't':
+ waittime = atoi(optarg);
+ if (waittime < 0)
+ waittime = 0;
+ break;
default:
break;
}
@@ -736,6 +786,16 @@
clu_log_console(1);
if (!foreground && (geteuid() == 0)) {
+ if (waittime) {
+ waitpipe[0] = -1;
+ waitpipe[1] = -1;
+ pipe(waitpipe);
+ waiter = fork();
+ if (waiter > 0)
+ wait_for_status(waiter, waitpipe[0], waittime);
+ /* notreached by parent */
+ }
+
daemon_init(argv[0]);
if (!debug && !watchdog_init())
clulog(LOG_NOTICE, "Failed to start watchdog\n");
@@ -756,6 +816,7 @@
if (init_resource_groups(0) != 0) {
clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
+ notify_status(1);
return -1;
}
@@ -766,6 +827,7 @@
if (cluster_fd < 0) {
clu_log_console(1);
clulog(LOG_CRIT, "#9: Couldn't connect to cluster\n");
+ notify_status(2);
return -1;
}
msg_set_purpose(cluster_fd, MSGP_CLUSTER);
@@ -786,6 +848,7 @@
if ((listeners = msg_listen(RG_PORT, RG_PURPOSE,
listen_fds, 2)) <= 0) {
clulog(LOG_CRIT, "#10: Couldn't set up listen socket\n");
+ notify_status(3);
return -1;
}
@@ -810,6 +873,7 @@
*/
if (vf_init(myNodeID, RG_VF_PORT, NULL, NULL) != 0) {
clulog(LOG_CRIT, "#11: Couldn't set up VF listen socket\n");
+ notify_status(4);
return -1;
}
@@ -839,6 +903,8 @@
/*
Do everything useful
*/
+ notify_status(0);
+
while (running)
event_loop(cluster_fd);
--- cluster/rgmanager/src/daemons/resrules.c 2005/03/21 22:01:30 1.9.2.2
+++ cluster/rgmanager/src/daemons/resrules.c 2006/11/03 16:29:58 1.9.2.2.6.1
@@ -426,6 +426,48 @@
/**
+ Take the first unique + required attr and call it the 'primary' attr
+ for rgmanager. If there's no primary, index 0 becomes the primary attr.
+ */
+int
+choose_primary(resource_attr_t *attrs)
+{
+ int x = 0, primary = 0;
+ int flags;
+ char *name, *val;
+
+ if (!attrs)
+ return 0;
+
+ for (x = 0; attrs[x].ra_name; x++) {
+
+ if ((attrs[x].ra_flags & (RA_UNIQUE | RA_REQUIRED)) ==
+ (RA_UNIQUE | RA_REQUIRED)) {
+ primary = x;
+ break;
+ }
+ }
+
+ if (primary != 0) {
+ flags = attrs[primary].ra_flags | RA_PRIMARY;
+ name = attrs[primary].ra_name;
+ val = attrs[primary].ra_value;
+
+ attrs[primary].ra_flags = attrs[0].ra_flags;
+ attrs[primary].ra_name = attrs[0].ra_name;
+ attrs[primary].ra_value = attrs[0].ra_value;
+
+ attrs[0].ra_flags = flags;
+ attrs[0].ra_name = name;
+ attrs[0].ra_value = val;
+ } else {
+ attrs[0].ra_flags |= RA_PRIMARY;
+ }
+
+ return 0;
+}
+
+/**
Store a child type in the child array of a resource rule.
XXX Could be rewritten to use list macros.
@@ -551,7 +593,7 @@
children:
- printf("Recognized child resource types:\n");
+ printf("Explicitly defined child resource types:\n");
if (!rr->rr_childtypes) {
printf(" - None -\n\n");
return;
@@ -697,6 +739,9 @@
store_attribute(&rr->rr_attrs, attrname, ret, flags);
}
+ if (!primary_found)
+ choose_primary(rr->rr_attrs);
+
return 0;
}
@@ -991,6 +1036,10 @@
fn = basename(de->d_name);
if (!fn)
continue;
+
+ if ((fn != NULL) && (strlen(fn) > 0) &&
+ (fn[strlen(fn)-1] == '~'))
+ continue;
snprintf(path, sizeof(path), "%s/%s",
rpath, de->d_name);
--- cluster/rgmanager/src/daemons/restree.c 2006/08/16 14:55:40 1.10.2.2.6.8
+++ cluster/rgmanager/src/daemons/restree.c 2006/11/03 16:29:58 1.10.2.2.6.9
@@ -181,11 +181,13 @@
Store the OCF Resource Instance (primary attr)
*/
n = strlen(OCF_RESOURCE_INSTANCE_STR) +
+ strlen(res->r_rule->rr_type) + 1 +
strlen(res->r_attrs[0].ra_value) + 2;
val = malloc(n);
if (!val)
return;
- snprintf(val, n, "%s=%s", OCF_RESOURCE_INSTANCE_STR,
+ snprintf(val, n, "%s=%s:%s", OCF_RESOURCE_INSTANCE_STR,
+ res->r_rule->rr_type,
res->r_attrs[0].ra_value);
*env = val; env++;
@@ -338,18 +340,30 @@
resource_t *res = node->rn_resource;
char fullpath[2048];
- if (!res->r_rule->rr_agent)
+ if (!res->r_rule->rr_agent) {
+ clulog(LOG_DEBUG,
+ "%s on %s \"%s\" no rr_agent\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value);
return 0;
+ }
#ifdef DEBUG
env = build_env(node, op);
- if (!env)
+ if (!env) {
+ clulog(LOG_DEBUG,
+ "%s on %s \"%s\" build_env failed %d\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value, errno);
return -errno;
+ }
#endif
childpid = fork();
- if (childpid < 0)
+ if (childpid < 0) {
+ clulog(LOG_ERR, "%s: fork failed (%d)!\n", __func__, errno);
return -errno;
+ }
if (!childpid) {
/* Child */
@@ -366,8 +380,13 @@
env = build_env(node, op, depth);
#endif
- if (!env)
+ if (!env) {
+ clulog(LOG_DEBUG,
+ "%s on %s \"%s\" build_env failed (ENOMEM)\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value);
exit(-ENOMEM);
+ }
if (res->r_rule->rr_agent[0] != '/')
snprintf(fullpath, sizeof(fullpath), "%s/%s",
@@ -397,7 +416,13 @@
if (ret) {
clulog(LOG_NOTICE,
- "%s on %s \"%s\" returned %d (%s)\n",
+ "%s on %s:%s returned %d (%s)\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value, ret,
+ ocf_strerror(ret));
+ } else {
+ clulog(LOG_DEBUG,
+ "%s on %s:%s returned %d (%s)\n",
res_ops[op], res->r_rule->rr_type,
res->r_attrs->ra_value, ret,
ocf_strerror(ret));
@@ -409,6 +434,11 @@
if (!WIFSIGNALED(ret))
assert(0);
+ clulog(LOG_ERR,
+ "%s on %s:%s caught signal %d\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value, WTERMSIG(ret));
+
return -EFAULT;
}
--- cluster/rgmanager/src/daemons/rg_state.c 2006/06/16 20:07:46 1.4.2.8.6.4
+++ cluster/rgmanager/src/daemons/rg_state.c 2006/11/03 16:29:58 1.4.2.8.6.5
@@ -879,6 +879,7 @@
void *lockp = NULL;
rg_state_t svcStatus;
int ret;
+ int old_state;
if (!rg_quorate()) {
clulog(LOG_WARNING, "#69: Unclean %s of %s\n",
@@ -918,6 +919,8 @@
break;
}
+ old_state = svcStatus.rs_state;
+
clulog(LOG_NOTICE, "Stopping service %s\n", svcName);
if (recover)
@@ -937,7 +940,15 @@
ret = group_op(svcName, RG_STOP);
- _svc_stop_finish(svcName, ret, newstate);
+ if (old_state == RG_STATE_FAILED && newstate == RG_STATE_DISABLED) {
+ if (ret)
+ clulog(LOG_ALERT, "Marking %s as 'disabled', "
+ "but some resources may still be allocated!\n",
+ svcName);
+ _svc_stop_finish(svcName, 0, newstate);
+ } else {
+ _svc_stop_finish(svcName, ret, newstate);
+ }
return ret;
}
--- cluster/rgmanager/src/daemons/rg_thread.c 2006/06/16 20:07:46 1.7.2.3.6.3
+++ cluster/rgmanager/src/daemons/rg_thread.c 2006/11/03 16:29:58 1.7.2.3.6.4
@@ -110,18 +110,22 @@
purge_status_checks(request_t **list)
{
request_t *curr;
+ int found;
if (!list)
return;
- list_do(list, curr) {
- if (curr->rr_request != RG_STATUS)
- continue;
-
- list_remove(list, curr);
- rq_free(curr);
- curr = *list;
- } while (!list_done(list, curr));
+ do {
+ found = 0;
+ list_do(list, curr) {
+ if (curr->rr_request == RG_STATUS) {
+ list_remove(list, curr);
+ rq_free(curr);
+ found = 1;
+ break;
+ }
+ } while (!list_done(list, curr));
+ } while (found);
}
--- cluster/rgmanager/src/resources/clusterfs.sh 2006/06/16 20:07:46 1.1.2.3.4.5
+++ cluster/rgmanager/src/resources/clusterfs.sh 2006/11/03 16:29:58 1.1.2.3.4.6
@@ -802,6 +802,7 @@
[ "$OCF_RESKEY_nfslock" = "1" ]; then
ocf_log warning "Dropping node-wide NFS locks"
mkdir -p $mp/.clumanager/statd
+ pkill -KILL -x lockd
# Copy out the notify list; our
# IPs are already torn down
if notify_list_store $mp/.clumanager/statd; then
@@ -889,12 +890,16 @@
;;
status|monitor)
isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
+ if [ $? -ne $YES ]; then
+ ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}"
+ exit $OCF_ERR_GENERIC
+ fi
isAlive ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
-
- exit 0
+ [ $? -eq $YES ] && exit 0
+
+ ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!"
+ exit $OCF_ERR_GENERIC
;;
restart)
stopFilesystem
--- cluster/rgmanager/src/resources/fs.sh 2006/08/11 15:00:59 1.4.2.6.4.6
+++ cluster/rgmanager/src/resources/fs.sh 2006/11/03 16:29:58 1.4.2.6.4.7
@@ -243,7 +243,7 @@
{
if [ -z "$OCF_RESKEY_mountpoint" ]; then
ocf_log err "No mount point specified."
- return 1
+ return $OCF_ERR_ARGS
fi
if ! [ -e "$OCF_RESKEY_mountpoint" ]; then
@@ -514,7 +514,7 @@
dev=$(real_device $1)
if [ -z "$dev" ]; then
ocf_log err \
- "isMounted: Could not match $1 with a real device"
+ "fs (isMounted): Could not match $1 with a real device"
return $FAIL
fi
mp=$2
@@ -530,8 +530,8 @@
# place
#
if [ -n "$tmp_mp" -a "$tmp_mp" != "$mp" ]; then
- ocf_log warn "\
-Device $dev is mounted on $tmp_mp instead of $mp"
+ ocf_log warn \
+"Device $dev is mounted on $tmp_mp instead of $mp"
fi
return $YES
fi
@@ -553,14 +553,14 @@
declare rw
if [ $# -ne 1 ]; then
- logAndPrint $LOG_ERR "Usage: isAlive mount_point"
+ ocf_log err "Usage: isAlive mount_point"
return $FAIL
fi
mount_point=$1
test -d $mount_point
if [ $? -ne 0 ]; then
- logAndPrint $LOG_ERR "$mount_point is not a directory"
+ ocf_log err "fs (isAlive): $mount_point is not a directory"
return $FAIL
fi
@@ -707,6 +707,7 @@
return $ret
}
+
activeMonitor() {
declare monpath=$OCF_RESKEY_mountpoint/.clumanager
declare p
@@ -733,7 +734,7 @@
case $1 in
start)
ocf_log info "Starting active monitoring of $OCF_RESKEY_mountpoint"
- mkdir -p $(dirname $monpath) || return 1
+ mkdir -p $(dirname $monpath) || return $OCF_ERR_GENERIC
devmon $args -p $monpath/devmon.data -P $monpath/devmon.pid
;;
stop)
@@ -787,6 +788,7 @@
enable_fs_quotas()
{
declare -i need_check=0
+ declare -i rv
declare quotaopts=""
declare mopt
declare opts=$1
@@ -794,7 +796,7 @@
if [ -z "`which quotaon`" ]; then
ocf_log err "quotaon not found in $PATH"
- return 1
+ return $OCF_ERR_GENERIC
fi
for mopt in `echo $opts | sed -e s/,/\ /g`; do
@@ -834,8 +836,13 @@
ocf_log info "Enabling Quotas on $mp"
ocf_log debug "quotaon -$quotaopts $mp"
quotaon -$quotaopts $mp
+ rv=$?
+ if [ $rv -ne 0 ]; then
+ # Just a warning
+ ocf_log warn "Unable to turn on quotas for $mp; return = $rv"
+ fi
- return $?
+ return $rv
}
@@ -857,14 +864,14 @@
mp=${OCF_RESKEY_mountpoint}
case "$mp" in
""|"[ ]*") # nothing to mount
- return $SUCCESS
+ return $OCF_SUCCESS
;;
/*) # found it
;;
*) # invalid format
ocf_log err \
"startFilesystem: Invalid mount point format (must begin with a '/'): \'$mp\'"
- return $FAIL
+ return $OCF_ERR_ARGS
;;
esac
@@ -875,7 +882,7 @@
if [ -z "$dev" ]; then
ocf_log err "\
startFilesystem: Could not match $OCF_RESKEY_device with a real device"
- return $FAIL
+ return $OCF_ERR_ARGS
fi
#
@@ -885,7 +892,7 @@
if ! [ -d "$mp" ]; then
ocf_log err"\
startFilesystem: Mount point $mp exists but is not a directory"
- return $FAIL
+ return $OCF_ERR_ARGS
fi
else
ocf_log err "\
@@ -914,7 +921,7 @@
case $? in
$YES) # already mounted
ocf_log debug "$dev already mounted"
- return $SUCCESS
+ return $OCF_SUCCESS
;;
$NO) # not mounted, continue
;;
@@ -1215,29 +1222,35 @@
;;
status|monitor)
isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
+ if [ $? -ne $YES ]; then
+ ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}"
+ exit $OCF_ERR_GENERIC
+ fi
if [ "$OCF_RESKEY_active_monitor" = "yes" ] ||
[ "$OCF_RESKEY_active_monitor" = "1" ]; then
- activeMonitor status || exit $OCF_ERR_GENERIC
- exit 0
+ activeMonitor status
+ [ $? -eq 0 ] && exit 0
+ ocf_log err "fs:${OCF_RESKEY_name}: Active Monitoring reported a failure"
+ exit $OCF_ERR_GENERIC
fi
isAlive ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
-
- exit 0
+ [ $? -eq $YES ] && exit 0
+
+ ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!"
+ exit $OCF_ERR_GENERIC
;;
restart)
stopFilesystem
if [ $? -ne 0 ]; then
- exit 1
+ exit $OCF_ERR_GENERIC
fi
startFilesystem
if [ $? -ne 0 ]; then
- exit 1
+ exit $OCF_ERR_GENERIC
fi
exit 0
--- cluster/rgmanager/src/resources/nfsclient.sh 2006/08/02 17:24:18 1.3.2.2.6.6
+++ cluster/rgmanager/src/resources/nfsclient.sh 2006/11/03 16:29:58 1.3.2.2.6.7
@@ -340,7 +340,11 @@
sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g')
exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
"^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}"
+
rv=$?
+ if [ $rv -ne 0 ]; then
+ ocf_log err "nfsclient:$OCF_RESKEY_name is missing!"
+ fi
;;
recover)
--- cluster/rgmanager/src/resources/ocf-shellfuncs 2006/06/16 20:07:46 1.2.2.2
+++ cluster/rgmanager/src/resources/ocf-shellfuncs 2006/11/03 16:29:58 1.2.2.3
@@ -1,5 +1,5 @@
#
-# $Id: ocf-shellfuncs,v 1.2.2.2 2006/06/16 20:07:46 lhh Exp $
+# $Id: ocf-shellfuncs,v 1.2.2.3 2006/11/03 16:29:58 lhh Exp $
#
# Common helper functions for the OCF Resource Agents supplied by
# heartbeat.
--- cluster/rgmanager/src/resources/script.sh 2005/11/21 21:48:25 1.3.2.2.6.2
+++ cluster/rgmanager/src/resources/script.sh 2006/11/03 16:29:58 1.3.2.2.6.3
@@ -110,4 +110,10 @@
# Don't need to catch return codes; this one will work.
ocf_log info "Executing ${OCF_RESKEY_file} $1"
-exec /bin/sh ${OCF_RESKEY_file} $1
+${OCF_RESKEY_file} $1
+
+declare -i rv=$?
+if [ $rv -ne 0 ]; then
+ ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
+ return $OCF_ERR_GENERIC
+fi
--- cluster/rgmanager/src/utils/clustat.c 2006/06/16 20:07:47 1.5.2.3.6.8
+++ cluster/rgmanager/src/utils/clustat.c 2006/11/03 16:29:59 1.5.2.3.6.9
@@ -486,6 +486,11 @@
{
int x;
+ if (!membership) {
+ printf("Membership information not available\n");
+ return;
+ }
+
printf(" %-40.40s %s\n", "Member Name", "Status");
printf(" %-40.40s %s\n", "------ ----", "------");
@@ -504,8 +509,10 @@
{
int x;
- if (!membership)
+ if (!membership) {
+ printf(" <nodes/>\n");
return;
+ }
printf(" <nodes>\n");
for (x = 0; x < membership->cml_count; x++) {
@@ -608,6 +615,10 @@
/* Grab the local node ID and flag it from the list of reported
online nodes */
clu_local_nodeid(NULL, lid);
+
+ if (!all)
+ return NULL;
+
for (x=0; x<all->cml_count; x++) {
if (all->cml_members[x].cm_id == *lid) {
m = &all->cml_members[x];
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2006-11-03 16:30 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-07 18:39 [Cluster-devel] cluster/rgmanager ChangeLog errors.txt init.d/ lhh
-- strict thread matches above, loose matches on Subject: below --
2006-11-03 16:30 lhh
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).