* [Cluster-devel] cluster/rgmanager ChangeLog errors.txt init.d/ ...
@ 2006-09-07 18:39 lhh
0 siblings, 0 replies; 2+ messages in thread
From: lhh @ 2006-09-07 18:39 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2006-09-07 18:39:45
Modified files:
rgmanager : ChangeLog errors.txt
rgmanager/init.d: rgmanager
rgmanager/src/daemons: main.c
rgmanager/src/utils: clustat.c
Log message:
2006-09-07 Lon Hohberger <lhh@redhat.com>
* src/daemons/main.c, init.d/rgmanager: Make rgmanager init script
report failure correctly in most cases. (#193603)
* src/utils/clustat.c: Fix #146924 - segfault if cman is not
in a state to give out member lists
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.20&r2=1.5.2.21
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/errors.txt.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.2&r2=1.3.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.9.2.17&r2=1.9.2.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.13&r2=1.5.2.14
--- cluster/rgmanager/ChangeLog 2006/06/21 18:34:19 1.5.2.20
+++ cluster/rgmanager/ChangeLog 2006/09/07 18:39:45 1.5.2.21
@@ -1,3 +1,9 @@
+2006-09-07 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/main.c, init.d/rgmanager: Make rgmanager init script
+ report failure correctly in most cases. (#193603)
+ * src/utils/clustat.c: Fix #146924 - segfault if cman is not
+ in a state to give out member lists
+
2006-06-21 Lon Hohberger <lhh@redhat.com>
* src/daemons/nodeevent.c: Don't use the rg thread refcount in
node event handling (#194491)
--- cluster/rgmanager/errors.txt 2005/03/21 22:01:30 1.1.2.4
+++ cluster/rgmanager/errors.txt 2006/09/07 18:39:45 1.1.2.5
@@ -76,7 +76,9 @@
The resource group manager was unable to find a plugin which was able to
talk to the cluster infrastructure. Generally, this occurs when no cluster
infrastruture is running. Try starting the preferred cluster infrastructure
-for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager.
+for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager. This
+can also occur if CMAN is loaded, while DLM is not. Rgmanager (really,
+the SM magma plugin) requires that the DLM be loaded prior to starting.
#10: Couldn't set up listen socket
--- cluster/rgmanager/init.d/rgmanager 2006/05/12 21:28:30 1.3.2.2
+++ cluster/rgmanager/init.d/rgmanager 2006/09/07 18:39:45 1.3.2.3
@@ -93,6 +93,7 @@
case $1 in
start)
+ [ -z "$RGMGR_OPTS" ] && RGMGR_OPTS="-t 30"
echo -n $"Starting $ID: "
daemon $RGMGRD $RGMGR_OPTS
echo
--- cluster/rgmanager/src/daemons/main.c 2006/05/26 17:39:32 1.9.2.17
+++ cluster/rgmanager/src/daemons/main.c 2006/09/07 18:39:45 1.9.2.18
@@ -706,22 +706,60 @@
}
+void
+wait_for_status(int pid, int fd, int timeout)
+{
+ struct timeval tv;
+ fd_set rfds;
+ int err;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+
+ if (select(fd + 1, &rfds, NULL, NULL, &tv) == 1) {
+ err = 0;
+ read(fd, &err, sizeof(err));
+ exit(!!err);
+ /* could put in messages for waiting */
+ }
+ exit(1);
+}
+
+
+#define notify_status(value) \
+do { \
+ if (waittime) { \
+ waiter = value; \
+ write(waitpipe[1], &waiter, sizeof(waiter)); \
+ close(waitpipe[0]); \
+ close(waitpipe[1]); \
+ } \
+} while(0)
+
+
int
main(int argc, char **argv)
{
int cluster_fd, rv;
char foreground = 0;
int quorate;
- int listen_fds[2], listeners;
+ int listen_fds[2], listeners, waittime = 0, waitpipe[2];
+ int waiter;
uint64_t myNodeID;
- while ((rv = getopt(argc, argv, "fd")) != EOF) {
+ while ((rv = getopt(argc, argv, "fdt:")) != EOF) {
switch (rv) {
case 'd':
debug = 1;
break;
case 'f':
foreground = 1;
+ case 't':
+ waittime = atoi(optarg);
+ if (waittime < 0)
+ waittime = 0;
default:
break;
}
@@ -736,6 +774,16 @@
clu_log_console(1);
if (!foreground && (geteuid() == 0)) {
+ if (waittime) {
+ waitpipe[0] = -1;
+ waitpipe[1] = -1;
+ pipe(waitpipe);
+ waiter = fork();
+ if (waiter > 0)
+ wait_for_status(waiter, waitpipe[0], waittime);
+ /* notreached by parent */
+ }
+
daemon_init(argv[0]);
if (!debug && !watchdog_init())
clulog(LOG_NOTICE, "Failed to start watchdog\n");
@@ -756,6 +804,7 @@
if (init_resource_groups(0) != 0) {
clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
+ notify_status(1);
return -1;
}
@@ -766,6 +815,7 @@
if (cluster_fd < 0) {
clu_log_console(1);
clulog(LOG_CRIT, "#9: Couldn't connect to cluster\n");
+ notify_status(2);
return -1;
}
msg_set_purpose(cluster_fd, MSGP_CLUSTER);
@@ -786,6 +836,7 @@
if ((listeners = msg_listen(RG_PORT, RG_PURPOSE,
listen_fds, 2)) <= 0) {
clulog(LOG_CRIT, "#10: Couldn't set up listen socket\n");
+ notify_status(3);
return -1;
}
@@ -810,6 +861,7 @@
*/
if (vf_init(myNodeID, RG_VF_PORT, NULL, NULL) != 0) {
clulog(LOG_CRIT, "#11: Couldn't set up VF listen socket\n");
+ notify_status(4);
return -1;
}
@@ -839,6 +891,8 @@
/*
Do everything useful
*/
+ notify_status(0);
+
while (running)
event_loop(cluster_fd);
--- cluster/rgmanager/src/utils/clustat.c 2006/05/26 15:32:00 1.5.2.13
+++ cluster/rgmanager/src/utils/clustat.c 2006/09/07 18:39:45 1.5.2.14
@@ -486,6 +486,11 @@
{
int x;
+ if (!membership) {
+ printf("Membership information not available\n");
+ return;
+ }
+
printf(" %-40.40s %s\n", "Member Name", "Status");
printf(" %-40.40s %s\n", "------ ----", "------");
@@ -504,8 +509,10 @@
{
int x;
- if (!membership)
+ if (!membership) {
+ printf(" <nodes/>\n");
return;
+ }
printf(" <nodes>\n");
for (x = 0; x < membership->cml_count; x++) {
@@ -608,6 +615,10 @@
/* Grab the local node ID and flag it from the list of reported
online nodes */
clu_local_nodeid(NULL, lid);
+
+ if (!all)
+ return NULL;
+
for (x=0; x<all->cml_count; x++) {
if (all->cml_members[x].cm_id == *lid) {
m = &all->cml_members[x];
^ permalink raw reply [flat|nested] 2+ messages in thread

* [Cluster-devel] cluster/rgmanager ChangeLog errors.txt init.d/ ...
@ 2006-11-03 16:30 lhh
0 siblings, 0 replies; 2+ messages in thread
From: lhh @ 2006-11-03 16:30 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: lhh at sourceware.org 2006-11-03 16:29:59
Modified files:
rgmanager : ChangeLog errors.txt
rgmanager/init.d: rgmanager
rgmanager/man : clusvcadm.8
rgmanager/src/clulib: clulog.c
rgmanager/src/daemons: main.c resrules.c restree.c rg_state.c
rg_thread.c
rgmanager/src/resources: clusterfs.sh fs.sh nfsclient.sh
ocf-shellfuncs script.sh
rgmanager/src/utils: clustat.c
Log message:
Merge fixes from RHEL4 branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.13.6.3&r2=1.5.2.13.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/errors.txt.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4&r2=1.1.2.4.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.1&r2=1.3.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/man/clusvcadm.8.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1.6.1&r2=1.1.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.6.2&r2=1.2.2.1.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.8.6.7&r2=1.9.2.8.6.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.2&r2=1.9.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.10.2.2.6.8&r2=1.10.2.2.6.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.8.6.4&r2=1.4.2.8.6.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.3&r2=1.7.2.3.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.4.5&r2=1.1.2.3.4.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.6.4.6&r2=1.4.2.6.4.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.6&r2=1.3.2.2.6.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ocf-shellfuncs.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.2&r2=1.2.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.2&r2=1.3.2.2.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.3.6.8&r2=1.5.2.3.6.9
--- cluster/rgmanager/ChangeLog 2006/06/21 18:34:39 1.5.2.13.6.3
+++ cluster/rgmanager/ChangeLog 2006/11/03 16:29:58 1.5.2.13.6.4
@@ -1,3 +1,27 @@
+2006-11-03 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/restree.c: Merge patch from Jeff Layton to instrument
+ res_exec more closely. (#213246)
+ * src/daemons/rg_thread.c: Fix segfault caused by improper loop
+ semantics (#213312)
+
+2006-10-05 Lon Hohberger <lhh@redhat.com>
+ * src/clulib/clulog.c: Change stdout/stderr to nonblocking (#207144
+ part 1)
+ * src/daemons/main.c: Give a timeout for a message to be received
+ after msg_accept() so that if we are processing a message from a
+ client which died, we don't wait forever, thereby preventing
+ group updates (#207144 part 2).
+
+2006-09-27 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/rg_state.c: Fix fail->disable state transitions,
+ bugzilla #208011
+
+2006-09-07 Lon Hohberger <lhh@redhat.com>
+ * src/daemons/main.c, init.d/rgmanager: Make rgmanager init script
+ report failure correctly in most cases. (#193603)
+ * src/utils/clustat.c: Fix #146924 - segfault if cman is not
+ in a state to give out member lists
+
2006-06-21 Lon Hohberger <lhh@redhat.com>
* src/daemons/nodeevent.c: Don't use the rg thread refcount in
node event handling (#194491)
--- cluster/rgmanager/errors.txt 2005/03/21 22:01:30 1.1.2.4
+++ cluster/rgmanager/errors.txt 2006/11/03 16:29:58 1.1.2.4.6.1
@@ -76,7 +76,9 @@
The resource group manager was unable to find a plugin which was able to
talk to the cluster infrastructure. Generally, this occurs when no cluster
infrastruture is running. Try starting the preferred cluster infrastructure
-for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager.
+for your configuration (e.g. CMAN+DLM, GuLM) and restarting rgmanager. This
+can also occur if CMAN is loaded, while DLM is not. Rgmanager (really,
+the SM magma plugin) requires that the DLM be loaded prior to starting.
#10: Couldn't set up listen socket
--- cluster/rgmanager/init.d/rgmanager 2006/06/16 20:07:46 1.3.2.1.6.1
+++ cluster/rgmanager/init.d/rgmanager 2006/11/03 16:29:58 1.3.2.1.6.2
@@ -93,6 +93,7 @@
case $1 in
start)
+ [ -z "$RGMGR_OPTS" ] && RGMGR_OPTS="-t 30"
echo -n $"Starting $ID: "
daemon $RGMGRD $RGMGR_OPTS
echo
--- cluster/rgmanager/man/clusvcadm.8 2006/06/16 20:07:46 1.1.2.1.6.1
+++ cluster/rgmanager/man/clusvcadm.8 2006/11/03 16:29:58 1.1.2.1.6.2
@@ -67,6 +67,11 @@
preferred
target member on which to start the
service.
+.IP "\-r <service>"
+Relocates the user service named
+.I
+service
+to another cluster member.
.IP "\-R <service>"
Restarts the user service named
.I
--- cluster/rgmanager/src/clulib/clulog.c 2006/06/16 20:07:46 1.2.2.1.6.2
+++ cluster/rgmanager/src/clulib/clulog.c 2006/11/03 16:29:58 1.2.2.1.6.3
@@ -20,7 +20,7 @@
/** @file
* Library routines for communicating with the logging daemon.
*
- * $Id: clulog.c,v 1.2.2.1.6.2 2006/06/16 20:07:46 lhh Exp $
+ * $Id: clulog.c,v 1.2.2.1.6.3 2006/11/03 16:29:58 lhh Exp $
*
* Author: Jeff Moyer <moyer@missioncriticallinux.com>
*/
@@ -50,7 +50,7 @@
#include <string.h>
-static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.2 $";
+static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.3 $";
#ifdef DEBUG
#include <assert.h>
@@ -183,6 +183,14 @@
clu_log_console(int onoff)
{
int ret = useconsole;
+ int val;
+
+ if (onoff) {
+ val = fcntl(STDERR_FILENO, F_GETFL, 0);
+ fcntl(STDERR_FILENO, F_SETFL, val|O_NONBLOCK);
+ val = fcntl(STDOUT_FILENO, F_GETFL, 0);
+ fcntl(STDOUT_FILENO, F_SETFL, val|O_NONBLOCK);
+ }
useconsole = !!onoff;
return ret;
@@ -204,6 +212,7 @@
char logmsg[MAX_LOGMSG_LEN]; /* message to go to the log */
char printmsg[MAX_LOGMSG_LEN]; /* message to go to stdout */
int syslog_flags = LOG_NDELAY;
+ int val;
pthread_mutex_lock(&log_mutex);
if (severity > loglevel) {
@@ -275,9 +284,19 @@
MAX_LOGMSG_LEN - strlen(printmsg), fmt, args);
va_end(args);
- fprintf(stdout, "%s", printmsg);
+ if (useconsole && !write_to_cons) {
+ val = fcntl(STDOUT_FILENO, F_GETFL, 0);
+ fcntl(STDOUT_FILENO, F_SETFL, val | O_NONBLOCK);
+ }
+
+ /* Ignore error return code */
+ write(STDOUT_FILENO, printmsg, strlen(printmsg));
+
+ if (useconsole && !write_to_cons)
+ fcntl(STDOUT_FILENO, F_SETFL, val);
}
+ /* TODO make this non-blocking */
syslog(severity, logmsg);
pthread_mutex_unlock(&log_mutex);
--- cluster/rgmanager/src/daemons/main.c 2006/06/16 20:07:46 1.9.2.8.6.7
+++ cluster/rgmanager/src/daemons/main.c 2006/11/03 16:29:58 1.9.2.8.6.8
@@ -342,8 +342,18 @@
int ret;
generic_msg_hdr msg_hdr;
SmMessageSt msg_sm;
+ fd_set rfds;
+ struct timeval tv = { 0, 500000 };
/* Peek-a-boo */
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ if (select(fd+1, &rfds, NULL, NULL, &tv) <= 0) {
+ clulog(LOG_WARNING, "Client timeout after new connection.\n");
+ msg_close(fd);
+ return -1;
+ }
+
ret = msg_peek(fd, &msg_hdr, sizeof(msg_hdr));
if (ret != sizeof (generic_msg_hdr)) {
clulog(LOG_ERR, "#37: Error receiving message header\n");
@@ -706,22 +716,62 @@
}
+void
+wait_for_status(int pid, int fd, int timeout)
+{
+ struct timeval tv;
+ fd_set rfds;
+ int err;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ tv.tv_sec = timeout;
+ tv.tv_usec = 0;
+
+ if (select(fd + 1, &rfds, NULL, NULL, &tv) == 1) {
+ err = 0;
+ read(fd, &err, sizeof(err));
+ exit(!!err);
+ /* could put in messages for waiting */
+ }
+ exit(1);
+}
+
+
+#define notify_status(value) \
+do { \
+ if (waittime) { \
+ waiter = value; \
+ write(waitpipe[1], &waiter, sizeof(waiter)); \
+ close(waitpipe[0]); \
+ close(waitpipe[1]); \
+ } \
+} while(0)
+
+
int
main(int argc, char **argv)
{
int cluster_fd, rv;
char foreground = 0;
int quorate;
- int listen_fds[2], listeners;
+ int listen_fds[2], listeners, waittime = 0, waitpipe[2];
+ int waiter;
uint64_t myNodeID;
- while ((rv = getopt(argc, argv, "fd")) != EOF) {
+ while ((rv = getopt(argc, argv, "fdt:")) != EOF) {
switch (rv) {
case 'd':
debug = 1;
break;
case 'f':
foreground = 1;
+ break;
+ case 't':
+ waittime = atoi(optarg);
+ if (waittime < 0)
+ waittime = 0;
+ break;
default:
break;
}
@@ -736,6 +786,16 @@
clu_log_console(1);
if (!foreground && (geteuid() == 0)) {
+ if (waittime) {
+ waitpipe[0] = -1;
+ waitpipe[1] = -1;
+ pipe(waitpipe);
+ waiter = fork();
+ if (waiter > 0)
+ wait_for_status(waiter, waitpipe[0], waittime);
+ /* notreached by parent */
+ }
+
daemon_init(argv[0]);
if (!debug && !watchdog_init())
clulog(LOG_NOTICE, "Failed to start watchdog\n");
@@ -756,6 +816,7 @@
if (init_resource_groups(0) != 0) {
clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
+ notify_status(1);
return -1;
}
@@ -766,6 +827,7 @@
if (cluster_fd < 0) {
clu_log_console(1);
clulog(LOG_CRIT, "#9: Couldn't connect to cluster\n");
+ notify_status(2);
return -1;
}
msg_set_purpose(cluster_fd, MSGP_CLUSTER);
@@ -786,6 +848,7 @@
if ((listeners = msg_listen(RG_PORT, RG_PURPOSE,
listen_fds, 2)) <= 0) {
clulog(LOG_CRIT, "#10: Couldn't set up listen socket\n");
+ notify_status(3);
return -1;
}
@@ -810,6 +873,7 @@
*/
if (vf_init(myNodeID, RG_VF_PORT, NULL, NULL) != 0) {
clulog(LOG_CRIT, "#11: Couldn't set up VF listen socket\n");
+ notify_status(4);
return -1;
}
@@ -839,6 +903,8 @@
/*
Do everything useful
*/
+ notify_status(0);
+
while (running)
event_loop(cluster_fd);
--- cluster/rgmanager/src/daemons/resrules.c 2005/03/21 22:01:30 1.9.2.2
+++ cluster/rgmanager/src/daemons/resrules.c 2006/11/03 16:29:58 1.9.2.2.6.1
@@ -426,6 +426,48 @@
/**
+ Take the first unique + required attr and call it the 'primary' attr
+ for rgmanager. If there's no primary, index 0 becomes the primary attr.
+ */
+int
+choose_primary(resource_attr_t *attrs)
+{
+ int x = 0, primary = 0;
+ int flags;
+ char *name, *val;
+
+ if (!attrs)
+ return 0;
+
+ for (x = 0; attrs[x].ra_name; x++) {
+
+ if ((attrs[x].ra_flags & (RA_UNIQUE | RA_REQUIRED)) ==
+ (RA_UNIQUE | RA_REQUIRED)) {
+ primary = x;
+ break;
+ }
+ }
+
+ if (primary != 0) {
+ flags = attrs[primary].ra_flags | RA_PRIMARY;
+ name = attrs[primary].ra_name;
+ val = attrs[primary].ra_value;
+
+ attrs[primary].ra_flags = attrs[0].ra_flags;
+ attrs[primary].ra_name = attrs[0].ra_name;
+ attrs[primary].ra_value = attrs[0].ra_value;
+
+ attrs[0].ra_flags = flags;
+ attrs[0].ra_name = name;
+ attrs[0].ra_value = val;
+ } else {
+ attrs[0].ra_flags |= RA_PRIMARY;
+ }
+
+ return 0;
+}
+
+/**
Store a child type in the child array of a resource rule.
XXX Could be rewritten to use list macros.
@@ -551,7 +593,7 @@
children:
- printf("Recognized child resource types:\n");
+ printf("Explicitly defined child resource types:\n");
if (!rr->rr_childtypes) {
printf(" - None -\n\n");
return;
@@ -697,6 +739,9 @@
store_attribute(&rr->rr_attrs, attrname, ret, flags);
}
+ if (!primary_found)
+ choose_primary(rr->rr_attrs);
+
return 0;
}
@@ -991,6 +1036,10 @@
fn = basename(de->d_name);
if (!fn)
continue;
+
+ if ((fn != NULL) && (strlen(fn) > 0) &&
+ (fn[strlen(fn)-1] == '~'))
+ continue;
snprintf(path, sizeof(path), "%s/%s",
rpath, de->d_name);
--- cluster/rgmanager/src/daemons/restree.c 2006/08/16 14:55:40 1.10.2.2.6.8
+++ cluster/rgmanager/src/daemons/restree.c 2006/11/03 16:29:58 1.10.2.2.6.9
@@ -181,11 +181,13 @@
Store the OCF Resource Instance (primary attr)
*/
n = strlen(OCF_RESOURCE_INSTANCE_STR) +
+ strlen(res->r_rule->rr_type) + 1 +
strlen(res->r_attrs[0].ra_value) + 2;
val = malloc(n);
if (!val)
return;
- snprintf(val, n, "%s=%s", OCF_RESOURCE_INSTANCE_STR,
+ snprintf(val, n, "%s=%s:%s", OCF_RESOURCE_INSTANCE_STR,
+ res->r_rule->rr_type,
res->r_attrs[0].ra_value);
*env = val; env++;
@@ -338,18 +340,30 @@
resource_t *res = node->rn_resource;
char fullpath[2048];
- if (!res->r_rule->rr_agent)
+ if (!res->r_rule->rr_agent) {
+ clulog(LOG_DEBUG,
+ "%s on %s \"%s\" no rr_agent\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value);
return 0;
+ }
#ifdef DEBUG
env = build_env(node, op);
- if (!env)
+ if (!env) {
+ clulog(LOG_DEBUG,
+ "%s on %s \"%s\" build_env failed %d\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value, errno);
return -errno;
+ }
#endif
childpid = fork();
- if (childpid < 0)
+ if (childpid < 0) {
+ clulog(LOG_ERR, "%s: fork failed (%d)!\n", __func__, errno);
return -errno;
+ }
if (!childpid) {
/* Child */
@@ -366,8 +380,13 @@
env = build_env(node, op, depth);
#endif
- if (!env)
+ if (!env) {
+ clulog(LOG_DEBUG,
+ "%s on %s \"%s\" build_env failed (ENOMEM)\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value);
exit(-ENOMEM);
+ }
if (res->r_rule->rr_agent[0] != '/')
snprintf(fullpath, sizeof(fullpath), "%s/%s",
@@ -397,7 +416,13 @@
if (ret) {
clulog(LOG_NOTICE,
- "%s on %s \"%s\" returned %d (%s)\n",
+ "%s on %s:%s returned %d (%s)\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value, ret,
+ ocf_strerror(ret));
+ } else {
+ clulog(LOG_DEBUG,
+ "%s on %s:%s returned %d (%s)\n",
res_ops[op], res->r_rule->rr_type,
res->r_attrs->ra_value, ret,
ocf_strerror(ret));
@@ -409,6 +434,11 @@
if (!WIFSIGNALED(ret))
assert(0);
+ clulog(LOG_ERR,
+ "%s on %s:%s caught signal %d\n",
+ res_ops[op], res->r_rule->rr_type,
+ res->r_attrs->ra_value, WTERMSIG(ret));
+
return -EFAULT;
}
--- cluster/rgmanager/src/daemons/rg_state.c 2006/06/16 20:07:46 1.4.2.8.6.4
+++ cluster/rgmanager/src/daemons/rg_state.c 2006/11/03 16:29:58 1.4.2.8.6.5
@@ -879,6 +879,7 @@
void *lockp = NULL;
rg_state_t svcStatus;
int ret;
+ int old_state;
if (!rg_quorate()) {
clulog(LOG_WARNING, "#69: Unclean %s of %s\n",
@@ -918,6 +919,8 @@
break;
}
+ old_state = svcStatus.rs_state;
+
clulog(LOG_NOTICE, "Stopping service %s\n", svcName);
if (recover)
@@ -937,7 +940,15 @@
ret = group_op(svcName, RG_STOP);
- _svc_stop_finish(svcName, ret, newstate);
+ if (old_state == RG_STATE_FAILED && newstate == RG_STATE_DISABLED) {
+ if (ret)
+ clulog(LOG_ALERT, "Marking %s as 'disabled', "
+ "but some resources may still be allocated!\n",
+ svcName);
+ _svc_stop_finish(svcName, 0, newstate);
+ } else {
+ _svc_stop_finish(svcName, ret, newstate);
+ }
return ret;
}
--- cluster/rgmanager/src/daemons/rg_thread.c 2006/06/16 20:07:46 1.7.2.3.6.3
+++ cluster/rgmanager/src/daemons/rg_thread.c 2006/11/03 16:29:58 1.7.2.3.6.4
@@ -110,18 +110,22 @@
purge_status_checks(request_t **list)
{
request_t *curr;
+ int found;
if (!list)
return;
- list_do(list, curr) {
- if (curr->rr_request != RG_STATUS)
- continue;
-
- list_remove(list, curr);
- rq_free(curr);
- curr = *list;
- } while (!list_done(list, curr));
+ do {
+ found = 0;
+ list_do(list, curr) {
+ if (curr->rr_request == RG_STATUS) {
+ list_remove(list, curr);
+ rq_free(curr);
+ found = 1;
+ break;
+ }
+ } while (!list_done(list, curr));
+ } while (found);
}
--- cluster/rgmanager/src/resources/clusterfs.sh 2006/06/16 20:07:46 1.1.2.3.4.5
+++ cluster/rgmanager/src/resources/clusterfs.sh 2006/11/03 16:29:58 1.1.2.3.4.6
@@ -802,6 +802,7 @@
[ "$OCF_RESKEY_nfslock" = "1" ]; then
ocf_log warning "Dropping node-wide NFS locks"
mkdir -p $mp/.clumanager/statd
+ pkill -KILL -x lockd
# Copy out the notify list; our
# IPs are already torn down
if notify_list_store $mp/.clumanager/statd; then
@@ -889,12 +890,16 @@
;;
status|monitor)
isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
+ if [ $? -ne $YES ]; then
+ ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}"
+ exit $OCF_ERR_GENERIC
+ fi
isAlive ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
-
- exit 0
+ [ $? -eq $YES ] && exit 0
+
+ ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!"
+ exit $OCF_ERR_GENERIC
;;
restart)
stopFilesystem
--- cluster/rgmanager/src/resources/fs.sh 2006/08/11 15:00:59 1.4.2.6.4.6
+++ cluster/rgmanager/src/resources/fs.sh 2006/11/03 16:29:58 1.4.2.6.4.7
@@ -243,7 +243,7 @@
{
if [ -z "$OCF_RESKEY_mountpoint" ]; then
ocf_log err "No mount point specified."
- return 1
+ return $OCF_ERR_ARGS
fi
if ! [ -e "$OCF_RESKEY_mountpoint" ]; then
@@ -514,7 +514,7 @@
dev=$(real_device $1)
if [ -z "$dev" ]; then
ocf_log err \
- "isMounted: Could not match $1 with a real device"
+ "fs (isMounted): Could not match $1 with a real device"
return $FAIL
fi
mp=$2
@@ -530,8 +530,8 @@
# place
#
if [ -n "$tmp_mp" -a "$tmp_mp" != "$mp" ]; then
- ocf_log warn "\
-Device $dev is mounted on $tmp_mp instead of $mp"
+ ocf_log warn \
+"Device $dev is mounted on $tmp_mp instead of $mp"
fi
return $YES
fi
@@ -553,14 +553,14 @@
declare rw
if [ $# -ne 1 ]; then
- logAndPrint $LOG_ERR "Usage: isAlive mount_point"
+ ocf_log err "Usage: isAlive mount_point"
return $FAIL
fi
mount_point=$1
test -d $mount_point
if [ $? -ne 0 ]; then
- logAndPrint $LOG_ERR "$mount_point is not a directory"
+ ocf_log err "fs (isAlive): $mount_point is not a directory"
return $FAIL
fi
@@ -707,6 +707,7 @@
return $ret
}
+
activeMonitor() {
declare monpath=$OCF_RESKEY_mountpoint/.clumanager
declare p
@@ -733,7 +734,7 @@
case $1 in
start)
ocf_log info "Starting active monitoring of $OCF_RESKEY_mountpoint"
- mkdir -p $(dirname $monpath) || return 1
+ mkdir -p $(dirname $monpath) || return $OCF_ERR_GENERIC
devmon $args -p $monpath/devmon.data -P $monpath/devmon.pid
;;
stop)
@@ -787,6 +788,7 @@
enable_fs_quotas()
{
declare -i need_check=0
+ declare -i rv
declare quotaopts=""
declare mopt
declare opts=$1
@@ -794,7 +796,7 @@
if [ -z "`which quotaon`" ]; then
ocf_log err "quotaon not found in $PATH"
- return 1
+ return $OCF_ERR_GENERIC
fi
for mopt in `echo $opts | sed -e s/,/\ /g`; do
@@ -834,8 +836,13 @@
ocf_log info "Enabling Quotas on $mp"
ocf_log debug "quotaon -$quotaopts $mp"
quotaon -$quotaopts $mp
+ rv=$?
+ if [ $rv -ne 0 ]; then
+ # Just a warning
+ ocf_log warn "Unable to turn on quotas for $mp; return = $rv"
+ fi
- return $?
+ return $rv
}
@@ -857,14 +864,14 @@
mp=${OCF_RESKEY_mountpoint}
case "$mp" in
""|"[ ]*") # nothing to mount
- return $SUCCESS
+ return $OCF_SUCCESS
;;
/*) # found it
;;
*) # invalid format
ocf_log err \
"startFilesystem: Invalid mount point format (must begin with a '/'): \'$mp\'"
- return $FAIL
+ return $OCF_ERR_ARGS
;;
esac
@@ -875,7 +882,7 @@
if [ -z "$dev" ]; then
ocf_log err "\
startFilesystem: Could not match $OCF_RESKEY_device with a real device"
- return $FAIL
+ return $OCF_ERR_ARGS
fi
#
@@ -885,7 +892,7 @@
if ! [ -d "$mp" ]; then
ocf_log err"\
startFilesystem: Mount point $mp exists but is not a directory"
- return $FAIL
+ return $OCF_ERR_ARGS
fi
else
ocf_log err "\
@@ -914,7 +921,7 @@
case $? in
$YES) # already mounted
ocf_log debug "$dev already mounted"
- return $SUCCESS
+ return $OCF_SUCCESS
;;
$NO) # not mounted, continue
;;
@@ -1215,29 +1222,35 @@
;;
status|monitor)
isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
+ if [ $? -ne $YES ]; then
+ ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}"
+ exit $OCF_ERR_GENERIC
+ fi
if [ "$OCF_RESKEY_active_monitor" = "yes" ] ||
[ "$OCF_RESKEY_active_monitor" = "1" ]; then
- activeMonitor status || exit $OCF_ERR_GENERIC
- exit 0
+ activeMonitor status
+ [ $? -eq 0 ] && exit 0
+ ocf_log err "fs:${OCF_RESKEY_name}: Active Monitoring reported a failure"
+ exit $OCF_ERR_GENERIC
fi
isAlive ${OCF_RESKEY_mountpoint}
- [ $? -ne $YES ] && exit $OCF_ERR_GENERIC
-
- exit 0
+ [ $? -eq $YES ] && exit 0
+
+ ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!"
+ exit $OCF_ERR_GENERIC
;;
restart)
stopFilesystem
if [ $? -ne 0 ]; then
- exit 1
+ exit $OCF_ERR_GENERIC
fi
startFilesystem
if [ $? -ne 0 ]; then
- exit 1
+ exit $OCF_ERR_GENERIC
fi
exit 0
--- cluster/rgmanager/src/resources/nfsclient.sh 2006/08/02 17:24:18 1.3.2.2.6.6
+++ cluster/rgmanager/src/resources/nfsclient.sh 2006/11/03 16:29:58 1.3.2.2.6.7
@@ -340,7 +340,11 @@
sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g')
exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
"^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}"
+
rv=$?
+ if [ $rv -ne 0 ]; then
+ ocf_log err "nfsclient:$OCF_RESKEY_name is missing!"
+ fi
;;
recover)
--- cluster/rgmanager/src/resources/ocf-shellfuncs 2006/06/16 20:07:46 1.2.2.2
+++ cluster/rgmanager/src/resources/ocf-shellfuncs 2006/11/03 16:29:58 1.2.2.3
@@ -1,5 +1,5 @@
#
-# $Id: ocf-shellfuncs,v 1.2.2.2 2006/06/16 20:07:46 lhh Exp $
+# $Id: ocf-shellfuncs,v 1.2.2.3 2006/11/03 16:29:58 lhh Exp $
#
# Common helper functions for the OCF Resource Agents supplied by
# heartbeat.
--- cluster/rgmanager/src/resources/script.sh 2005/11/21 21:48:25 1.3.2.2.6.2
+++ cluster/rgmanager/src/resources/script.sh 2006/11/03 16:29:58 1.3.2.2.6.3
@@ -110,4 +110,10 @@
# Don't need to catch return codes; this one will work.
ocf_log info "Executing ${OCF_RESKEY_file} $1"
-exec /bin/sh ${OCF_RESKEY_file} $1
+${OCF_RESKEY_file} $1
+
+declare -i rv=$?
+if [ $rv -ne 0 ]; then
+ ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
+ return $OCF_ERR_GENERIC
+fi
--- cluster/rgmanager/src/utils/clustat.c 2006/06/16 20:07:47 1.5.2.3.6.8
+++ cluster/rgmanager/src/utils/clustat.c 2006/11/03 16:29:59 1.5.2.3.6.9
@@ -486,6 +486,11 @@
{
int x;
+ if (!membership) {
+ printf("Membership information not available\n");
+ return;
+ }
+
printf(" %-40.40s %s\n", "Member Name", "Status");
printf(" %-40.40s %s\n", "------ ----", "------");
@@ -504,8 +509,10 @@
{
int x;
- if (!membership)
+ if (!membership) {
+ printf(" <nodes/>\n");
return;
+ }
printf(" <nodes>\n");
for (x = 0; x < membership->cml_count; x++) {
@@ -608,6 +615,10 @@
/* Grab the local node ID and flag it from the list of reported
online nodes */
clu_local_nodeid(NULL, lid);
+
+ if (!all)
+ return NULL;
+
for (x=0; x<all->cml_count; x++) {
if (all->cml_members[x].cm_id == *lid) {
m = &all->cml_members[x];
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2006-11-03 16:30 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-07 18:39 [Cluster-devel] cluster/rgmanager ChangeLog errors.txt init.d/ lhh
-- strict thread matches above, loose matches on Subject: below --
2006-11-03 16:30 lhh
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).