From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 18 Aug 2006 15:26:25 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/resgroup.h ... Message-ID: <20060818152625.24927.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2006-08-18 15:26:23 Modified files: rgmanager : ChangeLog rgmanager/include: resgroup.h rgmanager/src/clulib: ckpt_state.c rgmanager/src/daemons: groups.c main.c rg_state.c rgmanager/src/resources: clusterfs.sh fs.sh nfsclient.sh ra-api-1-modified.dtd script.sh Log message: 2006-08-18 Lon Hohberger * include/resgroup.h: Change ordering and add magic field to rgmanager state field (warning: breaks compatibility from 08/08 CVS!) * src/clulib/ckpt_state.c, src/daemons/rg_state.c: Fix bug preventing correct operation of ckpt operation after initial boot. Get rid of debug info. 
* src/daemons/groups.c, main.c: Fix #202499 - shutdown while handling transitions sometimes allows services to restart (due to not locking RGs locally) * src/resources/clusterfs.sh, fs.sh, nfsclient.sh: Add proper warning messages if status check fails * src/resources/ra-api-1-modified.dtd: Allow 'migrate' option Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.19&r2=1.20 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.12&r2=1.13 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/ckpt_state.c.diff?cvsroot=cluster&r1=1.1&r2=1.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.20&r2=1.21 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.28&r2=1.29 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.19&r2=1.20 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&r1=1.10&r2=1.11 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&r1=1.16&r2=1.17 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&r1=1.12&r2=1.13 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ra-api-1-modified.dtd.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&r1=1.7&r2=1.8 --- cluster/rgmanager/ChangeLog 2006/08/09 21:48:34 1.19 +++ cluster/rgmanager/ChangeLog 2006/08/18 15:26:21 1.20 @@ -1,3 +1,16 @@ +2006-08-18 Lon Hohberger + * include/resgroup.h: Change ordering and add magic field to + rgmanager state field (warning: breaks compatibility from 08/08 CVS!) 
+ * src/clulib/ckpt_state.c, src/daemons/rg_state.c: Fix bug + preventing correct operation of ckpt operation after initial boot. + Get rid of debug info. + * src/daemons/groups,c, main.c: Fix #202499 - shutdown while handling + transitions sometimes allows services to restart (due to not locking + RGs locally) + * src/resources/clusterfs.sh, fs.sh, nfsclient.sh: Add proper + warning messages if status check fails + * src/resources/ra-api-1-modified.dtd: Allow 'migrate' option + 2006-08-08 Lon Hohberger * src/clulib/members.c: Fix gained/lost list creation so that the count is actually nonzero (#201713) --- cluster/rgmanager/include/resgroup.h 2006/07/19 18:43:32 1.12 +++ cluster/rgmanager/include/resgroup.h 2006/08/18 15:26:22 1.13 @@ -27,31 +27,30 @@ */ typedef struct { char rs_name[64]; /**< Service name */ + uint32_t rs_id; /**< Service ID */ + uint32_t rs_magic; /**< Magic ID */ uint32_t rs_owner; /**< Member ID running service. */ uint32_t rs_last_owner; /**< Last member to run the service. */ uint32_t rs_state; /**< State of service. 
*/ uint32_t rs_restarts; /**< Number of cluster-induced restarts */ uint64_t rs_transition; /**< Last service transition time */ - uint32_t rs_id; /**< Service ID */ - uint32_t rs_pad; /**< pad to 64-bit boundary */ } rg_state_t; #define swab_rg_state_t(ptr) \ {\ + swab32((ptr)->rs_id);\ + swab32((ptr)->rs_magic);\ swab32((ptr)->rs_owner);\ swab32((ptr)->rs_last_owner);\ swab32((ptr)->rs_state);\ swab32((ptr)->rs_restarts);\ swab64((ptr)->rs_transition);\ - swab32((ptr)->rs_pad);\ } #define RG_PORT 177 -#define RG_VF_PORT 178 -#define RG_PURPOSE 0x11398fed -#define RG_SERVICE_GROUP "usrm::manager" +#define RG_MAGIC 0x11398fed #define RG_ACTION_REQUEST /* Message header */ 0x138582 #define RG_EVENT 0x138583 --- cluster/rgmanager/src/clulib/ckpt_state.c 2006/08/07 22:05:01 1.1 +++ cluster/rgmanager/src/clulib/ckpt_state.c 2006/08/18 15:26:22 1.2 @@ -75,8 +75,10 @@ { SaCkptCheckpointCreationAttributesT attrs; SaCkptCheckpointOpenFlagsT flags; +#if 0 SaCkptCheckpointDescriptorT status; - SaAisErrorT err; +#endif + SaAisErrorT err = SA_AIS_OK; key_node_t *newnode = NULL; newnode = kn_find_key(keyid); @@ -111,6 +113,7 @@ &newnode->kn_cph); if (err == SA_AIS_OK) { +#if 0 saCkptCheckpointStatusGet(newnode->kn_cph, &status); @@ -141,12 +144,10 @@ (int)status.checkpointCreationAttributes.maxSectionIdSize); printf("Section count = %d\n", status.numberOfSections); printf("\n"); - +#endif goto good; } - printf("Retrying w/ create\n"); - attrs.creationFlags = SA_CKPT_WR_ALL_REPLICAS; attrs.checkpointSize = (SaSizeT)maxsize; attrs.retentionDuration = SA_TIME_ONE_HOUR; @@ -175,7 +176,9 @@ newnode->kn_ready = 1; newnode->kn_next = key_list; key_list = newnode; +#if 0 printf("Opened ckpt %s\n", keyid); +#endif return err; } --- cluster/rgmanager/src/daemons/groups.c 2006/07/19 18:43:32 1.20 +++ cluster/rgmanager/src/daemons/groups.c 2006/08/18 15:26:22 1.21 @@ -418,7 +418,7 @@ int ret; if (rg_locked()) { - clulog(LOG_NOTICE, + clulog(LOG_DEBUG, "Resource groups locked; not 
evaluating\n"); return -EAGAIN; } --- cluster/rgmanager/src/daemons/main.c 2006/08/09 21:48:34 1.28 +++ cluster/rgmanager/src/daemons/main.c 2006/08/18 15:26:22 1.29 @@ -792,6 +792,7 @@ void * shutdown_thread(void *arg) { + rg_lockall(L_SYS); rg_doall(RG_STOP_EXITING, 1, NULL); running = 0; --- cluster/rgmanager/src/daemons/rg_state.c 2006/08/07 22:05:01 1.19 +++ cluster/rgmanager/src/daemons/rg_state.c 2006/08/18 15:26:22 1.20 @@ -306,11 +306,12 @@ if (errno == ENOENT) { ds_key_init(res, DS_MIN_SIZE, 10); } else { + perror("ds_read"); return -1; } } - if (datalen < 0) { + if (datalen <= 0) { ret = init_rg(name, svcblk); if (ret < 0) { @@ -326,6 +327,7 @@ } memcpy(svcblk, data, sizeof(*svcblk)); + return 0; #else membership = member_list(); --- cluster/rgmanager/src/resources/clusterfs.sh 2006/06/02 17:37:10 1.10 +++ cluster/rgmanager/src/resources/clusterfs.sh 2006/08/18 15:26:22 1.11 @@ -889,12 +889,16 @@ ;; status|monitor) isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint} - [ $? -ne $YES ] && exit $OCF_ERR_GENERIC + if [ $? -ne $YES ]; then + ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}" + exit $OCF_ERR_GENERIC + fi isAlive ${OCF_RESKEY_mountpoint} - [ $? -ne $YES ] && exit $OCF_ERR_GENERIC - - exit 0 + [ $? -eq $YES ] && exit 0 + + ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!" + exit $OCF_ERR_GENERIC ;; restart) stopFilesystem --- cluster/rgmanager/src/resources/fs.sh 2006/06/02 17:37:10 1.16 +++ cluster/rgmanager/src/resources/fs.sh 2006/08/18 15:26:22 1.17 @@ -243,7 +243,7 @@ { if [ -z "$OCF_RESKEY_mountpoint" ]; then ocf_log err "No mount point specified." - return 1 + return $OCF_ERR_ARGS fi if ! 
[ -e "$OCF_RESKEY_mountpoint" ]; then @@ -514,7 +514,7 @@ dev=$(real_device $1) if [ -z "$dev" ]; then ocf_log err \ - "isMounted: Could not match $1 with a real device" + "fs (isMounted): Could not match $1 with a real device" return $FAIL fi mp=$2 @@ -553,14 +553,14 @@ declare rw if [ $# -ne 1 ]; then - logAndPrint $LOG_ERR "Usage: isAlive mount_point" + ocf_log err "Usage: isAlive mount_point" return $FAIL fi mount_point=$1 test -d $mount_point if [ $? -ne 0 ]; then - logAndPrint $LOG_ERR "$mount_point is not a directory" + ocf_log err "fs (isAlive): $mount_point is not a directory" return $FAIL fi @@ -707,6 +707,7 @@ return $ret } + activeMonitor() { declare monpath=$OCF_RESKEY_mountpoint/.clumanager declare p @@ -733,7 +734,7 @@ case $1 in start) ocf_log info "Starting active monitoring of $OCF_RESKEY_mountpoint" - mkdir -p $(dirname $monpath) || return 1 + mkdir -p $(dirname $monpath) || return $OCF_ERR_GENERIC devmon $args -p $monpath/devmon.data -P $monpath/devmon.pid ;; stop) @@ -794,7 +795,7 @@ if [ -z "`which quotaon`" ]; then ocf_log err "quotaon not found in $PATH" - return 1 + return $OCF_ERR_GENERIC fi for mopt in `echo $opts | sed -e s/,/\ /g`; do @@ -1211,29 +1212,35 @@ ;; status|monitor) isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint} - [ $? -ne $YES ] && exit $OCF_ERR_GENERIC + if [ $? -ne $YES ]; then + ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}" + exit $OCF_ERR_GENERIC + fi if [ "$OCF_RESKEY_active_monitor" = "yes" ] || [ "$OCF_RESKEY_active_monitor" = "1" ]; then - activeMonitor status || exit $OCF_ERR_GENERIC - exit 0 + activeMonitor status + [ $? -eq 0 ] && exit 0 + ocf_log err "fs:${OCF_RESKEY_name}: Active Monitoring reported a failure" + exit $OCF_ERR_GENERIC fi isAlive ${OCF_RESKEY_mountpoint} - [ $? -ne $YES ] && exit $OCF_ERR_GENERIC - - exit 0 + [ $? -eq $YES ] && exit 0 + + ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!" 
+ exit $OCF_ERR_GENERIC ;; restart) stopFilesystem if [ $? -ne 0 ]; then - exit 1 + exit $OCF_ERR_GENERIC fi startFilesystem if [ $? -ne 0 ]; then - exit 1 + exit $OCF_ERR_GENERIC fi exit 0 --- cluster/rgmanager/src/resources/nfsclient.sh 2006/08/02 17:24:31 1.12 +++ cluster/rgmanager/src/resources/nfsclient.sh 2006/08/18 15:26:22 1.13 @@ -320,7 +320,11 @@ sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g') exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \ "^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}" + rv=$? + if [ $rv -ne 0 ]; then + ocf_log err "nfsclient:$OCF_RESKEY_name is missing!" + fi ;; recover) --- cluster/rgmanager/src/resources/ra-api-1-modified.dtd 2006/07/19 18:43:32 1.3 +++ cluster/rgmanager/src/resources/ra-api-1-modified.dtd 2006/08/18 15:26:22 1.4 @@ -25,7 +25,8 @@ primary (1|0) "0" required (1|0) "0" inherit CDATA "" - unique (1|0) "0"> + unique (1|0) "0" + reconfig (1|0) "0">