From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 27 Mar 2007 19:33:24 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog make/defines.mk.in ... Message-ID: <20070327193324.22260.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2007-03-27 20:33:21 Modified files: rgmanager : ChangeLog rgmanager/make : defines.mk.input rgmanager/src/clulib: Makefile clulog.c vft.c rgmanager/src/daemons: groups.c main.c nodeevent.c resrules.c rg_locks.c rg_thread.c rgmanager/src/utils: clulog.c Added files: rgmanager/src/clulib: wrap_lock.c Log message: Merge patch from Crosswalk team Team: Leonard Maiorani, Scott Cannata, Henry Harris * Always check malloc() return codes * Fix errant clu_unlock() calls in vft.c in cases where clu_lock() failed * Add ability to wrap pthread_mutex / pthread_rwlock calls for better stability * Fix improper pthread_mutex_destroy() semantics Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.31&r2=1.32 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/make/defines.mk.input.diff?cvsroot=cluster&r1=1.6&r2=1.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/wrap_lock.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/Makefile.diff?cvsroot=cluster&r1=1.11&r2=1.12 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&r1=1.17&r2=1.18 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.29&r2=1.30 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.35&r2=1.36 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.19&r2=1.20 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_locks.c.diff?cvsroot=cluster&r1=1.8&r2=1.9 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&r1=1.18&r2=1.19 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clulog.c.diff?cvsroot=cluster&r1=1.3&r2=1.4 --- cluster/rgmanager/ChangeLog 2006/10/25 15:39:41 1.31 +++ cluster/rgmanager/ChangeLog 2007/03/27 19:33:19 1.32 @@ -1,3 +1,11 @@ +2007-03-27 Lon Hohberger + * Merge patch from Crosswalk development team: + * Scott Cannata + * Henry Harris + * Leonard Maiorani + +===== + 2006-10-25 Lon Hohberger * src/resources/clusterfs.sh: Fix unmounting problem (#212074) --- cluster/rgmanager/make/defines.mk.input 2007/03/22 23:24:58 1.6 +++ cluster/rgmanager/make/defines.mk.input 2007/03/27 19:33:19 1.7 @@ -32,3 +32,11 @@ # Default CFLAGS CFLAGS += -DSHAREDIR=\"@SHAREDIR@\" -Wall -Werror -Wstrict-prototypes -Wshadow ${INCLUDE} + +# +# Wrappers around pthread_mutex / pthread_rwlock calls for deadlock +# detection (and other things) +# +#CFLAGS += -DSHAREDIR=\"@SHAREDIR@\" -Wall -Werror -Wstrict-prototypes -Wshadow ${INCLUDE} -DWRAP_LOCKS +#LDFLAGS += -Wl,-wrap,pthread_mutex_lock,-wrap,pthread_mutex_unlock,-wrap,pthread_rwlock_rdlock,-wrap,pthread_rwlock_wrlock,-wrap,pthread_rwlock_unlock + /cvs/cluster/cluster/rgmanager/src/clulib/wrap_lock.c,v --> standard output revision 1.1 --- cluster/rgmanager/src/clulib/wrap_lock.c +++ - 2007-03-27 20:33:21.897905000 +0100 @@ -0,0 +1,224 @@ +/* + Copyright Red Hat, Inc. 2007 + Copyright Crosswalk 2006-2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ +#ifdef WRAP_LOCKS +#include +#include +#include +#include +#include +#include +#include +#include + +int __real_pthread_mutex_lock(pthread_mutex_t *lock); +int +__wrap_pthread_mutex_lock(pthread_mutex_t *lock) +{ + int status; + struct timespec delay; + + while (1) { + status = __real_pthread_mutex_lock(lock); + + switch(status) { + case EDEADLK: + /* Already own it: Note the error, but continue */ + fprintf(stderr, "[%d] %s(%p): %s; continuing\n", + gettid(), + __FUNCTION__, lock, strerror(status)); + /* deliberate fallthrough */ + case 0: + return 0; + case EBUSY: + /* Try again */ + break; + default: + /* Other return codes */ + fprintf(stderr, "[%d] %s(%p): %s\n", gettid(), + __FUNCTION__, lock, strerror(status)); + raise(SIGSEGV); + /* EINVAL? */ + return 0; + } + + delay.tv_sec = 0; + delay.tv_nsec = 100000; + nanosleep(&delay, NULL); + } + + /* Not reached */ + return 0; +} + + +int __real_pthread_mutex_unlock(pthread_mutex_t *lock); +int +__wrap_pthread_mutex_unlock(pthread_mutex_t *lock) +{ + int status; + struct timespec delay; + + while (1) { + status = __real_pthread_mutex_unlock(lock); + + switch(status) { + case EPERM: + /* Don't own it: Note the error, but continue */ + fprintf(stderr, "[%d] %s(%p): %s; continuing\n", + gettid(), + __FUNCTION__, lock, strerror(status)); + /* deliberate fallthrough */ + case 0: + return 0; + default: + fprintf(stderr, "[%d] %s(%p): %s\n", gettid(), + __FUNCTION__, lock, strerror(status)); + raise(SIGSEGV); + return 0; + } + + delay.tv_sec = 0; + delay.tv_nsec = 100000; + nanosleep(&delay, NULL); + } + + /* Not reached */ + return 0; +} + + +int __real_pthread_rwlock_rdlock(pthread_rwlock_t *lock); +int +__wrap_pthread_rwlock_rdlock(pthread_rwlock_t *lock) +{ + int status; + struct timespec delay; + + while (1) { + status = __real_pthread_rwlock_rdlock(lock); + + switch(status) { + case EDEADLK: + /* Already own it: Note the error, but continue */ + fprintf(stderr, "[%d] %s(%p): %s; continuing\n", + gettid(), + __FUNCTION__, lock, strerror(status)); + /* deliberate fallthrough */ + case 0: + return 0; + case EBUSY: + /* Try again */ + break; + default: + /* Other return codes */ + fprintf(stderr, "[%d] %s(%p): %s\n", gettid(), + __FUNCTION__, lock, strerror(status)); + raise(SIGSEGV); + /* EINVAL? */ + return 0; + } + + delay.tv_sec = 0; + delay.tv_nsec = 100000; + nanosleep(&delay, NULL); + } + + /* Not reached */ + return 0; +} + + +int __real_pthread_rwlock_wrlock(pthread_rwlock_t *lock); +int +__wrap_pthread_rwlock_wrlock(pthread_rwlock_t *lock) +{ + int status; + struct timespec delay; + + while (1) { + status = __real_pthread_rwlock_wrlock(lock); + + switch(status) { + case EDEADLK: + /* Already own it: Note the error, but continue */ + fprintf(stderr, "[%d] %s(%p): %s; continuing\n", + gettid(), + __FUNCTION__, lock, strerror(status)); + /* deliberate fallthrough */ + case 0: + return 0; + case EBUSY: + /* Try again */ + break; + default: + /* Other return codes */ + fprintf(stderr, "[%d] %s(%p): %s\n", gettid(), + __FUNCTION__, lock, strerror(status)); + raise(SIGSEGV); + /* EINVAL? */ + return 0; + } + + delay.tv_sec = 0; + delay.tv_nsec = 100000; + nanosleep(&delay, NULL); + } + + /* Not reached */ + return 0; +} + + +int __real_pthread_rwlock_unlock(pthread_rwlock_t *lock); +int +__wrap_pthread_rwlock_unlock(pthread_rwlock_t *lock) +{ + int status; + struct timespec delay; + + while (1) { + status = __real_pthread_rwlock_unlock(lock); + + switch(status) { + case EPERM: + /* Don't own it: Note the error, but continue */ + fprintf(stderr, "[%d] %s(%p): %s; continuing\n", + gettid(), + __FUNCTION__, lock, strerror(status)); + /* deliberate fallthrough */ + case 0: + return 0; + default: + fprintf(stderr, "[%d] %s(%p): %s\n", gettid(), + __FUNCTION__, lock, strerror(status)); + raise(SIGSEGV); + return 0; + } + + delay.tv_sec = 0; + delay.tv_nsec = 100000; + nanosleep(&delay, NULL); + } + + /* Not reached */ + return 0; +} +#endif + --- cluster/rgmanager/src/clulib/Makefile 2007/03/20 17:09:56 1.11 +++ cluster/rgmanager/src/clulib/Makefile 2007/03/27 19:33:20 1.12 @@ -33,7 +33,8 @@ libclulib.a: clulog.o daemon_init.o signals.o msgsimple.o \ gettid.o rg_strings.o message.o members.o fdops.o \ - lock.o cman.o vft.o msg_cluster.o msg_socket.o + lock.o cman.o vft.o msg_cluster.o msg_socket.o \ + wrap_lock.o ${AR} cru $@ $^ ranlib $@ --- cluster/rgmanager/src/clulib/clulog.c 2006/06/02 17:37:10 1.5 +++ cluster/rgmanager/src/clulib/clulog.c 2007/03/27 19:33:20 1.6 @@ -20,7 +20,7 @@ /** @file * Library routines for communicating with the logging daemon. * - * $Id: clulog.c,v 1.5 2006/06/02 17:37:10 lhh Exp $ + * $Id: clulog.c,v 1.6 2007/03/27 19:33:20 lhh Exp $ * * Author: Jeff Moyer */ @@ -50,7 +50,7 @@ #include -static const char *version __attribute__ ((unused)) = "$Revision: 1.5 $"; +static const char *version __attribute__ ((unused)) = "$Revision: 1.6 $"; #ifdef DEBUG #include @@ -70,7 +70,12 @@ static int syslog_facility = LOG_DAEMON; static char *daemon_name = NULL; static pid_t daemon_pid = -1; + +#ifdef WRAP_LOCKS +static pthread_mutex_t log_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif CODE logger_prioritynames[] = { {"emerg", LOG_EMERG}, --- cluster/rgmanager/src/clulib/vft.c 2006/10/23 22:47:00 1.17 +++ cluster/rgmanager/src/clulib/vft.c 2007/03/27 19:33:20 1.18 @@ -50,8 +50,13 @@ * TODO: We could make it thread safe, but this might be unnecessary work * Solution: Super-coarse-grained-bad-code-locking! */ +#ifdef WRAP_LOCKS +static pthread_mutex_t key_list_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +static pthread_mutex_t vf_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else static pthread_mutex_t key_list_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t vf_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif /* WRAP_LOCKS */ static pthread_t vf_thread = (pthread_t)-1; static int vf_thread_ready = 0; static vf_vote_cb_t default_vote_cb = NULL; @@ -788,6 +793,15 @@ free(key_node->kn_data); key_node->kn_datalen = vnp->vn_datalen; key_node->kn_data = malloc(vnp->vn_datalen); + + /* + * Need to check return of malloc always + */ + if (key_node->kn_data == NULL) { + fprintf (stderr, "malloc fail err=%d\n", errno); + return -1; + } + memcpy(key_node->kn_data, vnp->vn_data, vnp->vn_datalen); free(vnp); @@ -1013,6 +1027,13 @@ } newnode = malloc(sizeof(*newnode)); + + if (newnode == NULL) { + fprintf(stderr, "malloc fail3 err=%d\n", errno); + pthread_mutex_unlock(&key_list_mutex); + return -1; + } + newnode->kn_data = NULL; memset(newnode,0,sizeof(*newnode)); newnode->kn_keyid = strdup(keyid); @@ -1136,7 +1157,6 @@ snprintf(lock_name, sizeof(lock_name), "usrm::vf"); l = clu_lock(LKM_EXMODE, &lockp, 0, lock_name); if (l < 0) { - clu_unlock(&lockp); pthread_mutex_unlock(&vf_mutex); return l; } @@ -1153,7 +1173,7 @@ } #ifdef DEBUG - printf("aight, need responses from %d guys\n", remain); + printf("Allright, need responses from %d members\n", remain); #endif pthread_mutex_lock(&key_list_mutex); @@ -1195,6 +1215,8 @@ */ if (msg_open(MSG_CLUSTER, 0, _port, &everyone, 0) < 0) { printf("msg_open: fail: %s\n", strerror(errno)); + clu_unlock(&lockp); + pthread_mutex_unlock(&vf_mutex); return -1; } @@ -1432,9 +1454,7 @@ snprintf(lock_name, sizeof(lock_name), "usrm::vf"); l = clu_lock(LKM_EXMODE, &lockp, 0, lock_name); if (l < 0) { - clu_unlock(&lockp); pthread_mutex_unlock(&vf_mutex); - printf("Couldn't lock %s\n", keyid); return l; } --- cluster/rgmanager/src/daemons/groups.c 2007/03/20 17:09:57 1.29 +++ cluster/rgmanager/src/daemons/groups.c 2007/03/27 19:33:20 1.30 @@ -42,8 +42,13 @@ static resource_node_t *_tree = NULL; static fod_t *_domains = NULL; +#ifdef WRAP_LOCKS +pthread_mutex_t config_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +pthread_mutex_t status_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else pthread_mutex_t config_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t status_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER; void res_build_name(char *, size_t, resource_t *); --- cluster/rgmanager/src/daemons/main.c 2007/03/20 17:09:57 1.35 +++ cluster/rgmanager/src/daemons/main.c 2007/03/27 19:33:20 1.36 @@ -403,7 +403,8 @@ /* Peek-a-boo */ sz = msg_receive(ctx, msg_hdr, sizeof(msgbuf), 1); if (sz < sizeof (generic_msg_hdr)) { - clulog(LOG_ERR, "#37: Error receiving message header (%d)\n", sz); + clulog(LOG_ERR, + "#37: Error receiving message header (%d)\n", sz); goto out; } --- cluster/rgmanager/src/daemons/nodeevent.c 2007/03/20 17:09:57 1.5 +++ cluster/rgmanager/src/daemons/nodeevent.c 2007/03/27 19:33:20 1.6 @@ -35,8 +35,12 @@ /** * Node event queue. */ -static nevent_t *event_queue = NULL; +#ifdef WRAP_LOCKS +static pthread_mutex_t ne_queue_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else static pthread_mutex_t ne_queue_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif +static nevent_t *event_queue = NULL; static pthread_t ne_thread = 0; int ne_queue_request(int local, int nodeid, int state); --- cluster/rgmanager/src/daemons/resrules.c 2007/03/22 23:46:58 1.19 +++ cluster/rgmanager/src/daemons/resrules.c 2007/03/27 19:33:20 1.20 @@ -815,6 +815,10 @@ n = read(fd, buf, sizeof(buf)); if (n < 0) { + + if (errno == EINTR) + continue; + if (*file) free(*file); return -1; --- cluster/rgmanager/src/daemons/rg_locks.c 2006/12/18 21:55:27 1.8 +++ cluster/rgmanager/src/daemons/rg_locks.c 2007/03/27 19:33:20 1.9 @@ -36,11 +36,17 @@ static int _rg_statuscnt = 0; static int _rg_statusmax = 5; /* XXX */ -static pthread_mutex_t locks_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t unlock_cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t zero_cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER; + +#ifdef WRAP_LOCKS +static pthread_mutex_t locks_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +static pthread_mutex_t _ccs_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else +static pthread_mutex_t locks_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t _ccs_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif #ifdef NO_CCS static xmlDocPtr ccs_doc = NULL; @@ -317,4 +323,3 @@ pthread_mutex_unlock(&locks_mutex); return 0; } - --- cluster/rgmanager/src/daemons/rg_thread.c 2007/03/10 00:20:54 1.18 +++ cluster/rgmanager/src/daemons/rg_thread.c 2007/03/27 19:33:20 1.19 @@ -42,8 +42,12 @@ * Resource thread queue head. */ static resthread_t *resthread_list = NULL; -static pthread_mutex_t reslist_mutex = PTHREAD_MUTEX_INITIALIZER; +#ifdef WRAP_LOCKS +static pthread_mutex_t reslist_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else +static pthread_mutex_t reslist_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif static resthread_t *find_resthread_byname(const char *resgroupname); static int spawn_if_needed(const char *resgroupname); @@ -163,7 +167,7 @@ char myname[256]; resthread_t *myself; request_t *req; - int ret = RG_EFAIL, error = 0; + int ret = RG_EFAIL, error = 0, mystatus; rg_inc_threads(); @@ -453,7 +457,22 @@ raise(SIGSEGV); } - pthread_mutex_destroy(&my_queue_mutex); + mystatus = pthread_mutex_destroy(&my_queue_mutex); + if (mystatus != 0) + { + if (mystatus == EBUSY) { + pthread_mutex_unlock(&my_queue_mutex); + } + + mystatus = pthread_mutex_destroy(&my_queue_mutex); + if (mystatus != 0) { + fprintf (stderr, "mutex_destroy=%d err=%d %p\n", + mystatus, errno, &my_queue_mutex); + + fflush (stderr); + } + } + list_remove(&resthread_list, myself); free(myself); --- cluster/rgmanager/src/utils/clulog.c 2006/08/18 20:33:24 1.3 +++ cluster/rgmanager/src/utils/clulog.c 2007/03/27 19:33:20 1.4 @@ -122,6 +122,12 @@ /* Add two bytes for linefeed and NULL terminator */ len = strlen(argv[argc-1]) + 2; logmsg = (char*)malloc(strlen(argv[argc-1])+2); + if (logmsg == NULL) { + fprintf(stderr, + "clulog: malloc fail err=%d\n", errno); + exit(0); + } + snprintf(logmsg, len, "%s\n", argv[argc-1]); if (!cmdline_loglevel) {