From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 23 Oct 2006 15:44:34 -0000 Subject: [Cluster-devel] cluster/group/gfs_controld lock_dlm.h recover.c Message-ID: <20061023154434.14039.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2006-10-23 15:44:34 Modified files: group/gfs_controld: lock_dlm.h recover.c Log message: Patch from Abhi to fix the case where a node's mount is rejected by other group members, causing gfs_controld on the mounter to leave the group immediately. It was sometimes leaving before its join had even finished, which caused groupd to reject the leave, so we need to wait for the join to complete before doing the leave. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.20&r2=1.21 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.22&r2=1.23 --- cluster/group/gfs_controld/lock_dlm.h 2006/10/16 14:44:02 1.20 +++ cluster/group/gfs_controld/lock_dlm.h 2006/10/23 15:44:33 1.21 @@ -138,6 +138,7 @@ int mount_client_fd; int mount_client_notified; int mount_client_delay; + int group_leave_on_finish; int remount_client; int init; int got_our_options; --- cluster/group/gfs_controld/recover.c 2006/10/16 17:12:10 1.22 +++ cluster/group/gfs_controld/recover.c 2006/10/23 15:44:33 1.23 @@ -1933,6 +1933,9 @@ { char buf[MAXLINE]; int rv, error = 0; + struct mg_member *memb; + + memb = find_memb_nodeid(mg, our_nodeid); memset(buf, 0, MAXLINE); @@ -1963,9 +1966,15 @@ if (error) { log_group(mg, "leaving due to mount error: %s", mg->error_msg); - group_leave(gh, mg->name); - } else + if (memb->finished) + group_leave(gh, mg->name); + else { + log_group(mg, "delay leave until after join"); + mg->group_leave_on_finish = 1; + } + } else { 
mg->mount_client_notified = 1; + } } void ping_kernel_mount(char *table) @@ -2192,6 +2201,13 @@ list_for_each_entry(memb, &mg->members, list) memb->finished = 1; + if (mg->group_leave_on_finish) { + log_group(mg, "leaving group after delay for join to finish"); + group_leave(gh, mg->name); + mg->group_leave_on_finish = 0; + return 0; + } + if (mg->needs_recovery) { log_group(mg, "finish: leave locks blocked for needs_recovery"); leave_blocked = 1;