From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 16 Oct 2006 14:44:03 -0000 Subject: [Cluster-devel] cluster/group/gfs_controld lock_dlm.h recover.c Message-ID: <20061016144403.24169.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2006-10-16 14:44:02 Modified files: group/gfs_controld: lock_dlm.h recover.c Log message: A node that was just added would incorrectly conclude that the node after it needed to do first mounter recovery. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.19&r2=1.20 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.19&r2=1.20 --- cluster/group/gfs_controld/lock_dlm.h 2006/10/13 20:00:02 1.19 +++ cluster/group/gfs_controld/lock_dlm.h 2006/10/16 14:44:02 1.20 @@ -149,6 +149,7 @@ int first_mount_pending_stop; int first_mounter; int first_mounter_done; + int global_first_recover_done; int emulate_first_mounter; int wait_first_done; int low_nodeid; --- cluster/group/gfs_controld/recover.c 2006/10/13 20:00:02 1.19 +++ cluster/group/gfs_controld/recover.c 2006/10/16 14:44:02 1.20 @@ -822,9 +822,22 @@ goto out; } + /* when we received our journals, no one was flagged with OPT_RECOVER + which means no first mounter recovery is needed or is current */ + + if (mg->global_first_recover_done) { + log_group(mg, "assign_journal: global_firsts_recover_done"); + goto out; + } + /* no one has done kernel mount successfully and no one is doing first mounter recovery, the new node gets to try first mounter recovery */ + log_group(mg, "kernel_mount_done %d kernel_mount_error %d " + "first_mounter %d first_mounter_done %d", + mg->kernel_mount_done, mg->kernel_mount_error, + mg->first_mounter, mg->first_mounter_done); + log_group(mg, "assign_journal: memb %d gets OPT_RECOVER", new->nodeid); new->opts |= MEMB_OPT_RECOVER; @@ -1007,6 +1020,7 @@ struct mg_member *memb, *memb2; struct gdlm_header *hd; int *ids, count, i, nodeid, jid, opts; + int current_first_recover = 0; hd = (struct gdlm_header *)buf; @@ -1048,8 +1062,16 @@ else if (opts & MEMB_OPT_SPECT) memb->spectator = 1; } + + if (opts & MEMB_OPT_RECOVER) + current_first_recover = 1; } + /* FIXME: use global_first_recover_done more widely instead of + as a single special case */ + if (!current_first_recover) + mg->global_first_recover_done = 1; + process_saved_mount_status(mg); /* we delay processing any options messages from new mounters