From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 20 Jul 2006 20:19:44 -0000 Subject: [Cluster-devel] cluster/group/gfs_controld lock_dlm.h main.c r ... Message-ID: <20060720201944.15164.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2006-07-20 20:19:44 Modified files: group/gfs_controld: lock_dlm.h main.c recover.c Log message: if mount.gfs is unmounting/leaving the group because the kernel mount failed, then don't wait for the kernel mount to complete before doing the leave Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.6&r2=1.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.3&r2=1.4 --- cluster/group/gfs_controld/lock_dlm.h 2006/07/19 14:44:40 1.6 +++ cluster/group/gfs_controld/lock_dlm.h 2006/07/20 20:19:44 1.7 @@ -133,6 +133,7 @@ int mount_client_notified; int mount_client_delay; int delay_send_journals; + int kernel_mount_error; int got_kernel_mount; int first_mounter; int first_mounter_done; @@ -235,7 +236,7 @@ int do_mount(int ci, char *dir, char *type, char *proto, char *table, char *options); -int do_unmount(int ci, char *dir); +int do_unmount(int ci, char *dir, int mnterr); int do_remount(int ci, char *dir, char *mode); int do_withdraw(char *name); int kernel_recovery_done(char *name); --- cluster/group/gfs_controld/main.c 2006/07/14 18:56:10 1.5 +++ cluster/group/gfs_controld/main.c 2006/07/20 20:19:44 1.6 @@ -178,7 +178,7 @@ if (!strcmp(cmd, "join")) rv = do_mount(ci, dir, type, proto, table, extra); else if (!strcmp(cmd, "leave")) - rv = do_unmount(ci, dir); + rv = do_unmount(ci, dir, atoi(proto)); else if (!strcmp(cmd, "remount")) rv = do_remount(ci, dir, argv[3]); else if (!strcmp(cmd, "dump")) { --- cluster/group/gfs_controld/recover.c 2006/06/15 20:41:46 1.3 +++ cluster/group/gfs_controld/recover.c 2006/07/20 20:19:44 1.4 @@ -1447,7 +1447,7 @@ return 0; } -int do_unmount(int ci, char *dir) +int do_unmount(int ci, char *dir, int mnterr) { struct mountgroup *mg; @@ -1466,11 +1466,17 @@ return -1; } + if (mnterr) { + log_group(mg, "do_unmount: kernel mount error %d", mnterr); + mg->kernel_mount_error = mnterr; + goto out; + } + if (mg->withdraw) { log_error("do_unmount: fs on %s is withdrawing", dir); return -1; } - + /* Check to see if we're waiting for a kernel recovery_done to do a start_done(). If so, call the start_done() here because we won't be getting anything else from gfs-kernel which is now gone. */ @@ -1479,7 +1485,7 @@ log_group(mg, "do_unmount: fill in start_done"); start_done(mg); } - + out: group_leave(gh, mg->name); return 0; } @@ -1600,9 +1606,22 @@ if (mg->got_kernel_mount) break; - if (mg->mount_client_notified) - wait_for_kernel_mount(mg); - else { + if (mg->mount_client_notified) { + + /* this kernel_mount_error check isn't perfect, we + could still 1) notify mount.gfs, 2) get a stop cb, + 3) kernel mount fails, 4) mount.gfs sends a leave + with mnterr, 5) we don't recv it and don't set + kernel_mount_error because we're stuck in + wait_for_kernel_mount() from do_stop */ + + if (!mg->kernel_mount_error) + wait_for_kernel_mount(mg); + else { + log_group(mg, "ignore stop, failed mount"); + break; + } + } else { mg->mount_client_delay = 1; break; }