From mboxrd@z Thu Jan 1 00:00:00 1970 From: teigland@sourceware.org Date: 12 Jun 2007 20:04:42 -0000 Subject: [Cluster-devel] cluster/group/gfs_controld lock_dlm.h main.c Message-ID: <20070612200442.31365.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: teigland at sourceware.org 2007-06-12 20:04:41 Modified files: group/gfs_controld: lock_dlm.h main.c Log message: log an error message if we see mount.gfs killed before it's done Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.27&r2=1.28 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&r1=1.29&r2=1.30 --- cluster/group/gfs_controld/lock_dlm.h 2006/12/20 19:13:13 1.27 +++ cluster/group/gfs_controld/lock_dlm.h 2007/06/12 20:04:41 1.28 @@ -289,6 +289,7 @@ void got_mount_result(struct mountgroup *mg, int result, int ci, int another); int client_send(int ci, char *buf, int len); +int get_sysfs(struct mountgroup *mg, char *field, char *buf, int len); int send_group_message(struct mountgroup *mg, int len, char *buf); void update_flow_control_status(void); --- cluster/group/gfs_controld/main.c 2007/04/26 19:01:34 1.29 +++ cluster/group/gfs_controld/main.c 2007/06/12 20:04:41 1.30 @@ -39,6 +39,7 @@ extern struct list_head mounts; extern struct list_head withdrawn_mounts; +extern group_handle_t gh; int no_withdraw; int no_plock; uint32_t plock_rate_limit = DEFAULT_PLOCK_RATE_LIMIT; @@ -171,9 +172,66 @@ } } +/* I don't think we really want to try to do anything if mount.gfs is killed, + because I suspect there are various corner cases where we might not do the + right thing. Even without the corner cases things still don't work out + too nicely. 
Best to just tell people not to kill a mount or unmount
+   because doing so can leave things (kernel, group, mtab) in inconsistent
+   states that can't be straightened out properly without a reboot. */
+/* mount_client_dead() - report that client ci went away while it was
+   still recorded as the mount client of mg.
+
+   Reached from client_dead() when client[ci].mg is set and both
+   mg->mount_client and mg->mount_client_fd are nonzero.  If ci is not
+   mg->mount_client the mismatch is logged and nothing else is done.
+   Otherwise get_sysfs() is asked for the "id" field and one of two
+   error messages is logged, chosen by its return value.  Apart from
+   that call the live code only logs: both recovery paths are compiled
+   out with #if 0. */
+static void mount_client_dead(struct mountgroup *mg, int ci)
+{
+	char buf[MAXLINE];
+	int rv;
+
+	if (ci != mg->mount_client) {	/* only the recorded mount client is handled */
+		log_error("mount client mismatch %d %d", ci, mg->mount_client);
+		return;
+	}
+
+	/* is checking sysfs really a reliable way of telling whether the
+	   kernel has been mounted or not?  might the kernel mount just not
+	   have reached the sysfs registration yet? */
+
+	memset(buf, 0, sizeof(buf));	/* all-zero buffer before the read */
+
+	rv = get_sysfs(mg, "id", buf, sizeof(buf));
+	if (!rv) {	/* zero return: buf is logged as the sysfs id; presumably the fs is mounted in the kernel -- TODO confirm */
+		log_error("mount_client_dead ci %d sysfs id %s", ci, buf);
+#if 0
+		/* finish the mount, although there will be no mtab entry
+		   which will confuse umount causing it to do the kernel
+		   umount but not call umount.gfs */
+		got_mount_result(mg, 0, ci, client[ci].another_mount);
+#endif
+		return;
+	}
+
+	log_error("mount_client_dead ci %d no sysfs entry for fs", ci);
+	/* NOTE(review): the disabled block below uses mp, which has no local declaration in this function; one would be needed before enabling it */
+#if 0
+	mp = find_mountpoint_client(mg, ci);
+	if (mp) {
+		list_del(&mp->list);
+		free(mp);
+	}
+	group_leave(gh, mg->name);
+#endif
+}
+
 static void client_dead(int ci)
 {
+	struct mountgroup *mg;
+
 	log_debug("client %d fd %d dead", ci, client[ci].fd);
+
+	/* if the dead mount client is mount.gfs and we've not received
+	   a mount result, then try to put things into a clean state */
+
+	mg = client[ci].mg;
+	if (mg && mg->mount_client && mg->mount_client_fd)
+		mount_client_dead(mg, ci);
+
 	close(client[ci].fd);
 	client[ci].fd = -1;
 	pollfd[ci].fd = -1;