From mboxrd@z Thu Jan  1 00:00:00 1970
From: teigland@sourceware.org <teigland@sourceware.org>
Date: 12 Jun 2007 20:04:42 -0000
Subject: [Cluster-devel] cluster/group/gfs_controld lock_dlm.h main.c
Message-ID: <20070612200442.31365.qmail@sourceware.org>
List-Id: <cluster-devel.redhat.com>
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland at sourceware.org	2007-06-12 20:04:41

Modified files:
	group/gfs_controld: lock_dlm.h main.c 

Log message:
	log an error message if we see mount.gfs killed before it's done

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.27&r2=1.28
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&r1=1.29&r2=1.30

--- cluster/group/gfs_controld/lock_dlm.h	2006/12/20 19:13:13	1.27
+++ cluster/group/gfs_controld/lock_dlm.h	2007/06/12 20:04:41	1.28
@@ -289,6 +289,7 @@
 void got_mount_result(struct mountgroup *mg, int result, int ci, int another);
 
 int client_send(int ci, char *buf, int len);
+int get_sysfs(struct mountgroup *mg, char *field, char *buf, int len);
 
 int send_group_message(struct mountgroup *mg, int len, char *buf);
 void update_flow_control_status(void);
--- cluster/group/gfs_controld/main.c	2007/04/26 19:01:34	1.29
+++ cluster/group/gfs_controld/main.c	2007/06/12 20:04:41	1.30
@@ -39,6 +39,7 @@
 
 extern struct list_head mounts;
 extern struct list_head withdrawn_mounts;
+extern group_handle_t gh;
 int no_withdraw;
 int no_plock;
 uint32_t plock_rate_limit = DEFAULT_PLOCK_RATE_LIMIT;
@@ -171,9 +172,66 @@
 	}
 }
 
+/* Logs what sysfs shows for mg when its mount client (mount.gfs) has died.
+   I don't think we really want to try to do anything if mount.gfs is killed,
+   because I suspect there are various corner cases where we might not do the
+   right thing; even without them things still don't work out too nicely.
+   Best to tell people not to kill a mount or unmount: that can leave things
+   (kernel, group, mtab) inconsistent in ways only a reboot straightens out. */
+
+static void mount_client_dead(struct mountgroup *mg, int ci)
+{
+	char buf[MAXLINE];
+	int rv;
+
+	if (ci != mg->mount_client) {
+		log_error("mount client mismatch %d %d", ci, mg->mount_client);
+		return;
+	}
+
+	/* is checking sysfs really a reliable way of telling whether the
+	   kernel has been mounted or not?  might the kernel mount just not
+	   have reached the sysfs registration yet? */
+
+	memset(buf, 0, sizeof(buf));
+	/* a zero return is taken to mean the fs has a sysfs "id" entry */
+	rv = get_sysfs(mg, "id", buf, sizeof(buf));
+	if (!rv) {
+		log_error("mount_client_dead ci %d sysfs id %s", ci, buf);
+#if 0
+		/* finish the mount, although there will be no mtab entry
+		   which will confuse umount causing it to do the kernel
+		   umount but not call umount.gfs */
+		got_mount_result(mg, 0, ci, client[ci].another_mount);
+#endif
+		return;
+	}
+
+	log_error("mount_client_dead ci %d no sysfs entry for fs", ci);
+	/* disabled cleanup; mp has no declaration in this function */
+#if 0
+	mp = find_mountpoint_client(mg, ci);
+	if (mp) {
+		list_del(&mp->list);
+		free(mp);
+	}
+	group_leave(gh, mg->name);
+#endif
+}
+
 static void client_dead(int ci)
 {
+	struct mountgroup *mg;
+
 	log_debug("client %d fd %d dead", ci, client[ci].fd);
+
+	/* if the dead mount client is mount.gfs and we've not received
+	   a mount result, then try to put things into a clean state */
+	   
+	mg = client[ci].mg;
+	if (mg && mg->mount_client && mg->mount_client_fd)
+		mount_client_dead(mg, ci);
+
 	close(client[ci].fd);
 	client[ci].fd = -1;
 	pollfd[ci].fd = -1;