Linux NFS development
 help / color / mirror / Atom feed
* [PATCH 22/22] [lockd] Add kernel statd
@ 2006-08-05 13:06 Olaf Kirch
  0 siblings, 0 replies; only message in thread
From: Olaf Kirch @ 2006-08-05 13:06 UTC (permalink / raw)
  To: nfs

From: Olaf Kirch <okir@suse.de>
Subject: [lockd] Add kernel statd

 This patch adds the kernel statd, and allows the switchable statd support
 to use this instead of the upcalls to user land statd.

Signed-off-by: Olaf Kirch <okir@suse.de>

 fs/buffer.c                    |    1 
 fs/lockd/Makefile              |    2 
 fs/lockd/statd.c               |  405 +++++++++++++++++++++++++++++++++++++++++
 fs/lockd/svc.c                 |   59 +++++
 include/linux/lockd/lockd.h    |    1 
 include/linux/lockd/sm_inter.h |    5 
 6 files changed, 469 insertions(+), 4 deletions(-)

Index: build/fs/lockd/svc.c
===================================================================
--- build.orig/fs/lockd/svc.c
+++ build/fs/lockd/svc.c
@@ -40,6 +40,7 @@
 #define ALLOWED_SIGS		(sigmask(SIGKILL))
 
 static struct svc_program	nlmsvc_program;
+extern struct svc_program	nsmsvc_program;
 
 struct nlmsvc_binding *		nlmsvc_ops;
 EXPORT_SYMBOL(nlmsvc_ops);
@@ -62,6 +63,7 @@ static unsigned long		nlm_timeout = LOCK
 static int			nlm_udpport, nlm_tcpport;
 int				nlm_max_hosts = 256;
 int				nsm_use_hostnames = 0;
+static int			nsm_use_kstatd = 0;
 
 /*
  * Constants needed for the sysctl interface.
@@ -119,8 +121,17 @@ lockd(struct svc_rqst *rqstp)
 
 	daemonize("lockd");
 
-	/* Initialize the statd upcalls to rpc.statd */
-	nsm_statd_upcalls_init();
+	/* See if we should use the kernel statd. If not,
+	 * or if setting up the kernel statd fails, try
+	 * falling back to user land upcalls.
+	 */
+	if (nsm_use_kstatd && nsm_kernel_statd_init() < 0)
+		nsm_use_kstatd = 0;
+
+	if (nsm_use_kstatd == 0) {
+		/* Initialize the statd upcalls to rpc.statd */
+		nsm_statd_upcalls_init();
+	}
 
 	/* Process request with signals blocked, but allow SIGKILL.  */
 	allow_signal(SIGKILL);
@@ -218,6 +229,7 @@ int
 lockd_up(void)
 {
 	static int		warned;
+	struct svc_program *	prog;
 	struct svc_serv *	serv;
 	int			error = 0;
 
@@ -241,8 +253,12 @@ lockd_up(void)
 		printk(KERN_WARNING
 			"lockd_up: no pid, %d users??\n", nlmsvc_users);
 
+	/* Register NLM program and possibly NSM (if using kstatd) */
 	error = -ENOMEM;
-	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE);
+	prog = &nlmsvc_program;
+	if (nsm_use_kstatd)
+		prog = &nsmsvc_program;
+	serv = svc_create(prog, LOCKD_BUFSIZE);
 	if (!serv) {
 		printk(KERN_WARNING "lockd_up: create service failed\n");
 		goto out;
@@ -397,6 +413,15 @@ static ctl_table nlm_sysctls[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nsm_use_kstatd",
+		.data		= &nsm_use_kstatd,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+	},
+
 
 	{ .ctl_name = 0 }
 };
@@ -466,6 +491,7 @@ module_param_call(nlm_udpport, param_set
 		  &nlm_udpport, 0644);
 module_param_call(nlm_tcpport, param_set_port, param_get_int,
 		  &nlm_tcpport, 0644);
+module_param(nsm_use_kstatd, int, 0444);
 
 /*
  * Initialising and terminating the module.
@@ -536,3 +562,30 @@ static struct svc_program	nlmsvc_program
 	.pg_stats		= &nlmsvc_stats,	/* stats table */
 	.pg_authenticate = &lockd_authenticate	/* export authentication */
 };
+
+/*
+ * Define NSM program and procedures
+ */
+static struct svc_version	nsmsvc_version1 = {
+		.vs_vers	= 1,
+		.vs_nproc	= 7,	/* entries in nsmsvc_procedures[] */
+		.vs_proc	= nsmsvc_procedures,
+		.vs_xdrsize	= SMSVC_XDRSIZE,
+};
+static struct svc_version *	nsmsvc_version[] = {
+	[1] = &nsmsvc_version1,	/* indexed by version; slot 0 stays NULL */
+};
+
+static struct svc_stat		nsmsvc_stats;
+
+#define SM_NRVERS	(sizeof(nsmsvc_version)/sizeof(nsmsvc_version[0]))
+struct svc_program	nsmsvc_program = {
+	.pg_next		= &nlmsvc_program,	/* next program: NLM */
+	.pg_prog		= SM_PROGRAM,		/* program number */
+	.pg_nvers		= SM_NRVERS,		/* number of entries in nsmsvc_version */
+	.pg_vers		= nsmsvc_version,	/* version table */
+	.pg_name		= "statd",		/* service name */
+	.pg_class		= "nfsd",		/* share authentication with nfsd */
+	.pg_stats		= &nsmsvc_stats,	/* stats table */
+	.pg_authenticate	= &nsmsvc_authenticate	/* no authentication :-( */
+};
Index: build/include/linux/lockd/sm_inter.h
===================================================================
--- build.orig/include/linux/lockd/sm_inter.h
+++ build/include/linux/lockd/sm_inter.h
@@ -19,6 +19,7 @@
 #define SM_NOTIFY	6
 
 #define SM_MAXSTRLEN	1024
+#define SMSVC_XDRSIZE	(sizeof(struct nsm_args) + SM_MAXSTRLEN)
 
 /*
  * Arguments for all calls to statd
@@ -30,6 +31,8 @@ struct nsm_args {
 	u32		proc;
 
 	char *		mon_name;
+	int		mon_name_len;
+	u32		state;
 };
 
 /*
@@ -41,6 +44,8 @@ struct nsm_res {
 };
 
 extern void	nsm_statd_upcalls_init(void);
+extern int	nsm_kernel_statd_init(void);
+extern int	nsmsvc_authenticate(struct svc_rqst *);
 extern int	(*nsm_monitor)(struct nlm_host *);
 extern int	(*nsm_unmonitor)(struct nlm_host *);
 extern int	nsm_local_state;
Index: build/fs/buffer.c
===================================================================
--- build.orig/fs/buffer.c
+++ build/fs/buffer.c
@@ -183,6 +183,7 @@ int fsync_super(struct super_block *sb)
 	__fsync_super(sb);
 	return sync_blockdev(sb->s_bdev);
 }
+EXPORT_SYMBOL_GPL(fsync_super);
 
 /*
  * Write out and wait upon all dirty data associated with this
Index: build/fs/lockd/Makefile
===================================================================
--- build.orig/fs/lockd/Makefile
+++ build/fs/lockd/Makefile
@@ -5,6 +5,6 @@
 obj-$(CONFIG_LOCKD) += lockd.o
 
 lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
-	        svcproc.o svcsubs.o mon.o xdr.o
+	        svcproc.o svcsubs.o mon.o xdr.o statd.o
 lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
 lockd-objs		      := $(lockd-objs-y)
Index: build/fs/lockd/statd.c
===================================================================
--- /dev/null
+++ build/fs/lockd/statd.c
@@ -0,0 +1,405 @@
+/*
+ * linux/fs/lockd/statd.c
+ *
+ * Kernel-based status monitor. This is an alternative to
+ * the code in mon.c.
+ *
+ * When asked to monitor a host, we add it to /var/lib/nfs/sm
+ * ourselves, and that's it. In order to catch SM_NOTIFY calls
+ * we implement a minimal statd.
+ *
+ * Minimal user space requirements for this implementation:
+ *  /var/lib/nfs/state
+ *	must exist, and must contain the NSM state as a 32bit
+ *	binary counter.
+ * /var/lib/nfs/sm
+ *	must exist
+ *
+ * Copyright (C) 2004, Olaf Kirch <okir@suse.de>
+ */
+
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/lockd/lockd.h>
+#include <linux/lockd/share.h>
+#include <linux/lockd/sm_inter.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+#include <linux/buffer_head.h>
+
+
+/* XXX make this a module parameter? */
+#define NSM_BASE_PATH		"/var/lib/nfs"
+#define NSM_SM_PATH		NSM_BASE_PATH "/sm"
+#define NSM_STATE_PATH		NSM_BASE_PATH "/state"
+
+#define NLMDBG_FACILITY		NLMDBG_CLIENT
+
+static int	__nsm_monitor(struct nlm_host *host);
+static int	__nsm_unmonitor(struct nlm_host *host);
+
+/*
+ * Read the local NSM state from NSM_STATE_PATH and install the kernel hooks
+ */
+int
+nsm_kernel_statd_init(void)
+{
+	struct file	*filp;
+	char		buffer[32];
+	mm_segment_t	fs;
+	int		res;
+
+	dprintk("lockd: nsm_init()\n");
+	filp = filp_open(NSM_STATE_PATH, O_RDONLY, 0444);
+	if (IS_ERR(filp)) {
+		res = PTR_ERR(filp);
+		printk(KERN_NOTICE "lockd: failed to open %s: err=%d\n",
+				NSM_STATE_PATH, res);
+		return res;
+	}
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	res = vfs_read(filp, buffer, sizeof(buffer) - 1, &filp->f_pos);
+	set_fs(fs);
+	filp_close(filp, NULL);
+	if (res < 0)
+		return res;
+	buffer[res] = '\0';	/* simple_strtol() needs a terminator */
+	if (res == 4)		/* binary counter, else decimal text */
+		nsm_local_state = *(u32 *) buffer;
+	else
+		nsm_local_state = simple_strtol(buffer, NULL, 10);
+
+	nsm_monitor = __nsm_monitor;
+	nsm_unmonitor = __nsm_unmonitor;
+	return 0;
+}
+
+/*
+ * Return the monitor file path for @nsm in a freshly allocated page
+ * (release it with nsm_put_name()), or ERR_PTR(-ENOMEM).
+ */
+static char *
+nsm_filename(struct nsm_handle *nsm)
+{
+	char	*path = (char *) __get_free_page(GFP_KERNEL);
+
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	/* FIXME IPV6 */
+	if (!nsm_use_hostnames) {
+		snprintf(path, PAGE_SIZE, "%s/%u.%u.%u.%u",
+				NSM_SM_PATH,
+				NIPQUAD(nsm->sm_addr.sin_addr));
+	} else {
+		snprintf(path, PAGE_SIZE, "%s/%s",
+				NSM_SM_PATH, nsm->sm_name);
+	}
+	return path;
+}
+
+static void
+nsm_put_name(char *name)
+{
+	free_page((unsigned long) name);	/* page from nsm_filename() */
+}
+
+/*
+ * Create the NSM monitor file for @nsm; returns 0 or a negative errno
+ */
+static int
+nsm_create(struct nsm_handle *nsm)
+{
+	struct file	*filp;
+	char		*filename;
+	int		res = 0;
+
+	dprintk("lockd: creating statd monitor file for %s\n", nsm->sm_name);
+	/* nsm_filename() fails with ERR_PTR(), never NULL */
+	if (IS_ERR(filename = nsm_filename(nsm)))
+		return PTR_ERR(filename);
+
+	filp = filp_open(filename, O_CREAT|O_SYNC|O_RDWR, 0644);
+	if (IS_ERR(filp)) {
+		res = PTR_ERR(filp);
+		printk(KERN_NOTICE
+			"lockd/statd: failed to create %s: err=%d\n",
+			filename, res);
+	} else {
+		fsync_super(filp->f_dentry->d_inode->i_sb);
+		filp_close(filp, NULL);
+	}
+
+	nsm_put_name(filename);
+	return res;
+}
+
+/* Remove the NSM monitor file for @nsm; returns 0 or a negative errno. */
+static int
+nsm_unlink(struct nsm_handle *nsm)
+{
+	struct nameidata nd;
+	struct inode	*inode = NULL;
+	struct dentry	*dentry;
+	char		*filename;
+	int		res = 0;
+
+	/* nsm_filename() fails with ERR_PTR(), never NULL */
+	if (IS_ERR(filename = nsm_filename(nsm)))
+		return PTR_ERR(filename);
+
+	/* Doing unlink from kernel space is really icky. */
+	if ((res = path_lookup(filename, LOOKUP_PARENT, &nd)) != 0)
+		goto exit;
+
+	if (nd.last_type == LAST_NORM && !nd.last.name[nd.last.len]) {
+		mutex_lock(&nd.dentry->d_inode->i_mutex);
+
+		dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
+		if (!IS_ERR(dentry)) {
+			if ((inode = dentry->d_inode) != NULL)
+				atomic_inc(&inode->i_count);
+			res = vfs_unlink(nd.dentry->d_inode, dentry);
+			dput(dentry);
+		} else {
+			res = PTR_ERR(dentry);
+		}
+		mutex_unlock(&nd.dentry->d_inode->i_mutex);
+	} else {
+		res = -EISDIR;
+	}
+	path_release(&nd);
+
+exit:
+	if (res < 0)
+		printk(KERN_NOTICE
+			"lockd/statd: failed to unlink %s: err=%d\n",
+			filename, res);
+	if (inode)
+		iput(inode);
+	nsm_put_name(filename);
+	return res;
+}
+
+/*
+ * Call nsm_create/nsm_unlink with CAP_DAC_OVERRIDE and fsuid/fsgid 0
+ */
+#define swap_ugid(type, var) { \
+	type tmp = current->var; current->var = var; var = tmp; \
+}
+
+static int
+with_privilege(int (*func)(struct nsm_handle *), struct nsm_handle *nsm)
+{
+	kernel_cap_t	cap = current->cap_effective;
+	int		res = 0, mask;
+	uid_t		fsuid = 0;
+	gid_t		fsgid = 0;
+
+	/* capable() may set PF_SUPERPRIV while func() runs; this mask has
+	 * every bit set except PF_SUPERPRIV, which keeps its current value */
+	mask = current->flags | ~PF_SUPERPRIV;
+
+	/* Raise capability so that we're able to create/unlink the file.
+	 * Swap in fsuid/fsgid 0 so the file will be owned by root. */
+	cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
+	swap_ugid(uid_t, fsuid);
+	swap_ugid(gid_t, fsgid);
+
+	res = func(nsm);
+
+	/* drop privileges: the second swap puts the saved ids back */
+	current->cap_effective = cap;
+	swap_ugid(uid_t, fsuid);
+	swap_ugid(gid_t, fsgid);
+
+	/* Clear PF_SUPERPRIV unless it was set to begin with */
+	current->flags &= mask;
+
+	return res;
+}
+
+/*
+ * Start monitoring a remote host by creating its monitor file,
+ * unless the handle is already marked as monitored.
+ * The caller holds the host table semaphore.
+ */
+static int
+__nsm_monitor(struct nlm_host *host)
+{
+	struct nsm_handle *handle;
+	int		err;
+
+	dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
+	handle = host->h_nsmhandle;
+	if (handle == NULL)
+		BUG();
+
+	if (handle->sm_monitored)
+		return 0;
+	err = with_privilege(nsm_create, handle);
+	if (err < 0) {
+		dprintk(KERN_NOTICE "nsm_monitor(%s) failed: errno=%d\n",
+				handle->sm_name, -err);
+		return err;
+	}
+	handle->sm_monitored = 1;
+	return err;
+}
+
+/*
+ * Cease to monitor remote host: detach its NSM handle and, when the
+ * checks below pass, unlink the monitor file. Returns that result or 0.
+ */
+static int
+__nsm_unmonitor(struct nlm_host *host)
+{
+	struct nsm_handle *nsm;
+	int res = 0;
+
+	nsm = host->h_nsmhandle;
+	host->h_nsmhandle = NULL;
+
+	/* If the host was invalidated due to lockd restart/shutdown,
+	 * don't unmonitor it.
+	 * (Strictly speaking, we would have to keep the SM file
+	 * until the next reboot. The only way to achieve that
+	 * would be to link the monitor file to sm.bak now.)
+	 */
+	if (nsm && atomic_read(&nsm->sm_count) == 1
+	 && nsm->sm_monitored && !nsm->sm_sticky) {
+		dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+
+		res = with_privilege(nsm_unlink, nsm);
+	}
+
+	nsm_release(nsm);	/* NOTE(review): nsm may be NULL here - confirm nsm_release() copes */
+	return res;
+}
+
+/*
+ * NSM server implementation starts here
+ */
+int
+nsmsvc_authenticate(struct svc_rqst *rqstp)
+{
+	/* No authentication for statd: accept every request (SVC_OK)
+	 * with no client attached. Many statd implementations even send
+	 * their reboot notifications from an unprivileged port.
+	 */
+	rqstp->rq_client = NULL;
+	return SVC_OK;
+}
+
+
+/*
+ * NULL: Test for presence of service; ignores argp/resp, always succeeds
+ */
+static int
+nsmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	dprintk("statd: NULL          called\n");
+	return rpc_success;
+}
+
+/*
+ * NOTIFY: remote host rebooted; hand name and state to nlm_host_rebooted()
+ */
+static int
+nsmsvc_proc_notify(struct svc_rqst *rqstp, struct nsm_args *argp,
+				           struct nsm_res  *resp)
+{
+	struct sockaddr_in	saddr = rqstp->rq_addr;
+
+	dprintk("statd: NOTIFY        called\n");
+
+	nlm_host_rebooted(&saddr, argp->mon_name, argp->mon_name_len, argp->state);
+	return rpc_success;
+}
+
+/*
+ * All other operations are not implemented: reply status 0, state -1
+ */
+static int
+nsmsvc_proc_fail(struct svc_rqst *rqstp, struct nsm_args *argp,
+				         struct nsm_res  *resp)
+{
+	dprintk("statd: proc %u        called\n", rqstp->rq_proc);
+	resp->status = 0;	/* NOTE(review): NSM defines 0 as STAT_SUCC - confirm */
+	resp->state = -1;
+	return rpc_success;
+}
+
+/*
+ * NSM XDR routines
+ */
+static int
+nsmsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_argsize_check(rqstp, p);	/* no arguments to decode */
+}
+
+static int
+nsmsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);	/* nothing to encode */
+}
+
+static int
+nsmsvc_decode_stat_chge(struct svc_rqst *rqstp, u32 *p, struct nsm_args *argp)
+{
+	/* Decode the client's mon_name (at most SM_MAXSTRLEN bytes) */
+	p = xdr_decode_string_inplace(p, &argp->mon_name, &argp->mon_name_len, SM_MAXSTRLEN);
+	if (p == NULL)
+		return 0;
+
+	argp->state = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int
+nsmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+{
+	*p++ = htonl(resp->status);	/* XDR integers are big-endian */
+	return xdr_ressize_check(rqstp, p);
+}
+
+static int
+nsmsvc_encode_stat_res(struct svc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+{
+	*p++ = htonl(resp->status);	/* XDR integers are big-endian */
+	*p++ = htonl(resp->state);
+	return xdr_ressize_check(rqstp, p);
+}
+
+struct nsm_void			{ int dummy; };	/* lets PROC() take sizeof(struct nsm_void) */
+
+#define PROC(name, xargt, xrest, argt, rest, respsize)	\
+ { .pc_func	= (svc_procfunc) nsmsvc_proc_##name,	\
+   .pc_decode	= (kxdrproc_t) nsmsvc_decode_##xargt,	\
+   .pc_encode	= (kxdrproc_t) nsmsvc_encode_##xrest,	\
+   .pc_release	= NULL,					\
+   .pc_argsize	= sizeof(struct nsm_##argt),		\
+   .pc_ressize	= sizeof(struct nsm_##rest),		\
+   .pc_xdrressize = respsize,				\
+ }
+
+struct svc_procedure		nsmsvc_procedures[] = {
+  PROC(null,		void,		void,		void,	void, 1),	/* 0: NULL */
+  PROC(fail,		void,		stat_res,	void,	res, 2),	/* 1: unsupported */
+  PROC(fail,		void,		stat_res,	void,	res, 2),	/* 2: unsupported */
+  PROC(fail,		void,		res,		void,	res, 1),	/* 3: unsupported */
+  PROC(fail,		void,		res,		void,	res, 1),	/* 4: unsupported */
+  PROC(fail,		void,		res,		void,	res, 1),	/* 5: unsupported */
+  PROC(notify,		stat_chge,	void,		args,	void, 1)	/* 6: SM_NOTIFY */
+};
Index: build/include/linux/lockd/lockd.h
===================================================================
--- build.orig/include/linux/lockd/lockd.h
+++ build/include/linux/lockd/lockd.h
@@ -141,6 +141,7 @@ extern struct svc_procedure	nlmsvc_proce
 #ifdef CONFIG_LOCKD_V4
 extern struct svc_procedure	nlmsvc_procedures4[];
 #endif
+extern struct svc_procedure	nsmsvc_procedures[];
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
 extern int			nlm_max_hosts;

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2006-08-05 13:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-05 13:06 [PATCH 22/22] [lockd] Add kernel statd Olaf Kirch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox