From: Olaf Kirch <okir@suse.de>
To: "Lever, Charles" <Charles.Lever@netapp.com>
Cc: nfs@lists.sourceforge.net
Subject: Re: NSM lock recovery fails too often
Date: Tue, 9 Mar 2004 11:56:07 +0100 [thread overview]
Message-ID: <20040309105607.GA20391@suse.de> (raw)
In-Reply-To: <482A3FA0050D21419C269D13989C61130435DD1C@lavender-fe.eng.netapp.com>
[-- Attachment #1: Type: text/plain, Size: 761 bytes --]
Hi,
On Mon, Mar 08, 2004 at 08:30:45PM -0800, Lever, Charles wrote:
> perhaps the best solution is to use an option as Olaf's patch
> does, but to make the default behavior match the in-kernel
> lockd's behavior, not the user-level lockd's behavior. or,
> maybe we use the second patch and simply remove the user
> level lockd from nfs-utils.
I have continued working on the kernel statd, and it seems to be
reasonably functional now. I'm attaching my current kernel patch
and a user land utility for sending out the SM_NOTIFY calls
at reboot.
The kernel patch isn't 100% clean yet, as it breaks the non-
CONFIG_STATD case.
Olaf
--
Olaf Kirch | Stop wasting entropy - start using predictable
okir@suse.de | tempfile names today!
---------------+
[-- Attachment #2: kernel-statd --]
[-- Type: text/plain, Size: 27389 bytes --]
diff -X excl -purNa linux-2.6.2/fs/Kconfig linux-2.6.2-kstatd/fs/Kconfig
--- linux-2.6.2/fs/Kconfig 2004-02-13 15:01:50.000000000 +0100
+++ linux-2.6.2-kstatd/fs/Kconfig 2004-02-13 15:02:14.000000000 +0100
@@ -1531,6 +1531,10 @@ config ROOT_NFS
config LOCKD
tristate
+config STATD
+ bool "Use kernel statd implementation"
+ depends on LOCKD && EXPERIMENTAL
+
config LOCKD_V4
bool
depends on NFSD_V3 || NFS_V3
diff -X excl -purNa linux-2.6.2/fs/buffer.c linux-2.6.2-kstatd/fs/buffer.c
--- linux-2.6.2/fs/buffer.c 2004-02-04 04:43:56.000000000 +0100
+++ linux-2.6.2-kstatd/fs/buffer.c 2004-02-13 15:02:14.000000000 +0100
@@ -242,6 +242,7 @@ int fsync_super(struct super_block *sb)
return sync_blockdev(sb->s_bdev);
}
+EXPORT_SYMBOL(fsync_super);
/*
* Write out and wait upon all dirty data associated with this
diff -X excl -purNa linux-2.6.2/fs/lockd/Makefile linux-2.6.2-kstatd/fs/lockd/Makefile
--- linux-2.6.2/fs/lockd/Makefile 2004-02-04 04:43:10.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/Makefile 2004-02-13 15:02:14.000000000 +0100
@@ -5,6 +5,12 @@
obj-$(CONFIG_LOCKD) += lockd.o
lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
- svcproc.o svcsubs.o mon.o xdr.o lockd_syms.o
+ svcproc.o svcsubs.o xdr.o lockd_syms.o
+ifeq ($(CONFIG_STATD),y)
+lockd-objs-y += statd.o
+else
+lockd-objs-y += mon.o
+endif
+
lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
lockd-objs := $(lockd-objs-y)
diff -X excl -purNa linux-2.6.2/fs/lockd/clntlock.c linux-2.6.2-kstatd/fs/lockd/clntlock.c
--- linux-2.6.2/fs/lockd/clntlock.c 2004-02-04 04:44:43.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/clntlock.c 2004-02-13 15:02:14.000000000 +0100
@@ -164,7 +164,6 @@ void nlmclnt_mark_reclaim(struct nlm_hos
static inline
void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
{
- host->h_monitored = 0;
host->h_nsmstate = newstate;
host->h_state++;
host->h_nextrebind = 0;
diff -X excl -purNa linux-2.6.2/fs/lockd/clntproc.c linux-2.6.2-kstatd/fs/lockd/clntproc.c
--- linux-2.6.2/fs/lockd/clntproc.c 2004-02-04 04:43:06.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/clntproc.c 2004-02-13 15:02:14.000000000 +0100
@@ -442,7 +442,7 @@ nlmclnt_lock(struct nlm_rqst *req, struc
struct nlm_res *resp = &req->a_res;
int status;
- if (!host->h_monitored && nsm_monitor(host) < 0) {
+ if (nsm_monitor(host) < 0) {
printk(KERN_NOTICE "lockd: failed to monitor %s\n",
host->h_name);
return -ENOLCK;
diff -X excl -purNa linux-2.6.2/fs/lockd/host.c linux-2.6.2-kstatd/fs/lockd/host.c
--- linux-2.6.2/fs/lockd/host.c 2004-02-04 04:43:56.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/host.c 2004-02-13 15:02:18.000000000 +0100
@@ -61,7 +61,7 @@ struct nlm_host *
nlm_lookup_host(int server, struct sockaddr_in *sin,
int proto, int version)
{
- struct nlm_host *host, **hp;
+ struct nlm_host *host, **hp, *host2;
u32 addr;
int hash;
@@ -119,7 +119,7 @@ nlm_lookup_host(int server, struct socka
init_MUTEX(&host->h_sema);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
host->h_expires = jiffies + NLM_HOST_EXPIRE;
- host->h_count = 1;
+ atomic_set(&host->h_count, 1);
init_waitqueue_head(&host->h_gracewait);
host->h_state = 0; /* pseudo NSM state */
host->h_nsmstate = 0; /* real NSM state */
@@ -127,6 +127,27 @@ nlm_lookup_host(int server, struct socka
host->h_next = nlm_hosts[hash];
nlm_hosts[hash] = host;
+#ifdef CONFIG_STATD
+ /* Do the loop again - see if we have an nlm_host for
+ * this address already.
+ */
+ for (hp = &nlm_hosts[hash]; (host2 = *hp); hp = &host2->h_next) {
+ if (nlm_cmp_addr(&host2->h_addr, sin)) {
+ struct nsm_handle *nsm;
+
+ nsm = host2->h_nsmhandle;
+ if (nsm) {
+ host->h_nsmhandle = nsm;
+ atomic_inc(&nsm->sm_count);
+ break;
+ }
+ }
+ }
+
+ if (host->h_nsmhandle == NULL)
+ host->h_nsmhandle = nsm_alloc(&host->h_addr);
+#endif
+
if (++nrhosts > NLM_HOST_MAX)
next_gc = 0;
@@ -138,17 +159,17 @@ nohost:
struct nlm_host *
nlm_find_client(void)
{
- /* find a nlm_host for a client for which h_killed == 0.
- * and return it
+ /* Find the next NLM client host and remove it from the
+ * list. The caller is supposed to release all resources
+ * held by this client, and release the nlm_host afterwards.
*/
int hash;
down(&nlm_host_sema);
for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) {
struct nlm_host *host, **hp;
for (hp = &nlm_hosts[hash]; (host = *hp) ; hp = &host->h_next) {
- if (host->h_server &&
- host->h_killed == 0) {
- nlm_get_host(host);
+ if (host->h_server) {
+ *hp = host->h_next;
up(&nlm_host_sema);
return host;
}
@@ -235,7 +256,7 @@ struct nlm_host * nlm_get_host(struct nl
{
if (host) {
dprintk("lockd: get host %s\n", host->h_name);
- host->h_count ++;
+ atomic_inc(&host->h_count);
host->h_expires = jiffies + NLM_HOST_EXPIRE;
}
return host;
@@ -246,10 +267,61 @@ struct nlm_host * nlm_get_host(struct nl
*/
void nlm_release_host(struct nlm_host *host)
{
- if (host && host->h_count) {
+ if (host && atomic_dec_and_test(&host->h_count))
dprintk("lockd: release host %s\n", host->h_name);
- host->h_count --;
+}
+
+/*
+ * Given an IP address, initiate recovery and ditch all locks.
+ */
+void
+nlm_host_rebooted(struct sockaddr_in *sin, u32 new_state)
+{
+ struct nlm_host *host, **hp;
+ int hash;
+
+ dprintk("lockd: nlm_host_rebooted(%u.%u.%u.%u)\n",
+ NIPQUAD(sin->sin_addr));
+
+ hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
+
+ /* Lock hash table */
+ down(&nlm_host_sema);
+ for (hp = &nlm_hosts[hash]; (host = *hp); hp = &host->h_next) {
+ if (nlm_cmp_addr(&host->h_addr, sin)) {
+ if (host->h_nsmhandle)
+ host->h_nsmhandle->sm_monitored = 0;
+ host->h_rebooted = 1;
+ }
+ }
+
+again:
+ for (hp = &nlm_hosts[hash]; (host = *hp); hp = &host->h_next) {
+ if (nlm_cmp_addr(&host->h_addr, sin) && host->h_rebooted) {
+ host->h_rebooted = 0;
+ atomic_inc(&host->h_count);
+ up(&nlm_host_sema);
+
+ /* If we're server for this guy, just ditch
+ * all the locks he held.
+ * If he's the server, initiate lock recovery.
+ */
+ if (host->h_server) {
+ nlmsvc_free_host_resources(host);
+ } else {
+ nlmclnt_recovery(host, new_state);
+ }
+
+ down(&nlm_host_sema);
+ nlm_release_host(host);
+
+ /* Host table may have changed in the meanwhile,
+ * start over */
+ goto again;
+ }
}
+
+ up(&nlm_host_sema);
}
/*
@@ -283,7 +355,8 @@ nlm_shutdown_hosts(void)
for (i = 0; i < NLM_HOST_NRHASH; i++) {
for (host = nlm_hosts[i]; host; host = host->h_next) {
dprintk(" %s (cnt %d use %d exp %ld)\n",
- host->h_name, host->h_count,
+ host->h_name,
+ atomic_read(&host->h_count),
host->h_inuse, host->h_expires);
}
}
@@ -314,19 +387,24 @@ nlm_gc_hosts(void)
for (i = 0; i < NLM_HOST_NRHASH; i++) {
q = &nlm_hosts[i];
while ((host = *q) != NULL) {
- if (host->h_count || host->h_inuse
+ if (atomic_read(&host->h_count)
+ || host->h_inuse
|| time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
- host->h_name, host->h_count,
+ host->h_name,
+ atomic_read(&host->h_count),
host->h_inuse, host->h_expires);
q = &host->h_next;
continue;
}
dprintk("lockd: delete host %s\n", host->h_name);
*q = host->h_next;
- /* Don't unmonitor hosts that have been invalidated */
- if (host->h_monitored && !host->h_killed)
- nsm_unmonitor(host);
+
+ /* Release the NSM handle. Unmonitor unless
+ * host was invalidated (i.e. lockd restarted)
+ */
+ nsm_unmonitor(host);
+
if ((clnt = host->h_rpcclnt) != NULL) {
if (atomic_read(&clnt->cl_users)) {
printk(KERN_WARNING
diff -X excl -purNa linux-2.6.2/fs/lockd/mon.c linux-2.6.2-kstatd/fs/lockd/mon.c
--- linux-2.6.2/fs/lockd/mon.c 2004-02-04 04:44:05.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/mon.c 2004-02-13 15:02:18.000000000 +0100
@@ -3,6 +3,10 @@
*
* The kernel statd client.
*
+ * When using the kernel statd implementation, none of the
+ * stuff inside this file is used.
+ * Instead look at statd.c
+ *
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
@@ -15,6 +19,9 @@
#include <linux/lockd/sm_inter.h>
+
+#ifndef CONFIG_STATD
+
#define NLMDBG_FACILITY NLMDBG_MONITOR
static struct rpc_clnt * nsm_create(void);
@@ -22,7 +29,8 @@ static struct rpc_clnt * nsm_create(void
extern struct rpc_program nsm_program;
/*
- * Local NSM state
+ * Local NSM state.
+ * This should really be initialized somehow.
*/
u32 nsm_local_state;
@@ -64,17 +72,20 @@ nsm_mon_unmon(struct nlm_host *host, u32
int
nsm_monitor(struct nlm_host *host)
{
+ struct nsm_handle *nsm;
struct nsm_res res;
int status;
dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
+ if ((nsm = host->h_nsmhandle) == NULL)
+ BUG();
status = nsm_mon_unmon(host, SM_MON, &res);
if (status < 0 || res.status != 0)
printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
else
- host->h_monitored = 1;
+ nsm->sm_monitored = 1;
return status;
}
@@ -84,16 +95,25 @@ nsm_monitor(struct nlm_host *host)
int
nsm_unmonitor(struct nlm_host *host)
{
+ struct nsm_handle *nsm;
struct nsm_res res;
- int status;
+ /* Start at 0: this is the result when the last reference goes away
+  * but no SM_UNMON call is made (handle not monitored, or sticky).
+  * Without the initializer that path returned an indeterminate value. */
+ int status = 0;
- dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+ /* Detach the handle from the host and drop the host's reference */
+ nsm = host->h_nsmhandle;
+ host->h_nsmhandle = NULL;
- status = nsm_mon_unmon(host, SM_UNMON, &res);
- if (status < 0)
- printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name);
- else
- host->h_monitored = 0;
+ if (!nsm || !atomic_dec_and_test(&nsm->sm_count))
+ return 0;
+
+ /* NOTE(review): the statd.c variant of this function kfree()s the
+  * handle at this point; here it is not released - confirm. */
+ if (nsm->sm_monitored && !nsm->sm_sticky) {
+ dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+ status = nsm_mon_unmon(host, SM_UNMON, &res);
+ if (status < 0)
+ printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
+ host->h_name);
+ else
+ nsm->sm_monitored = 0;
+ }
return status;
}
@@ -246,3 +266,5 @@ struct rpc_program nsm_program = {
.version = nsm_version,
.stats = &nsm_stats
};
+
+#endif
diff -X excl -purNa linux-2.6.2/fs/lockd/statd.c linux-2.6.2-kstatd/fs/lockd/statd.c
--- linux-2.6.2/fs/lockd/statd.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/statd.c 2004-02-13 15:02:18.000000000 +0100
@@ -0,0 +1,386 @@
+/*
+ * linux/fs/lockd/statd.c
+ *
+ * Kernel-based status monitor. This is an alternative to
+ * the stuff in mon.c.
+ *
+ * When asked to monitor a host, we add it to /var/lib/nfs/sm
+ * ourselves, and that's it. In order to catch SM_NOTIFY calls
+ * we implement a minimal statd.
+ *
+ * Minimal user space requirements for this implementation:
+ * /var/lib/nfs/state
+ * must exist, and must contain the NSM state as a 32bit
+ * binary counter.
+ * /var/lib/nfs/sm
+ * must exist
+ *
+ * Copyright (C) 2004, Olaf Kirch <okir@suse.de>
+ */
+
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/lockd/lockd.h>
+#include <linux/lockd/share.h>
+#include <linux/lockd/sm_inter.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+#include <linux/buffer_head.h>
+
+
+/* XXX make this a module parameter? */
+#define NSM_BASE_PATH "/var/lib/nfs"
+#define NSM_SM_PATH NSM_BASE_PATH "/sm"
+#define NSM_STATE_PATH NSM_BASE_PATH "/state"
+
+#define NLMDBG_FACILITY NLMDBG_CLIENT
+
+/*
+ * Local NSM state.
+ */
+u32 nsm_local_state;
+
+/*
+ * Initialize the local NSM state variable from NSM_STATE_PATH.
+ *
+ * Two file formats are accepted: a file of exactly four bytes is
+ * taken as a raw 32bit counter (copied as-is, no byte order
+ * conversion); anything else is parsed as a decimal number.
+ *
+ * Returns 0 on success, or the negative error from filp_open() or
+ * vfs_read().
+ */
+int
+nsm_init(void)
+{
+	struct file	*filp;
+	char		buffer[32];
+	mm_segment_t	fs;
+	int		res;
+
+	dprintk("lockd: nsm_init()\n");
+	filp = filp_open(NSM_STATE_PATH, O_RDONLY, 0444);
+	if (IS_ERR(filp)) {
+		res = PTR_ERR(filp);
+		printk(KERN_NOTICE "lockd: failed to open %s: err=%d\n",
+			NSM_STATE_PATH, res);
+		return res;
+	}
+
+	/* We read into a kernel buffer, hence the KERNEL_DS switch.
+	 * Read one byte less than the buffer holds so that there is
+	 * always room for the NUL simple_strtol() needs to stop at. */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	res = vfs_read(filp, buffer, sizeof(buffer) - 1, &filp->f_pos);
+	set_fs(fs);
+	filp_close(filp, NULL);
+
+	if (res < 0)
+		return res;
+	buffer[res] = '\0';
+
+	if (res == (int) sizeof(nsm_local_state)) {
+		/* Binary counter. memcpy() rather than a cast, since a
+		 * char array carries no alignment guarantee for u32. */
+		memcpy(&nsm_local_state, buffer, sizeof(nsm_local_state));
+	} else {
+		nsm_local_state = simple_strtol(buffer, NULL, 10);
+	}
+	return 0;
+}
+
+/*
+ * Build the name of the monitor file for a lockd peer.
+ *
+ * The name is NSM_SM_PATH "/" followed by the peer's dotted-quad
+ * IPv4 address. We can have more than one nlm_host object per peer
+ * (server or client side, different NLM proto/version); all of them
+ * map to the same file name.
+ *
+ * When we reboot, the notifier should sort the IPs by
+ * descending mtime so that the most recent hosts get
+ * notified first.
+ *
+ * Returns a page from __get_free_page() that the caller releases
+ * with free_page(), or NULL if no page could be had.
+ */
+static char *
+nsm_filename(struct in_addr addr)
+{
+	char	*path = (char *) __get_free_page(GFP_KERNEL);
+
+	if (path != NULL) {
+		/* FIXME IPV6 */
+		snprintf(path, PAGE_SIZE, "%s/%u.%u.%u.%u",
+			NSM_SM_PATH, NIPQUAD(addr));
+	}
+	return path;
+}
+
+/*
+ * Create the NSM monitor file for @addr.
+ *
+ * Returns 0 on success, -ENOMEM if the path name could not be
+ * allocated, or the error reported by filp_open().
+ */
+static int
+nsm_create(struct in_addr addr)
+{
+	struct file	*filp;
+	char		*path;
+	int		err = 0;
+
+	path = nsm_filename(addr);
+	if (path == NULL)
+		return -ENOMEM;
+
+	dprintk("lockd: creating statd monitor file %s\n", path);
+	filp = filp_open(path, O_CREAT|O_SYNC|O_RDWR, 0644);
+	if (!IS_ERR(filp)) {
+		/* fsync_super() on the superblock the file lives on;
+		 * presumably to get the new entry onto disk right away. */
+		fsync_super(filp->f_dentry->d_inode->i_sb);
+		filp_close(filp, NULL);
+		goto out;
+	}
+
+	err = PTR_ERR(filp);
+	printk(KERN_NOTICE
+		"lockd/statd: failed to create %s: err=%d\n",
+		path, err);
+out:
+	free_page((long) path);
+	return err;
+}
+
+/*
+ * Remove the NSM monitor file for @addr.
+ *
+ * Looks up the parent directory of the file, takes the directory's
+ * i_sem, looks up the last path component and passes it to
+ * vfs_unlink().
+ *
+ * Returns 0 on success, -ENOMEM if the path name could not be
+ * allocated, -EISDIR if the last component is not a normal name
+ * that ends the path string, or the error from path_lookup(),
+ * lookup_hash() or vfs_unlink().
+ */
+static int
+nsm_unlink(struct in_addr addr)
+{
+ struct nameidata nd;
+ struct inode *inode = NULL;
+ struct dentry *dentry;
+ char *name;
+ int res = 0;
+
+ if (!(name = nsm_filename(addr)))
+ return -ENOMEM;
+
+ /* LOOKUP_PARENT: resolve the directory, leave the last component in nd.last */
+ if ((res = path_lookup(name, LOOKUP_PARENT, &nd)) != 0)
+ goto exit;
+
+ if (nd.last_type == LAST_NORM && !nd.last.name[nd.last.len]) {
+ down(&nd.dentry->d_inode->i_sem);
+
+ dentry = lookup_hash(&nd.last, nd.dentry);
+ if (!IS_ERR(dentry)) {
+ /* Take our own inode reference; it is dropped by the
+  * iput() at the end, after i_sem has been released. */
+ if ((inode = dentry->d_inode) != NULL)
+ atomic_inc(&inode->i_count);
+ res = vfs_unlink(nd.dentry->d_inode, dentry);
+ dput(dentry);
+ } else {
+ res = PTR_ERR(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
+ } else {
+ res = -EISDIR;
+ }
+ path_release(&nd);
+
+exit:
+ if (res < 0) {
+ printk(KERN_NOTICE
+ "lockd/statd: failed to unlink %s: err=%d\n",
+ name, res);
+ }
+
+ free_page((long) name);
+ if (inode)
+ iput(inode);
+ return res;
+}
+
+/*
+ * Allocate an NSM handle for the peer address @sin.
+ *
+ * The new handle is zeroed (so neither sm_monitored nor sm_sticky
+ * is set), carries a copy of @sin and starts with a reference
+ * count of one. Returns NULL if kmalloc() fails.
+ */
+struct nsm_handle *
+nsm_alloc(struct sockaddr_in *sin)
+{
+	struct nsm_handle *handle = kmalloc(sizeof(*handle), GFP_KERNEL);
+
+	if (handle != NULL) {
+		memset(handle, 0, sizeof(*handle));
+		memcpy(&handle->sm_addr, sin, sizeof(handle->sm_addr));
+		atomic_set(&handle->sm_count, 1);
+	}
+	return handle;
+}
+
+/*
+ * Set up monitoring of a remote host by creating its monitor file.
+ *
+ * The lock paths call this without testing a monitored flag of
+ * their own, so the "already monitored" check lives here: once the
+ * handle has sm_monitored set we return right away instead of
+ * re-creating the file and syncing the superblock on every request.
+ * nlm_host_rebooted() clears sm_monitored, which makes the next call
+ * set up monitoring again.
+ *
+ * NOTE(review): the original comment states that the host table
+ * semaphore is held while we are here - confirm against the callers.
+ *
+ * Returns 0 on success or the negative error from nsm_create().
+ * A host without an NSM handle is a BUG().
+ */
+int
+nsm_monitor(struct nlm_host *host)
+{
+	kernel_cap_t		cap;
+	struct nsm_handle	*nsm;
+	int			res;
+
+	dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
+	if ((nsm = host->h_nsmhandle) == NULL)
+		BUG();
+
+	/* Monitor file already in place - nothing to do */
+	if (nsm->sm_monitored)
+		return 0;
+
+	/* Raise capability so that we're able to create the file */
+	cap = current->cap_effective;
+	cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
+	res = nsm_create(nsm->sm_addr.sin_addr);
+	current->cap_effective = cap;
+
+	if (res >= 0)
+		nsm->sm_monitored = 1;
+	return res;
+}
+
+/*
+ * Cease to monitor remote host.
+ *
+ * Detaches the NSM handle from @host and drops the host's reference
+ * to it. Only when that was the last reference do we remove the
+ * monitor file (if the handle is monitored and not sticky) and free
+ * the handle.
+ *
+ * Returns 0, or the negative error from nsm_unlink().
+ */
+int
+nsm_unmonitor(struct nlm_host *host)
+{
+	kernel_cap_t		cap;
+	struct nsm_handle	*nsm;
+	int			res = 0;
+
+	nsm = host->h_nsmhandle;
+	host->h_nsmhandle = NULL;
+
+	if (!nsm || !atomic_dec_and_test(&nsm->sm_count))
+		return 0;
+
+	/* If the host was invalidated due to lockd restart/shutdown,
+	 * don't unmonitor it.
+	 * (Strictly speaking, we would have to keep the SM file
+	 * until the next reboot. The only way to achieve that
+	 * would be to link the monitor file to sm.bak now.)
+	 */
+	if (nsm->sm_monitored && !nsm->sm_sticky) {
+		dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+
+		/* Raise capability so that we're able to delete the file */
+		cap = current->cap_effective;
+		cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
+		/* Name the file from the handle's address, the same one
+		 * nsm_monitor() created it from. */
+		res = nsm_unlink(nsm->sm_addr.sin_addr);
+		current->cap_effective = cap;
+	}
+
+	kfree(nsm);
+	return res;
+}
+
+/*
+ * NSM server implementation starts here
+ */
+
+/*
+ * NULL: Test for presence of service.
+ * Does nothing besides a debug message; always returns rpc_success.
+ */
+static int
+nsmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ dprintk("statd: NULL called\n");
+ return rpc_success;
+}
+
+/*
+ * NOTIFY: receive notification that remote host rebooted.
+ *
+ * A call whose source port is 1024 or above is logged and refused
+ * with rpc_system_err. Otherwise the sender's address and the state
+ * number decoded from the call are passed to nlm_host_rebooted()
+ * and rpc_success is returned.
+ */
+static int
+nsmsvc_proc_notify(struct svc_rqst *rqstp, struct nsm_args *argp,
+				struct nsm_res *resp)
+{
+	struct sockaddr_in	peer = rqstp->rq_addr;
+
+	dprintk("statd: NOTIFY called\n");
+	if (ntohs(peer.sin_port) < 1024) {
+		nlm_host_rebooted(&peer, argp->state);
+		return rpc_success;
+	}
+
+	printk(KERN_WARNING
+		"statd: rejected NSM_NOTIFY from %08x:%d\n",
+		ntohl(rqstp->rq_addr.sin_addr.s_addr),
+		ntohs(rqstp->rq_addr.sin_port));
+	return rpc_system_err;
+}
+
+/*
+ * Handler shared by every procedure we do not implement.
+ * Fills the reply with status 0 and state -1 and reports
+ * rpc_success to the RPC layer.
+ */
+static int
+nsmsvc_proc_fail(struct svc_rqst *rqstp, struct nsm_args *argp,
+				struct nsm_res *resp)
+{
+	dprintk("statd: proc %u called\n", rqstp->rq_proc);
+
+	resp->state = -1;
+	resp->status = 0;
+
+	return rpc_success;
+}
+
+/*
+ * NSM XDR routines
+ */
+/* Decode a call without arguments: only validates the argument size. */
+int
+nsmsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+ return xdr_argsize_check(rqstp, p);
+}
+
+/* Encode an empty reply: only validates the result size. */
+int
+nsmsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Decode the argument of a NOTIFY call: a string (the sender's
+ * mon_name, which we skip) followed by one 32bit state number,
+ * which is stored in argp->state.
+ *
+ * Returns 0 if the string is rejected by xdr_decode_string_inplace(),
+ * otherwise the result of xdr_argsize_check() for the end of the
+ * state word.
+ */
+int
+nsmsvc_decode_stat_chge(struct svc_rqst *rqstp, u32 *p, struct nsm_args *argp)
+{
+	char	*mon_name;
+	__u32	mon_name_len;
+	int	ok;
+
+	/* Skip over the client's mon_name */
+	p = xdr_decode_string_inplace(p, &mon_name, &mon_name_len, SM_MAXSTRLEN);
+	if (p == NULL)
+		return 0;
+
+	/* The call comes off the network: verify that the state word
+	 * is part of the received arguments before reading it. */
+	ok = xdr_argsize_check(rqstp, p + 1);
+	if (ok)
+		argp->state = ntohl(*p);
+	return ok;
+}
+
+/*
+ * Encode a reply consisting of the status word only.
+ * XDR integers travel in network byte order, hence the htonl().
+ */
+int
+nsmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+{
+	*p++ = htonl(resp->status);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Encode a reply consisting of the status word followed by the
+ * state word. XDR integers travel in network byte order, hence
+ * the htonl().
+ */
+int
+nsmsvc_encode_stat_res(struct svc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+{
+	*p++ = htonl(resp->status);
+	*p++ = htonl(resp->state);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* Placeholder so that PROC() can take sizeof(struct nsm_void) for
+ * procedures without arguments or results. */
+struct nsm_void { int dummy; };
+
+/*
+ * Build one svc_procedure entry.
+ *   name     - handler suffix: nsmsvc_proc_<name>
+ *   xargt    - argument decoder suffix: nsmsvc_decode_<xargt>
+ *   xrest    - result encoder suffix: nsmsvc_encode_<xrest>
+ *   argt     - argument struct suffix: struct nsm_<argt>
+ *   rest     - result struct suffix: struct nsm_<rest>
+ *   respsize - stored in pc_xdrressize (presumably the reply size
+ *              in 32bit words - TODO confirm)
+ */
+#define PROC(name, xargt, xrest, argt, rest, respsize) \
+ { .pc_func = (svc_procfunc) nsmsvc_proc_##name, \
+ .pc_decode = (kxdrproc_t) nsmsvc_decode_##xargt, \
+ .pc_encode = (kxdrproc_t) nsmsvc_encode_##xrest, \
+ .pc_release = NULL, \
+ .pc_argsize = sizeof(struct nsm_##argt), \
+ .pc_ressize = sizeof(struct nsm_##rest), \
+ .pc_xdrressize = respsize, \
+ }
+
+/*
+ * Procedure table, indexed by procedure number. Entry 0 is the NULL
+ * procedure and entry 6 (SM_NOTIFY) is the reboot notification;
+ * entries 1 to 5 all go to the catch-all nsmsvc_proc_fail handler.
+ */
+struct svc_procedure nsmsvc_procedures[] = {
+ PROC(null, void, void, void, void, 1),
+ PROC(fail, void, stat_res, void, res, 2),
+ PROC(fail, void, stat_res, void, res, 2),
+ PROC(fail, void, res, void, res, 1),
+ PROC(fail, void, res, void, res, 1),
+ PROC(fail, void, res, void, res, 1),
+ PROC(notify, stat_chge, void, args, void, 1)
+};
diff -X excl -purNa linux-2.6.2/fs/lockd/svc.c linux-2.6.2-kstatd/fs/lockd/svc.c
--- linux-2.6.2/fs/lockd/svc.c 2004-02-04 04:43:57.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/svc.c 2004-02-13 15:02:18.000000000 +0100
@@ -34,6 +34,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/lockd/lockd.h>
+#include <linux/lockd/sm_inter.h>
#include <linux/nfs.h>
#define NLMDBG_FACILITY NLMDBG_SVC
@@ -115,13 +116,22 @@ lockd(struct svc_rqst *rqstp)
daemonize("lockd");
+#ifdef CONFIG_STATD
+ /* Set up statd */
+ nsm_init();
+#endif
+
/* Process request with signals blocked, but allow SIGKILL. */
allow_signal(SIGKILL);
/* kick rpciod */
rpciod_up();
+#ifndef CONFIG_STATD
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
+#else
+ dprintk("NFS lockd/statd started (ver " LOCKD_VERSION ").\n");
+#endif
if (!nlm_timeout)
nlm_timeout = LOCKD_DFLT_TIMEO;
@@ -439,6 +449,37 @@ static void __exit exit_nlm(void)
module_init(init_nlm);
module_exit(exit_nlm);
+#ifdef CONFIG_STATD
+/*
+ * Define NSM program and procedures
+ */
+/*
+ * NSM version 1. vs_nproc must cover every slot of
+ * nsmsvc_procedures, i.e. procedure numbers 0 through SM_NOTIFY;
+ * with a smaller value the RPC layer turns SM_NOTIFY calls away
+ * as an unknown procedure before our handler is ever reached.
+ */
+static struct svc_version nsmsvc_version1 = {
+	.vs_vers	= 1,
+	.vs_nproc	= SM_NOTIFY + 1,
+	.vs_proc	= nsmsvc_procedures,
+	.vs_xdrsize	= SMSVC_XDRSIZE,
+};
+/* Version table, indexed by version number; slot 0 stays NULL
+ * since only version 1 is defined. */
+static struct svc_version * nsmsvc_version[] = {
+ [1] = &nsmsvc_version1,
+};
+
+static struct svc_stat nsmsvc_stats;
+
+#define SM_NRVERS (sizeof(nsmsvc_version)/sizeof(nsmsvc_version[0]))
+/* The NSM (statd) RPC program served alongside NLM */
+static struct svc_program nsmsvc_program = {
+ .pg_prog = SM_PROGRAM, /* program number */
+ .pg_nvers = SM_NRVERS, /* number of entries in nsmsvc_version */
+ .pg_vers = nsmsvc_version, /* version table */
+ .pg_name = "statd", /* service name */
+ .pg_class = "nfsd", /* share authentication with nfsd */
+ .pg_stats = &nsmsvc_stats, /* stats table */
+};
+
+#define nsmsvc_program_p &nsmsvc_program
+#else
+#define nsmsvc_program_p NULL
+#endif
+
/*
* Define NLM program and procedures
*/
@@ -474,6 +515,7 @@ static struct svc_stat nlmsvc_stats;
#define NLM_NRVERS (sizeof(nlmsvc_version)/sizeof(nlmsvc_version[0]))
struct svc_program nlmsvc_program = {
+ .pg_next = nsmsvc_program_p,
.pg_prog = NLM_PROGRAM, /* program number */
.pg_nvers = NLM_NRVERS, /* number of entries in nlmsvc_version */
.pg_vers = nlmsvc_version, /* version table */
diff -X excl -purNa linux-2.6.2/fs/lockd/svc4proc.c linux-2.6.2-kstatd/fs/lockd/svc4proc.c
--- linux-2.6.2/fs/lockd/svc4proc.c 2004-02-04 04:43:42.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/svc4proc.c 2004-02-13 15:02:14.000000000 +0100
@@ -42,7 +42,7 @@ nlm4svc_retrieve_args(struct svc_rqst *r
/* Obtain host handle */
if (!(host = nlmsvc_lookup_host(rqstp))
- || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0))
+ || (argp->monitor && nsm_monitor(host) < 0))
goto no_locks;
*hostp = host;
diff -X excl -purNa linux-2.6.2/fs/lockd/svcproc.c linux-2.6.2-kstatd/fs/lockd/svcproc.c
--- linux-2.6.2/fs/lockd/svcproc.c 2004-02-04 04:44:04.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/svcproc.c 2004-02-13 15:02:14.000000000 +0100
@@ -71,7 +71,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rq
/* Obtain host handle */
if (!(host = nlmsvc_lookup_host(rqstp))
- || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0))
+ || (argp->monitor && nsm_monitor(host) < 0))
goto no_locks;
*hostp = host;
diff -X excl -purNa linux-2.6.2/fs/lockd/svcsubs.c linux-2.6.2-kstatd/fs/lockd/svcsubs.c
--- linux-2.6.2/fs/lockd/svcsubs.c 2004-02-04 04:44:03.000000000 +0100
+++ linux-2.6.2-kstatd/fs/lockd/svcsubs.c 2004-02-13 15:02:14.000000000 +0100
@@ -303,7 +303,16 @@ nlmsvc_invalidate_all(void)
while ((host = nlm_find_client()) != NULL) {
nlmsvc_free_host_resources(host);
host->h_expires = 0;
- host->h_killed = 1;
+ /* Do not unmonitor the host */
+ if (host->h_nsmhandle)
+ host->h_nsmhandle->sm_sticky = 1;
+ if (atomic_read(&host->h_count) != 1) {
+ /* Whatever is holding references to this host,
+ * it seems likely we're going to leak memory
+ * or worse */
+ printk(KERN_WARNING "lockd: host still in use "
+ "after nlmsvc_free_host_resources!");
+ }
nlm_release_host(host);
}
}
diff -X excl -purNa linux-2.6.2/include/linux/lockd/lockd.h linux-2.6.2-kstatd/include/linux/lockd/lockd.h
--- linux-2.6.2/include/linux/lockd/lockd.h 2004-02-04 04:43:15.000000000 +0100
+++ linux-2.6.2-kstatd/include/linux/lockd/lockd.h 2004-02-13 15:02:14.000000000 +0100
@@ -47,15 +47,22 @@ struct nlm_host {
unsigned short h_reclaiming : 1,
h_server : 1, /* server side, not client side */
h_inuse : 1,
- h_killed : 1,
- h_monitored : 1;
+ h_rebooted : 1;
wait_queue_head_t h_gracewait; /* wait while reclaiming */
u32 h_state; /* pseudo-state counter */
u32 h_nsmstate; /* true remote NSM state */
- unsigned int h_count; /* reference count */
+ atomic_t h_count; /* reference count */
struct semaphore h_sema; /* mutex for pmap binding */
unsigned long h_nextrebind; /* next portmap call */
unsigned long h_expires; /* eligible for GC */
+ struct nsm_handle * h_nsmhandle; /* for kernel statd */
+};
+
+struct nsm_handle {
+ atomic_t sm_count;
+ struct sockaddr_in sm_addr;
+ unsigned int sm_monitored : 1,
+ sm_sticky : 1; /* don't unmonitor */
};
/*
@@ -121,6 +128,9 @@ extern struct svc_procedure nlmsvc_proce
#ifdef CONFIG_LOCKD_V4
extern struct svc_procedure nlmsvc_procedures4[];
#endif
+#ifdef CONFIG_STATD
+extern struct svc_procedure nsmsvc_procedures[];
+#endif
extern int nlmsvc_grace_period;
extern unsigned long nlmsvc_timeout;
@@ -150,6 +160,7 @@ struct nlm_host * nlm_get_host(struct nl
void nlm_release_host(struct nlm_host *);
void nlm_shutdown_hosts(void);
extern struct nlm_host *nlm_find_client(void);
+extern void nlm_host_rebooted(struct sockaddr_in *, u32);
/*
diff -X excl -purNa linux-2.6.2/include/linux/lockd/sm_inter.h linux-2.6.2-kstatd/include/linux/lockd/sm_inter.h
--- linux-2.6.2/include/linux/lockd/sm_inter.h 2004-02-04 04:43:49.000000000 +0100
+++ linux-2.6.2-kstatd/include/linux/lockd/sm_inter.h 2004-02-13 15:02:18.000000000 +0100
@@ -19,6 +19,7 @@
#define SM_NOTIFY 6
#define SM_MAXSTRLEN 1024
+#define SMSVC_XDRSIZE sizeof(struct nsm_args)
/*
* Arguments for all calls to statd
@@ -29,6 +30,7 @@ struct nsm_args {
u32 vers;
u32 proc;
u32 proto; /* protocol (udp/tcp) plus server/client flag */
+ u32 state; /* in NOTIFY calls */
};
/*
@@ -39,6 +41,8 @@ struct nsm_res {
u32 state;
};
+extern int nsm_init(void);
+struct nsm_handle *nsm_alloc(struct sockaddr_in *);
int nsm_monitor(struct nlm_host *);
int nsm_unmonitor(struct nlm_host *);
extern u32 nsm_local_state;
diff -X excl -purNa linux-2.6.2/net/sunrpc/svc.c linux-2.6.2-kstatd/net/sunrpc/svc.c
--- linux-2.6.2/net/sunrpc/svc.c 2004-02-13 15:01:50.000000000 +0100
+++ linux-2.6.2-kstatd/net/sunrpc/svc.c 2004-02-13 15:02:14.000000000 +0100
@@ -221,22 +221,27 @@ svc_register(struct svc_serv *serv, int
progp = serv->sv_program;
- dprintk("RPC: svc_register(%s, %s, %d)\n",
- progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);
-
if (!port)
clear_thread_flag(TIF_SIGPENDING);
- for (i = 0; i < progp->pg_nvers; i++) {
- if (progp->pg_vers[i] == NULL)
- continue;
- error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
- if (error < 0)
- break;
- if (port && !dummy) {
- error = -EACCES;
- break;
+ while (progp) {
+ dprintk("RPC: svc_register(%s, %s, %d)\n",
+ progp->pg_name,
+ proto == IPPROTO_UDP? "udp" : "tcp",
+ port);
+
+ for (i = 0; i < progp->pg_nvers; i++) {
+ if (progp->pg_vers[i] == NULL)
+ continue;
+ error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
+ if (error < 0)
+ break;
+ if (port && !dummy) {
+ error = -EACCES;
+ break;
+ }
}
+ progp = progp->pg_next;
}
if (!port) {
next prev parent reply other threads:[~2004-03-09 11:05 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-03-09 4:30 NSM lock recovery fails too often Lever, Charles
2004-03-09 10:56 ` Olaf Kirch [this message]
2004-03-09 10:57 ` Olaf Kirch
-- strict thread matches above, loose matches on Subject: below --
2004-03-09 14:15 Lever, Charles
2004-03-09 14:22 ` Olaf Kirch
2004-03-09 15:04 ` Trond Myklebust
2004-03-09 15:10 ` Olaf Kirch
2004-03-09 15:47 ` Trond Myklebust
2004-03-09 15:59 ` Olaf Kirch
2004-03-12 16:47 Lever, Charles
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040309105607.GA20391@suse.de \
--to=okir@suse.de \
--cc=Charles.Lever@netapp.com \
--cc=nfs@lists.sourceforge.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.