[PATCH] NLM: add network test when host expire but hold lock at nlm_gc

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
@ 2009-12-02  9:47 Mi Jinlong
  2009-12-02 12:26 ` Jeff Layton
  0 siblings, 1 reply; 8+ messages in thread
From: Mi Jinlong @ 2009-12-02  9:47 UTC (permalink / raw)
  To: NFSv3 list, Trond.Myklebust, J. Bruce Fields

After a client get lock, it's network partition for some reasons.
other client cannot get lock success forever.

This patch can avoid this problem using rpc_ping to test client's
network when host expired but hold lock.

If the client's network is partition, server will release client's 
lock, other client will get lock success.

Signed-off-by: mijinlong@cn.fujitsu.com
---
 fs/lockd/host.c             |   54 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/sunrpc/clnt.h |    1 +
 net/sunrpc/clnt.c           |    4 +-
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 4600c20..73f6732 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -31,6 +31,7 @@ static int			nrhosts;
 static DEFINE_MUTEX(nlm_host_mutex);
 
 static void			nlm_gc_hosts(void);
+static int			nlm_test_host(struct nlm_host *);
 
 struct nlm_lookup_host_info {
 	const int		server;		/* search for server|client */
@@ -550,13 +551,24 @@ nlm_gc_hosts(void)
 
 	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
 		hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
-			if (atomic_read(&host->h_count) || host->h_inuse
+			if (atomic_read(&host->h_count) 
 			 || time_before(jiffies, host->h_expires)) {
 				dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
 					host->h_name, atomic_read(&host->h_count),
 					host->h_inuse, host->h_expires);
 				continue;
 			}
+
+			if (host->h_inuse) {
+				if (!nlm_test_host(host)) {
+					dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
+						host->h_name, atomic_read(&host->h_count),
+						host->h_inuse, host->h_expires);
+					continue;
+				}
+				nlmsvc_free_host_resources(host);
+			}
+
 			dprintk("lockd: delete host %s\n", host->h_name);
 			hlist_del_init(&host->h_hash);
 
@@ -567,3 +579,43 @@ nlm_gc_hosts(void)
 
 	next_gc = jiffies + NLM_HOST_COLLECT;
 }
+
+static int
+nlm_test_host(struct nlm_host *host)
+{
+	struct rpc_clnt *clnt;
+
+	dprintk("lockd: test host %s\n", host->h_name);
+	clnt = host->h_rpcclnt;
+	if (clnt == NULL) {
+		unsigned long increment = HZ;
+		struct rpc_timeout timeparms = {
+			.to_initval	= increment,
+			.to_increment	= increment,
+			.to_maxval	= increment * 3UL,
+			.to_retries	= 2U,
+		};
+
+		struct rpc_create_args args = {
+			.protocol	= host->h_proto,
+			.address	= nlm_addr(host),
+			.addrsize	= host->h_addrlen,
+			.saddress	= nlm_srcaddr(host),
+			.timeout	= &timeparms,
+			.servername	= host->h_name,
+			.program	= &nlm_program,
+			.version	= host->h_version,
+			.authflavor	= RPC_AUTH_UNIX,
+			.flags		= RPC_CLNT_CREATE_AUTOBIND,
+		};
+
+		clnt = rpc_create(&args);
+		if (IS_ERR(clnt)) 
+			return -EIO;
+
+		rpc_shutdown_client(clnt);
+		return 0;
+	}
+
+	return rpc_ping(clnt, RPC_TASK_SOFT);
+}
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 8ed9642..b221f8f 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -162,6 +162,7 @@ size_t		rpc_pton(const char *, const size_t,
 char *		rpc_sockaddr2uaddr(const struct sockaddr *);
 size_t		rpc_uaddr2sockaddr(const char *, const size_t,
 				   struct sockaddr *, const size_t);
+int 		rpc_ping(struct rpc_clnt *clnt, int flags);
 
 static inline unsigned short rpc_get_port(const struct sockaddr *sap)
 {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 38829e2..53b9f34 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,7 +79,6 @@ static void	call_connect_status(struct rpc_task *task);
 
 static __be32	*rpc_encode_header(struct rpc_task *task);
 static __be32	*rpc_verify_header(struct rpc_task *task);
-static int	rpc_ping(struct rpc_clnt *clnt, int flags);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
 {
@@ -1675,7 +1674,7 @@ static struct rpc_procinfo rpcproc_null = {
 	.p_decode = rpcproc_decode_null,
 };
 
-static int rpc_ping(struct rpc_clnt *clnt, int flags)
+int rpc_ping(struct rpc_clnt *clnt, int flags)
 {
 	struct rpc_message msg = {
 		.rpc_proc = &rpcproc_null,
@@ -1686,6 +1685,7 @@ static int rpc_ping(struct rpc_clnt *clnt, int flags)
 	put_rpccred(msg.rpc_cred);
 	return err;
 }
+EXPORT_SYMBOL_GPL(rpc_ping);
 
 struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
 {
-- 
1.6.4.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
  2009-12-02  9:47 [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts Mi Jinlong
@ 2009-12-02 12:26 ` Jeff Layton
       [not found]   ` <20091202072644.31c5d17e-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Jeff Layton @ 2009-12-02 12:26 UTC (permalink / raw)
  To: Mi Jinlong; +Cc: NFSv3 list, Trond.Myklebust, J. Bruce Fields

On Wed, 02 Dec 2009 17:47:04 +0800
Mi Jinlong <mijinlong@cn.fujitsu.com> wrote:

> After a client get lock, it's network partition for some reasons.
> other client cannot get lock success forever.
> 
> This patch can avoid this problem using rpc_ping to test client's
> network when host expired but hold lock.
> 
> If the client's network is partition, server will release client's 
> lock, other client will get lock success.
> 
> Signed-off-by: mijinlong@cn.fujitsu.com

Yikes! That sounds like it'll make locking subject to the reliability
of the network. I don't think that's a good idea.

What might be more reasonable is to consider implementing something
like the clear_locks command in Solaris. That is, a way for an admin to
remove server-side locks held by a client that he knows is never going
to come back. With that, this sort of thing at least becomes a willful
act...

> ---
>  fs/lockd/host.c             |   54 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/sunrpc/clnt.h |    1 +
>  net/sunrpc/clnt.c           |    4 +-
>  3 files changed, 56 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/lockd/host.c b/fs/lockd/host.c
> index 4600c20..73f6732 100644
> --- a/fs/lockd/host.c
> +++ b/fs/lockd/host.c
> @@ -31,6 +31,7 @@ static int			nrhosts;
>  static DEFINE_MUTEX(nlm_host_mutex);
>  
>  static void			nlm_gc_hosts(void);
> +static int			nlm_test_host(struct nlm_host *);
>  
>  struct nlm_lookup_host_info {
>  	const int		server;		/* search for server|client */
> @@ -550,13 +551,24 @@ nlm_gc_hosts(void)
>  
>  	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
>  		hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
> -			if (atomic_read(&host->h_count) || host->h_inuse
> +			if (atomic_read(&host->h_count) 
>  			 || time_before(jiffies, host->h_expires)) {
>  				dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
>  					host->h_name, atomic_read(&host->h_count),
>  					host->h_inuse, host->h_expires);
>  				continue;
>  			}
> +
> +			if (host->h_inuse) {
> +				if (!nlm_test_host(host)) {
> +					dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
> +						host->h_name, atomic_read(&host->h_count),
> +						host->h_inuse, host->h_expires);
> +					continue;
> +				}
> +				nlmsvc_free_host_resources(host);
> +			}
> +
>  			dprintk("lockd: delete host %s\n", host->h_name);
>  			hlist_del_init(&host->h_hash);
>  
> @@ -567,3 +579,43 @@ nlm_gc_hosts(void)
>  
>  	next_gc = jiffies + NLM_HOST_COLLECT;
>  }
> +
> +static int
> +nlm_test_host(struct nlm_host *host)
> +{
> +	struct rpc_clnt *clnt;
> +
> +	dprintk("lockd: test host %s\n", host->h_name);
> +	clnt = host->h_rpcclnt;
> +	if (clnt == NULL) {
> +		unsigned long increment = HZ;
> +		struct rpc_timeout timeparms = {
> +			.to_initval	= increment,
> +			.to_increment	= increment,
> +			.to_maxval	= increment * 3UL,
> +			.to_retries	= 2U,
> +		};
> +
> +		struct rpc_create_args args = {
> +			.protocol	= host->h_proto,
> +			.address	= nlm_addr(host),
> +			.addrsize	= host->h_addrlen,
> +			.saddress	= nlm_srcaddr(host),
> +			.timeout	= &timeparms,
> +			.servername	= host->h_name,
> +			.program	= &nlm_program,
> +			.version	= host->h_version,
> +			.authflavor	= RPC_AUTH_UNIX,
> +			.flags		= RPC_CLNT_CREATE_AUTOBIND,
> +		};
> +
> +		clnt = rpc_create(&args);
> +		if (IS_ERR(clnt)) 
> +			return -EIO;
> +
> +		rpc_shutdown_client(clnt);
> +		return 0;
> +	}
> +
> +	return rpc_ping(clnt, RPC_TASK_SOFT);
> +}

Hmm...this runs in lockd's context too, right? While it's waiting for a
ping to come back, the server can't service any lock requests. That
could really slow down lockd in the event of a network partition. In
general, it's best to avoid blocking operations in lockd's context if
at all possible.

> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index 8ed9642..b221f8f 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -162,6 +162,7 @@ size_t		rpc_pton(const char *, const size_t,
>  char *		rpc_sockaddr2uaddr(const struct sockaddr *);
>  size_t		rpc_uaddr2sockaddr(const char *, const size_t,
>  				   struct sockaddr *, const size_t);
> +int 		rpc_ping(struct rpc_clnt *clnt, int flags);
>  
>  static inline unsigned short rpc_get_port(const struct sockaddr *sap)
>  {
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 38829e2..53b9f34 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -79,7 +79,6 @@ static void	call_connect_status(struct rpc_task *task);
>  
>  static __be32	*rpc_encode_header(struct rpc_task *task);
>  static __be32	*rpc_verify_header(struct rpc_task *task);
> -static int	rpc_ping(struct rpc_clnt *clnt, int flags);
>  
>  static void rpc_register_client(struct rpc_clnt *clnt)
>  {
> @@ -1675,7 +1674,7 @@ static struct rpc_procinfo rpcproc_null = {
>  	.p_decode = rpcproc_decode_null,
>  };
>  
> -static int rpc_ping(struct rpc_clnt *clnt, int flags)
> +int rpc_ping(struct rpc_clnt *clnt, int flags)
>  {
>  	struct rpc_message msg = {
>  		.rpc_proc = &rpcproc_null,
> @@ -1686,6 +1685,7 @@ static int rpc_ping(struct rpc_clnt *clnt, int flags)
>  	put_rpccred(msg.rpc_cred);
>  	return err;
>  }
> +EXPORT_SYMBOL_GPL(rpc_ping);
>  
>  struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
>  {


-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
       [not found]   ` <20091202072644.31c5d17e-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
@ 2009-12-02 14:29     ` Trond Myklebust
  2009-12-02 17:09       ` J. Bruce Fields
  0 siblings, 1 reply; 8+ messages in thread
From: Trond Myklebust @ 2009-12-02 14:29 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Mi Jinlong, NFSv3 list, J. Bruce Fields

On Wed, 2009-12-02 at 07:26 -0500, Jeff Layton wrote: 
> On Wed, 02 Dec 2009 17:47:04 +0800
> Mi Jinlong <mijinlong@cn.fujitsu.com> wrote:
> 
> > After a client get lock, it's network partition for some reasons.
> > other client cannot get lock success forever.
> > 
> > This patch can avoid this problem using rpc_ping to test client's
> > network when host expired but hold lock.
> > 
> > If the client's network is partition, server will release client's 
> > lock, other client will get lock success.
> > 
> > Signed-off-by: mijinlong@cn.fujitsu.com
> 
> Yikes! That sounds like it'll make locking subject to the reliability
> of the network. I don't think that's a good idea.
> 
> What might be more reasonable is to consider implementing something
> like the clear_locks command in Solaris. That is, a way for an admin to
> remove server-side locks held by a client that he knows is never going
> to come back. With that, this sort of thing at least becomes a willful
> act...

Agreed on both counts.

We should not be changing the semantics of either NFSv3 or NLM at this
time. That will break existing setups that are treating NFSv3 as being a
stable platform.
As I've said in previous correspondence: NFSv4 already offers lease
based locking. If people are worried about network partitions and/or
locks being held by clients that are dead, then they can switch to that.

On the other hand, a clear_locks command could be useful in order to
tell a server that a given client is dead. It should be fairly easy to
leverage the existing NSM/statd protocol to implement this.

Trond


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
  2009-12-02 14:29     ` Trond Myklebust
@ 2009-12-02 17:09       ` J. Bruce Fields
  2009-12-02 17:20         ` Chuck Lever
  0 siblings, 1 reply; 8+ messages in thread
From: J. Bruce Fields @ 2009-12-02 17:09 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Jeff Layton, Mi Jinlong, NFSv3 list

On Wed, Dec 02, 2009 at 09:29:03AM -0500, Trond Myklebust wrote:
> On Wed, 2009-12-02 at 07:26 -0500, Jeff Layton wrote: 
> > On Wed, 02 Dec 2009 17:47:04 +0800
> > Mi Jinlong <mijinlong@cn.fujitsu.com> wrote:
> > 
> > > After a client get lock, it's network partition for some reasons.
> > > other client cannot get lock success forever.
> > > 
> > > This patch can avoid this problem using rpc_ping to test client's
> > > network when host expired but hold lock.
> > > 
> > > If the client's network is partition, server will release client's 
> > > lock, other client will get lock success.
> > > 
> > > Signed-off-by: mijinlong@cn.fujitsu.com
> > 
> > Yikes! That sounds like it'll make locking subject to the reliability
> > of the network. I don't think that's a good idea.
> > 
> > What might be more reasonable is to consider implementing something
> > like the clear_locks command in Solaris. That is, a way for an admin to
> > remove server-side locks held by a client that he knows is never going
> > to come back. With that, this sort of thing at least becomes a willful
> > act...
> 
> Agreed on both counts.
> 
> We should not be changing the semantics of either NFSv3 or NLM at this
> time. That will break existing setups that are treating NFSv3 as being a
> stable platform.
> As I've said in previous correspondence: NFSv4 already offers lease
> based locking. If people are worried about network partitions and/or
> locks being held by clients that are dead, then they can switch to that.
> 
> On the other hand, a clear_locks command could be useful in order to
> tell a server that a given client is dead. It should be fairly easy to
> leverage the existing NSM/statd protocol to implement this.

Oh, so all clear_locks does is send an nsm notification?  Yeah, that
sounds like a completely reasonable project for someone.

--b.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
  2009-12-02 17:09       ` J. Bruce Fields
@ 2009-12-02 17:20         ` Chuck Lever
  2009-12-03 15:28           ` Chuck Lever
  0 siblings, 1 reply; 8+ messages in thread
From: Chuck Lever @ 2009-12-02 17:20 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Jeff Layton, Mi Jinlong, NFSv3 list

On Dec 2, 2009, at 12:09 PM, J. Bruce Fields wrote:
> On Wed, Dec 02, 2009 at 09:29:03AM -0500, Trond Myklebust wrote:
>> On Wed, 2009-12-02 at 07:26 -0500, Jeff Layton wrote:
>>> On Wed, 02 Dec 2009 17:47:04 +0800
>>> Mi Jinlong <mijinlong@cn.fujitsu.com> wrote:
>>>
>>>> After a client get lock, it's network partition for some reasons.
>>>> other client cannot get lock success forever.
>>>>
>>>> This patch can avoid this problem using rpc_ping to test client's
>>>> network when host expired but hold lock.
>>>>
>>>> If the client's network is partition, server will release client's
>>>> lock, other client will get lock success.
>>>>
>>>> Signed-off-by: mijinlong@cn.fujitsu.com
>>>
>>> Yikes! That sounds like it'll make locking subject to the  
>>> reliability
>>> of the network. I don't think that's a good idea.
>>>
>>> What might be more reasonable is to consider implementing something
>>> like the clear_locks command in Solaris. That is, a way for an  
>>> admin to
>>> remove server-side locks held by a client that he knows is never  
>>> going
>>> to come back. With that, this sort of thing at least becomes a  
>>> willful
>>> act...
>>
>> Agreed on both counts.
>>
>> We should not be changing the semantics of either NFSv3 or NLM at  
>> this
>> time. That will break existing setups that are treating NFSv3 as  
>> being a
>> stable platform.
>> As I've said in previous correspondence: NFSv4 already offers lease
>> based locking. If people are worried about network partitions and/or
>> locks being held by clients that are dead, then they can switch to  
>> that.
>>
>> On the other hand, a clear_locks command could be useful in order to
>> tell a server that a given client is dead. It should be fairly easy  
>> to
>> leverage the existing NSM/statd protocol to implement this.
>
> Oh, so all clear_locks does is send an nsm notification?  Yeah, that
> sounds like a completely reasonable project for someone.

If you send an SM_NOTIFY to statd, it will ignore it if it doesn't  
recognize the mon_name.  statd also checks the sender's IP address,  
which would be different in this case than that actual peer's IP  
address.

The SM_NOTIFY RPC does not have a return value, so there's no way to  
know whether your command was effective (other than seeing that the  
locks are still held).

clear_locks would have to read /var/lib/nfs/statd/sm/foo to get the  
RPC proc/vers/proc and priv arguments if it were to send an NLM  
downcall.

So, using NSM might be a simple approach, but not a robust one, IMO.

I've always wanted to have the kernel's NSM hosts cache exported via / 
sys (or similar).  That would make it somewhat easier to see what's  
going on, and provide a convenient sysctl-like interface for local  
commands to make adjustments such as this (or for statd to gather more  
information than is available from an SM_MON request).

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
  2009-12-02 17:20         ` Chuck Lever
@ 2009-12-03 15:28           ` Chuck Lever
  2009-12-07 16:36             ` J. Bruce Fields
  0 siblings, 1 reply; 8+ messages in thread
From: Chuck Lever @ 2009-12-03 15:28 UTC (permalink / raw)
  To: J. Bruce Fields, Trond Myklebust; +Cc: Jeff Layton, Mi Jinlong, NFSv3 list


On Dec 2, 2009, at 12:20 PM, Chuck Lever wrote:

> On Dec 2, 2009, at 12:09 PM, J. Bruce Fields wrote:
>> On Wed, Dec 02, 2009 at 09:29:03AM -0500, Trond Myklebust wrote:
>>> On Wed, 2009-12-02 at 07:26 -0500, Jeff Layton wrote:
>>>> On Wed, 02 Dec 2009 17:47:04 +0800
>>>> Mi Jinlong <mijinlong@cn.fujitsu.com> wrote:
>>>>
>>>>> After a client get lock, it's network partition for some reasons.
>>>>> other client cannot get lock success forever.
>>>>>
>>>>> This patch can avoid this problem using rpc_ping to test client's
>>>>> network when host expired but hold lock.
>>>>>
>>>>> If the client's network is partition, server will release client's
>>>>> lock, other client will get lock success.
>>>>>
>>>>> Signed-off-by: mijinlong@cn.fujitsu.com
>>>>
>>>> Yikes! That sounds like it'll make locking subject to the  
>>>> reliability
>>>> of the network. I don't think that's a good idea.
>>>>
>>>> What might be more reasonable is to consider implementing something
>>>> like the clear_locks command in Solaris. That is, a way for an  
>>>> admin to
>>>> remove server-side locks held by a client that he knows is never  
>>>> going
>>>> to come back. With that, this sort of thing at least becomes a  
>>>> willful
>>>> act...
>>>
>>> Agreed on both counts.
>>>
>>> We should not be changing the semantics of either NFSv3 or NLM at  
>>> this
>>> time. That will break existing setups that are treating NFSv3 as  
>>> being a
>>> stable platform.
>>> As I've said in previous correspondence: NFSv4 already offers lease
>>> based locking. If people are worried about network partitions and/or
>>> locks being held by clients that are dead, then they can switch to  
>>> that.
>>>
>>> On the other hand, a clear_locks command could be useful in order to
>>> tell a server that a given client is dead. It should be fairly  
>>> easy to
>>> leverage the existing NSM/statd protocol to implement this.
>>
>> Oh, so all clear_locks does is send an nsm notification?  Yeah, that
>> sounds like a completely reasonable project for someone.
>
> If you send an SM_NOTIFY to statd, it will ignore it if it doesn't  
> recognize the mon_name.  statd also checks the sender's IP address,  
> which would be different in this case than that actual peer's IP  
> address.
>
> The SM_NOTIFY RPC does not have a return value, so there's no way to  
> know whether your command was effective (other than seeing that the  
> locks are still held).
>
> clear_locks would have to read /var/lib/nfs/statd/sm/foo to get the  
> RPC proc/vers/proc and priv arguments if it were to send an NLM  
> downcall.

Taking the downcall approach....

If we can live with operating "in the dark" (with regard to what the  
kernel is actually doing) and live with the "appropriation" of data  
in /var/lib/nfs/statd, this would be simple and get us 70-80%.

Basically this tool would make use of the features of the new  
libnsm.a.  Copy sm-notify.c, strip out the unnecessary parts, and use  
the libnsm.a NLM downcall functions instead of its SM_NOTIFY functions.

A synopsis might be:

    clear-locks [-a] [-p state-directory] [--list] [hostname]  
[hostname] [hostname] ...

-a      Clear NLM locks for all monitored peers

-p      Specify an alternate state directory (default: /var/lib/nfs/ 
statd)

--list  List all monitored peers

Each hostname would have to match a monitor record.

The tool could report only on the contents of /var/lib/nfs/statd; it  
could not report on kernel state, so it could not report whether the  
peer actually had any locks, or whether existing locks were actually  
cleared successfully. The kernel would poke statd to unmonitor the  
peer as needed, in order to keep the kernel's monitor list in sync  
with statd's.

For discussion, I could mock up a prototype and insert it in my statd  
patch series (which introduces libnsm.a).

> So, using NSM might be a simple approach, but not a robust one, IMO.
>
> I've always wanted to have the kernel's NSM hosts cache exported  
> via /sys (or similar).  That would make it somewhat easier to see  
> what's going on, and provide a convenient sysctl-like interface for  
> local commands to make adjustments such as this (or for statd to  
> gather more information than is available from an SM_MON request).

If this is ever implemented, clear-locks could use it when it was  
available.

-- 
Chuck Lever
chuck[dot]lever[at]oracle[dot]com





^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
  2009-12-03 15:28           ` Chuck Lever
@ 2009-12-07 16:36             ` J. Bruce Fields
  2009-12-07 16:59               ` Chuck Lever
  0 siblings, 1 reply; 8+ messages in thread
From: J. Bruce Fields @ 2009-12-07 16:36 UTC (permalink / raw)
  To: Chuck Lever; +Cc: Trond Myklebust, Jeff Layton, Mi Jinlong, NFSv3 list

On Thu, Dec 03, 2009 at 10:28:53AM -0500, Chuck Lever wrote:
> On Dec 2, 2009, at 12:20 PM, Chuck Lever wrote:
>> If you send an SM_NOTIFY to statd, it will ignore it if it doesn't  
>> recognize the mon_name.  statd also checks the sender's IP address,  
>> which would be different in this case than that actual peer's IP  
>> address.
>>
>> The SM_NOTIFY RPC does not have a return value, so there's no way to  
>> know whether your command was effective (other than seeing that the  
>> locks are still held).
>>
>> clear_locks would have to read /var/lib/nfs/statd/sm/foo to get the  
>> RPC proc/vers/proc and priv arguments if it were to send an NLM  
>> downcall.
>
> Taking the downcall approach....
>
> If we can live with operating "in the dark" (with regard to what the  
> kernel is actually doing) and live with the "appropriation" of data in 
> /var/lib/nfs/statd, this would be simple and get us 70-80%.
>
> Basically this tool would make use of the features of the new libnsm.a.  
> Copy sm-notify.c, strip out the unnecessary parts, and use the libnsm.a 
> NLM downcall functions instead of its SM_NOTIFY functions.

Forgive me for being behind here: what's the practical difference
between the two?  I guess the NLM rpc's are authenticated just by being
from localhost.  Does it give any better error reporting?  What's the
remaining 20-30%?

--b.

>
> A synopsis might be:
>
>    clear-locks [-a] [-p state-directory] [--list] [hostname] [hostname] 
> [hostname] ...
>
> -a      Clear NLM locks for all monitored peers
>
> -p      Specify an alternate state directory (default: /var/lib/nfs/ 
> statd)
>
> --list  List all monitored peers
>
> Each hostname would have to match a monitor record.
>
> The tool could report only on the contents of /var/lib/nfs/statd; it  
> could not report on kernel state, so it could not report whether the  
> peer actually had any locks, or whether existing locks were actually  
> cleared successfully. The kernel would poke statd to unmonitor the peer 
> as needed, in order to keep the kernel's monitor list in sync with 
> statd's.
>
> For discussion, I could mock up a prototype and insert it in my statd  
> patch series (which introduces libnsm.a).
>
>> So, using NSM might be a simple approach, but not a robust one, IMO.
>>
>> I've always wanted to have the kernel's NSM hosts cache exported via 
>> /sys (or similar).  That would make it somewhat easier to see what's 
>> going on, and provide a convenient sysctl-like interface for local 
>> commands to make adjustments such as this (or for statd to gather more 
>> information than is available from an SM_MON request).
>
> If this is ever implemented, clear-locks could use it when it was  
> available.
>
> -- 
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
>
>
>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts
  2009-12-07 16:36             ` J. Bruce Fields
@ 2009-12-07 16:59               ` Chuck Lever
  0 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2009-12-07 16:59 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Jeff Layton, Mi Jinlong, NFSv3 list


On Dec 7, 2009, at 11:36 AM, J. Bruce Fields wrote:

> On Thu, Dec 03, 2009 at 10:28:53AM -0500, Chuck Lever wrote:
>> On Dec 2, 2009, at 12:20 PM, Chuck Lever wrote:
>>> If you send an SM_NOTIFY to statd, it will ignore it if it doesn't
>>> recognize the mon_name.  statd also checks the sender's IP address,
>>> which would be different in this case than that actual peer's IP
>>> address.
>>>
>>> The SM_NOTIFY RPC does not have a return value, so there's no way to
>>> know whether your command was effective (other than seeing that the
>>> locks are still held).
>>>
>>> clear_locks would have to read /var/lib/nfs/statd/sm/foo to get the
>>> RPC proc/vers/proc and priv arguments if it were to send an NLM
>>> downcall.
>>
>> Taking the downcall approach....
>>
>> If we can live with operating "in the dark" (with regard to what the
>> kernel is actually doing) and live with the "appropriation" of data  
>> in
>> /var/lib/nfs/statd, this would be simple and get us 70-80%.
>>
>> Basically this tool would make use of the features of the new  
>> libnsm.a.
>> Copy sm-notify.c, strip out the unnecessary parts, and use the  
>> libnsm.a
>> NLM downcall functions instead of its SM_NOTIFY functions.
>
> Forgive me for being behind here: what's the practical difference
> between the two?  I guess the NLM rpc's are authenticated just by  
> being
> from localhost.  Does it give any better error reporting?  What's  
> the remaining 20-30%?

Having a sysfs interface would allow the tool to detect immediately  
whether the clearlocks downcall worked.  See above: the NLM downcall  
has a void result, so there's no easy way to tell whether it actually  
did anything.

Also, the statd data under /var/lib/nfs could be out of sync with the  
kernel's NSM host cache.  Essentially clearlocks would be operating  
against a possibly stale copy of the real working list of remote peers.

>> A synopsis might be:
>>
>>   clear-locks [-a] [-p state-directory] [--list] [hostname]  
>> [hostname]
>> [hostname] ...
>>
>> -a      Clear NLM locks for all monitored peers
>>
>> -p      Specify an alternate state directory (default: /var/lib/nfs/
>> statd)
>>
>> --list  List all monitored peers
>>
>> Each hostname would have to match a monitor record.
>>
>> The tool could report only on the contents of /var/lib/nfs/statd; it
>> could not report on kernel state, so it could not report whether the
>> peer actually had any locks, or whether existing locks were actually
>> cleared successfully. The kernel would poke statd to unmonitor the  
>> peer
>> as needed, in order to keep the kernel's monitor list in sync with
>> statd's.
>>
>> For discussion, I could mock up a prototype and insert it in my statd
>> patch series (which introduces libnsm.a).
>>
>>> So, using NSM might be a simple approach, but not a robust one, IMO.
>>>
>>> I've always wanted to have the kernel's NSM hosts cache exported via
>>> /sys (or similar).  That would make it somewhat easier to see what's
>>> going on, and provide a convenient sysctl-like interface for local
>>> commands to make adjustments such as this (or for statd to gather  
>>> more
>>> information than is available from an SM_MON request).
>>
>> If this is ever implemented, clear-locks could use it when it was
>> available.
>>
>> -- 
>> Chuck Lever
>> chuck[dot]lever[at]oracle[dot]com
>>
>>
>>
>>

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-12-07 17:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-02  9:47 [PATCH] NLM: add network test when host expire but hold lock at nlm_gc_hosts Mi Jinlong
2009-12-02 12:26 ` Jeff Layton
     [not found]   ` <20091202072644.31c5d17e-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
2009-12-02 14:29     ` Trond Myklebust
2009-12-02 17:09       ` J. Bruce Fields
2009-12-02 17:20         ` Chuck Lever
2009-12-03 15:28           ` Chuck Lever
2009-12-07 16:36             ` J. Bruce Fields
2009-12-07 16:59               ` Chuck Lever

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.