From: Wendy Cheng <wcheng@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] Re: [PATCH 1/2] NLM failover unlock commands
Date: Tue, 08 Jan 2008 15:57:45 -0500 [thread overview]
Message-ID: <4783E3C9.3040803@redhat.com> (raw)
In-Reply-To: <20080108174958.GA25025@infradead.org>
Christoph Hellwig wrote:
> Ok, I played around with this and cleaned up the ip/path codepathes to
> be entirely setup which helped the code quite a bit. Also a few other
>
Thanks for doing this :) . In the middle of running it with our cluster
test - if passed, will repost it. Get your "signed-off" line ready ?
-- Wendy
> cleanups and two bugfixes (postive error code returned and missing
> path_release) fell out of it. I still don't like what's going on in
> fs/lockd/svcsubs.c, it would be much better if the cluster unlock code
> simply didn't use nlm_traverse_files but did it's own loop over
> the nlm hosts. That should also absolete the second patch.
>
>
> Index: linux-2.6/fs/nfsd/nfsctl.c
> ===================================================================
> --- linux-2.6.orig/fs/nfsd/nfsctl.c 2008-01-08 18:36:37.000000000 +0100
> +++ linux-2.6/fs/nfsd/nfsctl.c 2008-01-08 18:45:55.000000000 +0100
> @@ -22,6 +22,7 @@
> #include <linux/seq_file.h>
> #include <linux/pagemap.h>
> #include <linux/init.h>
> +#include <linux/inet.h>
> #include <linux/string.h>
> #include <linux/smp_lock.h>
> #include <linux/ctype.h>
> @@ -35,6 +36,7 @@
> #include <linux/nfsd/cache.h>
> #include <linux/nfsd/xdr.h>
> #include <linux/nfsd/syscall.h>
> +#include <linux/lockd/lockd.h>
>
> #include <asm/uaccess.h>
>
> @@ -52,6 +54,8 @@ enum {
> NFSD_Getfs,
> NFSD_List,
> NFSD_Fh,
> + NFSD_FO_UnlockIP,
> + NFSD_FO_UnlockFS,
> NFSD_Threads,
> NFSD_Pool_Threads,
> NFSD_Versions,
> @@ -88,6 +92,9 @@ static ssize_t write_leasetime(struct fi
> static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
> #endif
>
> +static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size);
> +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size);
> +
> static ssize_t (*write_op[])(struct file *, char *, size_t) = {
> [NFSD_Svc] = write_svc,
> [NFSD_Add] = write_add,
> @@ -97,6 +104,8 @@ static ssize_t (*write_op[])(struct file
> [NFSD_Getfd] = write_getfd,
> [NFSD_Getfs] = write_getfs,
> [NFSD_Fh] = write_filehandle,
> + [NFSD_FO_UnlockIP] = failover_unlock_ip,
> + [NFSD_FO_UnlockFS] = failover_unlock_fs,
> [NFSD_Threads] = write_threads,
> [NFSD_Pool_Threads] = write_pool_threads,
> [NFSD_Versions] = write_versions,
> @@ -288,6 +297,55 @@ static ssize_t write_getfd(struct file *
> return err;
> }
>
> +static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
> +{
> + __be32 server_ip;
> + char *fo_path;
> + char *mesg;
> +
> + /* sanity check */
> + if (size <= 0)
> + return -EINVAL;
> +
> + if (buf[size-1] == '\n')
> + buf[size-1] = 0;
> +
> + fo_path = mesg = buf;
> + if (qword_get(&mesg, fo_path, size) < 0)
> + return -EINVAL;
> +
> + server_ip = in_aton(fo_path);
> + return nlmsvc_failover_ip(server_ip[0]);
> +}
> +
> +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
> +{
> + struct nameidata nd;
> + char *fo_path;
> + char *mesg;
> + int error;
> +
> + /* sanity check */
> + if (size <= 0)
> + return -EINVAL;
> +
> + if (buf[size-1] == '\n')
> + buf[size-1] = 0;
> +
> + fo_path = mesg = buf;
> + if (qword_get(&mesg, fo_path, size) < 0)
> + return -EINVAL;
> +
> + error = path_lookup(fo_path, 0, &nd);
> + if (error)
> + return error;
> +
> + error = nlmsvc_failover_path(&nd);
> +
> + path_release(&nd);
> + return error;
> +}
> +
> static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
> {
> /* request is:
> @@ -646,6 +704,8 @@ static int nfsd_fill_super(struct super_
> [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
> + [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR},
> + [NFSD_FO_UnlockFS] = {"unlock_filesystem", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
> Index: linux-2.6/fs/lockd/svcsubs.c
> ===================================================================
> --- linux-2.6.orig/fs/lockd/svcsubs.c 2008-01-08 18:36:37.000000000 +0100
> +++ linux-2.6/fs/lockd/svcsubs.c 2008-01-08 18:44:11.000000000 +0100
> @@ -18,6 +18,8 @@
> #include <linux/lockd/lockd.h>
> #include <linux/lockd/share.h>
> #include <linux/lockd/sm_inter.h>
> +#include <linux/module.h>
> +#include <linux/mount.h>
>
> #define NLMDBG_FACILITY NLMDBG_SVCSUBS
>
> @@ -87,7 +89,7 @@ nlm_lookup_file(struct svc_rqst *rqstp,
> unsigned int hash;
> __be32 nfserr;
>
> - nlm_debug_print_fh("nlm_file_lookup", f);
> + nlm_debug_print_fh("nlm_lookup_file", f);
>
> hash = file_hash(f);
>
> @@ -123,6 +125,9 @@ nlm_lookup_file(struct svc_rqst *rqstp,
>
> hlist_add_head(&file->f_list, &nlm_files[hash]);
>
> + /* fill in f_iaddr for nlm lock failover */
> + file->f_iaddr = rqstp->rq_daddr;
> +
> found:
> dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
> *result = file;
> @@ -194,12 +199,63 @@ again:
> return 0;
> }
>
> +static int
> +nlmsvc_fo_unlock_match_path(void *datap, struct nlm_file *file)
> +{
> + struct nameidata *nd = datap;
> + return nd->mnt == file->f_file->f_path.mnt;
> +}
> +
> +static int
> +nlmsvc_fo_unlock_match_ip(void *datap, struct nlm_file *file)
> +{
> + struct in_addr *in = datap;
> +
> + return file->f_iaddr.addr.s_addr == in->s_addr;
> +}
> +
> +/*
> + * To fit the logic into current lockd code structure, we add a
> + * little wrapper function here. The real matching task should be
> + * carried out by nlm_fo_check_fsid().
> + */
> +
> +static int nlmsvc_fo_match_path(struct nlm_host *dummy1,
> + struct nlm_host *dummy2)
> +{
> + return 1;
> +}
> +
> +static int nlmsvc_fo_match_ip(struct nlm_host *dummy1, struct nlm_host *dummy2)
> +{
> + return 1;
> +}
> +
> /*
> * Inspect a single file
> */
> static inline int
> nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match)
> {
> + /*
> + * Cluster failover has timing constraints. There is a slight
> + * performance hit if nlm_fo_unlock_match() is implemented as
> + * a match fn (since it will be invoked for each block, share,
> + * and lock later when the lists are traversed). Instead, we
> + * add path-matching logic into the following unlikely clause.
> + * If matches, the dummy nlmsvc_fo_match will always return
> + * true.
> + */
> + dprintk("nlm_inspect_files: file=%p\n", file);
> + if (unlikely(match == nlmsvc_fo_match_path)) {
> + if (!nlmsvc_fo_unlock_match_path(host, file))
> + return 0;
> + }
> + if (unlikely(match == nlmsvc_fo_match_ip)) {
> + if (!nlmsvc_fo_unlock_match_ip(host, file))
> + return 0;
> + }
> +
> nlmsvc_traverse_blocks(host, file, match);
> nlmsvc_traverse_shares(host, file, match);
> return nlm_traverse_locks(host, file, match);
> @@ -370,3 +426,22 @@ nlmsvc_invalidate_all(void)
> */
> nlm_traverse_files(NULL, nlmsvc_is_client);
> }
> +
> +/*
> + * Release locks associated with an export fsid upon failover
> + * invoked via nfsd nfsctl call (write_fo_unlock).
> + */
> +int
> +nlmsvc_failover_path(struct nameidata *nd)
> +{
> + return nlm_traverse_files((struct nlm_host*)nd, nlmsvc_fo_match_path);
> +}
> +EXPORT_SYMBOL_GPL(nlmsvc_failover_path);
> +
> +int
> +nlmsvc_failover_ip(__be32 server_addr)
> +{
> + return nlm_traverse_files((struct nlm_host *)&server_addr,
> + nlmsvc_fo_match_ip);
> +}
> +EXPORT_SYMBOL_GPL(nlmsvc_failover_ip);
> Index: linux-2.6/include/linux/lockd/lockd.h
> ===================================================================
> --- linux-2.6.orig/include/linux/lockd/lockd.h 2008-01-08 18:36:37.000000000 +0100
> +++ linux-2.6/include/linux/lockd/lockd.h 2008-01-08 18:44:44.000000000 +0100
> @@ -113,6 +113,7 @@ struct nlm_file {
> unsigned int f_locks; /* guesstimate # of locks */
> unsigned int f_count; /* reference count */
> struct mutex f_mutex; /* avoid concurrent access */
> + union svc_addr_u f_iaddr; /* server ip for failover */
> };
>
> /*
> @@ -214,6 +215,12 @@ void nlmsvc_mark_resources(void);
> void nlmsvc_free_host_resources(struct nlm_host *);
> void nlmsvc_invalidate_all(void);
>
> +/*
> + * Cluster failover support
> + */
> +int nlmsvc_failover_path(struct nameidata *nd);
> +int nlmsvc_failover_ip(__be32 server_addr);
> +
> static __inline__ struct inode *
> nlmsvc_file_inode(struct nlm_file *file)
> {
>
next prev parent reply other threads:[~2008-01-08 20:57 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-07 5:39 [Cluster-devel] [PATCH 1/2] NLM failover unlock commands Wendy Cheng
[not found] ` <message from Wendy Cheng on Monday January 7>
2008-01-08 5:18 ` [Cluster-devel] " Neil Brown
2008-01-09 2:51 ` Wendy Cheng
2008-01-08 5:31 ` [Cluster-devel] Re: [PATCH 2/2] Fix lockd panic Neil Brown
2008-01-09 3:02 ` Wendy Cheng
2008-01-09 4:43 ` Wendy Cheng
2008-01-09 23:33 ` Wendy Cheng
2008-01-12 6:51 ` Wendy Cheng
2008-01-08 17:02 ` [Cluster-devel] Re: [PATCH 1/2] NLM failover unlock commands Christoph Hellwig
2008-01-08 17:49 ` Christoph Hellwig
2008-01-08 20:57 ` Wendy Cheng [this message]
2008-01-09 18:02 ` Christoph Hellwig
2008-01-10 7:59 ` Christoph Hellwig
2008-01-12 7:03 ` Wendy Cheng
2008-01-12 9:38 ` Christoph Hellwig
2008-01-14 23:07 ` J. Bruce Fields
[not found] ` <message from J. Bruce Fields on Monday January 14>
2008-01-14 23:31 ` Neil Brown
2008-01-22 22:53 ` J. Bruce Fields
[not found] ` <message from J. Bruce Fields on Tuesday January 22>
2008-01-24 4:02 ` Neil Brown
2008-01-15 16:14 ` Wendy Cheng
2008-01-15 16:30 ` J. Bruce Fields
[not found] ` <message from Wendy Cheng on Saturday January 12>
2008-01-14 23:52 ` Neil Brown
2008-01-15 20:17 ` Wendy Cheng
[not found] ` <message from Wendy Cheng on Tuesday January 15>
2008-01-15 20:50 ` Neil Brown
2008-01-15 20:56 ` Wendy Cheng
2008-01-15 22:48 ` Wendy Cheng
2008-01-17 15:10 ` J. Bruce Fields
2008-01-17 15:48 ` Wendy Cheng
2008-01-17 16:08 ` Wendy Cheng
2008-01-17 16:10 ` Wendy Cheng
2008-01-18 10:21 ` Frank van Maarseveen
2008-01-18 15:00 ` Wendy Cheng
2008-01-17 16:14 ` J. Bruce Fields
2008-01-17 16:17 ` Wendy Cheng
2008-01-17 16:21 ` J. Bruce Fields
2008-01-17 16:31 ` J. Bruce Fields
2008-01-17 16:31 ` Wendy Cheng
2008-01-17 16:40 ` J. Bruce Fields
[not found] ` <1200591323.13670.34.camel@dyn9047022153>
2008-01-17 17:59 ` Wendy Cheng
2008-01-17 18:07 ` Wendy Cheng
2008-01-17 20:23 ` J. Bruce Fields
2008-01-18 10:03 ` Frank van Maarseveen
2008-01-18 14:56 ` Wendy Cheng
2008-01-24 16:00 ` J. Bruce Fields
[not found] ` <4798BAAE.6090107@redhat.com>
2008-01-24 16:39 ` J. Bruce Fields
2008-01-24 19:45 ` Wendy Cheng
2008-01-24 20:19 ` J. Bruce Fields
2008-01-24 21:06 ` Wendy Cheng
2008-01-24 21:40 ` J. Bruce Fields
2008-01-24 21:49 ` Wendy Cheng
2008-01-28 3:46 ` Felix Blyakher
2008-01-28 15:56 ` Wendy Cheng
2008-01-28 17:06 ` Felix Blyakher
2008-01-16 4:19 ` Neil Brown
2008-01-09 3:49 ` Wendy Cheng
2008-01-09 16:13 ` J. Bruce Fields
-- strict thread matches above, loose matches on Subject: below --
2008-01-07 5:53 [Cluster-devel] [PATCH 2/2] Fix lockd panic Wendy Cheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4783E3C9.3040803@redhat.com \
--to=wcheng@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).