Two new NFSD procfs files are added:

  /proc/fs/nfsd/unlock_ip
  /proc/fs/nfsd/unlock_filesystem

They are intended to allow an admin or a user-mode script to release NLM
locks based on either a path name or a server in-bound IP address (IPv4
only for now), for example:

shell> echo 10.1.1.2 > /proc/fs/nfsd/unlock_ip
shell> echo /mnt/sfs1 > /proc/fs/nfsd/unlock_filesystem

Signed-off-by: S. Wendy Cheng
Signed-off-by: Lon Hohberger

 fs/lockd/svcsubs.c          |  123 ++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/export.c            |   33 +++++++++++
 fs/nfsd/nfsctl.c            |   69 ++++++++++++++++++++++++
 include/linux/lockd/bind.h  |    2 
 include/linux/lockd/lockd.h |   14 ++++-
 include/linux/nfsd/export.h |   13 ++++
 6 files changed, 250 insertions(+), 4 deletions(-)

--- linux-o/include/linux/nfsd/export.h	2008-01-04 10:01:08.000000000 -0500
+++ linux/include/linux/nfsd/export.h	2008-01-06 15:33:13.000000000 -0500
@@ -138,6 +138,19 @@ int			exp_rootfh(struct auth_domain *,
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 __be32			nfserrno(int errno);
 
+/* cluster failover support */
+
+#define NFSD_FO_VIP		0
+#define NFSD_FO_PATH		1
+
+/* 1 enables failover tracing; plain DEBUG would clash with pr_debug()'s switch */
+#define NFSD_FO_DEBUG		0
+#define fo_printk(x...) ((void)(NFSD_FO_DEBUG && printk(x)))
+
+int nfsd_fo_cmd(int cmd, char *datap, int grace_time);
+
+/* end of failover addition */
+
 extern struct cache_detail svc_export_cache;
 
 static inline void exp_put(struct svc_export *exp)
--- linux-o/fs/nfsd/nfsctl.c	2008-01-04 10:01:08.000000000 -0500
+++ linux/fs/nfsd/nfsctl.c	2008-01-06 15:27:34.000000000 -0500
@@ -52,6 +52,8 @@ enum {
 	NFSD_Getfs,
 	NFSD_List,
 	NFSD_Fh,
+	NFSD_FO_UnlockIP,
+	NFSD_FO_UnlockFS,
 	NFSD_Threads,
 	NFSD_Pool_Threads,
 	NFSD_Versions,
@@ -88,6 +90,9 @@ static ssize_t write_leasetime(struct fi
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 #endif
 
+static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size);
+static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size);
+
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
 	[NFSD_Add] = write_add,
@@ -97,6 +102,8 @@ static ssize_t (*write_op[])(struct file
 	[NFSD_Getfd] = write_getfd,
 	[NFSD_Getfs] = write_getfs,
 	[NFSD_Fh] = write_filehandle,
+	[NFSD_FO_UnlockIP] = failover_unlock_ip,
+	[NFSD_FO_UnlockFS] = failover_unlock_fs,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Pool_Threads] = write_pool_threads,
 	[NFSD_Versions] = write_versions,
@@ -288,6 +295,65 @@ static ssize_t write_getfd(struct file *
 	return err;
 }
 
+extern __u32 in_aton(const char *str);
+
+/*
+ * Common write handler behind unlock_ip and unlock_filesystem.
+ *
+ * @where is NFSD_FO_VIP or NFSD_FO_PATH.  A trailing newline in @buf
+ * is stripped and the first word, as parsed by qword_get(), is handed
+ * to nfsd_fo_cmd(); for NFSD_FO_VIP it is first converted with
+ * in_aton().  Returns the nfsd_fo_cmd() result, or -EINVAL for empty
+ * or unparsable input or an unknown @where.
+ */
+static
+ssize_t failover_parse(int where, struct file *file, char *buf, size_t size)
+{
+	char *fo_path, *mesg;
+	__be32 server_ip[4];
+
+	/* sanity check */
+	if (size <= 0) {
+		fo_printk("nfsd fo buf size not correct\n");
+		return -EINVAL;
+	}
+	if (buf[size-1] == '\n')
+		buf[size-1] = 0;
+
+	/* get the string */
+	fo_printk("nfsd fo buf = %s\n", buf);
+
+	fo_path = mesg = buf;
+	if (qword_get(&mesg, fo_path, size) < 0)
+		return -EINVAL;
+
+	fo_printk("fo_dev=%s\n", fo_path);
+
+	switch (where) {
+	case NFSD_FO_PATH:
+		break;
+	case NFSD_FO_VIP:
+		server_ip[0] = in_aton(fo_path);
+		fo_path = (char *) server_ip;
+		break;
+	default:
+		fo_printk("nfsd unknown fo cmd (%d)\n", where);
+		return -EINVAL;
+	}
+
+	return (nfsd_fo_cmd(where, fo_path, 0));
+}
+
+static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
+{
+	return (failover_parse(NFSD_FO_VIP, file, buf, size));
+}
+
+static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
+{
+	return (failover_parse(NFSD_FO_PATH, file, buf, size));
+}
+
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
 {
 	/* request is:
@@ -646,6 +712,8 @@ static int nfsd_fill_super(struct super_
 		[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+		[NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_FO_UnlockFS] = {"unlock_filesystem", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
@@ -717,7 +785,6 @@ static void __exit exit_nfsd(void)
 	nfsd4_free_slabs();
 	unregister_filesystem(&nfsd_fs_type);
 }
-
 MODULE_AUTHOR("Olaf Kirch ");
 MODULE_LICENSE("GPL");
 module_init(init_nfsd)
--- linux-o/fs/nfsd/export.c	2008-01-04 10:01:08.000000000 -0500
+++ linux/fs/nfsd/export.c	2008-01-06 15:14:55.000000000 -0500
@@ -1679,3 +1679,36 @@ nfsd_export_shutdown(void)
 	exp_writeunlock();
 	dprintk("nfsd: export shutdown complete.\n");
 }
+
+/*
+ * Hand a failover unlock command over to lockd.
+ *
+ * For NFSD_FO_PATH, @datap is a path name: it is looked up and the
+ * resulting nameidata is passed to nlmsvc_fo_cmd().  Any other @cmd
+ * passes @datap through unchanged.  Returns the path_lookup() error
+ * if the lookup fails, otherwise the nlmsvc_fo_cmd() result.
+ */
+int
+nfsd_fo_cmd(int cmd, char *datap, int grace_period)
+{
+	struct nameidata nd;
+	void *objp = (void *)datap;
+	int rc=0;
+
+	if (cmd == NFSD_FO_PATH) {
+		rc = path_lookup((const char *)datap, 0, &nd);
+		if (rc) {
+			fo_printk("nfsd: nfsd_fo path (%s) not found\n", datap);
+			return rc;
+		}
+		fo_printk("nfsd: nfsd_fo lookup path = (0x%p,0x%p)\n",
+			nd.mnt, nd.dentry);
+		objp = (void *) &nd;
+	}
+	rc = nlmsvc_fo_cmd(cmd, objp, grace_period);
+
+	/* drop the references taken by the successful path_lookup() */
+	if (cmd == NFSD_FO_PATH)
+		path_release(&nd);
+	return rc;
+}
--- linux-o/fs/lockd/svcsubs.c	2008-01-04 10:01:08.000000000 -0500
+++ linux/fs/lockd/svcsubs.c	2008-01-06 16:20:37.000000000 -0500
@@ -18,10 +18,11 @@
 #include
 #include
 #include
+#include
+#include
 
 #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
 
-
 /*
  * Global file hash table
  */
@@ -87,7 +88,7 @@ nlm_lookup_file(struct svc_rqst *rqstp,
 	unsigned int	hash;
 	__be32		nfserr;
 
-	nlm_debug_print_fh("nlm_file_lookup", f);
+	nlm_debug_print_fh("nlm_lookup_file", f);
 
 	hash = file_hash(f);
 
@@ -123,6 +124,11 @@ nlm_lookup_file(struct svc_rqst *rqstp,
 
 	hlist_add_head(&file->f_list, &nlm_files[hash]);
 
+	/* fill in f_iaddr for nlm lock failover */
+	file->f_iaddr = rqstp->rq_daddr;
+	fo_printk("lockd: file->f_iaddr = %u.%u.%u.%u\n",
+		NIPQUAD(file->f_iaddr.addr.s_addr));
+
 found:
 	dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
 	*result = file;
@@ -194,12 +200,94 @@ again:
 	return 0;
 }
 
+/*
+ * Check whether @file is covered by the failover command in @datap
+ * (an nlm_fo_cmd): NFSD_FO_VIP compares the server address saved in
+ * file->f_iaddr, NFSD_FO_PATH compares the file's vfsmount.
+ * Returns 1 and increments fo_cmd->stat on a match, 0 otherwise.
+ */
+static inline int
+nlmsvc_fo_unlock_match(void *datap, struct nlm_file *file)
+{
+	nlm_fo_cmd *fo_cmd = (nlm_fo_cmd *) datap;
+	int cmd = fo_cmd->cmd;
+	struct path *f_path;
+
+	fo_printk("nlm_fo_unlock_match cmd=%d\n", cmd);
+
+	if (cmd == NFSD_FO_VIP) {
+		if (file->f_iaddr.addr.s_addr ==
+			((struct in_addr *)fo_cmd->datap)->s_addr) {
+			fo_printk("lockd: fo ip matches %u.%u.%u.%u\n",
+				NIPQUAD(file->f_iaddr.addr.s_addr));
+			goto nlmsvc_fo_unlock_match_found;
+		} else {
+			fo_printk("lockd: fo ip no match %u.%u.%u.%u\n",
+				NIPQUAD(((struct in_addr *)fo_cmd->datap)->s_addr));
+			return 0;
+		}
+	}
+
+	/* looking for match using file's vfsmount */
+	f_path = &(file->f_file->f_path);
+
+	if (cmd == NFSD_FO_PATH) {
+		struct path fo_path;
+		/*
+		 * The dentry is not really used but stays here for
+		 * debugging purpose.
+		 */
+		fo_path.mnt = ((struct nameidata *) fo_cmd->datap)->mnt;
+		fo_path.dentry = ((struct nameidata *) fo_cmd->datap)->dentry;
+		fo_printk("f_path->mnt (0x%p) f_path->dentry (0x%p)\n",
+			f_path->mnt, f_path->dentry);
+		fo_printk("fo_path (0x%p) fo_path->dentry (0x%p)\n",
+			fo_path.mnt, fo_path.dentry);
+		/* check vfsmount */
+		if (fo_path.mnt == f_path->mnt)
+			goto nlmsvc_fo_unlock_match_found;
+		return 0; /* not found */
+	}
+
+	fo_printk("nlmsvc_fo_unlock_match - unknown cmd\n");
+	return 0; /* should never reach here */
+
+nlmsvc_fo_unlock_match_found:
+	fo_printk("nlmsvc_fo_unlock_match found file=0x%p\n", file);
+	fo_cmd->stat++;
+	return 1;
+}
+
+/* To fit the logic into current lockd code structure, we add a
+ * little wrapper function here. The real matching task is
+ * carried out by nlmsvc_fo_unlock_match().
+ */
+int nlmsvc_fo_match(struct nlm_host *dummy1, struct nlm_host *dummy2)
+{
+	return 1;
+}
+
 /*
  * Inspect a single file
  */
 static inline int
 nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match)
 {
+	/* Cluster failover has timing constraints. There is a slight
+	 * performance hit if nlmsvc_fo_unlock_match() is implemented as
+	 * a match fn (since it will be invoked for each block, share,
+	 * and lock later when the lists are traversed). Instead, we
+	 * add path-matching logic into the following unlikely clause.
+	 * If matches, the dummy nlmsvc_fo_match will always return
+	 * true.
+	 */
+	dprintk("nlm_inspect_files: file=%p\n", file);
+	if (unlikely(match == nlmsvc_fo_match)) {
+		if (!nlmsvc_fo_unlock_match((void *)host, file))
+			return 0;
+		fo_printk("nlm_fo find lock file entry (0x%p)\n", file);
+	}
+
 	nlmsvc_traverse_blocks(host, file, match);
 	nlmsvc_traverse_shares(host, file, match);
 	return nlm_traverse_locks(host, file, match);
@@ -370,3 +458,34 @@ nlmsvc_invalidate_all(void)
 	 */
 	nlm_traverse_files(NULL, nlmsvc_is_client);
 }
+
+/*
+ * Release NLM locks matching a server ip or a filesystem upon
+ * failover; invoked by nfsd through nfsd_fo_cmd().
+ */
+int
+nlmsvc_fo_cmd(int cmd, void *datap, int grace_time)
+{
+	nlm_fo_cmd fo_cmd;
+	int rc=-EINVAL;
+
+	fo_printk("lockd: nlmsvc_fo_cmd enter, cmd=%d, datap=0x%p, gp=%d\n",
+		cmd, datap, grace_time);
+
+	fo_cmd.cmd = cmd;
+	fo_cmd.stat = 0;
+	fo_cmd.gp = 0;
+	fo_cmd.datap = datap;
+
+	/* "if" place holder for NFSD_FO_RESUME */
+	{
+		/* fo_start */
+		rc = nlm_traverse_files((struct nlm_host*) &fo_cmd,
+				nlmsvc_fo_match);
+		fo_printk("nlmsvc_fo_cmd rc=%d, stat=%d\n", rc, fo_cmd.stat);
+	}
+
+	return rc;
+}
+
+EXPORT_SYMBOL(nlmsvc_fo_cmd);
--- linux-o/include/linux/lockd/bind.h	2008-01-04 10:01:08.000000000 -0500
+++ linux/include/linux/lockd/bind.h	2008-01-06 15:14:55.000000000 -0500
@@ -47,4 +47,6 @@ unsigned long get_nfs4_grace_period(void
 static inline unsigned long get_nfs4_grace_period(void) {return 0;}
 #endif
 
+extern int nlmsvc_fo_cmd(int cmd, void *datap, int grace_time);
+
 #endif /* LINUX_LOCKD_BIND_H */
--- linux-o/include/linux/lockd/lockd.h	2008-01-04 10:01:08.000000000 -0500
+++ linux/include/linux/lockd/lockd.h	2008-01-06 15:14:55.000000000 -0500
@@ -39,7 +39,7 @@
 struct nlm_host {
 	struct hlist_node	h_hash;		/* doubly linked list */
 	struct sockaddr_in	h_addr;		/* peer address */
-	struct sockaddr_in	h_saddr;	/* our address (optional) */
+	struct sockaddr_in	h_saddr;        /* our address (optional) */
 	struct rpc_clnt	*	h_rpcclnt;	/* RPC client to talk to peer */
 	char *			h_name;		/* remote hostname */
 	u32			h_version;	/* interface version */
@@ -113,6 +113,7 @@ struct nlm_file {
 	unsigned int		f_locks;	/* guesstimate # of locks */
 	unsigned int		f_count;	/* reference count */
 	struct mutex		f_mutex;	/* avoid concurrent access */
+	union svc_addr_u	f_iaddr;	/* server ip for failover */
 };
 
 /*
@@ -214,6 +215,17 @@ void		  nlmsvc_mark_resources(void);
 void		  nlmsvc_free_host_resources(struct nlm_host *);
 void		  nlmsvc_invalidate_all(void);
 
+/* cluster failover support */
+
+typedef struct {
+	int	cmd;
+	int	stat;
+	int	gp;
+	void	*datap;
+} nlm_fo_cmd;
+
+int nlmsvc_fo_cmd(int cmd, void *datap, int grace_time);
+
 static __inline__ struct inode *
 nlmsvc_file_inode(struct nlm_file *file)
 {