--- linux-1/include/linux/lockd/lockd.h	2006-09-03 21:51:41.000000000 -0400
+++ linux-2/include/linux/lockd/lockd.h	2006-09-13 22:48:00.000000000 -0400
@@ -107,6 +107,25 @@ struct nlm_file {
 	int			f_hash;		/* hash of f_handle */
 };
 
+/*
+ * nlmsvc_fo_setgrace() refuses to queue a new fsid once more than
+ * this many entries are on fo_fsid_list.
+ */
+#define NLM_FO_MAX_FSID_GP	127
+
+/*
+ * Server fsid linked list for NLM lock failover: each entry names an
+ * exported fsid that is in a grace period of its own.
+ */
+struct fo_fsid {
+	struct list_head	g_list;		/* link in fo_fsid_list */
+	unsigned long		g_expire;	/* jiffies value at which this
+						 * grace period will expire */
+	int			g_fsid;		/* exported fsid */
+	int			g_flag;		/* printk flag: set once "in
+						 * grace period" was logged */
+};
+
 /*
  * This is a server block (i.e. a lock requested by some client which
  * couldn't be granted because of a conflicting lock).
@@ -187,6 +206,8 @@ void nlmsvc_traverse_blocks(struct nl
 					int action);
 void		  nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32);
+/* required by svcsubs.c and svc.c to support nlm failover */
+unsigned long	  set_grace_period(void);
 
 /*
  * File handling for the server personality
  */
@@ -197,6 +218,7 @@ void nlmsvc_mark_resources(void);
 void		  nlmsvc_free_host_resources(struct nlm_host *);
 void		  nlmsvc_invalidate_all(void);
 int		  nlmsvc_fo_unlock(int *fsid);
+int		  nlmsvc_fo_check(struct nfs_fh *fh);
 
 static __inline__ struct inode *
 nlmsvc_file_inode(struct nlm_file *file)
--- linux-1/fs/lockd/svcsubs.c	2006-09-13 13:48:01.000000000 -0400
+++ linux-2/fs/lockd/svcsubs.c	2006-09-13 22:50:51.000000000 -0400
@@ -32,6 +32,15 @@
 static struct nlm_file *	nlm_files[FILE_NRHASH];
 static DEFINE_MUTEX(nlm_file_mutex);
 
+/*
+ * Global control structure for lock failover.  nlm_fo_lock protects
+ * fo_fsid_list and fo_fsid_cnt, the number of entries on that list.
+ * fo_fsid_list is not static because svcproc.c and svc4proc.c test it.
+ */
+static DEFINE_SPINLOCK(nlm_fo_lock);
+static int fo_fsid_cnt;
+LIST_HEAD(fo_fsid_list);
+
 #ifdef NFSD_DEBUG
 static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 {
@@ -403,3 +412,176 @@ nlmsvc_fo_unlock(int *fsid)
 	return (nlm_traverse_files(NULL, fsid, NLM_ACT_FO_UNLOCK));
 }
 
+EXPORT_SYMBOL(nlmsvc_fo_setgrace);
+
+/*
+ * Add an fsid to the global fo_fsid_list and start its grace period.
+ *
+ * A fo_fsid entry is allocated for fsid with a grace period that
+ * starts now and ends at the jiffies value set_grace_period() returns.
+ *
+ * If this routine is called repeatedly with the very same fsid, the
+ * older entries for that fsid are superseded: they are unlinked and
+ * freed here, together with any entry whose grace period has already
+ * expired, so that at most one entry per fsid is left on the list.
+ *
+ * nlmsvc_fo_check() walks the complete list and does not depend on
+ * the order of the entries, so the new entry simply goes to the head.
+ *
+ * As an admin interface, the list is expected to be short and
+ * entries are purged (expired) quickly.
+ *
+ * The list is built with the <linux/list.h> primitives; fo_fsid_list
+ * and fo_fsid_cnt are changed with nlm_fo_lock held.  The entry is
+ * allocated with GFP_KERNEL, so the caller must be allowed to sleep.
+ *
+ * Returns 0 once the entry is queued, -ENOMEM if it cannot be
+ * allocated, or -ENOSPC if more than NLM_FO_MAX_FSID_GP entries are
+ * still queued after the purge (the new entry is then not added).
+ */
+int
+nlmsvc_fo_setgrace(int fsid)
+{
+	struct fo_fsid *new_ent, *cur, *tmp;
+	unsigned long expire;
+	int rc = 0;
+
+	/* GFP_KERNEL may sleep: allocate before taking the spinlock */
+	new_ent = kmalloc(sizeof(*new_ent), GFP_KERNEL);
+	if (new_ent == NULL) {
+		printk("lockd: nlmsvc_fo_setgrace kmalloc fails\n");
+		return -ENOMEM;
+	}
+
+	/* debug printk */
+	dprintk("lockd: nlmsvc_fo_setgrace fsid=%d jiffies=%lu\n",
+		fsid, jiffies);
+
+	/*
+	 * Fill in info.  The expiry is kept in a local as well: once the
+	 * entry is on the list and the lock is dropped, nlmsvc_fo_check()
+	 * or nlmsvc_fo_reset_servs() may free it at any time.
+	 */
+	expire = set_grace_period();
+	new_ent->g_expire = expire;
+	new_ent->g_fsid = fsid;
+	new_ent->g_flag = 0;
+
+	spin_lock(&nlm_fo_lock);
+
+	/*
+	 * Purge the entries this request supersedes (same fsid) and,
+	 * as garbage collection, the ones that have expired.  Only the
+	 * wrap-safe time_before() is used to compare jiffies values.
+	 */
+	list_for_each_entry_safe(cur, tmp, &fo_fsid_list, g_list) {
+		if (cur->g_fsid != fsid) {
+			if (!time_before(cur->g_expire, jiffies))
+				continue;
+			printk("nlmsvc fo_fsid = %d expires\n", cur->g_fsid);
+		}
+		list_del(&cur->g_list);
+		fo_fsid_cnt--;
+		kfree(cur);
+	}
+
+	/* refuse the new entry once the count exceeds the maximum */
+	if (fo_fsid_cnt > NLM_FO_MAX_FSID_GP) {
+		rc = -ENOSPC;
+	} else {
+		list_add(&new_ent->g_list, &fo_fsid_list);
+		fo_fsid_cnt++;
+	}
+
+	spin_unlock(&nlm_fo_lock);
+
+	if (rc) {
+		/* never linked into the list, so still ours to free */
+		kfree(new_ent);
+		printk("lockd: fo_setgrace max cnt reached fsid=%d not added\n",
+			fsid);
+		printk("nlmsvc_fo_setgrace: adding fsid=%d fails\n", fsid);
+		return rc;
+	}
+
+	printk("nlmsvc fo setgrace: fsid=%d, jiffies=%lu, expire=%lu\n",
+		fsid, jiffies, expire);
+	return 0;
+}
+
+/*
+ * Reset global fo_fsid_list list: unlink and free every entry and
+ * set fo_fsid_cnt back to zero, all with nlm_fo_lock held.
+ *
+ * lockd() calls this each time it starts the global grace period
+ * and again on its shutdown path.
+ */
+void
+nlmsvc_fo_reset_servs(void)
+{
+	struct fo_fsid *cur, *tmp;
+
+	spin_lock(&nlm_fo_lock);
+
+	/* nothing to do (and nothing to log) when the list is empty */
+	if (!list_empty(&fo_fsid_list)) {
+		dprintk("lockd: nlmsvc_fo_reset fo_fsid_list\n");
+
+		/* purge the entries */
+		list_for_each_entry_safe(cur, tmp, &fo_fsid_list, g_list) {
+			list_del(&cur->g_list);
+			kfree(cur);
+		}
+		fo_fsid_cnt = 0;
+	}
+
+	spin_unlock(&nlm_fo_lock);
+}
+
+/*
+ * Check whether the fsid is in the failover list: fo_fsid_list.
+ * return TRUE (1) if the fsid has an unexpired entry in fo_fsid_list,
+ * 0 otherwise.
+ */
+int
+nlmsvc_fo_check(struct nfs_fh *fh)
+{
+	struct fo_fsid *ent, *next;
+	int this_fsid;
+	int found = 0;
+
+	/* see if this fh has fsid; there is nothing to look up if not */
+	if (!nlm_fo_get_fsid(fh, &this_fsid))
+		return 0;
+
+	spin_lock(&nlm_fo_lock);
+
+	/*
+	 * Walk the whole list even after a hit so that every expired
+	 * entry met on the way is unlinked and freed.
+	 */
+	list_for_each_entry_safe(ent, next, &fo_fsid_list, g_list) {
+		if (time_before(ent->g_expire, jiffies)) {
+			printk("lockd: fsid=%d grace period expires\n",
+				ent->g_fsid);
+			list_del(&ent->g_list);
+			fo_fsid_cnt--;
+			kfree(ent);
+			continue;
+		}
+		if (ent->g_fsid != this_fsid)
+			continue;
+
+		/* g_flag limits the log message to once per entry */
+		if (!ent->g_flag) {
+			ent->g_flag = 1;
+			printk("lockd: fsid=%d in grace period\n",
+				ent->g_fsid);
+		}
+		found = 1;
+	}
+
+	spin_unlock(&nlm_fo_lock);
+	return found;
+}
+
--- linux-1/include/linux/lockd/bind.h	2006-09-03 21:51:41.000000000 -0400
+++ linux-2/include/linux/lockd/bind.h	2006-09-11 16:52:34.000000000 -0400
@@ -37,5 +37,8 @@ extern void lockd_down(void);
  * NLM failover
  */
 extern int	nlmsvc_fo_unlock(int *fsid);
+extern int	nlmsvc_fo_setgrace(int fsid);
+extern void	nlmsvc_fo_reset_servs(void);
+
 
 #endif /* LINUX_LOCKD_BIND_H */
--- linux-1/fs/nfsd/nfsctl.c	2006-09-03 21:51:40.000000000 -0400
+++ linux-2/fs/nfsd/nfsctl.c	2006-09-11 16:52:25.000000000 -0400
@@ -56,6 +56,7 @@ enum {
 	NFSD_List,
 	NFSD_Fh,
 	NFSD_NlmUnlock,
+	NFSD_NlmIgrace,
 	NFSD_Threads,
 	NFSD_Versions,
 	/*
@@ -93,6 +94,7 @@ static ssize_t write_recoverydir(struct
 
 #define NFSDDBG_FACILITY	NFSDDBG_CLUSTER
 static ssize_t write_fo_unlock(struct file *file, char *buf, size_t size);
+static ssize_t write_fo_grace(struct file *file, char *buf, size_t size);
 
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
@@ -104,6 +106,7 @@ static ssize_t (*write_op[])(struct file
 	[NFSD_Getfs] = write_getfs,
 	[NFSD_Fh] = write_filehandle,
[NFSD_NlmUnlock] = write_fo_unlock, + [NFSD_NlmIgrace] = write_fo_grace, [NFSD_Threads] = write_threads, [NFSD_Versions] = write_versions, #ifdef CONFIG_NFSD_V4 @@ -375,6 +378,34 @@ static ssize_t write_fo_unlock(struct fi return strlen(buf); } +static ssize_t write_fo_grace(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + int fsid, rc; + + if (size <= 0) return -EINVAL; + + /* convert string into a valid fsid */ + rc = get_int(&mesg, &fsid); + if (rc) { + dprintk("do_nlm_fsid_grace: invalid fsid (%s)\n", buf); + return rc; + } + + /* call nlm to set the grace period */ + rc = nlmsvc_fo_setgrace(fsid); + if (rc) { + dprintk("nlmsvc_fo_setgrace return rc=%d\n", rc); + return rc; + } + + dprintk("nlm set fsid=%d grace period\n", fsid); + + /* done */ + sprintf(buf, "nlm set per fsid=%d grace period\n", fsid); + return strlen(buf); +} + extern int nfsd_nrthreads(void); static ssize_t write_threads(struct file *file, char *buf, size_t size) @@ -524,6 +555,7 @@ static int nfsd_fill_super(struct super_ [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_NlmUnlock] = {"nlm_unlock", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_NlmIgrace] = {"nlm_set_igrace", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, #ifdef CONFIG_NFSD_V4 --- linux-1/fs/lockd/svc4proc.c 2006-09-13 13:49:35.000000000 -0400 +++ linux-2/fs/lockd/svc4proc.c 2006-09-13 14:03:39.000000000 -0400 @@ -18,9 +18,22 @@ #include #include - #define NLMDBG_FACILITY NLMDBG_CLIENT +extern struct list_head fo_fsid_list; + +/* + * Check for per filesystem failover grace period + */ +static inline int +nlm4svc_fo_grace_period(struct nlm_args *argp) +{ + if (unlikely(!list_empty(&fo_fsid_list))) + return(nlmsvc_fo_check(&argp->lock.fh)); + + return 0; +} + /* * Obtain client and file from arguments */ @@ -89,7 
+102,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp resp->cookie = argp->cookie; /* Don't accept test requests during grace period */ - if (nlmsvc_grace_period) { + if ((nlmsvc_grace_period) || (nlm4svc_fo_grace_period(argp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -119,7 +132,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if ((nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp))) + && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -162,7 +176,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqs resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if ((nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp)))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -195,7 +209,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqs resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period) { + if (nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -330,7 +344,7 @@ nlm4svc_proc_share(struct svc_rqst *rqst resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if ((nlmsvc_grace_period ||(nlm4svc_fo_grace_period(argp))) && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -363,7 +377,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rq resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } --- linux-1/fs/lockd/svcproc.c 2006-09-03 21:51:39.000000000 -0400 +++ linux-2/fs/lockd/svcproc.c 2006-09-13 
13:51:59.000000000 -0400 @@ -50,6 +50,21 @@ cast_to_nlm(u32 status, u32 vers) #endif /* + * Check for per filesystem failover grace period + */ + +extern struct list_head fo_fsid_list; + +int inline +nlmsvc_fo_grace_period(struct nlm_args *argp) +{ + if (unlikely(!list_empty(&fo_fsid_list))) + return(nlmsvc_fo_check(&argp->lock.fh)); + + return 0; +} + +/* * Obtain client and file from arguments */ static u32 @@ -115,7 +130,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, resp->cookie = argp->cookie; /* Don't accept test requests during grace period */ - if (nlmsvc_grace_period) { + if (nlmsvc_grace_period || (nlmsvc_fo_grace_period(argp))) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -146,7 +161,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if ((nlmsvc_grace_period || (nlmsvc_fo_grace_period(argp))) + && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -189,7 +205,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqst resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (nlmsvc_grace_period || nlmsvc_fo_grace_period(argp)) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -222,7 +238,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqst resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period) { + if (nlmsvc_grace_period || nlmsvc_fo_grace_period(argp)) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -359,7 +375,8 @@ nlmsvc_proc_share(struct svc_rqst *rqstp resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if ((nlmsvc_grace_period || (nlmsvc_fo_grace_period(argp))) + && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; 
} @@ -392,7 +409,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqs resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (nlmsvc_grace_period || nlmsvc_fo_grace_period(argp)) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } --- linux-1/fs/lockd/svc.c 2006-09-03 21:51:39.000000000 -0400 +++ linux-2/fs/lockd/svc.c 2006-09-11 16:51:58.000000000 -0400 @@ -71,7 +71,7 @@ static const int nlm_port_min = 0, nlm_ static struct ctl_table_header * nlm_sysctl_table; -static unsigned long set_grace_period(void) +unsigned long set_grace_period(void) { unsigned long grace_period; @@ -81,7 +81,6 @@ static unsigned long set_grace_period(vo / nlm_timeout) * nlm_timeout * HZ; else grace_period = nlm_timeout * 5 * HZ; - nlmsvc_grace_period = 1; return grace_period + jiffies; } @@ -129,6 +128,8 @@ lockd(struct svc_rqst *rqstp) nlmsvc_timeout = nlm_timeout * HZ; grace_period_expire = set_grace_period(); + nlmsvc_grace_period = 1; + (void) nlmsvc_fo_reset_servs(); /* * The main request loop. We don't terminate until the last @@ -143,6 +144,8 @@ lockd(struct svc_rqst *rqstp) if (nlmsvc_ops) { nlmsvc_invalidate_all(); grace_period_expire = set_grace_period(); + nlmsvc_grace_period = 1; + (void) nlmsvc_fo_reset_servs(); } } @@ -189,6 +192,7 @@ lockd(struct svc_rqst *rqstp) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); nlmsvc_pid = 0; + (void) nlmsvc_fo_reset_servs(); } else printk(KERN_DEBUG "lockd: new process, skipping host shutdown\n");