linux-nfs.vger.kernel.org archive mirror
* [NLM] support for a per-mount grace period.
@ 2011-07-28 18:44 Frank van Maarseveen
  2011-07-28 18:44 ` [PATCH 1/2] Minor NLM cleanup preparing for a per-mount based grace time Frank van Maarseveen
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Frank van Maarseveen @ 2011-07-28 18:44 UTC (permalink / raw)
  To: linux-nfs

The following two patches implement support for a per-mount NLM
grace period. The first patch is a minor cleanup which pushes
down locks_in_grace() calls into functions shared by NFS[234]. Two
locks_in_grace() tests have been reordered to avoid duplicate calls at
run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
function instead of an unused variable.

The second patch is the actual implementation. It is currently in use for
a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
where the virtualization is based on using different IPv4 addresses.

The patches have been created using a recent 3.0 tree.
-- 
Frank

* [PATCH 1/2] Minor NLM cleanup preparing for a per-mount based grace time.
  2011-07-28 18:44 [NLM] support for a per-mount grace period Frank van Maarseveen
@ 2011-07-28 18:44 ` Frank van Maarseveen
  2011-07-28 18:44 ` [PATCH 2/2] Support a per-mount NLM grace period Frank van Maarseveen
  2011-07-29 17:11 ` [NLM] support for a per-mount " J. Bruce Fields
  2 siblings, 0 replies; 9+ messages in thread
From: Frank van Maarseveen @ 2011-07-28 18:44 UTC (permalink / raw)
  To: linux-nfs


Signed-off-by: Frank van Maarseveen <frankvm@frankvm.com>
---
 fs/lockd/svc.c              |    8 ++++++--
 fs/lockd/svc4proc.c         |   24 ------------------------
 fs/lockd/svclock.c          |    4 +++-
 fs/lockd/svcproc.c          |   24 ------------------------
 fs/lockd/svcshare.c         |    8 ++++++++
 fs/nfsd/nfs4proc.c          |    2 +-
 fs/nfsd/nfs4state.c         |    2 +-
 include/linux/lockd/lockd.h |    2 +-
 8 files changed, 20 insertions(+), 54 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abfff9d..0efbbfc 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -74,7 +74,10 @@ static const int		nlm_port_min = 0, nlm_port_max = 65535;
 static struct ctl_table_header * nlm_sysctl_table;
 #endif
 
-static unsigned long get_lockd_grace_period(void)
+/**
+ * nlmsvc_grace_period - return configured grace period in jiffies.
+ */
+unsigned long nlmsvc_grace_period(void)
 {
 	/* Note: nlm_timeout should always be nonzero */
 	if (nlm_grace_period)
@@ -82,6 +85,7 @@ static unsigned long get_lockd_grace_period(void)
 	else
 		return nlm_timeout * 5 * HZ;
 }
+EXPORT_SYMBOL_GPL(nlmsvc_grace_period);
 
 static struct lock_manager lockd_manager = {
 };
@@ -95,7 +99,7 @@ static DECLARE_DELAYED_WORK(grace_period_end, grace_ender);
 
 static void set_grace_period(void)
 {
-	unsigned long grace_period = get_lockd_grace_period();
+	unsigned long grace_period = nlmsvc_grace_period();
 
 	locks_start_grace(&lockd_manager);
 	cancel_delayed_work_sync(&grace_period_end);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 9a41fdc..16e9b3b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -150,12 +150,6 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept requests during grace period */
-	if (locks_in_grace()) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -183,12 +177,6 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept new lock requests during grace period */
-	if (locks_in_grace()) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -320,12 +308,6 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept new lock requests during grace period */
-	if (locks_in_grace() && !argp->reclaim) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -353,12 +335,6 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept requests during grace period */
-	if (locks_in_grace()) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index f0179c3..ab62c57 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -587,7 +587,9 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
 				(long long)lock->fl.fl_end);
 
 	/* First, cancel any lock that might be there */
-	nlmsvc_cancel_blocked(file, lock);
+	error = nlmsvc_cancel_blocked(file, lock);
+	if (error == nlm_lck_denied_grace_period)
+		return error;
 
 	lock->fl.fl_type = F_UNLCK;
 	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index d27aab1..4048362 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -180,12 +180,6 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept requests during grace period */
-	if (locks_in_grace()) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -213,12 +207,6 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept new lock requests during grace period */
-	if (locks_in_grace()) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -360,12 +348,6 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept new lock requests during grace period */
-	if (locks_in_grace() && !argp->reclaim) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -393,12 +375,6 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	resp->cookie = argp->cookie;
 
-	/* Don't accept requests during grace period */
-	if (locks_in_grace()) {
-		resp->status = nlm_lck_denied_grace_period;
-		return rpc_success;
-	}
-
 	/* Obtain client and file */
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index b0ae070..ccad267 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -31,6 +31,10 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
 	struct xdr_netobj	*oh = &argp->lock.oh;
 	u8			*ohdata;
 
+	/* Don't accept new share requests during grace period */
+	if (locks_in_grace() && !argp->reclaim)
+		return nlm_lck_denied_grace_period;
+
 	for (share = file->f_shares; share; share = share->s_next) {
 		if (share->s_host == host && nlm_cmp_owner(share, oh))
 			goto update;
@@ -71,6 +75,10 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
 	struct nlm_share	*share, **shpp;
 	struct xdr_netobj	*oh = &argp->lock.oh;
 
+	/* Don't accept unshare requests during grace period */
+	if (locks_in_grace())
+		return nlm_lck_denied_grace_period;
+
 	for (shpp = &file->f_shares; (share = *shpp) != NULL;
 					shpp = &share->s_next) {
 		if (share->s_host == host && nlm_cmp_owner(share, oh)) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e807776..e248b9e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -333,7 +333,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && !locks_in_grace())
 		goto out;
 
 	switch (open->op_claim_type) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3787ec1..741e03a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4061,7 +4061,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (locks_in_grace() && !lock->lk_reclaim)
 		goto out;
 	status = nfserr_no_grace;
-	if (!locks_in_grace() && lock->lk_reclaim)
+	if (lock->lk_reclaim && !locks_in_grace())
 		goto out;
 
 	locks_init_lock(&file_lock);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ff9abff..b1845cb 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -193,7 +193,6 @@ extern struct svc_procedure	nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
 extern struct svc_procedure	nlmsvc_procedures4[];
 #endif
-extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
 extern int			nsm_use_hostnames;
 extern u32			nsm_local_state;
@@ -269,6 +268,7 @@ void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
 					nlm_host_match_fn_t match);
 void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
 void		  nlmsvc_release_call(struct nlm_rqst *);
+unsigned long	  nlmsvc_grace_period(void);
 
 /*
  * File handling for the server personality
-- 
1.7.4.4


* [PATCH 2/2] Support a per-mount NLM grace period.
  2011-07-28 18:44 [NLM] support for a per-mount grace period Frank van Maarseveen
  2011-07-28 18:44 ` [PATCH 1/2] Minor NLM cleanup preparing for a per-mount based grace time Frank van Maarseveen
@ 2011-07-28 18:44 ` Frank van Maarseveen
  2011-07-29 17:11 ` [NLM] support for a per-mount " J. Bruce Fields
  2 siblings, 0 replies; 9+ messages in thread
From: Frank van Maarseveen @ 2011-07-28 18:44 UTC (permalink / raw)
  To: linux-nfs

This implements /proc/fs/nfsd/relock_filesystem, which complements
/proc/fs/nfsd/unlock_filesystem and uses identical syntax. When a
mountpoint pathname is written to it, an NLM grace period is started
for files referring to its super block.
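
For illustration only (the export path is made up and the surrounding
failover scripting is site specific), an administrator could start a
per-mount grace period for one export and, when migrating the export
away again, drop its locks:

	# begin an NLM grace period for the super block behind /export/vol0
	echo /export/vol0 > /proc/fs/nfsd/relock_filesystem

	# when giving up the export: release all locks on that filesystem
	echo /export/vol0 > /proc/fs/nfsd/unlock_filesystem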

Signed-off-by: Frank van Maarseveen <frankvm@frankvm.com>
---
 fs/lockd/grace.c    |   86 +++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/lockd/svc.c      |    1 +
 fs/lockd/svclock.c  |    8 ++--
 fs/lockd/svcshare.c |    4 +-
 fs/nfsd/nfs4proc.c  |   12 ++++---
 fs/nfsd/nfs4state.c |   14 ++++----
 fs/nfsd/nfsctl.c    |   26 +++++++++++++--
 include/linux/fs.h  |    4 ++-
 8 files changed, 130 insertions(+), 25 deletions(-)

diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c
index 183cc1f..6ddb806 100644
--- a/fs/lockd/grace.c
+++ b/fs/lockd/grace.c
@@ -4,8 +4,18 @@
 
 #include <linux/module.h>
 #include <linux/lockd/bind.h>
+#include <linux/lockd/lockd.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+
+struct sb_in_grace {
+	struct list_head	sbg_link;
+	struct vfsmount		*sbg_mnt;
+	u64			sbg_timeout;
+};
 
 static LIST_HEAD(grace_list);
+static LIST_HEAD(sb_grace_list);
 static DEFINE_SPINLOCK(grace_lock);
 
 /**
@@ -46,14 +56,84 @@ void locks_end_grace(struct lock_manager *lm)
 EXPORT_SYMBOL_GPL(locks_end_grace);
 
 /**
+ * locks_start_sb_grace
+ * @path: reference to superblock of FS to enter grace.
+ *
+ * This function is a filesystem specific version of locks_start_grace()
+ */
+int locks_start_sb_grace(struct path *path)
+{
+	struct sb_in_grace *sbg;
+
+	sbg = kzalloc(sizeof(*sbg), GFP_KERNEL);
+	if (!sbg)
+		return -ENOMEM;
+	mntget(path->mnt);
+	sbg->sbg_mnt = path->mnt;
+	sbg->sbg_timeout = get_jiffies_64() + nlmsvc_grace_period();
+	spin_lock(&grace_lock);
+	list_add_tail(&sbg->sbg_link, &sb_grace_list);
+	spin_unlock(&grace_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(locks_start_sb_grace);
+
+/**
+ * locks_end_sb_grace
+ * @path: reference to superblock of FS to leave grace.
+ *
+ * This function is a filesystem specific version of locks_end_grace()
+ * When @path is NULL then all filesystem specific grace periods end.
+ */
+void locks_end_sb_grace(struct path *path)
+{
+	struct sb_in_grace *sbg, *next;
+	struct super_block *sb = path ? path->mnt->mnt_sb : NULL;
+
+	spin_lock(&grace_lock);
+	list_for_each_entry_safe(sbg, next, &sb_grace_list, sbg_link) {
+		if (!sb || sb == sbg->sbg_mnt->mnt_sb) {
+			list_del(&sbg->sbg_link);
+			mntput(sbg->sbg_mnt);
+			kfree(sbg);
+		}
+	}
+	spin_unlock(&grace_lock);
+}
+EXPORT_SYMBOL_GPL(locks_end_sb_grace);
+
+/**
  * locks_in_grace
+ * @sb: super block for checking filesystem specific grace time.
  *
  * Lock managers call this function to determine when it is OK for them
  * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
+ * lock reclaims. @sb can be NULL to test for a global grace period.
  */
-int locks_in_grace(void)
+int locks_in_grace(struct super_block *sb)
 {
-	return !list_empty(&grace_list);
+	struct sb_in_grace *sbg, *next;
+	int in_grace;
+	u64 now;
+
+	if (!list_empty(&grace_list))
+		return true;
+	in_grace = false;
+	now = get_jiffies_64();
+	spin_lock(&grace_lock);
+	list_for_each_entry_safe(sbg, next, &sb_grace_list, sbg_link) {
+		if (time_after64(now, sbg->sbg_timeout)) {
+			list_del(&sbg->sbg_link);
+			mntput(sbg->sbg_mnt);
+			kfree(sbg);
+			continue;
+		}
+		if (sb == sbg->sbg_mnt->mnt_sb) {
+			in_grace = true;
+			break;
+		}
+	}
+	spin_unlock(&grace_lock);
+	return in_grace;
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 0efbbfc..9c53971 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -186,6 +186,7 @@ lockd(void *vrqstp)
 	flush_signals(current);
 	cancel_delayed_work_sync(&grace_period_end);
 	locks_end_grace(&lockd_manager);
+	locks_end_sb_grace(NULL);
 	if (nlmsvc_ops)
 		nlmsvc_invalidate_all();
 	nlm_shutdown_hosts();
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ab62c57..95c85d7 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -419,11 +419,11 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 		goto out;
 	}
 
-	if (locks_in_grace() && !reclaim) {
+	if (locks_in_grace(file->f_file->f_path.dentry->d_sb) && !reclaim) {
 		ret = nlm_lck_denied_grace_period;
 		goto out;
 	}
-	if (reclaim && !locks_in_grace()) {
+	if (reclaim && !locks_in_grace(file->f_file->f_path.dentry->d_sb)) {
 		ret = nlm_lck_denied_grace_period;
 		goto out;
 	}
@@ -531,7 +531,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 		goto out;
 	}
 
-	if (locks_in_grace()) {
+	if (locks_in_grace(file->f_file->f_path.dentry->d_sb)) {
 		ret = nlm_lck_denied_grace_period;
 		goto out;
 	}
@@ -617,7 +617,7 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
-	if (locks_in_grace())
+	if (locks_in_grace(file->f_file->f_path.dentry->d_sb))
 		return nlm_lck_denied_grace_period;
 
 	mutex_lock(&file->f_mutex);
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index ccad267..10c7415 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -32,7 +32,7 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
 	u8			*ohdata;
 
 	/* Don't accept new share requests during grace period */
-	if (locks_in_grace() && !argp->reclaim)
+	if (locks_in_grace(file->f_file->f_path.dentry->d_sb) && !argp->reclaim)
 		return nlm_lck_denied_grace_period;
 
 	for (share = file->f_shares; share; share = share->s_next) {
@@ -76,7 +76,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
 	struct xdr_netobj	*oh = &argp->lock.oh;
 
 	/* Don't accept unshare requests during grace period */
-	if (locks_in_grace())
+	if (locks_in_grace(file->f_file->f_path.dentry->d_sb))
 		return nlm_lck_denied_grace_period;
 
 	for (shpp = &file->f_shares; (share = *shpp) != NULL;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e248b9e..fa325df 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -330,10 +330,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* Openowner is now set, so sequence id will get bumped.  Now we need
 	 * these checks before we do any creates: */
 	status = nfserr_grace;
-	if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+	if (locks_in_grace(cstate->current_fh.fh_dentry->d_sb)
+		&& open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
 		goto out;
 	status = nfserr_no_grace;
-	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && !locks_in_grace())
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
+		&& !locks_in_grace(cstate->current_fh.fh_dentry->d_sb))
 		goto out;
 
 	switch (open->op_claim_type) {
@@ -715,7 +717,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	__be32 status;
 
-	if (locks_in_grace())
+	if (locks_in_grace(cstate->current_fh.fh_dentry->d_sb))
 		return nfserr_grace;
 	status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
 			     remove->rm_name, remove->rm_namelen);
@@ -736,8 +738,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	if (!cstate->save_fh.fh_dentry)
 		return status;
-	if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags
-					& NFSEXP_NOSUBTREECHECK))
+	if (locks_in_grace(cstate->save_fh.fh_dentry->d_sb)
+		&& !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
 		return nfserr_grace;
 	status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
 			     rename->rn_snamelen, &cstate->current_fh,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 741e03a..5f3f3cc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2779,7 +2779,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 		case NFS4_OPEN_CLAIM_NULL:
 			/* Let's not give out any delegations till everyone's
 			 * had the chance to reclaim theirs.... */
-			if (locks_in_grace())
+			if (locks_in_grace(fh->fh_dentry->d_sb))
 				goto out;
 			if (!cb_up || !sop->so_confirmed)
 				goto out;
@@ -2972,7 +2972,7 @@ nfs4_laundromat(void)
 	nfs4_lock_state();
 
 	dprintk("NFSD: laundromat service - starting\n");
-	if (locks_in_grace())
+	if (locks_in_grace(NULL))
 		nfsd4_end_grace();
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&client_lock);
@@ -3117,7 +3117,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 {
 	if (ONE_STATEID(stateid) && (flags & RD_STATE))
 		return nfs_ok;
-	else if (locks_in_grace()) {
+	else if (locks_in_grace(current_fh->fh_dentry->d_sb)) {
 		/* Answer in remaining cases depends on existence of
 		 * conflicting state; so we must wait out the grace period. */
 		return nfserr_grace;
@@ -3136,7 +3136,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 static inline int
 grace_disallows_io(struct inode *inode)
 {
-	return locks_in_grace() && mandatory_lock(inode);
+	return locks_in_grace(inode->i_sb) && mandatory_lock(inode);
 }
 
 static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags)
@@ -4058,10 +4058,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* lock->lk_replay_owner and lock_stp have been created or found */
 
 	status = nfserr_grace;
-	if (locks_in_grace() && !lock->lk_reclaim)
+	if (locks_in_grace(cstate->current_fh.fh_dentry->d_sb) && !lock->lk_reclaim)
 		goto out;
 	status = nfserr_no_grace;
-	if (lock->lk_reclaim && !locks_in_grace())
+	if (lock->lk_reclaim && !locks_in_grace(cstate->current_fh.fh_dentry->d_sb))
 		goto out;
 
 	locks_init_lock(&file_lock);
@@ -4166,7 +4166,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	int error;
 	__be32 status;
 
-	if (locks_in_grace())
+	if (locks_in_grace(cstate->current_fh.fh_dentry->d_sb))
 		return nfserr_grace;
 
 	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c771614..af992ef 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -29,6 +29,7 @@ enum {
 	NFSD_Fh,
 	NFSD_FO_UnlockIP,
 	NFSD_FO_UnlockFS,
+	NFSD_FO_RelockFS,
 	NFSD_Threads,
 	NFSD_Pool_Threads,
 	NFSD_Pool_Stats,
@@ -53,6 +54,7 @@ enum {
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
 static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
 static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
+static ssize_t write_relock_fs(struct file *file, char *buf, size_t size);
 static ssize_t write_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_versions(struct file *file, char *buf, size_t size);
@@ -68,6 +70,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Fh] = write_filehandle,
 	[NFSD_FO_UnlockIP] = write_unlock_ip,
 	[NFSD_FO_UnlockFS] = write_unlock_fs,
+	[NFSD_FO_RelockFS] = write_relock_fs,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Pool_Threads] = write_pool_threads,
 	[NFSD_Versions] = write_versions,
@@ -229,7 +232,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
 }
 
 /**
- * write_unlock_fs - Release all locks on a local file system
+ * start_stop_fs_locking - Drop all locks or enter grace period for a FS
  *
  * Experimental.
  *
@@ -242,7 +245,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
  *			returns one if one or more locks were not released
  *	On error:	return code is negative errno value
  */
-static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
+static ssize_t start_stop_fs_locking(struct file *file, char *buf, size_t size, int start)
 {
 	struct path path;
 	char *fo_path;
@@ -272,12 +275,27 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
 	 * 2.  Is that directory a mount point, or
 	 * 3.  Is that directory the root of an exported file system?
 	 */
-	error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb);
+	if (start)
+		error = locks_start_sb_grace(&path);
+	else {
+		locks_end_sb_grace(&path);	// drop sb reference
+		error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb);
+	}
 
 	path_put(&path);
 	return error;
 }
 
+static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
+{
+	return start_stop_fs_locking(file, buf, size, false);
+}
+
+static ssize_t write_relock_fs(struct file *file, char *buf, size_t size)
+{
+	return start_stop_fs_locking(file, buf, size, true);
+}
+
 /**
  * write_filehandle - Get a variable-length NFS file handle by path
  *
@@ -1070,6 +1088,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 					&transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_FO_UnlockFS] = {"unlock_filesystem",
 					&transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_FO_RelockFS] = {"relock_filesystem",
+					&transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f23bcb7..752f6b8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1086,7 +1086,9 @@ struct lock_manager {
 
 void locks_start_grace(struct lock_manager *);
 void locks_end_grace(struct lock_manager *);
-int locks_in_grace(void);
+int locks_start_sb_grace(struct path *);
+void locks_end_sb_grace(struct path *);
+int locks_in_grace(struct super_block *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
-- 
1.7.4.4


* Re: [NLM] support for a per-mount grace period.
  2011-07-28 18:44 [NLM] support for a per-mount grace period Frank van Maarseveen
  2011-07-28 18:44 ` [PATCH 1/2] Minor NLM cleanup preparing for a per-mount based grace time Frank van Maarseveen
  2011-07-28 18:44 ` [PATCH 2/2] Support a per-mount NLM grace period Frank van Maarseveen
@ 2011-07-29 17:11 ` J. Bruce Fields
  2011-07-29 17:40   ` Chuck Lever
                     ` (2 more replies)
  2 siblings, 3 replies; 9+ messages in thread
From: J. Bruce Fields @ 2011-07-29 17:11 UTC (permalink / raw)
  To: Frank van Maarseveen; +Cc: linux-nfs, Pavel Emelyanov, jlayton

On Thu, Jul 28, 2011 at 08:44:18PM +0200, Frank van Maarseveen wrote:
> The following two patches implement support for a per-mount NLM
> grace period. The first patch is a minor cleanup which pushes
> down locks_in_grace() calls into functions shared by NFS[234]. Two
> locks_in_grace() tests have been reordered to avoid duplicate calls at
> run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
> function instead of an unused variable.
> 
> The second patch is the actual implementation. It is currently in use for
> a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
> where the virtualization is based on using different IPv4 addresses.

Thanks, that is something we'd like to have working well.

Off the top of my head:
	- Do you have a plan for dealing with NFSv4?
	- Do you need any more kernel changes to get this working?
	- What about userspace changes?
	- Do you support migrating/failing over virtual nfs service
	  between machines, and if so, how are you doing it?

I've also been trying to work out whether it would be cleaner to do this
sort of thing by building on top of Pavel Emelyanov's container
work--cc'd.

(Obnoxious of me, because I know he's working on something else right
now, but perhaps he won't mind setting this email aside for now and
responding when he gets some time back....)

--b.

* Re: [NLM] support for a per-mount grace period.
  2011-07-29 17:11 ` [NLM] support for a per-mount " J. Bruce Fields
@ 2011-07-29 17:40   ` Chuck Lever
  2011-07-30  9:44   ` Frank van Maarseveen
  2011-08-23 14:19   ` Frank van Maarseveen
  2 siblings, 0 replies; 9+ messages in thread
From: Chuck Lever @ 2011-07-29 17:40 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Frank van Maarseveen, linux-nfs, Pavel Emelyanov, jlayton


On Jul 29, 2011, at 1:11 PM, J. Bruce Fields wrote:

> On Thu, Jul 28, 2011 at 08:44:18PM +0200, Frank van Maarseveen wrote:
>> The following two patches implement support for a per-mount NLM
>> grace period. The first patch is a minor cleanup which pushes
>> down locks_in_grace() calls into functions shared by NFS[234]. Two
>> locks_in_grace() tests have been reordered to avoid duplicate calls at
>> run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
>> function instead of an unused variable.
>> 
>> The second patch is the actual implementation. It is currently in use for
>> a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
>> where the virtualization is based on using different IPv4 addresses.
> 
> Thanks, that is something we'd like to have working well.
> 
> Off the top of my head:
> 	- Do you have a plan for dealing with NFSv4?
> 	- Do you need any more kernel changes to get this working?
> 	- What about userspace changes?
> 	- Do you support migrating/failing over virtual nfs service
> 	  between machines, and if so, how are you doing it?

Nit: At one point we had considered doing something like killing sm-notify when the grace period ends on the server.  This would make that harder to do, I would think.

> I've also been trying to work out whether it would be cleaner to do this
> sort of thing by building on top of Pavel Emelyanov's container
> work--cc'd.
> 
> (Obnoxious of me, because I know he's working on something else right
> now, but perhaps he won't mind setting this email aside for now and
> responding when he gets some time back....)
> 
> --b.

-- 
Chuck Lever
chuck[dot]lever[at]oracle[dot]com





* Re: [NLM] support for a per-mount grace period.
  2011-07-29 17:11 ` [NLM] support for a per-mount " J. Bruce Fields
  2011-07-29 17:40   ` Chuck Lever
@ 2011-07-30  9:44   ` Frank van Maarseveen
  2011-08-23 14:19   ` Frank van Maarseveen
  2 siblings, 0 replies; 9+ messages in thread
From: Frank van Maarseveen @ 2011-07-30  9:44 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: linux-nfs, Pavel Emelyanov, jlayton

On Fri, Jul 29, 2011 at 01:11:27PM -0400, J. Bruce Fields wrote:
> On Thu, Jul 28, 2011 at 08:44:18PM +0200, Frank van Maarseveen wrote:
> > The following two patches implement support for a per-mount NLM
> > grace period. The first patch is a minor cleanup which pushes
> > down locks_in_grace() calls into functions shared by NFS[234]. Two
> > locks_in_grace() tests have been reordered to avoid duplicate calls at
> > run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
> > function instead of an unused variable.
> > 
> > The second patch is the actual implementation. It is currently in use for
> > a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
> > where the virtualization is based on using different IPv4 addresses.
> 
> Thanks, that is something we'd like to have working well.
> 
> Off the top of my head:
> 	- Do you have a plan for dealing with NFSv4?

Not yet but I'm not aware of any additional issue there (I haven't used v4 yet).

> 	- Do you need any more kernel changes to get this working?

No.

> 	- What about userspace changes?

None except for scripting. Years ago I had to grab a random sm-notify
to make it work. At that time (2.6.27) there was a different patch
for fsid-based grace times from Wendy Cheng.

> 	- Do you support migrating/failing over virtual nfs service
> 	  between machines, and if so, how are you doing it?

Migration basically works as follows:

-	Create a network block device on the source machine to access a
	new physical block device on the destination.

-	Shut down the virtual server, create a RAID-1 device on top of
	the original block device and start the server for the resulting
	device. The mdadm command (v2.5.6, 2006) is something like:

	mdadm -B -ayes -n2 -l1 $md $localdev -b $bitmap --write-behind missing

-	Add the network block device to synchronize the destination:

	mdadm $md --add --write-mostly $nbd

-	When the RAID-1 has synchronized, shut down the virtual server
	on the source machine and start it on the destination, i.e. migrate
	its IP address (a rough sketch of this step follows below).
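
	The address move itself can be sketched roughly as follows (the
	interface name and prefix length are placeholders, not part of
	the setup described above):

	# on the source machine: release the service address
	ip addr del $ADDR/24 dev eth0

	# on the destination machine: take over and announce the address
	ip addr add $ADDR/24 dev eth0
	arping -c 3 -U -I eth0 $ADDR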

A virtual server IP address removal is always accompanied by an

	iptables -I OUTPUT -s $ADDR -j DROP

because traffic can still be in flight, causing trouble (I have seen
ESTALE). Every virtual server has its own statd directory (and a
private "state" file), basically maintained from /var/lib/nfs/*. Upon
startup after a crash the latter must be saved before the standard
rpc.statd/sm-notify get a chance to empty it.
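
A per-server statd invocation could look roughly like this; the state
directory naming and the option usage below are assumptions about the
local scripting, not something the patches provide:

	# one NSM state directory per virtual server, seeded from /var/lib/nfs/*
	rpc.statd -P /var/lib/nfs-$VSERVER
	sm-notify -f -P /var/lib/nfs-$VSERVER -v $ADDR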

-- 
Frank

* Re: [NLM] support for a per-mount grace period.
  2011-07-29 17:11 ` [NLM] support for a per-mount " J. Bruce Fields
  2011-07-29 17:40   ` Chuck Lever
  2011-07-30  9:44   ` Frank van Maarseveen
@ 2011-08-23 14:19   ` Frank van Maarseveen
  2011-08-23 14:32     ` J. Bruce Fields
  2 siblings, 1 reply; 9+ messages in thread
From: Frank van Maarseveen @ 2011-08-23 14:19 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: linux-nfs, Pavel Emelyanov, jlayton

On Fri, Jul 29, 2011 at 01:11:27PM -0400, J. Bruce Fields wrote:
> On Thu, Jul 28, 2011 at 08:44:18PM +0200, Frank van Maarseveen wrote:
> > The following two patches implement support for a per-mount NLM
> > grace period. The first patch is a minor cleanup which pushes
> > down locks_in_grace() calls into functions shared by NFS[234]. Two
> > locks_in_grace() tests have been reordered to avoid duplicate calls at
> > run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
> > function instead of an unused variable.
> > 
> > The second patch is the actual implementation. It is currently in use for
> > a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
> > where the virtualization is based on using different IPv4 addresses.
> 
> Thanks, that is something we'd like to have working well.

Are the patches queued anywhere for inclusion in mainline?

-- 
Frank

* Re: [NLM] support for a per-mount grace period.
  2011-08-23 14:19   ` Frank van Maarseveen
@ 2011-08-23 14:32     ` J. Bruce Fields
  2011-08-23 15:49       ` Frank van Maarseveen
  0 siblings, 1 reply; 9+ messages in thread
From: J. Bruce Fields @ 2011-08-23 14:32 UTC (permalink / raw)
  To: Frank van Maarseveen; +Cc: linux-nfs, Pavel Emelyanov, jlayton

On Tue, Aug 23, 2011 at 04:19:09PM +0200, Frank van Maarseveen wrote:
> On Fri, Jul 29, 2011 at 01:11:27PM -0400, J. Bruce Fields wrote:
> > On Thu, Jul 28, 2011 at 08:44:18PM +0200, Frank van Maarseveen wrote:
> > > The following two patches implement support for a per-mount NLM
> > > grace period. The first patch is a minor cleanup which pushes
> > > down locks_in_grace() calls into functions shared by NFS[234]. Two
> > > locks_in_grace() tests have been reordered to avoid duplicate calls at
> > > run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
> > > function instead of an unused variable.
> > > 
> > > The second patch is the actual implementation. It is currently in use for
> > > a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
> > > where the virtualization is based on using different IPv4 addresses.
> > 
> > Thanks, that is something we'd like to have working well.
> 
> Are the patches queued anywhere for inclusion in mainline?

To merge it upstream, at a minimum we need NFSv4 working as well.  It
causes problems when version n+1 lacks features that version n has,
especially in the presence of clients that automatically negotiate up.

I'm also inclined to think that making the various data structures and
interfaces network-namespace-dependent is going to result in the cleaner
and more useful solution.

I probably should take that first cleanup patch at least, though.

--b.

* Re: [NLM] support for a per-mount grace period.
  2011-08-23 14:32     ` J. Bruce Fields
@ 2011-08-23 15:49       ` Frank van Maarseveen
  0 siblings, 0 replies; 9+ messages in thread
From: Frank van Maarseveen @ 2011-08-23 15:49 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: linux-nfs, Pavel Emelyanov, jlayton

On Tue, Aug 23, 2011 at 10:32:57AM -0400, J. Bruce Fields wrote:
> On Tue, Aug 23, 2011 at 04:19:09PM +0200, Frank van Maarseveen wrote:
> > On Fri, Jul 29, 2011 at 01:11:27PM -0400, J. Bruce Fields wrote:
> > > On Thu, Jul 28, 2011 at 08:44:18PM +0200, Frank van Maarseveen wrote:
> > > > The following two patches implement support for a per-mount NLM
> > > > grace period. The first patch is a minor cleanup which pushes
> > > > down locks_in_grace() calls into functions shared by NFS[234]. Two
> > > > locks_in_grace() tests have been reordered to avoid duplicate calls at
> > > > run-time (assuming gcc is smart enough). nlmsvc_grace_period is now a
> > > > function instead of an unused variable.
> > > > 
> > > > The second patch is the actual implementation. It is currently in use for
> > > > a number of NFSv3 virtual servers on one physical machine running 2.6.39.3
> > > > where the virtualization is based on using different IPv4 addresses.
> > > 
> > > Thanks, that is something we'd like to have working well.
> > 
> > Are the patches queued anywhere for inclusion in mainline?
> 
> To merge it upstream, at a minimum we need NFSv4 working as well.  It
> causes problems when version n+1 lacks features that version n has,
> especially in the presence of clients that automatically negotiate up.

This is not possible since there are no (standard) userland tools which
start the new per-mount grace time. Anyway, the patch implements it for
all NFS versions, including NFSv4.

> 
> I'm also inclined to think that making the various data structures and
> interfaces network-namespace-dependent is going to result in the cleaner
> and more useful solution.

I don't understand this. NLM has been refactored in the past for NFSv4,
adding a shared fs/lockd/grace.c for all NFS versions. I think this
is good.

> 
> I probably should take that first cleanup patch at least, though.

Which git repo do you use for this?


-- 
Frank
