Linux NFS development
 help / color / mirror / Atom feed
* [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state
  2023-11-24  0:23 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
@ 2023-11-24  0:23 ` NeilBrown
  0 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:23 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

The NFSv4 protocol allows state to be revoked by the admin and has error
codes which allow this to be communicated to the client.

This patch
 - introduces a new state-id status NFS4_STID_ADMIN_REVOKE
   which can be set on open, lock, or delegation state.
 - reports NFS4ERR_ADMIN_REVOKED when these are accessed
 - introduces a per-client counter of these states and returns
   SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
   Decrements this when freeing any admin-revoked state.
 - introduces stub code to find all interesting states for a given
   superblock so they can be revoked via the 'unlock_filesystem'
   file in /proc/fs/nfsd/
   No actual states are handled yet.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/nfsctl.c    |  1 +
 fs/nfsd/nfsd.h      |  1 +
 fs/nfsd/state.h     | 10 +++++++
 fs/nfsd/trace.h     |  3 +-
 5 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b9239f2ebc79..477a9e9aebbd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1215,6 +1215,8 @@ nfs4_put_stid(struct nfs4_stid *s)
 		return;
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
+		atomic_dec(&s->sc_client->cl_admin_revoked);
 	nfs4_free_cpntf_statelist(clp->net, s);
 	spin_unlock(&clp->cl_lock);
 	s->sc_free(s);
@@ -1534,6 +1536,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 	}
 
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
+		atomic_dec(&s->sc_client->cl_admin_revoked);
 	list_add(&stp->st_locks, reaplist);
 }
 
@@ -1679,6 +1683,54 @@ static void release_openowner(struct nfs4_openowner *oo)
 	nfs4_put_stateowner(&oo->oo_owner);
 }
 
+static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
+					  struct super_block *sb,
+					  unsigned int sc_types)
+{
+	unsigned long id, tmp;
+	struct nfs4_stid *stid;
+
+	spin_lock(&clp->cl_lock);
+	idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+		if ((stid->sc_type & sc_types) &&
+		    stid->sc_status == 0 &&
+		    stid->sc_file->fi_inode->i_sb == sb) {
+			refcount_inc(&stid->sc_count);
+			break;
+		}
+	spin_unlock(&clp->cl_lock);
+	return stid;
+}
+
+void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	unsigned int idhashval;
+	unsigned int sc_types;
+
+	sc_types = 0;
+
+	spin_lock(&nn->client_lock);
+	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
+		struct list_head *head = &nn->conf_id_hashtbl[idhashval];
+		struct nfs4_client *clp;
+	retry:
+		list_for_each_entry(clp, head, cl_idhash) {
+			struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
+								  sc_types);
+			if (stid) {
+				spin_unlock(&nn->client_lock);
+				switch (stid->sc_type) {
+				}
+				nfs4_put_stid(stid);
+				spin_lock(&nn->client_lock);
+				goto retry;
+			}
+		}
+	}
+	spin_unlock(&nn->client_lock);
+}
+
 static inline int
 hash_sessionid(struct nfs4_sessionid *sessionid)
 {
@@ -2550,6 +2602,8 @@ static int client_info_show(struct seq_file *m, void *v)
 	}
 	seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
 	seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+	seq_printf(m, "admin-revoked states: %d\n",
+		   atomic_read(&clp->cl_admin_revoked));
 	drop_client(clp);
 
 	return 0;
@@ -4109,6 +4163,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	if (!list_empty(&clp->cl_revoked))
 		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+	if (atomic_read(&clp->cl_admin_revoked))
+		seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
 out_no_session:
 	if (conn)
 		free_conn(conn);
@@ -4597,7 +4653,9 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
 {
 	__be32 ret = nfs_ok;
 
-	if (s->sc_status & NFS4_STID_REVOKED)
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
+		ret = nfserr_admin_revoked;
+	else if (s->sc_status & NFS4_STID_REVOKED)
 		ret = nfserr_deleg_revoked;
 	else if (s->sc_status & NFS4_STID_CLOSED)
 		ret = nfserr_bad_stateid;
@@ -5188,6 +5246,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
 	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
 	if (deleg == NULL)
 		goto out;
+	if (deleg->dl_stid.sc_status & NFS4_STID_ADMIN_REVOKED) {
+		nfs4_put_stid(&deleg->dl_stid);
+		status = nfserr_admin_revoked;
+		goto out;
+	}
 	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
 		nfs4_put_stid(&deleg->dl_stid);
 		status = nfserr_deleg_revoked;
@@ -6508,6 +6571,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		 */
 		statusmask |= NFS4_STID_REVOKED;
 
+	statusmask |= NFS4_STID_ADMIN_REVOKED;
+
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
 		CLOSE_STATEID(stateid))
 		return nfserr_bad_stateid;
@@ -6526,6 +6591,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		nfs4_put_stid(stid);
 		return nfserr_deleg_revoked;
 	}
+	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
+		nfs4_put_stid(stid);
+		return nfserr_admin_revoked;
+	}
 	*s = stid;
 	return nfs_ok;
 }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6eeee149370..a622d773f428 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -285,6 +285,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
 	 * 3.  Is that directory the root of an exported file system?
 	 */
 	error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+	nfsd4_revoke_states(netns(file), path.dentry->d_sb);
 
 	path_put(&path);
 	return error;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index f5ff42f41ee7..d46203eac3c8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -280,6 +280,7 @@ void		nfsd_lockd_shutdown(void);
 #define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
 #define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
 #define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
+#define	nfserr_admin_revoked	cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
 #define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
 #define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
 #define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index bb00dcd4c1ba..584378c43e0a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -112,6 +112,7 @@ struct nfs4_stid {
 #define NFS4_STID_CLOSED	BIT(0)
 /* For a deleg stateid kept around only to process free_stateid's: */
 #define NFS4_STID_REVOKED	BIT(1)
+#define NFS4_STID_ADMIN_REVOKED	BIT(2)
 	unsigned short		sc_status;
 
 	struct list_head	sc_cp_list;
@@ -388,6 +389,7 @@ struct nfs4_client {
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
 	u32			cl_minorversion;
+	atomic_t		cl_admin_revoked; /* count of admin-revoked states */
 	/* NFSv4.1 client implementation id: */
 	struct xdr_netobj	cl_nii_domain;
 	struct xdr_netobj	cl_nii_name;
@@ -752,6 +754,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
 }
 struct nfsd_file *find_any_file(struct nfs4_file *f);
 
+#ifdef CONFIG_NFSD_V4
+void nfsd4_revoke_states(struct net *net, struct super_block *sb);
+#else
+static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+}
+#endif
+
 /* grace period management */
 void nfsd4_end_grace(struct nfsd_net *nn);
 
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 568b4ec9a2af..281aeb42c9eb 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -651,7 +651,8 @@ DEFINE_STATESEQID_EVENT(open_confirm);
 #define show_stid_status(x)						\
 	__print_flags(x, "|",						\
 		{ NFS4_STID_CLOSED,		"CLOSED" },		\
-		{ NFS4_STID_REVOKED,		"REVOKED" })		\
+		{ NFS4_STID_REVOKED,		"REVOKED" },		\
+		{ NFS4_STID_ADMIN_REVOKED,	"ADMIN_REVOKED" })
 
 DECLARE_EVENT_CLASS(nfsd_stid_class,
 	TP_PROTO(
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state
@ 2023-11-24  0:28 NeilBrown
  2023-11-24  0:28 ` [PATCH 01/11] nfsd: hold ->cl_lock for hash_delegation_locked() NeilBrown
                   ` (10 more replies)
  0 siblings, 11 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

This revision addes revocation of layout state, is rebased on latest
nfsd-next, and addresses a couple of other review comments.

Thanks,
NeilBrown

 [PATCH 01/11] nfsd: hold ->cl_lock for hash_delegation_locked()
 [PATCH 02/11] nfsd: don't call functions with side-effecting inside
 [PATCH 03/11] nfsd: avoid race after unhash_delegation_locked()
 [PATCH 04/11] nfsd: split sc_status out of sc_type
 [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state
 [PATCH 06/11] nfsd: allow admin-revoked state to appear in
 [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed.
 [PATCH 08/11] nfsd: allow lock state ids to be revoked and then freed
 [PATCH 09/11] nfsd: allow open state ids to be revoked and then freed
 [PATCH 10/11] nfsd: allow delegation state ids to be revoked and then
 [PATCH 11/11] nfsd: allow layout state to be admin-revoked.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 01/11] nfsd: hold ->cl_lock for hash_delegation_locked()
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24  0:28 ` [PATCH 02/11] nfsd: don't call functions with side-effecting inside WARN_ON() NeilBrown
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

The protocol for creating a new state in nfsd is to allocated the state
leaving it largely uninitialised, add that state to the ->cl_stateids
idr so as to reserve a state id, then complete initialisation of the
state and only set ->sc_type to non-zero once the state is fully
initialised.

If a state is found in the idr with ->sc_type == 0, it is ignored.
The ->cl_lock lock is used to avoid races - it is held while checking
sc_type during lookup, and held when a non-zero value is stored in
->sc_type.

... except... hash_delegation_locked() finalises the initialisation of a
delegation state, but does NOT hold ->cl_lock.

So this patch takes ->cl_lock at the appropriate time w.r.t other locks,
and so ensures there are no races (which are extremely unlikely in any
case).
As ->fi_lock is often taken when ->cl_lock is held, we need to take
->cl_lock first of those two.
Currently ->cl_lock and state_lock are never both taken at the same time.
We need both for this patch so an arbitrary choice is needed concerning
which to take first.  As state_lock is more global, it might be more
contended, so take it first.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 40415929e2ae..042c7a50f425 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1317,6 +1317,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
 
 	lockdep_assert_held(&state_lock);
 	lockdep_assert_held(&fp->fi_lock);
+	lockdep_assert_held(&clp->cl_lock);
 
 	if (nfs4_delegation_exists(clp, fp))
 		return -EAGAIN;
@@ -5608,12 +5609,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 		goto out_unlock;
 
 	spin_lock(&state_lock);
+	spin_lock(&clp->cl_lock);
 	spin_lock(&fp->fi_lock);
 	if (fp->fi_had_conflict)
 		status = -EAGAIN;
 	else
 		status = hash_delegation_locked(dp, fp);
 	spin_unlock(&fp->fi_lock);
+	spin_unlock(&clp->cl_lock);
 	spin_unlock(&state_lock);
 
 	if (status)
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 02/11] nfsd: don't call functions with side-effecting inside WARN_ON()
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
  2023-11-24  0:28 ` [PATCH 01/11] nfsd: hold ->cl_lock for hash_delegation_locked() NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24  0:28 ` [PATCH 03/11] nfsd: avoid race after unhash_delegation_locked() NeilBrown
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Code like:

    WARN_ON(foo())

looks like an assertion and might not be expected to have any side
effects.
When testing if a function with side-effects fails a construct like

    if (foo())
       WARN_ON(1);

makes the intent more obvious.

nfsd has several WARN_ON calls where the test has side effects, so it
would be good to change them.  These cases don't really need the
WARN_ON.  They have never failed in 8 years of usage so let's just
remove the WARN_ON wrapper.

Suggested-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 042c7a50f425..dd01d0b9e21e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1605,7 +1605,7 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
 	while (!list_empty(&open_stp->st_locks)) {
 		stp = list_entry(open_stp->st_locks.next,
 				struct nfs4_ol_stateid, st_locks);
-		WARN_ON(!unhash_lock_stateid(stp));
+		unhash_lock_stateid(stp);
 		put_ol_stateid_locked(stp, reaplist);
 	}
 }
@@ -2234,7 +2234,7 @@ __destroy_client(struct nfs4_client *clp)
 	spin_lock(&state_lock);
 	while (!list_empty(&clp->cl_delegations)) {
 		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-		WARN_ON(!unhash_delegation_locked(dp));
+		unhash_delegation_locked(dp);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -6234,7 +6234,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		if (!state_expired(&lt, dp->dl_time))
 			break;
-		WARN_ON(!unhash_delegation_locked(dp));
+		unhash_delegation_locked(dp);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -8060,7 +8060,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		stp = list_first_entry(&lo->lo_owner.so_stateids,
 				       struct nfs4_ol_stateid,
 				       st_perstateowner);
-		WARN_ON(!unhash_lock_stateid(stp));
+		unhash_lock_stateid(stp);
 		put_ol_stateid_locked(stp, &reaplist);
 	}
 	spin_unlock(&clp->cl_lock);
@@ -8353,7 +8353,7 @@ nfs4_state_shutdown_net(struct net *net)
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-		WARN_ON(!unhash_delegation_locked(dp));
+		unhash_delegation_locked(dp);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 03/11] nfsd: avoid race after unhash_delegation_locked()
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
  2023-11-24  0:28 ` [PATCH 01/11] nfsd: hold ->cl_lock for hash_delegation_locked() NeilBrown
  2023-11-24  0:28 ` [PATCH 02/11] nfsd: don't call functions with side-effecting inside WARN_ON() NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24  0:28 ` [PATCH 04/11] nfsd: split sc_status out of sc_type NeilBrown
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

NFS4_CLOSED_DELEG_STID and NFS4_REVOKED_DELEG_STID are similar in
purpose.
REVOKED is used for NFSv4.1 states which have been revoked because the
lease has expired.  CLOSED is used in other cases.
The difference has two practical effects.
1/ REVOKED states are on the ->cl_revoked list
2/ REVOKED states result in nfserr_deleg_revoked from
   nfsd4_verify_open_stid() asnd nfsd4_validate_stateid while
   CLOSED states result in nfserr_bad_stid.

Currently a state that is being revoked is first set to "CLOSED" in
unhash_delegation_locked(), then possibly to "REVOKED" in
revoke_delegation(), at which point it is added to the cl_revoked list.

It is possible that a stateid test could see the CLOSED state
which really should be REVOKED, and so return the wrong error code.  So
it is safest to remove this window of inconsistency.

With this patch, unhash_delegation_locked() always set the state
correctly, and revoke_delegation() no longer changes the state.

Also remove a redundant test on minorversion when
NFS4_REVOKED_DELEG_STID is seen - it can only be seen when minorversion
is non-zero.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index dd01d0b9e21e..276afcba07a0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1334,7 +1334,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
 }
 
 static bool
-unhash_delegation_locked(struct nfs4_delegation *dp)
+unhash_delegation_locked(struct nfs4_delegation *dp, unsigned char type)
 {
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
 
@@ -1343,7 +1343,9 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
 	if (!delegation_hashed(dp))
 		return false;
 
-	dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+	if (dp->dl_stid.sc_client->cl_minorversion == 0)
+		type = NFS4_CLOSED_DELEG_STID;
+	dp->dl_stid.sc_type = type;
 	/* Ensure that deleg break won't try to requeue it */
 	++dp->dl_time;
 	spin_lock(&fp->fi_lock);
@@ -1359,7 +1361,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
 	bool unhashed;
 
 	spin_lock(&state_lock);
-	unhashed = unhash_delegation_locked(dp);
+	unhashed = unhash_delegation_locked(dp, NFS4_CLOSED_DELEG_STID);
 	spin_unlock(&state_lock);
 	if (unhashed)
 		destroy_unhashed_deleg(dp);
@@ -1373,9 +1375,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
 	trace_nfsd_stid_revoke(&dp->dl_stid);
 
-	if (clp->cl_minorversion) {
+	if (dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
 		spin_lock(&clp->cl_lock);
-		dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
 		refcount_inc(&dp->dl_stid.sc_count);
 		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
 		spin_unlock(&clp->cl_lock);
@@ -2234,7 +2235,7 @@ __destroy_client(struct nfs4_client *clp)
 	spin_lock(&state_lock);
 	while (!list_empty(&clp->cl_delegations)) {
 		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-		unhash_delegation_locked(dp);
+		unhash_delegation_locked(dp, NFS4_CLOSED_DELEG_STID);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -5196,8 +5197,7 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
 		goto out;
 	if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
 		nfs4_put_stid(&deleg->dl_stid);
-		if (cl->cl_minorversion)
-			status = nfserr_deleg_revoked;
+		status = nfserr_deleg_revoked;
 		goto out;
 	}
 	flags = share_access_to_flags(open->op_share_access);
@@ -6234,7 +6234,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		if (!state_expired(&lt, dp->dl_time))
 			break;
-		unhash_delegation_locked(dp);
+		unhash_delegation_locked(dp, NFS4_REVOKED_DELEG_STID);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -8353,7 +8353,7 @@ nfs4_state_shutdown_net(struct net *net)
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-		unhash_delegation_locked(dp);
+		unhash_delegation_locked(dp, NFS4_CLOSED_DELEG_STID);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 04/11] nfsd: split sc_status out of sc_type
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (2 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 03/11] nfsd: avoid race after unhash_delegation_locked() NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24  0:28 ` [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state NeilBrown
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

sc_type identifies the type of a state - open, lock, deleg, layout - and
also the status of a state - closed or revoked.

This is a bit untidy and could get worse when "admin-revoked" states are
added.  So clean it up.

With this patch, the type is now all that is stored in sc_type.  This is
zero when the state is first added to ->cl_stateids (causing it to be
ignored), and is then set appropriately once it is fully initialised.
It is set under ->cl_lock to ensure atomicity w.r.t lookup.  It is now
never cleared.

sc_type is still a bit-set even though at most one bit is set.  This allows
lookup functions to be given a bitmap of acceptable types.

sc_type is now an unsigned short rather than char.  There is no value in
restricting to just 8 bits.

The status is stored in a separate unsigned short named "sc_status".  It
has two flags: NFS4_STID_CLOSED and NFS4_STID_REVOKED.
CLOSED combines NFS4_CLOSED_STID, NFS4_CLOSED_DELEG_STID, and is used
for LOCK_STID and LAYOUT_STID instead of setting the sc_type to zero.
These flags are only ever set, never cleared.
For deleg stateids they are set under the global state_lock.
For open and lock stateids they are set under ->cl_lock.
For layout stateids they are set under ->ls_lock

nfs4_unhash_stid() has been removed, and we never set sc_type = 0.  This
was only used for LOCK and LAYOUT stids and they now use
NFS4_STID_CLOSED.

Also TRACE_DEFINE_NUM() calls for the various STD #define have been
removed because these things are not enums, and so that call is
incorrect.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4layouts.c |   6 +-
 fs/nfsd/nfs4state.c   | 165 +++++++++++++++++++++---------------------
 fs/nfsd/state.h       |  40 +++++++---
 fs/nfsd/trace.h       |  23 +++---
 4 files changed, 122 insertions(+), 112 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 5e8096bc5eaa..77656126ad2a 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -269,13 +269,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
 {
 	struct nfs4_layout_stateid *ls;
 	struct nfs4_stid *stid;
-	unsigned char typemask = NFS4_LAYOUT_STID;
+	unsigned short typemask = NFS4_LAYOUT_STID;
 	__be32 status;
 
 	if (create)
 		typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
 
-	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+	status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
 			net_generic(SVC_NET(rqstp), nfsd_net_id));
 	if (status)
 		goto out;
@@ -518,7 +518,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
 		lrp->lrs_present = true;
 	} else {
 		trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
-		nfs4_unhash_stid(&ls->ls_stid);
+		ls->ls_stid.sc_status |= NFS4_STID_CLOSED;
 		lrp->lrs_present = false;
 	}
 	spin_unlock(&ls->ls_lock);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 276afcba07a0..b9239f2ebc79 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1265,11 +1265,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
 	nfs4_put_stid(&dp->dl_stid);
 }
 
-void nfs4_unhash_stid(struct nfs4_stid *s)
-{
-	s->sc_type = 0;
-}
-
 /**
  * nfs4_delegation_exists - Discover if this delegation already exists
  * @clp:     a pointer to the nfs4_client we're granting a delegation to
@@ -1334,7 +1329,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
 }
 
 static bool
-unhash_delegation_locked(struct nfs4_delegation *dp, unsigned char type)
+unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
 {
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
 
@@ -1344,8 +1339,9 @@ unhash_delegation_locked(struct nfs4_delegation *dp, unsigned char type)
 		return false;
 
 	if (dp->dl_stid.sc_client->cl_minorversion == 0)
-		type = NFS4_CLOSED_DELEG_STID;
-	dp->dl_stid.sc_type = type;
+		statusmask = NFS4_STID_CLOSED;
+	dp->dl_stid.sc_status |= statusmask;
+
 	/* Ensure that deleg break won't try to requeue it */
 	++dp->dl_time;
 	spin_lock(&fp->fi_lock);
@@ -1361,7 +1357,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
 	bool unhashed;
 
 	spin_lock(&state_lock);
-	unhashed = unhash_delegation_locked(dp, NFS4_CLOSED_DELEG_STID);
+	unhashed = unhash_delegation_locked(dp, NFS4_STID_CLOSED);
 	spin_unlock(&state_lock);
 	if (unhashed)
 		destroy_unhashed_deleg(dp);
@@ -1375,7 +1371,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
 	trace_nfsd_stid_revoke(&dp->dl_stid);
 
-	if (dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+	if (dp->dl_stid.sc_status & NFS4_STID_REVOKED) {
 		spin_lock(&clp->cl_lock);
 		refcount_inc(&dp->dl_stid.sc_count);
 		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
@@ -1384,8 +1380,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 	destroy_unhashed_deleg(dp);
 }
 
-/* 
- * SETCLIENTID state 
+/*
+ * SETCLIENTID state
  */
 
 static unsigned int clientid_hashval(u32 id)
@@ -1548,7 +1544,7 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
 	if (!unhash_ol_stateid(stp))
 		return false;
 	list_del_init(&stp->st_locks);
-	nfs4_unhash_stid(&stp->st_stid);
+	stp->st_stid.sc_status |= NFS4_STID_CLOSED;
 	return true;
 }
 
@@ -1627,6 +1623,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
 	LIST_HEAD(reaplist);
 
 	spin_lock(&stp->st_stid.sc_client->cl_lock);
+	stp->st_stid.sc_status |= NFS4_STID_CLOSED;
 	if (unhash_open_stateid(stp, &reaplist))
 		put_ol_stateid_locked(stp, &reaplist);
 	spin_unlock(&stp->st_stid.sc_client->cl_lock);
@@ -2235,7 +2232,7 @@ __destroy_client(struct nfs4_client *clp)
 	spin_lock(&state_lock);
 	while (!list_empty(&clp->cl_delegations)) {
 		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-		unhash_delegation_locked(dp, NFS4_CLOSED_DELEG_STID);
+		unhash_delegation_locked(dp, NFS4_STID_CLOSED);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -2467,14 +2464,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
 }
 
 static struct nfs4_stid *
-find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
+		     unsigned short typemask, unsigned short ok_states)
 {
 	struct nfs4_stid *s;
 
 	spin_lock(&cl->cl_lock);
 	s = find_stateid_locked(cl, t);
 	if (s != NULL) {
-		if (typemask & s->sc_type)
+		if ((s->sc_status & ~ok_states) == 0 &&
+		    (typemask & s->sc_type))
 			refcount_inc(&s->sc_count);
 		else
 			s = NULL;
@@ -4583,7 +4582,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
 			continue;
 		if (local->st_stateowner != &oo->oo_owner)
 			continue;
-		if (local->st_stid.sc_type == NFS4_OPEN_STID) {
+		if (local->st_stid.sc_type == NFS4_OPEN_STID &&
+		    !local->st_stid.sc_status) {
 			ret = local;
 			refcount_inc(&ret->st_stid.sc_count);
 			break;
@@ -4597,17 +4597,10 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
 {
 	__be32 ret = nfs_ok;
 
-	switch (s->sc_type) {
-	default:
-		break;
-	case 0:
-	case NFS4_CLOSED_STID:
-	case NFS4_CLOSED_DELEG_STID:
-		ret = nfserr_bad_stateid;
-		break;
-	case NFS4_REVOKED_DELEG_STID:
+	if (s->sc_status & NFS4_STID_REVOKED)
 		ret = nfserr_deleg_revoked;
-	}
+	else if (s->sc_status & NFS4_STID_CLOSED)
+		ret = nfserr_bad_stateid;
 	return ret;
 }
 
@@ -4920,9 +4913,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
 
 	trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
 
-	if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
-	    dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
-	        return 1;
+	if (dp->dl_stid.sc_status)
+		/* CLOSED or REVOKED */
+		return 1;
 
 	switch (task->tk_status) {
 	case 0:
@@ -5167,12 +5160,12 @@ static int share_access_to_flags(u32 share_access)
 	return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
 }
 
-static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
+static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl,
+						  stateid_t *s)
 {
 	struct nfs4_stid *ret;
 
-	ret = find_stateid_by_type(cl, s,
-				NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
+	ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID, NFS4_STID_REVOKED);
 	if (!ret)
 		return NULL;
 	return delegstateid(ret);
@@ -5195,7 +5188,7 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
 	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
 	if (deleg == NULL)
 		goto out;
-	if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
 		nfs4_put_stid(&deleg->dl_stid);
 		status = nfserr_deleg_revoked;
 		goto out;
@@ -5842,7 +5835,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	} else {
 		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
 		if (status) {
-			stp->st_stid.sc_type = NFS4_CLOSED_STID;
 			release_open_stateid(stp);
 			mutex_unlock(&stp->st_mutex);
 			goto out;
@@ -6234,7 +6226,7 @@ nfs4_laundromat(struct nfsd_net *nn)
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		if (!state_expired(&lt, dp->dl_time))
 			break;
-		unhash_delegation_locked(dp, NFS4_REVOKED_DELEG_STID);
+		unhash_delegation_locked(dp, NFS4_STID_REVOKED);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
@@ -6473,22 +6465,20 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 	status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
 	if (status)
 		goto out_unlock;
+	status = nfsd4_verify_open_stid(s);
+	if (status)
+		goto out_unlock;
+
 	switch (s->sc_type) {
 	case NFS4_DELEG_STID:
 		status = nfs_ok;
 		break;
-	case NFS4_REVOKED_DELEG_STID:
-		status = nfserr_deleg_revoked;
-		break;
 	case NFS4_OPEN_STID:
 	case NFS4_LOCK_STID:
 		status = nfsd4_check_openowner_confirmed(openlockstateid(s));
 		break;
 	default:
 		printk("unknown stateid type %x\n", s->sc_type);
-		fallthrough;
-	case NFS4_CLOSED_STID:
-	case NFS4_CLOSED_DELEG_STID:
 		status = nfserr_bad_stateid;
 	}
 out_unlock:
@@ -6498,7 +6488,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 
 __be32
 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
-		     stateid_t *stateid, unsigned char typemask,
+		     stateid_t *stateid,
+		     unsigned short typemask, unsigned short statusmask,
 		     struct nfs4_stid **s, struct nfsd_net *nn)
 {
 	__be32 status;
@@ -6509,10 +6500,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	 *  only return revoked delegations if explicitly asked.
 	 *  otherwise we report revoked or bad_stateid status.
 	 */
-	if (typemask & NFS4_REVOKED_DELEG_STID)
+	if (statusmask & NFS4_STID_REVOKED)
 		return_revoked = true;
-	else if (typemask & NFS4_DELEG_STID)
-		typemask |= NFS4_REVOKED_DELEG_STID;
+	if (typemask & NFS4_DELEG_STID)
+		/* Always allow REVOKED for DELEG so we can
+		 * retturn the appropriate error.
+		 */
+		statusmask |= NFS4_STID_REVOKED;
 
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
 		CLOSE_STATEID(stateid))
@@ -6525,14 +6519,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	}
 	if (status)
 		return status;
-	stid = find_stateid_by_type(cstate->clp, stateid, typemask);
+	stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask);
 	if (!stid)
 		return nfserr_bad_stateid;
-	if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+	if ((stid->sc_status & NFS4_STID_REVOKED) && !return_revoked) {
 		nfs4_put_stid(stid);
-		if (cstate->minorversion)
-			return nfserr_deleg_revoked;
-		return nfserr_bad_stateid;
+		return nfserr_deleg_revoked;
 	}
 	*s = stid;
 	return nfs_ok;
@@ -6543,7 +6535,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
 {
 	struct nfsd_file *ret = NULL;
 
-	if (!s)
+	if (!s || s->sc_status)
 		return NULL;
 
 	switch (s->sc_type) {
@@ -6666,7 +6658,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
 		goto out;
 
 	*stid = find_stateid_by_type(found, &cps->cp_p_stateid,
-			NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
+				     NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+				     0);
 	if (*stid)
 		status = nfs_ok;
 	else
@@ -6724,7 +6717,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 
 	status = nfsd4_lookup_stateid(cstate, stateid,
 				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
-				&s, nn);
+				0, &s, nn);
 	if (status == nfserr_bad_stateid)
 		status = find_cpntf_state(nn, stateid, &s);
 	if (status)
@@ -6742,9 +6735,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 	case NFS4_LOCK_STID:
 		status = nfs4_check_olstateid(openlockstateid(s), flags);
 		break;
-	default:
-		status = nfserr_bad_stateid;
-		break;
 	}
 	if (status)
 		goto out;
@@ -6823,11 +6813,20 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	spin_lock(&cl->cl_lock);
 	s = find_stateid_locked(cl, stateid);
-	if (!s)
+	if (!s || s->sc_status & NFS4_STID_CLOSED)
 		goto out_unlock;
 	spin_lock(&s->sc_lock);
 	switch (s->sc_type) {
 	case NFS4_DELEG_STID:
+		if (s->sc_status & NFS4_STID_REVOKED) {
+			spin_unlock(&s->sc_lock);
+			dp = delegstateid(s);
+			list_del_init(&dp->dl_recall_lru);
+			spin_unlock(&cl->cl_lock);
+			nfs4_put_stid(s);
+			ret = nfs_ok;
+			goto out;
+		}
 		ret = nfserr_locks_held;
 		break;
 	case NFS4_OPEN_STID:
@@ -6842,14 +6841,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		spin_unlock(&cl->cl_lock);
 		ret = nfsd4_free_lock_stateid(stateid, s);
 		goto out;
-	case NFS4_REVOKED_DELEG_STID:
-		spin_unlock(&s->sc_lock);
-		dp = delegstateid(s);
-		list_del_init(&dp->dl_recall_lru);
-		spin_unlock(&cl->cl_lock);
-		nfs4_put_stid(s);
-		ret = nfs_ok;
-		goto out;
 	/* Default falls through and returns nfserr_bad_stateid */
 	}
 	spin_unlock(&s->sc_lock);
@@ -6892,6 +6883,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
  * @seqid: seqid (provided by client)
  * @stateid: stateid (provided by client)
  * @typemask: mask of allowable types for this operation
+ * @statusmask: mask of allowed states: 0 or STID_CLOSED
  * @stpp: return pointer for the stateid found
  * @nn: net namespace for request
  *
@@ -6901,7 +6893,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
  */
 static __be32
 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
-			 stateid_t *stateid, char typemask,
+			 stateid_t *stateid,
+			 unsigned short typemask, unsigned short statusmask,
 			 struct nfs4_ol_stateid **stpp,
 			 struct nfsd_net *nn)
 {
@@ -6912,7 +6905,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 	trace_nfsd_preprocess(seqid, stateid);
 
 	*stpp = NULL;
-	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
+	status = nfsd4_lookup_stateid(cstate, stateid,
+				      typemask, statusmask, &s, nn);
 	if (status)
 		return status;
 	stp = openlockstateid(s);
@@ -6934,7 +6928,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
 	struct nfs4_ol_stateid *stp;
 
 	status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
-						NFS4_OPEN_STID, &stp, nn);
+					  NFS4_OPEN_STID, 0, &stp, nn);
 	if (status)
 		return status;
 	oo = openowner(stp->st_stateowner);
@@ -6965,8 +6959,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return status;
 
 	status = nfs4_preprocess_seqid_op(cstate,
-					oc->oc_seqid, &oc->oc_req_stateid,
-					NFS4_OPEN_STID, &stp, nn);
+					  oc->oc_seqid, &oc->oc_req_stateid,
+					  NFS4_OPEN_STID, 0, &stp, nn);
 	if (status)
 		goto out;
 	oo = openowner(stp->st_stateowner);
@@ -7096,18 +7090,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	dprintk("NFSD: nfsd4_close on file %pd\n", 
+	dprintk("NFSD: nfsd4_close on file %pd\n",
 			cstate->current_fh.fh_dentry);
 
 	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
-					&close->cl_stateid,
-					NFS4_OPEN_STID|NFS4_CLOSED_STID,
-					&stp, nn);
+					  &close->cl_stateid,
+					  NFS4_OPEN_STID, NFS4_STID_CLOSED,
+					  &stp, nn);
 	nfsd4_bump_seqid(cstate, status);
 	if (status)
-		goto out; 
+		goto out;
 
-	stp->st_stid.sc_type = NFS4_CLOSED_STID;
+	spin_lock(&stp->st_stid.sc_client->cl_lock);
+	stp->st_stid.sc_status |= NFS4_STID_CLOSED;
+	spin_unlock(&stp->st_stid.sc_client->cl_lock);
 
 	/*
 	 * Technically we don't _really_ have to increment or copy it, since
@@ -7149,7 +7145,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
 		return status;
 
-	status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
+	status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, 0, &s, nn);
 	if (status)
 		goto out;
 	dp = delegstateid(s);
@@ -7603,9 +7599,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 							&lock_stp, &new);
 	} else {
 		status = nfs4_preprocess_seqid_op(cstate,
-				       lock->lk_old_lock_seqid,
-				       &lock->lk_old_lock_stateid,
-				       NFS4_LOCK_STID, &lock_stp, nn);
+						  lock->lk_old_lock_seqid,
+						  &lock->lk_old_lock_stateid,
+						  NFS4_LOCK_STID, 0, &lock_stp,
+						  nn);
 	}
 	if (status)
 		goto out;
@@ -7918,8 +7915,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 return nfserr_inval;
 
 	status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
-					&locku->lu_stateid, NFS4_LOCK_STID,
-					&stp, nn);
+					  &locku->lu_stateid, NFS4_LOCK_STID, 0,
+					  &stp, nn);
 	if (status)
 		goto out;
 	nf = find_any_file(stp->st_stid.sc_file);
@@ -8353,7 +8350,7 @@ nfs4_state_shutdown_net(struct net *net)
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-		unhash_delegation_locked(dp, NFS4_CLOSED_DELEG_STID);
+		unhash_delegation_locked(dp, NFS4_STID_CLOSED);
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&state_lock);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index f96eaa8e9413..bb00dcd4c1ba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -88,17 +88,33 @@ struct nfsd4_callback_ops {
  */
 struct nfs4_stid {
 	refcount_t		sc_count;
-#define NFS4_OPEN_STID 1
-#define NFS4_LOCK_STID 2
-#define NFS4_DELEG_STID 4
-/* For an open stateid kept around *only* to process close replays: */
-#define NFS4_CLOSED_STID 8
+
+	/* A new stateid is added to the cl_stateids idr early before it
+	 * is fully initialised.  Its sc_type is then zero.  After
+	 * initialisation the sc_type it set under cl_lock, and then
+	 * never changes.
+	 */
+#define NFS4_OPEN_STID		BIT(0)
+#define NFS4_LOCK_STID		BIT(1)
+#define NFS4_DELEG_STID		BIT(2)
+#define NFS4_LAYOUT_STID	BIT(3)
+	unsigned short		sc_type;
+
+/* state_lock protects sc_status for delegation stateids.
+ * ->cl_lock protects sc_status for open and lock stateids.
+ * ->st_mutex also protect sc_status for open stateids.
+ * ->ls_lock protects sc_status for layout stateids.
+ */
+/*
+ * For an open stateid kept around *only* to process close replays.
+ * For deleg stateid, kept in idr until last reference is dropped.
+ */
+#define NFS4_STID_CLOSED	BIT(0)
 /* For a deleg stateid kept around only to process free_stateid's: */
-#define NFS4_REVOKED_DELEG_STID 16
-#define NFS4_CLOSED_DELEG_STID 32
-#define NFS4_LAYOUT_STID 64
+#define NFS4_STID_REVOKED	BIT(1)
+	unsigned short		sc_status;
+
 	struct list_head	sc_cp_list;
-	unsigned char		sc_type;
 	stateid_t		sc_stateid;
 	spinlock_t		sc_lock;
 	struct nfs4_client	*sc_client;
@@ -694,15 +710,15 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		stateid_t *stateid, int flags, struct nfsd_file **filp,
 		struct nfs4_stid **cstid);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
-		     stateid_t *stateid, unsigned char typemask,
-		     struct nfs4_stid **s, struct nfsd_net *nn);
+			    stateid_t *stateid, unsigned short typemask,
+			    unsigned short statusmask,
+			    struct nfs4_stid **s, struct nfsd_net *nn);
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
 				  void (*sc_free)(struct nfs4_stid *));
 int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
 void nfs4_free_copy_state(struct nfsd4_copy *copy);
 struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
 			struct nfs4_stid *p_stid);
-void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
 void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index d1e8cf079b0f..568b4ec9a2af 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -641,24 +641,18 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
 DEFINE_STATESEQID_EVENT(preprocess);
 DEFINE_STATESEQID_EVENT(open_confirm);
 
-TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
-TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
-TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
-TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
-
 #define show_stid_type(x)						\
 	__print_flags(x, "|",						\
 		{ NFS4_OPEN_STID,		"OPEN" },		\
 		{ NFS4_LOCK_STID,		"LOCK" },		\
 		{ NFS4_DELEG_STID,		"DELEG" },		\
-		{ NFS4_CLOSED_STID,		"CLOSED" },		\
-		{ NFS4_REVOKED_DELEG_STID,	"REVOKED" },		\
-		{ NFS4_CLOSED_DELEG_STID,	"CLOSED_DELEG" },	\
 		{ NFS4_LAYOUT_STID,		"LAYOUT" })
 
+#define show_stid_status(x)						\
+	__print_flags(x, "|",						\
+		{ NFS4_STID_CLOSED,		"CLOSED" },		\
+		{ NFS4_STID_REVOKED,		"REVOKED" })		\
+
 DECLARE_EVENT_CLASS(nfsd_stid_class,
 	TP_PROTO(
 		const struct nfs4_stid *stid
@@ -666,6 +660,7 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
 	TP_ARGS(stid),
 	TP_STRUCT__entry(
 		__field(unsigned long, sc_type)
+		__field(unsigned long, sc_status)
 		__field(int, sc_count)
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
@@ -676,16 +671,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
 		const stateid_t *stp = &stid->sc_stateid;
 
 		__entry->sc_type = stid->sc_type;
+		__entry->sc_status = stid->sc_status;
 		__entry->sc_count = refcount_read(&stid->sc_count);
 		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
 		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
 		__entry->si_id = stp->si_opaque.so_id;
 		__entry->si_generation = stp->si_generation;
 	),
-	TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
+	TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
 		__entry->cl_boot, __entry->cl_id,
 		__entry->si_id, __entry->si_generation,
-		__entry->sc_count, show_stid_type(__entry->sc_type)
+		__entry->sc_count, show_stid_type(__entry->sc_type),
+		show_stid_status(__entry->sc_status)
 	)
 );
 
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (3 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 04/11] nfsd: split sc_status out of sc_type NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-26 17:36   ` Chuck Lever
  2023-11-24  0:28 ` [PATCH 06/11] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states NeilBrown
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

The NFSv4 protocol allows state to be revoked by the admin and has error
codes which allow this to be communicated to the client.

This patch
 - introduces a new state-id status NFS4_STID_ADMIN_REVOKE
   which can be set on open, lock, or delegation state.
 - reports NFS4ERR_ADMIN_REVOKED when these are accessed
 - introduces a per-client counter of these states and returns
   SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
   Decrements this when freeing any admin-revoked state.
 - introduces stub code to find all interesting states for a given
   superblock so they can be revoked via the 'unlock_filesystem'
   file in /proc/fs/nfsd/
   No actual states are handled yet.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/nfsctl.c    |  1 +
 fs/nfsd/nfsd.h      |  1 +
 fs/nfsd/state.h     | 10 +++++++
 fs/nfsd/trace.h     |  3 +-
 5 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b9239f2ebc79..477a9e9aebbd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1215,6 +1215,8 @@ nfs4_put_stid(struct nfs4_stid *s)
 		return;
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
+		atomic_dec(&s->sc_client->cl_admin_revoked);
 	nfs4_free_cpntf_statelist(clp->net, s);
 	spin_unlock(&clp->cl_lock);
 	s->sc_free(s);
@@ -1534,6 +1536,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 	}
 
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
+		atomic_dec(&s->sc_client->cl_admin_revoked);
 	list_add(&stp->st_locks, reaplist);
 }
 
@@ -1679,6 +1683,54 @@ static void release_openowner(struct nfs4_openowner *oo)
 	nfs4_put_stateowner(&oo->oo_owner);
 }
 
+static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
+					  struct super_block *sb,
+					  unsigned int sc_types)
+{
+	unsigned long id, tmp;
+	struct nfs4_stid *stid;
+
+	spin_lock(&clp->cl_lock);
+	idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+		if ((stid->sc_type & sc_types) &&
+		    stid->sc_status == 0 &&
+		    stid->sc_file->fi_inode->i_sb == sb) {
+			refcount_inc(&stid->sc_count);
+			break;
+		}
+	spin_unlock(&clp->cl_lock);
+	return stid;
+}
+
+void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	unsigned int idhashval;
+	unsigned int sc_types;
+
+	sc_types = 0;
+
+	spin_lock(&nn->client_lock);
+	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
+		struct list_head *head = &nn->conf_id_hashtbl[idhashval];
+		struct nfs4_client *clp;
+	retry:
+		list_for_each_entry(clp, head, cl_idhash) {
+			struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
+								  sc_types);
+			if (stid) {
+				spin_unlock(&nn->client_lock);
+				switch (stid->sc_type) {
+				}
+				nfs4_put_stid(stid);
+				spin_lock(&nn->client_lock);
+				goto retry;
+			}
+		}
+	}
+	spin_unlock(&nn->client_lock);
+}
+
 static inline int
 hash_sessionid(struct nfs4_sessionid *sessionid)
 {
@@ -2550,6 +2602,8 @@ static int client_info_show(struct seq_file *m, void *v)
 	}
 	seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
 	seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+	seq_printf(m, "admin-revoked states: %d\n",
+		   atomic_read(&clp->cl_admin_revoked));
 	drop_client(clp);
 
 	return 0;
@@ -4109,6 +4163,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	if (!list_empty(&clp->cl_revoked))
 		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+	if (atomic_read(&clp->cl_admin_revoked))
+		seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
 out_no_session:
 	if (conn)
 		free_conn(conn);
@@ -4597,7 +4653,9 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
 {
 	__be32 ret = nfs_ok;
 
-	if (s->sc_status & NFS4_STID_REVOKED)
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
+		ret = nfserr_admin_revoked;
+	else if (s->sc_status & NFS4_STID_REVOKED)
 		ret = nfserr_deleg_revoked;
 	else if (s->sc_status & NFS4_STID_CLOSED)
 		ret = nfserr_bad_stateid;
@@ -5188,6 +5246,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
 	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
 	if (deleg == NULL)
 		goto out;
+	if (deleg->dl_stid.sc_status & NFS4_STID_ADMIN_REVOKED) {
+		nfs4_put_stid(&deleg->dl_stid);
+		status = nfserr_admin_revoked;
+		goto out;
+	}
 	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
 		nfs4_put_stid(&deleg->dl_stid);
 		status = nfserr_deleg_revoked;
@@ -6508,6 +6571,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		 */
 		statusmask |= NFS4_STID_REVOKED;
 
+	statusmask |= NFS4_STID_ADMIN_REVOKED;
+
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
 		CLOSE_STATEID(stateid))
 		return nfserr_bad_stateid;
@@ -6526,6 +6591,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		nfs4_put_stid(stid);
 		return nfserr_deleg_revoked;
 	}
+	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
+		nfs4_put_stid(stid);
+		return nfserr_admin_revoked;
+	}
 	*s = stid;
 	return nfs_ok;
 }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6eeee149370..a622d773f428 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -285,6 +285,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
 	 * 3.  Is that directory the root of an exported file system?
 	 */
 	error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+	nfsd4_revoke_states(netns(file), path.dentry->d_sb);
 
 	path_put(&path);
 	return error;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index f5ff42f41ee7..d46203eac3c8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -280,6 +280,7 @@ void		nfsd_lockd_shutdown(void);
 #define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
 #define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
 #define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
+#define	nfserr_admin_revoked	cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
 #define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
 #define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
 #define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index bb00dcd4c1ba..584378c43e0a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -112,6 +112,7 @@ struct nfs4_stid {
 #define NFS4_STID_CLOSED	BIT(0)
 /* For a deleg stateid kept around only to process free_stateid's: */
 #define NFS4_STID_REVOKED	BIT(1)
+#define NFS4_STID_ADMIN_REVOKED	BIT(2)
 	unsigned short		sc_status;
 
 	struct list_head	sc_cp_list;
@@ -388,6 +389,7 @@ struct nfs4_client {
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
 	u32			cl_minorversion;
+	atomic_t		cl_admin_revoked; /* count of admin-revoked states */
 	/* NFSv4.1 client implementation id: */
 	struct xdr_netobj	cl_nii_domain;
 	struct xdr_netobj	cl_nii_name;
@@ -752,6 +754,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
 }
 struct nfsd_file *find_any_file(struct nfs4_file *f);
 
+#ifdef CONFIG_NFSD_V4
+void nfsd4_revoke_states(struct net *net, struct super_block *sb);
+#else
+static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+}
+#endif
+
 /* grace period management */
 void nfsd4_end_grace(struct nfsd_net *nn);
 
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 568b4ec9a2af..281aeb42c9eb 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -651,7 +651,8 @@ DEFINE_STATESEQID_EVENT(open_confirm);
 #define show_stid_status(x)						\
 	__print_flags(x, "|",						\
 		{ NFS4_STID_CLOSED,		"CLOSED" },		\
-		{ NFS4_STID_REVOKED,		"REVOKED" })		\
+		{ NFS4_STID_REVOKED,		"REVOKED" },		\
+		{ NFS4_STID_ADMIN_REVOKED,	"ADMIN_REVOKED" })
 
 DECLARE_EVENT_CLASS(nfsd_stid_class,
 	TP_PROTO(
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 06/11] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (4 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-26 17:41   ` Chuck Lever
  2023-11-24  0:28 ` [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed NeilBrown
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Change the "show" functions to show some content even if a file cannot
be found.
This is primarily useful for debugging - to ensure states are being
removed eventually.

Also remove a "Kinda dead" comment which is no longer correct as we
now support write delegations.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 82 ++++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 477a9e9aebbd..52e680235afe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2680,17 +2680,10 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 	struct nfs4_stateowner *oo;
 	unsigned int access, deny;
 
-	if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
-		return 0; /* XXX: or SEQ_SKIP? */
 	ols = openlockstateid(st);
 	oo = ols->st_stateowner;
 	nf = st->sc_file;
 
-	spin_lock(&nf->fi_lock);
-	file = find_any_file_locked(nf);
-	if (!file)
-		goto out;
-
 	seq_printf(s, "- ");
 	nfs4_show_stateid(s, &st->sc_stateid);
 	seq_printf(s, ": { type: open, ");
@@ -2705,14 +2698,19 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 		deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
 		deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
 
-	nfs4_show_superblock(s, file);
-	seq_printf(s, ", ");
-	nfs4_show_fname(s, file);
-	seq_printf(s, ", ");
+	spin_lock(&nf->fi_lock);
+	file = find_any_file_locked(nf);
+	if (file) {
+		nfs4_show_superblock(s, file);
+		seq_puts(s, ", ");
+		nfs4_show_fname(s, file);
+		seq_puts(s, ", ");
+	}
+	spin_unlock(&nf->fi_lock);
 	nfs4_show_owner(s, oo);
+	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
+		seq_puts(s, ", admin-revoked");
 	seq_printf(s, " }\n");
-out:
-	spin_unlock(&nf->fi_lock);
 	return 0;
 }
 
@@ -2726,30 +2724,31 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 	ols = openlockstateid(st);
 	oo = ols->st_stateowner;
 	nf = st->sc_file;
-	spin_lock(&nf->fi_lock);
-	file = find_any_file_locked(nf);
-	if (!file)
-		goto out;
 
 	seq_printf(s, "- ");
 	nfs4_show_stateid(s, &st->sc_stateid);
 	seq_printf(s, ": { type: lock, ");
 
-	/*
-	 * Note: a lock stateid isn't really the same thing as a lock,
-	 * it's the locking state held by one owner on a file, and there
-	 * may be multiple (or no) lock ranges associated with it.
-	 * (Same for the matter is true of open stateids.)
-	 */
+	spin_lock(&nf->fi_lock);
+	file = find_any_file_locked(nf);
+	if (file) {
+		/*
+		 * Note: a lock stateid isn't really the same thing as a lock,
+		 * it's the locking state held by one owner on a file, and there
+		 * may be multiple (or no) lock ranges associated with it.
+		 * (Same for the matter is true of open stateids.)
+		 */
 
-	nfs4_show_superblock(s, file);
-	/* XXX: open stateid? */
-	seq_printf(s, ", ");
-	nfs4_show_fname(s, file);
-	seq_printf(s, ", ");
+		nfs4_show_superblock(s, file);
+		/* XXX: open stateid? */
+		seq_puts(s, ", ");
+		nfs4_show_fname(s, file);
+		seq_puts(s, ", ");
+	}
 	nfs4_show_owner(s, oo);
+	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
+		seq_puts(s, ", admin-revoked");
 	seq_printf(s, " }\n");
-out:
 	spin_unlock(&nf->fi_lock);
 	return 0;
 }
@@ -2762,27 +2761,28 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 
 	ds = delegstateid(st);
 	nf = st->sc_file;
-	spin_lock(&nf->fi_lock);
-	file = nf->fi_deleg_file;
-	if (!file)
-		goto out;
 
 	seq_printf(s, "- ");
 	nfs4_show_stateid(s, &st->sc_stateid);
 	seq_printf(s, ": { type: deleg, ");
 
-	/* Kinda dead code as long as we only support read delegs: */
-	seq_printf(s, "access: %s, ",
-		ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+	seq_printf(s, "access: %s",
+		   ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
 
 	/* XXX: lease time, whether it's being recalled. */
 
-	nfs4_show_superblock(s, file);
-	seq_printf(s, ", ");
-	nfs4_show_fname(s, file);
-	seq_printf(s, " }\n");
-out:
+	spin_lock(&nf->fi_lock);
+	file = nf->fi_deleg_file;
+	if (file) {
+		seq_puts(s, ", ");
+		nfs4_show_superblock(s, file);
+		seq_puts(s, ", ");
+		nfs4_show_fname(s, file);
+	}
 	spin_unlock(&nf->fi_lock);
+	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
+		seq_puts(s, ", admin-revoked");
+	seq_puts(s, " }\n");
 	return 0;
 }
 
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed.
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (5 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 06/11] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-26 17:54   ` Chuck Lever
  2023-11-26 18:07   ` Chuck Lever
  2023-11-24  0:28 ` [PATCH 08/11] nfsd: allow lock state ids to be revoked and then freed NeilBrown
                   ` (3 subsequent siblings)
  10 siblings, 2 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

For NFSv4.1 and later the client easily discovers if there is any
admin-revoked state and will then find and explicitly free it.

For NFSv4.0 there is no such mechanism.  The client can only find that
state is admin-revoked if it tries to use that state, and there is no
way for it to explicitly free the state.  So the server must hold on to
the stateid (at least) for an indefinite amount of time.  A
RELEASE_LOCKOWNER request might justify forgetting some of these
stateids, as would the whole clients lease lapsing, but these are not
reliable.

This patch takes two approaches.

Whenever a client uses an revoked stateid, that stateid is then
discarded and will not be recognised again.  This might confuse a client
which expect to get NFS4ERR_ADMIN_REVOKED consistently once it get it at
all, but should mostly work.  Hopefully one error will lead to other
resources being closed (e.g.  process exits), which will result in more
stateid being freed when a CLOSE attempt gets NFS4ERR_ADMIN_REVOKED.

Also, any admin-revoked stateids that have been that way for more than
one lease time are periodically revoke.

No actual freeing of state happens in this patch.  That will come in
future patches which handle the different sorts of revoked state.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/netns.h     |  4 ++
 fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ab303a8b77d5..7458f672b33e 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -197,6 +197,10 @@ struct nfsd_net {
 	atomic_t		nfsd_courtesy_clients;
 	struct shrinker		*nfsd_client_shrinker;
 	struct work_struct	nfsd_shrinker_work;
+
+	/* last time an admin-revoke happened for NFSv4.0 */
+	time64_t		nfs40_last_revoke;
+
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 52e680235afe..c57f2ff954cb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1724,6 +1724,14 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 				}
 				nfs4_put_stid(stid);
 				spin_lock(&nn->client_lock);
+				if (clp->cl_minorversion == 0)
+					/* Allow cleanup after a lease period.
+					 * store_release ensures cleanup will
+					 * see any newly revoked states if it
+					 * sees the time updated.
+					 */
+					nn->nfs40_last_revoke =
+						ktime_get_boottime_seconds();
 				goto retry;
 			}
 		}
@@ -4648,6 +4656,39 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
 	return ret;
 }
 
+static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
+{
+	struct nfs4_client *cl = s->sc_client;
+
+	switch (s->sc_type) {
+	default:
+		spin_unlock(&cl->cl_lock);
+	}
+}
+
+static void nfs40_drop_revoked_stid(struct nfs4_client *cl,
+				    stateid_t *stid)
+{
+	/* NFSv4.0 has no way for the client to tell the server
+	 * that it can forget an admin-revoked stateid.
+	 * So we keep it around until the first time that the
+	 * client uses it, and drop it the first time
+	 * nfserr_admin_revoked is returned.
+	 * For v4.1 and later we wait until explicitly told
+	 * to free the stateid.
+	 */
+	if (cl->cl_minorversion == 0) {
+		struct nfs4_stid *st;
+
+		spin_lock(&cl->cl_lock);
+		st = find_stateid_locked(cl, stid);
+		if (st)
+			nfsd_drop_revoked_stid(st);
+		else
+			spin_unlock(&cl->cl_lock);
+	}
+}
+
 static __be32
 nfsd4_verify_open_stid(struct nfs4_stid *s)
 {
@@ -4670,6 +4711,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
 
 	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
 	ret = nfsd4_verify_open_stid(&stp->st_stid);
+	if (ret == nfserr_admin_revoked)
+		nfs40_drop_revoked_stid(stp->st_stid.sc_client,
+					&stp->st_stid.sc_stateid);
+
 	if (ret != nfs_ok)
 		mutex_unlock(&stp->st_mutex);
 	return ret;
@@ -5253,6 +5298,7 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
 	}
 	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
 		nfs4_put_stid(&deleg->dl_stid);
+		nfs40_drop_revoked_stid(cl, &open->op_delegate_stateid);
 		status = nfserr_deleg_revoked;
 		goto out;
 	}
@@ -6251,6 +6297,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist)
 	}
 }
 
+static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
+				      struct laundry_time *lt)
+{
+	struct nfs4_client *clp;
+
+	spin_lock(&nn->client_lock);
+	if (nn->nfs40_last_revoke == 0 ||
+	    nn->nfs40_last_revoke > lt->cutoff) {
+		spin_unlock(&nn->client_lock);
+		return;
+	}
+	nn->nfs40_last_revoke = 0;
+
+retry:
+	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+		unsigned long id, tmp;
+		struct nfs4_stid *stid;
+
+		if (atomic_read(&clp->cl_admin_revoked) == 0)
+			continue;
+
+		spin_lock(&clp->cl_lock);
+		idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+			if (stid->sc_status & NFS4_STID_ADMIN_REVOKED) {
+				refcount_inc(&stid->sc_count);
+				spin_unlock(&nn->client_lock);
+				/* this function drops ->cl_lock */
+				nfsd_drop_revoked_stid(stid);
+				nfs4_put_stid(stid);
+				spin_lock(&nn->client_lock);
+				goto retry;
+			}
+		spin_unlock(&clp->cl_lock);
+	}
+	spin_unlock(&nn->client_lock);
+}
+
 static time64_t
 nfs4_laundromat(struct nfsd_net *nn)
 {
@@ -6284,6 +6367,8 @@ nfs4_laundromat(struct nfsd_net *nn)
 	nfs4_get_client_reaplist(nn, &reaplist, &lt);
 	nfs4_process_client_reaplist(&reaplist);
 
+	nfs40_clean_admin_revoked(nn, &lt);
+
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
@@ -6502,6 +6587,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
 	if (ret == nfs_ok)
 		ret = check_stateid_generation(in, &s->sc_stateid, has_session);
 	spin_unlock(&s->sc_lock);
+	if (ret == nfserr_admin_revoked)
+		nfs40_drop_revoked_stid(s->sc_client,
+					&s->sc_stateid);
 	return ret;
 }
 
@@ -6546,6 +6634,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 	}
 out_unlock:
 	spin_unlock(&cl->cl_lock);
+	if (status == nfserr_admin_revoked)
+		nfs40_drop_revoked_stid(cl, stateid);
 	return status;
 }
 
@@ -6592,6 +6682,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		return nfserr_deleg_revoked;
 	}
 	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
+		nfs40_drop_revoked_stid(cstate->clp, stateid);
 		nfs4_put_stid(stid);
 		return nfserr_admin_revoked;
 	}
@@ -6884,6 +6975,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	s = find_stateid_locked(cl, stateid);
 	if (!s || s->sc_status & NFS4_STID_CLOSED)
 		goto out_unlock;
+	if (s->sc_status & NFS4_STID_ADMIN_REVOKED) {
+		nfsd_drop_revoked_stid(s);
+		ret = nfs_ok;
+		goto out;
+	}
 	spin_lock(&s->sc_lock);
 	switch (s->sc_type) {
 	case NFS4_DELEG_STID:
@@ -6910,7 +7006,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		spin_unlock(&cl->cl_lock);
 		ret = nfsd4_free_lock_stateid(stateid, s);
 		goto out;
-	/* Default falls through and returns nfserr_bad_stateid */
 	}
 	spin_unlock(&s->sc_lock);
 out_unlock:
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 08/11] nfsd: allow lock state ids to be revoked and then freed
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (6 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24  0:28 ` [PATCH 09/11] nfsd: allow open " NeilBrown
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Revoking state through 'unlock_filesystem' now revokes any lock states
found.  When the stateids are then freed by the client, the revoked
stateids will be cleaned up correctly.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c57f2ff954cb..0f52e10fbdfb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1708,7 +1708,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 	unsigned int idhashval;
 	unsigned int sc_types;
 
-	sc_types = 0;
+	sc_types = NFS4_LOCK_STID;
 
 	spin_lock(&nn->client_lock);
 	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1719,8 +1719,36 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 			struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
 								  sc_types);
 			if (stid) {
+				struct nfs4_ol_stateid *stp;
+
 				spin_unlock(&nn->client_lock);
 				switch (stid->sc_type) {
+				case NFS4_LOCK_STID:
+					stp = openlockstateid(stid);
+					mutex_lock_nested(&stp->st_mutex,
+							  LOCK_STATEID_MUTEX);
+					spin_lock(&clp->cl_lock);
+					if (stid->sc_status == 0) {
+						struct nfs4_lockowner *lo =
+							lockowner(stp->st_stateowner);
+						struct nfsd_file *nf;
+
+						stid->sc_status |=
+							NFS4_STID_ADMIN_REVOKED;
+						atomic_inc(&clp->cl_admin_revoked);
+						spin_unlock(&clp->cl_lock);
+						nf = find_any_file(stp->st_stid.sc_file);
+						if (nf) {
+							get_file(nf->nf_file);
+							filp_close(nf->nf_file,
+								   (fl_owner_t)lo);
+							nfsd_file_put(nf);
+						}
+						release_all_access(stp);
+					} else
+						spin_unlock(&clp->cl_lock);
+					mutex_unlock(&stp->st_mutex);
+					break;
 				}
 				nfs4_put_stid(stid);
 				spin_lock(&nn->client_lock);
@@ -4659,8 +4687,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
 static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
 {
 	struct nfs4_client *cl = s->sc_client;
+	LIST_HEAD(reaplist);
+	struct nfs4_ol_stateid *stp;
+	bool unhashed;
 
 	switch (s->sc_type) {
+	case NFS4_LOCK_STID:
+		stp = openlockstateid(s);
+		unhashed = unhash_lock_stateid(stp);
+		spin_unlock(&cl->cl_lock);
+		if (unhashed)
+			nfs4_put_stid(s);
+		break;
 	default:
 		spin_unlock(&cl->cl_lock);
 	}
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/11] nfsd: allow open state ids to be revoked and then freed
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (7 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 08/11] nfsd: allow lock state ids to be revoked and then freed NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24  0:28 ` [PATCH 10/11] nfsd: allow delegation " NeilBrown
  2023-11-24  0:28 ` [PATCH 11/11] nfsd: allow layout state to be admin-revoked NeilBrown
  10 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Revoking state through 'unlock_filesystem' now revokes any open states
found.  When the stateids are then freed by the client, the revoked
stateids will be cleaned up correctly.

Possibly the related lock states should be revoked too, but a
subsequent patch will do that for all lock state on the superblock.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f52e10fbdfb..8712eb81123f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1708,7 +1708,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 	unsigned int idhashval;
 	unsigned int sc_types;
 
-	sc_types = NFS4_LOCK_STID;
+	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID;
 
 	spin_lock(&nn->client_lock);
 	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1723,6 +1723,22 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 
 				spin_unlock(&nn->client_lock);
 				switch (stid->sc_type) {
+				case NFS4_OPEN_STID:
+					stp = openlockstateid(stid);
+					mutex_lock_nested(&stp->st_mutex,
+							  OPEN_STATEID_MUTEX);
+
+					spin_lock(&clp->cl_lock);
+					if (stid->sc_status == 0) {
+						stid->sc_status |=
+							NFS4_STID_ADMIN_REVOKED;
+						atomic_inc(&clp->cl_admin_revoked);
+						spin_unlock(&clp->cl_lock);
+						release_all_access(stp);
+					} else
+						spin_unlock(&clp->cl_lock);
+					mutex_unlock(&stp->st_mutex);
+					break;
 				case NFS4_LOCK_STID:
 					stp = openlockstateid(stid);
 					mutex_lock_nested(&stp->st_mutex,
@@ -4692,6 +4708,13 @@ static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
 	bool unhashed;
 
 	switch (s->sc_type) {
+	case NFS4_OPEN_STID:
+		stp = openlockstateid(s);
+		if (unhash_open_stateid(stp, &reaplist))
+			put_ol_stateid_locked(stp, &reaplist);
+		spin_unlock(&cl->cl_lock);
+		free_ol_stateid_reaplist(&reaplist);
+		break;
 	case NFS4_LOCK_STID:
 		stp = openlockstateid(s);
 		unhashed = unhash_lock_stateid(stp);
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 10/11] nfsd: allow delegation state ids to be revoked and then freed
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (8 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 09/11] nfsd: allow open " NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-30  4:48   ` kernel test robot
  2023-11-24  0:28 ` [PATCH 11/11] nfsd: allow layout state to be admin-revoked NeilBrown
  10 siblings, 1 reply; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Revoking state through 'unlock_filesystem' now revokes any delegation
states found.  When the stateids are then freed by the client, the
revoked stateids will be cleaned up correctly.

As there is already support for revoking delegations, we build on that
for admin-revoking.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8712eb81123f..3d85c88ec4d7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1340,9 +1340,12 @@ unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
 	if (!delegation_hashed(dp))
 		return false;
 
-	if (dp->dl_stid.sc_client->cl_minorversion == 0)
+	if (statusmask == NFS4_STID_REVOKED &&
+	    dp->dl_stid.sc_client->cl_minorversion == 0)
 		statusmask = NFS4_STID_CLOSED;
 	dp->dl_stid.sc_status |= statusmask;
+	if (statusmask & NFS4_STID_ADMIN_REVOKED)
+		atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked);
 
 	/* Ensure that deleg break won't try to requeue it */
 	++dp->dl_time;
@@ -1373,7 +1376,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
 	trace_nfsd_stid_revoke(&dp->dl_stid);
 
-	if (dp->dl_stid.sc_status & NFS4_STID_REVOKED) {
+	if (dp->dl_stid.sc_status &
+	    (NFS4_STID_REVOKED | NFS4_STID_ADMIN_REVOKED)) {
 		spin_lock(&clp->cl_lock);
 		refcount_inc(&dp->dl_stid.sc_count);
 		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
@@ -1708,7 +1712,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 	unsigned int idhashval;
 	unsigned int sc_types;
 
-	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID;
+	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID;
 
 	spin_lock(&nn->client_lock);
 	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1720,6 +1724,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 								  sc_types);
 			if (stid) {
 				struct nfs4_ol_stateid *stp;
+				struct nfs4_delegation *dp;
 
 				spin_unlock(&nn->client_lock);
 				switch (stid->sc_type) {
@@ -1765,6 +1770,16 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 						spin_unlock(&clp->cl_lock);
 					mutex_unlock(&stp->st_mutex);
 					break;
+				case NFS4_DELEG_STID:
+					dp = delegstateid(stid);
+					spin_lock(&state_lock);
+					if (!unhash_delegation_locked(
+						    dp, NFS4_STID_ADMIN_REVOKED))
+						dp = NULL;
+					spin_unlock(&state_lock);
+					if (dp)
+						revoke_delegation(dp);
+					break;
 				}
 				nfs4_put_stid(stid);
 				spin_lock(&nn->client_lock);
@@ -4705,6 +4720,7 @@ static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
 	struct nfs4_client *cl = s->sc_client;
 	LIST_HEAD(reaplist);
 	struct nfs4_ol_stateid *stp;
+	struct nfs4_delegation *dp;
 	bool unhashed;
 
 	switch (s->sc_type) {
@@ -4722,6 +4738,12 @@ static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
 		if (unhashed)
 			nfs4_put_stid(s);
 		break;
+	case NFS4_DELEG_STID:
+		dp = delegstateid(s);
+		list_del_init(&dp->dl_recall_lru);
+		spin_unlock(&cl->cl_lock);
+		nfs4_put_stid(s);
+		break;
 	default:
 		spin_unlock(&cl->cl_lock);
 	}
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
  2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
                   ` (9 preceding siblings ...)
  2023-11-24  0:28 ` [PATCH 10/11] nfsd: allow delegation " NeilBrown
@ 2023-11-24  0:28 ` NeilBrown
  2023-11-24 15:23   ` kernel test robot
                     ` (2 more replies)
  10 siblings, 3 replies; 26+ messages in thread
From: NeilBrown @ 2023-11-24  0:28 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

When there is layout state on a filesystem that is being "unlocked" that
is now revoked, which involves closing the nfsd_file and releasing the
vfs lease.

To avoid races, all users of ->ls_file - after the layout state has been
successfully created - now need to take a counted reference under
rcu_read_lock().  To support this, ->fence_client and
nfsd4_cb_layout_fail() now take a second argument being the nfsd_file.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/blocklayout.c |  4 ++--
 fs/nfsd/nfs4layouts.c | 38 +++++++++++++++++++++++++++-----------
 fs/nfsd/nfs4state.c   | 28 +++++++++++++++++++++-------
 fs/nfsd/pnfs.h        |  7 ++++++-
 4 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 46fd74d91ea9..3c040c81c77d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
 }
 
 static void
-nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 {
 	struct nfs4_client *clp = ls->ls_stid.sc_client;
-	struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
+	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
 
 	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
 			nfsd4_scsi_pr_key(clp), 0, true);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 77656126ad2a..dbc52413ce57 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -152,6 +152,18 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
 #endif
 }
 
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+	struct nfsd_file *fl = xchg(&ls->ls_file, NULL);
+
+	if (fl) {
+		if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
+			vfs_setlease(fl->nf_file, F_UNLCK, NULL,
+				     (void **)&ls);
+		nfsd_file_put(fl);
+	}
+}
+
 static void
 nfsd4_free_layout_stateid(struct nfs4_stid *stid)
 {
@@ -169,9 +181,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
 	list_del_init(&ls->ls_perfile);
 	spin_unlock(&fp->fi_lock);
 
-	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
-		vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
-	nfsd_file_put(ls->ls_file);
+	nfsd4_close_layout(ls);
 
 	if (ls->ls_recalled)
 		atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -605,7 +615,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
 }
 
 static void
-nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
+nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
 {
 	struct nfs4_client *clp = ls->ls_stid.sc_client;
 	char addr_str[INET6_ADDRSTRLEN];
@@ -627,7 +637,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 
 	argv[0] = (char *)nfsd_recall_failed;
 	argv[1] = addr_str;
-	argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
+	argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
 	argv[3] = NULL;
 
 	error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@@ -657,6 +667,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
 	struct nfsd_net *nn;
 	ktime_t now, cutoff;
 	const struct nfsd4_layout_ops *ops;
+	struct nfsd_file *fl;
 
 	trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
 	switch (task->tk_status) {
@@ -688,12 +699,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
 		 * Unknown error or non-responding client, we'll need to fence.
 		 */
 		trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
-		ops = nfsd4_layout_ops[ls->ls_layout_type];
-		if (ops->fence_client)
-			ops->fence_client(ls);
-		else
-			nfsd4_cb_layout_fail(ls);
+		rcu_read_lock();
+		fl = nfsd_file_get(ls->ls_file);
+		rcu_read_unlock();
+		if (fl) {
+			ops = nfsd4_layout_ops[ls->ls_layout_type];
+			if (ops->fence_client)
+				ops->fence_client(ls, fl);
+			else
+				nfsd4_cb_layout_fail(ls, fl);
+			nfsd_file_put(fl);
+		}
 		return 1;
 	case -NFS4ERR_NOMATCHING_LAYOUT:
 		trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3d85c88ec4d7..d82ca209eb96 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1712,7 +1712,8 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 	unsigned int idhashval;
 	unsigned int sc_types;
 
-	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID;
+	sc_types = (NFS4_OPEN_STID | NFS4_LOCK_STID |
+		    NFS4_DELEG_STID | NFS4_LAYOUT_STID);
 
 	spin_lock(&nn->client_lock);
 	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1725,6 +1726,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 			if (stid) {
 				struct nfs4_ol_stateid *stp;
 				struct nfs4_delegation *dp;
+				struct nfs4_layout_stateid *ls;
 
 				spin_unlock(&nn->client_lock);
 				switch (stid->sc_type) {
@@ -1780,6 +1782,10 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
 					if (dp)
 						revoke_delegation(dp);
 					break;
+				case NFS4_LAYOUT_STID:
+					ls = layoutstateid(stid);
+					nfsd4_close_layout(ls);
+					break;
 				}
 				nfs4_put_stid(stid);
 				spin_lock(&nn->client_lock);
@@ -2859,17 +2865,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
 	struct nfsd_file *file;
 
 	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
-	file = ls->ls_file;
+	rcu_read_lock();
+	file = nfsd_file_get(ls->ls_file);
+	rcu_read_unlock();
 
-	seq_printf(s, "- ");
+	seq_puts(s, "- ");
 	nfs4_show_stateid(s, &st->sc_stateid);
-	seq_printf(s, ": { type: layout, ");
+	seq_puts(s, ": { type: layout");
 
 	/* XXX: What else would be useful? */
 
-	nfs4_show_superblock(s, file);
-	seq_printf(s, ", ");
-	nfs4_show_fname(s, file);
+	if (file) {
+		seq_puts(s, ", ");
+		nfs4_show_superblock(s, file);
+		seq_puts(s, ", ");
+		nfs4_show_fname(s, file);
+		nfsd_file_put(file);
+	}
+	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
+		seq_puts(s, ", admin-revoked");
 	seq_printf(s, " }\n");
 
 	return 0;
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index de1e0dfed06a..f2777577865e 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
 	__be32 (*proc_layoutcommit)(struct inode *inode,
 			struct nfsd4_layoutcommit *lcp);
 
-	void (*fence_client)(struct nfs4_layout_stateid *ls);
+	void (*fence_client)(struct nfs4_layout_stateid *ls,
+			     struct nfsd_file *file);
 };
 
 extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
@@ -72,6 +73,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
 void nfsd4_return_all_client_layouts(struct nfs4_client *);
 void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
 		struct nfs4_file *fp);
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
 int nfsd4_init_pnfs(void);
 void nfsd4_exit_pnfs(void);
 #else
@@ -89,6 +91,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
 		struct nfs4_file *fp)
 {
 }
+static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+}
 static inline void nfsd4_exit_pnfs(void)
 {
 }
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
  2023-11-24  0:28 ` [PATCH 11/11] nfsd: allow layout state to be admin-revoked NeilBrown
@ 2023-11-24 15:23   ` kernel test robot
  2023-11-24 15:41   ` kernel test robot
  2023-11-27 15:25   ` Chuck Lever
  2 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2023-11-24 15:23 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever, Jeff Layton
  Cc: oe-kbuild-all, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Hi NeilBrown,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.7-rc2 next-20231124]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/NeilBrown/nfsd-hold-cl_lock-for-hash_delegation_locked/20231124-123723
base:   linus/master
patch link:    https://lore.kernel.org/r/20231124002925.1816-12-neilb%40suse.de
patch subject: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
config: alpha-defconfig (https://download.01.org/0day-ci/archive/20231124/202311241708.t3tyrrzD-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231124/202311241708.t3tyrrzD-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311241708.t3tyrrzD-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from fs/nfsd/export.c:24:
>> fs/nfsd/pnfs.h:94:46: warning: 'struct nfs4_layout_stateid' declared inside parameter list will not be visible outside of this definition or declaration
      94 | static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
         |                                              ^~~~~~~~~~~~~~~~~~~
   fs/nfsd/export.c: In function 'exp_rootfh':
   fs/nfsd/export.c:1016:34: warning: variable 'inode' set but not used [-Wunused-but-set-variable]
    1016 |         struct inode            *inode;
         |                                  ^~~~~


vim +94 fs/nfsd/pnfs.h

    86	
    87	static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
    88	{
    89	}
    90	static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
    91			struct nfs4_file *fp)
    92	{
    93	}
  > 94	static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
  2023-11-24  0:28 ` [PATCH 11/11] nfsd: allow layout state to be admin-revoked NeilBrown
  2023-11-24 15:23   ` kernel test robot
@ 2023-11-24 15:41   ` kernel test robot
  2023-11-27 15:25   ` Chuck Lever
  2 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2023-11-24 15:41 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever, Jeff Layton
  Cc: llvm, oe-kbuild-all, linux-nfs, Olga Kornievskaia, Dai Ngo,
	Tom Talpey

Hi NeilBrown,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.7-rc2 next-20231124]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/NeilBrown/nfsd-hold-cl_lock-for-hash_delegation_locked/20231124-123723
base:   linus/master
patch link:    https://lore.kernel.org/r/20231124002925.1816-12-neilb%40suse.de
patch subject: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
config: x86_64-buildonly-randconfig-002-20231124 (https://download.01.org/0day-ci/archive/20231124/202311241941.jNlc0B4A-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231124/202311241941.jNlc0B4A-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311241941.jNlc0B4A-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from fs/nfsd/export.c:24:
>> fs/nfsd/pnfs.h:94:46: warning: declaration of 'struct nfs4_layout_stateid' will not be visible outside of this function [-Wvisibility]
   static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
                                                ^
   fs/nfsd/export.c:1016:17: warning: variable 'inode' set but not used [-Wunused-but-set-variable]
           struct inode            *inode;
                                    ^
   2 warnings generated.


vim +94 fs/nfsd/pnfs.h

    86	
    87	static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
    88	{
    89	}
    90	static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
    91			struct nfs4_file *fp)
    92	{
    93	}
  > 94	static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state
  2023-11-24  0:28 ` [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state NeilBrown
@ 2023-11-26 17:36   ` Chuck Lever
  2024-01-19  0:46     ` NeilBrown
  0 siblings, 1 reply; 26+ messages in thread
From: Chuck Lever @ 2023-11-26 17:36 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, Nov 24, 2023 at 11:28:40AM +1100, NeilBrown wrote:
> The NFSv4 protocol allows state to be revoked by the admin and has error
> codes which allow this to be communicated to the client.
> 
> This patch
>  - introduces a new state-id status NFS4_STID_ADMIN_REVOKE
>    which can be set on open, lock, or delegation state.
>  - reports NFS4ERR_ADMIN_REVOKED when these are accessed
>  - introduces a per-client counter of these states and returns
>    SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
>    Decrements this when freeing any admin-revoked state.
>  - introduces stub code to find all interesting states for a given
>    superblock so they can be revoked via the 'unlock_filesystem'
>    file in /proc/fs/nfsd/
>    No actual states are handled yet.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++++-
>  fs/nfsd/nfsctl.c    |  1 +
>  fs/nfsd/nfsd.h      |  1 +
>  fs/nfsd/state.h     | 10 +++++++
>  fs/nfsd/trace.h     |  3 +-
>  5 files changed, 84 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index b9239f2ebc79..477a9e9aebbd 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1215,6 +1215,8 @@ nfs4_put_stid(struct nfs4_stid *s)
>  		return;
>  	}
>  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		atomic_dec(&s->sc_client->cl_admin_revoked);
>  	nfs4_free_cpntf_statelist(clp->net, s);
>  	spin_unlock(&clp->cl_lock);
>  	s->sc_free(s);
> @@ -1534,6 +1536,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
>  	}
>  
>  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		atomic_dec(&s->sc_client->cl_admin_revoked);
>  	list_add(&stp->st_locks, reaplist);
>  }
>  
> @@ -1679,6 +1683,54 @@ static void release_openowner(struct nfs4_openowner *oo)
>  	nfs4_put_stateowner(&oo->oo_owner);
>  }
>  
> +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> +					  struct super_block *sb,
> +					  unsigned int sc_types)
> +{
> +	unsigned long id, tmp;
> +	struct nfs4_stid *stid;
> +
> +	spin_lock(&clp->cl_lock);
> +	idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> +		if ((stid->sc_type & sc_types) &&
> +		    stid->sc_status == 0 &&
> +		    stid->sc_file->fi_inode->i_sb == sb) {
> +			refcount_inc(&stid->sc_count);
> +			break;
> +		}
> +	spin_unlock(&clp->cl_lock);
> +	return stid;
> +}
> +

nfsd4_revoke_states() needs a kdoc comment.


> +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	unsigned int idhashval;
> +	unsigned int sc_types;
> +
> +	sc_types = 0;
> +
> +	spin_lock(&nn->client_lock);
> +	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> +		struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> +		struct nfs4_client *clp;
> +	retry:
> +		list_for_each_entry(clp, head, cl_idhash) {
> +			struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> +								  sc_types);
> +			if (stid) {
> +				spin_unlock(&nn->client_lock);
> +				switch (stid->sc_type) {

This is "dead" code, for now. Does this stub really need to be
introduced in this patch?


> +				}
> +				nfs4_put_stid(stid);
> +				spin_lock(&nn->client_lock);
> +				goto retry;
> +			}
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +}
> +
>  static inline int
>  hash_sessionid(struct nfs4_sessionid *sessionid)
>  {
> @@ -2550,6 +2602,8 @@ static int client_info_show(struct seq_file *m, void *v)
>  	}
>  	seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
>  	seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> +	seq_printf(m, "admin-revoked states: %d\n",
> +		   atomic_read(&clp->cl_admin_revoked));
>  	drop_client(clp);
>  
>  	return 0;
> @@ -4109,6 +4163,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	}
>  	if (!list_empty(&clp->cl_revoked))
>  		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> +	if (atomic_read(&clp->cl_admin_revoked))
> +		seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
>  out_no_session:
>  	if (conn)
>  		free_conn(conn);
> @@ -4597,7 +4653,9 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
>  {
>  	__be32 ret = nfs_ok;
>  
> -	if (s->sc_status & NFS4_STID_REVOKED)
> +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		ret = nfserr_admin_revoked;
> +	else if (s->sc_status & NFS4_STID_REVOKED)
>  		ret = nfserr_deleg_revoked;
>  	else if (s->sc_status & NFS4_STID_CLOSED)
>  		ret = nfserr_bad_stateid;
> @@ -5188,6 +5246,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
>  	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
>  	if (deleg == NULL)
>  		goto out;
> +	if (deleg->dl_stid.sc_status & NFS4_STID_ADMIN_REVOKED) {
> +		nfs4_put_stid(&deleg->dl_stid);
> +		status = nfserr_admin_revoked;
> +		goto out;
> +	}
>  	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
>  		nfs4_put_stid(&deleg->dl_stid);
>  		status = nfserr_deleg_revoked;
> @@ -6508,6 +6571,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>  		 */
>  		statusmask |= NFS4_STID_REVOKED;
>  
> +	statusmask |= NFS4_STID_ADMIN_REVOKED;
> +
>  	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
>  		CLOSE_STATEID(stateid))
>  		return nfserr_bad_stateid;
> @@ -6526,6 +6591,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>  		nfs4_put_stid(stid);
>  		return nfserr_deleg_revoked;
>  	}
> +	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
> +		nfs4_put_stid(stid);
> +		return nfserr_admin_revoked;
> +	}
>  	*s = stid;
>  	return nfs_ok;
>  }
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index d6eeee149370..a622d773f428 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -285,6 +285,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
>  	 * 3.  Is that directory the root of an exported file system?
>  	 */
>  	error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> +	nfsd4_revoke_states(netns(file), path.dentry->d_sb);
>  
>  	path_put(&path);
>  	return error;
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index f5ff42f41ee7..d46203eac3c8 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -280,6 +280,7 @@ void		nfsd_lockd_shutdown(void);
>  #define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
>  #define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
>  #define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
> +#define	nfserr_admin_revoked	cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
>  #define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
>  #define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
>  #define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index bb00dcd4c1ba..584378c43e0a 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -112,6 +112,7 @@ struct nfs4_stid {
>  #define NFS4_STID_CLOSED	BIT(0)
>  /* For a deleg stateid kept around only to process free_stateid's: */
>  #define NFS4_STID_REVOKED	BIT(1)
> +#define NFS4_STID_ADMIN_REVOKED	BIT(2)

The names of these mask bits are now getting to be visually
indistinguishable from the stateid type names. The subtlety of
where the _STID_ falls in the name makes me blink a few times when
reading this code.

It would be a little more friendly to add _STATUS_ or some other
infix that makes it easy to tell these are not stateid types. I
know that makes the names longer and more unwieldy.


>  	unsigned short		sc_status;
>  
>  	struct list_head	sc_cp_list;
> @@ -388,6 +389,7 @@ struct nfs4_client {
>  	clientid_t		cl_clientid;	/* generated by server */
>  	nfs4_verifier		cl_confirm;	/* generated by server */
>  	u32			cl_minorversion;
> +	atomic_t		cl_admin_revoked; /* count of admin-revoked states */
>  	/* NFSv4.1 client implementation id: */
>  	struct xdr_netobj	cl_nii_domain;
>  	struct xdr_netobj	cl_nii_name;
> @@ -752,6 +754,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
>  }
>  struct nfsd_file *find_any_file(struct nfs4_file *f);
>  
> +#ifdef CONFIG_NFSD_V4
> +void nfsd4_revoke_states(struct net *net, struct super_block *sb);
> +#else
> +static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> +{
> +}
> +#endif
> +
>  /* grace period management */
>  void nfsd4_end_grace(struct nfsd_net *nn);
>  
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 568b4ec9a2af..281aeb42c9eb 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -651,7 +651,8 @@ DEFINE_STATESEQID_EVENT(open_confirm);
>  #define show_stid_status(x)						\
>  	__print_flags(x, "|",						\
>  		{ NFS4_STID_CLOSED,		"CLOSED" },		\
> -		{ NFS4_STID_REVOKED,		"REVOKED" })		\
> +		{ NFS4_STID_REVOKED,		"REVOKED" },		\
> +		{ NFS4_STID_ADMIN_REVOKED,	"ADMIN_REVOKED" })
>  
>  DECLARE_EVENT_CLASS(nfsd_stid_class,
>  	TP_PROTO(
> -- 
> 2.42.1
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 06/11] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states
  2023-11-24  0:28 ` [PATCH 06/11] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states NeilBrown
@ 2023-11-26 17:41   ` Chuck Lever
  0 siblings, 0 replies; 26+ messages in thread
From: Chuck Lever @ 2023-11-26 17:41 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, Nov 24, 2023 at 11:28:41AM +1100, NeilBrown wrote:
> Change the "show" functions to show some content even if a file cannot
> be found.
> This is primarily useful for debugging - to ensure states are being
> removed eventually.
> 
> Also remove a "Kinda dead" comment which is no longer correct as we
> now support write delegations.

Nit: I know it's in the same piece of code, but the "Kinda dead"
clean-up is perhaps not relevant to the purpose of this patch. Maybe
do it as a separate patch?


> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 82 ++++++++++++++++++++++-----------------------
>  1 file changed, 41 insertions(+), 41 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 477a9e9aebbd..52e680235afe 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2680,17 +2680,10 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
>  	struct nfs4_stateowner *oo;
>  	unsigned int access, deny;
>  
> -	if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
> -		return 0; /* XXX: or SEQ_SKIP? */
>  	ols = openlockstateid(st);
>  	oo = ols->st_stateowner;
>  	nf = st->sc_file;
>  
> -	spin_lock(&nf->fi_lock);
> -	file = find_any_file_locked(nf);
> -	if (!file)
> -		goto out;
> -
>  	seq_printf(s, "- ");
>  	nfs4_show_stateid(s, &st->sc_stateid);
>  	seq_printf(s, ": { type: open, ");
> @@ -2705,14 +2698,19 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
>  		deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
>  		deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
>  
> -	nfs4_show_superblock(s, file);
> -	seq_printf(s, ", ");
> -	nfs4_show_fname(s, file);
> -	seq_printf(s, ", ");
> +	spin_lock(&nf->fi_lock);
> +	file = find_any_file_locked(nf);
> +	if (file) {
> +		nfs4_show_superblock(s, file);
> +		seq_puts(s, ", ");
> +		nfs4_show_fname(s, file);
> +		seq_puts(s, ", ");
> +	}
> +	spin_unlock(&nf->fi_lock);
>  	nfs4_show_owner(s, oo);
> +	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		seq_puts(s, ", admin-revoked");

Wondering if this addition (and the other similar additions below)
would be more appropriately done in the previous patch. These
additions seem to have a different purpose than the purpose stated
in the patch description: "Change the 'show' functions to show some
content even if a file cannot be found."

Just a thought.


>  	seq_printf(s, " }\n");
> -out:
> -	spin_unlock(&nf->fi_lock);
>  	return 0;
>  }
>  
> @@ -2726,30 +2724,31 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
>  	ols = openlockstateid(st);
>  	oo = ols->st_stateowner;
>  	nf = st->sc_file;
> -	spin_lock(&nf->fi_lock);
> -	file = find_any_file_locked(nf);
> -	if (!file)
> -		goto out;
>  
>  	seq_printf(s, "- ");
>  	nfs4_show_stateid(s, &st->sc_stateid);
>  	seq_printf(s, ": { type: lock, ");
>  
> -	/*
> -	 * Note: a lock stateid isn't really the same thing as a lock,
> -	 * it's the locking state held by one owner on a file, and there
> -	 * may be multiple (or no) lock ranges associated with it.
> -	 * (Same for the matter is true of open stateids.)
> -	 */
> +	spin_lock(&nf->fi_lock);
> +	file = find_any_file_locked(nf);
> +	if (file) {
> +		/*
> +		 * Note: a lock stateid isn't really the same thing as a lock,
> +		 * it's the locking state held by one owner on a file, and there
> +		 * may be multiple (or no) lock ranges associated with it.
> +		 * (Same for the matter is true of open stateids.)
> +		 */
>  
> -	nfs4_show_superblock(s, file);
> -	/* XXX: open stateid? */
> -	seq_printf(s, ", ");
> -	nfs4_show_fname(s, file);
> -	seq_printf(s, ", ");
> +		nfs4_show_superblock(s, file);
> +		/* XXX: open stateid? */
> +		seq_puts(s, ", ");
> +		nfs4_show_fname(s, file);
> +		seq_puts(s, ", ");
> +	}
>  	nfs4_show_owner(s, oo);
> +	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		seq_puts(s, ", admin-revoked");
>  	seq_printf(s, " }\n");
> -out:
>  	spin_unlock(&nf->fi_lock);
>  	return 0;
>  }
> @@ -2762,27 +2761,28 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
>  
>  	ds = delegstateid(st);
>  	nf = st->sc_file;
> -	spin_lock(&nf->fi_lock);
> -	file = nf->fi_deleg_file;
> -	if (!file)
> -		goto out;
>  
>  	seq_printf(s, "- ");
>  	nfs4_show_stateid(s, &st->sc_stateid);
>  	seq_printf(s, ": { type: deleg, ");
>  
> -	/* Kinda dead code as long as we only support read delegs: */
> -	seq_printf(s, "access: %s, ",
> -		ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
> +	seq_printf(s, "access: %s",
> +		   ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
>  
>  	/* XXX: lease time, whether it's being recalled. */
>  
> -	nfs4_show_superblock(s, file);
> -	seq_printf(s, ", ");
> -	nfs4_show_fname(s, file);
> -	seq_printf(s, " }\n");
> -out:
> +	spin_lock(&nf->fi_lock);
> +	file = nf->fi_deleg_file;
> +	if (file) {
> +		seq_puts(s, ", ");
> +		nfs4_show_superblock(s, file);
> +		seq_puts(s, ", ");
> +		nfs4_show_fname(s, file);
> +	}
>  	spin_unlock(&nf->fi_lock);
> +	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		seq_puts(s, ", admin-revoked");
> +	seq_puts(s, " }\n");
>  	return 0;
>  }
>  
> -- 
> 2.42.1
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed.
  2023-11-24  0:28 ` [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed NeilBrown
@ 2023-11-26 17:54   ` Chuck Lever
  2024-01-19  1:41     ` NeilBrown
  2023-11-26 18:07   ` Chuck Lever
  1 sibling, 1 reply; 26+ messages in thread
From: Chuck Lever @ 2023-11-26 17:54 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, Nov 24, 2023 at 11:28:42AM +1100, NeilBrown wrote:
> For NFSv4.1 and later the client easily discovers if there is any
> admin-revoked state and will then find and explicitly free it.
> 
> For NFSv4.0 there is no such mechanism.  The client can only find that
> state is admin-revoked if it tries to use that state, and there is no
> way for it to explicitly free the state.  So the server must hold on to
> the stateid (at least) for an indefinite amount of time.  A
> RELEASE_LOCKOWNER request might justify forgetting some of these
> stateids, as would the whole clients lease lapsing, but these are not
> reliable.

They aren't reliable, but what are the consequences of revoked
state left on the server? Seems like our implementation has a
number of mechanisms for cleaning up state over time. Do you feel
this is a denial-of-service vector?


> This patch takes two approaches.
> 
> Whenever a client uses an revoked stateid, that stateid is then
> discarded and will not be recognised again.  This might confuse a client
> which expect to get NFS4ERR_ADMIN_REVOKED consistently once it get it at
> all, but should mostly work.  Hopefully one error will lead to other
> resources being closed (e.g.  process exits), which will result in more
> stateid being freed when a CLOSE attempt gets NFS4ERR_ADMIN_REVOKED.

I'm leery of this: "This might confuse..." and "Hopefully..." suggest
we're not real sure how this will behave in practice with the current
cohort of client implementations.

Also, this paragraph in Section 10.2.1 of RFC 7530 is concerning:

>  A client normally finds out about revocation of a delegation when it
>  uses a stateid associated with a delegation and receives one of the
>  errors NFS4ERR_EXPIRED, NFS4ERR_BAD_STATEID, or NFS4ERR_ADMIN_REVOKED
>  (NFS4ERR_EXPIRED indicates that all lock state associated with the
>  client has been lost).  It also may find out about delegation
>  revocation after a client reboot when it attempts to reclaim a
>  delegation and receives NFS4ERR_EXPIRED.  Note that in the case of a
>  revoked OPEN_DELEGATE_WRITE delegation, there are issues because data
>  may have been modified by the client whose delegation is revoked and,
>  separately, by other clients.  See Section 10.5.1 for a discussion of
>  such issues.  Note also that when delegations are revoked,
>  information about the revoked delegation will be written by the
>  server to stable storage (as described in Section 9.6).  This is done
>  to deal with the case in which a server reboots after revoking a
>  delegation but before the client holding the revoked delegation is
>  notified about the revocation.

The text here suggests that the server persists the ADMIN_REVOKED
status, which suggests to me that the server is supposed to continue
returning ADMIN_REVOKED when presented with the revoked state,
until the state is freed.

AFAICT NFSD isn't recording this status persistently... Is there a
plan to add that (later) or some words suggesting that it is safe
and reasonable not to record it?


> Also, any admin-revoked stateids that have been that way for more than
> one lease time are periodically revoke.
> 
> No actual freeing of state happens in this patch.  That will come in
> future patches which handle the different sorts of revoked state.
>
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/netns.h     |  4 ++
>  fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 100 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index ab303a8b77d5..7458f672b33e 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -197,6 +197,10 @@ struct nfsd_net {
>  	atomic_t		nfsd_courtesy_clients;
>  	struct shrinker		*nfsd_client_shrinker;
>  	struct work_struct	nfsd_shrinker_work;
> +
> +	/* last time an admin-revoke happened for NFSv4.0 */
> +	time64_t		nfs40_last_revoke;
> +
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 52e680235afe..c57f2ff954cb 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1724,6 +1724,14 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
>  				}
>  				nfs4_put_stid(stid);
>  				spin_lock(&nn->client_lock);
> +				if (clp->cl_minorversion == 0)
> +					/* Allow cleanup after a lease period.
> +					 * store_release ensures cleanup will
> +					 * see any newly revoked states if it
> +					 * sees the time updated.
> +					 */
> +					nn->nfs40_last_revoke =
> +						ktime_get_boottime_seconds();
>  				goto retry;
>  			}
>  		}
> @@ -4648,6 +4656,39 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  	return ret;
>  }
>  
> +static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
> +{
> +	struct nfs4_client *cl = s->sc_client;
> +
> +	switch (s->sc_type) {
> +	default:
> +		spin_unlock(&cl->cl_lock);
> +	}
> +}
> +
> +static void nfs40_drop_revoked_stid(struct nfs4_client *cl,
> +				    stateid_t *stid)

Nits: I'd prefer nfsd4_drop_revoked_stid() and nfsd40_drop_revoked_stid()


> +{
> +	/* NFSv4.0 has no way for the client to tell the server
> +	 * that it can forget an admin-revoked stateid.
> +	 * So we keep it around until the first time that the
> +	 * client uses it, and drop it the first time
> +	 * nfserr_admin_revoked is returned.
> +	 * For v4.1 and later we wait until explicitly told
> +	 * to free the stateid.
> +	 */
> +	if (cl->cl_minorversion == 0) {
> +		struct nfs4_stid *st;
> +
> +		spin_lock(&cl->cl_lock);
> +		st = find_stateid_locked(cl, stid);
> +		if (st)
> +			nfsd_drop_revoked_stid(st);
> +		else
> +			spin_unlock(&cl->cl_lock);
> +	}
> +}
> +
>  static __be32
>  nfsd4_verify_open_stid(struct nfs4_stid *s)
>  {
> @@ -4670,6 +4711,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
>  
>  	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
>  	ret = nfsd4_verify_open_stid(&stp->st_stid);
> +	if (ret == nfserr_admin_revoked)
> +		nfs40_drop_revoked_stid(stp->st_stid.sc_client,
> +					&stp->st_stid.sc_stateid);
> +
>  	if (ret != nfs_ok)
>  		mutex_unlock(&stp->st_mutex);
>  	return ret;
> @@ -5253,6 +5298,7 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
>  	}
>  	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
>  		nfs4_put_stid(&deleg->dl_stid);
> +		nfs40_drop_revoked_stid(cl, &open->op_delegate_stateid);
>  		status = nfserr_deleg_revoked;
>  		goto out;
>  	}
> @@ -6251,6 +6297,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist)
>  	}
>  }
>  
> +static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
> +				      struct laundry_time *lt)
> +{
> +	struct nfs4_client *clp;
> +
> +	spin_lock(&nn->client_lock);
> +	if (nn->nfs40_last_revoke == 0 ||
> +	    nn->nfs40_last_revoke > lt->cutoff) {
> +		spin_unlock(&nn->client_lock);
> +		return;
> +	}
> +	nn->nfs40_last_revoke = 0;
> +
> +retry:
> +	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
> +		unsigned long id, tmp;
> +		struct nfs4_stid *stid;
> +
> +		if (atomic_read(&clp->cl_admin_revoked) == 0)
> +			continue;
> +
> +		spin_lock(&clp->cl_lock);
> +		idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> +			if (stid->sc_status & NFS4_STID_ADMIN_REVOKED) {
> +				refcount_inc(&stid->sc_count);
> +				spin_unlock(&nn->client_lock);
> +				/* this function drops ->cl_lock */
> +				nfsd_drop_revoked_stid(stid);
> +				nfs4_put_stid(stid);
> +				spin_lock(&nn->client_lock);
> +				goto retry;
> +			}
> +		spin_unlock(&clp->cl_lock);
> +	}
> +	spin_unlock(&nn->client_lock);
> +}
> +
>  static time64_t
>  nfs4_laundromat(struct nfsd_net *nn)
>  {
> @@ -6284,6 +6367,8 @@ nfs4_laundromat(struct nfsd_net *nn)
>  	nfs4_get_client_reaplist(nn, &reaplist, &lt);
>  	nfs4_process_client_reaplist(&reaplist);
>  
> +	nfs40_clean_admin_revoked(nn, &lt);
> +
>  	spin_lock(&state_lock);
>  	list_for_each_safe(pos, next, &nn->del_recall_lru) {
>  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
> @@ -6502,6 +6587,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
>  	if (ret == nfs_ok)
>  		ret = check_stateid_generation(in, &s->sc_stateid, has_session);
>  	spin_unlock(&s->sc_lock);
> +	if (ret == nfserr_admin_revoked)
> +		nfs40_drop_revoked_stid(s->sc_client,
> +					&s->sc_stateid);
>  	return ret;
>  }
>  
> @@ -6546,6 +6634,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>  	}
>  out_unlock:
>  	spin_unlock(&cl->cl_lock);
> +	if (status == nfserr_admin_revoked)
> +		nfs40_drop_revoked_stid(cl, stateid);
>  	return status;
>  }
>  
> @@ -6592,6 +6682,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>  		return nfserr_deleg_revoked;
>  	}
>  	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
> +		nfs40_drop_revoked_stid(cstate->clp, stateid);
>  		nfs4_put_stid(stid);
>  		return nfserr_admin_revoked;
>  	}
> @@ -6884,6 +6975,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	s = find_stateid_locked(cl, stateid);
>  	if (!s || s->sc_status & NFS4_STID_CLOSED)
>  		goto out_unlock;
> +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED) {
> +		nfsd_drop_revoked_stid(s);
> +		ret = nfs_ok;
> +		goto out;
> +	}
>  	spin_lock(&s->sc_lock);
>  	switch (s->sc_type) {
>  	case NFS4_DELEG_STID:
> @@ -6910,7 +7006,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		spin_unlock(&cl->cl_lock);
>  		ret = nfsd4_free_lock_stateid(stateid, s);
>  		goto out;
> -	/* Default falls through and returns nfserr_bad_stateid */
>  	}
>  	spin_unlock(&s->sc_lock);
>  out_unlock:
> -- 
> 2.42.1
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed.
  2023-11-24  0:28 ` [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed NeilBrown
  2023-11-26 17:54   ` Chuck Lever
@ 2023-11-26 18:07   ` Chuck Lever
  2024-01-19  1:43     ` NeilBrown
  1 sibling, 1 reply; 26+ messages in thread
From: Chuck Lever @ 2023-11-26 18:07 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, Nov 24, 2023 at 11:28:42AM +1100, NeilBrown wrote:
> For NFSv4.1 and later the client easily discovers if there is any
> admin-revoked state and will then find and explicitly free it.
> 
> For NFSv4.0 there is no such mechanism.  The client can only find that
> state is admin-revoked if it tries to use that state, and there is no
> way for it to explicitly free the state.  So the server must hold on to
> the stateid (at least) for an indefinite amount of time.  A
> RELEASE_LOCKOWNER request might justify forgetting some of these
> stateids, as would the whole clients lease lapsing, but these are not
> reliable.
> 
> This patch takes two approaches.
> 
> Whenever a client uses an revoked stateid, that stateid is then
> discarded and will not be recognised again.  This might confuse a client
> which expect to get NFS4ERR_ADMIN_REVOKED consistently once it get it at
> all, but should mostly work.  Hopefully one error will lead to other
> resources being closed (e.g.  process exits), which will result in more
> stateid being freed when a CLOSE attempt gets NFS4ERR_ADMIN_REVOKED.
> 
> Also, any admin-revoked stateids that have been that way for more than
> one lease time are periodically revoke.
> 
> No actual freeing of state happens in this patch.  That will come in
> future patches which handle the different sorts of revoked state.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/netns.h     |  4 ++
>  fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 100 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index ab303a8b77d5..7458f672b33e 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -197,6 +197,10 @@ struct nfsd_net {
>  	atomic_t		nfsd_courtesy_clients;
>  	struct shrinker		*nfsd_client_shrinker;
>  	struct work_struct	nfsd_shrinker_work;
> +
> +	/* last time an admin-revoke happened for NFSv4.0 */
> +	time64_t		nfs40_last_revoke;
> +
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 52e680235afe..c57f2ff954cb 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1724,6 +1724,14 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
>  				}
>  				nfs4_put_stid(stid);
>  				spin_lock(&nn->client_lock);
> +				if (clp->cl_minorversion == 0)
> +					/* Allow cleanup after a lease period.
> +					 * store_release ensures cleanup will
> +					 * see any newly revoked states if it
> +					 * sees the time updated.
> +					 */
> +					nn->nfs40_last_revoke =
> +						ktime_get_boottime_seconds();
>  				goto retry;
>  			}
>  		}
> @@ -4648,6 +4656,39 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  	return ret;
>  }
>  
> +static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
> +{
> +	struct nfs4_client *cl = s->sc_client;
> +
> +	switch (s->sc_type) {
> +	default:
> +		spin_unlock(&cl->cl_lock);
> +	}
> +}

I'm not in love with unlocking cl_lock inside nfsd_drop_revoked_stid,
but I understand why it's necessary. How about:

static void nfsd4_drop_revoked_stid_unlock(struct nfs4_client *cl,
					   struct nfs4_stid *s)
	__releases(&cl->cl_lock)
{
	....


> +
> +static void nfs40_drop_revoked_stid(struct nfs4_client *cl,
> +				    stateid_t *stid)
> +{
> +	/* NFSv4.0 has no way for the client to tell the server
> +	 * that it can forget an admin-revoked stateid.
> +	 * So we keep it around until the first time that the
> +	 * client uses it, and drop it the first time
> +	 * nfserr_admin_revoked is returned.
> +	 * For v4.1 and later we wait until explicitly told
> +	 * to free the stateid.
> +	 */
> +	if (cl->cl_minorversion == 0) {
> +		struct nfs4_stid *st;
> +
> +		spin_lock(&cl->cl_lock);
> +		st = find_stateid_locked(cl, stid);
> +		if (st)
> +			nfsd_drop_revoked_stid(st);
> +		else
> +			spin_unlock(&cl->cl_lock);
> +	}
> +}
> +
>  static __be32
>  nfsd4_verify_open_stid(struct nfs4_stid *s)
>  {
> @@ -4670,6 +4711,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
>  
>  	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
>  	ret = nfsd4_verify_open_stid(&stp->st_stid);
> +	if (ret == nfserr_admin_revoked)
> +		nfs40_drop_revoked_stid(stp->st_stid.sc_client,
> +					&stp->st_stid.sc_stateid);
> +
>  	if (ret != nfs_ok)
>  		mutex_unlock(&stp->st_mutex);
>  	return ret;
> @@ -5253,6 +5298,7 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
>  	}
>  	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
>  		nfs4_put_stid(&deleg->dl_stid);
> +		nfs40_drop_revoked_stid(cl, &open->op_delegate_stateid);
>  		status = nfserr_deleg_revoked;
>  		goto out;
>  	}
> @@ -6251,6 +6297,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist)
>  	}
>  }
>  
> +static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
> +				      struct laundry_time *lt)
> +{
> +	struct nfs4_client *clp;
> +
> +	spin_lock(&nn->client_lock);
> +	if (nn->nfs40_last_revoke == 0 ||
> +	    nn->nfs40_last_revoke > lt->cutoff) {
> +		spin_unlock(&nn->client_lock);
> +		return;
> +	}
> +	nn->nfs40_last_revoke = 0;
> +
> +retry:
> +	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
> +		unsigned long id, tmp;
> +		struct nfs4_stid *stid;
> +
> +		if (atomic_read(&clp->cl_admin_revoked) == 0)
> +			continue;
> +
> +		spin_lock(&clp->cl_lock);
> +		idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> +			if (stid->sc_status & NFS4_STID_ADMIN_REVOKED) {
> +				refcount_inc(&stid->sc_count);
> +				spin_unlock(&nn->client_lock);
> +				/* this function drops ->cl_lock */
> +				nfsd_drop_revoked_stid(stid);
> +				nfs4_put_stid(stid);
> +				spin_lock(&nn->client_lock);
> +				goto retry;
> +			}
> +		spin_unlock(&clp->cl_lock);
> +	}
> +	spin_unlock(&nn->client_lock);
> +}
> +
>  static time64_t
>  nfs4_laundromat(struct nfsd_net *nn)
>  {
> @@ -6284,6 +6367,8 @@ nfs4_laundromat(struct nfsd_net *nn)
>  	nfs4_get_client_reaplist(nn, &reaplist, &lt);
>  	nfs4_process_client_reaplist(&reaplist);
>  
> +	nfs40_clean_admin_revoked(nn, &lt);
> +
>  	spin_lock(&state_lock);
>  	list_for_each_safe(pos, next, &nn->del_recall_lru) {
>  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
> @@ -6502,6 +6587,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
>  	if (ret == nfs_ok)
>  		ret = check_stateid_generation(in, &s->sc_stateid, has_session);
>  	spin_unlock(&s->sc_lock);
> +	if (ret == nfserr_admin_revoked)
> +		nfs40_drop_revoked_stid(s->sc_client,
> +					&s->sc_stateid);
>  	return ret;
>  }
>  
> @@ -6546,6 +6634,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>  	}
>  out_unlock:
>  	spin_unlock(&cl->cl_lock);
> +	if (status == nfserr_admin_revoked)
> +		nfs40_drop_revoked_stid(cl, stateid);
>  	return status;
>  }
>  
> @@ -6592,6 +6682,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>  		return nfserr_deleg_revoked;
>  	}
>  	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
> +		nfs40_drop_revoked_stid(cstate->clp, stateid);
>  		nfs4_put_stid(stid);
>  		return nfserr_admin_revoked;
>  	}
> @@ -6884,6 +6975,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	s = find_stateid_locked(cl, stateid);
>  	if (!s || s->sc_status & NFS4_STID_CLOSED)
>  		goto out_unlock;
> +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED) {
> +		nfsd_drop_revoked_stid(s);
> +		ret = nfs_ok;
> +		goto out;
> +	}
>  	spin_lock(&s->sc_lock);
>  	switch (s->sc_type) {
>  	case NFS4_DELEG_STID:
> @@ -6910,7 +7006,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		spin_unlock(&cl->cl_lock);
>  		ret = nfsd4_free_lock_stateid(stateid, s);
>  		goto out;
> -	/* Default falls through and returns nfserr_bad_stateid */
>  	}
>  	spin_unlock(&s->sc_lock);
>  out_unlock:
> -- 
> 2.42.1
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
  2023-11-24  0:28 ` [PATCH 11/11] nfsd: allow layout state to be admin-revoked NeilBrown
  2023-11-24 15:23   ` kernel test robot
  2023-11-24 15:41   ` kernel test robot
@ 2023-11-27 15:25   ` Chuck Lever
  2023-12-22 15:01     ` Chuck Lever
  2 siblings, 1 reply; 26+ messages in thread
From: Chuck Lever @ 2023-11-27 15:25 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, Nov 24, 2023 at 11:28:46AM +1100, NeilBrown wrote:
> When there is layout state on a filesystem that is being "unlocked" that
> is now revoked, which involves closing the nfsd_file and releasing the
> vfs lease.
> 
> To avoid races, all users of ->ls_file - after the layout state has been
> successfully created - now need to take a counted reference under
> rcu_read_lock().  To support this, ->fence_client and
> nfsd4_cb_layout_fail() now take a second argument being the nfsd_file.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>

Hi Neil, would you Cc: Christoph Hellwig <hch@lst.de> and Tom Haynes
<loghyr@gmail.com> to this patch next time you post this series?


> ---
>  fs/nfsd/blocklayout.c |  4 ++--
>  fs/nfsd/nfs4layouts.c | 38 +++++++++++++++++++++++++++-----------
>  fs/nfsd/nfs4state.c   | 28 +++++++++++++++++++++-------
>  fs/nfsd/pnfs.h        |  7 ++++++-
>  4 files changed, 56 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 46fd74d91ea9..3c040c81c77d 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
>  }
>  
>  static void
> -nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
> +nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
>  {
>  	struct nfs4_client *clp = ls->ls_stid.sc_client;
> -	struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
> +	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
>  
>  	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
>  			nfsd4_scsi_pr_key(clp), 0, true);
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 77656126ad2a..dbc52413ce57 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -152,6 +152,18 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
>  #endif
>  }
>  
> +void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
> +{
> +	struct nfsd_file *fl = xchg(&ls->ls_file, NULL);
> +
> +	if (fl) {
> +		if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
> +			vfs_setlease(fl->nf_file, F_UNLCK, NULL,
> +				     (void **)&ls);
> +		nfsd_file_put(fl);
> +	}
> +}
> +
>  static void
>  nfsd4_free_layout_stateid(struct nfs4_stid *stid)
>  {
> @@ -169,9 +181,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
>  	list_del_init(&ls->ls_perfile);
>  	spin_unlock(&fp->fi_lock);
>  
> -	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
> -		vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
> -	nfsd_file_put(ls->ls_file);
> +	nfsd4_close_layout(ls);
>  
>  	if (ls->ls_recalled)
>  		atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
> @@ -605,7 +615,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
>  }
>  
>  static void
> -nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
> +nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
>  {
>  	struct nfs4_client *clp = ls->ls_stid.sc_client;
>  	char addr_str[INET6_ADDRSTRLEN];
> @@ -627,7 +637,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
>  
>  	argv[0] = (char *)nfsd_recall_failed;
>  	argv[1] = addr_str;
> -	argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
> +	argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
>  	argv[3] = NULL;
>  
>  	error = call_usermodehelper(nfsd_recall_failed, argv, envp,
> @@ -657,6 +667,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
>  	struct nfsd_net *nn;
>  	ktime_t now, cutoff;
>  	const struct nfsd4_layout_ops *ops;
> +	struct nfsd_file *fl;
>  
>  	trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
>  	switch (task->tk_status) {
> @@ -688,12 +699,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
>  		 * Unknown error or non-responding client, we'll need to fence.
>  		 */
>  		trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
> -
> -		ops = nfsd4_layout_ops[ls->ls_layout_type];
> -		if (ops->fence_client)
> -			ops->fence_client(ls);
> -		else
> -			nfsd4_cb_layout_fail(ls);
> +		rcu_read_lock();
> +		fl = nfsd_file_get(ls->ls_file);
> +		rcu_read_unlock();
> +		if (fl) {
> +			ops = nfsd4_layout_ops[ls->ls_layout_type];
> +			if (ops->fence_client)
> +				ops->fence_client(ls, fl);
> +			else
> +				nfsd4_cb_layout_fail(ls, fl);
> +			nfsd_file_put(fl);
> +		}
>  		return 1;
>  	case -NFS4ERR_NOMATCHING_LAYOUT:
>  		trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 3d85c88ec4d7..d82ca209eb96 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1712,7 +1712,8 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
>  	unsigned int idhashval;
>  	unsigned int sc_types;
>  
> -	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID;
> +	sc_types = (NFS4_OPEN_STID | NFS4_LOCK_STID |
> +		    NFS4_DELEG_STID | NFS4_LAYOUT_STID);
>  
>  	spin_lock(&nn->client_lock);
>  	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> @@ -1725,6 +1726,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
>  			if (stid) {
>  				struct nfs4_ol_stateid *stp;
>  				struct nfs4_delegation *dp;
> +				struct nfs4_layout_stateid *ls;
>  
>  				spin_unlock(&nn->client_lock);
>  				switch (stid->sc_type) {
> @@ -1780,6 +1782,10 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
>  					if (dp)
>  						revoke_delegation(dp);
>  					break;
> +				case NFS4_LAYOUT_STID:
> +					ls = layoutstateid(stid);
> +					nfsd4_close_layout(ls);
> +					break;
>  				}
>  				nfs4_put_stid(stid);
>  				spin_lock(&nn->client_lock);
> @@ -2859,17 +2865,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
>  	struct nfsd_file *file;
>  
>  	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
> -	file = ls->ls_file;
> +	rcu_read_lock();
> +	file = nfsd_file_get(ls->ls_file);
> +	rcu_read_unlock();
>  
> -	seq_printf(s, "- ");
> +	seq_puts(s, "- ");
>  	nfs4_show_stateid(s, &st->sc_stateid);
> -	seq_printf(s, ": { type: layout, ");
> +	seq_puts(s, ": { type: layout");
>  
>  	/* XXX: What else would be useful? */
>  
> -	nfs4_show_superblock(s, file);
> -	seq_printf(s, ", ");
> -	nfs4_show_fname(s, file);
> +	if (file) {
> +		seq_puts(s, ", ");
> +		nfs4_show_superblock(s, file);
> +		seq_puts(s, ", ");
> +		nfs4_show_fname(s, file);
> +		nfsd_file_put(file);
> +	}
> +	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
> +		seq_puts(s, ", admin-revoked");
>  	seq_printf(s, " }\n");
>  
>  	return 0;
> diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> index de1e0dfed06a..f2777577865e 100644
> --- a/fs/nfsd/pnfs.h
> +++ b/fs/nfsd/pnfs.h
> @@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
>  	__be32 (*proc_layoutcommit)(struct inode *inode,
>  			struct nfsd4_layoutcommit *lcp);
>  
> -	void (*fence_client)(struct nfs4_layout_stateid *ls);
> +	void (*fence_client)(struct nfs4_layout_stateid *ls,
> +			     struct nfsd_file *file);
>  };
>  
>  extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
> @@ -72,6 +73,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
>  void nfsd4_return_all_client_layouts(struct nfs4_client *);
>  void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
>  		struct nfs4_file *fp);
> +void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
>  int nfsd4_init_pnfs(void);
>  void nfsd4_exit_pnfs(void);
>  #else
> @@ -89,6 +91,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
>  		struct nfs4_file *fp)
>  {
>  }
> +static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
> +{
> +}
>  static inline void nfsd4_exit_pnfs(void)
>  {
>  }
> -- 
> 2.42.1
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/11] nfsd: allow delegation state ids to be revoked and then freed
  2023-11-24  0:28 ` [PATCH 10/11] nfsd: allow delegation " NeilBrown
@ 2023-11-30  4:48   ` kernel test robot
  0 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2023-11-30  4:48 UTC (permalink / raw)
  To: NeilBrown
  Cc: oe-lkp, lkp, linux-nfs, Chuck Lever, Jeff Layton,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, oliver.sang



Hello,

kernel test robot noticed "WARNING:at_fs/nfsd/nfs4state.c:#nfs4_free_deleg[nfsd]" on:

commit: 927562a91e483a71dd172f74f070503da5e3d9c0 ("[PATCH 10/11] nfsd: allow delegation state ids to be revoked and then freed")
url: https://github.com/intel-lab-lkp/linux/commits/NeilBrown/nfsd-hold-cl_lock-for-hash_delegation_locked/20231124-123723
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git f1a09972a45ae63efbd1587337c4be13b1893330
patch link: https://lore.kernel.org/all/20231124002925.1816-11-neilb@suse.de/
patch subject: [PATCH 10/11] nfsd: allow delegation state ids to be revoked and then freed

in testcase: filebench
version: filebench-x86_64-22620e6-1_20221010
with following parameters:

	disk: 1HDD
	fs: xfs
	fs2: nfsv4
	test: webserver.f
	cpufreq_governor: performance



compiler: gcc-12
test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202311301059.38a31c59-oliver.sang@intel.com



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231130/202311301059.38a31c59-oliver.sang@intel.com


[  307.542673][ T3954] ------------[ cut here ]------------
[  307.548304][ T3954] WARNING: CPU: 43 PID: 3954 at fs/nfsd/nfs4state.c:1075 nfs4_free_deleg+0x68/0xb0 [nfsd]
[  307.558383][ T3954] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfsd auth_rpcgss xfs dm_mod kmem device_dax nd_pmem nd_btt dax_pmem btrfs blake2b_generic xor raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ast ahci drm_shmem_helper libahci mei_me intel_cstate ipmi_ssif ioatdma i2c_i801 acpi_ipmi libata drm_kms_helper mei intel_uncore i2c_smbus joydev intel_pch_thermal dca dax_hmem wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_pad acpi_power_meter drm fuse ip_tables
[  307.622302][ T3954] CPU: 43 PID: 3954 Comm: nfsd Tainted: G S                 6.7.0-rc2-00157-g927562a91e48 #1
[  307.632555][ T3954] Hardware name: Intel Corporation M50CYP2SB1U/M50CYP2SB1U, BIOS SE5C620.86B.01.01.0003.2104260124 04/26/2021
[  307.644544][ T3954] RIP: 0010:nfs4_free_deleg+0x68/0xb0 [nfsd]
[  307.650693][ T3954] Code: 75 46 48 8b 3d c9 99 03 00 e8 b4 be f9 bf f0 48 ff 0d a4 99 03 00 c3 cc cc cc cc 0f 0b 48 8b 56 48 48 8d 46 48 48 39 c2 74 be <0f> 0b 48 8b 56 58 48 8d 46 58 48 39 c2 74 bc 0f 0b 48 8b 56 68 48
[  307.670673][ T3954] RSP: 0018:ffa000000b187ce0 EFLAGS: 00010206
[  307.676902][ T3954] RAX: ff110001498131b0 RBX: ff11000149813168 RCX: 0000000000000000
[  307.685072][ T3954] RDX: ff11000220359598 RSI: ff11000149813168 RDI: ff11000149813168
[  307.693174][ T3954] RBP: ff11001088cca820 R08: ff11001085b73288 R09: ff11001088cca500
[  307.701270][ T3954] R10: ffa000000b187cb0 R11: ffa000000b187cb8 R12: ff11001088cca4c0
[  307.709374][ T3954] R13: ff11000220359560 R14: ff110001abf0e028 R15: ffa000000b187d50
[  307.717474][ T3954] FS:  0000000000000000(0000) GS:ff11001fff4c0000(0000) knlGS:0000000000000000
[  307.726523][ T3954] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  307.733228][ T3954] CR2: 0000000000451c00 CR3: 000000207e01c005 CR4: 0000000000771ef0
[  307.741326][ T3954] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  307.749419][ T3954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  307.757510][ T3954] PKRU: 55555554
[  307.761182][ T3954] Call Trace:
[  307.764594][ T3954]  <TASK>
[  307.767648][ T3954]  ? nfs4_free_deleg+0x68/0xb0 [nfsd]
[  307.773161][ T3954]  ? __warn+0x81/0x130
[  307.777344][ T3954]  ? nfs4_free_deleg+0x68/0xb0 [nfsd]
[  307.782887][ T3954]  ? report_bug+0x15d/0x1b0
[  307.787494][ T3954]  ? handle_bug+0x3c/0x70
[  307.791978][ T3954]  ? exc_invalid_op+0x17/0x70
[  307.796759][ T3954]  ? asm_exc_invalid_op+0x1a/0x20
[  307.801914][ T3954]  ? nfs4_free_deleg+0x68/0xb0 [nfsd]
[  307.807407][ T3954]  nfs4_put_stid+0x6e/0xb0 [nfsd]
[  307.812556][ T3954]  nfsd4_lookup_stateid+0x11b/0x170 [nfsd]
[  307.818482][ T3954]  nfsd4_delegreturn+0xa8/0x170 [nfsd]
[  307.824060][ T3954]  nfsd4_proc_compound+0x345/0x630 [nfsd]
[  307.829931][ T3954]  nfsd_dispatch+0xfd/0x230 [nfsd]
[  307.835155][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  307.840194][ T3954]  svc_process_common+0x2f5/0x5f0
[  307.845298][ T3954]  ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd]
[  307.851118][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  307.856149][ T3954]  svc_process+0x131/0x170
[  307.860636][ T3954]  svc_handle_xprt+0x4b0/0x5b0
[  307.865468][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  307.870500][ T3954]  svc_recv+0x17e/0x2f0
[  307.874720][ T3954]  nfsd+0x77/0xf0 [nfsd]
[  307.879049][ T3954]  kthread+0xcd/0x130
[  307.883087][ T3954]  ? __pfx_kthread+0x10/0x10
[  307.887722][ T3954]  ret_from_fork+0x31/0x70
[  307.892183][ T3954]  ? __pfx_kthread+0x10/0x10
[  307.896830][ T3954]  ret_from_fork_asm+0x1b/0x30
[  307.901627][ T3954]  </TASK>
[  307.904679][ T3954] ---[ end trace 0000000000000000 ]---
[  307.910161][ T3954] ------------[ cut here ]------------
[  307.915636][ T3954] WARNING: CPU: 43 PID: 3954 at fs/nfsd/nfs4state.c:1076 nfs4_free_deleg+0x77/0xb0 [nfsd]
[  307.925562][ T3954] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfsd auth_rpcgss xfs dm_mod kmem device_dax nd_pmem nd_btt dax_pmem btrfs blake2b_generic xor raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ast ahci drm_shmem_helper libahci mei_me intel_cstate ipmi_ssif ioatdma i2c_i801 acpi_ipmi libata drm_kms_helper mei intel_uncore i2c_smbus joydev intel_pch_thermal dca dax_hmem wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_pad acpi_power_meter drm fuse ip_tables
[  307.988853][ T3954] CPU: 43 PID: 3954 Comm: nfsd Tainted: G S      W          6.7.0-rc2-00157-g927562a91e48 #1
[  307.999025][ T3954] Hardware name: Intel Corporation M50CYP2SB1U/M50CYP2SB1U, BIOS SE5C620.86B.01.01.0003.2104260124 04/26/2021
[  308.010876][ T3954] RIP: 0010:nfs4_free_deleg+0x77/0xb0 [nfsd]
[  308.016969][ T3954] Code: 48 ff 0d a4 99 03 00 c3 cc cc cc cc 0f 0b 48 8b 56 48 48 8d 46 48 48 39 c2 74 be 0f 0b 48 8b 56 58 48 8d 46 58 48 39 c2 74 bc <0f> 0b 48 8b 56 68 48 8d 46 68 48 39 c2 74 ba 0f 0b eb b6 66 66 2e
[  308.036795][ T3954] RSP: 0018:ffa000000b187ce0 EFLAGS: 00010283
[  308.042959][ T3954] RAX: ff110001498131c0 RBX: ff11000149813168 RCX: 0000000000000000
[  308.050984][ T3954] RDX: ff11000149810388 RSI: ff11000149813168 RDI: ff11000149813168
[  308.059011][ T3954] RBP: ff11001088cca820 R08: ff11001085b73288 R09: ff11001088cca500
[  308.067036][ T3954] R10: ffa000000b187cb0 R11: ffa000000b187cb8 R12: ff11001088cca4c0
[  308.075060][ T3954] R13: ff11000220359560 R14: ff110001abf0e028 R15: ffa000000b187d50
[  308.083085][ T3954] FS:  0000000000000000(0000) GS:ff11001fff4c0000(0000) knlGS:0000000000000000
[  308.092068][ T3954] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  308.098710][ T3954] CR2: 0000000000451c00 CR3: 000000207e01c005 CR4: 0000000000771ef0
[  308.106747][ T3954] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  308.114797][ T3954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  308.122857][ T3954] PKRU: 55555554
[  308.126471][ T3954] Call Trace:
[  308.129852][ T3954]  <TASK>
[  308.132887][ T3954]  ? nfs4_free_deleg+0x77/0xb0 [nfsd]
[  308.138353][ T3954]  ? __warn+0x81/0x130
[  308.142486][ T3954]  ? nfs4_free_deleg+0x77/0xb0 [nfsd]
[  308.148003][ T3954]  ? report_bug+0x15d/0x1b0
[  308.152574][ T3954]  ? handle_bug+0x3c/0x70
[  308.156970][ T3954]  ? exc_invalid_op+0x17/0x70
[  308.161708][ T3954]  ? asm_exc_invalid_op+0x1a/0x20
[  308.166817][ T3954]  ? nfs4_free_deleg+0x77/0xb0 [nfsd]
[  308.172280][ T3954]  nfs4_put_stid+0x6e/0xb0 [nfsd]
[  308.177393][ T3954]  nfsd4_lookup_stateid+0x11b/0x170 [nfsd]
[  308.183290][ T3954]  nfsd4_delegreturn+0xa8/0x170 [nfsd]
[  308.188864][ T3954]  nfsd4_proc_compound+0x345/0x630 [nfsd]
[  308.194680][ T3954]  nfsd_dispatch+0xfd/0x230 [nfsd]
[  308.199925][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.204965][ T3954]  svc_process_common+0x2f5/0x5f0
[  308.210054][ T3954]  ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd]
[  308.215907][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.220991][ T3954]  svc_process+0x131/0x170
[  308.225468][ T3954]  svc_handle_xprt+0x4b0/0x5b0
[  308.230299][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.235327][ T3954]  svc_recv+0x17e/0x2f0
[  308.239542][ T3954]  nfsd+0x77/0xf0 [nfsd]
[  308.243909][ T3954]  kthread+0xcd/0x130
[  308.247959][ T3954]  ? __pfx_kthread+0x10/0x10
[  308.252594][ T3954]  ret_from_fork+0x31/0x70
[  308.257057][ T3954]  ? __pfx_kthread+0x10/0x10
[  308.261686][ T3954]  ret_from_fork_asm+0x1b/0x30
[  308.266478][ T3954]  </TASK>
[  308.269533][ T3954] ---[ end trace 0000000000000000 ]---
[  308.275016][ T3954] ------------[ cut here ]------------
[  308.280488][ T3954] WARNING: CPU: 43 PID: 3954 at fs/nfsd/nfs4state.c:598 put_nfs4_file+0x5c/0x70 [nfsd]
[  308.290156][ T3954] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfsd auth_rpcgss xfs dm_mod kmem device_dax nd_pmem nd_btt dax_pmem btrfs blake2b_generic xor raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ast ahci drm_shmem_helper libahci mei_me intel_cstate ipmi_ssif ioatdma i2c_i801 acpi_ipmi libata drm_kms_helper mei intel_uncore i2c_smbus joydev intel_pch_thermal dca dax_hmem wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_pad acpi_power_meter drm fuse ip_tables
[  308.353356][ T3954] CPU: 43 PID: 3954 Comm: nfsd Tainted: G S      W          6.7.0-rc2-00157-g927562a91e48 #1
[  308.363522][ T3954] Hardware name: Intel Corporation M50CYP2SB1U/M50CYP2SB1U, BIOS SE5C620.86B.01.01.0003.2104260124 04/26/2021
[  308.375339][ T3954] RIP: 0010:put_nfs4_file+0x5c/0x70 [nfsd]
[  308.381204][ T3954] Code: 48 39 c2 75 29 48 8b 43 38 48 8d 7b 38 48 39 c7 75 18 48 c7 c6 90 07 48 c1 5b e9 6f 40 d4 bf be 03 00 00 00 5b e9 e4 fa 2e c0 <0f> 0b eb e4 0f 0b eb d3 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90
[  308.401096][ T3954] RSP: 0018:ffa000000b187d00 EFLAGS: 00010206
[  308.407207][ T3954] RAX: ff110001498131b0 RBX: ff11000220359560 RCX: 00000000000003e7
[  308.415227][ T3954] RDX: ff110002203595a8 RSI: 00000000cccccccd RDI: ff11000220359598
[  308.423248][ T3954] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffffc1480801
[  308.431277][ T3954] R10: ff11000149813168 R11: 0000000000000001 R12: 0000000000000004
[  308.439303][ T3954] R13: 000000003f270000 R14: ff110001abf0e028 R15: ffa000000b187d50
[  308.447330][ T3954] FS:  0000000000000000(0000) GS:ff11001fff4c0000(0000) knlGS:0000000000000000
[  308.456317][ T3954] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  308.462972][ T3954] CR2: 0000000000451c00 CR3: 000000207e01c005 CR4: 0000000000771ef0
[  308.471005][ T3954] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  308.479039][ T3954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  308.487076][ T3954] PKRU: 55555554
[  308.490692][ T3954] Call Trace:
[  308.494048][ T3954]  <TASK>
[  308.497052][ T3954]  ? put_nfs4_file+0x5c/0x70 [nfsd]
[  308.502340][ T3954]  ? __warn+0x81/0x130
[  308.506475][ T3954]  ? put_nfs4_file+0x5c/0x70 [nfsd]
[  308.511782][ T3954]  ? report_bug+0x15d/0x1b0
[  308.516353][ T3954]  ? handle_bug+0x3c/0x70
[  308.520742][ T3954]  ? exc_invalid_op+0x17/0x70
[  308.525477][ T3954]  ? asm_exc_invalid_op+0x1a/0x20
[  308.530566][ T3954]  ? __pfx_nfs4_free_deleg+0x1/0x10 [nfsd]
[  308.536460][ T3954]  ? put_nfs4_file+0x5c/0x70 [nfsd]
[  308.541748][ T3954]  nfsd4_lookup_stateid+0x11b/0x170 [nfsd]
[  308.547655][ T3954]  nfsd4_delegreturn+0xa8/0x170 [nfsd]
[  308.553203][ T3954]  nfsd4_proc_compound+0x345/0x630 [nfsd]
[  308.559015][ T3954]  nfsd_dispatch+0xfd/0x230 [nfsd]
[  308.564223][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.569255][ T3954]  svc_process_common+0x2f5/0x5f0
[  308.574345][ T3954]  ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd]
[  308.580150][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.585177][ T3954]  svc_process+0x131/0x170
[  308.589661][ T3954]  svc_handle_xprt+0x4b0/0x5b0
[  308.594494][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.599521][ T3954]  svc_recv+0x17e/0x2f0
[  308.603735][ T3954]  nfsd+0x77/0xf0 [nfsd]
[  308.608074][ T3954]  kthread+0xcd/0x130
[  308.612107][ T3954]  ? __pfx_kthread+0x10/0x10
[  308.616749][ T3954]  ret_from_fork+0x31/0x70
[  308.621218][ T3954]  ? __pfx_kthread+0x10/0x10
[  308.625875][ T3954]  ret_from_fork_asm+0x1b/0x30
[  308.630667][ T3954]  </TASK>
[  308.633720][ T3954] ---[ end trace 0000000000000000 ]---
[  308.639455][ T3954] ------------[ cut here ]------------
[  308.644956][ T3954] refcount_t: addition on 0; use-after-free.
[  308.651007][ T3954] WARNING: CPU: 39 PID: 3954 at lib/refcount.c:25 refcount_warn_saturate+0x9b/0x130
[  308.660431][ T3954] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfsd auth_rpcgss xfs dm_mod kmem device_dax nd_pmem nd_btt dax_pmem btrfs blake2b_generic xor raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ast ahci drm_shmem_helper libahci mei_me intel_cstate ipmi_ssif ioatdma i2c_i801 acpi_ipmi libata drm_kms_helper mei intel_uncore i2c_smbus joydev intel_pch_thermal dca dax_hmem wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_pad acpi_power_meter drm fuse ip_tables
[  308.723741][ T3954] CPU: 39 PID: 3954 Comm: nfsd Tainted: G S      W          6.7.0-rc2-00157-g927562a91e48 #1
[  308.733961][ T3954] Hardware name: Intel Corporation M50CYP2SB1U/M50CYP2SB1U, BIOS SE5C620.86B.01.01.0003.2104260124 04/26/2021
[  308.745793][ T3954] RIP: 0010:refcount_warn_saturate+0x9b/0x130
[  308.751935][ T3954] Code: 01 01 e8 08 f0 98 ff 0f 0b c3 cc cc cc cc 80 3d 12 3e 9e 01 00 75 a8 48 c7 c7 c8 f7 96 82 c6 05 02 3e 9e 01 01 e8 e5 ef 98 ff <0f> 0b c3 cc cc cc cc 80 3d ee 3d 9e 01 00 75 85 48 c7 c7 f8 f7 96
[  308.771763][ T3954] RSP: 0018:ffa000000b187c08 EFLAGS: 00010282
[  308.777918][ T3954] RAX: 0000000000000000 RBX: ff11000149813168 RCX: 0000000000000000
[  308.785997][ T3954] RDX: ff11001fff3ecd00 RSI: ff11001fff3e0840 RDI: ff11001fff3e0840
[  308.794022][ T3954] RBP: ff11000220359574 R08: 0000000000000000 R09: ffa000000b187a98
[  308.802048][ T3954] R10: 0000000000000003 R11: ffffffff82fdd6e8 R12: ff110001c8bf0870
[  308.810073][ T3954] R13: ff11000220359560 R14: ff1100207a3ad368 R15: 000000010000c13c
[  308.818102][ T3954] FS:  0000000000000000(0000) GS:ff11001fff3c0000(0000) knlGS:0000000000000000
[  308.827095][ T3954] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  308.833742][ T3954] CR2: 0000000000451c00 CR3: 000000108649e002 CR4: 0000000000771ef0
[  308.841794][ T3954] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  308.849859][ T3954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  308.857943][ T3954] PKRU: 55555554
[  308.861565][ T3954] Call Trace:
[  308.864977][ T3954]  <TASK>
[  308.867982][ T3954]  ? refcount_warn_saturate+0x9b/0x130
[  308.873503][ T3954]  ? __warn+0x81/0x130
[  308.877639][ T3954]  ? refcount_warn_saturate+0x9b/0x130
[  308.883168][ T3954]  ? report_bug+0x15d/0x1b0
[  308.887734][ T3954]  ? prb_read_valid+0x1b/0x30
[  308.892481][ T3954]  ? handle_bug+0x3c/0x70
[  308.896911][ T3954]  ? exc_invalid_op+0x17/0x70
[  308.901651][ T3954]  ? asm_exc_invalid_op+0x1a/0x20
[  308.906739][ T3954]  ? refcount_warn_saturate+0x9b/0x130
[  308.912261][ T3954]  ? refcount_warn_saturate+0x9b/0x130
[  308.917790][ T3954]  nfsd_break_deleg_cb+0x161/0x170 [nfsd]
[  308.923605][ T3954]  __break_lease+0x22f/0x6b0
[  308.928267][ T3954]  vfs_unlink+0xe0/0x2b0
[  308.932575][ T3954]  nfsd_unlink+0x18a/0x330 [nfsd]
[  308.937699][ T3954]  nfsd4_remove+0x4f/0xb0 [nfsd]
[  308.942731][ T3954]  nfsd4_proc_compound+0x345/0x630 [nfsd]
[  308.948556][ T3954]  nfsd_dispatch+0xfd/0x230 [nfsd]
[  308.953778][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.958830][ T3954]  svc_process_common+0x2f5/0x5f0
[  308.963971][ T3954]  ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd]
[  308.969797][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.974845][ T3954]  svc_process+0x131/0x170
[  308.979320][ T3954]  svc_handle_xprt+0x4b0/0x5b0
[  308.984138][ T3954]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[  308.989149][ T3954]  svc_recv+0x17e/0x2f0
[  308.993347][ T3954]  nfsd+0x77/0xf0 [nfsd]
[  308.997647][ T3954]  kthread+0xcd/0x130
[  309.001664][ T3954]  ? __pfx_kthread+0x10/0x10
[  309.006277][ T3954]  ret_from_fork+0x31/0x70
[  309.010720][ T3954]  ? __pfx_kthread+0x10/0x10
[  309.015325][ T3954]  ret_from_fork_asm+0x1b/0x30
[  309.020109][ T3954]  </TASK>
[  309.023141][ T3954] ---[ end trace 0000000000000000 ]---
[  400.427843][  T786] list_del corruption. prev->next should be ff110001498131b0, but was ff1100108544ea60. (prev=ff11000220359598)
[  400.439548][  T786] ------------[ cut here ]------------
[  400.444876][  T786] kernel BUG at lib/list_debug.c:62!
[  400.450034][  T786] invalid opcode: 0000 [#1] SMP NOPTI
[  400.455266][  T786] CPU: 4 PID: 786 Comm: kworker/u257:1 Tainted: G S      W          6.7.0-rc2-00157-g927562a91e48 #1
[  400.465964][  T786] Hardware name: Intel Corporation M50CYP2SB1U/M50CYP2SB1U, BIOS SE5C620.86B.01.01.0003.2104260124 04/26/2021
[  400.477438][  T786] Workqueue: nfsd4 laundromat_main [nfsd]
[  400.483084][  T786] RIP: 0010:__list_del_entry_valid_or_report+0xa7/0xf0
[  400.489791][  T786] Code: a1 ff 0f 0b 48 89 fe 48 89 ca 48 c7 c7 c8 02 97 82 e8 ed 20 a1 ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 00 03 97 82 e8 d9 20 a1 ff <0f> 0b 48 89 d1 48 c7 c7 50 03 97 82 48 89 f2 48 89 c6 e8 c2 20 a1
[  400.509242][  T786] RSP: 0018:ffa000000890fdb0 EFLAGS: 00010246
[  400.515165][  T786] RAX: 000000000000006d RBX: ff11000149813168 RCX: 0000000000000000
[  400.522992][  T786] RDX: 0000000000000000 RSI: ff1100103f320840 RDI: ff1100103f320840
[  400.530820][  T786] RBP: ff11000220359574 R08: 0000000000000000 R09: ffa000000890fc58
[  400.538652][  T786] R10: 0000000000000003 R11: ffffffff82fdd6e8 R12: ff110001498131b0
[  400.546478][  T786] R13: ff110001498131d0 R14: ff1100012079c4b0 R15: ff1100012079c4b0
[  400.554307][  T786] FS:  0000000000000000(0000) GS:ff1100103f300000(0000) knlGS:0000000000000000
[  400.563093][  T786] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  400.569538][  T786] CR2: 0000000000451c00 CR3: 000000207e01c002 CR4: 0000000000771ef0
[  400.577366][  T786] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  400.585191][  T786] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  400.593018][  T786] PKRU: 55555554
[  400.596420][  T786] Call Trace:
[  400.599569][  T786]  <TASK>
[  400.602365][  T786]  ? die+0x36/0xb0
[  400.605945][  T786]  ? do_trap+0xda/0x130
[  400.609959][  T786]  ? __list_del_entry_valid_or_report+0xa7/0xf0
[  400.616053][  T786]  ? do_error_trap+0x65/0xb0
[  400.620497][  T786]  ? __list_del_entry_valid_or_report+0xa7/0xf0
[  400.626592][  T786]  ? exc_invalid_op+0x50/0x70
[  400.631131][  T786]  ? __list_del_entry_valid_or_report+0xa7/0xf0
[  400.637223][  T786]  ? asm_exc_invalid_op+0x1a/0x20
[  400.642102][  T786]  ? __list_del_entry_valid_or_report+0xa7/0xf0
[  400.648198][  T786]  unhash_delegation_locked+0xb1/0x130 [nfsd]
[  400.654153][  T786]  nfs4_laundromat+0x54c/0x670 [nfsd]
[  400.659409][  T786]  laundromat_main+0x19/0x70 [nfsd]
[  400.664497][  T786]  process_one_work+0x173/0x330
[  400.669211][  T786]  worker_thread+0x27f/0x3b0
[  400.673658][  T786]  ? __pfx_worker_thread+0x10/0x10
[  400.678625][  T786]  kthread+0xcd/0x130
[  400.682469][  T786]  ? __pfx_kthread+0x10/0x10
[  400.686918][  T786]  ret_from_fork+0x31/0x70
[  400.691188][  T786]  ? __pfx_kthread+0x10/0x10
[  400.695893][  T786]  ret_from_fork_asm+0x1b/0x30
[  400.700743][  T786]  </TASK>
[  400.703858][  T786] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfsd auth_rpcgss xfs dm_mod kmem device_dax nd_pmem nd_btt dax_pmem btrfs blake2b_generic xor raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 sg intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ast ahci drm_shmem_helper libahci mei_me intel_cstate ipmi_ssif ioatdma i2c_i801 acpi_ipmi libata drm_kms_helper mei intel_uncore i2c_smbus joydev intel_pch_thermal dca dax_hmem wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_pad acpi_power_meter drm fuse ip_tables
[  400.767434][  T786] ---[ end trace 0000000000000000 ]---
[  400.783218][  T786] pstore: backend (erst) writing error (-28)
[  400.789259][  T786] RIP: 0010:__list_del_entry_valid_or_report+0xa7/0xf0
[  400.796164][  T786] Code: a1 ff 0f 0b 48 89 fe 48 89 ca 48 c7 c7 c8 02 97 82 e8 ed 20 a1 ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 00 03 97 82 e8 d9 20 a1 ff <0f> 0b 48 89 d1 48 c7 c7 50 03 97 82 48 89 f2 48 89 c6 e8 c2 20 a1
[  400.816092][  T786] RSP: 0018:ffa000000890fdb0 EFLAGS: 00010246
[  400.822214][  T786] RAX: 000000000000006d RBX: ff11000149813168 RCX: 0000000000000000
[  400.830249][  T786] RDX: 0000000000000000 RSI: ff1100103f320840 RDI: ff1100103f320840
[  400.838278][  T786] RBP: ff11000220359574 R08: 0000000000000000 R09: ffa000000890fc58
[  400.846304][  T786] R10: 0000000000000003 R11: ffffffff82fdd6e8 R12: ff110001498131b0
[  400.854330][  T786] R13: ff110001498131d0 R14: ff1100012079c4b0 R15: ff1100012079c4b0
[  400.862357][  T786] FS:  0000000000000000(0000) GS:ff1100103f300000(0000) knlGS:0000000000000000
[  400.871346][  T786] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  400.877996][  T786] CR2: 0000000000451c00 CR3: 000000207e01c002 CR4: 0000000000771ef0
[  400.886025][  T786] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  400.894049][  T786] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  400.902072][  T786] PKRU: 55555554
[  400.905677][  T786] Kernel panic - not syncing: Fatal exception
[  400.957082][  T786] Kernel Offset: disabled


-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
  2023-11-27 15:25   ` Chuck Lever
@ 2023-12-22 15:01     ` Chuck Lever
  2023-12-22 23:02       ` NeilBrown
  0 siblings, 1 reply; 26+ messages in thread
From: Chuck Lever @ 2023-12-22 15:01 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Mon, Nov 27, 2023 at 10:25:59AM -0500, Chuck Lever wrote:
> On Fri, Nov 24, 2023 at 11:28:46AM +1100, NeilBrown wrote:
> > When there is layout state on a filesystem that is being "unlocked" that
> > is now revoked, which involves closing the nfsd_file and releasing the
> > vfs lease.
> > 
> > To avoid races, all users of ->ls_file - after the layout state has been
> > successfully created - now need to take a counted reference under
> > rcu_read_lock().  To support this, ->fence_client and
> > nfsd4_cb_layout_fail() now take a second argument being the nfsd_file.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> 
> Hi Neil, would you Cc: Christoph Hellwig <hch@lst.de> and Tom Haynes
> <loghyr@gmail.com> to this patch next time you post this series?

Re-visiting. Did you send out a v5 of this series and I missed it?


> > ---
> >  fs/nfsd/blocklayout.c |  4 ++--
> >  fs/nfsd/nfs4layouts.c | 38 +++++++++++++++++++++++++++-----------
> >  fs/nfsd/nfs4state.c   | 28 +++++++++++++++++++++-------
> >  fs/nfsd/pnfs.h        |  7 ++++++-
> >  4 files changed, 56 insertions(+), 21 deletions(-)
> > 
> > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > index 46fd74d91ea9..3c040c81c77d 100644
> > --- a/fs/nfsd/blocklayout.c
> > +++ b/fs/nfsd/blocklayout.c
> > @@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
> >  }
> >  
> >  static void
> > -nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
> > +nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
> >  {
> >  	struct nfs4_client *clp = ls->ls_stid.sc_client;
> > -	struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
> > +	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
> >  
> >  	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
> >  			nfsd4_scsi_pr_key(clp), 0, true);
> > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > index 77656126ad2a..dbc52413ce57 100644
> > --- a/fs/nfsd/nfs4layouts.c
> > +++ b/fs/nfsd/nfs4layouts.c
> > @@ -152,6 +152,18 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
> >  #endif
> >  }
> >  
> > +void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
> > +{
> > +	struct nfsd_file *fl = xchg(&ls->ls_file, NULL);
> > +
> > +	if (fl) {
> > +		if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
> > +			vfs_setlease(fl->nf_file, F_UNLCK, NULL,
> > +				     (void **)&ls);
> > +		nfsd_file_put(fl);
> > +	}
> > +}
> > +
> >  static void
> >  nfsd4_free_layout_stateid(struct nfs4_stid *stid)
> >  {
> > @@ -169,9 +181,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
> >  	list_del_init(&ls->ls_perfile);
> >  	spin_unlock(&fp->fi_lock);
> >  
> > -	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
> > -		vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
> > -	nfsd_file_put(ls->ls_file);
> > +	nfsd4_close_layout(ls);
> >  
> >  	if (ls->ls_recalled)
> >  		atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
> > @@ -605,7 +615,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
> >  }
> >  
> >  static void
> > -nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
> > +nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
> >  {
> >  	struct nfs4_client *clp = ls->ls_stid.sc_client;
> >  	char addr_str[INET6_ADDRSTRLEN];
> > @@ -627,7 +637,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
> >  
> >  	argv[0] = (char *)nfsd_recall_failed;
> >  	argv[1] = addr_str;
> > -	argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
> > +	argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
> >  	argv[3] = NULL;
> >  
> >  	error = call_usermodehelper(nfsd_recall_failed, argv, envp,
> > @@ -657,6 +667,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
> >  	struct nfsd_net *nn;
> >  	ktime_t now, cutoff;
> >  	const struct nfsd4_layout_ops *ops;
> > +	struct nfsd_file *fl;
> >  
> >  	trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
> >  	switch (task->tk_status) {
> > @@ -688,12 +699,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
> >  		 * Unknown error or non-responding client, we'll need to fence.
> >  		 */
> >  		trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
> > -
> > -		ops = nfsd4_layout_ops[ls->ls_layout_type];
> > -		if (ops->fence_client)
> > -			ops->fence_client(ls);
> > -		else
> > -			nfsd4_cb_layout_fail(ls);
> > +		rcu_read_lock();
> > +		fl = nfsd_file_get(ls->ls_file);
> > +		rcu_read_unlock();
> > +		if (fl) {
> > +			ops = nfsd4_layout_ops[ls->ls_layout_type];
> > +			if (ops->fence_client)
> > +				ops->fence_client(ls, fl);
> > +			else
> > +				nfsd4_cb_layout_fail(ls, fl);
> > +			nfsd_file_put(fl);
> > +		}
> >  		return 1;
> >  	case -NFS4ERR_NOMATCHING_LAYOUT:
> >  		trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 3d85c88ec4d7..d82ca209eb96 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1712,7 +1712,8 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> >  	unsigned int idhashval;
> >  	unsigned int sc_types;
> >  
> > -	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID;
> > +	sc_types = (NFS4_OPEN_STID | NFS4_LOCK_STID |
> > +		    NFS4_DELEG_STID | NFS4_LAYOUT_STID);
> >  
> >  	spin_lock(&nn->client_lock);
> >  	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > @@ -1725,6 +1726,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> >  			if (stid) {
> >  				struct nfs4_ol_stateid *stp;
> >  				struct nfs4_delegation *dp;
> > +				struct nfs4_layout_stateid *ls;
> >  
> >  				spin_unlock(&nn->client_lock);
> >  				switch (stid->sc_type) {
> > @@ -1780,6 +1782,10 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> >  					if (dp)
> >  						revoke_delegation(dp);
> >  					break;
> > +				case NFS4_LAYOUT_STID:
> > +					ls = layoutstateid(stid);
> > +					nfsd4_close_layout(ls);
> > +					break;
> >  				}
> >  				nfs4_put_stid(stid);
> >  				spin_lock(&nn->client_lock);
> > @@ -2859,17 +2865,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
> >  	struct nfsd_file *file;
> >  
> >  	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
> > -	file = ls->ls_file;
> > +	rcu_read_lock();
> > +	file = nfsd_file_get(ls->ls_file);
> > +	rcu_read_unlock();
> >  
> > -	seq_printf(s, "- ");
> > +	seq_puts(s, "- ");
> >  	nfs4_show_stateid(s, &st->sc_stateid);
> > -	seq_printf(s, ": { type: layout, ");
> > +	seq_puts(s, ": { type: layout");
> >  
> >  	/* XXX: What else would be useful? */
> >  
> > -	nfs4_show_superblock(s, file);
> > -	seq_printf(s, ", ");
> > -	nfs4_show_fname(s, file);
> > +	if (file) {
> > +		seq_puts(s, ", ");
> > +		nfs4_show_superblock(s, file);
> > +		seq_puts(s, ", ");
> > +		nfs4_show_fname(s, file);
> > +		nfsd_file_put(file);
> > +	}
> > +	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
> > +		seq_puts(s, ", admin-revoked");
> >  	seq_printf(s, " }\n");
> >  
> >  	return 0;
> > diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> > index de1e0dfed06a..f2777577865e 100644
> > --- a/fs/nfsd/pnfs.h
> > +++ b/fs/nfsd/pnfs.h
> > @@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
> >  	__be32 (*proc_layoutcommit)(struct inode *inode,
> >  			struct nfsd4_layoutcommit *lcp);
> >  
> > -	void (*fence_client)(struct nfs4_layout_stateid *ls);
> > +	void (*fence_client)(struct nfs4_layout_stateid *ls,
> > +			     struct nfsd_file *file);
> >  };
> >  
> >  extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
> > @@ -72,6 +73,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
> >  void nfsd4_return_all_client_layouts(struct nfs4_client *);
> >  void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
> >  		struct nfs4_file *fp);
> > +void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
> >  int nfsd4_init_pnfs(void);
> >  void nfsd4_exit_pnfs(void);
> >  #else
> > @@ -89,6 +91,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
> >  		struct nfs4_file *fp)
> >  {
> >  }
> > +static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
> > +{
> > +}
> >  static inline void nfsd4_exit_pnfs(void)
> >  {
> >  }
> > -- 
> > 2.42.1
> > 
> 
> -- 
> Chuck Lever
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/11] nfsd: allow layout state to be admin-revoked.
  2023-12-22 15:01     ` Chuck Lever
@ 2023-12-22 23:02       ` NeilBrown
  0 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2023-12-22 23:02 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Sat, 23 Dec 2023, Chuck Lever wrote:
> On Mon, Nov 27, 2023 at 10:25:59AM -0500, Chuck Lever wrote:
> > On Fri, Nov 24, 2023 at 11:28:46AM +1100, NeilBrown wrote:
> > > When there is layout state on a filesystem that is being "unlocked" that
> > > is now revoked, which involves closing the nfsd_file and releasing the
> > > vfs lease.
> > > 
> > > To avoid races, all users of ->ls_file - after the layout state has been
> > > successfully created - now need to take a counted reference under
> > > rcu_read_lock().  To support this, ->fence_client and
> > > nfsd4_cb_layout_fail() now take a second argument being the nfsd_file.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > 
> > Hi Neil, would you Cc: Christoph Hellwig <hch@lst.de> and Tom Haynes
> > <loghyr@gmail.com> to this patch next time you post this series?
> 
> Re-visiting. Did you send out a v5 of this series and I missed it?

No you didn't miss anything.  I think I got caught up with other
priorities.  I'll hopefully repost early in the new year.

NeilBrown


> 
> 
> > > ---
> > >  fs/nfsd/blocklayout.c |  4 ++--
> > >  fs/nfsd/nfs4layouts.c | 38 +++++++++++++++++++++++++++-----------
> > >  fs/nfsd/nfs4state.c   | 28 +++++++++++++++++++++-------
> > >  fs/nfsd/pnfs.h        |  7 ++++++-
> > >  4 files changed, 56 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > > index 46fd74d91ea9..3c040c81c77d 100644
> > > --- a/fs/nfsd/blocklayout.c
> > > +++ b/fs/nfsd/blocklayout.c
> > > @@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
> > >  }
> > >  
> > >  static void
> > > -nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
> > > +nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
> > >  {
> > >  	struct nfs4_client *clp = ls->ls_stid.sc_client;
> > > -	struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
> > > +	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
> > >  
> > >  	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
> > >  			nfsd4_scsi_pr_key(clp), 0, true);
> > > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > > index 77656126ad2a..dbc52413ce57 100644
> > > --- a/fs/nfsd/nfs4layouts.c
> > > +++ b/fs/nfsd/nfs4layouts.c
> > > @@ -152,6 +152,18 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
> > >  #endif
> > >  }
> > >  
> > > +void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
> > > +{
> > > +	struct nfsd_file *fl = xchg(&ls->ls_file, NULL);
> > > +
> > > +	if (fl) {
> > > +		if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
> > > +			vfs_setlease(fl->nf_file, F_UNLCK, NULL,
> > > +				     (void **)&ls);
> > > +		nfsd_file_put(fl);
> > > +	}
> > > +}
> > > +
> > >  static void
> > >  nfsd4_free_layout_stateid(struct nfs4_stid *stid)
> > >  {
> > > @@ -169,9 +181,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
> > >  	list_del_init(&ls->ls_perfile);
> > >  	spin_unlock(&fp->fi_lock);
> > >  
> > > -	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
> > > -		vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
> > > -	nfsd_file_put(ls->ls_file);
> > > +	nfsd4_close_layout(ls);
> > >  
> > >  	if (ls->ls_recalled)
> > >  		atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
> > > @@ -605,7 +615,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
> > >  }
> > >  
> > >  static void
> > > -nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
> > > +nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
> > >  {
> > >  	struct nfs4_client *clp = ls->ls_stid.sc_client;
> > >  	char addr_str[INET6_ADDRSTRLEN];
> > > @@ -627,7 +637,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
> > >  
> > >  	argv[0] = (char *)nfsd_recall_failed;
> > >  	argv[1] = addr_str;
> > > -	argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
> > > +	argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
> > >  	argv[3] = NULL;
> > >  
> > >  	error = call_usermodehelper(nfsd_recall_failed, argv, envp,
> > > @@ -657,6 +667,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > >  	struct nfsd_net *nn;
> > >  	ktime_t now, cutoff;
> > >  	const struct nfsd4_layout_ops *ops;
> > > +	struct nfsd_file *fl;
> > >  
> > >  	trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
> > >  	switch (task->tk_status) {
> > > @@ -688,12 +699,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > >  		 * Unknown error or non-responding client, we'll need to fence.
> > >  		 */
> > >  		trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
> > > -
> > > -		ops = nfsd4_layout_ops[ls->ls_layout_type];
> > > -		if (ops->fence_client)
> > > -			ops->fence_client(ls);
> > > -		else
> > > -			nfsd4_cb_layout_fail(ls);
> > > +		rcu_read_lock();
> > > +		fl = nfsd_file_get(ls->ls_file);
> > > +		rcu_read_unlock();
> > > +		if (fl) {
> > > +			ops = nfsd4_layout_ops[ls->ls_layout_type];
> > > +			if (ops->fence_client)
> > > +				ops->fence_client(ls, fl);
> > > +			else
> > > +				nfsd4_cb_layout_fail(ls, fl);
> > > +			nfsd_file_put(fl);
> > > +		}
> > >  		return 1;
> > >  	case -NFS4ERR_NOMATCHING_LAYOUT:
> > >  		trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 3d85c88ec4d7..d82ca209eb96 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -1712,7 +1712,8 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > >  	unsigned int idhashval;
> > >  	unsigned int sc_types;
> > >  
> > > -	sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID;
> > > +	sc_types = (NFS4_OPEN_STID | NFS4_LOCK_STID |
> > > +		    NFS4_DELEG_STID | NFS4_LAYOUT_STID);
> > >  
> > >  	spin_lock(&nn->client_lock);
> > >  	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > > @@ -1725,6 +1726,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > >  			if (stid) {
> > >  				struct nfs4_ol_stateid *stp;
> > >  				struct nfs4_delegation *dp;
> > > +				struct nfs4_layout_stateid *ls;
> > >  
> > >  				spin_unlock(&nn->client_lock);
> > >  				switch (stid->sc_type) {
> > > @@ -1780,6 +1782,10 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > >  					if (dp)
> > >  						revoke_delegation(dp);
> > >  					break;
> > > +				case NFS4_LAYOUT_STID:
> > > +					ls = layoutstateid(stid);
> > > +					nfsd4_close_layout(ls);
> > > +					break;
> > >  				}
> > >  				nfs4_put_stid(stid);
> > >  				spin_lock(&nn->client_lock);
> > > @@ -2859,17 +2865,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
> > >  	struct nfsd_file *file;
> > >  
> > >  	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
> > > -	file = ls->ls_file;
> > > +	rcu_read_lock();
> > > +	file = nfsd_file_get(ls->ls_file);
> > > +	rcu_read_unlock();
> > >  
> > > -	seq_printf(s, "- ");
> > > +	seq_puts(s, "- ");
> > >  	nfs4_show_stateid(s, &st->sc_stateid);
> > > -	seq_printf(s, ": { type: layout, ");
> > > +	seq_puts(s, ": { type: layout");
> > >  
> > >  	/* XXX: What else would be useful? */
> > >  
> > > -	nfs4_show_superblock(s, file);
> > > -	seq_printf(s, ", ");
> > > -	nfs4_show_fname(s, file);
> > > +	if (file) {
> > > +		seq_puts(s, ", ");
> > > +		nfs4_show_superblock(s, file);
> > > +		seq_puts(s, ", ");
> > > +		nfs4_show_fname(s, file);
> > > +		nfsd_file_put(file);
> > > +	}
> > > +	if (st->sc_status & NFS4_STID_ADMIN_REVOKED)
> > > +		seq_puts(s, ", admin-revoked");
> > >  	seq_printf(s, " }\n");
> > >  
> > >  	return 0;
> > > diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
> > > index de1e0dfed06a..f2777577865e 100644
> > > --- a/fs/nfsd/pnfs.h
> > > +++ b/fs/nfsd/pnfs.h
> > > @@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
> > >  	__be32 (*proc_layoutcommit)(struct inode *inode,
> > >  			struct nfsd4_layoutcommit *lcp);
> > >  
> > > -	void (*fence_client)(struct nfs4_layout_stateid *ls);
> > > +	void (*fence_client)(struct nfs4_layout_stateid *ls,
> > > +			     struct nfsd_file *file);
> > >  };
> > >  
> > >  extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
> > > @@ -72,6 +73,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
> > >  void nfsd4_return_all_client_layouts(struct nfs4_client *);
> > >  void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
> > >  		struct nfs4_file *fp);
> > > +void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
> > >  int nfsd4_init_pnfs(void);
> > >  void nfsd4_exit_pnfs(void);
> > >  #else
> > > @@ -89,6 +91,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
> > >  		struct nfs4_file *fp)
> > >  {
> > >  }
> > > +static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
> > > +{
> > > +}
> > >  static inline void nfsd4_exit_pnfs(void)
> > >  {
> > >  }
> > > -- 
> > > 2.42.1
> > > 
> > 
> > -- 
> > Chuck Lever
> > 
> 
> -- 
> Chuck Lever
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state
  2023-11-26 17:36   ` Chuck Lever
@ 2024-01-19  0:46     ` NeilBrown
  0 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2024-01-19  0:46 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Mon, 27 Nov 2023, Chuck Lever wrote:
> On Fri, Nov 24, 2023 at 11:28:40AM +1100, NeilBrown wrote:
> > The NFSv4 protocol allows state to be revoked by the admin and has error
> > codes which allow this to be communicated to the client.
> > 
> > This patch
> >  - introduces a new state-id status NFS4_STID_ADMIN_REVOKE
> >    which can be set on open, lock, or delegation state.
> >  - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> >  - introduces a per-client counter of these states and returns
> >    SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> >    Decrements this when freeing any admin-revoked state.
> >  - introduces stub code to find all interesting states for a given
> >    superblock so they can be revoked via the 'unlock_filesystem'
> >    file in /proc/fs/nfsd/
> >    No actual states are handled yet.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++++-
> >  fs/nfsd/nfsctl.c    |  1 +
> >  fs/nfsd/nfsd.h      |  1 +
> >  fs/nfsd/state.h     | 10 +++++++
> >  fs/nfsd/trace.h     |  3 +-
> >  5 files changed, 84 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index b9239f2ebc79..477a9e9aebbd 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1215,6 +1215,8 @@ nfs4_put_stid(struct nfs4_stid *s)
> >  		return;
> >  	}
> >  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
> > +		atomic_dec(&s->sc_client->cl_admin_revoked);
> >  	nfs4_free_cpntf_statelist(clp->net, s);
> >  	spin_unlock(&clp->cl_lock);
> >  	s->sc_free(s);
> > @@ -1534,6 +1536,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> >  	}
> >  
> >  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
> > +		atomic_dec(&s->sc_client->cl_admin_revoked);
> >  	list_add(&stp->st_locks, reaplist);
> >  }
> >  
> > @@ -1679,6 +1683,54 @@ static void release_openowner(struct nfs4_openowner *oo)
> >  	nfs4_put_stateowner(&oo->oo_owner);
> >  }
> >  
> > +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> > +					  struct super_block *sb,
> > +					  unsigned int sc_types)
> > +{
> > +	unsigned long id, tmp;
> > +	struct nfs4_stid *stid;
> > +
> > +	spin_lock(&clp->cl_lock);
> > +	idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> > +		if ((stid->sc_type & sc_types) &&
> > +		    stid->sc_status == 0 &&
> > +		    stid->sc_file->fi_inode->i_sb == sb) {
> > +			refcount_inc(&stid->sc_count);
> > +			break;
> > +		}
> > +	spin_unlock(&clp->cl_lock);
> > +	return stid;
> > +}
> > +
> 
> nfsd4_revoke_states() needs a kdoc comment.

Done.

> 
> 
> > +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > +{
> > +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > +	unsigned int idhashval;
> > +	unsigned int sc_types;
> > +
> > +	sc_types = 0;
> > +
> > +	spin_lock(&nn->client_lock);
> > +	for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > +		struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> > +		struct nfs4_client *clp;
> > +	retry:
> > +		list_for_each_entry(clp, head, cl_idhash) {
> > +			struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> > +								  sc_types);
> > +			if (stid) {
> > +				spin_unlock(&nn->client_lock);
> > +				switch (stid->sc_type) {
> 
> This is "dead" code, for now. Does this stub really need to be
> introduced in this patch?

"need" is a strong word..
The entire patch is "dead" code.  I want to allow handling for the
different state types to be added one at a time.  I could delay much of
this patch until handling the first state, but I think that would hurt
reviewability of the series...

> 
> 
> > +				}
> > +				nfs4_put_stid(stid);
> > +				spin_lock(&nn->client_lock);
> > +				goto retry;
> > +			}
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +}
> > +
> >  static inline int
> >  hash_sessionid(struct nfs4_sessionid *sessionid)
> >  {
> > @@ -2550,6 +2602,8 @@ static int client_info_show(struct seq_file *m, void *v)
> >  	}
> >  	seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> >  	seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> > +	seq_printf(m, "admin-revoked states: %d\n",
> > +		   atomic_read(&clp->cl_admin_revoked));
> >  	drop_client(clp);
> >  
> >  	return 0;
> > @@ -4109,6 +4163,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	}
> >  	if (!list_empty(&clp->cl_revoked))
> >  		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> > +	if (atomic_read(&clp->cl_admin_revoked))
> > +		seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> >  out_no_session:
> >  	if (conn)
> >  		free_conn(conn);
> > @@ -4597,7 +4653,9 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
> >  {
> >  	__be32 ret = nfs_ok;
> >  
> > -	if (s->sc_status & NFS4_STID_REVOKED)
> > +	if (s->sc_status & NFS4_STID_ADMIN_REVOKED)
> > +		ret = nfserr_admin_revoked;
> > +	else if (s->sc_status & NFS4_STID_REVOKED)
> >  		ret = nfserr_deleg_revoked;
> >  	else if (s->sc_status & NFS4_STID_CLOSED)
> >  		ret = nfserr_bad_stateid;
> > @@ -5188,6 +5246,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> >  	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
> >  	if (deleg == NULL)
> >  		goto out;
> > +	if (deleg->dl_stid.sc_status & NFS4_STID_ADMIN_REVOKED) {
> > +		nfs4_put_stid(&deleg->dl_stid);
> > +		status = nfserr_admin_revoked;
> > +		goto out;
> > +	}
> >  	if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
> >  		nfs4_put_stid(&deleg->dl_stid);
> >  		status = nfserr_deleg_revoked;
> > @@ -6508,6 +6571,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> >  		 */
> >  		statusmask |= NFS4_STID_REVOKED;
> >  
> > +	statusmask |= NFS4_STID_ADMIN_REVOKED;
> > +
> >  	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> >  		CLOSE_STATEID(stateid))
> >  		return nfserr_bad_stateid;
> > @@ -6526,6 +6591,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> >  		nfs4_put_stid(stid);
> >  		return nfserr_deleg_revoked;
> >  	}
> > +	if (stid->sc_type & NFS4_STID_ADMIN_REVOKED) {
> > +		nfs4_put_stid(stid);
> > +		return nfserr_admin_revoked;
> > +	}
> >  	*s = stid;
> >  	return nfs_ok;
> >  }
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index d6eeee149370..a622d773f428 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -285,6 +285,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> >  	 * 3.  Is that directory the root of an exported file system?
> >  	 */
> >  	error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> > +	nfsd4_revoke_states(netns(file), path.dentry->d_sb);
> >  
> >  	path_put(&path);
> >  	return error;
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index f5ff42f41ee7..d46203eac3c8 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -280,6 +280,7 @@ void		nfsd_lockd_shutdown(void);
> >  #define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
> >  #define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
> >  #define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
> > +#define	nfserr_admin_revoked	cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> >  #define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
> >  #define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
> >  #define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index bb00dcd4c1ba..584378c43e0a 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -112,6 +112,7 @@ struct nfs4_stid {
> >  #define NFS4_STID_CLOSED	BIT(0)
> >  /* For a deleg stateid kept around only to process free_stateid's: */
> >  #define NFS4_STID_REVOKED	BIT(1)
> > +#define NFS4_STID_ADMIN_REVOKED	BIT(2)
> 
> The names of these mask bits are now getting to be visually
> indistinguishable from the stateid type names. The subtlety of
> where the _STID_ falls in the name makes me blink a few times when
> reading this code.
> 
> It would be a little more friendly to add _STATUS_ or some other
> infix that makes it easy to tell these are not stateid types. I
> know that makes the names longer and more unwieldy.

In an ideal world we could have just the words that 'trace' reports:

 OPEN LOCK DELEG LAYOUT   and CLOSED REVOKED ADMIN_REVOKED

and the language would tell us if the flag was not compatible with the
field it was stored in.  But C does not provide that world so we need
something help the reader assess consistency.

Do we really need NFS4 here?  These flags are local to nfsd/nfs4* (and
state.h and trace.h)

The values are stored in "sc_type" or "sc_status" (and occasionally
typemask or similar).  So

 TYPE_OPEN TYPE_DELETE TYPE_LAYOUT and STATUS_CLOSED STATUS_REVOKED STATUS_ADMIN_REVOKED

would be sufficiently informative for the reader.  Putting "NFS4_STID_"
in front of each of those makes them unwieldy as you say, and doesn't add
any value that I can see.  Possibly putting "SC_" in front to match the
field name could be justified.

Thoughts?

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed.
  2023-11-26 17:54   ` Chuck Lever
@ 2024-01-19  1:41     ` NeilBrown
  0 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2024-01-19  1:41 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Mon, 27 Nov 2023, Chuck Lever wrote:
> On Fri, Nov 24, 2023 at 11:28:42AM +1100, NeilBrown wrote:
> > For NFSv4.1 and later the client easily discovers if there is any
> > admin-revoked state and will then find and explicitly free it.
> > 
> > For NFSv4.0 there is no such mechanism.  The client can only find that
> > state is admin-revoked if it tries to use that state, and there is no
> > way for it to explicitly free the state.  So the server must hold on to
> > the stateid (at least) for an indefinite amount of time.  A
> > RELEASE_LOCKOWNER request might justify forgetting some of these
> > stateids, as would the whole clients lease lapsing, but these are not
> > reliable.
> 
> They aren't reliable, but what are the consequences of revoked
> state left on the server? Seems like our implementation has a
> number of mechanisms for cleaning up state over time. Do you feel
> this is a denial-of-service vector?

The consequence of revoked state being left on the server is only the
memory usage (and possible associated costs of indexing a data structure
that is larger than it needs to be).  The only existing mechanisms that
will clean it up is the cleanup when a client expires or when the server
exits.  These may not happen for years.

We might expect admin-revoke to be used rarely, but such expectations
are not reliable.  So the number of revoked states that accumulate could
grow without bound - unlikely though that is.

I don't think it is a denial-of-service "attack" vector as only the
admin can initiate the admin-revocation.  But an admin could unwittingly
bring denial of service upon themselves by revoking state repeatedly
over an extended period of time - if we did not proactively clean up old
revoked state.

> 
> 
> > This patch takes two approaches.
> > 
> > Whenever a client uses an revoked stateid, that stateid is then
> > discarded and will not be recognised again.  This might confuse a client
> > which expect to get NFS4ERR_ADMIN_REVOKED consistently once it get it at
> > all, but should mostly work.  Hopefully one error will lead to other
> > resources being closed (e.g.  process exits), which will result in more
> > stateid being freed when a CLOSE attempt gets NFS4ERR_ADMIN_REVOKED.
> 
> I'm leery of this: "This might confuse..." and "Hopefully..." suggest
> we're not real sure how this will behave in practice with the current
> cohort of client implementations.

It is true - we are not really sure.  There are many reason why we have
NFSv4.1 and I suspect this is one of them.
I would be happy to only support admin-revoke for v4.1 and later and put
v4.0 in the "too hard" basket.  But I think this "best effort" without
strong guarantees is better than not supporting it at all.

> 
> Also, this paragraph in Section 10.2.1 of RFC 7530 is concerning:
> 
> >  A client normally finds out about revocation of a delegation when it
> >  uses a stateid associated with a delegation and receives one of the
> >  errors NFS4ERR_EXPIRED, NFS4ERR_BAD_STATEID, or NFS4ERR_ADMIN_REVOKED
> >  (NFS4ERR_EXPIRED indicates that all lock state associated with the
> >  client has been lost).  It also may find out about delegation
> >  revocation after a client reboot when it attempts to reclaim a
> >  delegation and receives NFS4ERR_EXPIRED.  Note that in the case of a
> >  revoked OPEN_DELEGATE_WRITE delegation, there are issues because data
> >  may have been modified by the client whose delegation is revoked and,
> >  separately, by other clients.  See Section 10.5.1 for a discussion of
> >  such issues.  Note also that when delegations are revoked,
> >  information about the revoked delegation will be written by the
> >  server to stable storage (as described in Section 9.6).  This is done
> >  to deal with the case in which a server reboots after revoking a
> >  delegation but before the client holding the revoked delegation is
> >  notified about the revocation.
> 
> The text here suggests that the server persists the ADMIN_REVOKED
> status, which suggests to me that the server is supposed to continue
> returning ADMIN_REVOKED when presented with the revoked state,
> until the state is freed.

I agree - but when can the state be freed?  NFSv4.0 has no mechanism to
do this.  So the text suggests that the server is supposed to continue
returning ADMIN_REVOKED when presented with the revoked state
indefinitely.

We could do that.  I just don't feel comfortable storing an indefinite
amount of state for an indefinite amount of time.

> 
> AFAICT NFSD isn't recording this status persistently... Is there a
> plan to add that (later) or some words suggesting that it is safe
> and reasonable not to record it?

I hadn't thought about this...

The expectation (well ...  my expectation) is that state will only be
admin-revoked after the filesystem has been un-exported (and shortly
before it is unmounted).  In this case there is no chance for another
client to open the file and violate expectations reasonably held by the
first client.

But people might use the functionality differently to my expectations
(it happens all the time...).

I cannot see that Linux nfsd currently saves any per-state info to
stable storage.  There is only per-client information.
Based on this, section 9.6.3.4.3 Handling Server Edge Conditions
seems to suggest that all attempts to reclaim locks should get
NFS4ERR_NO_GRACE.  But I don't think we do this.

So maybe I can justify the lack of recording admin-revoke state to
stable storage on the basis that the whole "record state to stable
storage" idea is currently ignored by nfsd ????

> 
> 
> > Also, any admin-revoked stateids that have been that way for more than
> > one lease time are periodically revoke.
> > 
> > No actual freeing of state happens in this patch.  That will come in
> > future patches which handle the different sorts of revoked state.
> >
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/netns.h     |  4 ++
> >  fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 100 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index ab303a8b77d5..7458f672b33e 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -197,6 +197,10 @@ struct nfsd_net {
> >  	atomic_t		nfsd_courtesy_clients;
> >  	struct shrinker		*nfsd_client_shrinker;
> >  	struct work_struct	nfsd_shrinker_work;
> > +
> > +	/* last time an admin-revoke happened for NFSv4.0 */
> > +	time64_t		nfs40_last_revoke;
> > +
> >  };
> >  
> >  /* Simple check to find out if a given net was properly initialized */
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 52e680235afe..c57f2ff954cb 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1724,6 +1724,14 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> >  				}
> >  				nfs4_put_stid(stid);
> >  				spin_lock(&nn->client_lock);
> > +				if (clp->cl_minorversion == 0)
> > +					/* Allow cleanup after a lease period.
> > +					 * store_release ensures cleanup will
> > +					 * see any newly revoked states if it
> > +					 * sees the time updated.
> > +					 */
> > +					nn->nfs40_last_revoke =
> > +						ktime_get_boottime_seconds();
> >  				goto retry;
> >  			}
> >  		}
> > @@ -4648,6 +4656,39 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> >  	return ret;
> >  }
> >  
> > +static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
> > +{
> > +	struct nfs4_client *cl = s->sc_client;
> > +
> > +	switch (s->sc_type) {
> > +	default:
> > +		spin_unlock(&cl->cl_lock);
> > +	}
> > +}
> > +
> > +static void nfs40_drop_revoked_stid(struct nfs4_client *cl,
> > +				    stateid_t *stid)
> 
> Nits: I'd prefer nfsd4_drop_revoked_stid() and nfsd40_drop_revoked_stid()
> 

I guess..

nfsd code sometimes uses "nfsd" and sometimes "nfs" and sometimes adds a
"4" and it doesn't seem to be at all consistent.  Standardising on the
longest such form doesn't fill me with joy.  I would prefer to remove
the prefix completely - at least for names local to a file and probably
for names local to the module..

I've made the change you suggest.

Thanks,
NeilBrown


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed.
  2023-11-26 18:07   ` Chuck Lever
@ 2024-01-19  1:43     ` NeilBrown
  0 siblings, 0 replies; 26+ messages in thread
From: NeilBrown @ 2024-01-19  1:43 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Mon, 27 Nov 2023, Chuck Lever wrote:

> > +static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
> > +{
> > +	struct nfs4_client *cl = s->sc_client;
> > +
> > +	switch (s->sc_type) {
> > +	default:
> > +		spin_unlock(&cl->cl_lock);
> > +	}
> > +}
> 
> I'm not in love with unlocking cl_lock inside nfsd_drop_revoked_stid,
> but I understand why it's necessary. How about:
> 
> static void nfsd4_drop_revoked_stid_unlock(struct nfs4_client *cl,
> 					   struct nfs4_stid *s)
> 	__releases(&cl->cl_lock)
> {
> 	....
> 

I made it
	__releases(&s->sc_client->cl_lock)

thanks.
NeilBrown

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2024-01-19  1:43 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-24  0:28 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
2023-11-24  0:28 ` [PATCH 01/11] nfsd: hold ->cl_lock for hash_delegation_locked() NeilBrown
2023-11-24  0:28 ` [PATCH 02/11] nfsd: don't call functions with side-effecting inside WARN_ON() NeilBrown
2023-11-24  0:28 ` [PATCH 03/11] nfsd: avoid race after unhash_delegation_locked() NeilBrown
2023-11-24  0:28 ` [PATCH 04/11] nfsd: split sc_status out of sc_type NeilBrown
2023-11-24  0:28 ` [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state NeilBrown
2023-11-26 17:36   ` Chuck Lever
2024-01-19  0:46     ` NeilBrown
2023-11-24  0:28 ` [PATCH 06/11] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states NeilBrown
2023-11-26 17:41   ` Chuck Lever
2023-11-24  0:28 ` [PATCH 07/11] nfsd: allow admin-revoked NFSv4.0 state to be freed NeilBrown
2023-11-26 17:54   ` Chuck Lever
2024-01-19  1:41     ` NeilBrown
2023-11-26 18:07   ` Chuck Lever
2024-01-19  1:43     ` NeilBrown
2023-11-24  0:28 ` [PATCH 08/11] nfsd: allow lock state ids to be revoked and then freed NeilBrown
2023-11-24  0:28 ` [PATCH 09/11] nfsd: allow open " NeilBrown
2023-11-24  0:28 ` [PATCH 10/11] nfsd: allow delegation " NeilBrown
2023-11-30  4:48   ` kernel test robot
2023-11-24  0:28 ` [PATCH 11/11] nfsd: allow layout state to be admin-revoked NeilBrown
2023-11-24 15:23   ` kernel test robot
2023-11-24 15:41   ` kernel test robot
2023-11-27 15:25   ` Chuck Lever
2023-12-22 15:01     ` Chuck Lever
2023-12-22 23:02       ` NeilBrown
  -- strict thread matches above, loose matches on Subject: below --
2023-11-24  0:23 [PATCH 00/11 v4] nfsd: support admin-revocation of v4 state NeilBrown
2023-11-24  0:23 ` [PATCH 05/11] nfsd: prepare for supporting admin-revocation of state NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox