Linux NFS development
 help / color / mirror / Atom feed
From: Chris Mason <clm@meta.com>
To: Chuck Lever <chuck.lever@oracle.com>,
	Jeff Layton <jlayton@kernel.org>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	Trond Myklebust <trondmy@gmail.com>, <linux-nfs@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Subject: [PATCH 1/2] nfsd: clear cl_cb_session on DESTROY_SESSION
Date: Tue, 19 May 2026 10:49:14 -0700	[thread overview]
Message-ID: <20260519180032.1852793-3-clm@meta.com> (raw)
In-Reply-To: <20260519180032.1852793-2-clm@meta.com>

The parent commit drains in-flight backchannel callbacks before
nfsd4_destroy_session() frees the session, which closes the primary
use-after-free.  clp->cl_cb_session still caches the freed pointer
though: no destroy, expire, or shutdown path clears it, so the
stale value survives until the next CREATE_SESSION overwrites it.

A backchannel dispatch that races in during that window dereferences
the freed session through one of three helpers:

    nfsd41_cb_get_slot()
      ses = clp->cl_cb_session;
      grab_slot(ses);             /* deref */

    nfsd41_cb_release_slot()
      ses = clp->cl_cb_session;
      spin_lock(&ses->se_lock);   /* deref */

    nfsd4_cb_sequence_done()
      session = cb->cb_clp->cl_cb_session;
      session->se_cb_seq_nr[...]; /* deref */

Fix by clearing the cached pointer in nfsd4_destroy_session() after
the inflight drain returns, under clp->cl_lock and only when the
session being destroyed is still the active backchannel session.
Pair the store with READ_ONCE() loads and NULL checks in the three
helpers so each takes its early-return / requeue path.

The encode and decode helpers also read cl_cb_session but only from
in-flight RPCs, which nfsd4_probe_callback_sync() has already
quiesced; the nfsd41_cb_get_slot() NULL check then blocks any new
RPC from acquiring a slot, so no fresh CB can reach encode/decode.
Without the WRITE_ONCE() store the NULL guards would be unreachable.

Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@meta.com>
---
 fs/nfsd/nfs4callback.c | 24 ++++++++++++++++--------
 fs/nfsd/nfs4state.c    | 14 ++++++++++++++
 fs/nfsd/trace.h        | 35 ++++++++++++++++++++---------------
 3 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 50827405468d..8af2d0cc37c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1150,9 +1150,11 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 	} else {
 		if (!conn->cb_xprt || !ses)
 			return -EINVAL;
-		clp->cl_cb_session = ses;
+		spin_lock(&clp->cl_lock);
+		WRITE_ONCE(clp->cl_cb_session, ses);
+		spin_unlock(&clp->cl_lock);
 		args.bc_xprt = conn->cb_xprt;
-		args.prognumber = clp->cl_cb_session->se_cb_prog;
+		args.prognumber = ses->se_cb_prog;
 		args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
 				XPRT_TRANSPORT_BC;
 		args.authflavor = ses->se_cb_sec.flavor;
@@ -1278,10 +1280,12 @@ static int grab_slot(struct nfsd4_session *ses)
 static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
 {
 	struct nfs4_client *clp = cb->cb_clp;
-	struct nfsd4_session *ses = clp->cl_cb_session;
+	struct nfsd4_session *ses = READ_ONCE(clp->cl_cb_session);
 
 	if (cb->cb_held_slot >= 0)
 		return true;
+	if (!ses)
+		return false;
 	cb->cb_held_slot = grab_slot(ses);
 	if (cb->cb_held_slot < 0) {
 		rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
@@ -1297,12 +1301,14 @@ static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
 static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
 {
 	struct nfs4_client *clp = cb->cb_clp;
-	struct nfsd4_session *ses = clp->cl_cb_session;
+	struct nfsd4_session *ses = READ_ONCE(clp->cl_cb_session);
 
 	if (cb->cb_held_slot >= 0) {
-		spin_lock(&ses->se_lock);
-		ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
-		spin_unlock(&ses->se_lock);
+		if (ses) {
+			spin_lock(&ses->se_lock);
+			ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
+			spin_unlock(&ses->se_lock);
+		}
 		cb->cb_held_slot = -1;
 		rpc_wake_up_next(&clp->cl_cb_waitq);
 	}
@@ -1442,9 +1448,11 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 /* Returns true if CB_COMPOUND processing should continue */
 static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
 {
-	struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+	struct nfsd4_session *session = READ_ONCE(cb->cb_clp->cl_cb_session);
 	bool ret = false;
 
+	if (!session)
+		goto requeue;
 	if (cb->cb_held_slot < 0)
 		goto requeue;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6837b63d9864..563af61796e3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4281,6 +4281,20 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
 
 	nfsd4_probe_callback_sync(ses->se_client);
 
+	/*
+	 * The inflight drain inside nfsd4_probe_callback_sync() guarantees
+	 * that no backchannel RPC still references @ses.  Clear the client's
+	 * cached backchannel session pointer so any callback that races in
+	 * after this point (e.g. via a freshly re-established backchannel)
+	 * cannot dereference the about-to-be-freed session.  The matching
+	 * READ_ONCE() NULL checks live in nfsd41_cb_get_slot(),
+	 * nfsd41_cb_release_slot() and nfsd4_cb_sequence_done().
+	 */
+	spin_lock(&ses->se_client->cl_lock);
+	if (ses->se_client->cl_cb_session == ses)
+		WRITE_ONCE(ses->se_client->cl_cb_session, NULL);
+	spin_unlock(&ses->se_client->cl_lock);
+
 	spin_lock(&nn->client_lock);
 	status = nfs_ok;
 out_put_session:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 5ad38f50836d..db9b10978b2d 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1742,17 +1742,19 @@ TRACE_EVENT(nfsd_cb_seq_status,
 	),
 	TP_fast_assign(
 		const struct nfs4_client *clp = cb->cb_clp;
-		const struct nfsd4_session *session = clp->cl_cb_session;
-		const struct nfsd4_sessionid *sid =
-			(struct nfsd4_sessionid *)&session->se_sessionid;
+		const struct nfsd4_session *session =
+			READ_ONCE(clp->cl_cb_session);
+		const struct nfsd4_sessionid *sid = session ?
+			(struct nfsd4_sessionid *)&session->se_sessionid :
+			NULL;
 
 		__entry->task_id = task->tk_pid;
 		__entry->client_id = task->tk_client ?
 				     task->tk_client->cl_clid : -1;
-		__entry->cl_boot = sid->clientid.cl_boot;
-		__entry->cl_id = sid->clientid.cl_id;
-		__entry->seqno = sid->sequence;
-		__entry->reserved = sid->reserved;
+		__entry->cl_boot = sid ? sid->clientid.cl_boot : 0;
+		__entry->cl_id = sid ? sid->clientid.cl_id : 0;
+		__entry->seqno = sid ? sid->sequence : 0;
+		__entry->reserved = sid ? sid->reserved : 0;
 		__entry->tk_status = task->tk_status;
 		__entry->seq_status = cb->cb_seq_status;
 	),
@@ -1782,18 +1784,21 @@ TRACE_EVENT(nfsd_cb_free_slot,
 	),
 	TP_fast_assign(
 		const struct nfs4_client *clp = cb->cb_clp;
-		const struct nfsd4_session *session = clp->cl_cb_session;
-		const struct nfsd4_sessionid *sid =
-			(struct nfsd4_sessionid *)&session->se_sessionid;
+		const struct nfsd4_session *session =
+			READ_ONCE(clp->cl_cb_session);
+		const struct nfsd4_sessionid *sid = session ?
+			(struct nfsd4_sessionid *)&session->se_sessionid :
+			NULL;
 
 		__entry->task_id = task->tk_pid;
 		__entry->client_id = task->tk_client ?
 				     task->tk_client->cl_clid : -1;
-		__entry->cl_boot = sid->clientid.cl_boot;
-		__entry->cl_id = sid->clientid.cl_id;
-		__entry->seqno = sid->sequence;
-		__entry->reserved = sid->reserved;
-		__entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
+		__entry->cl_boot = sid ? sid->clientid.cl_boot : 0;
+		__entry->cl_id = sid ? sid->clientid.cl_id : 0;
+		__entry->seqno = sid ? sid->sequence : 0;
+		__entry->reserved = sid ? sid->reserved : 0;
+		__entry->slot_seqno = session ?
+			session->se_cb_seq_nr[cb->cb_held_slot] : 0;
 	),
 	TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
 		" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
-- 
2.52.0


  reply	other threads:[~2026-05-19 18:02 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-19 17:49 [PATCH] nfsd: drain backchannel callbacks before freeing a session Chris Mason
2026-05-19 17:49 ` Chris Mason [this message]
2026-05-19 19:48   ` [PATCH 1/2] nfsd: clear cl_cb_session on DESTROY_SESSION Jeff Layton
2026-05-19 17:49 ` [PATCH 2/2] nfsd: drain inflight callbacks in probe_callback_sync Chris Mason
2026-05-19 18:44   ` J. Bruce Fields
2026-05-19 20:01 ` [PATCH] nfsd: drain backchannel callbacks before freeing a session Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260519180032.1852793-3-clm@meta.com \
    --to=clm@meta.com \
    --cc=bfields@fieldses.org \
    --cc=chuck.lever@oracle.com \
    --cc=jlayton@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trondmy@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox