From: NeilBrown <neilb@suse.de>
To: Chuck Lever <chuck.lever@oracle.com>, Jeff Layton <jlayton@kernel.org>
Cc: linux-nfs@vger.kernel.org, Olga Kornievskaia <kolga@netapp.com>,
Dai Ngo <Dai.Ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Subject: [PATCH 2/3] nfsd: replace rp_mutex to avoid deadlock in move_to_close_lru()
Date: Fri, 1 Mar 2024 11:07:38 +1100 [thread overview]
Message-ID: <20240301000950.2306-3-neilb@suse.de> (raw)
In-Reply-To: <20240301000950.2306-1-neilb@suse.de>
move_to_close_lru() waits for sc_count to become zero while holding
rp_mutex. This can deadlock if another thread holds a reference and is
waiting for rp_mutex.
By the time we get to move_to_close_lru() the openowner is unhashed and
cannot be found any more. So code waiting for the mutex can safely
retry the lookup if move_to_close_lru() has started.
So change rp_mutex to an atomic_t with three states:
RP_UNLOCK - state is still hashed, not locked for reply
RP_LOCKED - state is still hashed, is locked for reply
RP_UNHASHED - state is now hashed, no code can get a lock.
Use wait_ver_event() to wait for either a lock, or for the owner to be
unhashed. In the latter case, retry the lookup.
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfsd/nfs4state.c | 46 ++++++++++++++++++++++++++++++++++++---------
fs/nfsd/state.h | 2 +-
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e625f738f7b0..5dab316932d3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4442,21 +4442,32 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
atomic_set(&nn->nfsd_courtesy_clients, 0);
}
+enum rp_lock {
+ RP_UNLOCKED,
+ RP_LOCKED,
+ RP_UNHASHED,
+};
+
static void init_nfs4_replay(struct nfs4_replay *rp)
{
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- mutex_init(&rp->rp_mutex);
+ atomic_set(&rp->rp_locked, RP_UNLOCKED);
}
-static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
- struct nfs4_stateowner *so)
+static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
+ struct nfs4_stateowner *so)
{
if (!nfsd4_has_session(cstate)) {
- mutex_lock(&so->so_replay.rp_mutex);
+ wait_var_event(&so->so_replay.rp_locked,
+ atomic_cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED)
+ return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
+ return 0;
}
void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
@@ -4465,7 +4476,8 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- mutex_unlock(&so->so_replay.rp_mutex);
+ atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
+ wake_up_var(&so->so_replay.rp_locked);
nfs4_put_stateowner(so);
}
}
@@ -4691,7 +4703,11 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Wait for the refcount to drop to 2. Since it has been unhashed,
* there should be no danger of the refcount going back up again at
* this point.
+ * Some threads with a reference might be waiting for rp_locked,
+ * so tell them to stop waiting.
*/
+ atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
+ wake_up_var(&oo->oo_owner.so_replay.rp_locked);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -5064,6 +5080,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
+retry:
oo = find_openstateowner_str(strhashval, open, clp);
open->op_openowner = oo;
if (!oo)
@@ -5074,17 +5091,24 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
open->op_openowner = NULL;
goto new_owner;
}
- nfsd4_cstate_assign_replay(cstate, &oo->oo_owner);
+ if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
+ nfs4_put_stateowner(&oo->oo_owner);
+ goto retry;
+ }
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
goto alloc_stateid;
new_owner:
oo = alloc_init_open_stateowner(strhashval, open, cstate);
+ open->op_openowner = oo;
if (oo == NULL)
return nfserr_jukebox;
- open->op_openowner = oo;
- nfsd4_cstate_assign_replay(cstate, &oo->oo_owner);
+ if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
+ WARN_ON(1);
+ nfs4_put_stateowner(&oo->oo_owner);
+ goto new_owner;
+ }
alloc_stateid:
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
@@ -6841,11 +6865,15 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
+retry:
status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
- nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
+ if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) {
+ nfs4_put_stateowner(stp->st_stateowner);
+ goto retry;
+ }
status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
if (!status)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 41bdc913fa71..6a3becd86112 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -446,7 +446,7 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
- struct mutex rp_mutex;
+ atomic_t rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
--
2.43.0
next prev parent reply other threads:[~2024-03-01 0:10 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-01 0:07 [PATCH 0/3] nfsd: fix dadlock in move_to_close_lru() NeilBrown
2024-03-01 0:07 ` [PATCH 1/3] nfsd: move nfsd4_cstate_assign_replay() earlier in open handling NeilBrown
2024-03-01 12:54 ` Jeff Layton
2024-03-03 23:59 ` NeilBrown
2024-03-01 0:07 ` NeilBrown [this message]
2024-03-01 13:09 ` [PATCH 2/3] nfsd: replace rp_mutex to avoid deadlock in move_to_close_lru() Jeff Layton
2024-03-04 0:09 ` NeilBrown
2024-03-01 0:07 ` [PATCH 3/3] nfsd: drop st_mutex_mutex before calling move_to_close_lru() NeilBrown
2024-03-01 14:05 ` [PATCH 0/3] nfsd: fix dadlock in move_to_close_lru() Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240301000950.2306-3-neilb@suse.de \
--to=neilb@suse.de \
--cc=Dai.Ngo@oracle.com \
--cc=chuck.lever@oracle.com \
--cc=jlayton@kernel.org \
--cc=kolga@netapp.com \
--cc=linux-nfs@vger.kernel.org \
--cc=tom@talpey.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).