* [PATCH 1/4] nfsd: perform all find_openstateowner_str calls in the one place.
2024-04-08 2:09 [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() NeilBrown
@ 2024-04-08 2:09 ` NeilBrown
2024-04-08 2:09 ` [PATCH 2/4] nfsd: move nfsd4_cstate_assign_replay() earlier in open handling NeilBrown
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: NeilBrown @ 2024-04-08 2:09 UTC (permalink / raw)
To: Chuck Lever, Jeff Layton
Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey
Currently find_openstateowner_str look ups are done both in
nfsd4_process_open1() and alloc_init_open_stateowner() - the latter
possibly being a surprise based on its name.
It would be easier to follow, and more conformant to common patterns, if
the lookup was all in the one place.
So replace alloc_init_open_stateowner() with
find_or_alloc_open_stateowner() and use the latter in
nfsd4_process_open1() without any calls to find_openstateowner_str().
This means all finds are find_openstateowner_str_locked() and
find_openstateowner_str() is no longer needed. So discard
find_openstateowner_str() and rename find_openstateowner_str_locked() to
find_openstateowner_str().
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfsd/nfs4state.c | 93 +++++++++++++++++++--------------------------
1 file changed, 40 insertions(+), 53 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1824a30e7dd4..6ac1dff29d42 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -541,7 +541,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
}
static struct nfs4_openowner *
-find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
struct nfs4_client *clp)
{
struct nfs4_stateowner *so;
@@ -558,18 +558,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
return NULL;
}
-static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
- struct nfs4_client *clp)
-{
- struct nfs4_openowner *oo;
-
- spin_lock(&clp->cl_lock);
- oo = find_openstateowner_str_locked(hashval, open, clp);
- spin_unlock(&clp->cl_lock);
- return oo;
-}
-
static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
@@ -4880,34 +4868,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
}
static struct nfs4_openowner *
-alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
- struct nfsd4_compound_state *cstate)
+find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+ struct nfsd4_compound_state *cstate)
{
struct nfs4_client *clp = cstate->clp;
- struct nfs4_openowner *oo, *ret;
+ struct nfs4_openowner *oo, *new = NULL;
- oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
- if (!oo)
- return NULL;
- oo->oo_owner.so_ops = &openowner_ops;
- oo->oo_owner.so_is_open_owner = 1;
- oo->oo_owner.so_seqid = open->op_seqid;
- oo->oo_flags = 0;
- if (nfsd4_has_session(cstate))
- oo->oo_flags |= NFS4_OO_CONFIRMED;
- oo->oo_time = 0;
- oo->oo_last_closed_stid = NULL;
- INIT_LIST_HEAD(&oo->oo_close_lru);
+retry:
spin_lock(&clp->cl_lock);
- ret = find_openstateowner_str_locked(strhashval, open, clp);
- if (ret == NULL) {
- hash_openowner(oo, clp, strhashval);
- ret = oo;
- } else
- nfs4_free_stateowner(&oo->oo_owner);
-
+ oo = find_openstateowner_str(strhashval, open, clp);
+ if (!oo && new) {
+ hash_openowner(new, clp, strhashval);
+ spin_unlock(&clp->cl_lock);
+ return new;
+ }
spin_unlock(&clp->cl_lock);
- return ret;
+
+ if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ /* Replace unconfirmed owners without checking for replay. */
+ release_openowner(oo);
+ oo = NULL;
+ }
+ if (oo) {
+ if (new)
+ nfs4_free_stateowner(&new->oo_owner);
+ return oo;
+ }
+
+ new = alloc_stateowner(openowner_slab, &open->op_owner, clp);
+ if (!new)
+ return NULL;
+ new->oo_owner.so_ops = &openowner_ops;
+ new->oo_owner.so_is_open_owner = 1;
+ new->oo_owner.so_seqid = open->op_seqid;
+ new->oo_flags = 0;
+ if (nfsd4_has_session(cstate))
+ new->oo_flags |= NFS4_OO_CONFIRMED;
+ new->oo_time = 0;
+ new->oo_last_closed_stid = NULL;
+ INIT_LIST_HEAD(&new->oo_close_lru);
+ goto retry;
}
static struct nfs4_ol_stateid *
@@ -5356,27 +5356,14 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
- oo = find_openstateowner_str(strhashval, open, clp);
+ oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
open->op_openowner = oo;
- if (!oo) {
- goto new_owner;
- }
- if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- /* Replace unconfirmed owners without checking for replay. */
- release_openowner(oo);
- open->op_openowner = NULL;
- goto new_owner;
- }
+ if (!oo)
+ return nfserr_jukebox;
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
- goto alloc_stateid;
-new_owner:
- oo = alloc_init_open_stateowner(strhashval, open, cstate);
- if (oo == NULL)
- return nfserr_jukebox;
- open->op_openowner = oo;
-alloc_stateid:
+
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/4] nfsd: move nfsd4_cstate_assign_replay() earlier in open handling.
2024-04-08 2:09 [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() NeilBrown
2024-04-08 2:09 ` [PATCH 1/4] nfsd: perform all find_openstateowner_str calls in the one place NeilBrown
@ 2024-04-08 2:09 ` NeilBrown
2024-04-08 2:09 ` [PATCH 3/4] nfsd: replace rp_mutex to avoid deadlock in move_to_close_lru() NeilBrown
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: NeilBrown @ 2024-04-08 2:09 UTC (permalink / raw)
To: Chuck Lever, Jeff Layton
Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey
Rather than taking the rp_mutex (via nfsd4_cstate_assign_replay) in
nfsd4_cleanup_open_state() (which seems counter-intuitive), take it and
assign rp_owner as soon as possible - in nfsd4_process_open1().
This will support a future change when nfsd4_cstate_assign_replay() might
fail.
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfsd/nfs4state.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6ac1dff29d42..2ff8cb50f2e3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5360,6 +5360,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
open->op_openowner = oo;
if (!oo)
return nfserr_jukebox;
+ nfsd4_cstate_assign_replay(cstate, &oo->oo_owner);
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
@@ -6134,12 +6135,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open)
{
- if (open->op_openowner) {
- struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
-
- nfsd4_cstate_assign_replay(cstate, so);
- nfs4_put_stateowner(so);
- }
+ if (open->op_openowner)
+ nfs4_put_stateowner(&open->op_openowner->oo_owner);
if (open->op_file)
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 3/4] nfsd: replace rp_mutex to avoid deadlock in move_to_close_lru()
2024-04-08 2:09 [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() NeilBrown
2024-04-08 2:09 ` [PATCH 1/4] nfsd: perform all find_openstateowner_str calls in the one place NeilBrown
2024-04-08 2:09 ` [PATCH 2/4] nfsd: move nfsd4_cstate_assign_replay() earlier in open handling NeilBrown
@ 2024-04-08 2:09 ` NeilBrown
2024-04-08 2:09 ` [PATCH 4/4] nfsd: drop st_mutex before calling move_to_close_lru() NeilBrown
2024-04-09 2:09 ` [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() Chuck Lever
4 siblings, 0 replies; 6+ messages in thread
From: NeilBrown @ 2024-04-08 2:09 UTC (permalink / raw)
To: Chuck Lever, Jeff Layton
Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey
move_to_close_lru() waits for sc_count to become zero while holding
rp_mutex. This can deadlock if another thread holds a reference and is
waiting for rp_mutex.
By the time we get to move_to_close_lru() the openowner is unhashed and
cannot be found any more. So code waiting for the mutex can safely
retry the lookup if move_to_close_lru() has started.
So change rp_mutex to an atomic_t with three states:
RP_UNLOCK - state is still hashed, not locked for reply
RP_LOCKED - state is still hashed, is locked for reply
RP_UNHASHED - state is not hashed, no code can get a lock.
Use wait_var_event() to wait for either a lock, or for the owner to be
unhashed. In the latter case, retry the lookup.
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfsd/nfs4state.c | 38 +++++++++++++++++++++++++++++++-------
fs/nfsd/state.h | 2 +-
2 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2ff8cb50f2e3..5dbd1a73bc28 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4664,21 +4664,32 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
atomic_set(&nn->nfsd_courtesy_clients, 0);
}
+enum rp_lock {
+ RP_UNLOCKED,
+ RP_LOCKED,
+ RP_UNHASHED,
+};
+
static void init_nfs4_replay(struct nfs4_replay *rp)
{
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- mutex_init(&rp->rp_mutex);
+ atomic_set(&rp->rp_locked, RP_UNLOCKED);
}
-static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
- struct nfs4_stateowner *so)
+static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
+ struct nfs4_stateowner *so)
{
if (!nfsd4_has_session(cstate)) {
- mutex_lock(&so->so_replay.rp_mutex);
+ wait_var_event(&so->so_replay.rp_locked,
+ atomic_cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED)
+ return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
+ return 0;
}
void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
@@ -4687,7 +4698,8 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- mutex_unlock(&so->so_replay.rp_mutex);
+ atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
+ wake_up_var(&so->so_replay.rp_locked);
nfs4_put_stateowner(so);
}
}
@@ -4983,7 +4995,11 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Wait for the refcount to drop to 2. Since it has been unhashed,
* there should be no danger of the refcount going back up again at
* this point.
+ * Some threads with a reference might be waiting for rp_locked,
+ * so tell them to stop waiting.
*/
+ atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
+ wake_up_var(&oo->oo_owner.so_replay.rp_locked);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -5356,11 +5372,15 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
+retry:
oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
open->op_openowner = oo;
if (!oo)
return nfserr_jukebox;
- nfsd4_cstate_assign_replay(cstate, &oo->oo_owner);
+ if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
+ nfs4_put_stateowner(&oo->oo_owner);
+ goto retry;
+ }
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
@@ -7200,12 +7220,16 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
+retry:
status = nfsd4_lookup_stateid(cstate, stateid,
typemask, statusmask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
- nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
+ if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) {
+ nfs4_put_stateowner(stp->st_stateowner);
+ goto retry;
+ }
status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
if (!status)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 7f33b89f3b2c..f42d8d782c84 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -488,7 +488,7 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
- struct mutex rp_mutex;
+ atomic_t rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 4/4] nfsd: drop st_mutex before calling move_to_close_lru()
2024-04-08 2:09 [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() NeilBrown
` (2 preceding siblings ...)
2024-04-08 2:09 ` [PATCH 3/4] nfsd: replace rp_mutex to avoid deadlock in move_to_close_lru() NeilBrown
@ 2024-04-08 2:09 ` NeilBrown
2024-04-09 2:09 ` [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() Chuck Lever
4 siblings, 0 replies; 6+ messages in thread
From: NeilBrown @ 2024-04-08 2:09 UTC (permalink / raw)
To: Chuck Lever, Jeff Layton
Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey
move_to_close_lru() is currently called with ->st_mutex held.
This can lead to a deadlock as move_to_close_lru() waits for sc_count to
drop to 2, and some threads holding a reference might be waiting for the
mutex. These references will never be dropped so sc_count will never
reach 2.
There can be no harm in dropping ->st_mutex before
move_to_close_lru() because the only place that takes the mutex is
nfsd4_lock_ol_stateid(), and it quickly aborts if sc_type is
NFS4_CLOSED_STID, which it will be before move_to_close_lru() is called.
See also
https://lore.kernel.org/lkml/4dd1fe21e11344e5969bb112e954affb@jd.com/T/
where this problem was raised but not successfully resolved.
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/nfsd/nfs4state.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5dbd1a73bc28..4b7fc0644f89 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7371,7 +7371,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
return status;
}
-static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
struct nfs4_client *clp = s->st_stid.sc_client;
bool unhashed;
@@ -7388,11 +7388,11 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
list_for_each_entry(stp, &reaplist, st_locks)
nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
free_ol_stateid_reaplist(&reaplist);
+ return false;
} else {
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
- if (unhashed)
- move_to_close_lru(s, clp->net);
+ return unhashed;
}
}
@@ -7408,6 +7408,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *stp;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool need_move_to_close_list;
dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
@@ -7432,8 +7433,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
*/
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- nfsd4_close_open_stateid(stp);
+ need_move_to_close_list = nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
+ if (need_move_to_close_list)
+ move_to_close_lru(stp, net);
/* v4.1+ suggests that we send a special stateid in here, since the
* clients should just ignore this anyway. Since this is not useful
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru()
2024-04-08 2:09 [PATCH 0/4 v4] nfsd: fix deadlock in move_to_close_lru() NeilBrown
` (3 preceding siblings ...)
2024-04-08 2:09 ` [PATCH 4/4] nfsd: drop st_mutex before calling move_to_close_lru() NeilBrown
@ 2024-04-09 2:09 ` Chuck Lever
4 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever @ 2024-04-09 2:09 UTC (permalink / raw)
To: NeilBrown
Cc: Jeff Layton, linux-nfs@vger.kernel.org, Olga Kornievskaia,
Dai Ngo, Tom Talpey
On Sun, Apr 07, 2024 at 10:09:14PM -0400, NeilBrown wrote:
> This series replaces
> nfsd: drop st_mutex and rp_mutex before calling move_to_close_lru()
> which was recently dropped as a problem was found.
> The first two patches rearrange code without important functional change.
> The last two address the two relaced problems of two different mutexes which are
> held while waiting and can each trigger a deadlock.
>
> This is against v6.9-rc2.
>
> Thanks,
> NeilBrown
>
> [PATCH 1/4] nfsd: perform all find_openstateowner_str calls in the
> [PATCH 2/4] nfsd: move nfsd4_cstate_assign_replay() earlier in open
> [PATCH 3/4] nfsd: replace rp_mutex to avoid deadlock in
> [PATCH 4/4] nfsd: drop st_mutex before calling move_to_close_lru()
Applied to nfsd-next. Thank you, Neil!
--
Chuck Lever
^ permalink raw reply [flat|nested] 6+ messages in thread