From: Trond Myklebust <trondmy@hammerspace.com>
To: "bfields@redhat.com" <bfields@redhat.com>
Cc: "linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()
Date: Fri, 25 Oct 2019 14:55:45 +0000 [thread overview]
Message-ID: <97f56de86f0aeafb56998023d0561bb4a6233eb8.camel@hammerspace.com> (raw)
In-Reply-To: <20191025145147.GA16053@pick.fieldses.org>
On Fri, 2019-10-25 at 10:51 -0400, J. Bruce Fields wrote:
> On Wed, Oct 23, 2019 at 05:43:18PM -0400, Trond Myklebust wrote:
> > When we're destroying the client lease, and we call
> > nfsd4_shutdown_callback(), we must ensure that we do not return
> > before all outstanding callbacks have terminated and have
> > released their payloads.
>
> This is great, thanks! We've seen what I'm fairly sure is the same
> bug
> from Red Hat users. I think my blind spot was an assumption that
> rpc tasks wouldn't outlive rpc_shutdown_client().
>
> However, it's causing xfstests runs to hang, and I haven't worked out
> why yet.
>
> I'll spend some time on it this afternoon and let you know what I
> figure
> out.
>
Is that happening with v2 or with v1? With v1 there is definitely a
hang in __destroy_client() due to the refcount leak that I believe I
fixed in v2.
> --b.
>
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> > v2 - Fix a leak of clp->cl_cb_inflight when running null probes
> >
> > fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++-----
> > ----
> > fs/nfsd/state.h | 1 +
> > 2 files changed, 79 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> > index 524111420b48..a3c9517bcc64 100644
> > --- a/fs/nfsd/nfs4callback.c
> > +++ b/fs/nfsd/nfs4callback.c
> > @@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
> > return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
> > }
> >
> > +static struct workqueue_struct *callback_wq;
> > +
> > +static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
> > +{
> > + return queue_work(callback_wq, &cb->cb_work);
> > +}
> > +
> > +static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
> > +{
> > + atomic_inc(&clp->cl_cb_inflight);
> > +}
> > +
> > +static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
> > +{
> > +
> > + if (atomic_dec_and_test(&clp->cl_cb_inflight))
> > + wake_up_var(&clp->cl_cb_inflight);
> > +}
> > +
> > +static void nfsd41_cb_inflight_wait_complete(struct nfs4_client
> > *clp)
> > +{
> > + wait_var_event(&clp->cl_cb_inflight,
> > + !atomic_read(&clp->cl_cb_inflight));
> > +}
> > +
> > static const struct cred *get_backchannel_cred(struct nfs4_client
> > *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
> > {
> > if (clp->cl_minorversion == 0) {
> > @@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct
> > rpc_task *task, void *calldata)
> > clp->cl_cb_state = NFSD4_CB_UP;
> > }
> >
> > +static void nfsd4_cb_probe_release(void *calldata)
> > +{
> > + struct nfs4_client *clp = container_of(calldata, struct
> > nfs4_client, cl_cb_null);
> > +
> > + nfsd41_cb_inflight_end(clp);
> > +
> > +}
> > +
> > static const struct rpc_call_ops nfsd4_cb_probe_ops = {
> > /* XXX: release method to ensure we set the cb channel down if
> > * necessary on early failure? */
> > .rpc_call_done = nfsd4_cb_probe_done,
> > + .rpc_release = nfsd4_cb_probe_release,
> > };
> >
> > -static struct workqueue_struct *callback_wq;
> > -
> > /*
> > * Poke the callback thread to process any updates to the callback
> > * parameters, and send a null probe.
> > @@ -975,9 +1007,12 @@ void nfsd4_change_callback(struct nfs4_client
> > *clp, struct nfs4_cb_conn *conn)
> > * If the slot is available, then mark it busy. Otherwise, set
> > the
> > * thread for sleeping on the callback RPC wait queue.
> > */
> > -static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct
> > rpc_task *task)
> > +static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct
> > rpc_task *task)
> > {
> > - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + if (!cb->cb_holds_slot &&
> > + test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
> > /* Race breaker */
> > if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > @@ -985,10 +1020,32 @@ static bool nfsd41_cb_get_slot(struct
> > nfs4_client *clp, struct rpc_task *task)
> > return false;
> > }
> > rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
> > + cb->cb_holds_slot = true;
> > }
> > return true;
> > }
> >
> > +static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + if (cb->cb_holds_slot) {
> > + cb->cb_holds_slot = false;
> > + clear_bit(0, &clp->cl_cb_slot_busy);
> > + rpc_wake_up_next(&clp->cl_cb_waitq);
> > + }
> > +}
> > +
> > +static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + nfsd41_cb_release_slot(cb);
> > + if (cb->cb_ops && cb->cb_ops->release)
> > + cb->cb_ops->release(cb);
> > + nfsd41_cb_inflight_end(clp);
> > +}
> > +
> > /*
> > * TODO: cb_sequence should support referring call lists,
> > cachethis, multiple
> > * slots, and mark callback channel down on communication errors.
> > @@ -1005,11 +1062,8 @@ static void nfsd4_cb_prepare(struct rpc_task
> > *task, void *calldata)
> > */
> > cb->cb_seq_status = 1;
> > cb->cb_status = 0;
> > - if (minorversion) {
> > - if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp,
> > task))
> > - return;
> > - cb->cb_holds_slot = true;
> > - }
> > + if (minorversion && !nfsd41_cb_get_slot(cb, task))
> > + return;
> > rpc_call_start(task);
> > }
> >
> > @@ -1076,9 +1130,7 @@ static bool nfsd4_cb_sequence_done(struct
> > rpc_task *task, struct nfsd4_callback
> > cb->cb_seq_status);
> > }
> >
> > - cb->cb_holds_slot = false;
> > - clear_bit(0, &clp->cl_cb_slot_busy);
> > - rpc_wake_up_next(&clp->cl_cb_waitq);
> > + nfsd41_cb_release_slot(cb);
> > dprintk("%s: freed slot, new seqid=%d\n", __func__,
> > clp->cl_cb_session->se_cb_seq_nr);
> >
> > @@ -1091,8 +1143,10 @@ static bool nfsd4_cb_sequence_done(struct
> > rpc_task *task, struct nfsd4_callback
> > ret = false;
> > goto out;
> > need_restart:
> > - task->tk_status = 0;
> > - cb->cb_need_restart = true;
> > + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
> > + task->tk_status = 0;
> > + cb->cb_need_restart = true;
> > + }
> > return false;
> > }
> >
> > @@ -1134,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
> > struct nfsd4_callback *cb = calldata;
> >
> > if (cb->cb_need_restart)
> > - nfsd4_run_cb(cb);
> > + nfsd4_queue_cb(cb);
> > else
> > - cb->cb_ops->release(cb);
> > + nfsd41_destroy_cb(cb);
> >
> > }
> >
> > @@ -1170,6 +1224,7 @@ void nfsd4_shutdown_callback(struct
> > nfs4_client *clp)
> > */
> > nfsd4_run_cb(&clp->cl_cb_null);
> > flush_workqueue(callback_wq);
> > + nfsd41_cb_inflight_wait_complete(clp);
> > }
> >
> > /* requires cl_lock: */
> > @@ -1255,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> > clnt = clp->cl_cb_client;
> > if (!clnt) {
> > /* Callback channel broken, or client killed; give up:
> > */
> > - if (cb->cb_ops && cb->cb_ops->release)
> > - cb->cb_ops->release(cb);
> > + nfsd41_destroy_cb(cb);
> > return;
> > }
> >
> > @@ -1265,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> > */
> > if (!cb->cb_ops && clp->cl_minorversion) {
> > clp->cl_cb_state = NFSD4_CB_UP;
> > + nfsd41_destroy_cb(cb);
> > return;
> > }
> >
> > @@ -1290,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb,
> > struct nfs4_client *clp,
> >
> > void nfsd4_run_cb(struct nfsd4_callback *cb)
> > {
> > - queue_work(callback_wq, &cb->cb_work);
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + nfsd41_cb_inflight_begin(clp);
> > + if (!nfsd4_queue_cb(cb))
> > + nfsd41_cb_inflight_end(clp);
> > }
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 46f56afb6cb8..d61b83b9654c 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -367,6 +367,7 @@ struct nfs4_client {
> > struct net *net;
> > struct list_head async_copies; /* list of async copies */
> > spinlock_t async_lock; /* lock for async copies */
> > + atomic_t cl_cb_inflight; /* Outstanding callbacks */
> > };
> >
> > /* struct nfs4_client_reset
> > --
> > 2.21.0
> >
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com
next prev parent reply other threads:[~2019-10-25 14:55 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-23 21:43 [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback() Trond Myklebust
2019-10-25 14:51 ` J. Bruce Fields
2019-10-25 14:55 ` Trond Myklebust [this message]
2019-10-25 15:21 ` J. Bruce Fields
2019-10-25 15:33 ` J. Bruce Fields
2019-10-29 21:47 ` J. Bruce Fields
2019-11-07 22:27 ` J. Bruce Fields
2019-11-08 17:51 ` J. Bruce Fields
2019-11-08 17:52 ` J. Bruce Fields
2019-11-08 17:54 ` [PATCH] nfsd: document callback_wq serialization of callback code J. Bruce Fields
2019-11-08 17:55 ` [PATCH] nfsd: mark cb path down on unknown errors J. Bruce Fields
2019-11-08 18:44 ` [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback() Trond Myklebust
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=97f56de86f0aeafb56998023d0561bb4a6233eb8.camel@hammerspace.com \
--to=trondmy@hammerspace.com \
--cc=bfields@redhat.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox