From: Jeff Layton <jlayton@kernel.org>
To: Chuck Lever III <chuck.lever@oracle.com>, Dai Ngo <dai.ngo@oracle.com>
Cc: Linux NFS Mailing List <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition
Date: Fri, 11 Nov 2022 10:58:45 -0500 [thread overview]
Message-ID: <e1c6e855f20801944c4cfea19c459e513c9e39b0.camel@kernel.org> (raw)
In-Reply-To: <B311BC62-A485-47EE-86F3-0C17AF119C12@oracle.com>
On Fri, 2022-11-11 at 15:25 +0000, Chuck Lever III wrote:
>
> > On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> >
> > The delegation shrinker is scheduled to run on every shrinker's
> > 'count' callback. It scans the client list and sends the
> > courtesy CB_RECALL_ANY to the clients that hold delegations.
> >
> > To avoid flooding the clients with CB_RECALL_ANY requests, the
> > delegation shrinker is scheduled to run after a 5 seconds delay.
> >
> > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > ---
> > fs/nfsd/netns.h | 3 ++
> > fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > fs/nfsd/state.h | 8 ++++
> > 3 files changed, 125 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 8c854ba3285b..394a05fb46d8 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -196,6 +196,9 @@ struct nfsd_net {
> > atomic_t nfsd_courtesy_clients;
> > struct shrinker nfsd_client_shrinker;
> > struct delayed_work nfsd_shrinker_work;
> > +
> > + struct shrinker nfsd_deleg_shrinker;
> > + struct delayed_work nfsd_deleg_shrinker_work;
> > };
> >
> > /* Simple check to find out if a given net was properly initialized */
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 4e718500a00c..813cdb67b370 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> > kfree(clp->cl_nii_domain.data);
> > kfree(clp->cl_nii_name.data);
> > idr_destroy(&clp->cl_stateids);
> > + kfree(clp->cl_ra);
> > kmem_cache_free(client_slab, clp);
> > }
> >
> > @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> > [3] = {""},
> > };
> >
> > +static int
> > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > + struct rpc_task *task)
> > +{
> > + switch (task->tk_status) {
> > + case -NFS4ERR_DELAY:
> > + rpc_delay(task, 2 * HZ);
> > + return 0;
> > + default:
> > + return 1;
> > + }
> > +}
> > +
> > +static void
> > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> > +
> > + spin_lock(&nn->client_lock);
> > + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > + put_client_renew_locked(clp);
> > + spin_unlock(&nn->client_lock);
> > +}
> > +
> > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > + .done = nfsd4_cb_recall_any_done,
> > + .release = nfsd4_cb_recall_any_release,
> > +};
> > +
> > static struct nfs4_client *create_client(struct xdr_netobj name,
> > struct svc_rqst *rqstp, nfs4_verifier *verf)
> > {
> > @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > free_client(clp);
> > return NULL;
> > }
> > + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> > + if (!clp->cl_ra) {
> > + free_client(clp);
> > + return NULL;
> > + }
> > + clp->cl_ra_time = 0;
> > + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> > + NFSPROC4_CLNT_CB_RECALL_ANY);
> > return clp;
> > }
> >
> > @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> > return SHRINK_STOP;
> > }
> >
> > +static unsigned long
> > +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > + unsigned long cnt;
>
> No reason not to spell out "count".
>
> > + struct nfsd_net *nn = container_of(shrink,
> > + struct nfsd_net, nfsd_deleg_shrinker);
> > +
> > + cnt = atomic_long_read(&num_delegations);
> > + if (cnt)
> > + mod_delayed_work(laundry_wq,
> > + &nn->nfsd_deleg_shrinker_work, 0);
> > + return cnt;
> > +}
> > +
> > +static unsigned long
> > +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > + return SHRINK_STOP;
> > +}
> > +
> > int
> > nfsd4_init_leases_net(struct nfsd_net *nn)
> > {
> > struct sysinfo si;
> > u64 max_clients;
> > + int retval;
> >
> > nn->nfsd4_lease = 90; /* default lease time */
> > nn->nfsd4_grace = 90;
> > @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> > nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> > nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> > nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> > - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > + if (retval)
> > + return retval;
> > + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> > + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> > + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> > + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> > + if (retval)
> > + unregister_shrinker(&nn->nfsd_client_shrinker);
> > + return retval;
> > +
> > }
> >
> > void
> > nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> > {
> > unregister_shrinker(&nn->nfsd_client_shrinker);
> > + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> > }
> >
> > static void init_nfs4_replay(struct nfs4_replay *rp)
> > @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> > nfs4_process_client_reaplist(&reaplist);
> > }
> >
> > +static void
> > +deleg_reaper(struct work_struct *deleg_work)
> > +{
> > + struct list_head *pos, *next;
> > + struct nfs4_client *clp;
> > + struct list_head cblist;
> > + struct delayed_work *dwork = to_delayed_work(deleg_work);
> > + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> > + nfsd_deleg_shrinker_work);
> > +
> > + INIT_LIST_HEAD(&cblist);
> > + spin_lock(&nn->client_lock);
> > + list_for_each_safe(pos, next, &nn->client_lru) {
> > + clp = list_entry(pos, struct nfs4_client, cl_lru);
> > + if (clp->cl_state != NFSD4_ACTIVE ||
> > + list_empty(&clp->cl_delegations) ||
> > + atomic_read(&clp->cl_delegs_in_recall) ||
> > + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> > + (ktime_get_boottime_seconds() -
> > + clp->cl_ra_time < 5)) {
> > + continue;
> > + }
> > + list_add(&clp->cl_ra_cblist, &cblist);
> > +
> > + /* release in nfsd4_cb_recall_any_release */
> > + atomic_inc(&clp->cl_rpc_users);
> > + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > + clp->cl_ra_time = ktime_get_boottime_seconds();
> > + }
> > + spin_unlock(&nn->client_lock);
> > + list_for_each_safe(pos, next, &cblist) {
>
> The usual idiom for draining a list like this is
>
> while (!list_empty(&cblist)) {
> clp = list_first_entry( ... );
>
Agreed. Note that it's also slightly more efficient to do it that way,
since you don't require a "next" pointer.
>
> > + clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> > + list_del_init(&clp->cl_ra_cblist);
> > + clp->cl_ra->ra_keep = 0;
> > + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> > + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> > + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> > + }
> > +
> > +}
> > +
> > static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> > {
> > if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> > @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> >
> > INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> > INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> > + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> > get_net(net);
> >
> > return 0;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 6b33cbbe76d3..12ce9792c5b6 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -368,6 +368,7 @@ struct nfs4_client {
> > #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> > #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> > 1 << NFSD4_CLIENT_CB_KILL)
> > +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> > unsigned long cl_flags;
> > const struct cred *cl_cb_cred;
> > struct rpc_clnt *cl_cb_client;
> > @@ -411,6 +412,10 @@ struct nfs4_client {
> >
> > unsigned int cl_state;
> > atomic_t cl_delegs_in_recall;
> > +
> > + struct nfsd4_cb_recall_any *cl_ra;
> > + time64_t cl_ra_time;
> > + struct list_head cl_ra_cblist;
> > };
> >
> > /* struct nfs4_client_reset
> > @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> > NFSPROC4_CLNT_CB_RECALL_ANY,
> > };
> >
> > +#define RCA4_TYPE_MASK_RDATA_DLG 0
> > +#define RCA4_TYPE_MASK_WDATA_DLG 1
> > +
> > /* Returns true iff a is later than b: */
> > static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > {
> > --
> > 2.9.5
> >
>
> --
> Chuck Lever
>
>
>
--
Jeff Layton <jlayton@kernel.org>
next prev parent reply other threads:[~2022-11-11 15:58 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-10 4:17 [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
2022-11-10 4:17 ` [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
2022-11-11 15:25 ` Chuck Lever III
2022-11-10 4:17 ` [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
2022-11-11 15:25 ` Chuck Lever III
2022-11-11 15:58 ` Jeff Layton [this message]
2022-11-10 4:17 ` [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints Dai Ngo
2022-11-11 15:25 ` Chuck Lever III
2022-11-11 21:34 ` dai.ngo
2022-11-11 22:02 ` Chuck Lever III
2022-11-15 5:08 ` dai.ngo
2022-11-15 14:21 ` Chuck Lever III
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e1c6e855f20801944c4cfea19c459e513c9e39b0.camel@kernel.org \
--to=jlayton@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=dai.ngo@oracle.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox