* [PATCH 0/2] add support for CB_RECALL_ANY and the delegation shrinker
@ 2022-10-18 5:15 Dai Ngo
2022-10-18 5:15 ` [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
2022-10-18 5:15 ` [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
0 siblings, 2 replies; 9+ messages in thread
From: Dai Ngo @ 2022-10-18 5:15 UTC
To: chuck.lever; +Cc: linux-nfs
This patch series adds:
. support for sending the CB_RECALL_ANY op.
There is only one nfsd4_callback, cl_recall_any, added for each
nfs4_client. Access to it must be serialized. For now this is done
with the cl_recall_any_busy flag, since the delegation shrinker is
the only user. If another consumer of cl_recall_any is added, a
spinlock will be needed; a rough sketch of such a scheme is shown
below.
. a delegation shrinker that sends the advisory CB_RECALL_ANY to
clients, asking them to return unused delegations.
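For illustration only, here is a minimal sketch of what spinlock-based
serialization could look like if a second consumer ever appears. The
structure, field, and helper names below are hypothetical and not part
of this series; the state would live in struct nfs4_client and the
lock would be initialized at client-creation time.

#include <linux/spinlock.h>
#include <linux/types.h>

struct recall_any_state {
        spinlock_t lock;        /* would live in struct nfs4_client */
        bool busy;              /* analogous to cl_recall_any_busy  */
};

/* Returns true if the caller won the right to send CB_RECALL_ANY. */
static bool recall_any_try_start(struct recall_any_state *ra)
{
        bool won = false;

        spin_lock(&ra->lock);
        if (!ra->busy) {
                ra->busy = true;
                won = true;
        }
        spin_unlock(&ra->lock);
        return won;
}

/* Counterpart of the callback's ->release: drop the busy flag. */
static void recall_any_finish(struct recall_any_state *ra)
{
        spin_lock(&ra->lock);
        ra->busy = false;
        spin_unlock(&ra->lock);
}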
---
Dai Ngo (2):
NFSD: add support for sending CB_RECALL_ANY
NFSD: add delegation shrinker to react to low memory condition
fs/nfsd/netns.h | 3 ++
fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++
fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 9 +++++
fs/nfsd/xdr4cb.h | 6 +++
5 files changed, 178 insertions(+), 1 deletion(-)
* [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY
2022-10-18 5:15 [PATCH 0/2] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
@ 2022-10-18 5:15 ` Dai Ngo
2022-10-18 13:25 ` Tom Talpey
2022-10-18 5:15 ` [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
1 sibling, 1 reply; 9+ messages in thread
From: Dai Ngo @ 2022-10-18 5:15 UTC
To: chuck.lever; +Cc: linux-nfs
There is only one nfsd4_callback, cl_recall_any, added for each
nfs4_client. Access to it must be serialized. For now this is done
with the cl_recall_any_busy flag, since the delegation shrinker is
the only user. If another consumer of CB_RECALL_ANY is added, a
spinlock will be needed.
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4state.c | 27 +++++++++++++++++++++
fs/nfsd/state.h | 8 +++++++
fs/nfsd/xdr4cb.h | 6 +++++
4 files changed, 105 insertions(+)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..03587e1397f4 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
}
/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
+{
+ __be32 *p;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = xdr_zero; /* craa_objects_to_keep */
+ p = xdr_reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(1);
+ *p++ = cpu_to_be32(bmval);
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
+ encode_cb_nops(&hdr);
+}
/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..c60c937dece6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
[3] = {""},
};
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ cb->cb_clp->cl_recall_any_busy = false;
+ atomic_dec(&cb->cb_clp->cl_rpc_users);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..49ca06169642 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -411,6 +411,10 @@ struct nfs4_client {
unsigned int cl_state;
atomic_t cl_delegs_in_recall;
+
+ bool cl_recall_any_busy;
+ uint32_t cl_recall_any_bm;
+ struct nfsd4_callback cl_recall_any;
};
/* struct nfs4_client_reset
@@ -639,8 +643,12 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
};
+#define RCA4_TYPE_MASK_RDATA_DLG 0
+#define RCA4_TYPE_MASK_WDATA_DLG 1
+
/* Returns true iff a is later than b: */
static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
{
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
--
2.9.5
* [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition
2022-10-18 5:15 [PATCH 0/2] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
2022-10-18 5:15 ` [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
@ 2022-10-18 5:15 ` Dai Ngo
2022-10-18 13:07 ` Jeff Layton
1 sibling, 1 reply; 9+ messages in thread
From: Dai Ngo @ 2022-10-18 5:15 UTC
To: chuck.lever; +Cc: linux-nfs
The delegation shrinker work is scheduled from the shrinker's
'count' callback. It scans the client list and sends the advisory
CB_RECALL_ANY to clients that hold delegations.
To avoid flooding clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5-second delay.
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
fs/nfsd/netns.h | 3 +++
fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 1 +
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
struct delayed_work nfsd_shrinker_work;
+
+ struct shrinker nfsd_deleg_shrinker;
+ struct delayed_work nfsd_deleg_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c60c937dece6..bdaea0e6e72f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long cnt;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_deleg_shrinker);
+
+ cnt = atomic_long_read(&num_delegations);
+ if (cnt)
+ mod_delayed_work(laundry_wq,
+ &nn->nfsd_deleg_shrinker_work, 5 * HZ);
+ return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
int
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
u64 max_clients;
+ int retval;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
@@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ if (retval)
+ return retval;
+ nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+ nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+ nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+ retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+ if (retval)
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ return retval;
}
void
nfsd4_leases_net_shutdown(struct nfsd_net *nn)
{
unregister_shrinker(&nn->nfsd_client_shrinker);
+ unregister_shrinker(&nn->nfsd_deleg_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
nfs4_process_client_reaplist(&reaplist);
}
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+ struct delayed_work *dwork = to_delayed_work(deleg_work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_deleg_shrinker_work);
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ clp->cl_recall_any_busy) {
+ continue;
+ }
+ clp->cl_recall_any_busy = true;
+ list_add(&clp->cl_recall_any_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ }
+ spin_unlock(&nn->client_lock);
+ list_for_each_safe(pos, next, &cblist) {
+ clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
+ list_del_init(&clp->cl_recall_any_cblist);
+ clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_recall_any);
+ }
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
get_net(net);
return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 49ca06169642..05b572ce6605 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -415,6 +415,7 @@ struct nfs4_client {
bool cl_recall_any_busy;
uint32_t cl_recall_any_bm;
struct nfsd4_callback cl_recall_any;
+ struct list_head cl_recall_any_cblist;
};
/* struct nfs4_client_reset
--
2.9.5
* Re: [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition
2022-10-18 5:15 ` [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
@ 2022-10-18 13:07 ` Jeff Layton
2022-10-18 15:51 ` dai.ngo
0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2022-10-18 13:07 UTC
To: Dai Ngo, chuck.lever; +Cc: linux-nfs
On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
>
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
>
But...when the kernel needs memory, it generally needs it _now_, and 5s
is a long time. It'd be better to not delay the initial sending of
CB_RECALL_ANY callbacks this much.
Maybe the logic should be changed such that it runs no more frequently
than once every 5s, but don't delay the initial sending of
CB_RECALL_ANYs.
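For example, one way to express that (purely illustrative; the
nfsd_deleg_last_run timestamp is an assumed new field, not something
in the posted patch, and races on it are ignored since an occasional
extra scheduling attempt is harmless):

static unsigned long
nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
        struct nfsd_net *nn = container_of(shrink,
                        struct nfsd_net, nfsd_deleg_shrinker);
        unsigned long cnt = atomic_long_read(&num_delegations);

        /* Kick the work right away, but no more than once every 5s. */
        if (cnt && time_after(jiffies, nn->nfsd_deleg_last_run + 5 * HZ)) {
                nn->nfsd_deleg_last_run = jiffies;
                mod_delayed_work(laundry_wq,
                                &nn->nfsd_deleg_shrinker_work, 0);
        }
        return cnt;
}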
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/netns.h | 3 +++
> fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 1 +
> 3 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> atomic_t nfsd_courtesy_clients;
> struct shrinker nfsd_client_shrinker;
> struct delayed_work nfsd_shrinker_work;
> +
> + struct shrinker nfsd_deleg_shrinker;
> + struct delayed_work nfsd_deleg_shrinker_work;
> };
>
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index c60c937dece6..bdaea0e6e72f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> return SHRINK_STOP;
> }
>
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + unsigned long cnt;
> + struct nfsd_net *nn = container_of(shrink,
> + struct nfsd_net, nfsd_deleg_shrinker);
> +
> + cnt = atomic_long_read(&num_delegations);
> + if (cnt)
> + mod_delayed_work(laundry_wq,
> + &nn->nfsd_deleg_shrinker_work, 5 * HZ);
> + return cnt;
> +}
> +
What's the rationale for doing all of this in the count callback? Why
not just return the value here and leave scheduling to the scan
callback?
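For instance (purely illustrative, not a tested change):

static unsigned long
nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
{
        struct nfsd_net *nn = container_of(shrink,
                        struct nfsd_net, nfsd_deleg_shrinker);

        /* Scheduling done here; 'count' just reports num_delegations. */
        mod_delayed_work(laundry_wq, &nn->nfsd_deleg_shrinker_work, 0);
        return SHRINK_STOP;
}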
> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + return SHRINK_STOP;
> +}
> +
> int
> nfsd4_init_leases_net(struct nfsd_net *nn)
> {
> struct sysinfo si;
> u64 max_clients;
> + int retval;
>
> nn->nfsd4_lease = 90; /* default lease time */
> nn->nfsd4_grace = 90;
> @@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + if (retval)
> + return retval;
> + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> + if (retval)
> + unregister_shrinker(&nn->nfsd_client_shrinker);
> + return retval;
> }
>
> void
> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> {
> unregister_shrinker(&nn->nfsd_client_shrinker);
> + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> }
>
> static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
> nfs4_process_client_reaplist(&reaplist);
> }
>
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> + struct list_head *pos, *next;
> + struct nfs4_client *clp;
> + struct list_head cblist;
> + struct delayed_work *dwork = to_delayed_work(deleg_work);
> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + nfsd_deleg_shrinker_work);
> +
> + INIT_LIST_HEAD(&cblist);
> + spin_lock(&nn->client_lock);
> + list_for_each_safe(pos, next, &nn->client_lru) {
> + clp = list_entry(pos, struct nfs4_client, cl_lru);
> + if (clp->cl_state != NFSD4_ACTIVE ||
> + list_empty(&clp->cl_delegations) ||
> + atomic_read(&clp->cl_delegs_in_recall) ||
> + clp->cl_recall_any_busy) {
> + continue;
> + }
> + clp->cl_recall_any_busy = true;
> + list_add(&clp->cl_recall_any_cblist, &cblist);
> +
> + /* release in nfsd4_cb_recall_any_release */
> + atomic_inc(&clp->cl_rpc_users);
> + }
> + spin_unlock(&nn->client_lock);
> + list_for_each_safe(pos, next, &cblist) {
> + clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
> + list_del_init(&clp->cl_recall_any_cblist);
> + clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + nfsd4_run_cb(&clp->cl_recall_any);
> + }
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> get_net(net);
>
> return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 49ca06169642..05b572ce6605 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -415,6 +415,7 @@ struct nfs4_client {
> bool cl_recall_any_busy;
> uint32_t cl_recall_any_bm;
> struct nfsd4_callback cl_recall_any;
> + struct list_head cl_recall_any_cblist;
> };
>
> /* struct nfs4_client_reset
--
Jeff Layton <jlayton@kernel.org>
* Re: [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY
2022-10-18 5:15 ` [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
@ 2022-10-18 13:25 ` Tom Talpey
2022-10-18 14:03 ` Chuck Lever III
2022-10-18 15:49 ` dai.ngo
0 siblings, 2 replies; 9+ messages in thread
From: Tom Talpey @ 2022-10-18 13:25 UTC
To: Dai Ngo, chuck.lever; +Cc: linux-nfs
On 10/18/2022 1:15 AM, Dai Ngo wrote:
> There is only one nfsd4_callback, cl_recall_any, added for each
> nfs4_client. Access to it must be serialized. For now it's done
> by the cl_recall_any_busy flag since it's used only by the
> delegation shrinker. If there is another consumer of CB_RECALL_ANY
> then a spinlock must be used.
I'm curious if clients have shown any quirks with the operation in
your testing. If the (Linux) server hasn't ever been sending it,
then I'd expect some possible issues/quirks in the client.
For example, do they really start handing back a significant number
of useful delegations? Enough to satisfy the server's need without
going to specific resource-based recalls?
Tom.
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/nfs4state.c | 27 +++++++++++++++++++++
> fs/nfsd/state.h | 8 +++++++
> fs/nfsd/xdr4cb.h | 6 +++++
> 4 files changed, 105 insertions(+)
>
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index f0e69edf5f0f..03587e1397f4 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> }
>
> /*
> + * CB_RECALLANY4args
> + *
> + * struct CB_RECALLANY4args {
> + * uint32_t craa_objects_to_keep;
> + * bitmap4 craa_type_mask;
> + * };
> + */
> +static void
> +encode_cb_recallany4args(struct xdr_stream *xdr,
> + struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
> +{
> + __be32 *p;
> +
> + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> + p = xdr_reserve_space(xdr, 4);
> + *p++ = xdr_zero; /* craa_objects_to_keep */
> + p = xdr_reserve_space(xdr, 8);
> + *p++ = cpu_to_be32(1);
> + *p++ = cpu_to_be32(bmval);
> + hdr->nops++;
> +}
> +
> +/*
> * CB_SEQUENCE4args
> *
> * struct CB_SEQUENCE4args {
> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> encode_cb_nops(&hdr);
> }
>
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static void
> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> + struct xdr_stream *xdr, const void *data)
> +{
> + const struct nfsd4_callback *cb = data;
> + struct nfs4_cb_compound_hdr hdr = {
> + .ident = cb->cb_clp->cl_cb_ident,
> + .minorversion = cb->cb_clp->cl_minorversion,
> + };
> +
> + encode_cb_compound4args(xdr, &hdr);
> + encode_cb_sequence4args(xdr, cb, &hdr);
> + encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
> + encode_cb_nops(&hdr);
> +}
>
> /*
> * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> }
>
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static int
> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> + struct xdr_stream *xdr,
> + void *data)
> +{
> + struct nfsd4_callback *cb = data;
> + struct nfs4_cb_compound_hdr hdr;
> + int status;
> +
> + status = decode_cb_compound4res(xdr, &hdr);
> + if (unlikely(status))
> + return status;
> + status = decode_cb_sequence4res(xdr, cb);
> + if (unlikely(status || cb->cb_seq_status))
> + return status;
> + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> + return status;
> +}
> +
> #ifdef CONFIG_NFSD_PNFS
> /*
> * CB_LAYOUTRECALL4args
> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> #endif
> PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
> PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
> + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
> };
>
> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..c60c937dece6 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
> [3] = {""},
> };
>
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> + struct rpc_task *task)
> +{
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> + cb->cb_clp->cl_recall_any_busy = false;
> + atomic_dec(&cb->cb_clp->cl_rpc_users);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> + .done = nfsd4_cb_recall_any_done,
> + .release = nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> free_client(clp);
> return NULL;
> }
> + nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
> + NFSPROC4_CLNT_CB_RECALL_ANY);
> return clp;
> }
>
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..49ca06169642 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -411,6 +411,10 @@ struct nfs4_client {
>
> unsigned int cl_state;
> atomic_t cl_delegs_in_recall;
> +
> + bool cl_recall_any_busy;
> + uint32_t cl_recall_any_bm;
> + struct nfsd4_callback cl_recall_any;
> };
>
> /* struct nfs4_client_reset
> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_OFFLOAD,
> NFSPROC4_CLNT_CB_SEQUENCE,
> NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> + NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> +#define RCA4_TYPE_MASK_RDATA_DLG 0
> +#define RCA4_TYPE_MASK_WDATA_DLG 1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 547cf07cf4e0..0d39af1b00a0 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -48,3 +48,9 @@
> #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
> cb_sequence_dec_sz + \
> op_dec_sz)
> +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
> + cb_sequence_enc_sz + \
> + 1 + 1 + 1)
> +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
> + cb_sequence_dec_sz + \
> + op_dec_sz)
* Re: [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY
2022-10-18 13:25 ` Tom Talpey
@ 2022-10-18 14:03 ` Chuck Lever III
2022-10-18 15:52 ` dai.ngo
2022-10-18 15:49 ` dai.ngo
1 sibling, 1 reply; 9+ messages in thread
From: Chuck Lever III @ 2022-10-18 14:03 UTC
To: Tom Talpey, Dai Ngo; +Cc: Linux NFS Mailing List
> On Oct 18, 2022, at 9:25 AM, Tom Talpey <tom@talpey.com> wrote:
>
> On 10/18/2022 1:15 AM, Dai Ngo wrote:
>> There is only one nfsd4_callback, cl_recall_any, added for each
>> nfs4_client. Access to it must be serialized. For now it's done
>> by the cl_recall_any_busy flag since it's used only by the
>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>> then a spinlock must be used.
>
> I'm curious if clients have shown any quirks with the operation in
> your testing. If the (Linux) server hasn't ever been sending it,
> then I'd expect some possible issues/quirks in the client.
Is Linux NFSD the first implementation of CB_RECALL_ANY? If other
servers already have this capability, I would expect clients to
work adequately.
Of course, if the community doesn't have any unit tests for
CB_RECALL_ANY... "what's not tested is broken" -- Brian Wong.
> For example, do they really start handing back a significant number
> of useful delegations? Enough to satisfy the server's need without
> going to specific resource-based recalls?
I don't think of CB_RECALL_ANY as heroic, it's more of a hint. So
if a client doesn't return anything, that's not really a problem --
NFSD is not relying on it, and it certainly does take some time
before server-side state resources are eventually released.
Another possible use case is for the server to start sending
CB_RECALL_ANY when a client hits the max per-client delegation
limit.
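A rough sketch of where such a check might live is below. The
per-client counter (cl_num_delegs) and the limit value are assumptions
for illustration, not existing nfsd fields, and the caller would need
to hold the appropriate lock (as deleg_reaper does with
nn->client_lock) for the busy-flag test to be safe:

static void deleg_check_client_limit(struct nfs4_client *clp)
{
        /* Assumed per-client soft limit on outstanding delegations. */
        const unsigned int max_delegs_per_client = 1024;

        if (atomic_read(&clp->cl_num_delegs) < max_delegs_per_client)
                return;
        if (clp->cl_recall_any_busy)
                return;

        clp->cl_recall_any_busy = true;
        atomic_inc(&clp->cl_rpc_users);         /* dropped in ->release */
        clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
                                BIT(RCA4_TYPE_MASK_WDATA_DLG);
        nfsd4_run_cb(&clp->cl_recall_any);
}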
--
Chuck Lever
* Re: [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY
2022-10-18 13:25 ` Tom Talpey
2022-10-18 14:03 ` Chuck Lever III
@ 2022-10-18 15:49 ` dai.ngo
1 sibling, 0 replies; 9+ messages in thread
From: dai.ngo @ 2022-10-18 15:49 UTC
To: Tom Talpey, chuck.lever; +Cc: linux-nfs
On 10/18/22 6:25 AM, Tom Talpey wrote:
> On 10/18/2022 1:15 AM, Dai Ngo wrote:
>> There is only one nfsd4_callback, cl_recall_any, added for each
>> nfs4_client. Access to it must be serialized. For now it's done
>> by the cl_recall_any_busy flag since it's used only by the
>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>> then a spinlock must be used.
>
> I'm curious if clients have shown any quirks with the operation in
> your testing. If the (Linux) server hasn't ever been sending it,
> then I'd expect some possible issues/quirks in the client.
>
> For example, do they really start handing back a significant number
> of useful delegations? Enough to satisfy the server's need without
> going to specific resource-based recalls?
In my testing with a Linux client that holds active delegations, the
client just replies with NFS4_OK without returning any of them. I
suspect that for the client to return a delegation, the delegation
must not be in use, but so far I have not been able to force that
condition: a delegation that is no longer in use but has not yet been
returned to the server.
-Dai
>
> Tom.
>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/nfs4callback.c | 64
>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>> fs/nfsd/nfs4state.c | 27 +++++++++++++++++++++
>> fs/nfsd/state.h | 8 +++++++
>> fs/nfsd/xdr4cb.h | 6 +++++
>> 4 files changed, 105 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index f0e69edf5f0f..03587e1397f4 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct
>> xdr_stream *xdr,
>> }
>> /*
>> + * CB_RECALLANY4args
>> + *
>> + * struct CB_RECALLANY4args {
>> + * uint32_t craa_objects_to_keep;
>> + * bitmap4 craa_type_mask;
>> + * };
>> + */
>> +static void
>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>> + struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>> +{
>> + __be32 *p;
>> +
>> + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>> + p = xdr_reserve_space(xdr, 4);
>> + *p++ = xdr_zero; /* craa_objects_to_keep */
>> + p = xdr_reserve_space(xdr, 8);
>> + *p++ = cpu_to_be32(1);
>> + *p++ = cpu_to_be32(bmval);
>> + hdr->nops++;
>> +}
>> +
>> +/*
>> * CB_SEQUENCE4args
>> *
>> * struct CB_SEQUENCE4args {
>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct
>> rpc_rqst *req, struct xdr_stream *xdr,
>> encode_cb_nops(&hdr);
>> }
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static void
>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>> + struct xdr_stream *xdr, const void *data)
>> +{
>> + const struct nfsd4_callback *cb = data;
>> + struct nfs4_cb_compound_hdr hdr = {
>> + .ident = cb->cb_clp->cl_cb_ident,
>> + .minorversion = cb->cb_clp->cl_minorversion,
>> + };
>> +
>> + encode_cb_compound4args(xdr, &hdr);
>> + encode_cb_sequence4args(xdr, cb, &hdr);
>> + encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>> + encode_cb_nops(&hdr);
>> +}
>> /*
>> * NFSv4.0 and NFSv4.1 XDR decode functions
>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct
>> rpc_rqst *rqstp,
>> return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>> }
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static int
>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>> + struct xdr_stream *xdr,
>> + void *data)
>> +{
>> + struct nfsd4_callback *cb = data;
>> + struct nfs4_cb_compound_hdr hdr;
>> + int status;
>> +
>> + status = decode_cb_compound4res(xdr, &hdr);
>> + if (unlikely(status))
>> + return status;
>> + status = decode_cb_sequence4res(xdr, cb);
>> + if (unlikely(status || cb->cb_seq_status))
>> + return status;
>> + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY,
>> &cb->cb_status);
>> + return status;
>> +}
>> +
>> #ifdef CONFIG_NFSD_PNFS
>> /*
>> * CB_LAYOUTRECALL4args
>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo
>> nfs4_cb_procedures[] = {
>> #endif
>> PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock,
>> cb_notify_lock),
>> PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
>> + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
>> };
>> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 4e718500a00c..c60c937dece6 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>> [3] = {""},
>> };
>> +static int
>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> + struct rpc_task *task)
>> +{
>> + switch (task->tk_status) {
>> + case -NFS4ERR_DELAY:
>> + rpc_delay(task, 2 * HZ);
>> + return 0;
>> + default:
>> + return 1;
>> + }
>> +}
>> +
>> +static void
>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> +{
>> + cb->cb_clp->cl_recall_any_busy = false;
>> + atomic_dec(&cb->cb_clp->cl_rpc_users);
>> +}
>> +
>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> + .done = nfsd4_cb_recall_any_done,
>> + .release = nfsd4_cb_recall_any_release,
>> +};
>> +
>> static struct nfs4_client *create_client(struct xdr_netobj name,
>> struct svc_rqst *rqstp, nfs4_verifier *verf)
>> {
>> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct
>> xdr_netobj name,
>> free_client(clp);
>> return NULL;
>> }
>> + nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>> + NFSPROC4_CLNT_CB_RECALL_ANY);
>> return clp;
>> }
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index e2daef3cc003..49ca06169642 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -411,6 +411,10 @@ struct nfs4_client {
>> unsigned int cl_state;
>> atomic_t cl_delegs_in_recall;
>> +
>> + bool cl_recall_any_busy;
>> + uint32_t cl_recall_any_bm;
>> + struct nfsd4_callback cl_recall_any;
>> };
>> /* struct nfs4_client_reset
>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>> NFSPROC4_CLNT_CB_OFFLOAD,
>> NFSPROC4_CLNT_CB_SEQUENCE,
>> NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>> + NFSPROC4_CLNT_CB_RECALL_ANY,
>> };
>> +#define RCA4_TYPE_MASK_RDATA_DLG 0
>> +#define RCA4_TYPE_MASK_WDATA_DLG 1
>> +
>> /* Returns true iff a is later than b: */
>> static inline bool nfsd4_stateid_generation_after(stateid_t *a,
>> stateid_t *b)
>> {
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index 547cf07cf4e0..0d39af1b00a0 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -48,3 +48,9 @@
>> #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
>> cb_sequence_dec_sz + \
>> op_dec_sz)
>> +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
>> + cb_sequence_enc_sz + \
>> + 1 + 1 + 1)
>> +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
>> + cb_sequence_dec_sz + \
>> + op_dec_sz)
* Re: [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition
2022-10-18 13:07 ` Jeff Layton
@ 2022-10-18 15:51 ` dai.ngo
0 siblings, 0 replies; 9+ messages in thread
From: dai.ngo @ 2022-10-18 15:51 UTC
To: Jeff Layton, chuck.lever; +Cc: linux-nfs
On 10/18/22 6:07 AM, Jeff Layton wrote:
> On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
>> The delegation shrinker is scheduled to run on every shrinker's
>> 'count' callback. It scans the client list and sends the
>> courtesy CB_RECALL_ANY to the clients that hold delegations.
>>
>> To avoid flooding the clients with CB_RECALL_ANY requests, the
>> delegation shrinker is scheduled to run after a 5 seconds delay.
>>
> But...when the kernel needs memory, it generally needs it _now_, and 5s
> is a long time. It'd be better to not delay the initial sending of
> CB_RECALL_ANY callbacks this much.
yes, makes sense.
>
> Maybe the logic should be changed such that it runs no more frequently
> than once every 5s, but don't delay the initial sending of
> CB_RECALL_ANYs.
I'll make this adjustment.
Thanks,
-Dai
>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/netns.h | 3 +++
>> fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> fs/nfsd/state.h | 1 +
>> 3 files changed, 73 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index 8c854ba3285b..394a05fb46d8 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -196,6 +196,9 @@ struct nfsd_net {
>> atomic_t nfsd_courtesy_clients;
>> struct shrinker nfsd_client_shrinker;
>> struct delayed_work nfsd_shrinker_work;
>> +
>> + struct shrinker nfsd_deleg_shrinker;
>> + struct delayed_work nfsd_deleg_shrinker_work;
>> };
>>
>> /* Simple check to find out if a given net was properly initialized */
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index c60c937dece6..bdaea0e6e72f 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
>> return SHRINK_STOP;
>> }
>>
>> +static unsigned long
>> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>> +{
>> + unsigned long cnt;
>> + struct nfsd_net *nn = container_of(shrink,
>> + struct nfsd_net, nfsd_deleg_shrinker);
>> +
>> + cnt = atomic_long_read(&num_delegations);
>> + if (cnt)
>> + mod_delayed_work(laundry_wq,
>> + &nn->nfsd_deleg_shrinker_work, 5 * HZ);
>> + return cnt;
>> +}
>> +
> What's the rationale for doing all of this in the count callback? Why
> not just return the value here and leave scheduling to the scan
> callback?
>
>> +static unsigned long
>> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
>> +{
>> + return SHRINK_STOP;
>> +}
>> +
>> int
>> nfsd4_init_leases_net(struct nfsd_net *nn)
>> {
>> struct sysinfo si;
>> u64 max_clients;
>> + int retval;
>>
>> nn->nfsd4_lease = 90; /* default lease time */
>> nn->nfsd4_grace = 90;
>> @@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
>> nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
>> nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
>> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
>> - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> + if (retval)
>> + return retval;
>> + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
>> + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
>> + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
>> + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
>> + if (retval)
>> + unregister_shrinker(&nn->nfsd_client_shrinker);
>> + return retval;
>> }
>>
>> void
>> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
>> {
>> unregister_shrinker(&nn->nfsd_client_shrinker);
>> + unregister_shrinker(&nn->nfsd_deleg_shrinker);
>> }
>>
>> static void init_nfs4_replay(struct nfs4_replay *rp)
>> @@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
>> nfs4_process_client_reaplist(&reaplist);
>> }
>>
>> +static void
>> +deleg_reaper(struct work_struct *deleg_work)
>> +{
>> + struct list_head *pos, *next;
>> + struct nfs4_client *clp;
>> + struct list_head cblist;
>> + struct delayed_work *dwork = to_delayed_work(deleg_work);
>> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> + nfsd_deleg_shrinker_work);
>> +
>> + INIT_LIST_HEAD(&cblist);
>> + spin_lock(&nn->client_lock);
>> + list_for_each_safe(pos, next, &nn->client_lru) {
>> + clp = list_entry(pos, struct nfs4_client, cl_lru);
>> + if (clp->cl_state != NFSD4_ACTIVE ||
>> + list_empty(&clp->cl_delegations) ||
>> + atomic_read(&clp->cl_delegs_in_recall) ||
>> + clp->cl_recall_any_busy) {
>> + continue;
>> + }
>> + clp->cl_recall_any_busy = true;
>> + list_add(&clp->cl_recall_any_cblist, &cblist);
>> +
>> + /* release in nfsd4_cb_recall_any_release */
>> + atomic_inc(&clp->cl_rpc_users);
>> + }
>> + spin_unlock(&nn->client_lock);
>> + list_for_each_safe(pos, next, &cblist) {
>> + clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
>> + list_del_init(&clp->cl_recall_any_cblist);
>> + clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + nfsd4_run_cb(&clp->cl_recall_any);
>> + }
>> +}
>> +
>> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>> {
>> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
>> @@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
>>
>> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>> INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
>> + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
>> get_net(net);
>>
>> return 0;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 49ca06169642..05b572ce6605 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -415,6 +415,7 @@ struct nfs4_client {
>> bool cl_recall_any_busy;
>> uint32_t cl_recall_any_bm;
>> struct nfsd4_callback cl_recall_any;
>> + struct list_head cl_recall_any_cblist;
>> };
>>
>> /* struct nfs4_client_reset
* Re: [PATCH 1/2] NFSD: add support for sending CB_RECALL_ANY
2022-10-18 14:03 ` Chuck Lever III
@ 2022-10-18 15:52 ` dai.ngo
0 siblings, 0 replies; 9+ messages in thread
From: dai.ngo @ 2022-10-18 15:52 UTC
To: Chuck Lever III, Tom Talpey; +Cc: Linux NFS Mailing List
On 10/18/22 7:03 AM, Chuck Lever III wrote:
>
>> On Oct 18, 2022, at 9:25 AM, Tom Talpey <tom@talpey.com> wrote:
>>
>> On 10/18/2022 1:15 AM, Dai Ngo wrote:
>>> There is only one nfsd4_callback, cl_recall_any, added for each
>>> nfs4_client. Access to it must be serialized. For now it's done
>>> by the cl_recall_any_busy flag since it's used only by the
>>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>>> then a spinlock must be used.
>> I'm curious if clients have shown any quirks with the operation in
>> your testing. If the (Linux) server hasn't ever been sending it,
>> then I'd expect some possible issues/quirks in the client.
> Is Linux NFSD the first implementation of CB_RECALL_ANY? If other
> servers already have this capability, I would expect clients to
> work adequately.
>
> Of course, if the community doesn't have any unit tests for
> CB_RECALL_ANY... "what's not tested is broken" -- Brian Wong.
>
>
>> For example, do they really start handing back a significant number
>> of useful delegations? Enough to satisfy the server's need without
>> going to specific resource-based recalls?
> I don't think of CB_RECALL_ANY as heroic, it's more of a hint. So
> if a client doesn't return anything, that's not really a problem --
> NFSD is not relying on it, and it certainly does take some time
> before server-side state resources are eventually released.
>
> Another possible use case is for the server to start sending
> CB_RECALL_ANY when a client hits the max per-client delegation
> limit.
Yes, this is what I plan to do once we get the mechanism working
correctly.
-Dai
>
>
> --
> Chuck Lever
>
>
>