public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker
@ 2022-11-10  4:17 Dai Ngo
  2022-11-10  4:17 ` [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Dai Ngo @ 2022-11-10  4:17 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

This patch series adds:

    . XDR encode and decode function for CB_RECALL_ANY op.

    . the delegation shrinker that sends the advisory CB_RECALL_ANY 
      to the clients to release unused delegations.
      There is only one nfsd4_callback added for each nfs4_cleint.
      Access to it must be serialized via the client flag
      NFSD4_CLIENT_CB_RECALL_ANY.

v2:
    . modify deleg_reaper to check and send CB_RECALL_ANY to client
      only once per 5 secs.
v3:
    . modify nfsd4_cb_recall_any_release to use nn->client_lock to
      protect cl_recall_any_busy and call put_client_renew_locked
      to decrement cl_rpc_users.

v4:
    . move changes in nfs4state.c from patch (1/2) to patch(2/2).
    . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
      to encode CB_RECALL_ANY arguments.
    . add struct nfsd4_cb_recall_any with embedded nfs4_callback
      and params for CB_RECALL_ANY op.
    . replace cl_recall_any_busy boolean with client flag
      NFSD4_CLIENT_CB_RECALL_ANY 
    . add tracepoints for CB_RECALL_ANY
---

Dai Ngo (3):
     NFSD: add support for sending CB_RECALL_ANY
     NFSD: add delegation shrinker to react to low memory condition
     NFSD: add CB_RECALL_ANY tracepoints

 fs/nfsd/netns.h        |   3 ++
 fs/nfsd/nfs4callback.c |  62 +++++++++++++++++++++++
 fs/nfsd/nfs4state.c    | 117 +++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h        |   9 ++++
 fs/nfsd/trace.h        |  53 ++++++++++++++++++++
 fs/nfsd/xdr4.h         |   5 ++
 fs/nfsd/xdr4cb.h       |   6 +++
 7 files changed, 254 insertions(+), 1 deletion(-)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY
  2022-11-10  4:17 [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
@ 2022-11-10  4:17 ` Dai Ngo
  2022-11-11 15:25   ` Chuck Lever III
  2022-11-10  4:17 ` [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
  2022-11-10  4:17 ` [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints Dai Ngo
  2 siblings, 1 reply; 12+ messages in thread
From: Dai Ngo @ 2022-11-10  4:17 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

Add XDR encode and decode function for CB_RECALL_ANY.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/state.h        |  1 +
 fs/nfsd/xdr4.h         |  5 ++++
 fs/nfsd/xdr4cb.h       |  6 +++++
 4 files changed, 74 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..01226e5233cd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
 }
 
 /*
+ * CB_RECALLANY4args
+ *
+ *	struct CB_RECALLANY4args {
+ *		uint32_t	craa_objects_to_keep;
+ *		bitmap4		craa_type_mask;
+ *	};
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+	struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+	WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
+	WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
+		ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);
+	hdr->nops++;
+}
+
+/*
  * CB_SEQUENCE4args
  *
  *	struct CB_SEQUENCE4args {
@@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_cb_nops(&hdr);
 }
 
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+		struct xdr_stream *xdr, const void *data)
+{
+	const struct nfsd4_callback *cb = data;
+	struct nfsd4_cb_recall_any *ra;
+	struct nfs4_cb_compound_hdr hdr = {
+		.ident = cb->cb_clp->cl_cb_ident,
+		.minorversion = cb->cb_clp->cl_minorversion,
+	};
+
+	ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+	encode_cb_compound4args(xdr, &hdr);
+	encode_cb_sequence4args(xdr, cb, &hdr);
+	encode_cb_recallany4args(xdr, &hdr, ra);
+	encode_cb_nops(&hdr);
+}
 
 /*
  * NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
 
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
+				  void *data)
+{
+	struct nfsd4_callback *cb = data;
+	struct nfs4_cb_compound_hdr hdr;
+	int status;
+
+	status = decode_cb_compound4res(xdr, &hdr);
+	if (unlikely(status))
+		return status;
+	status = decode_cb_sequence4res(xdr, cb);
+	if (unlikely(status || cb->cb_seq_status))
+		return status;
+	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+	return status;
+}
+
 #ifdef CONFIG_NFSD_PNFS
 /*
  * CB_LAYOUTRECALL4args
@@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
 #endif
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
+	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
 };
 
 static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..6b33cbbe76d3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -639,6 +639,7 @@ enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_OFFLOAD,
 	NFSPROC4_CLNT_CB_SEQUENCE,
 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+	NFSPROC4_CLNT_CB_RECALL_ANY,
 };
 
 /* Returns true iff a is later than b: */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..4fd2cf6d1d2d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -896,5 +896,10 @@ struct nfsd4_operation {
 			union nfsd4_op_u *);
 };
 
+struct nfsd4_cb_recall_any {
+	struct nfsd4_callback	ra_cb;
+	u32			ra_keep;
+	u32			ra_bmval[1];
+};
 
 #endif
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
 #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
+					cb_sequence_enc_sz +            \
+					1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
+					cb_sequence_dec_sz +            \
+					op_dec_sz)
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition
  2022-11-10  4:17 [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
  2022-11-10  4:17 ` [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
@ 2022-11-10  4:17 ` Dai Ngo
  2022-11-11 15:25   ` Chuck Lever III
  2022-11-10  4:17 ` [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints Dai Ngo
  2 siblings, 1 reply; 12+ messages in thread
From: Dai Ngo @ 2022-11-10  4:17 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

The delegation shrinker is scheduled to run on every shrinker's
'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5 seconds delay.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/netns.h     |   3 ++
 fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h     |   8 ++++
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
 	atomic_t		nfsd_courtesy_clients;
 	struct shrinker		nfsd_client_shrinker;
 	struct delayed_work	nfsd_shrinker_work;
+
+	struct shrinker		nfsd_deleg_shrinker;
+	struct delayed_work	nfsd_deleg_shrinker_work;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..813cdb67b370 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
 	kfree(clp->cl_nii_domain.data);
 	kfree(clp->cl_nii_name.data);
 	idr_destroy(&clp->cl_stateids);
+	kfree(clp->cl_ra);
 	kmem_cache_free(client_slab, clp);
 }
 
@@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
 	[3] = {""},
 };
 
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+				struct rpc_task *task)
+{
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+		rpc_delay(task, 2 * HZ);
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+	struct nfs4_client *clp = cb->cb_clp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
+	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+	put_client_renew_locked(clp);
+	spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+	.done		= nfsd4_cb_recall_any_done,
+	.release	= nfsd4_cb_recall_any_release,
+};
+
 static struct nfs4_client *create_client(struct xdr_netobj name,
 		struct svc_rqst *rqstp, nfs4_verifier *verf)
 {
@@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 		free_client(clp);
 		return NULL;
 	}
+	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+	if (!clp->cl_ra) {
+		free_client(clp);
+		return NULL;
+	}
+	clp->cl_ra_time = 0;
+	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+			NFSPROC4_CLNT_CB_RECALL_ANY);
 	return clp;
 }
 
@@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return SHRINK_STOP;
 }
 
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	unsigned long cnt;
+	struct nfsd_net *nn = container_of(shrink,
+				struct nfsd_net, nfsd_deleg_shrinker);
+
+	cnt = atomic_long_read(&num_delegations);
+	if (cnt)
+		mod_delayed_work(laundry_wq,
+			&nn->nfsd_deleg_shrinker_work, 0);
+	return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	return SHRINK_STOP;
+}
+
 int
 nfsd4_init_leases_net(struct nfsd_net *nn)
 {
 	struct sysinfo si;
 	u64 max_clients;
+	int retval;
 
 	nn->nfsd4_lease = 90;	/* default lease time */
 	nn->nfsd4_grace = 90;
@@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
-	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	if (retval)
+		return retval;
+	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+	if (retval)
+		unregister_shrinker(&nn->nfsd_client_shrinker);
+	return retval;
+
 }
 
 void
 nfsd4_leases_net_shutdown(struct nfsd_net *nn)
 {
 	unregister_shrinker(&nn->nfsd_client_shrinker);
+	unregister_shrinker(&nn->nfsd_deleg_shrinker);
 }
 
 static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
 	nfs4_process_client_reaplist(&reaplist);
 }
 
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+	struct list_head *pos, *next;
+	struct nfs4_client *clp;
+	struct list_head cblist;
+	struct delayed_work *dwork = to_delayed_work(deleg_work);
+	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+					nfsd_deleg_shrinker_work);
+
+	INIT_LIST_HEAD(&cblist);
+	spin_lock(&nn->client_lock);
+	list_for_each_safe(pos, next, &nn->client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (clp->cl_state != NFSD4_ACTIVE ||
+			list_empty(&clp->cl_delegations) ||
+			atomic_read(&clp->cl_delegs_in_recall) ||
+			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+			(ktime_get_boottime_seconds() -
+				clp->cl_ra_time < 5)) {
+			continue;
+		}
+		list_add(&clp->cl_ra_cblist, &cblist);
+
+		/* release in nfsd4_cb_recall_any_release */
+		atomic_inc(&clp->cl_rpc_users);
+		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+		clp->cl_ra_time = ktime_get_boottime_seconds();
+	}
+	spin_unlock(&nn->client_lock);
+	list_for_each_safe(pos, next, &cblist) {
+		clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
+		list_del_init(&clp->cl_ra_cblist);
+		clp->cl_ra->ra_keep = 0;
+		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+						BIT(RCA4_TYPE_MASK_WDATA_DLG);
+		nfsd4_run_cb(&clp->cl_ra->ra_cb);
+	}
+
+}
+
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
 {
 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
 	get_net(net);
 
 	return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6b33cbbe76d3..12ce9792c5b6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@ struct nfs4_client {
 #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
+#define	NFSD4_CLIENT_CB_RECALL_ANY	(6)
 	unsigned long		cl_flags;
 	const struct cred	*cl_cb_cred;
 	struct rpc_clnt		*cl_cb_client;
@@ -411,6 +412,10 @@ struct nfs4_client {
 
 	unsigned int		cl_state;
 	atomic_t		cl_delegs_in_recall;
+
+	struct nfsd4_cb_recall_any	*cl_ra;
+	time64_t		cl_ra_time;
+	struct list_head	cl_ra_cblist;
 };
 
 /* struct nfs4_client_reset
@@ -642,6 +647,9 @@ enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_RECALL_ANY,
 };
 
+#define	RCA4_TYPE_MASK_RDATA_DLG	0
+#define	RCA4_TYPE_MASK_WDATA_DLG	1
+
 /* Returns true iff a is later than b: */
 static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
 {
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints
  2022-11-10  4:17 [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
  2022-11-10  4:17 ` [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
  2022-11-10  4:17 ` [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
@ 2022-11-10  4:17 ` Dai Ngo
  2022-11-11 15:25   ` Chuck Lever III
  2 siblings, 1 reply; 12+ messages in thread
From: Dai Ngo @ 2022-11-10  4:17 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

Add tracepoints to trace start and end of CB_RECALL_ANY operation.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4state.c |  2 ++
 fs/nfsd/trace.h     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 813cdb67b370..eac7212c9218 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2859,6 +2859,7 @@ static int
 nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
 				struct rpc_task *task)
 {
+	trace_nfsd_cb_recall_any_done(cb, task);
 	switch (task->tk_status) {
 	case -NFS4ERR_DELAY:
 		rpc_delay(task, 2 * HZ);
@@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
 		clp->cl_ra->ra_keep = 0;
 		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
 						BIT(RCA4_TYPE_MASK_WDATA_DLG);
+		trace_nfsd_cb_recall_any(clp->cl_ra);
 		nfsd4_run_cb(&clp->cl_ra->ra_cb);
 	}
 
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..efc69c96bcbd 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,9 +9,11 @@
 #define _NFSD_TRACE_H
 
 #include <linux/tracepoint.h>
+#include <linux/sunrpc/xprt.h>
 
 #include "export.h"
 #include "nfsfh.h"
+#include "xdr4.h"
 
 #define NFSD_TRACE_PROC_RES_FIELDS \
 		__field(unsigned int, netns_ino) \
@@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
 DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
 DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
 
+TRACE_EVENT(nfsd_cb_recall_any,
+	TP_PROTO(
+		const struct nfsd4_cb_recall_any *ra
+	),
+	TP_ARGS(ra),
+	TP_STRUCT__entry(
+		__field(u32, cl_boot)
+		__field(u32, cl_id)
+		__field(u32, ra_keep)
+		__field(u32, ra_bmval)
+		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+	),
+	TP_fast_assign(
+		__entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+		__entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+		__entry->ra_keep = ra->ra_keep;
+		__entry->ra_bmval = ra->ra_bmval[0];
+		memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
+			sizeof(struct sockaddr_in6));
+	),
+	TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
+		__entry->cl_boot, __entry->cl_id,
+		__entry->addr, __entry->ra_keep, __entry->ra_bmval
+	)
+);
+
+TRACE_EVENT(nfsd_cb_recall_any_done,
+	TP_PROTO(
+		const struct nfsd4_callback *cb,
+		const struct rpc_task *task
+	),
+	TP_ARGS(cb, task),
+	TP_STRUCT__entry(
+		__field(u32, cl_boot)
+		__field(u32, cl_id)
+		__field(int, status)
+		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+	),
+	TP_fast_assign(
+		__entry->status = task->tk_status;
+		__entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+		__entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+		memcpy(__entry->addr, &cb->cb_clp->cl_addr,
+			sizeof(struct sockaddr_in6));
+	),
+	TP_printk("client %08x:%08x addr=%pISpc status=%d",
+		__entry->cl_boot, __entry->cl_id,
+		__entry->addr, __entry->status
+	)
+);
+
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY
  2022-11-10  4:17 ` [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
@ 2022-11-11 15:25   ` Chuck Lever III
  0 siblings, 0 replies; 12+ messages in thread
From: Chuck Lever III @ 2022-11-11 15:25 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton@kernel.org, Linux NFS Mailing List



> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> Add XDR encode and decode function for CB_RECALL_ANY.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/state.h        |  1 +
> fs/nfsd/xdr4.h         |  5 ++++
> fs/nfsd/xdr4cb.h       |  6 +++++
> 4 files changed, 74 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index f0e69edf5f0f..01226e5233cd 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> }
> 
> /*
> + * CB_RECALLANY4args
> + *
> + *	struct CB_RECALLANY4args {
> + *		uint32_t	craa_objects_to_keep;
> + *		bitmap4		craa_type_mask;
> + *	};
> + */
> +static void
> +encode_cb_recallany4args(struct xdr_stream *xdr,
> +	struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
> +{
> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> +	WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
> +	WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
> +		ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);

My only nit: You should use ARRAY_SIZE() here.


> +	hdr->nops++;
> +}
> +
> +/*
>  * CB_SEQUENCE4args
>  *
>  *	struct CB_SEQUENCE4args {
> @@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> 	encode_cb_nops(&hdr);
> }
> 
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static void
> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> +		struct xdr_stream *xdr, const void *data)
> +{
> +	const struct nfsd4_callback *cb = data;
> +	struct nfsd4_cb_recall_any *ra;
> +	struct nfs4_cb_compound_hdr hdr = {
> +		.ident = cb->cb_clp->cl_cb_ident,
> +		.minorversion = cb->cb_clp->cl_minorversion,
> +	};
> +
> +	ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
> +	encode_cb_compound4args(xdr, &hdr);
> +	encode_cb_sequence4args(xdr, cb, &hdr);
> +	encode_cb_recallany4args(xdr, &hdr, ra);
> +	encode_cb_nops(&hdr);
> +}
> 
> /*
>  * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> }
> 
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static int
> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> +				  struct xdr_stream *xdr,
> +				  void *data)
> +{
> +	struct nfsd4_callback *cb = data;
> +	struct nfs4_cb_compound_hdr hdr;
> +	int status;
> +
> +	status = decode_cb_compound4res(xdr, &hdr);
> +	if (unlikely(status))
> +		return status;
> +	status = decode_cb_sequence4res(xdr, cb);
> +	if (unlikely(status || cb->cb_seq_status))
> +		return status;
> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> +	return status;
> +}
> +
> #ifdef CONFIG_NFSD_PNFS
> /*
>  * CB_LAYOUTRECALL4args
> @@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> #endif
> 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
> 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
> };
> 
> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..6b33cbbe76d3 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -639,6 +639,7 @@ enum nfsd4_cb_op {
> 	NFSPROC4_CLNT_CB_OFFLOAD,
> 	NFSPROC4_CLNT_CB_SEQUENCE,
> 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> +	NFSPROC4_CLNT_CB_RECALL_ANY,
> };
> 
> /* Returns true iff a is later than b: */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 0eb00105d845..4fd2cf6d1d2d 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -896,5 +896,10 @@ struct nfsd4_operation {
> 			union nfsd4_op_u *);
> };
> 
> +struct nfsd4_cb_recall_any {
> +	struct nfsd4_callback	ra_cb;
> +	u32			ra_keep;
> +	u32			ra_bmval[1];
> +};
> 
> #endif
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 547cf07cf4e0..0d39af1b00a0 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -48,3 +48,9 @@
> #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
> 					cb_sequence_dec_sz +            \
> 					op_dec_sz)
> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
> +					cb_sequence_enc_sz +            \
> +					1 + 1 + 1)
> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
> +					cb_sequence_dec_sz +            \
> +					op_dec_sz)
> -- 
> 2.9.5
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition
  2022-11-10  4:17 ` [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
@ 2022-11-11 15:25   ` Chuck Lever III
  2022-11-11 15:58     ` Jeff Layton
  0 siblings, 1 reply; 12+ messages in thread
From: Chuck Lever III @ 2022-11-11 15:25 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton@kernel.org, Linux NFS Mailing List



> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
> 
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/netns.h     |   3 ++
> fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h     |   8 ++++
> 3 files changed, 125 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> 	atomic_t		nfsd_courtesy_clients;
> 	struct shrinker		nfsd_client_shrinker;
> 	struct delayed_work	nfsd_shrinker_work;
> +
> +	struct shrinker		nfsd_deleg_shrinker;
> +	struct delayed_work	nfsd_deleg_shrinker_work;
> };
> 
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..813cdb67b370 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> 	kfree(clp->cl_nii_domain.data);
> 	kfree(clp->cl_nii_name.data);
> 	idr_destroy(&clp->cl_stateids);
> +	kfree(clp->cl_ra);
> 	kmem_cache_free(client_slab, clp);
> }
> 
> @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> 	[3] = {""},
> };
> 
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> +				struct rpc_task *task)
> +{
> +	switch (task->tk_status) {
> +	case -NFS4ERR_DELAY:
> +		rpc_delay(task, 2 * HZ);
> +		return 0;
> +	default:
> +		return 1;
> +	}
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> +	struct nfs4_client *clp = cb->cb_clp;
> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> +	spin_lock(&nn->client_lock);
> +	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> +	put_client_renew_locked(clp);
> +	spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> +	.done		= nfsd4_cb_recall_any_done,
> +	.release	= nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> 		struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> 		free_client(clp);
> 		return NULL;
> 	}
> +	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> +	if (!clp->cl_ra) {
> +		free_client(clp);
> +		return NULL;
> +	}
> +	clp->cl_ra_time = 0;
> +	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> +			NFSPROC4_CLNT_CB_RECALL_ANY);
> 	return clp;
> }
> 
> @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> 	return SHRINK_STOP;
> }
> 
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	unsigned long cnt;

No reason not to spell out "count".

> +	struct nfsd_net *nn = container_of(shrink,
> +				struct nfsd_net, nfsd_deleg_shrinker);
> +
> +	cnt = atomic_long_read(&num_delegations);
> +	if (cnt)
> +		mod_delayed_work(laundry_wq,
> +			&nn->nfsd_deleg_shrinker_work, 0);
> +	return cnt;
> +}
> +
> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	return SHRINK_STOP;
> +}
> +
> int
> nfsd4_init_leases_net(struct nfsd_net *nn)
> {
> 	struct sysinfo si;
> 	u64 max_clients;
> +	int retval;
> 
> 	nn->nfsd4_lease = 90;	/* default lease time */
> 	nn->nfsd4_grace = 90;
> @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> -	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> +	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> +	if (retval)
> +		return retval;
> +	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> +	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> +	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> +	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> +	if (retval)
> +		unregister_shrinker(&nn->nfsd_client_shrinker);
> +	return retval;
> +
> }
> 
> void
> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> {
> 	unregister_shrinker(&nn->nfsd_client_shrinker);
> +	unregister_shrinker(&nn->nfsd_deleg_shrinker);
> }
> 
> static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> 	nfs4_process_client_reaplist(&reaplist);
> }
> 
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> +	struct list_head *pos, *next;
> +	struct nfs4_client *clp;
> +	struct list_head cblist;
> +	struct delayed_work *dwork = to_delayed_work(deleg_work);
> +	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> +					nfsd_deleg_shrinker_work);
> +
> +	INIT_LIST_HEAD(&cblist);
> +	spin_lock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &nn->client_lru) {
> +		clp = list_entry(pos, struct nfs4_client, cl_lru);
> +		if (clp->cl_state != NFSD4_ACTIVE ||
> +			list_empty(&clp->cl_delegations) ||
> +			atomic_read(&clp->cl_delegs_in_recall) ||
> +			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> +			(ktime_get_boottime_seconds() -
> +				clp->cl_ra_time < 5)) {
> +			continue;
> +		}
> +		list_add(&clp->cl_ra_cblist, &cblist);
> +
> +		/* release in nfsd4_cb_recall_any_release */
> +		atomic_inc(&clp->cl_rpc_users);
> +		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> +		clp->cl_ra_time = ktime_get_boottime_seconds();
> +	}
> +	spin_unlock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &cblist) {

The usual idiom for draining a list like this is

	while (!list_empty(&cblist)) {
		clp = list_first_entry( ... );


> +		clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> +		list_del_init(&clp->cl_ra_cblist);
> +		clp->cl_ra->ra_keep = 0;
> +		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> +						BIT(RCA4_TYPE_MASK_WDATA_DLG);
> +		nfsd4_run_cb(&clp->cl_ra->ra_cb);
> +	}
> +
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> 
> 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> +	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> 	get_net(net);
> 
> 	return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 6b33cbbe76d3..12ce9792c5b6 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -368,6 +368,7 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
> 					 1 << NFSD4_CLIENT_CB_KILL)
> +#define	NFSD4_CLIENT_CB_RECALL_ANY	(6)
> 	unsigned long		cl_flags;
> 	const struct cred	*cl_cb_cred;
> 	struct rpc_clnt		*cl_cb_client;
> @@ -411,6 +412,10 @@ struct nfs4_client {
> 
> 	unsigned int		cl_state;
> 	atomic_t		cl_delegs_in_recall;
> +
> +	struct nfsd4_cb_recall_any	*cl_ra;
> +	time64_t		cl_ra_time;
> +	struct list_head	cl_ra_cblist;
> };
> 
> /* struct nfs4_client_reset
> @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> 	NFSPROC4_CLNT_CB_RECALL_ANY,
> };
> 
> +#define	RCA4_TYPE_MASK_RDATA_DLG	0
> +#define	RCA4_TYPE_MASK_WDATA_DLG	1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> -- 
> 2.9.5
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints
  2022-11-10  4:17 ` [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints Dai Ngo
@ 2022-11-11 15:25   ` Chuck Lever III
  2022-11-11 21:34     ` dai.ngo
  2022-11-15  5:08     ` dai.ngo
  0 siblings, 2 replies; 12+ messages in thread
From: Chuck Lever III @ 2022-11-11 15:25 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton@kernel.org, Linux NFS Mailing List



> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/nfs4state.c |  2 ++
> fs/nfsd/trace.h     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 55 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 813cdb67b370..eac7212c9218 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2859,6 +2859,7 @@ static int
> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> 				struct rpc_task *task)
> {
> +	trace_nfsd_cb_recall_any_done(cb, task);
> 	switch (task->tk_status) {
> 	case -NFS4ERR_DELAY:
> 		rpc_delay(task, 2 * HZ);
> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
> 		clp->cl_ra->ra_keep = 0;
> 		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> 						BIT(RCA4_TYPE_MASK_WDATA_DLG);
> +		trace_nfsd_cb_recall_any(clp->cl_ra);
> 		nfsd4_run_cb(&clp->cl_ra->ra_cb);
> 	}
> 
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 06a96e955bd0..efc69c96bcbd 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -9,9 +9,11 @@
> #define _NFSD_TRACE_H
> 
> #include <linux/tracepoint.h>
> +#include <linux/sunrpc/xprt.h>
> 
> #include "export.h"
> #include "nfsfh.h"
> +#include "xdr4.h"
> 
> #define NFSD_TRACE_PROC_RES_FIELDS \
> 		__field(unsigned int, netns_ino) \
> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
> 
> +TRACE_EVENT(nfsd_cb_recall_any,
> +	TP_PROTO(
> +		const struct nfsd4_cb_recall_any *ra
> +	),
> +	TP_ARGS(ra),
> +	TP_STRUCT__entry(
> +		__field(u32, cl_boot)
> +		__field(u32, cl_id)
> +		__field(u32, ra_keep)
> +		__field(u32, ra_bmval)
> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
> +	),
> +	TP_fast_assign(
> +		__entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
> +		__entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
> +		__entry->ra_keep = ra->ra_keep;
> +		__entry->ra_bmval = ra->ra_bmval[0];
> +		memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
> +			sizeof(struct sockaddr_in6));
> +	),
> +	TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
> +		__entry->cl_boot, __entry->cl_id,
> +		__entry->addr, __entry->ra_keep, __entry->ra_bmval
> +	)
> +);

This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"

And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.


> +
> +TRACE_EVENT(nfsd_cb_recall_any_done,
> +	TP_PROTO(
> +		const struct nfsd4_callback *cb,
> +		const struct rpc_task *task
> +	),
> +	TP_ARGS(cb, task),
> +	TP_STRUCT__entry(
> +		__field(u32, cl_boot)
> +		__field(u32, cl_id)
> +		__field(int, status)
> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
> +	),
> +	TP_fast_assign(
> +		__entry->status = task->tk_status;
> +		__entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
> +		__entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
> +		memcpy(__entry->addr, &cb->cb_clp->cl_addr,
> +			sizeof(struct sockaddr_in6));
> +	),
> +	TP_printk("client %08x:%08x addr=%pISpc status=%d",
> +		__entry->cl_boot, __entry->cl_id,
> +		__entry->addr, __entry->status
> +	)
> +);

I'd like you to change this to

DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);


> +
> #endif /* _NFSD_TRACE_H */
> 
> #undef TRACE_INCLUDE_PATH
> -- 
> 2.9.5
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition
  2022-11-11 15:25   ` Chuck Lever III
@ 2022-11-11 15:58     ` Jeff Layton
  0 siblings, 0 replies; 12+ messages in thread
From: Jeff Layton @ 2022-11-11 15:58 UTC (permalink / raw)
  To: Chuck Lever III, Dai Ngo; +Cc: Linux NFS Mailing List

On Fri, 2022-11-11 at 15:25 +0000, Chuck Lever III wrote:
> 
> > On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> > 
> > The delegation shrinker is scheduled to run on every shrinker's
> > 'count' callback. It scans the client list and sends the
> > courtesy CB_RECALL_ANY to the clients that hold delegations.
> > 
> > To avoid flooding the clients with CB_RECALL_ANY requests, the
> > delegation shrinker is scheduled to run after a 5 seconds delay.
> > 
> > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > ---
> > fs/nfsd/netns.h     |   3 ++
> > fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > fs/nfsd/state.h     |   8 ++++
> > 3 files changed, 125 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 8c854ba3285b..394a05fb46d8 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -196,6 +196,9 @@ struct nfsd_net {
> > 	atomic_t		nfsd_courtesy_clients;
> > 	struct shrinker		nfsd_client_shrinker;
> > 	struct delayed_work	nfsd_shrinker_work;
> > +
> > +	struct shrinker		nfsd_deleg_shrinker;
> > +	struct delayed_work	nfsd_deleg_shrinker_work;
> > };
> > 
> > /* Simple check to find out if a given net was properly initialized */
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 4e718500a00c..813cdb67b370 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> > 	kfree(clp->cl_nii_domain.data);
> > 	kfree(clp->cl_nii_name.data);
> > 	idr_destroy(&clp->cl_stateids);
> > +	kfree(clp->cl_ra);
> > 	kmem_cache_free(client_slab, clp);
> > }
> > 
> > @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> > 	[3] = {""},
> > };
> > 
> > +static int
> > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > +				struct rpc_task *task)
> > +{
> > +	switch (task->tk_status) {
> > +	case -NFS4ERR_DELAY:
> > +		rpc_delay(task, 2 * HZ);
> > +		return 0;
> > +	default:
> > +		return 1;
> > +	}
> > +}
> > +
> > +static void
> > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > +{
> > +	struct nfs4_client *clp = cb->cb_clp;
> > +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> > +
> > +	spin_lock(&nn->client_lock);
> > +	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > +	put_client_renew_locked(clp);
> > +	spin_unlock(&nn->client_lock);
> > +}
> > +
> > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > +	.done		= nfsd4_cb_recall_any_done,
> > +	.release	= nfsd4_cb_recall_any_release,
> > +};
> > +
> > static struct nfs4_client *create_client(struct xdr_netobj name,
> > 		struct svc_rqst *rqstp, nfs4_verifier *verf)
> > {
> > @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > 		free_client(clp);
> > 		return NULL;
> > 	}
> > +	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> > +	if (!clp->cl_ra) {
> > +		free_client(clp);
> > +		return NULL;
> > +	}
> > +	clp->cl_ra_time = 0;
> > +	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> > +			NFSPROC4_CLNT_CB_RECALL_ANY);
> > 	return clp;
> > }
> > 
> > @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> > 	return SHRINK_STOP;
> > }
> > 
> > +static unsigned long
> > +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > +	unsigned long cnt;
> 
> No reason not to spell out "count".
> 
> > +	struct nfsd_net *nn = container_of(shrink,
> > +				struct nfsd_net, nfsd_deleg_shrinker);
> > +
> > +	cnt = atomic_long_read(&num_delegations);
> > +	if (cnt)
> > +		mod_delayed_work(laundry_wq,
> > +			&nn->nfsd_deleg_shrinker_work, 0);
> > +	return cnt;
> > +}
> > +
> > +static unsigned long
> > +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > +	return SHRINK_STOP;
> > +}
> > +
> > int
> > nfsd4_init_leases_net(struct nfsd_net *nn)
> > {
> > 	struct sysinfo si;
> > 	u64 max_clients;
> > +	int retval;
> > 
> > 	nn->nfsd4_lease = 90;	/* default lease time */
> > 	nn->nfsd4_grace = 90;
> > @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> > 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> > 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> > 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> > -	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > +	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > +	if (retval)
> > +		return retval;
> > +	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> > +	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> > +	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> > +	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> > +	if (retval)
> > +		unregister_shrinker(&nn->nfsd_client_shrinker);
> > +	return retval;
> > +
> > }
> > 
> > void
> > nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> > {
> > 	unregister_shrinker(&nn->nfsd_client_shrinker);
> > +	unregister_shrinker(&nn->nfsd_deleg_shrinker);
> > }
> > 
> > static void init_nfs4_replay(struct nfs4_replay *rp)
> > @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> > 	nfs4_process_client_reaplist(&reaplist);
> > }
> > 
> > +static void
> > +deleg_reaper(struct work_struct *deleg_work)
> > +{
> > +	struct list_head *pos, *next;
> > +	struct nfs4_client *clp;
> > +	struct list_head cblist;
> > +	struct delayed_work *dwork = to_delayed_work(deleg_work);
> > +	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> > +					nfsd_deleg_shrinker_work);
> > +
> > +	INIT_LIST_HEAD(&cblist);
> > +	spin_lock(&nn->client_lock);
> > +	list_for_each_safe(pos, next, &nn->client_lru) {
> > +		clp = list_entry(pos, struct nfs4_client, cl_lru);
> > +		if (clp->cl_state != NFSD4_ACTIVE ||
> > +			list_empty(&clp->cl_delegations) ||
> > +			atomic_read(&clp->cl_delegs_in_recall) ||
> > +			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> > +			(ktime_get_boottime_seconds() -
> > +				clp->cl_ra_time < 5)) {
> > +			continue;
> > +		}
> > +		list_add(&clp->cl_ra_cblist, &cblist);
> > +
> > +		/* release in nfsd4_cb_recall_any_release */
> > +		atomic_inc(&clp->cl_rpc_users);
> > +		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > +		clp->cl_ra_time = ktime_get_boottime_seconds();
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	list_for_each_safe(pos, next, &cblist) {
> 
> The usual idiom for draining a list like this is
> 
> 	while (!list_empty(&cblist)) {
> 		clp = list_first_entry( ... );
> 

Agreed. Note that it's also slightly more efficient to do it that way,
since you don't require a "next" pointer.

> 
> > +		clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> > +		list_del_init(&clp->cl_ra_cblist);
> > +		clp->cl_ra->ra_keep = 0;
> > +		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> > +						BIT(RCA4_TYPE_MASK_WDATA_DLG);
> > +		nfsd4_run_cb(&clp->cl_ra->ra_cb);
> > +	}
> > +
> > +}
> > +
> > static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> > {
> > 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> > @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> > 
> > 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> > 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> > +	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> > 	get_net(net);
> > 
> > 	return 0;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 6b33cbbe76d3..12ce9792c5b6 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -368,6 +368,7 @@ struct nfs4_client {
> > #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
> > #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
> > 					 1 << NFSD4_CLIENT_CB_KILL)
> > +#define	NFSD4_CLIENT_CB_RECALL_ANY	(6)
> > 	unsigned long		cl_flags;
> > 	const struct cred	*cl_cb_cred;
> > 	struct rpc_clnt		*cl_cb_client;
> > @@ -411,6 +412,10 @@ struct nfs4_client {
> > 
> > 	unsigned int		cl_state;
> > 	atomic_t		cl_delegs_in_recall;
> > +
> > +	struct nfsd4_cb_recall_any	*cl_ra;
> > +	time64_t		cl_ra_time;
> > +	struct list_head	cl_ra_cblist;
> > };
> > 
> > /* struct nfs4_client_reset
> > @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> > 	NFSPROC4_CLNT_CB_RECALL_ANY,
> > };
> > 
> > +#define	RCA4_TYPE_MASK_RDATA_DLG	0
> > +#define	RCA4_TYPE_MASK_WDATA_DLG	1
> > +
> > /* Returns true iff a is later than b: */
> > static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > {
> > -- 
> > 2.9.5
> > 
> 
> --
> Chuck Lever
> 
> 
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints
  2022-11-11 15:25   ` Chuck Lever III
@ 2022-11-11 21:34     ` dai.ngo
  2022-11-11 22:02       ` Chuck Lever III
  2022-11-15  5:08     ` dai.ngo
  1 sibling, 1 reply; 12+ messages in thread
From: dai.ngo @ 2022-11-11 21:34 UTC (permalink / raw)
  To: Chuck Lever III; +Cc: jlayton@kernel.org, Linux NFS Mailing List


On 11/11/22 7:25 AM, Chuck Lever III wrote:
>
>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/nfs4state.c |  2 ++
>> fs/nfsd/trace.h     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 55 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 813cdb67b370..eac7212c9218 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> 				struct rpc_task *task)
>> {
>> +	trace_nfsd_cb_recall_any_done(cb, task);
>> 	switch (task->tk_status) {
>> 	case -NFS4ERR_DELAY:
>> 		rpc_delay(task, 2 * HZ);
>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>> 		clp->cl_ra->ra_keep = 0;
>> 		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> 						BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> +		trace_nfsd_cb_recall_any(clp->cl_ra);
>> 		nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> 	}
>>
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..efc69c96bcbd 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> 		__field(unsigned int, netns_ino) \
>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> +	TP_PROTO(
>> +		const struct nfsd4_cb_recall_any *ra
>> +	),
>> +	TP_ARGS(ra),
>> +	TP_STRUCT__entry(
>> +		__field(u32, cl_boot)
>> +		__field(u32, cl_id)
>> +		__field(u32, ra_keep)
>> +		__field(u32, ra_bmval)
>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> +	),
>> +	TP_fast_assign(
>> +		__entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> +		__entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> +		__entry->ra_keep = ra->ra_keep;
>> +		__entry->ra_bmval = ra->ra_bmval[0];
>> +		memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>> +			sizeof(struct sockaddr_in6));
>> +	),
>> +	TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> +		__entry->cl_boot, __entry->cl_id,
>> +		__entry->addr, __entry->ra_keep, __entry->ra_bmval
>> +	)
>> +);
> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>
> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.

I tried but it did not work. I got the same error as 'nfsd_cb_args' tracepoint:

[root@nfsvmf24 ~]# uname -r
6.1.0-rc1+
[root@nfsvmf24 ~]# trace-cmd record -e nfsd_cb_args
[root@nfsvmf24 ~]# trace-cmd report
trace-cmd: No such file or directory
   Error: expected type 4 but read 5
   [nfsd:nfsd_cb_args]*function __get_sockaddr not defined*
   cpus=1
     nfsd-3976  [000]   410.956975: nfsd_cb_args: [*FAILED TO PARSE*] cl_boot=1668201586 cl_id=2938128369 prog=1073741824 ident=1 addr=ARRAY[02, 00, 80, 93, 0a, 50, 6f, 5f, 00, ..]

I also tried Fedora 36 and got same error.

Can you verify __get_sockaddr works with tracepoints on your system?

Thanks,
-Dai

>
>
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> +	TP_PROTO(
>> +		const struct nfsd4_callback *cb,
>> +		const struct rpc_task *task
>> +	),
>> +	TP_ARGS(cb, task),
>> +	TP_STRUCT__entry(
>> +		__field(u32, cl_boot)
>> +		__field(u32, cl_id)
>> +		__field(int, status)
>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> +	),
>> +	TP_fast_assign(
>> +		__entry->status = task->tk_status;
>> +		__entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> +		__entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> +		memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>> +			sizeof(struct sockaddr_in6));
>> +	),
>> +	TP_printk("client %08x:%08x addr=%pISpc status=%d",
>> +		__entry->cl_boot, __entry->cl_id,
>> +		__entry->addr, __entry->status
>> +	)
>> +);
> I'd like you to change this to
>
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>
>
>> +
>> #endif /* _NFSD_TRACE_H */
>>
>> #undef TRACE_INCLUDE_PATH
>> -- 
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints
  2022-11-11 21:34     ` dai.ngo
@ 2022-11-11 22:02       ` Chuck Lever III
  0 siblings, 0 replies; 12+ messages in thread
From: Chuck Lever III @ 2022-11-11 22:02 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton@kernel.org, Linux NFS Mailing List



> On Nov 11, 2022, at 4:34 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> 
> On 11/11/22 7:25 AM, Chuck Lever III wrote:
>> 
>>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>> 
>>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>> 
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>> fs/nfsd/nfs4state.c |  2 ++
>>> fs/nfsd/trace.h     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 55 insertions(+)
>>> 
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 813cdb67b370..eac7212c9218 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2859,6 +2859,7 @@ static int
>>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> 				struct rpc_task *task)
>>> {
>>> +	trace_nfsd_cb_recall_any_done(cb, task);
>>> 	switch (task->tk_status) {
>>> 	case -NFS4ERR_DELAY:
>>> 		rpc_delay(task, 2 * HZ);
>>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>>> 		clp->cl_ra->ra_keep = 0;
>>> 		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>>> 						BIT(RCA4_TYPE_MASK_WDATA_DLG);
>>> +		trace_nfsd_cb_recall_any(clp->cl_ra);
>>> 		nfsd4_run_cb(&clp->cl_ra->ra_cb);
>>> 	}
>>> 
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 06a96e955bd0..efc69c96bcbd 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -9,9 +9,11 @@
>>> #define _NFSD_TRACE_H
>>> 
>>> #include <linux/tracepoint.h>
>>> +#include <linux/sunrpc/xprt.h>
>>> 
>>> #include "export.h"
>>> #include "nfsfh.h"
>>> +#include "xdr4.h"
>>> 
>>> #define NFSD_TRACE_PROC_RES_FIELDS \
>>> 		__field(unsigned int, netns_ino) \
>>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>> 
>>> +TRACE_EVENT(nfsd_cb_recall_any,
>>> +	TP_PROTO(
>>> +		const struct nfsd4_cb_recall_any *ra
>>> +	),
>>> +	TP_ARGS(ra),
>>> +	TP_STRUCT__entry(
>>> +		__field(u32, cl_boot)
>>> +		__field(u32, cl_id)
>>> +		__field(u32, ra_keep)
>>> +		__field(u32, ra_bmval)
>>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> +	),
>>> +	TP_fast_assign(
>>> +		__entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>>> +		__entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>>> +		__entry->ra_keep = ra->ra_keep;
>>> +		__entry->ra_bmval = ra->ra_bmval[0];
>>> +		memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>>> +			sizeof(struct sockaddr_in6));
>>> +	),
>>> +	TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>>> +		__entry->cl_boot, __entry->cl_id,
>>> +		__entry->addr, __entry->ra_keep, __entry->ra_bmval
>>> +	)
>>> +);
>> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>> 
>> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
> 
> I tried but it did not work. I got the same error as 'nfsd_cb_args' tracepoint:
> 
> [root@nfsvmf24 ~]# uname -r
> 6.1.0-rc1+
> [root@nfsvmf24 ~]# trace-cmd record -e nfsd_cb_args
> [root@nfsvmf24 ~]# trace-cmd report
> trace-cmd: No such file or directory
>  Error: expected type 4 but read 5
>  [nfsd:nfsd_cb_args]*function __get_sockaddr not defined*
>  cpus=1
>    nfsd-3976  [000]   410.956975: nfsd_cb_args: [*FAILED TO PARSE*] cl_boot=1668201586 cl_id=2938128369 prog=1073741824 ident=1 addr=ARRAY[02, 00, 80, 93, 0a, 50, 6f, 5f, 00, ..]
> 
> I also tried Fedora 36 and got same error.
> 
> Can you verify __get_sockaddr works with tracepoints on your system?

The user space libraries still don't have support for __get_sockaddr().

Assuming that eventually those libraries will get the proper support,
in this case I'm willing to take tracepoints that don't parse.


> Thanks,
> -Dai
> 
>> 
>> 
>>> +
>>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>>> +	TP_PROTO(
>>> +		const struct nfsd4_callback *cb,
>>> +		const struct rpc_task *task
>>> +	),
>>> +	TP_ARGS(cb, task),
>>> +	TP_STRUCT__entry(
>>> +		__field(u32, cl_boot)
>>> +		__field(u32, cl_id)
>>> +		__field(int, status)
>>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> +	),
>>> +	TP_fast_assign(
>>> +		__entry->status = task->tk_status;
>>> +		__entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>>> +		__entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>>> +		memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>>> +			sizeof(struct sockaddr_in6));
>>> +	),
>>> +	TP_printk("client %08x:%08x addr=%pISpc status=%d",
>>> +		__entry->cl_boot, __entry->cl_id,
>>> +		__entry->addr, __entry->status
>>> +	)
>>> +);
>> I'd like you to change this to
>> 
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>> 
>> 
>>> +
>>> #endif /* _NFSD_TRACE_H */
>>> 
>>> #undef TRACE_INCLUDE_PATH
>>> -- 
>>> 2.9.5
>>> 
>> --
>> Chuck Lever

--
Chuck Lever




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints
  2022-11-11 15:25   ` Chuck Lever III
  2022-11-11 21:34     ` dai.ngo
@ 2022-11-15  5:08     ` dai.ngo
  2022-11-15 14:21       ` Chuck Lever III
  1 sibling, 1 reply; 12+ messages in thread
From: dai.ngo @ 2022-11-15  5:08 UTC (permalink / raw)
  To: Chuck Lever III; +Cc: jlayton@kernel.org, Linux NFS Mailing List


On 11/11/22 7:25 AM, Chuck Lever III wrote:
>
>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/nfs4state.c |  2 ++
>> fs/nfsd/trace.h     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 55 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 813cdb67b370..eac7212c9218 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> 				struct rpc_task *task)
>> {
>> +	trace_nfsd_cb_recall_any_done(cb, task);
>> 	switch (task->tk_status) {
>> 	case -NFS4ERR_DELAY:
>> 		rpc_delay(task, 2 * HZ);
>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>> 		clp->cl_ra->ra_keep = 0;
>> 		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> 						BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> +		trace_nfsd_cb_recall_any(clp->cl_ra);
>> 		nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> 	}
>>
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..efc69c96bcbd 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> 		__field(unsigned int, netns_ino) \
>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> +	TP_PROTO(
>> +		const struct nfsd4_cb_recall_any *ra
>> +	),
>> +	TP_ARGS(ra),
>> +	TP_STRUCT__entry(
>> +		__field(u32, cl_boot)
>> +		__field(u32, cl_id)
>> +		__field(u32, ra_keep)
>> +		__field(u32, ra_bmval)
>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> +	),
>> +	TP_fast_assign(
>> +		__entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> +		__entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> +		__entry->ra_keep = ra->ra_keep;
>> +		__entry->ra_bmval = ra->ra_bmval[0];
>> +		memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>> +			sizeof(struct sockaddr_in6));
>> +	),
>> +	TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> +		__entry->cl_boot, __entry->cl_id,
>> +		__entry->addr, __entry->ra_keep, __entry->ra_bmval
>> +	)
>> +);
> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>
> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>
>
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> +	TP_PROTO(
>> +		const struct nfsd4_callback *cb,
>> +		const struct rpc_task *task
>> +	),
>> +	TP_ARGS(cb, task),
>> +	TP_STRUCT__entry(
>> +		__field(u32, cl_boot)
>> +		__field(u32, cl_id)
>> +		__field(int, status)
>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> +	),
>> +	TP_fast_assign(
>> +		__entry->status = task->tk_status;
>> +		__entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> +		__entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> +		memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>> +			sizeof(struct sockaddr_in6));
>> +	),
>> +	TP_printk("client %08x:%08x addr=%pISpc status=%d",
>> +		__entry->cl_boot, __entry->cl_id,
>> +		__entry->addr, __entry->status
>> +	)
>> +);
> I'd like you to change this to
>
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);

TP_PRORO of DEFINE_NFSD_CB_DONE_EVENT requires a stateid_t which
CB_RECALL_ANY does not have. Can we can create a dummy stateid_t
for nfsd_cb_recall_any_done?

Note that nfsd_cb_done_class does not print the server IP address
which is more useful for tracing. Should I modify nfsd_cb_recall_any_done
class to print the IP address from rpc_xprt?

-Dai

>
>
>> +
>> #endif /* _NFSD_TRACE_H */
>>
>> #undef TRACE_INCLUDE_PATH
>> -- 
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints
  2022-11-15  5:08     ` dai.ngo
@ 2022-11-15 14:21       ` Chuck Lever III
  0 siblings, 0 replies; 12+ messages in thread
From: Chuck Lever III @ 2022-11-15 14:21 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton@kernel.org, Linux NFS Mailing List



> On Nov 15, 2022, at 12:08 AM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> 
> On 11/11/22 7:25 AM, Chuck Lever III wrote:
>> 
>>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>> 
>>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>> 
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>> fs/nfsd/nfs4state.c |  2 ++
>>> fs/nfsd/trace.h     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 55 insertions(+)
>>> 
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 813cdb67b370..eac7212c9218 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2859,6 +2859,7 @@ static int
>>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> 				struct rpc_task *task)
>>> {
>>> +	trace_nfsd_cb_recall_any_done(cb, task);
>>> 	switch (task->tk_status) {
>>> 	case -NFS4ERR_DELAY:
>>> 		rpc_delay(task, 2 * HZ);
>>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>>> 		clp->cl_ra->ra_keep = 0;
>>> 		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>>> 						BIT(RCA4_TYPE_MASK_WDATA_DLG);
>>> +		trace_nfsd_cb_recall_any(clp->cl_ra);
>>> 		nfsd4_run_cb(&clp->cl_ra->ra_cb);
>>> 	}
>>> 
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 06a96e955bd0..efc69c96bcbd 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -9,9 +9,11 @@
>>> #define _NFSD_TRACE_H
>>> 
>>> #include <linux/tracepoint.h>
>>> +#include <linux/sunrpc/xprt.h>
>>> 
>>> #include "export.h"
>>> #include "nfsfh.h"
>>> +#include "xdr4.h"
>>> 
>>> #define NFSD_TRACE_PROC_RES_FIELDS \
>>> 		__field(unsigned int, netns_ino) \
>>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>> 
>>> +TRACE_EVENT(nfsd_cb_recall_any,
>>> +	TP_PROTO(
>>> +		const struct nfsd4_cb_recall_any *ra
>>> +	),
>>> +	TP_ARGS(ra),
>>> +	TP_STRUCT__entry(
>>> +		__field(u32, cl_boot)
>>> +		__field(u32, cl_id)
>>> +		__field(u32, ra_keep)
>>> +		__field(u32, ra_bmval)
>>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> +	),
>>> +	TP_fast_assign(
>>> +		__entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>>> +		__entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>>> +		__entry->ra_keep = ra->ra_keep;
>>> +		__entry->ra_bmval = ra->ra_bmval[0];
>>> +		memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>>> +			sizeof(struct sockaddr_in6));
>>> +	),
>>> +	TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>>> +		__entry->cl_boot, __entry->cl_id,
>>> +		__entry->addr, __entry->ra_keep, __entry->ra_bmval
>>> +	)
>>> +);
>> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>> 
>> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>> 
>> 
>>> +
>>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>>> +	TP_PROTO(
>>> +		const struct nfsd4_callback *cb,
>>> +		const struct rpc_task *task
>>> +	),
>>> +	TP_ARGS(cb, task),
>>> +	TP_STRUCT__entry(
>>> +		__field(u32, cl_boot)
>>> +		__field(u32, cl_id)
>>> +		__field(int, status)
>>> +		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> +	),
>>> +	TP_fast_assign(
>>> +		__entry->status = task->tk_status;
>>> +		__entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>>> +		__entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>>> +		memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>>> +			sizeof(struct sockaddr_in6));
>>> +	),
>>> +	TP_printk("client %08x:%08x addr=%pISpc status=%d",
>>> +		__entry->cl_boot, __entry->cl_id,
>>> +		__entry->addr, __entry->status
>>> +	)
>>> +);
>> I'd like you to change this to
>> 
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
> 
> TP_PRORO of DEFINE_NFSD_CB_DONE_EVENT requires a stateid_t which
> CB_RECALL_ANY does not have. Can we can create a dummy stateid_t
> for nfsd_cb_recall_any_done?

Ah, I didn't remember that a state ID was recorded. OK, then a
separate TRACE_EVENT is appropriate for nfsd_cb_recall_any_done.


> Note that nfsd_cb_done_class does not print the server IP address
> which is more useful for tracing. Should I modify nfsd_cb_recall_any_done
> class to print the IP address from rpc_xprt?

The IP address is recorded by the Call side tracepoints. I'm OK
leaving the IP address out of the Reply side tracepoints.


> -Dai
> 
>> 
>> 
>>> +
>>> #endif /* _NFSD_TRACE_H */
>>> 
>>> #undef TRACE_INCLUDE_PATH
>>> -- 
>>> 2.9.5
>>> 
>> --
>> Chuck Lever

--
Chuck Lever




^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-11-15 14:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-10  4:17 [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
2022-11-10  4:17 ` [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
2022-11-11 15:25   ` Chuck Lever III
2022-11-10  4:17 ` [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
2022-11-11 15:25   ` Chuck Lever III
2022-11-11 15:58     ` Jeff Layton
2022-11-10  4:17 ` [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints Dai Ngo
2022-11-11 15:25   ` Chuck Lever III
2022-11-11 21:34     ` dai.ngo
2022-11-11 22:02       ` Chuck Lever III
2022-11-15  5:08     ` dai.ngo
2022-11-15 14:21       ` Chuck Lever III

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox