Linux Trace Kernel

Linux Trace Kernel
 help / color / mirror / Atom feed

* Re: [RFC PATCH 3/4] livepatch: Add "replaceable" attribute to klp_patch
From: Yafang Shao @ 2026-04-07  3:16 UTC (permalink / raw)
  To: Song Liu
  Cc: Joe Lawrence, Dylan Hatch, jpoimboe, jikos, mbenes, pmladek,
	rostedt, mhiramat, mathieu.desnoyers, kpsingh, mattbobrowski,
	jolsa, ast, daniel, andrii, martin.lau, eddyz87, memxor,
	yonghong.song, live-patching, linux-kernel, linux-trace-kernel,
	bpf
In-Reply-To: <CAPhsuW66tuF+QZ0pVheWb5sC4NQ-9CXikq=zMrPBXTHcsVPjdg@mail.gmail.com>

On Tue, Apr 7, 2026 at 10:54 AM Song Liu <song@kernel.org> wrote:
>
> On Mon, Apr 6, 2026 at 2:12 PM Joe Lawrence <joe.lawrence@redhat.com> wrote:
> [...]
> > > > > - The regular livepatches are cumulative, have the replace flag; and
> > > > >   are replaceable.
> > > > > - The occasional "off-band" livepatches do not have the replace flag,
> > > > >   and are not replaceable.
> > > > >
> > > > > With this setup, for systems with off-band livepatches loaded, we can
> > > > > still release a cumulative livepatch to replace the previous cumulative
> > > > > livepatch. Is this the expected use case?
> > > >
> > > > That matches our expected use case.
> > >
> > > If we really want to serve use cases like this, I think we can introduce
> > > some replace tag concept: Each livepatch will have a tag, u32 number.
> > > Newly loaded livepatch will only replace existing livepatch with the
> > > same tag. We can even reuse the existing "bool replace" in klp_patch,
> > > and make it u32: replace=0 means no replace; replace > 0 are the
> > > replace tag.
> > >
> > > For current users of cumulative patches, all the livepatch will have the
> > > same tag, say 1. For your use case, you can assign each user a
> > > unique tag. Then all these users can do atomic upgrades of their
> > > own livepatches.
> > >
> > > We may also need to check whether two livepatches of different tags
> > > touch the same kernel function. When that happens, the later
> > > livepatch should fail to load.

That sounds like a viable solution. I'll look into it and see how we
can implement it.

> > >
> > > Does this make sense?
> > >
> >
> > I haven't been following the thread carefully, but could the Livepatch
> > system state API (see Documentation/livepatch/system-state.rst) be
> > leveraged somehow instead of adding further replace semantics?
>
> AFAICT, system state will not help Yafang's use case.

Right.

-- 
Regards
Yafang

^ permalink raw reply

* [PATCH] tracing/hist: bound expression string construction
From: Pengpeng Hou @ 2026-04-07  6:09 UTC (permalink / raw)
  To: Steven Rostedt, Masami Hiramatsu
  Cc: Mathieu Desnoyers, linux-trace-kernel, linux-kernel, pengpeng

expr_str() allocates a fixed MAX_FILTER_STR_VAL buffer and then builds
expression names with a series of raw strcat() appends. Nested operands,
constants and field flags can push the rendered string past that fixed
limit before the name is attached to the hist field.

Convert the construction helpers to explicit bounded appends and
propagate failures back to the expression parser when the rendered name
would exceed MAX_FILTER_STR_VAL.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 kernel/trace/trace_events_hist.c | 101 +++++++++++++++++++++++--------
 1 file changed, 76 insertions(+), 25 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 73ea180cad55..caaa262360d2 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1738,85 +1738,121 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
 	return flags_str;
 }
 
-static void expr_field_str(struct hist_field *field, char *expr)
+static bool expr_append(char *expr, size_t *len, const char *str)
 {
-	if (field->flags & HIST_FIELD_FL_VAR_REF)
-		strcat(expr, "$");
-	else if (field->flags & HIST_FIELD_FL_CONST) {
+	size_t str_len = strlen(str);
+
+	if (*len + str_len >= MAX_FILTER_STR_VAL)
+		return false;
+
+	memcpy(expr + *len, str, str_len + 1);
+	*len += str_len;
+	return true;
+}
+
+static bool expr_field_str(struct hist_field *field, char *expr, size_t *len)
+{
+	if (field->flags & HIST_FIELD_FL_VAR_REF) {
+		if (!expr_append(expr, len, "$"))
+			return false;
+	} else if (field->flags & HIST_FIELD_FL_CONST) {
 		char str[HIST_CONST_DIGITS_MAX];
+		int ret;
+
+		ret = snprintf(str, sizeof(str), "%llu", field->constant);
+		if (ret >= sizeof(str))
+			return false;
 
-		snprintf(str, HIST_CONST_DIGITS_MAX, "%llu", field->constant);
-		strcat(expr, str);
+		if (!expr_append(expr, len, str))
+			return false;
 	}
 
-	strcat(expr, hist_field_name(field, 0));
+	if (!expr_append(expr, len, hist_field_name(field, 0)))
+		return false;
 
 	if (field->flags && !(field->flags & HIST_FIELD_FL_VAR_REF)) {
 		const char *flags_str = get_hist_field_flags(field);
 
 		if (flags_str) {
-			strcat(expr, ".");
-			strcat(expr, flags_str);
+			if (!expr_append(expr, len, ".") ||
+			    !expr_append(expr, len, flags_str))
+				return false;
 		}
 	}
+
+	return true;
 }
 
 static char *expr_str(struct hist_field *field, unsigned int level)
 {
 	char *expr;
+	size_t len = 0;
 
 	if (level > 1)
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
 	if (!expr)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	if (!field->operands[0]) {
-		expr_field_str(field, expr);
+		if (!expr_field_str(field, expr, &len))
+			goto free;
 		return expr;
 	}
 
 	if (field->operator == FIELD_OP_UNARY_MINUS) {
 		char *subexpr;
 
-		strcat(expr, "-(");
+		if (!expr_append(expr, &len, "-("))
+			goto free;
 		subexpr = expr_str(field->operands[0], ++level);
 		if (!subexpr) {
-			kfree(expr);
-			return NULL;
+			goto free;
+		}
+		if (!expr_append(expr, &len, subexpr) ||
+		    !expr_append(expr, &len, ")")) {
+			kfree(subexpr);
+			goto free;
 		}
-		strcat(expr, subexpr);
-		strcat(expr, ")");
 
 		kfree(subexpr);
 
 		return expr;
 	}
 
-	expr_field_str(field->operands[0], expr);
+	if (!expr_field_str(field->operands[0], expr, &len))
+		goto free;
 
 	switch (field->operator) {
 	case FIELD_OP_MINUS:
-		strcat(expr, "-");
+		if (!expr_append(expr, &len, "-"))
+			goto free;
 		break;
 	case FIELD_OP_PLUS:
-		strcat(expr, "+");
+		if (!expr_append(expr, &len, "+"))
+			goto free;
 		break;
 	case FIELD_OP_DIV:
-		strcat(expr, "/");
+		if (!expr_append(expr, &len, "/"))
+			goto free;
 		break;
 	case FIELD_OP_MULT:
-		strcat(expr, "*");
+		if (!expr_append(expr, &len, "*"))
+			goto free;
 		break;
 	default:
-		kfree(expr);
-		return NULL;
+		goto free;
 	}
 
-	expr_field_str(field->operands[1], expr);
+	if (!expr_field_str(field->operands[1], expr, &len))
+		goto free;
 
 	return expr;
+
+free:
+	kfree(expr);
+	return ERR_PTR(-E2BIG);
 }
 
 /*
@@ -2630,6 +2666,11 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
 	expr->is_signed = operand1->is_signed;
 	expr->operator = FIELD_OP_UNARY_MINUS;
 	expr->name = expr_str(expr, 0);
+	if (IS_ERR(expr->name)) {
+		ret = PTR_ERR(expr->name);
+		expr->name = NULL;
+		goto free;
+	}
 	expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
 	if (!expr->type) {
 		ret = -ENOMEM;
@@ -2842,6 +2883,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 		destroy_hist_field(operand1, 0);
 
 		expr->name = expr_str(expr, 0);
+		if (IS_ERR(expr->name)) {
+			ret = PTR_ERR(expr->name);
+			expr->name = NULL;
+			goto free_expr;
+		}
 	} else {
 		/* The operand sizes should be the same, so just pick one */
 		expr->size = operand1->size;
@@ -2855,6 +2901,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 		}
 
 		expr->name = expr_str(expr, 0);
+		if (IS_ERR(expr->name)) {
+			ret = PTR_ERR(expr->name);
+			expr->name = NULL;
+			goto free_expr;
+		}
 	}
 
 	return expr;
-- 
2.50.1 (Apple Git-155)



^ permalink raw reply related

* [PATCH net-next v2 0/2] mptcp: autotune related improvement
From: Matthieu Baerts (NGI0) @ 2026-04-07  8:45 UTC (permalink / raw)
  To: Mat Martineau, Geliang Tang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Simon Horman, Florian Westphal
  Cc: netdev, mptcp, linux-kernel, Matthieu Baerts (NGI0),
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	linux-trace-kernel

Here are two patches from Paolo that were crafted a couple of
months ago, but needed more validation because they were indirectly
causing instabilities in the selftests. The root cause has been fixed in
'net' recently in commit 8c09412e584d ("selftests: mptcp: more stable
simult_flows tests").

These patches refactor the receive space and RTT estimator, overall
making DRS more correct while avoiding receive buffer drifting to
tcp_rmem[2], which in turn makes the throughput more stable and less
bursty, especially with high bandwidth and low delay environments.

Note that the first patch addresses a very old issue. 'net-next' is
targeted because the change is quite invasive and based on a recent
backlog refactor. The 'Fixes' tag is then there more as a FYI, because
backporting this patch will quickly be blocked due to large conflicts.

Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
Changes in v2:
- Patch 1: add missing READ_ONCE() and remove unused entry. (AI)
- Link to v1: https://patch.msgid.link/20260309-net-next-mptcp-reduce-rbuf-v1-0-8f471206f9c5@kernel.org

---
Paolo Abeni (2):
      mptcp: better mptcp-level RTT estimator
      mptcp: add receive queue awareness in tcp_rcv_space_adjust()

 include/trace/events/mptcp.h |  2 +-
 net/mptcp/protocol.c         | 71 +++++++++++++++++++++++++-------------------
 net/mptcp/protocol.h         | 37 ++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 33 deletions(-)
---
base-commit: c149d90e260ca1b6b9175468955a15c4d95a9f3b
change-id: 20260306-net-next-mptcp-reduce-rbuf-4166ba6fb763

Best regards,
--  
Matthieu Baerts (NGI0) <matttbe@kernel.org>

^ permalink raw reply

* [PATCH net-next v2 1/2] mptcp: better mptcp-level RTT estimator
From: Matthieu Baerts (NGI0) @ 2026-04-07  8:45 UTC (permalink / raw)
  To: Mat Martineau, Geliang Tang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Simon Horman, Florian Westphal
  Cc: netdev, mptcp, linux-kernel, Matthieu Baerts (NGI0),
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	linux-trace-kernel
In-Reply-To: <20260407-net-next-mptcp-reduce-rbuf-v2-0-0d1d135bf6f6@kernel.org>

From: Paolo Abeni <pabeni@redhat.com>

The current MPTCP-level RTT estimator has several issues. On high speed
links, the MPTCP-level receive buffer auto-tuning happens with a
frequency well above the TCP-level's one. That in turn can cause
excessive/unneeded receive buffer increase.

On such links, the initial rtt_us value is considerably higher than the
actual delay, and the current mptcp_rcv_space_adjust() updates
msk->rcvq_space.rtt_us with a period equal to that field's previous
value. If the initial rtt_us is 40ms, its first update will happen after
40ms, even if the subflows see actual RTT orders of magnitude lower.

Additionally:
- setting the msk RTT to the maximum among all the subflows' RTTs makes
  DRS constantly overshoot the rcvbuf size when a subflow has
  considerably higher latency than the other(s).

- during unidirectional bulk transfers with multiple active subflows,
  the TCP-level RTT estimator occasionally sees a considerably higher
  value than the real link delay, i.e. when the packet scheduler reacts
  to an incoming ACK on a given subflow by pushing data on a different
  subflow.

- currently inactive but still open subflows (i.e. switched to backup
  mode) are always considered when computing the msk-level RTT.

Address all the issues above with a more accurate RTT estimation
strategy: the MPTCP-level RTT is set to the minimum of all the subflows
actually feeding data into the MPTCP receive buffer, using a small
sliding window.

While at it, also use EWMA to compute the msk-level scaling_ratio, so
that MPTCP can avoid traversing the subflow list in
mptcp_rcv_space_adjust().

Use some care to avoid updating msk and ssk level fields too often.

Fixes: a6b118febbab ("mptcp: add receive buffer auto-tuning")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: linux-trace-kernel@vger.kernel.org
---
v2:
 - samples[0] was read without READ_ONCE, prev_rtt_us was not used (AI).
---
 include/trace/events/mptcp.h |  2 +-
 net/mptcp/protocol.c         | 63 ++++++++++++++++++++++++--------------------
 net/mptcp/protocol.h         | 37 +++++++++++++++++++++++++-
 3 files changed, 72 insertions(+), 30 deletions(-)

diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index 269d949b2025..04521acba483 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -219,7 +219,7 @@ TRACE_EVENT(mptcp_rcvbuf_grow,
 		__be32 *p32;
 
 		__entry->time = time;
-		__entry->rtt_us = msk->rcvq_space.rtt_us >> 3;
+		__entry->rtt_us = mptcp_rtt_us_est(msk) >> 3;
 		__entry->copied = msk->rcvq_space.copied;
 		__entry->inq = mptcp_inq_hint(sk);
 		__entry->space = msk->rcvq_space.space;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2f4776a4f06a..70a090a95299 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -879,6 +879,32 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	return moved;
 }
 
+static void mptcp_rcv_rtt_update(struct mptcp_sock *msk,
+				 struct mptcp_subflow_context *subflow)
+{
+	const struct tcp_sock *tp = tcp_sk(subflow->tcp_sock);
+	u32 rtt_us = tp->rcv_rtt_est.rtt_us;
+	int id;
+
+	/* Update once per subflow per rcvwnd to avoid touching the msk
+	 * too often.
+	 */
+	if (!rtt_us || tp->rcv_rtt_est.seq == subflow->prev_rtt_seq)
+		return;
+
+	subflow->prev_rtt_seq = tp->rcv_rtt_est.seq;
+
+	/* Pairs with READ_ONCE() in mptcp_rtt_us_est(). */
+	id = msk->rcv_rtt_est.next_sample;
+	WRITE_ONCE(msk->rcv_rtt_est.samples[id], rtt_us);
+	if (++msk->rcv_rtt_est.next_sample == MPTCP_RTT_SAMPLES)
+		msk->rcv_rtt_est.next_sample = 0;
+
+	/* EWMA among the incoming subflows */
+	msk->scaling_ratio = ((msk->scaling_ratio << 3) - msk->scaling_ratio +
+			     tp->scaling_ratio) >> 3;
+}
+
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -892,6 +918,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 		return;
 
 	mptcp_data_lock(sk);
+	mptcp_rcv_rtt_update(msk, subflow);
 	if (!sock_owned_by_user(sk)) {
 		/* Wake-up the reader only for in-sequence data */
 		if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
@@ -2095,7 +2122,6 @@ static void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
 
 	msk->rcvspace_init = 1;
 	msk->rcvq_space.copied = 0;
-	msk->rcvq_space.rtt_us = 0;
 
 	/* initial rcv_space offering made to peer */
 	msk->rcvq_space.space = min_t(u32, tp->rcv_wnd,
@@ -2106,15 +2132,15 @@ static void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
 
 /* receive buffer autotuning.  See tcp_rcv_space_adjust for more information.
  *
- * Only difference: Use highest rtt estimate of the subflows in use.
+ * Only difference: Use lowest rtt estimate of the subflows in use, see
+ * mptcp_rcv_rtt_update() and mptcp_rtt_us_est().
  */
 static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
 {
 	struct mptcp_subflow_context *subflow;
 	struct sock *sk = (struct sock *)msk;
-	u8 scaling_ratio = U8_MAX;
-	u32 time, advmss = 1;
-	u64 rtt_us, mstamp;
+	u32 time, rtt_us;
+	u64 mstamp;
 
 	msk_owned_by_me(msk);
 
@@ -2129,29 +2155,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
 	mstamp = mptcp_stamp();
 	time = tcp_stamp_us_delta(mstamp, READ_ONCE(msk->rcvq_space.time));
 
-	rtt_us = msk->rcvq_space.rtt_us;
-	if (rtt_us && time < (rtt_us >> 3))
-		return;
-
-	rtt_us = 0;
-	mptcp_for_each_subflow(msk, subflow) {
-		const struct tcp_sock *tp;
-		u64 sf_rtt_us;
-		u32 sf_advmss;
-
-		tp = tcp_sk(mptcp_subflow_tcp_sock(subflow));
-
-		sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us);
-		sf_advmss = READ_ONCE(tp->advmss);
-
-		rtt_us = max(sf_rtt_us, rtt_us);
-		advmss = max(sf_advmss, advmss);
-		scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
-	}
-
-	msk->rcvq_space.rtt_us = rtt_us;
-	msk->scaling_ratio = scaling_ratio;
-	if (time < (rtt_us >> 3) || rtt_us == 0)
+	rtt_us = mptcp_rtt_us_est(msk);
+	if (rtt_us == U32_MAX || time < (rtt_us >> 3))
 		return;
 
 	if (msk->rcvq_space.copied <= msk->rcvq_space.space)
@@ -3015,6 +3020,7 @@ static void __mptcp_init_sock(struct sock *sk)
 	msk->timer_ival = TCP_RTO_MIN;
 	msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
 	msk->backlog_len = 0;
+	mptcp_init_rtt_est(msk);
 
 	WRITE_ONCE(msk->first, NULL);
 	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
@@ -3460,6 +3466,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	msk->bytes_retrans = 0;
 	msk->rcvspace_init = 0;
 	msk->fastclosing = 0;
+	mptcp_init_rtt_est(msk);
 
 	/* for fallback's sake */
 	WRITE_ONCE(msk->ack_seq, 0);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 1208f317ac33..e1d4783db02f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -269,6 +269,13 @@ struct mptcp_data_frag {
 	struct page *page;
 };
 
+/* Arbitrary compromise between as low as possible to react timely to subflow
+ * close event and as big as possible to avoid being fooled by biased large
+ * samples due to peer sending data on a different subflow WRT the incoming
+ * ack.
+ */
+#define MPTCP_RTT_SAMPLES	5
+
 /* MPTCP connection sock */
 struct mptcp_sock {
 	/* inet_connection_sock must be the first member */
@@ -341,11 +348,17 @@ struct mptcp_sock {
 				 */
 	struct mptcp_pm_data	pm;
 	struct mptcp_sched_ops	*sched;
+
+	/* Most recent rtt_us observed by in use incoming subflows. */
+	struct {
+		u32	samples[MPTCP_RTT_SAMPLES];
+		u32	next_sample;
+	} rcv_rtt_est;
+
 	struct {
 		int	space;	/* bytes copied in last measurement window */
 		int	copied; /* bytes copied in this measurement window */
 		u64	time;	/* start time of measurement window */
-		u64	rtt_us; /* last maximum rtt of subflows */
 	} rcvq_space;
 	u8		scaling_ratio;
 	bool		allow_subflows;
@@ -423,6 +436,27 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
 	return msk->first_pending;
 }
 
+static inline void mptcp_init_rtt_est(struct mptcp_sock *msk)
+{
+	int i;
+
+	for (i = 0; i < MPTCP_RTT_SAMPLES; ++i)
+		msk->rcv_rtt_est.samples[i] = U32_MAX;
+	msk->rcv_rtt_est.next_sample = 0;
+	msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
+}
+
+static inline u32 mptcp_rtt_us_est(const struct mptcp_sock *msk)
+{
+	u32 rtt_us = READ_ONCE(msk->rcv_rtt_est.samples[0]);
+	int i;
+
+	/* Lockless access of collected samples. */
+	for (i = 1; i < MPTCP_RTT_SAMPLES; ++i)
+		rtt_us = min(rtt_us, READ_ONCE(msk->rcv_rtt_est.samples[i]));
+	return rtt_us;
+}
+
 static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -524,6 +558,7 @@ struct mptcp_subflow_context {
 	u32	map_data_len;
 	__wsum	map_data_csum;
 	u32	map_csum_len;
+	u32	prev_rtt_seq;
 	u32	request_mptcp : 1,  /* send MP_CAPABLE */
 		request_join : 1,   /* send MP_JOIN */
 		request_bkup : 1,

-- 
2.53.0


^ permalink raw reply related

* [PATCH] ring-buffer: Preserve true payload lengths in long data events
From: Cao Ruichuang @ 2026-04-07  9:15 UTC (permalink / raw)
  To: rostedt, mhiramat; +Cc: mathieu.desnoyers, linux-kernel, linux-trace-kernel

Long ring buffer data records currently store the aligned in-buffer size in
their length field. That makes ring_buffer_event_length() report padded
sizes, and small TRACE_PRINT / TRACE_RAW_DATA records lose their true
payload length entirely when they use the short type_len encoding.

Teach long data events to keep the true payload size in array[0], and let
the ring buffer derive the aligned in-buffer size separately when it needs
to walk or discard records. Then add a long-reserve helper and use it for
TRACE_PRINT and TRACE_RAW_DATA so their zero-length-array tails always
preserve the real payload size.

The temporary filtered-event buffer keeps the same long-record payload
length semantics, and a QEMU runtime reproducer for trace_marker_raw now
reports the expected byte counts again.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=210173
Signed-off-by: Cao Ruichuang <create0818@163.com>
---
 include/linux/ring_buffer.h |  2 ++
 kernel/trace/ring_buffer.c  | 56 ++++++++++++++++++++++++++-----------
 kernel/trace/trace.c        |  8 +++---
 kernel/trace/trace.h        | 15 ++++++++++
 kernel/trace/trace_printk.c |  8 +++---
 5 files changed, 65 insertions(+), 24 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d862fa610..a4e46cb53 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -137,6 +137,8 @@ void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val);
 
 struct ring_buffer_event *ring_buffer_lock_reserve(struct trace_buffer *buffer,
 						   unsigned long length);
+struct ring_buffer_event *ring_buffer_lock_reserve_long(struct trace_buffer *buffer,
+							unsigned long length);
 int ring_buffer_unlock_commit(struct trace_buffer *buffer);
 int ring_buffer_write(struct trace_buffer *buffer,
 		      unsigned long length, void *data);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 170170bd8..c9ade62df 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,10 +206,14 @@ rb_event_data_length(struct ring_buffer_event *event)
 	unsigned length;
 
 	if (event->type_len)
-		length = event->type_len * RB_ALIGNMENT;
-	else
-		length = event->array[0];
-	return length + RB_EVNT_HDR_SIZE;
+		return event->type_len * RB_ALIGNMENT + RB_EVNT_HDR_SIZE;
+
+	/*
+	 * Long records store the true payload size in array[0], but still
+	 * consume an aligned amount of space in the buffer.
+	 */
+	length = event->array[0] + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
+	return ALIGN(length, RB_ARCH_ALIGNMENT);
 }
 
 /*
@@ -276,12 +280,13 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 	if (extended_time(event))
 		event = skip_time_extend(event);
 
+	if (!event->type_len)
+		return event->array[0];
+
 	length = rb_event_length(event);
 	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
-	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
-                length -= sizeof(event->array[0]);
 	return length;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
@@ -463,9 +468,11 @@ struct rb_event_info {
 	u64			delta;
 	u64			before;
 	u64			after;
+	unsigned long		data_length;
 	unsigned long		length;
 	struct buffer_page	*tail_page;
 	int			add_timestamp;
+	bool			force_long;
 };
 
 /*
@@ -3796,14 +3803,15 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 	event->time_delta = delta;
 	length -= RB_EVNT_HDR_SIZE;
-	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT ||
+	    info->force_long) {
 		event->type_len = 0;
-		event->array[0] = length;
+		event->array[0] = info->data_length;
 	} else
 		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 }
 
-static unsigned rb_calculate_event_length(unsigned length)
+static unsigned int rb_calculate_event_length(unsigned int length, bool force_long)
 {
 	struct ring_buffer_event event; /* Used only for sizeof array */
 
@@ -3811,7 +3819,7 @@ static unsigned rb_calculate_event_length(unsigned length)
 	if (!length)
 		length++;
 
-	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT || force_long)
 		length += sizeof(event.array[0]);
 
 	length += RB_EVNT_HDR_SIZE;
@@ -4605,7 +4613,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 static __always_inline struct ring_buffer_event *
 rb_reserve_next_event(struct trace_buffer *buffer,
 		      struct ring_buffer_per_cpu *cpu_buffer,
-		      unsigned long length)
+		      unsigned long length, bool force_long)
 {
 	struct ring_buffer_event *event;
 	struct rb_event_info info;
@@ -4641,7 +4649,9 @@ rb_reserve_next_event(struct trace_buffer *buffer,
 	}
 #endif
 
-	info.length = rb_calculate_event_length(length);
+	info.length = rb_calculate_event_length(length, force_long);
+	info.data_length = length ? : 1;
+	info.force_long = force_long;
 
 	if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
 		add_ts_default = RB_ADD_STAMP_ABSOLUTE;
@@ -4698,8 +4708,9 @@ rb_reserve_next_event(struct trace_buffer *buffer,
  * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
  * If NULL is returned, then nothing has been allocated or locked.
  */
-struct ring_buffer_event *
-ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
+static struct ring_buffer_event *
+__ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length,
+			   bool force_long)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
@@ -4727,7 +4738,7 @@ ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
 	if (unlikely(trace_recursive_lock(cpu_buffer)))
 		goto out;
 
-	event = rb_reserve_next_event(buffer, cpu_buffer, length);
+	event = rb_reserve_next_event(buffer, cpu_buffer, length, force_long);
 	if (!event)
 		goto out_unlock;
 
@@ -4739,8 +4750,21 @@ ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
 	preempt_enable_notrace();
 	return NULL;
 }
+
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
+{
+	return __ring_buffer_lock_reserve(buffer, length, false);
+}
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 
+struct ring_buffer_event *
+ring_buffer_lock_reserve_long(struct trace_buffer *buffer, unsigned long length)
+{
+	return __ring_buffer_lock_reserve(buffer, length, true);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve_long);
+
 /*
  * Decrement the entries to the page that an event is on.
  * The event does not even need to exist, only the pointer
@@ -4874,7 +4898,7 @@ int ring_buffer_write(struct trace_buffer *buffer,
 	if (unlikely(trace_recursive_lock(cpu_buffer)))
 		return -EBUSY;
 
-	event = rb_reserve_next_event(buffer, cpu_buffer, length);
+	event = rb_reserve_next_event(buffer, cpu_buffer, length, false);
 	if (!event)
 		goto out_unlock;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a626211ce..ffc1b1e9c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6503,8 +6503,8 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
 	size = cnt + meta_size;
 
 	buffer = tr->array_buffer.buffer;
-	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
-					    tracing_gen_ctx());
+	event = __trace_buffer_lock_reserve_long(buffer, TRACE_PRINT, size,
+						 tracing_gen_ctx());
 	if (unlikely(!event)) {
 		/*
 		 * If the size was greater than what was allowed, then
@@ -6917,8 +6917,8 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
 	if (size > ring_buffer_max_event_size(buffer))
 		return -EINVAL;
 
-	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
-					    tracing_gen_ctx());
+	event = __trace_buffer_lock_reserve_long(buffer, TRACE_RAW_DATA, size,
+						 tracing_gen_ctx());
 	if (!event)
 		/* Ring buffer disabled, return as if not open for write */
 		return -EBADF;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b8f380458..da55717c9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1613,6 +1613,21 @@ __trace_buffer_lock_reserve(struct trace_buffer *buffer,
 	return event;
 }
 
+static __always_inline struct ring_buffer_event *
+__trace_buffer_lock_reserve_long(struct trace_buffer *buffer,
+				 int type,
+				 unsigned long len,
+				 unsigned int trace_ctx)
+{
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_lock_reserve_long(buffer, len);
+	if (event != NULL)
+		trace_event_setup(event, type, trace_ctx);
+
+	return event;
+}
+
 static __always_inline void
 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 {
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9f67ce42e..1441b2bd4 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -444,8 +444,8 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip,
 	trace_ctx = tracing_gen_ctx();
 	buffer = tr->array_buffer.buffer;
 	guard(ring_buffer_nest)(buffer);
-	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
-					    trace_ctx);
+	event = __trace_buffer_lock_reserve_long(buffer, TRACE_PRINT, alloc,
+						 trace_ctx);
 	if (!event)
 		return 0;
 
@@ -725,8 +725,8 @@ int __trace_array_vprintk(struct trace_buffer *buffer,
 
 	size = sizeof(*entry) + len + 1;
 	scoped_guard(ring_buffer_nest, buffer) {
-		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
-						    trace_ctx);
+		event = __trace_buffer_lock_reserve_long(buffer, TRACE_PRINT, size,
+							 trace_ctx);
 		if (!event)
 			goto out;
 		entry = ring_buffer_event_data(event);
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* Re: [RFC PATCH 3/4] livepatch: Add "replaceable" attribute to klp_patch
From: Yafang Shao @ 2026-04-07  9:45 UTC (permalink / raw)
  To: Song Liu
  Cc: Joe Lawrence, Dylan Hatch, jpoimboe, jikos, mbenes, pmladek,
	rostedt, mhiramat, mathieu.desnoyers, kpsingh, mattbobrowski,
	jolsa, ast, daniel, andrii, martin.lau, eddyz87, memxor,
	yonghong.song, live-patching, linux-kernel, linux-trace-kernel,
	bpf
In-Reply-To: <CALOAHbDN_t-ZRO0g9_sQFCv0J6SPDFfwJCcwSzd4ww5XRkU0QA@mail.gmail.com>

On Tue, Apr 7, 2026 at 11:16 AM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> On Tue, Apr 7, 2026 at 10:54 AM Song Liu <song@kernel.org> wrote:
> >
> > On Mon, Apr 6, 2026 at 2:12 PM Joe Lawrence <joe.lawrence@redhat.com> wrote:
> > [...]
> > > > > > - The regular livepatches are cumulative, have the replace flag; and
> > > > > >   are replaceable.
> > > > > > - The occasional "off-band" livepatches do not have the replace flag,
> > > > > >   and are not replaceable.
> > > > > >
> > > > > > With this setup, for systems with off-band livepatches loaded, we can
> > > > > > still release a cumulative livepatch to replace the previous cumulative
> > > > > > livepatch. Is this the expected use case?
> > > > >
> > > > > That matches our expected use case.
> > > >
> > > > If we really want to serve use cases like this, I think we can introduce
> > > > some replace tag concept: Each livepatch will have a tag, u32 number.
> > > > Newly loaded livepatch will only replace existing livepatch with the
> > > > same tag. We can even reuse the existing "bool replace" in klp_patch,
> > > > and make it u32: replace=0 means no replace; replace > 0 are the
> > > > replace tag.
> > > >
> > > > For current users of cumulative patches, all the livepatch will have the
> > > > same tag, say 1. For your use case, you can assign each user a
> > > > unique tag. Then all these users can do atomic upgrades of their
> > > > own livepatches.
> > > >
> > > > We may also need to check whether two livepatches of different tags
> > > > touch the same kernel function. When that happens, the later
> > > > livepatch should fail to load.
>
> That sounds like a viable solution. I'll look into it and see how we
> can implement it.

Does the following change look good to you ?

Subject: [PATCH] livepatch: Support scoped atomic replace using replace tags

Extend the replace attribute from a boolean to a u32 to act as a replace
tag. This introduces the following semantics:

  replace = 0: Atomic replace is disabled. However, this patch remains
               eligible to be superseded by others.
  replace > 0: Enables tagged replace (default is 1). A newly loaded
               livepatch will only replace existing patches that share the
               same tag.

To maintain backward compatibility, a patch with replace == 0 does not
trigger an outgoing atomic replace, but remains eligible to be superseded
by any incoming patch with a valid replace tag.

Suggested-by: Song Liu <song@kernel.org>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 .../livepatch/cumulative-patches.rst          | 20 +++++++-----
 Documentation/livepatch/livepatch.rst         | 31 +++++++++++++------
 include/linux/livepatch.h                     |  8 +++--
 kernel/livepatch/core.c                       |  4 +++
 scripts/livepatch/init.c                      |  6 +---
 scripts/livepatch/klp-build                   | 11 +++++--
 6 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/Documentation/livepatch/cumulative-patches.rst
b/Documentation/livepatch/cumulative-patches.rst
index 1931f318976a..06e90dc5967c 100644
--- a/Documentation/livepatch/cumulative-patches.rst
+++ b/Documentation/livepatch/cumulative-patches.rst
@@ -12,23 +12,26 @@ modified the same function in different ways.

 An elegant solution comes with the feature called "Atomic Replace". It allows
 creation of so called "Cumulative Patches". They include all wanted changes
-from all older livepatches and completely replace them in one transition.
+from older livepatches with a matching tag and replace them in one transition.

 Usage
 -----

-The atomic replace can be enabled by setting "replace" flag in struct
klp_patch,
-for example::
+The atomic replace can be enabled by setting a non-zero value to the "replace"
+attribute in ``struct klp_patch``. This value acts as a **replace tag**,
+defining the scope of the replacement.
+
+For example::

        static struct klp_patch patch = {
                .mod = THIS_MODULE,
                .objs = objs,
-               .replace = true,
+               .replace = 1,
        };

 All processes are then migrated to use the code only from the new patch.
-Once the transition is finished, all older patches are automatically
-disabled.
+Once the transition is finished, all older patches with the same replace tag
+are automatically disabled. Patches with different tags remain active.

 Ftrace handlers are transparently removed from functions that are no
 longer modified by the new cumulative patch.
@@ -62,9 +65,10 @@ Limitations:
 ------------

   - Once the operation finishes, there is no straightforward way
-    to reverse it and restore the replaced patches atomically.
+    to reverse it and restore the replaced patches (with the same tag)
+    atomically.

-    A good practice is to set .replace flag in any released livepatch.
+    A good practice is to set a consistent .replace tag in related livepatches.
     Then re-adding an older livepatch is equivalent to downgrading
     to that patch. This is safe as long as the livepatches do _not_ do
     extra modifications in (un)patching callbacks or in the module_init()
diff --git a/Documentation/livepatch/livepatch.rst
b/Documentation/livepatch/livepatch.rst
index acb90164929e..1fc1543a22c3 100644
--- a/Documentation/livepatch/livepatch.rst
+++ b/Documentation/livepatch/livepatch.rst
@@ -347,15 +347,28 @@ to '0'.
 5.3. Replacing
 --------------

-All enabled patches might get replaced by a cumulative patch that
-has the .replace flag set.
-
-Once the new patch is enabled and the 'transition' finishes then
-all the functions (struct klp_func) associated with the replaced
-patches are removed from the corresponding struct klp_ops. Also
-the ftrace handler is unregistered and the struct klp_ops is
-freed when the related function is not modified by the new patch
-and func_stack list becomes empty.
+All currently enabled patches may be superseded by a cumulative patch that
+has the ``.replace`` attribute enabled. The behavior of the replacement
+depends on the value assigned to the replace tag:
+
+replace = 0
+    Atomic replace is disabled. However, this patch remains eligible to be
+    superseded by others.
+
+replace > 0
+    Enables tagged atomic replace. Once the new patch is enabled and the
+    transition finishes, the livepatching core identifies all existing
+    patches that share the same replace tag.
+
+Once the transition is complete, all functions (``struct klp_func``)
+associated with the matching replaced patches are removed from the
+corresponding ``struct klp_ops``. If a function is no longer modified by
+the new patch and its ``func_stack`` list becomes empty, the ftrace
+handler is unregistered and the ``struct klp_ops`` is freed.
+
+Patches with a different replace tag are not affected by this process
+and remain active. This allows for the independent management and
+stacking of multiple, non-conflicting livepatch sets.

 See Documentation/livepatch/cumulative-patches.rst for more details.

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index ba9e3988c07c..417c67a17b99 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -123,7 +123,11 @@ struct klp_state {
  * @mod:       reference to the live patch module
  * @objs:      object entries for kernel objects to be patched
  * @states:    system states that can get modified
- * @replace:   replace all actively used patches
+ * @replace:   replace tag:
+ *             = 0: Atomic replace is disabled; however, this patch remains
+ *                  eligible to be superseded by others.
+ *             > 0: Atomic replace is enabled. Only existing patches with a
+ *                  matching replace tag will be superseded.
  * @list:      list node for global list of actively used patches
  * @kobj:      kobject for sysfs resources
  * @obj_list:  dynamic list of the object entries
@@ -137,7 +141,7 @@ struct klp_patch {
        struct module *mod;
        struct klp_object *objs;
        struct klp_state *states;
-       bool replace;
+       unsigned int replace;

        /* internal */
        struct list_head list;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 28d15ba58a26..e4e5c03b0724 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -793,6 +793,8 @@ void klp_free_replaced_patches_async(struct
klp_patch *new_patch)
        klp_for_each_patch_safe(old_patch, tmp_patch) {
                if (old_patch == new_patch)
                        return;
+               if (old_patch->replace && old_patch->replace !=
new_patch->replace)
+                       continue;
                klp_free_patch_async(old_patch);
        }
 }
@@ -1194,6 +1196,8 @@ void klp_unpatch_replaced_patches(struct
klp_patch *new_patch)
        klp_for_each_patch(old_patch) {
                if (old_patch == new_patch)
                        return;
+               if (old_patch->replace && old_patch->replace !=
new_patch->replace)
+                       continue;

                old_patch->enabled = false;
                klp_unpatch_objects(old_patch);
diff --git a/scripts/livepatch/init.c b/scripts/livepatch/init.c
index f14d8c8fb35f..cd00e278a1d2 100644
--- a/scripts/livepatch/init.c
+++ b/scripts/livepatch/init.c
@@ -72,11 +72,7 @@ static int __init livepatch_mod_init(void)

        /* TODO patch->states */

-#ifdef KLP_NO_REPLACE
-       patch->replace = false;
-#else
-       patch->replace = true;
-#endif
+       patch->replace = KLP_REPLACE_TAG;

        return klp_enable_patch(patch);

diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build
index 7b82c7503c2b..9f6a7673304f 100755
--- a/scripts/livepatch/klp-build
+++ b/scripts/livepatch/klp-build
@@ -118,6 +118,7 @@ Options:
    -j, --jobs=<jobs>           Build jobs to run simultaneously
[default: $JOBS]
    -o, --output=<file.ko>      Output file [default: livepatch-<patch-name>.ko]
        --no-replace            Disable livepatch atomic replace
+   -t, --replace-tag=<tag>     Set the atomic replace tag for this livepatch
    -v, --verbose               Pass V=1 to kernel/module builds

 Advanced Options:
@@ -142,8 +143,8 @@ process_args() {
        local long
        local args

-       short="hfj:o:vdS:T"
-       long="help,show-first-changed,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp"
+       short="hfj:o:t:vdS:T"
+       long="help,show-first-changed,jobs:,output:,no-replace,replace-tag:,verbose,debug,short-circuit:,keep-tmp"

        args=$(getopt --options "$short" --longoptions "$long" -- "$@") || {
                echo; usage; exit
@@ -176,6 +177,10 @@ process_args() {
                                REPLACE=0
                                shift
                                ;;
+                       -t | --replace-tag)
+                               REPLACE="$2"
+                               shift 2
+                               ;;
                        -v | --verbose)
                                VERBOSE="V=1"
                                shift
@@ -759,7 +764,7 @@ build_patch_module() {

        cflags=("-ffunction-sections")
        cflags+=("-fdata-sections")
-       [[ $REPLACE -eq 0 ]] && cflags+=("-DKLP_NO_REPLACE")
+       cflags+=("-DKLP_REPLACE_TAG=$REPLACE")

        cmd=("make")
        cmd+=("$VERBOSE")


--
Regards
Yafang

^ permalink raw reply related

* [PATCH] selftests/ftrace: Check exact trace_marker_raw payload lengths
From: CaoRuichuang @ 2026-04-07 10:12 UTC (permalink / raw)
  To: rostedt, mhiramat, mathieu.desnoyers, shuah
  Cc: linux-kernel, linux-trace-kernel, linux-kselftest, Cao Ruichuang

From: Cao Ruichuang <create0818@163.com>

trace_marker_raw.tc currently depends on awk strtonum() and
assumes that the printed raw-data byte count is rounded up to four
bytes.

That makes the test fail on systems that use mawk, and it no longer
matches the raw_data trace output we want to validate after preserving
true payload lengths for long records.

Rewrite the test to capture a small sequence of raw marker writes and
check the exact number of printed payload bytes in order. While doing
that, use od for the endian probe, switch to a fixed raw marker id so
the test only varies payload length, and disable pause-on-trace while
streaming trace_pipe.

Signed-off-by: Cao Ruichuang <create0818@163.com>
---
 .../ftrace/test.d/00basic/trace_marker_raw.tc | 93 ++++++++++++-------
 1 file changed, 59 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
index a2c42e13f..3b37890f8 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
@@ -1,11 +1,11 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 # description: Basic tests on writing to trace_marker_raw
-# requires: trace_marker_raw
+# requires: trace_marker_raw od:program
 # flags: instance
 
 is_little_endian() {
-	if lscpu | grep -q 'Little Endian'; then
+	if [ "$(printf '\001\000\000\000' | od -An -tu4 | tr -d '[:space:]')" = "1" ]; then
 		echo 1;
 	else
 		echo 0;
@@ -34,7 +34,7 @@ make_str() {
 
 	data=`printf -- 'X%.0s' $(seq $cnt)`
 
-	printf "${val}${data}"
+	printf "%b%s" "${val}" "${data}"
 }
 
 write_buffer() {
@@ -47,36 +47,68 @@ write_buffer() {
 
 
 test_multiple_writes() {
+	out_file=$TMPDIR/trace_marker_raw.out
+	match_file=$TMPDIR/trace_marker_raw.lines
+	wait_iter=0
+	pause_on_trace=
+
+	if [ -f options/pause-on-trace ]; then
+		pause_on_trace=`cat options/pause-on-trace`
+		echo 0 > options/pause-on-trace
+	fi
+
+	: > trace
+	cat trace_pipe > $out_file &
+	reader_pid=$!
+	sleep 1
+
+	# Write sizes that cover both the short and long raw-data encodings
+	# without overflowing the trace buffer before we can verify them.
+	for i in `seq 1 12`; do
+		write_buffer 0x12345678 $i
+	done
 
-	# Write a bunch of data where the id is the count of
-	# data to write
-	for i in `seq 1 10` `seq 101 110` `seq 1001 1010`; do
-		write_buffer $i $i
+	while [ "`grep -c ' buf:' $out_file 2> /dev/null || true`" -lt 12 ]; do
+		wait_iter=$((wait_iter + 1))
+		if [ $wait_iter -ge 10 ]; then
+			kill $reader_pid 2> /dev/null || true
+			wait $reader_pid 2> /dev/null || true
+			if [ -n "$pause_on_trace" ]; then
+				echo $pause_on_trace > options/pause-on-trace
+			fi
+			return 1
+		fi
+		sleep 1
 	done
 
 	# add a little buffer
 	echo stop > trace_marker
+	sleep 1
+	kill $reader_pid 2> /dev/null || true
+	wait $reader_pid 2> /dev/null || true
+	if [ -n "$pause_on_trace" ]; then
+		echo $pause_on_trace > options/pause-on-trace
+	fi
 
-	# Check to make sure the number of entries is the id (rounded up by 4)
-	awk '/.*: # [0-9a-f]* / {
-			print;
-			cnt = -1;
-			for (i = 0; i < NF; i++) {
-				# The counter is after the "#" marker
-				if ( $i == "#" ) {
-					i++;
-					cnt = strtonum("0x" $i);
-					num = NF - (i + 1);
-					# The number of items is always rounded up by 4
-					cnt2 = int((cnt + 3) / 4) * 4;
-					if (cnt2 != num) {
-						exit 1;
-					}
-					break;
-				}
-			}
-		}
-	// { if (NR > 30) { exit 0; } } ' trace_pipe;
+	grep ' buf:' $out_file > $match_file || return 1
+	if [ "`wc -l < $match_file`" -ne 12 ]; then
+		cat $match_file
+		return 1
+	fi
+
+	# Check to make sure the number of byte values matches the id exactly.
+	for expected in `seq 1 12`; do
+		line=`sed -n "${expected}p" $match_file`
+		if [ -z "$line" ]; then
+			return 1
+		fi
+		rest=${line#* buf: }
+		set -- $rest
+		if [ "$#" -ne "$expected" ]; then
+			echo "$line"
+			return 1
+		fi
+	done
 }
 
 
@@ -107,13 +139,6 @@ test_buffer() {
 
 ORIG=`cat buffer_size_kb`
 
-# test_multiple_writes test needs at least 12KB buffer
-NEW_SIZE=12
-
-if [ ${ORIG} -lt ${NEW_SIZE} ]; then
-	echo ${NEW_SIZE} > buffer_size_kb
-fi
-
 test_buffer
 if ! test_multiple_writes; then
 	echo ${ORIG} > buffer_size_kb
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* Re: [PATCH v2] bootconfig: Apply early options from embedded config
From: Breno Leitao @ 2026-04-07 10:19 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Jonathan Corbet, Shuah Khan, linux-kernel, linux-trace-kernel,
	linux-doc, oss, paulmck, rostedt, kernel-team, Kiryl Shutsemau
In-Reply-To: <20260403114519.14e326a4bba019373bf3ff09@kernel.org>

On Fri, Apr 03, 2026 at 11:45:19AM +0900, Masami Hiramatsu wrote:
> > I'm still uncertain about this approach. The goal is to identify and
> > categorize the early parameters that are parsed prior to bootconfig
> > initialization.
>
> Yes, if we support early parameters in bootconfig, we need to clarify
> which parameters are inherently unsupportable, and document it.
> Currently it is easy to say that it does not support the parameter
> defined with "early_param()". Similary, maybe we should introduce
> "arch_param()" or something like it (or support all of them).
>
> >
> > Moreover, this work could become obsolete if bootconfig's initialization
> > point shifts earlier or later in the boot sequence, necessitating
> > another comprehensive analysis.
>
> If we can init it before calling setup_arch(), yes, we don't need to
> check it. So that is another option. Do you think it is feasible to
> support all of them? (Of course, theologically we can do, but the
> question is the use case and requirements.)

I don't believe all early parameters can be supported by bootconfig.
Some are inherently incompatible as far as I understand, while others
depend on bootconfig's initialization point in the boot sequence.

Regarding documenting arch_param(), I need clarification: should we
document parameters that are currently called before bootconfig (as of
today), or those that fundamentally cannot be called by bootconfig
regardless of its location?

> > Do you have any additional recommendations if I proceed with this
> > approach?
>
> OK,
>
> First of all, even if we enable early parameter support in bootconfig,
> this is only possible if bootconfig is embedded. In that case, we can
> pass memory that has been pre-allocated at compile time to bootconfig
> as a working area. However, this will consume a lot of memory, so it
> needs to be selectable in Kconfig.
>
> If you're going to embed this, as Kiryl pointed out[1], it might be better
> to pass pre-normalized (or compiled) data and avoid using a parser.
> Compilation itself is relatively easy if you utilize the tools/bootconfig.
> (However, in this case, there doesn't seem to be much point in using
>  bootconfig in the first place because we also can use embed kernel
>  cmdline.)
>
> [1] https://lore.kernel.org/all/acueCFv4neO7zQGI@thinkstation/
>
> Can you clarify the main reason of requesting this feature and
> examples?

My primary objective is to enable early configuration of
`irqchip.gicv3_pseudo_nmi`, allowing the kernel to support pseudo NMI
on arm64 by default.

^ permalink raw reply

* [PATCH] tracing/fprobe: Check the same type fprobe on table as the unregistered one
From: Masami Hiramatsu (Google) @ 2026-04-07 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Masami Hiramatsu
  Cc: Menglong Dong, Mathieu Desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Commit 2c67dc457bc6 ("tracing: fprobe: optimization for entry only case")
introduced a different ftrace_ops for entry-only fprobes.

However, when unregistering an fprobe, the kernel only checks if another
fprobe exists at the same address, without checking which type of fprobe
it is.
If fprobes of different types are registered at the same address, that
address is registered in both fgraph_ops and ftrace_ops, but only one of
those entries is deleted when the fprobes are unregistered (the entry for
the fprobe removed first is never deleted from its ops).

This results in junk entries remaining in either fgraph_ops or ftrace_ops.
For example:
 =======
 cd /sys/kernel/tracing

 # 'Add entry and exit events on the same place'
 echo 'f:event1 vfs_read' >> dynamic_events
 echo 'f:event2 vfs_read%return' >> dynamic_events

 # 'Enable both of them'
 echo 1 > events/fprobes/enable
 cat enabled_functions
vfs_read (2)            ->arch_ftrace_ops_list_func+0x0/0x210

 # 'Disable and remove exit event'
 echo 0 > events/fprobes/event2/enable
 echo -:event2 >> dynamic_events

 # 'Disable and remove all events'
 echo 0 > events/fprobes/enable
 echo > dynamic_events

 # 'Add another event'
 echo 'f:event3 vfs_open%return' > dynamic_events
 cat dynamic_events
f:fprobes/event3 vfs_open%return

 echo 1 > events/fprobes/enable
 cat enabled_functions
vfs_open (1)            tramp: 0xffffffffa0001000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60    subops: {ent:fprobe_fgraph_entry+0x0/0x620 ret:fprobe_return+0x0/0x150}
vfs_read (1)            tramp: 0xffffffffa0001000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60    subops: {ent:fprobe_fgraph_entry+0x0/0x620 ret:fprobe_return+0x0/0x150}
 =======

As you can see, a stale entry for vfs_read remains.

To fix this issue, when unregistering, the kernel should also check whether
any fprobe of the same type still exists at the same address, and if not,
delete the address from the corresponding ops (fgraph_ops or ftrace_ops).

Fixes: 2c67dc457bc6 ("tracing: fprobe: optimization for entry only case")
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/fprobe.c |   77 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 62 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index dcadf1d23b8a..7f75e6e4462c 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -85,11 +85,9 @@ static int insert_fprobe_node(struct fprobe_hlist_node *node)
 	return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
 }
 
-/* Return true if there are synonims */
-static bool delete_fprobe_node(struct fprobe_hlist_node *node)
+static void delete_fprobe_node(struct fprobe_hlist_node *node)
 {
 	lockdep_assert_held(&fprobe_mutex);
-	bool ret;
 
 	/* Avoid double deleting */
 	if (READ_ONCE(node->fp) != NULL) {
@@ -97,13 +95,6 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node)
 		rhltable_remove(&fprobe_ip_table, &node->hlist,
 				fprobe_rht_params);
 	}
-
-	rcu_read_lock();
-	ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr,
-				fprobe_rht_params);
-	rcu_read_unlock();
-
-	return ret;
 }
 
 /* Check existence of the fprobe */
@@ -337,6 +328,32 @@ static bool fprobe_is_ftrace(struct fprobe *fp)
 	return !fp->exit_handler;
 }
 
+static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace)
+{
+	struct rhlist_head *head, *pos;
+	struct fprobe_hlist_node *node;
+	struct fprobe *fp;
+
+	guard(rcu)();
+	head = rhltable_lookup(&fprobe_ip_table, &ip,
+				fprobe_rht_params);
+	if (!head)
+		return false;
+	/* We have to check the same type on the list. */
+	rhl_for_each_entry_rcu(node, pos, head, hlist) {
+		if (node->addr != ip)
+			break;
+		fp = READ_ONCE(node->fp);
+		if (likely(fp)) {
+			if ((!ftrace && fp->exit_handler) ||
+			    (ftrace && !fp->exit_handler))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 #ifdef CONFIG_MODULES
 static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
 			   int reset)
@@ -360,6 +377,29 @@ static bool fprobe_is_ftrace(struct fprobe *fp)
 	return false;
 }
 
+static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace __maybe_unused)
+{
+	struct rhlist_head *head, *pos;
+	struct fprobe_hlist_node *node;
+	struct fprobe *fp;
+
+	guard(rcu)();
+	head = rhltable_lookup(&fprobe_ip_table, &ip,
+				fprobe_rht_params);
+	if (!head)
+		return false;
+	/* We only need to check fp is there. */
+	rhl_for_each_entry_rcu(node, pos, head, hlist) {
+		if (node->addr != ip)
+			break;
+		fp = READ_ONCE(node->fp);
+		if (likely(fp))
+			return true;
+	}
+
+	return false;
+}
+
 #ifdef CONFIG_MODULES
 static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
 			   int reset)
@@ -574,15 +614,20 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad
 static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
 					 struct fprobe_addr_list *alist)
 {
+	lockdep_assert_in_rcu_read_lock();
+
 	if (!within_module(node->addr, mod))
 		return;
-	if (delete_fprobe_node(node))
-		return;
+
+	delete_fprobe_node(node);
 	/*
-	 * If failed to update alist, just continue to update hlist.
+	 * Ignore failure of updating alist, but continue to update hlist.
 	 * Therefore, at list user handler will not hit anymore.
+	 * And don't care the type here, because all fprobes on the same
+	 * address must be removed eventually.
 	 */
-	fprobe_addr_list_add(alist, node->addr);
+	if (!rhltable_lookup(&fprobe_ip_table, &node->addr, fprobe_rht_params))
+		fprobe_addr_list_add(alist, node->addr);
 }
 
 /* Handle module unloading to manage fprobe_ip_table. */
@@ -943,7 +988,9 @@ int unregister_fprobe(struct fprobe *fp)
 	/* Remove non-synonim ips from table and hash */
 	count = 0;
 	for (i = 0; i < hlist_array->size; i++) {
-		if (!delete_fprobe_node(&hlist_array->array[i]))
+		delete_fprobe_node(&hlist_array->array[i]);
+		if (!fprobe_exists_on_hash(hlist_array->array[i].addr,
+					   fprobe_is_ftrace(fp)))
 			addrs[count++] = hlist_array->array[i].addr;
 	}
 	del_fprobe_hash(fp);


^ permalink raw reply related

* [PATCH] selftests/ftrace: Drop invalid top-level local in test_ownership
From: CaoRuichuang @ 2026-04-07 10:26 UTC (permalink / raw)
  To: rostedt, mhiramat, mathieu.desnoyers, shuah
  Cc: linux-kernel, linux-trace-kernel, linux-kselftest, Cao Ruichuang

From: Cao Ruichuang <create0818@163.com>

test_ownership.tc is sourced by ftracetest under /bin/sh.

The script currently declares mount_point with local at file scope,
which makes /bin/sh abort with "local: not in a function" before the
test can reach the eventfs ownership checks.

Replace the top-level local declaration with a normal shell variable so
kernels that support the gid= tracefs mount option can run the test at
all.

Signed-off-by: Cao Ruichuang <create0818@163.com>
---
 tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
index e71cc3ad0..6d00d3c0f 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
@@ -6,7 +6,7 @@
 original_group=`stat -c "%g" .`
 original_owner=`stat -c "%u" .`
 
-local mount_point=$(get_mount_point)
+mount_point=$(get_mount_point)
 
 mount_options=$(get_mnt_options "$mount_point")
 
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* [PATCH] selftests/ftrace: Account for fprobe attachment at creation
From: Cao Ruichuang @ 2026-04-07 11:57 UTC (permalink / raw)
  To: rostedt, mhiramat, mathieu.desnoyers, shuah
  Cc: linux-kernel, linux-trace-kernel, linux-kselftest

add_remove_fprobe.tc assumes that enabling an fprobe event is what adds
its target function to enabled_functions.

On the current kernel, the fprobe target already appears in
enabled_functions as soon as the event is created, and enabling the
event does not change that count again. That makes the test fail even
though the event lifecycle itself works.

Record the attachment baseline after creating the probe events and only
check that enabling them keeps the expected functions attached. The
cleanup checks still verify that removing the events returns
enabled_functions to its original state.

Signed-off-by: Cao Ruichuang <create0818@163.com>
---
 .../test.d/dynevent/add_remove_fprobe.tc      | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index 47067a5e3..ff08bd1ac 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -26,23 +26,29 @@ grep -q myevent2 dynamic_events
 grep -q myevent3 dynamic_events
 test -d events/fprobes/myevent1
 test -d events/fprobes/myevent2
-
-echo 1 > events/fprobes/myevent1/enable
-# Make sure the event is attached.
 grep -q $PLACE enabled_functions
+grep -q $PLACE2 enabled_functions
 cnt=`cat enabled_functions | wc -l`
-if [ $cnt -eq $ocnt ]; then
+if [ $cnt -le $ocnt ]; then
+	exit_fail
+fi
+
+echo 1 > events/fprobes/myevent1/enable
+cnt1=`cat enabled_functions | wc -l`
+if [ $cnt1 -ne $cnt ]; then
 	exit_fail
 fi
 
 echo 1 > events/fprobes/myevent2/enable
 cnt2=`cat enabled_functions | wc -l`
+if [ $cnt2 -ne $cnt1 ]; then
+	exit_fail
+fi
 
 echo 1 > events/fprobes/myevent3/enable
-# If the function is different, the attached function should be increased
 grep -q $PLACE2 enabled_functions
 cnt=`cat enabled_functions | wc -l`
-if [ $cnt -eq $cnt2 ]; then
+if [ $cnt -ne $cnt2 ]; then
 	exit_fail
 fi
 
@@ -62,11 +68,15 @@ if [ $cnt -ne $ocnt ]; then
 fi
 
 echo "f:myevent4 $PLACE" >> dynamic_events
+grep -q $PLACE enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -le $ocnt ]; then
+	exit_fail
+fi
 
 echo 1 > events/fprobes/myevent4/enable
-# Should only have one enabled
-cnt=`cat enabled_functions | wc -l`
-if [ $cnt -ne $((ocnt + 1)) ]; then
+cnt2=`cat enabled_functions | wc -l`
+if [ $cnt2 -ne $cnt ]; then
 	exit_fail
 fi
 
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* [PATCH] selftests/ftrace: Fix BAD_TP_NAME marker in fprobe_syntax_errors
From: Cao Ruichuang @ 2026-04-07 12:11 UTC (permalink / raw)
  To: rostedt, mhiramat
  Cc: mathieu.desnoyers, shuah, linux-kernel, linux-trace-kernel,
	linux-kselftest

The BAD_TP_NAME check currently expects the error marker for
`t kmem/kfree` to point at the slash. Current kernels report
this parse error at the start of the invalid tracepoint name,
so the selftest fails even though the input is still rejected.

Move the caret to match the reported error position.

Signed-off-by: Cao Ruichuang <create0818@163.com>
---
 .../selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index fee479295..e51f642c3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -27,7 +27,7 @@ check_error 'f:^foo.1/bar vfs_read'	# BAD_GROUP_NAME
 check_error 'f:^ vfs_read'		# NO_EVENT_NAME
 check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read'	# EVENT_TOO_LONG
 check_error 'f:foo/^bar.1 vfs_read'	# BAD_EVENT_NAME
-check_error 't kmem^/kfree'       # BAD_TP_NAME
+check_error 't ^kmem/kfree'       # BAD_TP_NAME
 
 check_error 'f vfs_read ^$stack10000'	# BAD_STACK_NUM
 
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related

* Re: [RFC PATCH bpf-next v5 1/2] tracing: Prefer vmlinux symbols over module symbols for unqualified kprobes
From: Jiri Olsa @ 2026-04-07 12:59 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: andrey.grodzovsky, bpf, linux-trace-kernel, ast, daniel, andrii,
	rostedt, mhiramat, ihor.solodrai, emil, linux-open-source,
	martin.lau, eddyz87, yonghong.song, clm
In-Reply-To: <669779524c357c9790196e8e9ce8da8b21a71756c96e828252fead311fdc44c3@mail.kernel.org>

On Mon, Apr 06, 2026 at 08:15:10PM +0000, bot+bpf-ci@kernel.org wrote:
> > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
> > Suggested-by: Ihor Solodrai <ihor.solodrai@linux.dev>
> 
> This commit fixes a bug introduced when module symbol counting was
> added to number_of_same_symbols(). Would it be worth adding a
> Fixes: tag?
> 
> Suggested:
> Fixes: 926fe783c8a6 ("tracing/kprobes: Fix symbol counting logic by looking at modules as well")

there's also:
  9d8616034f16 tracing/kprobes: Add symbol counting check when module loads

jirka

> 
> That commit added module_kallsyms_on_each_symbol() to
> number_of_same_symbols(), which caused unqualified kprobe targets
> matching both vmlinux and a module to return count > 1 and fail
> with -EADDRNOTAVAIL.
> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/24047967861


^ permalink raw reply

* Re: [RFC PATCH bpf-next v5 2/2] selftests/bpf: Add tests for duplicate kprobe symbol handling
From: Jiri Olsa @ 2026-04-07 12:59 UTC (permalink / raw)
  To: Andrey Grodzovsky
  Cc: bpf, linux-trace-kernel, ast, daniel, andrii, rostedt, mhiramat,
	ihor.solodrai, emil, linux-open-source
In-Reply-To: <20260406193158.754498-3-andrey.grodzovsky@crowdstrike.com>

On Mon, Apr 06, 2026 at 03:31:58PM -0400, Andrey Grodzovsky wrote:

SNIP

> +static void test_attach_probe_dup_sym(enum probe_attach_mode attach_mode)
> +{
> +	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
> +	struct bpf_link *kprobe_link, *kretprobe_link;
> +	struct test_attach_probe_manual *skel;
> +	int err;
> +
> +	/* Load module with duplicate symbol */
> +	err = load_module("bpf_testmod_dup_sym.ko", false);
> +	if (!ASSERT_OK(err, "load_bpf_testmod_dup_sym")) {
> +		test__skip();
> +		return;
> +	}
> +
> +	skel = test_attach_probe_manual__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "skel_dup_sym_open_and_load"))
> +		goto unload_module;
> +
> +	/* manual-attach kprobe/kretprobe with duplicate symbol present */
> +	kprobe_opts.attach_mode = attach_mode;
> +	kprobe_opts.retprobe = false;
> +	kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
> +						      SYS_NANOSLEEP_KPROBE_NAME,
> +						      &kprobe_opts);
> +	if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_dup_sym"))
> +		goto cleanup;
> +	skel->links.handle_kprobe = kprobe_link;
> +
> +	kprobe_opts.retprobe = true;
> +	kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
> +							 SYS_NANOSLEEP_KPROBE_NAME,
> +							 &kprobe_opts);

maybe add tests for attaching the shadow module function as well?

> diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_dup_sym.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_dup_sym.c
> new file mode 100644
> index 000000000000..0e12f68afe3a
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_dup_sym.c
> @@ -0,0 +1,48 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2025 CrowdStrike */
> +/* Test module for duplicate kprobe symbol handling */
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +
> +/* Duplicate symbol to test kprobe attachment with duplicate symbols.
> + * This creates a duplicate of the syscall wrapper used in attach_probe tests.
> + * The libbpf fix should handle this by preferring the vmlinux symbol.
> + * This function should NEVER be called - kprobes should attach to vmlinux version.
> + */
> +#ifdef __x86_64__
> +int __x64_sys_nanosleep(void);
> +noinline int __x64_sys_nanosleep(void)
> +#elif defined(__s390x__)
> +int __s390x_sys_nanosleep(void);
> +noinline int __s390x_sys_nanosleep(void)
> +#elif defined(__aarch64__)
> +int __arm64_sys_nanosleep(void);
> +noinline int __arm64_sys_nanosleep(void)
> +#elif defined(__riscv)
> +int __riscv_sys_nanosleep(void);
> +noinline int __riscv_sys_nanosleep(void)
> +#else
> +int sys_nanosleep(void);
> +noinline int sys_nanosleep(void)
> +#endif

could we use module_fentry_shadow instead? it's in kernel and in bpf_testmod
for fentry shadowing test.. it's not executed via test_run but it could be
added or we just don't run it

jirka

> +{
> +	WARN_ONCE(1, "bpf_testmod_dup_sym: dummy nanosleep symbol called - this should never execute!\n");
> +	return -EINVAL;
> +}
> +
> +static int __init bpf_testmod_dup_sym_init(void)
> +{
> +	return 0;
> +}
> +
> +static void __exit bpf_testmod_dup_sym_exit(void)
> +{
> +}
> +
> +module_init(bpf_testmod_dup_sym_init);
> +module_exit(bpf_testmod_dup_sym_exit);
> +
> +MODULE_AUTHOR("Andrey Grodzovsky");
> +MODULE_DESCRIPTION("BPF selftest duplicate symbol module");
> +MODULE_LICENSE("GPL");
> -- 
> 2.34.1
> 

^ permalink raw reply

* Re: [RFC PATCH bpf-next v5 1/2] tracing: Prefer vmlinux symbols over module symbols for unqualified kprobes
From: Jiri Olsa @ 2026-04-07 12:59 UTC (permalink / raw)
  To: Andrey Grodzovsky
  Cc: bpf, linux-trace-kernel, ast, daniel, andrii, rostedt, mhiramat,
	ihor.solodrai, emil, linux-open-source
In-Reply-To: <20260406193158.754498-2-andrey.grodzovsky@crowdstrike.com>

On Mon, Apr 06, 2026 at 03:31:57PM -0400, Andrey Grodzovsky wrote:
> When an unqualified kprobe target exists in both vmlinux and a loaded
> module, number_of_same_symbols() returns a count greater than 1,
> causing kprobe attachment to fail with -EADDRNOTAVAIL even though the
> vmlinux symbol is unambiguous.
> 
> When no module qualifier is given and the symbol is found in vmlinux,
> return the vmlinux-only count without scanning loaded modules. This
> preserves the existing behavior for all other cases:
> - Symbol only in a module: vmlinux count is 0, falls through to module
>   scan as before.
> - Symbol qualified with MOD:SYM: mod != NULL, unchanged path.
> - Symbol ambiguous within vmlinux itself: count > 1 is returned as-is.
> 
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
> Suggested-by: Ihor Solodrai <ihor.solodrai@linux.dev>

lgtm, kprobe_multi seems to behave like that already, maybe you could add a test for that as well

Acked-by: Jiri Olsa <jolsa@kernel.org>

jirka


> ---
>  kernel/trace/trace_kprobe.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index a5dbb72528e0..99c41ea8b6d7 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -765,6 +765,13 @@ static unsigned int number_of_same_symbols(const char *mod, const char *func_nam
>  	if (!mod)
>  		kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count);
>  
> +	/* If the symbol is found in vmlinux, use vmlinux resolution only.
> +	 * This prevents module symbols from shadowing vmlinux symbols
> +	 * and causing -EADDRNOTAVAIL for unqualified kprobe targets.
> +	 */
> +	if (!mod && ctx.count > 0)
> +		return ctx.count;
> +
>  	module_kallsyms_on_each_symbol(mod, count_mod_symbols, &ctx);
>  
>  	return ctx.count;
> -- 
> 2.34.1
> 

^ permalink raw reply

* Re: [PATCH v2 12/17] landlock: Add tracepoints for ptrace and scope denials
From: Mickaël Salaün @ 2026-04-07 13:00 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Christian Brauner, Günther Noack, Jann Horn, Jeff Xu,
	Justin Suess, Kees Cook, Masami Hiramatsu, Mathieu Desnoyers,
	Matthieu Buffet, Mikhail Ivanov, Tingmao Wang, kernel-team,
	linux-fsdevel, linux-security-module, linux-trace-kernel
In-Reply-To: <20260406110123.4072a765@gandalf.local.home>

On Mon, Apr 06, 2026 at 11:01:23AM -0400, Steven Rostedt wrote:
> On Mon,  6 Apr 2026 16:37:10 +0200
> Mickaël Salaün <mic@digikod.net> wrote:
> 
> > ---
> >  include/trace/events/landlock.h | 135 ++++++++++++++++++++++++++++++++
> >  security/landlock/log.c         |  20 +++++
> >  2 files changed, 155 insertions(+)
> > 
> > diff --git a/include/trace/events/landlock.h b/include/trace/events/landlock.h
> > index 1afab091efba..9f96c9897f44 100644
> > --- a/include/trace/events/landlock.h
> > +++ b/include/trace/events/landlock.h
> > @@ -11,6 +11,7 @@
> >  #define _TRACE_LANDLOCK_H
> >  
> >  #include <linux/tracepoint.h>
> > +#include <net/af_unix.h>
> >  
> >  struct dentry;
> >  struct landlock_domain;
> > @@ -19,6 +20,7 @@ struct landlock_rule;
> >  struct landlock_ruleset;
> >  struct path;
> >  struct sock;
> > +struct task_struct;
> >  
> >  /**
> >   * DOC: Landlock trace events
> > @@ -433,6 +435,139 @@ TRACE_EVENT(
> >  		__entry->log_new_exec, __entry->blockers, __entry->sport,
> >  		__entry->dport));
> >  
> > +/**
> > + * landlock_deny_ptrace - ptrace access denied
> > + * @hierarchy: Hierarchy node that blocked the access (never NULL)
> > + * @same_exec: Whether the current task is the same executable that called
> > + *             landlock_restrict_self() for the denying hierarchy node
> > + * @tracee: Target task (never NULL); eBPF can read pid, comm, cred,
> > + *          namespaces, and cgroup via BTF
> > + */
> > +TRACE_EVENT(
> > +	landlock_deny_ptrace,
> > +
> > +	TP_PROTO(const struct landlock_hierarchy *hierarchy, bool same_exec,
> > +		 const struct task_struct *tracee),
> > +
> > +	TP_ARGS(hierarchy, same_exec, tracee),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(__u64, domain_id) __field(bool, same_exec)
> > +			__field(u32, log_same_exec) __field(u32, log_new_exec)
> > +				__field(pid_t, tracee_pid)
> > +					__string(tracee_comm, tracee->comm)),
> 
> Event formats are different than normal macro formatting. Please use the
> event formatting. The above is a defined structure that is being created
> for use. Keep it looking like a structure:
> 
> 	TP_STRUCT__entry(
> 		__field(	__u64,		domain_id)
> 		__field(	bool,		same_exec)
> 		__field(	u32,		log_same_exec)
> 		__field(	u32,		log_new_exec)
> 		__field(	pid_t,		tracee_pid)
> 		__string(	tracee_comm,	tracee->comm)
> 	),

I was using clang-format, but it doesn't make sense here, I'll fix it.

> 
> See how the above resembles:
> 
> struct entry {
> 	__u64		domain_id;
> 	bool		same_exec;
> 	u32		log_same_exec;
> 	u32		log_new_exec;
> 	pid_t		tracee_pid;
> 	string		tracee_comm;
> };
> 
> Because that's pretty much what the trace event TP_STRUCT__entry() is going
> to do with it. (The string will obviously be something else).
> 
> This way it's also easy to spot holes in the structure that is written
> into the ring buffer. The "same_exec" being a bool followed by two u32
> types, is going to cause a hole. Move it to between tracee_pid and
> tracee_comm.

Actually, the log_* fields should be bool too.  Anyway, is it a concern
that the ring buffer leaks (previous event) kernel memory or is the
concern mostly about avoiding wasted space and making it easy to spot
holes even if it's OK?

> 
> Please fix the other events too.

Sure. Thanks!

> 
> -- Steve
> 
> 
> > +
> > +	TP_fast_assign(__entry->domain_id = hierarchy->id;
> > +		       __entry->same_exec = same_exec;
> > +		       __entry->log_same_exec = hierarchy->log_same_exec;
> > +		       __entry->log_new_exec = hierarchy->log_new_exec;
> > +		       __entry->tracee_pid =
> > +			       task_tgid_nr((struct task_struct *)tracee);
> > +		       __assign_str(tracee_comm);),
> > +
> > +	TP_printk(
> > +		"domain=%llx same_exec=%d log_same_exec=%u log_new_exec=%u tracee_pid=%d comm=%s",
> > +		__entry->domain_id, __entry->same_exec, __entry->log_same_exec,
> > +		__entry->log_new_exec, __entry->tracee_pid,
> > +		__print_untrusted_str(tracee_comm)));

Are you OK with this new helper?

> > +
> >
> 

^ permalink raw reply

* Re: [PATCH v4 0/5] locking: contended_release tracepoint instrumentation
From: Dmitry Ilvokhin @ 2026-04-07 13:10 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long,
	Thomas Bogendoerfer, Juergen Gross, Ajay Kaher, Alexey Makhalov,
	Broadcom internal kernel review list, Thomas Gleixner,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Arnd Bergmann,
	Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers
  Cc: linux-kernel, linux-mips, virtualization, linux-arch, linux-mm,
	linux-trace-kernel, kernel-team
In-Reply-To: <cover.1774536681.git.d@ilvokhin.com>

Hi,

Just a gentle ping on this series.

I'd appreciate any feedback. The spinning locks part (patch 5)
would particularly benefit from review.

Peter, Steven, any thoughts on that part would be greatly appreciated.

^ permalink raw reply

* [PATCH 00/24] vfs/nfsd: add support for CB_NOTIFY callbacks in directory delegations
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton

This patchset builds on the directory delegation work we did a few
months ago, to add support for CB_NOTIFY callbacks for some events. In
particular, creates, unlinks and renames. The server also sends updated
directory attributes in the notifications. With this support, the client
can register interest in a directory and get notifications about changes
within it without losing its lease.

The series starts with patches to allow the vfs to ignore certain types
of events on directories. nfsd can then request these sorts of
delegations on directories, and then set up inotify watches on the
directory to trigger sending CB_NOTIFY events.

This has mainly been tested with pynfs, with some new testcases that
I'll be posting soon. They seem to work fine with those tests, but I
don't think we'll want to merge these until we have a complete
client-side implementation to test against.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Jeff Layton (24):
      filelock: add support for ignoring deleg breaks for dir change events
      filelock: add a tracepoint to start of break_lease()
      filelock: add an inode_lease_ignore_mask helper
      nfsd: add protocol support for CB_NOTIFY
      nfs_common: add new NOTIFY4_* flags proposed in RFC8881bis
      nfsd: allow nfsd to get a dir lease with an ignore mask
      vfs: add fsnotify_modify_mark_mask()
      nfsd: update the fsnotify mark when setting or removing a dir delegation
      nfsd: make nfsd4_callback_ops->prepare operation bool return
      nfsd: add callback encoding and decoding linkages for CB_NOTIFY
      nfsd: use RCU to protect fi_deleg_file
      nfsd: add data structures for handling CB_NOTIFY
      nfsd: add notification handlers for dir events
      nfsd: add tracepoint to dir_event handler
      nfsd: apply the notify mask to the delegation when requested
      nfsd: add helper to marshal a fattr4 from completed args
      nfsd: allow nfsd4_encode_fattr4_change() to work with no export
      nfsd: send basic file attributes in CB_NOTIFY
      nfsd: allow encoding a filehandle into fattr4 without a svc_fh
      nfsd: add a fi_connectable flag to struct nfs4_file
      nfsd: add the filehandle to returned attributes in CB_NOTIFY
      nfsd: properly track requested child attributes
      nfsd: track requested dir attributes
      nfsd: add support to CB_NOTIFY for dir attribute changes

 Documentation/sunrpc/xdr/nfs4_1.x    | 264 ++++++++++++++-
 fs/attr.c                            |   2 +-
 fs/locks.c                           |  89 +++++-
 fs/namei.c                           |  31 +-
 fs/nfsd/filecache.c                  |  57 +++-
 fs/nfsd/nfs4callback.c               |  60 +++-
 fs/nfsd/nfs4layouts.c                |   5 +-
 fs/nfsd/nfs4proc.c                   |  15 +
 fs/nfsd/nfs4state.c                  | 524 ++++++++++++++++++++++++++----
 fs/nfsd/nfs4xdr.c                    | 300 ++++++++++++++---
 fs/nfsd/nfs4xdr_gen.c                | 601 ++++++++++++++++++++++++++++++++++-
 fs/nfsd/nfs4xdr_gen.h                |  20 +-
 fs/nfsd/state.h                      |  70 +++-
 fs/nfsd/trace.h                      |  21 ++
 fs/nfsd/xdr4.h                       |   5 +
 fs/nfsd/xdr4cb.h                     |  12 +
 fs/notify/mark.c                     |  29 ++
 fs/posix_acl.c                       |   4 +-
 fs/xattr.c                           |   4 +-
 include/linux/filelock.h             |  54 +++-
 include/linux/fsnotify_backend.h     |   1 +
 include/linux/nfs4.h                 | 127 --------
 include/linux/sunrpc/xdrgen/nfs4_1.h | 291 ++++++++++++++++-
 include/trace/events/filelock.h      |  38 ++-
 include/uapi/linux/nfs4.h            |   2 -
 25 files changed, 2321 insertions(+), 305 deletions(-)
---
base-commit: bd5b9fd5e3d55bc412cec4bebe5a11da2151de4a
change-id: 20260325-dir-deleg-339066dd1017

Best regards,
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply

* [PATCH 01/24] filelock: add support for ignoring deleg breaks for dir change events
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

If an NFS client requests a directory delegation with a notification
bitmask covering directory change events, the server shouldn't recall
the delegation. Instead the client will be notified of the change after
the fact.

Add support for ignoring lease breaks on directory changes. Add a new
flags parameter to try_break_deleg() and teach __break_lease how to
ignore certain types of delegation break events.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/attr.c                       |  2 +-
 fs/locks.c                      | 55 +++++++++++++++++++++++++++++++++++------
 fs/namei.c                      | 31 ++++++++++++-----------
 fs/posix_acl.c                  |  4 +--
 fs/xattr.c                      |  4 +--
 include/linux/filelock.h        | 53 +++++++++++++++++++++++++++------------
 include/trace/events/filelock.h |  5 +++-
 7 files changed, 110 insertions(+), 44 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index e7d7c6d19fe9..28744f0e9ff4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -547,7 +547,7 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
 	 * breaking the delegation in this case.
 	 */
 	if (!(ia_valid & ATTR_DELEG)) {
-		error = try_break_deleg(inode, delegated_inode);
+		error = try_break_deleg(inode, 0, delegated_inode);
 		if (error)
 			return error;
 	}
diff --git a/fs/locks.c b/fs/locks.c
index 8e44b1f6c15a..dafa0752fdce 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1597,15 +1597,52 @@ any_leases_conflict(struct inode *inode, struct file_lease *breaker)
 	return false;
 }
 
+static bool
+ignore_dir_deleg_break(struct file_lease *fl, unsigned int flags)
+{
+	if ((flags & LEASE_BREAK_DIR_CREATE) && (fl->c.flc_flags & FL_IGN_DIR_CREATE))
+		return true;
+	if ((flags & LEASE_BREAK_DIR_DELETE) && (fl->c.flc_flags & FL_IGN_DIR_DELETE))
+		return true;
+	if ((flags & LEASE_BREAK_DIR_RENAME) && (fl->c.flc_flags & FL_IGN_DIR_RENAME))
+		return true;
+
+	return false;
+}
+
+static bool
+visible_leases_remaining(struct inode *inode, unsigned int flags)
+{
+	struct file_lock_context *ctx = locks_inode_context(inode);
+	struct file_lease *fl;
+
+	lockdep_assert_held(&ctx->flc_lock);
+
+	if (list_empty(&ctx->flc_lease))
+		return false;
+
+	if (!S_ISDIR(inode->i_mode))
+		return true;
+
+	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
+		if (!ignore_dir_deleg_break(fl, flags))
+			return true;
+	}
+	return false;
+}
+
 /**
- *	__break_lease	-	revoke all outstanding leases on file
- *	@inode: the inode of the file to return
- *	@flags: LEASE_BREAK_* flags
+ * __break_lease	-	revoke all outstanding leases on file
+ * @inode: the inode of the file to return
+ * @flags: LEASE_BREAK_* flags
  *
- *	break_lease (inlined for speed) has checked there already is at least
- *	some kind of lock (maybe a lease) on this file.  Leases are broken on
- *	a call to open() or truncate().  This function can block waiting for the
- *	lease break unless you specify LEASE_BREAK_NONBLOCK.
+ * break_lease (inlined for speed) has checked there already is at least
+ * some kind of lock (maybe a lease) on this file. Leases and Delegations
+ * are broken on a call to open() or truncate(). Delegations are also
+ * broken on any event that would change the ctime. Directory delegations
+ * are broken whenever the directory changes (unless the delegation is set
+ * up to ignore the event). This function can block waiting for the lease
+ * break unless you specify LEASE_BREAK_NONBLOCK.
  */
 int __break_lease(struct inode *inode, unsigned int flags)
 {
@@ -1655,6 +1692,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
 		if (!leases_conflict(&fl->c, &new_fl->c))
 			continue;
+		if (S_ISDIR(inode->i_mode) && ignore_dir_deleg_break(fl, flags))
+			continue;
 		if (want_write) {
 			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
 				continue;
@@ -1670,7 +1709,7 @@ int __break_lease(struct inode *inode, unsigned int flags)
 			locks_delete_lock_ctx(&fl->c, &dispose);
 	}
 
-	if (list_empty(&ctx->flc_lease))
+	if (!visible_leases_remaining(inode, flags))
 		goto out;
 
 	if (flags & LEASE_BREAK_NONBLOCK) {
diff --git a/fs/namei.c b/fs/namei.c
index 9e5500dad14f..e3cbd9f877bd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4176,7 +4176,7 @@ int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode,
 	error = security_inode_create(dir, dentry, mode);
 	if (error)
 		return error;
-	error = try_break_deleg(dir, di);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, di);
 	if (error)
 		return error;
 	error = dir->i_op->create(idmap, dir, dentry, mode, true);
@@ -4475,7 +4475,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode && (open_flag & O_CREAT)) {
 		/* but break the directory lease first! */
-		error = try_break_deleg(dir_inode, delegated_inode);
+		error = try_break_deleg(dir_inode, LEASE_BREAK_DIR_CREATE, delegated_inode);
 		if (error)
 			goto out_dput;
 
@@ -5091,7 +5091,7 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	if (error)
 		return error;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 	if (error)
 		return error;
 
@@ -5232,7 +5232,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	if (max_links && dir->i_nlink >= max_links)
 		goto err;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 	if (error)
 		goto err;
 
@@ -5337,7 +5337,7 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
 	if (error)
 		goto out;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_DELETE, delegated_inode);
 	if (error)
 		goto out;
 
@@ -5467,10 +5467,10 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
 	else {
 		error = security_inode_unlink(dir, dentry);
 		if (!error) {
-			error = try_break_deleg(dir, delegated_inode);
+			error = try_break_deleg(dir, LEASE_BREAK_DIR_DELETE, delegated_inode);
 			if (error)
 				goto out;
-			error = try_break_deleg(target, delegated_inode);
+			error = try_break_deleg(target, 0, delegated_inode);
 			if (error)
 				goto out;
 			error = dir->i_op->unlink(dir, dentry);
@@ -5614,7 +5614,7 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	if (error)
 		return error;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 	if (error)
 		return error;
 
@@ -5745,9 +5745,9 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
 	else if (max_links && inode->i_nlink >= max_links)
 		error = -EMLINK;
 	else {
-		error = try_break_deleg(dir, delegated_inode);
+		error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 		if (!error)
-			error = try_break_deleg(inode, delegated_inode);
+			error = try_break_deleg(inode, 0, delegated_inode);
 		if (!error)
 			error = dir->i_op->link(old_dentry, dir, new_dentry);
 	}
@@ -6011,21 +6011,24 @@ int vfs_rename(struct renamedata *rd)
 		    old_dir->i_nlink >= max_links)
 			goto out;
 	}
-	error = try_break_deleg(old_dir, delegated_inode);
+	error = try_break_deleg(old_dir,
+				old_dir == new_dir ? LEASE_BREAK_DIR_RENAME :
+						     LEASE_BREAK_DIR_DELETE,
+				delegated_inode);
 	if (error)
 		goto out;
 	if (new_dir != old_dir) {
-		error = try_break_deleg(new_dir, delegated_inode);
+		error = try_break_deleg(new_dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 		if (error)
 			goto out;
 	}
 	if (!is_dir) {
-		error = try_break_deleg(source, delegated_inode);
+		error = try_break_deleg(source, 0, delegated_inode);
 		if (error)
 			goto out;
 	}
 	if (target && !new_is_dir) {
-		error = try_break_deleg(target, delegated_inode);
+		error = try_break_deleg(target, 0, delegated_inode);
 		if (error)
 			goto out;
 	}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 12591c95c925..b4bfe4ddf64e 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -1126,7 +1126,7 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (error)
 		goto out_inode_unlock;
 
-	error = try_break_deleg(inode, &delegated_inode);
+	error = try_break_deleg(inode, 0, &delegated_inode);
 	if (error)
 		goto out_inode_unlock;
 
@@ -1234,7 +1234,7 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (error)
 		goto out_inode_unlock;
 
-	error = try_break_deleg(inode, &delegated_inode);
+	error = try_break_deleg(inode, 0, &delegated_inode);
 	if (error)
 		goto out_inode_unlock;
 
diff --git a/fs/xattr.c b/fs/xattr.c
index 3e49e612e1ba..6b67a6e76eeb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -288,7 +288,7 @@ __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (error)
 		goto out;
 
-	error = try_break_deleg(inode, delegated_inode);
+	error = try_break_deleg(inode, 0, delegated_inode);
 	if (error)
 		goto out;
 
@@ -546,7 +546,7 @@ __vfs_removexattr_locked(struct mnt_idmap *idmap,
 	if (error)
 		goto out;
 
-	error = try_break_deleg(inode, delegated_inode);
+	error = try_break_deleg(inode, 0, delegated_inode);
 	if (error)
 		goto out;
 
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 5f0a2fb31450..5a19cdb047da 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -4,19 +4,22 @@
 
 #include <linux/fs.h>
 
-#define FL_POSIX	1
-#define FL_FLOCK	2
-#define FL_DELEG	4	/* NFSv4 delegation */
-#define FL_ACCESS	8	/* not trying to lock, just looking */
-#define FL_EXISTS	16	/* when unlocking, test for existence */
-#define FL_LEASE	32	/* lease held on this file */
-#define FL_CLOSE	64	/* unlock on close */
-#define FL_SLEEP	128	/* A blocking lock */
-#define FL_DOWNGRADE_PENDING	256 /* Lease is being downgraded */
-#define FL_UNLOCK_PENDING	512 /* Lease is being broken */
-#define FL_OFDLCK	1024	/* lock is "owned" by struct file */
-#define FL_LAYOUT	2048	/* outstanding pNFS layout */
-#define FL_RECLAIM	4096	/* reclaiming from a reboot server */
+#define FL_POSIX		BIT(0)	/* POSIX lock */
+#define FL_FLOCK		BIT(1)	/* BSD lock */
+#define FL_LEASE		BIT(2)	/* file lease */
+#define FL_DELEG		BIT(3)	/* NFSv4 delegation */
+#define FL_LAYOUT		BIT(4)	/* outstanding pNFS layout */
+#define FL_ACCESS		BIT(5)	/* not trying to lock, just looking */
+#define FL_EXISTS		BIT(6)	/* when unlocking, test for existence */
+#define FL_CLOSE		BIT(7)	/* unlock on close */
+#define FL_SLEEP		BIT(8)	/* A blocking lock */
+#define FL_DOWNGRADE_PENDING	BIT(9)	/* Lease is being downgraded */
+#define FL_UNLOCK_PENDING	BIT(10) /* Lease is being broken */
+#define FL_OFDLCK		BIT(11) /* POSIX lock "owned" by struct file */
+#define FL_RECLAIM		BIT(12) /* reclaiming from a reboot server */
+#define FL_IGN_DIR_CREATE	BIT(13) /* ignore DIR_CREATE events */
+#define FL_IGN_DIR_DELETE	BIT(14) /* ignore DIR_DELETE events */
+#define FL_IGN_DIR_RENAME	BIT(15) /* ignore DIR_RENAME events */
 
 #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
 
@@ -222,6 +225,10 @@ struct file_lease *locks_alloc_lease(void);
 #define LEASE_BREAK_LAYOUT		BIT(2)	// break layouts only
 #define LEASE_BREAK_NONBLOCK		BIT(3)	// non-blocking break
 #define LEASE_BREAK_OPEN_RDONLY		BIT(4)	// readonly open event
+#define LEASE_BREAK_DIR_CREATE		BIT(6)  // dir deleg create event
+#define LEASE_BREAK_DIR_DELETE		BIT(7)  // dir deleg delete event
+#define LEASE_BREAK_DIR_RENAME		BIT(8)  // dir deleg rename event
+
 
 int __break_lease(struct inode *inode, unsigned int flags);
 void lease_get_mtime(struct inode *, struct timespec64 *time);
@@ -516,12 +523,26 @@ static inline bool is_delegated(struct delegated_inode *di)
 	return di->di_inode;
 }
 
-static inline int try_break_deleg(struct inode *inode,
+/**
+ * try_break_deleg - do a non-blocking delegation break
+ * @inode: inode that should have its delegations broken
+ * @flags: extra LEASE_BREAK_* flags to pass to break_deleg()
+ * @di: returns pointer to delegated inode (may be NULL)
+ *
+ * Break delegations in a non-blocking fashion. If there are
+ * outstanding delegations and @di is set, then an extra reference
+ * will be taken on @inode and @di->di_inode will be populated so
+ * that it may be waited upon.
+ *
+ * Returns 0 if there is no need to wait or an error. If -EWOULDBLOCK
+ * is returned, then @di will be populated (if non-NULL).
+ */
+static inline int try_break_deleg(struct inode *inode, unsigned int flags,
 				  struct delegated_inode *di)
 {
 	int ret;
 
-	ret = break_deleg(inode, LEASE_BREAK_NONBLOCK);
+	ret = break_deleg(inode, flags | LEASE_BREAK_NONBLOCK);
 	if (ret == -EWOULDBLOCK && di) {
 		di->di_inode = inode;
 		ihold(inode);
@@ -574,7 +595,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags)
 	return 0;
 }
 
-static inline int try_break_deleg(struct inode *inode,
+static inline int try_break_deleg(struct inode *inode, unsigned int flags,
 				  struct delegated_inode *delegated_inode)
 {
 	return 0;
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index 370016c38a5b..ef4bb0afb86a 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -28,7 +28,10 @@
 		{ FL_DOWNGRADE_PENDING,	"FL_DOWNGRADE_PENDING" },	\
 		{ FL_UNLOCK_PENDING,	"FL_UNLOCK_PENDING" },		\
 		{ FL_OFDLCK,		"FL_OFDLCK" },			\
-		{ FL_RECLAIM,		"FL_RECLAIM"})
+		{ FL_RECLAIM,		"FL_RECLAIM" },			\
+		{ FL_IGN_DIR_CREATE,	"FL_IGN_DIR_CREATE" },		\
+		{ FL_IGN_DIR_DELETE,	"FL_IGN_DIR_DELETE" },		\
+		{ FL_IGN_DIR_RENAME,	"FL_IGN_DIR_RENAME" })
 
 #define show_fl_type(val)				\
 	__print_symbolic(val,				\

-- 
2.53.0


^ permalink raw reply related

* [PATCH 02/24] filelock: add a tracepoint to start of break_lease()
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Add a tracepoint to the start of __break_lease(), mostly to show the LEASE_BREAK_* flags.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c                      |  2 ++
 include/trace/events/filelock.h | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index dafa0752fdce..5af6dca2d46c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1654,6 +1654,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY);
 	int error = 0;
 
+	trace_break_lease(inode, flags);
+
 	if (flags & LEASE_BREAK_LEASE)
 		type = FL_LEASE;
 	else if (flags & LEASE_BREAK_DELEG)
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index ef4bb0afb86a..fff0ee2d452d 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -120,6 +120,39 @@ DEFINE_EVENT(filelock_lock, flock_lock_inode,
 		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
 		TP_ARGS(inode, fl, ret));
 
+#define show_lease_break_flags(val)					\
+	__print_flags(val, "|",						\
+		{ LEASE_BREAK_LEASE,		"LEASE" },		\
+		{ LEASE_BREAK_DELEG,		"DELEG" },		\
+		{ LEASE_BREAK_LAYOUT,		"LAYOUT" },		\
+		{ LEASE_BREAK_NONBLOCK,		"NONBLOCK" },		\
+		{ LEASE_BREAK_OPEN_RDONLY,	"OPEN_RDONLY" },	\
+		{ LEASE_BREAK_DIR_CREATE,	"DIR_CREATE" },		\
+		{ LEASE_BREAK_DIR_DELETE,	"DIR_DELETE" },		\
+		{ LEASE_BREAK_DIR_RENAME,	"DIR_RENAME" })
+
+TRACE_EVENT(break_lease,
+	TP_PROTO(struct inode *inode, unsigned int flags),
+
+	TP_ARGS(inode, flags),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, i_ino)
+		__field(dev_t, s_dev)
+		__field(unsigned int, flags)
+	),
+
+	TP_fast_assign(
+		__entry->s_dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->flags = flags;
+	),
+
+	TP_printk("dev=0x%x:0x%x ino=0x%lx flags=%s",
+		  MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		  __entry->i_ino, show_lease_break_flags(__entry->flags))
+);
+
 DECLARE_EVENT_CLASS(filelock_lease,
 	TP_PROTO(struct inode *inode, struct file_lease *fl),
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH 03/24] filelock: add an inode_lease_ignore_mask helper
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Add a new routine that returns a mask of all dir change events that are
currently ignored by any leases. nfsd will use this to determine how to
configure the fsnotify_mark mask.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c               | 32 ++++++++++++++++++++++++++++++++
 include/linux/filelock.h |  1 +
 2 files changed, 33 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 5af6dca2d46c..04980b065734 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1597,6 +1597,38 @@ any_leases_conflict(struct inode *inode, struct file_lease *breaker)
 	return false;
 }
 
+#define IGNORE_MASK	(FL_IGN_DIR_CREATE | FL_IGN_DIR_DELETE | FL_IGN_DIR_RENAME)
+
+/**
+ * inode_lease_ignore_mask - return union of all ignored inode events for this inode
+ * @inode: inode of which to get ignore mask
+ *
+ * Walk the list of leases, and return the result of all of
+ * their FL_IGN_DIR_* bits or'ed together.
+ */
+u32
+inode_lease_ignore_mask(struct inode *inode)
+{
+	struct file_lock_context *ctx;
+	struct file_lock_core *flc;
+	u32 mask = 0;
+
+	ctx = locks_inode_context(inode);
+	if (!ctx)
+		return 0;
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
+		mask |= flc->flc_flags & IGNORE_MASK;
+		/* If we already have everything, we can stop */
+		if (mask == IGNORE_MASK)
+			break;
+	}
+	spin_unlock(&ctx->flc_lock);
+	return mask;
+}
+EXPORT_SYMBOL_GPL(inode_lease_ignore_mask);
+
 static bool
 ignore_dir_deleg_break(struct file_lease *fl, unsigned int flags)
 {
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 5a19cdb047da..416483b136f1 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -236,6 +236,7 @@ int generic_setlease(struct file *, int, struct file_lease **, void **priv);
 int kernel_setlease(struct file *, int, struct file_lease **, void **);
 int vfs_setlease(struct file *, int, struct file_lease **, void **);
 int lease_modify(struct file_lease *, int, struct list_head *);
+u32 inode_lease_ignore_mask(struct inode *inode);
 
 struct notifier_block;
 int lease_register_notifier(struct notifier_block *);

-- 
2.53.0


^ permalink raw reply related

* [PATCH 04/24] nfsd: add protocol support for CB_NOTIFY
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Add the necessary bits to nfs4_1.x and remove the duplicate definitions
from nfs4.h and the uapi nfs4 header. Regenerate the xdr files.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 Documentation/sunrpc/xdr/nfs4_1.x    | 250 ++++++++++++++-
 fs/nfsd/nfs4xdr_gen.c                | 590 ++++++++++++++++++++++++++++++++++-
 fs/nfsd/nfs4xdr_gen.h                |  20 +-
 fs/nfsd/trace.h                      |   1 +
 include/linux/nfs4.h                 | 127 --------
 include/linux/sunrpc/xdrgen/nfs4_1.h | 280 ++++++++++++++++-
 include/uapi/linux/nfs4.h            |   2 -
 7 files changed, 1129 insertions(+), 141 deletions(-)

diff --git a/Documentation/sunrpc/xdr/nfs4_1.x b/Documentation/sunrpc/xdr/nfs4_1.x
index 5b45547b2ebc..632f5b579c39 100644
--- a/Documentation/sunrpc/xdr/nfs4_1.x
+++ b/Documentation/sunrpc/xdr/nfs4_1.x
@@ -45,19 +45,165 @@ pragma header nfs4;
 /*
  * Basic typedefs for RFC 1832 data type definitions
  */
-typedef hyper		int64_t;
-typedef unsigned int	uint32_t;
+typedef int                  int32_t;
+typedef unsigned int         uint32_t;
+typedef hyper                int64_t;
+typedef unsigned hyper       uint64_t;
+
+const NFS4_VERIFIER_SIZE        = 8;
+const NFS4_FHSIZE               = 128;
+
+enum nfsstat4 {
+ NFS4_OK                = 0,    /* everything is okay      */
+ NFS4ERR_PERM           = 1,    /* caller not privileged   */
+ NFS4ERR_NOENT          = 2,    /* no such file/directory  */
+ NFS4ERR_IO             = 5,    /* hard I/O error          */
+ NFS4ERR_NXIO           = 6,    /* no such device          */
+ NFS4ERR_ACCESS         = 13,   /* access denied           */
+ NFS4ERR_EXIST          = 17,   /* file already exists     */
+ NFS4ERR_XDEV           = 18,   /* different filesystems   */
+
+ /*
+  * Please do not allocate value 19; it was used in NFSv3
+  * and we do not want a value in NFSv3 to have a different
+  * meaning in NFSv4.x.
+  */
+
+ NFS4ERR_NOTDIR         = 20,   /* should be a directory   */
+ NFS4ERR_ISDIR          = 21,   /* should not be directory */
+ NFS4ERR_INVAL          = 22,   /* invalid argument        */
+ NFS4ERR_FBIG           = 27,   /* file exceeds server max */
+ NFS4ERR_NOSPC          = 28,   /* no space on filesystem  */
+ NFS4ERR_ROFS           = 30,   /* read-only filesystem    */
+ NFS4ERR_MLINK          = 31,   /* too many hard links     */
+ NFS4ERR_NAMETOOLONG    = 63,   /* name exceeds server max */
+ NFS4ERR_NOTEMPTY       = 66,   /* directory not empty     */
+ NFS4ERR_DQUOT          = 69,   /* hard quota limit reached*/
+ NFS4ERR_STALE          = 70,   /* file no longer exists   */
+ NFS4ERR_BADHANDLE      = 10001,/* Illegal filehandle      */
+ NFS4ERR_BAD_COOKIE     = 10003,/* READDIR cookie is stale */
+ NFS4ERR_NOTSUPP        = 10004,/* operation not supported */
+ NFS4ERR_TOOSMALL       = 10005,/* response limit exceeded */
+ NFS4ERR_SERVERFAULT    = 10006,/* undefined server error  */
+ NFS4ERR_BADTYPE        = 10007,/* type invalid for CREATE */
+ NFS4ERR_DELAY          = 10008,/* file "busy" - retry     */
+ NFS4ERR_SAME           = 10009,/* nverify says attrs same */
+ NFS4ERR_DENIED         = 10010,/* lock unavailable        */
+ NFS4ERR_EXPIRED        = 10011,/* lock lease expired      */
+ NFS4ERR_LOCKED         = 10012,/* I/O failed due to lock  */
+ NFS4ERR_GRACE          = 10013,/* in grace period         */
+ NFS4ERR_FHEXPIRED      = 10014,/* filehandle expired      */
+ NFS4ERR_SHARE_DENIED   = 10015,/* share reserve denied    */
+ NFS4ERR_WRONGSEC       = 10016,/* wrong security flavor   */
+ NFS4ERR_CLID_INUSE     = 10017,/* clientid in use         */
+
+ /* NFS4ERR_RESOURCE is not a valid error in NFSv4.1 */
+ NFS4ERR_RESOURCE       = 10018,/* resource exhaustion     */
+
+ NFS4ERR_MOVED          = 10019,/* filesystem relocated    */
+ NFS4ERR_NOFILEHANDLE   = 10020,/* current FH is not set   */
+ NFS4ERR_MINOR_VERS_MISMATCH= 10021,/* minor vers not supp */
+ NFS4ERR_STALE_CLIENTID = 10022,/* server has rebooted     */
+ NFS4ERR_STALE_STATEID  = 10023,/* server has rebooted     */
+ NFS4ERR_OLD_STATEID    = 10024,/* state is out of sync    */
+ NFS4ERR_BAD_STATEID    = 10025,/* incorrect stateid       */
+ NFS4ERR_BAD_SEQID      = 10026,/* request is out of seq.  */
+ NFS4ERR_NOT_SAME       = 10027,/* verify - attrs not same */
+ NFS4ERR_LOCK_RANGE     = 10028,/* overlapping lock range  */
+ NFS4ERR_SYMLINK        = 10029,/* should be file/directory*/
+ NFS4ERR_RESTOREFH      = 10030,/* no saved filehandle     */
+ NFS4ERR_LEASE_MOVED    = 10031,/* some filesystem moved   */
+ NFS4ERR_ATTRNOTSUPP    = 10032,/* recommended attr not sup*/
+ NFS4ERR_NO_GRACE       = 10033,/* reclaim outside of grace*/
+ NFS4ERR_RECLAIM_BAD    = 10034,/* reclaim error at server */
+ NFS4ERR_RECLAIM_CONFLICT= 10035,/* conflict on reclaim    */
+ NFS4ERR_BADXDR         = 10036,/* XDR decode failed       */
+ NFS4ERR_LOCKS_HELD     = 10037,/* file locks held at CLOSE*/
+ NFS4ERR_OPENMODE       = 10038,/* conflict in OPEN and I/O*/
+ NFS4ERR_BADOWNER       = 10039,/* owner translation bad   */
+ NFS4ERR_BADCHAR        = 10040,/* utf-8 char not supported*/
+ NFS4ERR_BADNAME        = 10041,/* name not supported      */
+ NFS4ERR_BAD_RANGE      = 10042,/* lock range not supported*/
+ NFS4ERR_LOCK_NOTSUPP   = 10043,/* no atomic up/downgrade  */
+ NFS4ERR_OP_ILLEGAL     = 10044,/* undefined operation     */
+ NFS4ERR_DEADLOCK       = 10045,/* file locking deadlock   */
+ NFS4ERR_FILE_OPEN      = 10046,/* open file blocks op.    */
+ NFS4ERR_ADMIN_REVOKED  = 10047,/* lockowner state revoked */
+ NFS4ERR_CB_PATH_DOWN   = 10048,/* callback path down      */
+
+ /* NFSv4.1 errors start here. */
+
+ NFS4ERR_BADIOMODE      = 10049,
+ NFS4ERR_BADLAYOUT      = 10050,
+ NFS4ERR_BAD_SESSION_DIGEST = 10051,
+ NFS4ERR_BADSESSION     = 10052,
+ NFS4ERR_BADSLOT        = 10053,
+ NFS4ERR_COMPLETE_ALREADY = 10054,
+ NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
+ NFS4ERR_DELEG_ALREADY_WANTED = 10056,
+ NFS4ERR_BACK_CHAN_BUSY = 10057,/*backchan reqs outstanding*/
+ NFS4ERR_LAYOUTTRYLATER = 10058,
+ NFS4ERR_LAYOUTUNAVAILABLE = 10059,
+ NFS4ERR_NOMATCHING_LAYOUT = 10060,
+ NFS4ERR_RECALLCONFLICT = 10061,
+ NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
+ NFS4ERR_SEQ_MISORDERED = 10063,/* unexpected seq.ID in req*/
+ NFS4ERR_SEQUENCE_POS   = 10064,/* [CB_]SEQ. op not 1st op */
+ NFS4ERR_REQ_TOO_BIG    = 10065,/* request too big         */
+ NFS4ERR_REP_TOO_BIG    = 10066,/* reply too big           */
+ NFS4ERR_REP_TOO_BIG_TO_CACHE =10067,/* rep. not all cached*/
+ NFS4ERR_RETRY_UNCACHED_REP =10068,/* retry & rep. uncached*/
+ NFS4ERR_UNSAFE_COMPOUND =10069,/* retry/recovery too hard */
+ NFS4ERR_TOO_MANY_OPS   = 10070,/*too many ops in [CB_]COMP*/
+ NFS4ERR_OP_NOT_IN_SESSION =10071,/* op needs [CB_]SEQ. op */
+ NFS4ERR_HASH_ALG_UNSUPP = 10072, /* hash alg. not supp.   */
+                                /* Error 10073 is unused.  */
+ NFS4ERR_CLIENTID_BUSY  = 10074,/* clientid has state      */
+ NFS4ERR_PNFS_IO_HOLE   = 10075,/* IO to _SPARSE file hole */
+ NFS4ERR_SEQ_FALSE_RETRY= 10076,/* Retry != original req.  */
+ NFS4ERR_BAD_HIGH_SLOT  = 10077,/* req has bad highest_slot*/
+ NFS4ERR_DEADSESSION    = 10078,/*new req sent to dead sess*/
+ NFS4ERR_ENCR_ALG_UNSUPP= 10079,/* encr alg. not supp.     */
+ NFS4ERR_PNFS_NO_LAYOUT = 10080,/* I/O without a layout    */
+ NFS4ERR_NOT_ONLY_OP    = 10081,/* addl ops not allowed    */
+ NFS4ERR_WRONG_CRED     = 10082,/* op done by wrong cred   */
+ NFS4ERR_WRONG_TYPE     = 10083,/* op on wrong type object */
+ NFS4ERR_DIRDELEG_UNAVAIL=10084,/* delegation not avail.   */
+ NFS4ERR_REJECT_DELEG   = 10085,/* cb rejected delegation  */
+ NFS4ERR_RETURNCONFLICT = 10086,/* layout get before return*/
+ NFS4ERR_DELEG_REVOKED  = 10087, /* deleg./layout revoked   */
+ NFS4ERR_PARTNER_NOTSUPP = 10088,
+ NFS4ERR_PARTNER_NO_AUTH = 10089,
+ NFS4ERR_UNION_NOTSUPP = 10090,
+ NFS4ERR_OFFLOAD_DENIED = 10091,
+ NFS4ERR_WRONG_LFS = 10092,
+ NFS4ERR_BADLABEL = 10093,
+ NFS4ERR_OFFLOAD_NO_REQS = 10094,
+ NFS4ERR_NOXATTR = 10095,
+ NFS4ERR_XATTR2BIG = 10096,
+
+ /* always set this to one more than the last one in the enum */
+ NFS4ERR_FIRST_FREE = 10097
+};
 
 /*
  * Basic data types
  */
+typedef opaque		attrlist4<>;
 typedef uint32_t	bitmap4<>;
+typedef opaque		verifier4[NFS4_VERIFIER_SIZE];
+typedef uint64_t        nfs_cookie4;
+typedef opaque		nfs_fh4<NFS4_FHSIZE>;
 
 typedef opaque		utf8string<>;
 typedef utf8string	utf8str_cis;
 typedef utf8string	utf8str_cs;
 typedef utf8string	utf8str_mixed;
 
+typedef utf8str_cs      component4;
+typedef utf8str_cs      linktext4;
+typedef component4      pathname4<>;
+
 /*
  * Timeval
  */
@@ -66,6 +212,21 @@ struct nfstime4 {
 	uint32_t	nseconds;
 };
 
+/*
+ * File attribute container
+ */
+struct fattr4 {
+        bitmap4         attrmask;
+        attrlist4       attr_vals;
+};
+
+/*
+ * Stateid
+ */
+struct stateid4 {
+        uint32_t        seqid;
+        opaque          other[12];
+};
 
 /*
  * The following content was extracted from draft-ietf-nfsv4-delstid
@@ -245,3 +406,88 @@ const FATTR4_ACL_TRUEFORM	= 89;
 const FATTR4_ACL_TRUEFORM_SCOPE	= 90;
 const FATTR4_POSIX_DEFAULT_ACL	= 91;
 const FATTR4_POSIX_ACCESS_ACL	= 92;
+
+/*
+ * Directory notification types.
+ */
+enum notify_type4 {
+        NOTIFY4_CHANGE_CHILD_ATTRS = 0,
+        NOTIFY4_CHANGE_DIR_ATTRS = 1,
+        NOTIFY4_REMOVE_ENTRY = 2,
+        NOTIFY4_ADD_ENTRY = 3,
+        NOTIFY4_RENAME_ENTRY = 4,
+        NOTIFY4_CHANGE_COOKIE_VERIFIER = 5
+};
+
+/* Changed entry information.  */
+struct notify_entry4 {
+        component4      ne_file;
+        fattr4          ne_attrs;
+};
+
+/* Previous entry information */
+struct prev_entry4 {
+        notify_entry4   pe_prev_entry;
+        /* what READDIR returned for this entry */
+        nfs_cookie4     pe_prev_entry_cookie;
+};
+
+struct notify_remove4 {
+        notify_entry4   nrm_old_entry;
+        nfs_cookie4     nrm_old_entry_cookie;
+};
+pragma public notify_remove4;
+
+struct notify_add4 {
+        /*
+         * Information on object
+         * possibly renamed over.
+         */
+        notify_remove4      nad_old_entry<1>;
+        notify_entry4       nad_new_entry;
+        /* what READDIR would have returned for this entry */
+        nfs_cookie4         nad_new_entry_cookie<1>;
+        prev_entry4         nad_prev_entry<1>;
+        bool                nad_last_entry;
+};
+pragma public notify_add4;
+
+struct notify_attr4 {
+        notify_entry4   na_changed_entry;
+};
+pragma public notify_attr4;
+
+struct notify_rename4 {
+        notify_remove4  nrn_old_entry;
+        notify_add4     nrn_new_entry;
+};
+pragma public notify_rename4;
+
+struct notify_verifier4 {
+        verifier4       nv_old_cookieverf;
+        verifier4       nv_new_cookieverf;
+};
+
+/*
+ * Objects of type notify_<>4 and
+ * notify_device_<>4 are encoded in this.
+ */
+typedef opaque notifylist4<>;
+
+struct notify4 {
+        /* composed from notify_type4 or notify_deviceid_type4 */
+        bitmap4         notify_mask;
+        notifylist4     notify_vals;
+};
+
+struct CB_NOTIFY4args {
+        stateid4    cna_stateid;
+        nfs_fh4     cna_fh;
+        notify4     cna_changes<>;
+};
+pragma public CB_NOTIFY4args;
+
+struct CB_NOTIFY4res {
+        nfsstat4    cnr_status;
+};
+pragma public CB_NOTIFY4res;
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
index 824497051b87..5e656d6bbb8e 100644
--- a/fs/nfsd/nfs4xdr_gen.c
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -1,16 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0
 // Generated by xdrgen. Manual edits will be lost.
 // XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
-// XDR specification modification time: Thu Jan  8 23:12:07 2026
+// XDR specification modification time: Wed Mar 25 11:39:22 2026
 
 #include <linux/sunrpc/svc.h>
 
 #include "nfs4xdr_gen.h"
 
 static bool __maybe_unused
-xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr)
+xdrgen_decode_int32_t(struct xdr_stream *xdr, int32_t *ptr)
 {
-	return xdrgen_decode_hyper(xdr, ptr);
+	return xdrgen_decode_int(xdr, ptr);
 }
 
 static bool __maybe_unused
@@ -19,6 +19,155 @@ xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr)
 	return xdrgen_decode_unsigned_int(xdr, ptr);
 }
 
+static bool __maybe_unused
+xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr)
+{
+	return xdrgen_decode_hyper(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_uint64_t(struct xdr_stream *xdr, uint64_t *ptr)
+{
+	return xdrgen_decode_unsigned_hyper(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_nfsstat4(struct xdr_stream *xdr, nfsstat4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	/* Compiler may optimize to a range check for dense enums */
+	switch (val) {
+	case NFS4_OK:
+	case NFS4ERR_PERM:
+	case NFS4ERR_NOENT:
+	case NFS4ERR_IO:
+	case NFS4ERR_NXIO:
+	case NFS4ERR_ACCESS:
+	case NFS4ERR_EXIST:
+	case NFS4ERR_XDEV:
+	case NFS4ERR_NOTDIR:
+	case NFS4ERR_ISDIR:
+	case NFS4ERR_INVAL:
+	case NFS4ERR_FBIG:
+	case NFS4ERR_NOSPC:
+	case NFS4ERR_ROFS:
+	case NFS4ERR_MLINK:
+	case NFS4ERR_NAMETOOLONG:
+	case NFS4ERR_NOTEMPTY:
+	case NFS4ERR_DQUOT:
+	case NFS4ERR_STALE:
+	case NFS4ERR_BADHANDLE:
+	case NFS4ERR_BAD_COOKIE:
+	case NFS4ERR_NOTSUPP:
+	case NFS4ERR_TOOSMALL:
+	case NFS4ERR_SERVERFAULT:
+	case NFS4ERR_BADTYPE:
+	case NFS4ERR_DELAY:
+	case NFS4ERR_SAME:
+	case NFS4ERR_DENIED:
+	case NFS4ERR_EXPIRED:
+	case NFS4ERR_LOCKED:
+	case NFS4ERR_GRACE:
+	case NFS4ERR_FHEXPIRED:
+	case NFS4ERR_SHARE_DENIED:
+	case NFS4ERR_WRONGSEC:
+	case NFS4ERR_CLID_INUSE:
+	case NFS4ERR_RESOURCE:
+	case NFS4ERR_MOVED:
+	case NFS4ERR_NOFILEHANDLE:
+	case NFS4ERR_MINOR_VERS_MISMATCH:
+	case NFS4ERR_STALE_CLIENTID:
+	case NFS4ERR_STALE_STATEID:
+	case NFS4ERR_OLD_STATEID:
+	case NFS4ERR_BAD_STATEID:
+	case NFS4ERR_BAD_SEQID:
+	case NFS4ERR_NOT_SAME:
+	case NFS4ERR_LOCK_RANGE:
+	case NFS4ERR_SYMLINK:
+	case NFS4ERR_RESTOREFH:
+	case NFS4ERR_LEASE_MOVED:
+	case NFS4ERR_ATTRNOTSUPP:
+	case NFS4ERR_NO_GRACE:
+	case NFS4ERR_RECLAIM_BAD:
+	case NFS4ERR_RECLAIM_CONFLICT:
+	case NFS4ERR_BADXDR:
+	case NFS4ERR_LOCKS_HELD:
+	case NFS4ERR_OPENMODE:
+	case NFS4ERR_BADOWNER:
+	case NFS4ERR_BADCHAR:
+	case NFS4ERR_BADNAME:
+	case NFS4ERR_BAD_RANGE:
+	case NFS4ERR_LOCK_NOTSUPP:
+	case NFS4ERR_OP_ILLEGAL:
+	case NFS4ERR_DEADLOCK:
+	case NFS4ERR_FILE_OPEN:
+	case NFS4ERR_ADMIN_REVOKED:
+	case NFS4ERR_CB_PATH_DOWN:
+	case NFS4ERR_BADIOMODE:
+	case NFS4ERR_BADLAYOUT:
+	case NFS4ERR_BAD_SESSION_DIGEST:
+	case NFS4ERR_BADSESSION:
+	case NFS4ERR_BADSLOT:
+	case NFS4ERR_COMPLETE_ALREADY:
+	case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+	case NFS4ERR_DELEG_ALREADY_WANTED:
+	case NFS4ERR_BACK_CHAN_BUSY:
+	case NFS4ERR_LAYOUTTRYLATER:
+	case NFS4ERR_LAYOUTUNAVAILABLE:
+	case NFS4ERR_NOMATCHING_LAYOUT:
+	case NFS4ERR_RECALLCONFLICT:
+	case NFS4ERR_UNKNOWN_LAYOUTTYPE:
+	case NFS4ERR_SEQ_MISORDERED:
+	case NFS4ERR_SEQUENCE_POS:
+	case NFS4ERR_REQ_TOO_BIG:
+	case NFS4ERR_REP_TOO_BIG:
+	case NFS4ERR_REP_TOO_BIG_TO_CACHE:
+	case NFS4ERR_RETRY_UNCACHED_REP:
+	case NFS4ERR_UNSAFE_COMPOUND:
+	case NFS4ERR_TOO_MANY_OPS:
+	case NFS4ERR_OP_NOT_IN_SESSION:
+	case NFS4ERR_HASH_ALG_UNSUPP:
+	case NFS4ERR_CLIENTID_BUSY:
+	case NFS4ERR_PNFS_IO_HOLE:
+	case NFS4ERR_SEQ_FALSE_RETRY:
+	case NFS4ERR_BAD_HIGH_SLOT:
+	case NFS4ERR_DEADSESSION:
+	case NFS4ERR_ENCR_ALG_UNSUPP:
+	case NFS4ERR_PNFS_NO_LAYOUT:
+	case NFS4ERR_NOT_ONLY_OP:
+	case NFS4ERR_WRONG_CRED:
+	case NFS4ERR_WRONG_TYPE:
+	case NFS4ERR_DIRDELEG_UNAVAIL:
+	case NFS4ERR_REJECT_DELEG:
+	case NFS4ERR_RETURNCONFLICT:
+	case NFS4ERR_DELEG_REVOKED:
+	case NFS4ERR_PARTNER_NOTSUPP:
+	case NFS4ERR_PARTNER_NO_AUTH:
+	case NFS4ERR_UNION_NOTSUPP:
+	case NFS4ERR_OFFLOAD_DENIED:
+	case NFS4ERR_WRONG_LFS:
+	case NFS4ERR_BADLABEL:
+	case NFS4ERR_OFFLOAD_NO_REQS:
+	case NFS4ERR_NOXATTR:
+	case NFS4ERR_XATTR2BIG:
+	case NFS4ERR_FIRST_FREE:
+		break;
+	default:
+		return false;
+	}
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_attrlist4(struct xdr_stream *xdr, attrlist4 *ptr)
+{
+	return xdrgen_decode_opaque(xdr, ptr, 0);
+}
+
 static bool __maybe_unused
 xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr)
 {
@@ -30,6 +179,24 @@ xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr)
 	return true;
 }
 
+static bool __maybe_unused
+xdrgen_decode_verifier4(struct xdr_stream *xdr, verifier4 *ptr)
+{
+	return xdr_stream_decode_opaque_fixed(xdr, ptr, NFS4_VERIFIER_SIZE) == 0;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nfs_cookie4(struct xdr_stream *xdr, nfs_cookie4 *ptr)
+{
+	return xdrgen_decode_uint64_t(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_nfs_fh4(struct xdr_stream *xdr, nfs_fh4 *ptr)
+{
+	return xdrgen_decode_opaque(xdr, ptr, NFS4_FHSIZE);
+}
+
 static bool __maybe_unused
 xdrgen_decode_utf8string(struct xdr_stream *xdr, utf8string *ptr)
 {
@@ -54,6 +221,29 @@ xdrgen_decode_utf8str_mixed(struct xdr_stream *xdr, utf8str_mixed *ptr)
 	return xdrgen_decode_utf8string(xdr, ptr);
 }
 
+static bool __maybe_unused
+xdrgen_decode_component4(struct xdr_stream *xdr, component4 *ptr)
+{
+	return xdrgen_decode_utf8str_cs(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_linktext4(struct xdr_stream *xdr, linktext4 *ptr)
+{
+	return xdrgen_decode_utf8str_cs(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_pathname4(struct xdr_stream *xdr, pathname4 *ptr)
+{
+	if (xdr_stream_decode_u32(xdr, &ptr->count) < 0)
+		return false;
+	for (u32 i = 0; i < ptr->count; i++)
+		if (!xdrgen_decode_component4(xdr, &ptr->element[i]))
+			return false;
+	return true;
+}
+
 static bool __maybe_unused
 xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr)
 {
@@ -64,6 +254,26 @@ xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr)
 	return true;
 }
 
+static bool __maybe_unused
+xdrgen_decode_fattr4(struct xdr_stream *xdr, struct fattr4 *ptr)
+{
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->attrmask))
+		return false;
+	if (!xdrgen_decode_attrlist4(xdr, &ptr->attr_vals))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_stateid4(struct xdr_stream *xdr, struct stateid4 *ptr)
+{
+	if (!xdrgen_decode_uint32_t(xdr, &ptr->seqid))
+		return false;
+	if (xdr_stream_decode_opaque_fixed(xdr, ptr->other, 12) < 0)
+		return false;
+	return true;
+}
+
 static bool __maybe_unused
 xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr)
 {
@@ -366,9 +576,160 @@ xdrgen_decode_fattr4_posix_access_acl(struct xdr_stream *xdr, fattr4_posix_acces
  */
 
 static bool __maybe_unused
-xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
+xdrgen_decode_notify_type4(struct xdr_stream *xdr, notify_type4 *ptr)
 {
-	return xdrgen_encode_hyper(xdr, value);
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	/* Compiler may optimize to a range check for dense enums */
+	switch (val) {
+	case NOTIFY4_CHANGE_CHILD_ATTRS:
+	case NOTIFY4_CHANGE_DIR_ATTRS:
+	case NOTIFY4_REMOVE_ENTRY:
+	case NOTIFY4_ADD_ENTRY:
+	case NOTIFY4_RENAME_ENTRY:
+	case NOTIFY4_CHANGE_COOKIE_VERIFIER:
+		break;
+	default:
+		return false;
+	}
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_notify_entry4(struct xdr_stream *xdr, struct notify_entry4 *ptr)
+{
+	if (!xdrgen_decode_component4(xdr, &ptr->ne_file))
+		return false;
+	if (!xdrgen_decode_fattr4(xdr, &ptr->ne_attrs))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_prev_entry4(struct xdr_stream *xdr, struct prev_entry4 *ptr)
+{
+	if (!xdrgen_decode_notify_entry4(xdr, &ptr->pe_prev_entry))
+		return false;
+	if (!xdrgen_decode_nfs_cookie4(xdr, &ptr->pe_prev_entry_cookie))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_decode_notify_remove4(struct xdr_stream *xdr, struct notify_remove4 *ptr)
+{
+	if (!xdrgen_decode_notify_entry4(xdr, &ptr->nrm_old_entry))
+		return false;
+	if (!xdrgen_decode_nfs_cookie4(xdr, &ptr->nrm_old_entry_cookie))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_decode_notify_add4(struct xdr_stream *xdr, struct notify_add4 *ptr)
+{
+	if (xdr_stream_decode_u32(xdr, &ptr->nad_old_entry.count) < 0)
+		return false;
+	if (ptr->nad_old_entry.count > 1)
+		return false;
+	for (u32 i = 0; i < ptr->nad_old_entry.count; i++)
+		if (!xdrgen_decode_notify_remove4(xdr, &ptr->nad_old_entry.element[i]))
+			return false;
+	if (!xdrgen_decode_notify_entry4(xdr, &ptr->nad_new_entry))
+		return false;
+	if (xdr_stream_decode_u32(xdr, &ptr->nad_new_entry_cookie.count) < 0)
+		return false;
+	if (ptr->nad_new_entry_cookie.count > 1)
+		return false;
+	for (u32 i = 0; i < ptr->nad_new_entry_cookie.count; i++)
+		if (!xdrgen_decode_nfs_cookie4(xdr, &ptr->nad_new_entry_cookie.element[i]))
+			return false;
+	if (xdr_stream_decode_u32(xdr, &ptr->nad_prev_entry.count) < 0)
+		return false;
+	if (ptr->nad_prev_entry.count > 1)
+		return false;
+	for (u32 i = 0; i < ptr->nad_prev_entry.count; i++)
+		if (!xdrgen_decode_prev_entry4(xdr, &ptr->nad_prev_entry.element[i]))
+			return false;
+	if (!xdrgen_decode_bool(xdr, &ptr->nad_last_entry))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_decode_notify_attr4(struct xdr_stream *xdr, struct notify_attr4 *ptr)
+{
+	if (!xdrgen_decode_notify_entry4(xdr, &ptr->na_changed_entry))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_decode_notify_rename4(struct xdr_stream *xdr, struct notify_rename4 *ptr)
+{
+	if (!xdrgen_decode_notify_remove4(xdr, &ptr->nrn_old_entry))
+		return false;
+	if (!xdrgen_decode_notify_add4(xdr, &ptr->nrn_new_entry))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_notify_verifier4(struct xdr_stream *xdr, struct notify_verifier4 *ptr)
+{
+	if (!xdrgen_decode_verifier4(xdr, &ptr->nv_old_cookieverf))
+		return false;
+	if (!xdrgen_decode_verifier4(xdr, &ptr->nv_new_cookieverf))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_notifylist4(struct xdr_stream *xdr, notifylist4 *ptr)
+{
+	return xdrgen_decode_opaque(xdr, ptr, 0);
+}
+
+static bool __maybe_unused
+xdrgen_decode_notify4(struct xdr_stream *xdr, struct notify4 *ptr)
+{
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->notify_mask))
+		return false;
+	if (!xdrgen_decode_notifylist4(xdr, &ptr->notify_vals))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_decode_CB_NOTIFY4args(struct xdr_stream *xdr, struct CB_NOTIFY4args *ptr)
+{
+	if (!xdrgen_decode_stateid4(xdr, &ptr->cna_stateid))
+		return false;
+	if (!xdrgen_decode_nfs_fh4(xdr, &ptr->cna_fh))
+		return false;
+	if (xdr_stream_decode_u32(xdr, &ptr->cna_changes.count) < 0)
+		return false;
+	for (u32 i = 0; i < ptr->cna_changes.count; i++)
+		if (!xdrgen_decode_notify4(xdr, &ptr->cna_changes.element[i]))
+			return false;
+	return true;
+}
+
+bool
+xdrgen_decode_CB_NOTIFY4res(struct xdr_stream *xdr, struct CB_NOTIFY4res *ptr)
+{
+	if (!xdrgen_decode_nfsstat4(xdr, &ptr->cnr_status))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_int32_t(struct xdr_stream *xdr, const int32_t value)
+{
+	return xdrgen_encode_int(xdr, value);
 }
 
 static bool __maybe_unused
@@ -377,6 +738,30 @@ xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value)
 	return xdrgen_encode_unsigned_int(xdr, value);
 }
 
+static bool __maybe_unused
+xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
+{
+	return xdrgen_encode_hyper(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_uint64_t(struct xdr_stream *xdr, const uint64_t value)
+{
+	return xdrgen_encode_unsigned_hyper(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_nfsstat4(struct xdr_stream *xdr, nfsstat4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_attrlist4(struct xdr_stream *xdr, const attrlist4 value)
+{
+	return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
+}
+
 static bool __maybe_unused
 xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value)
 {
@@ -388,6 +773,24 @@ xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value)
 	return true;
 }
 
+static bool __maybe_unused
+xdrgen_encode_verifier4(struct xdr_stream *xdr, const verifier4 value)
+{
+	return xdr_stream_encode_opaque_fixed(xdr, value, NFS4_VERIFIER_SIZE) >= 0;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nfs_cookie4(struct xdr_stream *xdr, const nfs_cookie4 value)
+{
+	return xdrgen_encode_uint64_t(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_nfs_fh4(struct xdr_stream *xdr, const nfs_fh4 value)
+{
+	return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
+}
+
 static bool __maybe_unused
 xdrgen_encode_utf8string(struct xdr_stream *xdr, const utf8string value)
 {
@@ -412,6 +815,29 @@ xdrgen_encode_utf8str_mixed(struct xdr_stream *xdr, const utf8str_mixed value)
 	return xdrgen_encode_utf8string(xdr, value);
 }
 
+static bool __maybe_unused
+xdrgen_encode_component4(struct xdr_stream *xdr, const component4 value)
+{
+	return xdrgen_encode_utf8str_cs(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_linktext4(struct xdr_stream *xdr, const linktext4 value)
+{
+	return xdrgen_encode_utf8str_cs(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_pathname4(struct xdr_stream *xdr, const pathname4 value)
+{
+	if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT)
+		return false;
+	for (u32 i = 0; i < value.count; i++)
+		if (!xdrgen_encode_component4(xdr, value.element[i]))
+			return false;
+	return true;
+}
+
 static bool __maybe_unused
 xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value)
 {
@@ -422,6 +848,26 @@ xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value)
 	return true;
 }
 
+static bool __maybe_unused
+xdrgen_encode_fattr4(struct xdr_stream *xdr, const struct fattr4 *value)
+{
+	if (!xdrgen_encode_bitmap4(xdr, value->attrmask))
+		return false;
+	if (!xdrgen_encode_attrlist4(xdr, value->attr_vals))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_stateid4(struct xdr_stream *xdr, const struct stateid4 *value)
+{
+	if (!xdrgen_encode_uint32_t(xdr, value->seqid))
+		return false;
+	if (xdr_stream_encode_opaque_fixed(xdr, value->other, 12) < 0)
+		return false;
+	return true;
+}
+
 static bool __maybe_unused
 xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value)
 {
@@ -567,3 +1013,137 @@ xdrgen_encode_fattr4_posix_access_acl(struct xdr_stream *xdr, const fattr4_posix
 			return false;
 	return true;
 }
+
+static bool __maybe_unused
+xdrgen_encode_notify_type4(struct xdr_stream *xdr, notify_type4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_notify_entry4(struct xdr_stream *xdr, const struct notify_entry4 *value)
+{
+	if (!xdrgen_encode_component4(xdr, value->ne_file))
+		return false;
+	if (!xdrgen_encode_fattr4(xdr, &value->ne_attrs))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_prev_entry4(struct xdr_stream *xdr, const struct prev_entry4 *value)
+{
+	if (!xdrgen_encode_notify_entry4(xdr, &value->pe_prev_entry))
+		return false;
+	if (!xdrgen_encode_nfs_cookie4(xdr, value->pe_prev_entry_cookie))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_encode_notify_remove4(struct xdr_stream *xdr, const struct notify_remove4 *value)
+{
+	if (!xdrgen_encode_notify_entry4(xdr, &value->nrm_old_entry))
+		return false;
+	if (!xdrgen_encode_nfs_cookie4(xdr, value->nrm_old_entry_cookie))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_encode_notify_add4(struct xdr_stream *xdr, const struct notify_add4 *value)
+{
+	if (value->nad_old_entry.count > 1)
+		return false;
+	if (xdr_stream_encode_u32(xdr, value->nad_old_entry.count) != XDR_UNIT)
+		return false;
+	for (u32 i = 0; i < value->nad_old_entry.count; i++)
+		if (!xdrgen_encode_notify_remove4(xdr, &value->nad_old_entry.element[i]))
+			return false;
+	if (!xdrgen_encode_notify_entry4(xdr, &value->nad_new_entry))
+		return false;
+	if (value->nad_new_entry_cookie.count > 1)
+		return false;
+	if (xdr_stream_encode_u32(xdr, value->nad_new_entry_cookie.count) != XDR_UNIT)
+		return false;
+	for (u32 i = 0; i < value->nad_new_entry_cookie.count; i++)
+		if (!xdrgen_encode_nfs_cookie4(xdr, value->nad_new_entry_cookie.element[i]))
+			return false;
+	if (value->nad_prev_entry.count > 1)
+		return false;
+	if (xdr_stream_encode_u32(xdr, value->nad_prev_entry.count) != XDR_UNIT)
+		return false;
+	for (u32 i = 0; i < value->nad_prev_entry.count; i++)
+		if (!xdrgen_encode_prev_entry4(xdr, &value->nad_prev_entry.element[i]))
+			return false;
+	if (!xdrgen_encode_bool(xdr, value->nad_last_entry))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_encode_notify_attr4(struct xdr_stream *xdr, const struct notify_attr4 *value)
+{
+	if (!xdrgen_encode_notify_entry4(xdr, &value->na_changed_entry))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_encode_notify_rename4(struct xdr_stream *xdr, const struct notify_rename4 *value)
+{
+	if (!xdrgen_encode_notify_remove4(xdr, &value->nrn_old_entry))
+		return false;
+	if (!xdrgen_encode_notify_add4(xdr, &value->nrn_new_entry))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_notify_verifier4(struct xdr_stream *xdr, const struct notify_verifier4 *value)
+{
+	if (!xdrgen_encode_verifier4(xdr, value->nv_old_cookieverf))
+		return false;
+	if (!xdrgen_encode_verifier4(xdr, value->nv_new_cookieverf))
+		return false;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_notifylist4(struct xdr_stream *xdr, const notifylist4 value)
+{
+	return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
+}
+
+static bool __maybe_unused
+xdrgen_encode_notify4(struct xdr_stream *xdr, const struct notify4 *value)
+{
+	if (!xdrgen_encode_bitmap4(xdr, value->notify_mask))
+		return false;
+	if (!xdrgen_encode_notifylist4(xdr, value->notify_vals))
+		return false;
+	return true;
+}
+
+bool
+xdrgen_encode_CB_NOTIFY4args(struct xdr_stream *xdr, const struct CB_NOTIFY4args *value)
+{
+	if (!xdrgen_encode_stateid4(xdr, &value->cna_stateid))
+		return false;
+	if (!xdrgen_encode_nfs_fh4(xdr, value->cna_fh))
+		return false;
+	if (xdr_stream_encode_u32(xdr, value->cna_changes.count) != XDR_UNIT)
+		return false;
+	for (u32 i = 0; i < value->cna_changes.count; i++)
+		if (!xdrgen_encode_notify4(xdr, &value->cna_changes.element[i]))
+			return false;
+	return true;
+}
+
+bool
+xdrgen_encode_CB_NOTIFY4res(struct xdr_stream *xdr, const struct CB_NOTIFY4res *value)
+{
+	if (!xdrgen_encode_nfsstat4(xdr, value->cnr_status))
+		return false;
+	return true;
+}
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
index 1c487f1a11ab..503fe2ccba51 100644
--- a/fs/nfsd/nfs4xdr_gen.h
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Thu Jan  8 23:12:07 2026 */
+/* XDR specification modification time: Wed Mar 25 11:39:22 2026 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
 #define _LINUX_XDRGEN_NFS4_1_DECL_H
@@ -32,4 +32,22 @@ bool xdrgen_decode_posixaceperm4(struct xdr_stream *xdr, posixaceperm4 *ptr);
 bool xdrgen_encode_posixaceperm4(struct xdr_stream *xdr, const posixaceperm4 value);
 
 
+bool xdrgen_decode_notify_remove4(struct xdr_stream *xdr, struct notify_remove4 *ptr);
+bool xdrgen_encode_notify_remove4(struct xdr_stream *xdr, const struct notify_remove4 *value);
+
+bool xdrgen_decode_notify_add4(struct xdr_stream *xdr, struct notify_add4 *ptr);
+bool xdrgen_encode_notify_add4(struct xdr_stream *xdr, const struct notify_add4 *value);
+
+bool xdrgen_decode_notify_attr4(struct xdr_stream *xdr, struct notify_attr4 *ptr);
+bool xdrgen_encode_notify_attr4(struct xdr_stream *xdr, const struct notify_attr4 *value);
+
+bool xdrgen_decode_notify_rename4(struct xdr_stream *xdr, struct notify_rename4 *ptr);
+bool xdrgen_encode_notify_rename4(struct xdr_stream *xdr, const struct notify_rename4 *value);
+
+bool xdrgen_decode_CB_NOTIFY4args(struct xdr_stream *xdr, struct CB_NOTIFY4args *ptr);
+bool xdrgen_encode_CB_NOTIFY4args(struct xdr_stream *xdr, const struct CB_NOTIFY4args *value);
+
+bool xdrgen_decode_CB_NOTIFY4res(struct xdr_stream *xdr, struct CB_NOTIFY4res *ptr);
+bool xdrgen_encode_CB_NOTIFY4res(struct xdr_stream *xdr, const struct CB_NOTIFY4res *value);
+
 #endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index a13d18447324..60cacf64181c 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1677,6 +1677,7 @@ TRACE_EVENT(nfsd_cb_setup_err,
 		{ OP_CB_RECALL,			"CB_RECALL" },		\
 		{ OP_CB_LAYOUTRECALL,		"CB_LAYOUTRECALL" },	\
 		{ OP_CB_RECALL_ANY,		"CB_RECALL_ANY" },	\
+		{ OP_CB_NOTIFY,			"CB_NOTIFY" },		\
 		{ OP_CB_NOTIFY_LOCK,		"CB_NOTIFY_LOCK" },	\
 		{ OP_CB_OFFLOAD,		"CB_OFFLOAD" })
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index d87be1f25273..44e5e9fa12e1 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -171,133 +171,6 @@ Needs to be updated if more operations are defined in future.*/
 #define LAST_NFS42_OP	OP_REMOVEXATTR
 #define LAST_NFS4_OP	LAST_NFS42_OP
 
-enum nfsstat4 {
-	NFS4_OK = 0,
-	NFS4ERR_PERM = 1,
-	NFS4ERR_NOENT = 2,
-	NFS4ERR_IO = 5,
-	NFS4ERR_NXIO = 6,
-	NFS4ERR_ACCESS = 13,
-	NFS4ERR_EXIST = 17,
-	NFS4ERR_XDEV = 18,
-	/* Unused/reserved 19 */
-	NFS4ERR_NOTDIR = 20,
-	NFS4ERR_ISDIR = 21,
-	NFS4ERR_INVAL = 22,
-	NFS4ERR_FBIG = 27,
-	NFS4ERR_NOSPC = 28,
-	NFS4ERR_ROFS = 30,
-	NFS4ERR_MLINK = 31,
-	NFS4ERR_NAMETOOLONG = 63,
-	NFS4ERR_NOTEMPTY = 66,
-	NFS4ERR_DQUOT = 69,
-	NFS4ERR_STALE = 70,
-	NFS4ERR_BADHANDLE = 10001,
-	NFS4ERR_BAD_COOKIE = 10003,
-	NFS4ERR_NOTSUPP = 10004,
-	NFS4ERR_TOOSMALL = 10005,
-	NFS4ERR_SERVERFAULT = 10006,
-	NFS4ERR_BADTYPE = 10007,
-	NFS4ERR_DELAY = 10008,
-	NFS4ERR_SAME = 10009,
-	NFS4ERR_DENIED = 10010,
-	NFS4ERR_EXPIRED = 10011,
-	NFS4ERR_LOCKED = 10012,
-	NFS4ERR_GRACE = 10013,
-	NFS4ERR_FHEXPIRED = 10014,
-	NFS4ERR_SHARE_DENIED = 10015,
-	NFS4ERR_WRONGSEC = 10016,
-	NFS4ERR_CLID_INUSE = 10017,
-	NFS4ERR_RESOURCE = 10018,
-	NFS4ERR_MOVED = 10019,
-	NFS4ERR_NOFILEHANDLE = 10020,
-	NFS4ERR_MINOR_VERS_MISMATCH = 10021,
-	NFS4ERR_STALE_CLIENTID = 10022,
-	NFS4ERR_STALE_STATEID = 10023,
-	NFS4ERR_OLD_STATEID = 10024,
-	NFS4ERR_BAD_STATEID = 10025,
-	NFS4ERR_BAD_SEQID = 10026,
-	NFS4ERR_NOT_SAME = 10027,
-	NFS4ERR_LOCK_RANGE = 10028,
-	NFS4ERR_SYMLINK = 10029,
-	NFS4ERR_RESTOREFH = 10030,
-	NFS4ERR_LEASE_MOVED = 10031,
-	NFS4ERR_ATTRNOTSUPP = 10032,
-	NFS4ERR_NO_GRACE = 10033,
-	NFS4ERR_RECLAIM_BAD = 10034,
-	NFS4ERR_RECLAIM_CONFLICT = 10035,
-	NFS4ERR_BADXDR = 10036,
-	NFS4ERR_LOCKS_HELD = 10037,
-	NFS4ERR_OPENMODE = 10038,
-	NFS4ERR_BADOWNER = 10039,
-	NFS4ERR_BADCHAR = 10040,
-	NFS4ERR_BADNAME = 10041,
-	NFS4ERR_BAD_RANGE = 10042,
-	NFS4ERR_LOCK_NOTSUPP = 10043,
-	NFS4ERR_OP_ILLEGAL = 10044,
-	NFS4ERR_DEADLOCK = 10045,
-	NFS4ERR_FILE_OPEN = 10046,
-	NFS4ERR_ADMIN_REVOKED = 10047,
-	NFS4ERR_CB_PATH_DOWN = 10048,
-
-	/* nfs41 */
-	NFS4ERR_BADIOMODE	= 10049,
-	NFS4ERR_BADLAYOUT	= 10050,
-	NFS4ERR_BAD_SESSION_DIGEST = 10051,
-	NFS4ERR_BADSESSION	= 10052,
-	NFS4ERR_BADSLOT		= 10053,
-	NFS4ERR_COMPLETE_ALREADY = 10054,
-	NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
-	NFS4ERR_DELEG_ALREADY_WANTED = 10056,
-	NFS4ERR_BACK_CHAN_BUSY	= 10057,	/* backchan reqs outstanding */
-	NFS4ERR_LAYOUTTRYLATER	= 10058,
-	NFS4ERR_LAYOUTUNAVAILABLE = 10059,
-	NFS4ERR_NOMATCHING_LAYOUT = 10060,
-	NFS4ERR_RECALLCONFLICT	= 10061,
-	NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
-	NFS4ERR_SEQ_MISORDERED = 10063, 	/* unexpected seq.id in req */
-	NFS4ERR_SEQUENCE_POS	= 10064,	/* [CB_]SEQ. op not 1st op */
-	NFS4ERR_REQ_TOO_BIG	= 10065,	/* request too big */
-	NFS4ERR_REP_TOO_BIG	= 10066,	/* reply too big */
-	NFS4ERR_REP_TOO_BIG_TO_CACHE = 10067,	/* rep. not all cached */
-	NFS4ERR_RETRY_UNCACHED_REP = 10068,	/* retry & rep. uncached */
-	NFS4ERR_UNSAFE_COMPOUND = 10069,	/* retry/recovery too hard */
-	NFS4ERR_TOO_MANY_OPS	= 10070,	/* too many ops in [CB_]COMP */
-	NFS4ERR_OP_NOT_IN_SESSION = 10071,	/* op needs [CB_]SEQ. op */
-	NFS4ERR_HASH_ALG_UNSUPP = 10072,	/* hash alg. not supp. */
-						/* Error 10073 is unused. */
-	NFS4ERR_CLIENTID_BUSY	= 10074,	/* clientid has state */
-	NFS4ERR_PNFS_IO_HOLE	= 10075,	/* IO to _SPARSE file hole */
-	NFS4ERR_SEQ_FALSE_RETRY	= 10076,	/* retry not original */
-	NFS4ERR_BAD_HIGH_SLOT	= 10077,	/* sequence arg bad */
-	NFS4ERR_DEADSESSION	= 10078,	/* persistent session dead */
-	NFS4ERR_ENCR_ALG_UNSUPP = 10079,	/* SSV alg mismatch */
-	NFS4ERR_PNFS_NO_LAYOUT	= 10080,	/* direct I/O with no layout */
-	NFS4ERR_NOT_ONLY_OP	= 10081,	/* bad compound */
-	NFS4ERR_WRONG_CRED	= 10082,	/* permissions:state change */
-	NFS4ERR_WRONG_TYPE	= 10083,	/* current operation mismatch */
-	NFS4ERR_DIRDELEG_UNAVAIL = 10084,	/* no directory delegation */
-	NFS4ERR_REJECT_DELEG	= 10085,	/* on callback */
-	NFS4ERR_RETURNCONFLICT	= 10086,	/* outstanding layoutreturn */
-	NFS4ERR_DELEG_REVOKED	= 10087,	/* deleg./layout revoked */
-
-	/* nfs42 */
-	NFS4ERR_PARTNER_NOTSUPP	= 10088,
-	NFS4ERR_PARTNER_NO_AUTH	= 10089,
-	NFS4ERR_UNION_NOTSUPP	= 10090,
-	NFS4ERR_OFFLOAD_DENIED	= 10091,
-	NFS4ERR_WRONG_LFS	= 10092,
-	NFS4ERR_BADLABEL	= 10093,
-	NFS4ERR_OFFLOAD_NO_REQS	= 10094,
-
-	/* xattr (RFC8276) */
-	NFS4ERR_NOXATTR		= 10095,
-	NFS4ERR_XATTR2BIG	= 10096,
-
-	/* can be used for internal errors */
-	NFS4ERR_FIRST_FREE
-};
-
 /* error codes for internal client use */
 #define NFS4ERR_RESET_TO_MDS   12001
 #define NFS4ERR_RESET_TO_PNFS  12002
diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h
index 4ac54bdbd335..f761c3ddb4c7 100644
--- a/include/linux/sunrpc/xdrgen/nfs4_1.h
+++ b/include/linux/sunrpc/xdrgen/nfs4_1.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Thu Jan  8 23:12:07 2026 */
+/* XDR specification modification time: Wed Mar 25 11:39:22 2026 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DEF_H
 #define _LINUX_XDRGEN_NFS4_1_DEF_H
@@ -9,15 +9,150 @@
 #include <linux/types.h>
 #include <linux/sunrpc/xdrgen/_defs.h>
 
-typedef s64 int64_t;
+typedef s32 int32_t;
 
 typedef u32 uint32_t;
 
+typedef s64 int64_t;
+
+typedef u64 uint64_t;
+
+enum { NFS4_VERIFIER_SIZE = 8 };
+
+enum { NFS4_FHSIZE = 128 };
+
+enum nfsstat4 {
+	NFS4_OK = 0,
+	NFS4ERR_PERM = 1,
+	NFS4ERR_NOENT = 2,
+	NFS4ERR_IO = 5,
+	NFS4ERR_NXIO = 6,
+	NFS4ERR_ACCESS = 13,
+	NFS4ERR_EXIST = 17,
+	NFS4ERR_XDEV = 18,
+	NFS4ERR_NOTDIR = 20,
+	NFS4ERR_ISDIR = 21,
+	NFS4ERR_INVAL = 22,
+	NFS4ERR_FBIG = 27,
+	NFS4ERR_NOSPC = 28,
+	NFS4ERR_ROFS = 30,
+	NFS4ERR_MLINK = 31,
+	NFS4ERR_NAMETOOLONG = 63,
+	NFS4ERR_NOTEMPTY = 66,
+	NFS4ERR_DQUOT = 69,
+	NFS4ERR_STALE = 70,
+	NFS4ERR_BADHANDLE = 10001,
+	NFS4ERR_BAD_COOKIE = 10003,
+	NFS4ERR_NOTSUPP = 10004,
+	NFS4ERR_TOOSMALL = 10005,
+	NFS4ERR_SERVERFAULT = 10006,
+	NFS4ERR_BADTYPE = 10007,
+	NFS4ERR_DELAY = 10008,
+	NFS4ERR_SAME = 10009,
+	NFS4ERR_DENIED = 10010,
+	NFS4ERR_EXPIRED = 10011,
+	NFS4ERR_LOCKED = 10012,
+	NFS4ERR_GRACE = 10013,
+	NFS4ERR_FHEXPIRED = 10014,
+	NFS4ERR_SHARE_DENIED = 10015,
+	NFS4ERR_WRONGSEC = 10016,
+	NFS4ERR_CLID_INUSE = 10017,
+	NFS4ERR_RESOURCE = 10018,
+	NFS4ERR_MOVED = 10019,
+	NFS4ERR_NOFILEHANDLE = 10020,
+	NFS4ERR_MINOR_VERS_MISMATCH = 10021,
+	NFS4ERR_STALE_CLIENTID = 10022,
+	NFS4ERR_STALE_STATEID = 10023,
+	NFS4ERR_OLD_STATEID = 10024,
+	NFS4ERR_BAD_STATEID = 10025,
+	NFS4ERR_BAD_SEQID = 10026,
+	NFS4ERR_NOT_SAME = 10027,
+	NFS4ERR_LOCK_RANGE = 10028,
+	NFS4ERR_SYMLINK = 10029,
+	NFS4ERR_RESTOREFH = 10030,
+	NFS4ERR_LEASE_MOVED = 10031,
+	NFS4ERR_ATTRNOTSUPP = 10032,
+	NFS4ERR_NO_GRACE = 10033,
+	NFS4ERR_RECLAIM_BAD = 10034,
+	NFS4ERR_RECLAIM_CONFLICT = 10035,
+	NFS4ERR_BADXDR = 10036,
+	NFS4ERR_LOCKS_HELD = 10037,
+	NFS4ERR_OPENMODE = 10038,
+	NFS4ERR_BADOWNER = 10039,
+	NFS4ERR_BADCHAR = 10040,
+	NFS4ERR_BADNAME = 10041,
+	NFS4ERR_BAD_RANGE = 10042,
+	NFS4ERR_LOCK_NOTSUPP = 10043,
+	NFS4ERR_OP_ILLEGAL = 10044,
+	NFS4ERR_DEADLOCK = 10045,
+	NFS4ERR_FILE_OPEN = 10046,
+	NFS4ERR_ADMIN_REVOKED = 10047,
+	NFS4ERR_CB_PATH_DOWN = 10048,
+	NFS4ERR_BADIOMODE = 10049,
+	NFS4ERR_BADLAYOUT = 10050,
+	NFS4ERR_BAD_SESSION_DIGEST = 10051,
+	NFS4ERR_BADSESSION = 10052,
+	NFS4ERR_BADSLOT = 10053,
+	NFS4ERR_COMPLETE_ALREADY = 10054,
+	NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
+	NFS4ERR_DELEG_ALREADY_WANTED = 10056,
+	NFS4ERR_BACK_CHAN_BUSY = 10057,
+	NFS4ERR_LAYOUTTRYLATER = 10058,
+	NFS4ERR_LAYOUTUNAVAILABLE = 10059,
+	NFS4ERR_NOMATCHING_LAYOUT = 10060,
+	NFS4ERR_RECALLCONFLICT = 10061,
+	NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
+	NFS4ERR_SEQ_MISORDERED = 10063,
+	NFS4ERR_SEQUENCE_POS = 10064,
+	NFS4ERR_REQ_TOO_BIG = 10065,
+	NFS4ERR_REP_TOO_BIG = 10066,
+	NFS4ERR_REP_TOO_BIG_TO_CACHE = 10067,
+	NFS4ERR_RETRY_UNCACHED_REP = 10068,
+	NFS4ERR_UNSAFE_COMPOUND = 10069,
+	NFS4ERR_TOO_MANY_OPS = 10070,
+	NFS4ERR_OP_NOT_IN_SESSION = 10071,
+	NFS4ERR_HASH_ALG_UNSUPP = 10072,
+	NFS4ERR_CLIENTID_BUSY = 10074,
+	NFS4ERR_PNFS_IO_HOLE = 10075,
+	NFS4ERR_SEQ_FALSE_RETRY = 10076,
+	NFS4ERR_BAD_HIGH_SLOT = 10077,
+	NFS4ERR_DEADSESSION = 10078,
+	NFS4ERR_ENCR_ALG_UNSUPP = 10079,
+	NFS4ERR_PNFS_NO_LAYOUT = 10080,
+	NFS4ERR_NOT_ONLY_OP = 10081,
+	NFS4ERR_WRONG_CRED = 10082,
+	NFS4ERR_WRONG_TYPE = 10083,
+	NFS4ERR_DIRDELEG_UNAVAIL = 10084,
+	NFS4ERR_REJECT_DELEG = 10085,
+	NFS4ERR_RETURNCONFLICT = 10086,
+	NFS4ERR_DELEG_REVOKED = 10087,
+	NFS4ERR_PARTNER_NOTSUPP = 10088,
+	NFS4ERR_PARTNER_NO_AUTH = 10089,
+	NFS4ERR_UNION_NOTSUPP = 10090,
+	NFS4ERR_OFFLOAD_DENIED = 10091,
+	NFS4ERR_WRONG_LFS = 10092,
+	NFS4ERR_BADLABEL = 10093,
+	NFS4ERR_OFFLOAD_NO_REQS = 10094,
+	NFS4ERR_NOXATTR = 10095,
+	NFS4ERR_XATTR2BIG = 10096,
+	NFS4ERR_FIRST_FREE = 10097,
+};
+
+typedef enum nfsstat4 nfsstat4;
+
+typedef opaque attrlist4;
+
 typedef struct {
 	u32 count;
 	uint32_t *element;
 } bitmap4;
 
+typedef u8 verifier4[NFS4_VERIFIER_SIZE];
+
+typedef uint64_t nfs_cookie4;
+
+typedef opaque nfs_fh4;
+
 typedef opaque utf8string;
 
 typedef utf8string utf8str_cis;
@@ -26,11 +161,30 @@ typedef utf8string utf8str_cs;
 
 typedef utf8string utf8str_mixed;
 
+typedef utf8str_cs component4;
+
+typedef utf8str_cs linktext4;
+
+typedef struct {
+	u32 count;
+	component4 *element;
+} pathname4;
+
 struct nfstime4 {
 	int64_t seconds;
 	uint32_t nseconds;
 };
 
+struct fattr4 {
+	bitmap4 attrmask;
+	attrlist4 attr_vals;
+};
+
+struct stateid4 {
+	uint32_t seqid;
+	u8 other[12];
+};
+
 typedef bool fattr4_offline;
 
 enum { FATTR4_OFFLINE = 83 };
@@ -216,11 +370,98 @@ enum { FATTR4_POSIX_DEFAULT_ACL = 91 };
 
 enum { FATTR4_POSIX_ACCESS_ACL = 92 };
 
-#define NFS4_int64_t_sz                 \
-	(XDR_hyper)
+enum notify_type4 {
+	NOTIFY4_CHANGE_CHILD_ATTRS = 0,
+	NOTIFY4_CHANGE_DIR_ATTRS = 1,
+	NOTIFY4_REMOVE_ENTRY = 2,
+	NOTIFY4_ADD_ENTRY = 3,
+	NOTIFY4_RENAME_ENTRY = 4,
+	NOTIFY4_CHANGE_COOKIE_VERIFIER = 5,
+};
+
+typedef enum notify_type4 notify_type4;
+
+struct notify_entry4 {
+	component4 ne_file;
+	struct fattr4 ne_attrs;
+};
+
+struct prev_entry4 {
+	struct notify_entry4 pe_prev_entry;
+	nfs_cookie4 pe_prev_entry_cookie;
+};
+
+struct notify_remove4 {
+	struct notify_entry4 nrm_old_entry;
+	nfs_cookie4 nrm_old_entry_cookie;
+};
+
+struct notify_add4 {
+	struct {
+		u32 count;
+		struct notify_remove4 *element;
+	} nad_old_entry;
+	struct notify_entry4 nad_new_entry;
+	struct {
+		u32 count;
+		nfs_cookie4 *element;
+	} nad_new_entry_cookie;
+	struct {
+		u32 count;
+		struct prev_entry4 *element;
+	} nad_prev_entry;
+	bool nad_last_entry;
+};
+
+struct notify_attr4 {
+	struct notify_entry4 na_changed_entry;
+};
+
+struct notify_rename4 {
+	struct notify_remove4 nrn_old_entry;
+	struct notify_add4 nrn_new_entry;
+};
+
+struct notify_verifier4 {
+	verifier4 nv_old_cookieverf;
+	verifier4 nv_new_cookieverf;
+};
+
+typedef opaque notifylist4;
+
+struct notify4 {
+	bitmap4 notify_mask;
+	notifylist4 notify_vals;
+};
+
+struct CB_NOTIFY4args {
+	struct stateid4 cna_stateid;
+	nfs_fh4 cna_fh;
+	struct {
+		u32 count;
+		struct notify4 *element;
+	} cna_changes;
+};
+
+struct CB_NOTIFY4res {
+	nfsstat4 cnr_status;
+};
+
+#define NFS4_int32_t_sz                 \
+	(XDR_int)
 #define NFS4_uint32_t_sz                \
 	(XDR_unsigned_int)
+#define NFS4_int64_t_sz                 \
+	(XDR_hyper)
+#define NFS4_uint64_t_sz                \
+	(XDR_unsigned_hyper)
+#define NFS4_nfsstat4_sz                (XDR_int)
+#define NFS4_attrlist4_sz               (XDR_unsigned_int)
 #define NFS4_bitmap4_sz                 (XDR_unsigned_int)
+#define NFS4_verifier4_sz               (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+#define NFS4_nfs_cookie4_sz             \
+	(NFS4_uint64_t_sz)
+#define NFS4_nfs_fh4_sz                 (XDR_unsigned_int + XDR_QUADLEN(NFS4_FHSIZE))
 #define NFS4_utf8string_sz              (XDR_unsigned_int)
 #define NFS4_utf8str_cis_sz             \
 	(NFS4_utf8string_sz)
@@ -228,8 +469,17 @@ enum { FATTR4_POSIX_ACCESS_ACL = 92 };
 	(NFS4_utf8string_sz)
 #define NFS4_utf8str_mixed_sz           \
 	(NFS4_utf8string_sz)
+#define NFS4_component4_sz              \
+	(NFS4_utf8str_cs_sz)
+#define NFS4_linktext4_sz               \
+	(NFS4_utf8str_cs_sz)
+#define NFS4_pathname4_sz               (XDR_unsigned_int)
 #define NFS4_nfstime4_sz                \
 	(NFS4_int64_t_sz + NFS4_uint32_t_sz)
+#define NFS4_fattr4_sz                  \
+	(NFS4_bitmap4_sz + NFS4_attrlist4_sz)
+#define NFS4_stateid4_sz                \
+	(NFS4_uint32_t_sz + XDR_QUADLEN(12))
 #define NFS4_fattr4_offline_sz          \
 	(XDR_bool)
 #define NFS4_open_arguments4_sz         \
@@ -259,5 +509,27 @@ enum { FATTR4_POSIX_ACCESS_ACL = 92 };
 	(NFS4_aclscope4_sz)
 #define NFS4_fattr4_posix_default_acl_sz (XDR_unsigned_int)
 #define NFS4_fattr4_posix_access_acl_sz (XDR_unsigned_int)
+#define NFS4_notify_type4_sz            (XDR_int)
+#define NFS4_notify_entry4_sz           \
+	(NFS4_component4_sz + NFS4_fattr4_sz)
+#define NFS4_prev_entry4_sz             \
+	(NFS4_notify_entry4_sz + NFS4_nfs_cookie4_sz)
+#define NFS4_notify_remove4_sz          \
+	(NFS4_notify_entry4_sz + NFS4_nfs_cookie4_sz)
+#define NFS4_notify_add4_sz             \
+	(XDR_unsigned_int + (1 * (NFS4_notify_remove4_sz)) + NFS4_notify_entry4_sz + XDR_unsigned_int + (1 * (NFS4_nfs_cookie4_sz)) + XDR_unsigned_int + (1 * (NFS4_prev_entry4_sz)) + XDR_bool)
+#define NFS4_notify_attr4_sz            \
+	(NFS4_notify_entry4_sz)
+#define NFS4_notify_rename4_sz          \
+	(NFS4_notify_remove4_sz + NFS4_notify_add4_sz)
+#define NFS4_notify_verifier4_sz        \
+	(NFS4_verifier4_sz + NFS4_verifier4_sz)
+#define NFS4_notifylist4_sz             (XDR_unsigned_int)
+#define NFS4_notify4_sz                 \
+	(NFS4_bitmap4_sz + NFS4_notifylist4_sz)
+#define NFS4_CB_NOTIFY4args_sz          \
+	(NFS4_stateid4_sz + NFS4_nfs_fh4_sz + XDR_unsigned_int)
+#define NFS4_CB_NOTIFY4res_sz           \
+	(NFS4_nfsstat4_sz)
 
 #endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 4273e0249fcb..289205b53a08 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -17,11 +17,9 @@
 #include <linux/types.h>
 
 #define NFS4_BITMAP_SIZE	3
-#define NFS4_VERIFIER_SIZE	8
 #define NFS4_STATEID_SEQID_SIZE 4
 #define NFS4_STATEID_OTHER_SIZE 12
 #define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
-#define NFS4_FHSIZE		128
 #define NFS4_MAXPATHLEN		PATH_MAX
 #define NFS4_MAXNAMLEN		NAME_MAX
 #define NFS4_OPAQUE_LIMIT	1024

-- 
2.53.0


^ permalink raw reply related

* [PATCH 05/24] nfs_common: add new NOTIFY4_* flags proposed in RFC8881bis
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

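RFC8881bis adds new notify_type4 flags for GET_DIR_DELEGATION
(NOTIFY4_GFLAG_EXTEND through NOTIFY4_CHANGE_AMASK, values 6-16) that
we very much need to support. Add them to the XDR specification and
regenerate the xdrgen output.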

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 Documentation/sunrpc/xdr/nfs4_1.x    | 16 +++++++++++++++-
 fs/nfsd/nfs4xdr_gen.c                | 13 ++++++++++++-
 fs/nfsd/nfs4xdr_gen.h                |  2 +-
 include/linux/sunrpc/xdrgen/nfs4_1.h | 13 ++++++++++++-
 4 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/Documentation/sunrpc/xdr/nfs4_1.x b/Documentation/sunrpc/xdr/nfs4_1.x
index 632f5b579c39..aa14b590b524 100644
--- a/Documentation/sunrpc/xdr/nfs4_1.x
+++ b/Documentation/sunrpc/xdr/nfs4_1.x
@@ -416,7 +416,21 @@ enum notify_type4 {
         NOTIFY4_REMOVE_ENTRY = 2,
         NOTIFY4_ADD_ENTRY = 3,
         NOTIFY4_RENAME_ENTRY = 4,
-        NOTIFY4_CHANGE_COOKIE_VERIFIER = 5
+        NOTIFY4_CHANGE_COOKIE_VERIFIER = 5,
+        /*
+         * Added in NFSv4.1 bis document
+         */
+        NOTIFY4_GFLAG_EXTEND = 6,
+        NOTIFY4_AUFLAG_VALID = 7,
+        NOTIFY4_AUFLAG_USER = 8,
+        NOTIFY4_AUFLAG_GROUP = 9,
+        NOTIFY4_AUFLAG_OTHER = 10,
+        NOTIFY4_CHANGE_AUTH = 11,
+        NOTIFY4_CFLAG_ORDER = 12,
+        NOTIFY4_AUFLAG_GANOW = 13,
+        NOTIFY4_AUFLAG_GALATER = 14,
+        NOTIFY4_CHANGE_GA = 15,
+        NOTIFY4_CHANGE_AMASK = 16
 };
 
 /* Changed entry information.  */
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
index 5e656d6bbb8e..80369139ef7e 100644
--- a/fs/nfsd/nfs4xdr_gen.c
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Generated by xdrgen. Manual edits will be lost.
 // XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
-// XDR specification modification time: Wed Mar 25 11:39:22 2026
+// XDR specification modification time: Wed Mar 25 11:40:02 2026
 
 #include <linux/sunrpc/svc.h>
 
@@ -590,6 +590,17 @@ xdrgen_decode_notify_type4(struct xdr_stream *xdr, notify_type4 *ptr)
 	case NOTIFY4_ADD_ENTRY:
 	case NOTIFY4_RENAME_ENTRY:
 	case NOTIFY4_CHANGE_COOKIE_VERIFIER:
+	case NOTIFY4_GFLAG_EXTEND:
+	case NOTIFY4_AUFLAG_VALID:
+	case NOTIFY4_AUFLAG_USER:
+	case NOTIFY4_AUFLAG_GROUP:
+	case NOTIFY4_AUFLAG_OTHER:
+	case NOTIFY4_CHANGE_AUTH:
+	case NOTIFY4_CFLAG_ORDER:
+	case NOTIFY4_AUFLAG_GANOW:
+	case NOTIFY4_AUFLAG_GALATER:
+	case NOTIFY4_CHANGE_GA:
+	case NOTIFY4_CHANGE_AMASK:
 		break;
 	default:
 		return false;
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
index 503fe2ccba51..092a1ed399c7 100644
--- a/fs/nfsd/nfs4xdr_gen.h
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Wed Mar 25 11:39:22 2026 */
+/* XDR specification modification time: Wed Mar 25 11:40:02 2026 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
 #define _LINUX_XDRGEN_NFS4_1_DECL_H
diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h
index f761c3ddb4c7..537504069f24 100644
--- a/include/linux/sunrpc/xdrgen/nfs4_1.h
+++ b/include/linux/sunrpc/xdrgen/nfs4_1.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Wed Mar 25 11:39:22 2026 */
+/* XDR specification modification time: Wed Mar 25 11:40:02 2026 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DEF_H
 #define _LINUX_XDRGEN_NFS4_1_DEF_H
@@ -377,6 +377,17 @@ enum notify_type4 {
 	NOTIFY4_ADD_ENTRY = 3,
 	NOTIFY4_RENAME_ENTRY = 4,
 	NOTIFY4_CHANGE_COOKIE_VERIFIER = 5,
+	NOTIFY4_GFLAG_EXTEND = 6,
+	NOTIFY4_AUFLAG_VALID = 7,
+	NOTIFY4_AUFLAG_USER = 8,
+	NOTIFY4_AUFLAG_GROUP = 9,
+	NOTIFY4_AUFLAG_OTHER = 10,
+	NOTIFY4_CHANGE_AUTH = 11,
+	NOTIFY4_CFLAG_ORDER = 12,
+	NOTIFY4_AUFLAG_GANOW = 13,
+	NOTIFY4_AUFLAG_GALATER = 14,
+	NOTIFY4_CHANGE_GA = 15,
+	NOTIFY4_CHANGE_AMASK = 16,
 };
 
 typedef enum notify_type4 notify_type4;

-- 
2.53.0


^ permalink raw reply related

* [PATCH 06/24] nfsd: allow nfsd to get a dir lease with an ignore mask
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

When requesting a directory lease, enable the FL_IGN_DIR_* bits that
correspond to the requested notification types.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4state.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fa657badf5f8..c8fb84c38637 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6021,7 +6021,22 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
 	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
 }
 
-static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
+static unsigned int
+nfsd_notify_to_ignore(u32 notify)
+{
+	unsigned int mask = 0;
+
+	if (notify & BIT(NOTIFY4_REMOVE_ENTRY))
+		mask |= FL_IGN_DIR_DELETE;
+	if (notify & BIT(NOTIFY4_ADD_ENTRY))
+		mask |= FL_IGN_DIR_CREATE;
+	if (notify & BIT(NOTIFY4_RENAME_ENTRY))
+		mask |= FL_IGN_DIR_RENAME;
+
+	return mask;
+}
+
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, u32 notify)
 {
 	struct file_lease *fl;
 
@@ -6029,7 +6044,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
 	if (!fl)
 		return NULL;
 	fl->fl_lmops = &nfsd_lease_mng_ops;
-	fl->c.flc_flags = FL_DELEG;
+	fl->c.flc_flags = FL_DELEG | nfsd_notify_to_ignore(notify);
 	fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
 	fl->c.flc_owner = (fl_owner_t)dp;
 	fl->c.flc_pid = current->tgid;
@@ -6246,7 +6261,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	if (stp->st_stid.sc_export)
 		dp->dl_stid.sc_export = exp_get(stp->st_stid.sc_export);
 
-	fl = nfs4_alloc_init_lease(dp);
+	fl = nfs4_alloc_init_lease(dp, 0);
 	if (!fl)
 		goto out_clnt_odstate;
 
@@ -9612,12 +9627,11 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
 		dp->dl_stid.sc_export =
 			exp_get(cstate->current_fh.fh_export);
 
-	fl = nfs4_alloc_init_lease(dp);
+	fl = nfs4_alloc_init_lease(dp, gdd->gddr_notification[0]);
 	if (!fl)
 		goto out_put_stid;
 
-	status = kernel_setlease(nf->nf_file,
-				 fl->c.flc_type, &fl, NULL);
+	status = kernel_setlease(nf->nf_file, fl->c.flc_type, &fl, NULL);
 	if (fl)
 		locks_free_lease(fl);
 	if (status)

-- 
2.53.0


^ permalink raw reply related

* [PATCH 07/24] vfs: add fsnotify_modify_mark_mask()
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

nfsd needs to be able to modify the mask on an existing mark when new
directory delegations are set or unset. Add an exported function that
lets the caller set and clear bits in mark->mask, and that recalculates
the connector's mask if anything changed.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/notify/mark.c                 | 29 +++++++++++++++++++++++++++++
 include/linux/fsnotify_backend.h |  1 +
 2 files changed, 30 insertions(+)

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index c2ed5b11b0fe..b1e73c6fd382 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -310,6 +310,35 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 		fsnotify_conn_set_children_dentry_flags(conn);
 }
 
+/**
+ * fsnotify_modify_mark_mask - set and/or clear flags in a mark's mask
+ * @mark: mark to be modified
+ * @set: bits to be set in mask
+ * @clear: bits to be cleared in mask
+ *
+ * Modify a fsnotify_mark mask as directed, and update its associated conn.
+ * The caller is expected to hold a reference to the mark.
+ */
+void fsnotify_modify_mark_mask(struct fsnotify_mark *mark, u32 set, u32 clear)
+{
+	bool recalc = false;
+	u32 mask;
+
+	WARN_ON_ONCE(clear & set);
+
+	spin_lock(&mark->lock);
+	mask = mark->mask;
+	mark->mask |= set;
+	mark->mask &= ~clear;
+	if (mark->mask != mask)
+		recalc = true;
+	spin_unlock(&mark->lock);
+
+	if (recalc)
+		fsnotify_recalc_mask(mark->connector);
+}
+EXPORT_SYMBOL_GPL(fsnotify_modify_mark_mask);
+
 /* Free all connectors queued for freeing once SRCU period ends */
 static void fsnotify_connector_destroy_workfn(struct work_struct *work)
 {
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 95985400d3d8..66e185bd1b1b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -917,6 +917,7 @@ extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
 extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
+extern void fsnotify_modify_mark_mask(struct fsnotify_mark *mark, u32 set, u32 clear);
 
 static inline void fsnotify_init_event(struct fsnotify_event *event)
 {

-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox