Linux Documentation

Linux Documentation
 help / color / mirror / Atom feed

* [PATCH v8 05/10] tracing/probes: Support nested typecast
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

When we hit an open parenthesis right after typecast closing
parenthesis, it means we have nested typecast. This allows us to
typecast a generic data member in a structure to a pointer to
another structure.

For example, to cast a DATA_MEMBER of VAR structure to STRUCT pointer
and get MEMBER value.

  (STRUCT)(VAR->DATA_MEMBER)->MEMBER

Also, we can nest typecast.

  (STRUCT1)((STRUCT2)$ARG->FIELD2)->FIELD1

Currently the max nest level is limited to 3.

This also allows user to use typecasting for registers or stacks on
kprobe events. e.g.

  (STRUCT)(%ax)->MEMBER

  (STRUCT)($stack0)->MEMBER


Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v6:
  - Add a WARN_ON_ONCE check for leaking nested_level (it must not happen.)
 Changes in v4:
  - Use orig_offset for reporting NO_PTR_STRCT error.
 Changes in v2:
  - Fix to skip "->" after closing parenthetsis.
---
 Documentation/trace/eprobetrace.rst |    2 +
 Documentation/trace/fprobetrace.rst |    2 +
 Documentation/trace/kprobetrace.rst |    2 +
 kernel/trace/trace.c                |    1 
 kernel/trace/trace_probe.c          |   81 ++++++++++++++++++++++++++++++++---
 kernel/trace/trace_probe.h          |    7 +++
 6 files changed, 86 insertions(+), 9 deletions(-)

diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst
index fe3602540569..cd0b4aa7f896 100644
--- a/Documentation/trace/eprobetrace.rst
+++ b/Documentation/trace/eprobetrace.rst
@@ -50,6 +50,8 @@ Synopsis of eprobe_events
                   a pointer to STRUCT and then derference the pointer defined by
                   ->MEMBER. Note that when this is used, the FIELD name does not
                   need to be prefixed with a '$'.
+  (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+		  also be used with another FETCHARG instead of FIELD.
 
 Types
 -----
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index 7435ded2d66d..6b8bb27bb62d 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -60,6 +60,8 @@ Synopsis of fprobe-events
   (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
                   a pointer to STRUCT and then derference the pointer defined by
                   ->MEMBER.
+  (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+                 also be used with another FETCHARG instead of FIELD.
 
   (\*1) This is available only when BTF is enabled.
   (\*2) only for the probe on function entry (offs == 0). Note, this argument access
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index f73614997d52..c4382765d5b2 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -65,6 +65,8 @@ Synopsis of kprobe_events
                   a pointer to STRUCT and then derference the pointer defined by
                   ->MEMBER. Note that this is available only when the probe is
 		   on function entry.
+  (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+                 also be used with another FETCHARG instead of FIELD.
 
   (\*1) only for the probe on function entry (offs == 0). Note, this argument access
         is best effort, because depending on the argument type, it may be passed on
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index aa93e7b01146..4f70318918c2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4326,6 +4326,7 @@ static const char readme_msg[] =
 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
 	"\t           [(structname)]<argname>[->field[->field|.field...]],\n"
+	"\t           [(structname)](fetcharg)->field[->field|.field...],\n"
 #endif
 #else
 	"\t           $stack<index>, $stack, $retval, $comm,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index e6cc9f3d6c8b..1d6afda39462 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -832,10 +832,35 @@ static int query_btf_struct(const char *sname, struct traceprobe_parse_context *
 	return 0;
 }
 
+/* Find the matching closing parenthesis for a given opening parenthesis. */
+static char *find_matched_close_paren(char *s)
+{
+	char *p = s;
+	int count = 0;
+
+	while (*p) {
+		if (*p == '(')
+			count++;
+		else if (*p == ')') {
+			if (--count == 0)
+				return p;
+		}
+		p++;
+	}
+	return NULL;
+}
+
+static int
+parse_probe_arg(char *arg, const struct fetch_type *type,
+		struct fetch_insn **pcode, struct fetch_insn *end,
+		struct traceprobe_parse_context *ctx);
+
 static int handle_typecast(char *arg, struct fetch_insn **pcode,
 			   struct fetch_insn *end,
 			   struct traceprobe_parse_context *ctx)
 {
+	int orig_offset = ctx->offset;
+	bool nested = false;
 	char *tmp;
 	int ret;
 
@@ -852,19 +877,56 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
 				    DEREF_OPEN_BRACE);
 		return -EINVAL;
 	}
-	*tmp = '\0';
-	ret = query_btf_struct(arg + 1, ctx);
-	*tmp = ')';
+	*tmp++ = '\0';
+
+	/* Handle the nested structure like (STRUCT)(VAR->FIELD)->... */
+	if (*tmp == '(') {
+		char *close = find_matched_close_paren(tmp);
 
+		ctx->offset += tmp - arg;
+		if (!close) {
+			trace_probe_log_err(ctx->offset, DEREF_OPEN_BRACE);
+			return -EINVAL;
+		}
+		/* We expect a field access for typecast */
+		if (close[1] != '-' || close[2] != '>') {
+			trace_probe_log_err(ctx->offset + close - tmp + 1,
+					    TYPECAST_REQ_FIELD);
+			return -EINVAL;
+		}
+
+		ctx->nested_level++;
+		if (ctx->nested_level > TRACEPROBE_MAX_NESTED_LEVEL) {
+			trace_probe_log_err(ctx->offset, TOO_MANY_NESTED);
+			return -E2BIG;
+		}
+		*close = '\0';
+
+		ctx->offset += 1;	/* for the '(' */
+		/* We need to parse the nested one */
+		ret = parse_probe_arg(tmp + 1, find_fetch_type(NULL, ctx->flags),
+				pcode, end, ctx);
+		if (ret < 0)
+			return ret;
+		ctx->nested_level--;
+		clear_struct_btf(ctx);
+
+		tmp = close + 3;/* Skip "->" after closing parenthesis */
+		nested = true;
+	}
+
+	ret = query_btf_struct(arg + 1, ctx);
 	if (ret < 0) {
-		trace_probe_log_err(ctx->offset + 1, NO_PTR_STRCT);
+		trace_probe_log_err(orig_offset + 1, NO_PTR_STRCT);
 		return -EINVAL;
 	}
 
-	tmp++;
-
-	ctx->offset += tmp - arg;
-	ret = parse_btf_arg(tmp, pcode, end, ctx);
+	ctx->offset = orig_offset + tmp - arg;
+	/* If it is nested, tmp points to the field name. */
+	if (nested)
+		ret = parse_btf_field(tmp, ctx->last_struct, pcode, end, ctx);
+	else
+		ret = parse_btf_arg(tmp, pcode, end, ctx);
 	return ret;
 }
 
@@ -1638,6 +1700,9 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
 			      ctx);
 	if (ret < 0)
 		goto fail;
+	/* nested_level must be 0 here, otherwise there is a bug. */
+	if (WARN_ON_ONCE(ctx->nested_level))
+		goto fail;
 
 	/* Update storing type if BTF is available */
 	if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) &&
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index aa72e2ffdd93..7d71925244e8 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -450,8 +450,11 @@ struct traceprobe_parse_context {
 	struct trace_probe *tp;
 	unsigned int flags;
 	int offset;
+	int nested_level;
 };
 
+#define TRACEPROBE_MAX_NESTED_LEVEL 3
+
 extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
 				      const char *argv,
 				      struct traceprobe_parse_context *ctx);
@@ -587,7 +590,9 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
 	C(TOO_MANY_ARGS,	"Too many arguments are specified"),	\
 	C(TOO_MANY_EARGS,	"Too many entry arguments specified"),	\
 	C(EVENT_TOO_BIG,	"Event too big (too many fields?)"),  \
-	C(TYPECAST_NOT_EVENT,	"Typecasts are only for eprobe fields"),
+	C(TYPECAST_NOT_EVENT,	"Typecasts are only for eprobe fields"), \
+	C(TYPECAST_REQ_FIELD,	"Typecast requires a field access"),	\
+	C(TOO_MANY_NESTED,	"Too many nested typecasts/dereferences"),
 
 #undef C
 #define C(a, b)		TP_ERR_##a


^ permalink raw reply related

* [PATCH v8 04/10] tracing/probes: Support typecast for various probe events
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Support BTF typecast feature on other probe events, but only if it is
kernel function entry or return, and must use function parameter name
or $retval. This means you can do:

  (STRUCT)PARAM->MEMBER

Note: you can not use other variables like $stackN, %reg etc. That
needs nesting support.

To support other probe events, we just need to use last_struct type
when we find a function parameter in parse_btf_arg().

This also updates <tracefs>/README file to show struct typecast.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v5:
  - Add comments about $retval with typecast.
  - Even if the type of retvalue is not known, if user specifies typecast,
    use it for its type.
 Changes in v3:
  - Clarify the limitation.
 Changes in v2:
  - Fix to re-enable typecast on eprobe.
---
 Documentation/trace/fprobetrace.rst |    3 +++
 Documentation/trace/kprobetrace.rst |    4 ++++
 kernel/trace/trace.c                |    2 +-
 kernel/trace/trace_probe.c          |   23 +++++++++++++++++------
 kernel/trace/trace_probe.h          |    5 +++++
 5 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index b4c2ca3d02c1..7435ded2d66d 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -57,6 +57,9 @@ Synopsis of fprobe-events
                   (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
                   (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr"
                   and bitfield are supported.
+  (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
+                  a pointer to STRUCT and then derference the pointer defined by
+                  ->MEMBER.
 
   (\*1) This is available only when BTF is enabled.
   (\*2) only for the probe on function entry (offs == 0). Note, this argument access
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 3b6791c17e9b..f73614997d52 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -61,6 +61,10 @@ Synopsis of kprobe_events
 		  (x8/x16/x32/x64), VFS layer common type(%pd/%pD), "char",
                   "string", "ustring", "symbol", "symstr" and bitfield are
                   supported.
+  (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
+                  a pointer to STRUCT and then derference the pointer defined by
+                  ->MEMBER. Note that this is available only when the probe is
+		   on function entry.
 
   (\*1) only for the probe on function entry (offs == 0). Note, this argument access
         is best effort, because depending on the argument type, it may be passed on
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6eb4d3097a4d..aa93e7b01146 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4325,7 +4325,7 @@ static const char readme_msg[] =
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
-	"\t           <argname>[->field[->field|.field...]],\n"
+	"\t           [(structname)]<argname>[->field[->field|.field...]],\n"
 #endif
 #else
 	"\t           $stack<index>, $stack, $retval, $comm,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 0908019aea12..e6cc9f3d6c8b 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -699,7 +699,7 @@ static int parse_btf_arg(char *varname,
 
 	if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) {
 		code->op = FETCH_OP_RETVAL;
-		/* Check whether the function return type is not void */
+		/* Check whether the function return type is not void, even with typecast. */
 		if (query_btf_context(ctx) == 0) {
 			if (ctx->proto->type == 0) {
 				trace_probe_log_err(ctx->offset, NO_RETVAL);
@@ -708,6 +708,13 @@ static int parse_btf_arg(char *varname,
 			tid = ctx->proto->type;
 			goto found;
 		}
+		/*
+		 * Even if we can not find appropriate BTF info, we can still access
+		 * the field via typecast.
+		 */
+		if (ctx->struct_btf)
+			goto found;
+
 		if (field) {
 			trace_probe_log_err(ctx->offset + field - varname,
 					    NO_BTF_ENTRY);
@@ -752,7 +759,10 @@ static int parse_btf_arg(char *varname,
 	return -ENOENT;
 
 found:
-	type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
+	if (ctx->struct_btf)
+		type = ctx->last_struct;
+	else
+		type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
 found_type:
 	if (!type) {
 		trace_probe_log_err(ctx->offset, BAD_BTF_TID);
@@ -829,10 +839,11 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
 	char *tmp;
 	int ret;
 
-	/* Currently this only works for eprobes */
-	if (!(ctx->flags & TPARG_FL_TEVENT)) {
-		trace_probe_log_err(ctx->offset, TYPECAST_NOT_EVENT);
-		return -EINVAL;
+	if (!(tparg_is_event_probe(ctx->flags) ||
+	      tparg_is_function_entry(ctx->flags) ||
+	      tparg_is_function_return(ctx->flags))) {
+		trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+		return -EOPNOTSUPP;
 	}
 
 	tmp = strchr(arg, ')');
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index e36cfe39e9a8..aa72e2ffdd93 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -429,6 +429,11 @@ static inline bool tparg_is_function_return(unsigned int flags)
 	return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_RETURN);
 }
 
+static inline bool tparg_is_event_probe(unsigned int flags)
+{
+	return !!(flags & TPARG_FL_TEVENT);
+}
+
 struct traceprobe_parse_context {
 	struct trace_event_call *event;
 	/* BTF related parameters */


^ permalink raw reply related

* [PATCH v8 03/10] tracing/probes: Support dumping fetcharg program for debugging dynamic events
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

For debugging probe events, it is helpful to verify the compiled
fetch instructions for each probe argument. This introduces a new
kernel config CONFIG_PROBE_EVENTS_DUMP_FETCHARG to decode the
instruction sequence of each argument and display it under a
commented line starting with '#' immediately following the dynamic
event definition (such as in dynamic_events, kprobe_events,
uprobe_events, etc.).

For example:
 /sys/kernel/tracing # cat dynamic_events
 p:kprobes/p_vfs_read_0 vfs_read arg1=+0(file):ustring arg2=%ax:x16
 #  arg1: ARG(0) -> ST_USTRING(offset=0,size=4) -> END
 #  arg2: REG(80) -> ST_RAW(size=2) -> END

Assisted-by: Antigravity:gemini-3.5-flash
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v8:
  - State this feature is only for debugging probe events.
  - Fix dependency list after description in Kconfig.
 Changes in v7:
   - Show trace event field name for FETCH_OP_TP_ARG.
   - Show immediate string value for FETCH_OP_IMMSTR.
   - Fix style issues warned by checkpatch.pl.
 Changes in v6:
   - Newly added.
---
 kernel/trace/Kconfig        |   12 +++++
 kernel/trace/trace_eprobe.c |    2 +
 kernel/trace/trace_fprobe.c |    2 +
 kernel/trace/trace_kprobe.c |    2 +
 kernel/trace/trace_probe.c  |   96 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_probe.h  |   79 +++++++++++++++++++++--------------
 kernel/trace/trace_uprobe.c |    3 +
 7 files changed, 164 insertions(+), 32 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e130da35808f..ca78727ad121 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -779,6 +779,18 @@ config PROBE_EVENTS_BTF_ARGS
 	  kernel function entry or a tracepoint.
 	  This is available only if BTF (BPF Type Format) support is enabled.
 
+config PROBE_EVENTS_DUMP_FETCHARG
+	bool "Dump of dynamic probe event fetch-arguments"
+	depends on PROBE_EVENTS
+	default n
+	help
+	  This shows the dump of fetch-arguments of dynamic probe events
+	  alongside their event definitions in the dynamic_events file
+	  as comment lines. This is useful to debug the probe events.
+	  Since this exposes the raw values in the dynamic_events file,
+	  it might be a security risk. Only enable it if you need to debug
+	  probe events themselves.
+
 config KPROBE_EVENTS
 	depends on KPROBES
 	depends on HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 50518b071414..462c31145733 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -87,6 +87,8 @@ static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
 		seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
 	seq_putc(m, '\n');
 
+	trace_probe_dump_args(m, &ep->tp);
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 4d1abbf66229..536781cd4c47 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -1449,6 +1449,8 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
 		seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm);
 	seq_putc(m, '\n');
 
+	trace_probe_dump_args(m, &tf->tp);
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a8420e6abb56..cfa807d8e760 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1320,6 +1320,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
 		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
 	seq_putc(m, '\n');
 
+	trace_probe_dump_args(m, &tk->tp);
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2ce7d62471cb..0908019aea12 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -2403,3 +2403,99 @@ int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_a
 	}
 	return 0;
 }
+
+#ifdef CONFIG_PROBE_EVENTS_DUMP_FETCHARG
+
+struct fetch_op_decode {
+	const char *name;
+	void (*decode)(struct seq_file *m, struct fetch_insn *insn);
+};
+
+static const struct fetch_op_decode fetch_op_decode[];
+
+static void fetcharg_decode_none(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_puts(m, fetch_op_decode[insn->op].name);
+}
+
+static void fetcharg_decode_param(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_printf(m, "%s(%u)", fetch_op_decode[insn->op].name, insn->param);
+}
+
+static void fetcharg_decode_imm(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_printf(m, "%s(0x%lx)", fetch_op_decode[insn->op].name, insn->immediate);
+}
+
+static void fetcharg_decode_string(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, (char *)insn->data);
+}
+
+static void fetcharg_decode_symbol(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, (char *)insn->data);
+}
+
+static void fetcharg_decode_offset(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_printf(m, "%s(offset=%d)", fetch_op_decode[insn->op].name, insn->offset);
+}
+
+static void fetcharg_decode_store(struct seq_file *m, struct fetch_insn *insn)
+{
+	if (insn->op == FETCH_OP_ST_RAW)
+		seq_printf(m, "%s(size=%u)", fetch_op_decode[insn->op].name, insn->size);
+	else
+		seq_printf(m, "%s(offset=%d,size=%u)", fetch_op_decode[insn->op].name,
+			  insn->offset, insn->size);
+}
+
+static void fetcharg_decode_bf(struct seq_file *m, struct fetch_insn *insn)
+{
+	seq_printf(m, "%s(basesize=%u,lshift=%u,rshift=%u)",
+		   fetch_op_decode[insn->op].name, insn->basesize, insn->lshift, insn->rshift);
+}
+
+static void fetcharg_decode_tp_arg(struct seq_file *m, struct fetch_insn *insn)
+{
+	struct ftrace_event_field *field = insn->data;
+
+	seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, field->name);
+}
+
+#define FETCH_OP(opname, decode_fn) \
+	[FETCH_OP_##opname] = { .name = #opname, .decode = fetcharg_decode_##decode_fn }
+
+static const struct fetch_op_decode fetch_op_decode[] = FETCH_OP_LIST;
+#undef FETCH_OP
+
+static void trace_probe_dump_arg(struct seq_file *m, struct probe_arg *parg)
+{
+	int i;
+
+	seq_printf(m, "#  %s: ", parg->name);
+	for (i = 0; i < FETCH_INSN_MAX; i++) {
+		struct fetch_insn *insn = parg->code + i;
+
+		if (insn->op >= ARRAY_SIZE(fetch_op_decode) || !fetch_op_decode[insn->op].decode)
+			seq_printf(m, "unknown(%d)", insn->op);
+		else
+			fetch_op_decode[insn->op].decode(m, insn);
+
+		if (insn->op == FETCH_OP_END)
+			break;
+		seq_puts(m, " -> ");
+	}
+	seq_putc(m, '\n');
+}
+
+void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
+{
+	int i;
+
+	for (i = 0; i < tp->nr_args; i++)
+		trace_probe_dump_arg(m, &tp->args[i]);
+}
+#endif /* CONFIG_PROBE_EVENTS_DUMP_FETCHARG */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 2e0d8384ee5c..e36cfe39e9a8 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -83,38 +83,46 @@ static nokprobe_inline u32 update_data_loc(u32 loc, int consumed)
 /* Printing function type */
 typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
 
-enum fetch_op {
-	FETCH_OP_NOP = 0,
-	// Stage 1 (load) ops
-	FETCH_OP_REG,		/* Register : .param = offset */
-	FETCH_OP_STACK,		/* Stack : .param = index */
-	FETCH_OP_STACKP,	/* Stack pointer */
-	FETCH_OP_RETVAL,	/* Return value */
-	FETCH_OP_IMM,		/* Immediate : .immediate */
-	FETCH_OP_COMM,		/* Current comm */
-	FETCH_OP_ARG,		/* Function argument : .param */
-	FETCH_OP_FOFFS,		/* File offset: .immediate */
-	FETCH_OP_IMMSTR,	/* Allocated string: .data */
-	FETCH_OP_EDATA,		/* Entry data: .offset */
-	// Stage 2 (dereference) op
-	FETCH_OP_DEREF,		/* Dereference: .offset */
-	FETCH_OP_UDEREF,	/* User-space Dereference: .offset */
-	// Stage 3 (store) ops
-	FETCH_OP_ST_RAW,	/* Raw: .size */
-	FETCH_OP_ST_MEM,	/* Mem: .offset, .size */
-	FETCH_OP_ST_UMEM,	/* Mem: .offset, .size */
-	FETCH_OP_ST_STRING,	/* String: .offset, .size */
-	FETCH_OP_ST_USTRING,	/* User String: .offset, .size */
-	FETCH_OP_ST_SYMSTR,	/* Kernel Symbol String: .offset, .size */
-	FETCH_OP_ST_EDATA,	/* Store Entry Data: .offset */
-	// Stage 4 (modify) op
-	FETCH_OP_MOD_BF,	/* Bitfield: .basesize, .lshift, .rshift */
-	// Stage 5 (loop) op
-	FETCH_OP_LP_ARRAY,	/* Array: .param = loop count */
-	FETCH_OP_TP_ARG,	/* Trace Point argument */
-	FETCH_OP_END,
-	FETCH_NOP_SYMBOL,	/* Unresolved Symbol holder */
-};
+#define FETCH_OP_LIST	{						\
+	/* Stage 1 (load) ops */					\
+	FETCH_OP(NOP, none),		/* NOP */			\
+	FETCH_OP(REG, param),		/* Register: .param = offset */	\
+	FETCH_OP(STACK, param),		/* Stack: .param = index */	\
+	FETCH_OP(STACKP, none),		/* Stack pointer */		\
+	FETCH_OP(RETVAL, none),		/* Return value */		\
+	FETCH_OP(IMM, imm),		/* Immediate: .immediate */	\
+	FETCH_OP(COMM, none),		/* Current comm */		\
+	FETCH_OP(ARG, param),		/* Argument: .param = index */	\
+	FETCH_OP(FOFFS, imm),		/* File offset: .immediate */	\
+	FETCH_OP(IMMSTR, string),	/* Allocated string: .data */	\
+	FETCH_OP(EDATA, offset),	/* Entry data: .offset */	\
+	FETCH_OP(TP_ARG, tp_arg),	/* Tracepoint argument: .data */\
+	/* Stage 2 (dereference) ops */					\
+	FETCH_OP(DEREF, offset),	/* Dereference: .offset */	\
+	FETCH_OP(UDEREF, offset),	/* User-space dereference: .offset */\
+	/* Stage 3 (store) ops */					\
+	FETCH_OP(ST_RAW, store),	/* Raw value: .size */		\
+	FETCH_OP(ST_MEM, store),	/* Memory: .offset, .size */	\
+	FETCH_OP(ST_UMEM, store),	/* User memory: .offset, .size */\
+	FETCH_OP(ST_STRING, store),	/* String: .offset, .size */	\
+	FETCH_OP(ST_USTRING, store),	/* User string: .offset, .size */\
+	FETCH_OP(ST_SYMSTR, store),	/* Symbol name: .offset, .size */\
+	FETCH_OP(ST_EDATA, offset),	/* Entry data: .offset */	\
+	/* Stage 4 (modify) op */					\
+	FETCH_OP(MOD_BF, bf),		/* Bitfield: .basesize, .lshift, .rshift*/\
+	/* Stage 5 (loop) op */						\
+	FETCH_OP(LP_ARRAY, param),	/* Loop array: .param = count */\
+	/* End */							\
+	FETCH_OP(END, none),						\
+	/* Unresolved Symbol holder */					\
+	FETCH_OP(NOP_SYMBOL, symbol),	/* Non loaded symbol: .data = symbol name */\
+}
+
+#define FETCH_OP(opname, decode_fn) FETCH_OP_##opname
+enum fetch_op FETCH_OP_LIST;
+#undef FETCH_OP
+
+#define FETCH_NOP_SYMBOL FETCH_OP_NOP_SYMBOL
 
 struct fetch_insn {
 	enum fetch_op op;
@@ -370,6 +378,13 @@ bool trace_probe_match_command_args(struct trace_probe *tp,
 int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
 int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
 		 u8 *data, void *field);
+#ifdef CONFIG_PROBE_EVENTS_DUMP_FETCHARG
+void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp);
+#else
+static inline void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
+{
+}
+#endif
 
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
 int traceprobe_get_entry_data_size(struct trace_probe *tp);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c274346853d1..b2e264a4b96c 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -765,6 +765,9 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
 		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
 
 	seq_putc(m, '\n');
+
+	trace_probe_dump_args(m, &tu->tp);
+
 	return 0;
 }
 


^ permalink raw reply related

* [PATCH v8 02/10] tracing/probes: Allow eprobe to use variable without $ prefix
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

The commit 69efd863a785 ("tracing/eprobes: Allow use of BTF names
to dereference pointers") allows eprobe to use event field without
"$" prefix when it is used with typecast, it is natual to allow it
without typecast.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v8:
  - Newly added.
---
 kernel/trace/trace_probe.c                         |   12 +++++++++++-
 kernel/trace/trace_probe.h                         |    1 +
 .../test.d/dynevent/eprobes_syntax_errors.tc       |    3 +--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 0da7c0b53ba7..2ce7d62471cb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1341,7 +1341,17 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
 		ret = handle_typecast(arg, pcode, end, ctx);
 		break;
 	default:
-		if (isalpha(arg[0]) || arg[0] == '_') {	/* BTF variable */
+		if (isalpha(arg[0]) || arg[0] == '_') {
+			/* BTF variable or event field*/
+			if (ctx->flags & TPARG_FL_TEVENT) {
+				ret = parse_trace_event(arg, *pcode, ctx);
+				if (ret < 0) {
+					trace_probe_log_err(ctx->offset,
+							    NO_EVENT_FIELD);
+					return -EINVAL;
+				}
+				break;
+			}
 			if (!tparg_is_function_entry(ctx->flags) &&
 			    !tparg_is_function_return(ctx->flags)) {
 				trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 40b53b5b58a9..2e0d8384ee5c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -559,6 +559,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
 	C(NO_PTR_STRCT,		"This is not a pointer to union/structure."),	\
 	C(NOSUP_DAT_ARG,	"Non pointer structure/union argument is not supported."),\
 	C(BAD_HYPHEN,		"Failed to parse single hyphen. Forgot '>'?"),	\
+	C(NO_EVENT_FIELD,	"This event field is not found."),	\
 	C(NO_BTF_FIELD,		"This field is not found."),	\
 	C(BAD_BTF_TID,		"Failed to get BTF type info."),\
 	C(BAD_TYPE4STR,		"This type does not fit for string."),\
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
index 2a680c086047..0e65e787e426 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
@@ -10,7 +10,7 @@ check_error() { # command-with-error-pos-by-^
 check_error 'e ^a.'			# NO_EVENT_INFO
 check_error 'e ^.b'			# NO_EVENT_INFO
 check_error 'e ^a.b'			# BAD_ATTACH_EVENT
-check_error 'e syscalls/sys_enter_openat ^foo'	# BAD_ATTACH_ARG
+check_error 'e syscalls/sys_enter_openat ^foo'	# NO_EVENT_FIELD
 check_error 'e:^/bar syscalls/sys_enter_openat'	# NO_GROUP_NAME
 check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat'	# GROUP_TOO_LONG
 
@@ -19,7 +19,6 @@ check_error 'e:^ syscalls/sys_enter_openat'		# NO_EVENT_NAME
 check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat'	# EVENT_TOO_LONG
 check_error 'e:foo/^bar.1 syscalls/sys_enter_openat'	# BAD_EVENT_NAME
 
-check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd'	# BAD_FETCH_ARG
 check_error 'e:foo/bar syscalls/sys_enter_openat arg=^$foo'	# BAD_ATTACH_ARG
 
 if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then


^ permalink raw reply related

* [PATCH v8 01/10] tracing/probes: Make the $ prefix mandatory for comm access
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Since $comm or $COMM are not event field but special fetcharg
variables to access current->comm, It should not be accessed
without '$' prefix even with typecast.

Fixes: 69efd863a785 ("tracing/eprobes: Allow use of BTF names to dereference pointers")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v8:
  - Newly added.
---
 kernel/trace/trace_probe.c |   12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index c10bbb0df7b9..0da7c0b53ba7 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -342,10 +342,6 @@ static int parse_trace_event(char *arg, struct fetch_insn *code,
 	ret = parse_trace_event_arg(arg, code, ctx);
 	if (!ret)
 		return 0;
-	if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
-		code->op = FETCH_OP_COMM;
-		return 0;
-	}
 	return -EINVAL;
 }
 
@@ -1065,8 +1061,14 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
 	int len;
 
 	if (ctx->flags & TPARG_FL_TEVENT) {
-		if (parse_trace_event(arg, code, ctx) < 0)
+		if (parse_trace_event(arg, code, ctx) < 0) {
+			/* 'comm' should be checked after field parsing. */
+			if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
+				code->op = FETCH_OP_COMM;
+				return 0;
+			}
 			goto inval;
+		}
 		return 0;
 	}
 


^ permalink raw reply related

* [PATCH v8 00/10] tracing/probes: Add more typecast features
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest

Hi,

Here is the 8th version of series to introduce more typecast features
to probe events. The previous version is here:

 https://lore.kernel.org/all/178217904992.643090.15726197350652241270.stgit@devnote2/

In this version, I removed already picked 2 patches and add 2 new
fix and feature patches. The previous BTF typecast patch allows
`(STRUCT)FIELD->MEMBER` without $ prefix for eprobes, but it also
allows user to use COMM/comm instead of FIELD. $COMM/$comm are special
variables, so it should not skip $ prefix[1/10]. However, accessing
event fields without $ prefix itself is acceptable, it is generically
allowed without typecast[2/10].
Other patches have small fixes according to Julian and Sashiko's
comments and are rebased on top of probes/core branch.

This series extends BTF typecast feature and add more options:

1. Expanding BTF typecast to kprobe and fprobe.
   (currently only function entry/exit)

2. Introduce container_of like typecast. This adds a "assigned
   member" option to the typecast.

   (STRUCT,MEMBER)VAR->ANOTHER_MEMBER

   This casts VAR to STRUCT type but the VAR is as the address
   of STRUCT.MEMBER. In C, it is:

   container_of(VAR, STRUCT, MEMBER)->ANOTHER_MEMBER

3. Support nested typecast, e.g.

   (STRUCT)((STRUCT2)VAR->MEMBER2)->MEMBER

   the nest level must be smaller than 3.

4. Add $current variable to point "current" task_struct.
   This is useful with typecast, e.g.

   (task_struct)$current->pid

5. per-cpu dereference support.

   Intrdouce this_cpu_read(VAR) and this_cpu_ptr(VAR) to
   access per-cpu data on the current CPU (accessing other CPU
   data is not stable, because it can be changed.)

   You can access the member of per-cpu data structure using
   typecast like:

   (STRUCT)this_cpu_ptr(VAR)->MEMBER

6. Support event fields without $ prefix on eprobes.

   Now eprobe events can access its event fields.

And added fetcharg dump feature (for debug) and updated test scripts
to test part of them.

Thanks,

---
base-commit: 18dfb4703cd6af27deb30d628dac2e7db2b24e6a

Masami Hiramatsu (Google) (10):
      tracing/probes: Make the $ prefix mandatory for comm access
      tracing/probes: Allow eprobe to use variable without $ prefix
      tracing/probes: Support dumping fetcharg program for debugging dynamic events
      tracing/probes: Support typecast for various probe events
      tracing/probes: Support nested typecast
      tracing/probes: Type casting always involves nested calls
      tracing/probes: Support field specifier option for typecast
      tracing/probes: Add $current variable support
      tracing/probes: Add this_cpu_read() and this_cpu_ptr() dereference method to fetcharg
      tracing/probes: Add a new testcase for BTF typecasts

 Documentation/trace/eprobetrace.rst                |    9 
 Documentation/trace/fprobetrace.rst                |   10 
 Documentation/trace/kprobetrace.rst                |   11 
 kernel/trace/Kconfig                               |   12 
 kernel/trace/trace.c                               |    8 
 kernel/trace/trace_eprobe.c                        |    2 
 kernel/trace/trace_fprobe.c                        |    2 
 kernel/trace/trace_kprobe.c                        |    2 
 kernel/trace/trace_probe.c                         |  586 ++++++++++++++++----
 kernel/trace/trace_probe.h                         |   99 ++-
 kernel/trace/trace_probe_tmpl.h                    |   25 +
 kernel/trace/trace_uprobe.c                        |    3 
 samples/trace_events/trace-events-sample.c         |   40 +
 samples/trace_events/trace-events-sample.h         |   34 +
 .../ftrace/test.d/dynevent/btf_probe_event.tc      |   51 ++
 .../test.d/dynevent/eprobes_syntax_errors.tc       |    6 
 .../ftrace/test.d/dynevent/fprobe_syntax_errors.tc |   12 
 .../ftrace/test.d/kprobe/kprobe_syntax_errors.tc   |   12 
 .../ftrace/test.d/kprobe/uprobe_syntax_errors.tc   |    5 
 19 files changed, 770 insertions(+), 159 deletions(-)
 create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc

--
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH v8 09/46] KVM: guest_memfd: Introduce function to check GFN private/shared status
From: Ackerley Tng @ 2026-06-24 14:38 UTC (permalink / raw)
  To: Binbin Wu
  Cc: aik, andrew.jones, brauner, chao.p.peng, david, jmattson,
	jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
	wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
	aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
	Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
	Baoquan He, Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <1b59fec2-a464-4429-8532-880394912af5@linux.intel.com>

Binbin Wu <binbin.wu@linux.intel.com> writes:

>
> [...snip...]
>
>> +bool kvm_gmem_is_private(struct kvm *kvm, gfn_t gfn)
>> +{
>> +	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
>> +	struct inode *inode;
>> +
>> +	/*
>> +	 * If this gfn has no associated memslot, there's no chance of the gfn
>> +	 * being backed by private memory, since guest_memfd must be used for
>> +	 * private memory,
>
> "guest_memfd must be used for private memory" is a bit confusing to me.
>

Hmm good point. Is the source of confusion that guest_memfd can be used
for both shared and private memory?

Perhaps this can be rephrased as:

guest_memfd is the only provider of private memory and guest_memfd must
be used with a memslot, hence if there's no associated memslot, there's
no chance of this gfn being private.

>> and guest_memfd must be associated with some memslot.
>> +	 */
>> +	if (!slot)
>> +		return 0;
>> +
>>
>> [...snip...]
>>

^ permalink raw reply

* [RFC PATCH 01/11] Docs/mm/damon/design: update for DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP
From: SeongJae Park @ 2026-06-24 14:19 UTC (permalink / raw)
  Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, David Hildenbrand,
	Jonathan Corbet, Lorenzo Stoakes, Michal Hocko, Mike Rapoport,
	Shuah Khan, Suren Baghdasaryan, Vlastimil Babka, damon, linux-doc,
	linux-kernel, linux-mm
In-Reply-To: <20260624142008.87180-1-sj@kernel.org>

Commit 9138e27a3bc3 ("mm/damon: add node_eligible_mem_bp goal metric")
introduced DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP but forgot updating the
DAMON design document for that.  Update.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 Documentation/mm/damon/design.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index c16a3bb288d07..06a8894eb06f9 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -686,6 +686,8 @@ mechanism tries to make ``current_value`` of ``target_metric`` be same to
   (1/10,000).
 - ``inactive_mem_bp``: Inactive to active + inactive (LRU) memory size ratio in
   bp (1/10,000).
+- ``node_eligible_mem_bp``: Scheme target access pattern-eligible memory ratio
+  of a node in bp (1/10,000).
 
 ``nid`` is optionally required for only ``node_mem_used_bp``,
 ``node_mem_free_bp``, ``node_memcg_used_bp`` and ``node_memcg_free_bp`` to
-- 
2.47.3

^ permalink raw reply related

* [RFC PATCH 00/11] mm/damon: update, optimize, and clean up doc, tests, and code
From: SeongJae Park @ 2026-06-24 14:19 UTC (permalink / raw)
  Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, Brendan Higgins,
	David Gow, David Hildenbrand, Jonathan Corbet, Lorenzo Stoakes,
	Michal Hocko, Mike Rapoport, Shuah Khan, Shuah Khan,
	Suren Baghdasaryan, Vlastimil Babka, damon, kunit-dev, linux-doc,
	linux-kernel, linux-kselftest, linux-mm

Patches 1 and 2 update the design and ABI documents for recently added
DAMON features.  Patches 3-7 add or update more unit and self tests for
DAMON to cover recently changed or added functions and sysfs files.
Patch 8 optimizes damon_commit_target_regions() to skip unnecessary
adjacent ranges setup.  Patches 9-11 clean and fix up recently added
DAMON sysfs interface code for readability.

SeongJae Park (11):
  Docs/mm/damon/design: update for DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP
  Docs/ABI/damon: document probe files
  mm/damon/tests/core-kunit: test damon_rand()
  selftests/damon/sysfs.sh: test multiple probe dirs creation
  selftests/damon/sysfs.sh: test {core,ops}_filters/ directories
  selftests/damon/sysfs.sh: test dests dir
  selftests/damon/sysfs.sh: test all files in quota goal dir
  mm/damon/core: reduce range setup in damon_commit_target_regions()
  mm/damon/sysfs: split probe setup function out
  mm/damon/sysfs: split out filters setup function
  mm/damon/sysfs: fix typos in probe_{add,rm}_dirs: s/attr/probe/

 .../ABI/testing/sysfs-kernel-mm-damon         |  40 +++++++
 Documentation/mm/damon/design.rst             |   2 +
 mm/damon/core.c                               |  22 +++-
 mm/damon/sysfs.c                              | 102 ++++++++++--------
 mm/damon/tests/core-kunit.h                   |  21 ++++
 tools/testing/selftests/damon/sysfs.sh        |  70 +++++++++++-
 6 files changed, 206 insertions(+), 51 deletions(-)


base-commit: 197a7eb91f786e5deeb1dfea35076c01ebd37ce0
-- 
2.47.3

^ permalink raw reply

* Re: [PATCH v8 08/46] KVM: Provide generic interface for checking memory private/shared status
From: Ackerley Tng @ 2026-06-24 14:18 UTC (permalink / raw)
  To: Suzuki K Poulose, Fuad Tabba
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, aneesh.kumar, liam, Paolo Bonzini,
	Sean Christopherson, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
	Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
	Kairui Song, Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen,
	Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt,
	Kiryl Shutsemau, Baoquan He, Jason Gunthorpe, Vlastimil Babka,
	kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm, linux-coco
In-Reply-To: <3ec15992-2a29-434b-8c99-8b86bfcf007e@arm.com>

Suzuki K Poulose <suzuki.poulose@arm.com> writes:

>
> [...snip...]
>
>>>> @@ -2546,7 +2546,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
>>>>   bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
>>>>                                           struct kvm_gfn_range *range);
>>>>
>>>> -static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>>> +static inline bool kvm_vm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>
>> Should have read the Sashiko review first, but where is this used?
>> It's not used at all in this series...
>
> See below:
>
>>
>> /fuad
>>
>>>>   {
>>>>          return kvm_get_vm_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
>>>>   }
>>>> @@ -2557,6 +2557,16 @@ static inline bool kvm_mem_range_is_private(struct kvm *kvm, gfn_t start,
>>>>                                                    KVM_MEMORY_ATTRIBUTE_PRIVATE,
>>>>                                                    KVM_MEMORY_ATTRIBUTE_PRIVATE);
>>>>   }
>>>> +#endif  /* CONFIG_KVM_VM_MEMORY_ATTRIBUTES */
>>>> +
>>>> +#ifdef kvm_arch_has_private_mem
>>>> +typedef bool (kvm_mem_is_private_t)(struct kvm *kvm, gfn_t gfn);
>>>> +DECLARE_STATIC_CALL(__kvm_mem_is_private, kvm_mem_is_private_t);
>>>> +
>>>> +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>>> +{
>>>> +       return static_call(__kvm_mem_is_private)(kvm, gfn);
>>>> +}
>>>>   #else
>>>>   static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>>>   {
>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>> index 6669f1477013c..8b238e461b854 100644
>>>> --- a/virt/kvm/kvm_main.c
>>>> +++ b/virt/kvm/kvm_main.c
>>>> @@ -2627,6 +2627,20 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
>>>>   }
>>>>   #endif /* CONFIG_KVM_VM_MEMORY_ATTRIBUTES */
>>>>
>>>> +#ifdef kvm_arch_has_private_mem
>>>> +DEFINE_STATIC_CALL_RET0(__kvm_mem_is_private, kvm_mem_is_private_t);
>>>> +EXPORT_STATIC_CALL_GPL(__kvm_mem_is_private);
>>>> +
>>>> +static void kvm_init_memory_attributes(void)
>>>> +{
>>>> +#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
>>>> +       static_call_update(__kvm_mem_is_private, kvm_vm_mem_is_private);
>>>> +#endif
>>>> +}
>
>
> Here ^^ as the static call update ?
>
>
> Suzuki

Thanks Suzuki, it is used here. kvm_mem_is_private() was and still is
the function used to check if some gfn is private or shared. Hence, in
this patch, the usages of kvm_mem_is_private() were not
updated. Instead, kvm_mem_is_private() is now set up as a static call,
and the static call is hard-wired to kvm_vm_mem_is_private() in this
patch.

In the later wiring patch, all the places where attributes are looked up
are updated all at once: if conversion enabled, take gmem route, else
take VM route.

kvm_mem_is_private() is special in that the if-else is done at KVM load
time rather than runtime, and I believe that's for performance reasons
since this is checked quite often from the KVM fault handling code.

Buut I think perhaps Fuad was referring to kvm_mem_range_is_private(),
which is indeed not used anywhere. Binbin also asked about this, I think
we should drop kvm_mem_range_is_private(). My reply to Binbin is at [1].

[1] https://lore.kernel.org/all/CAEvNRgGbBcrX5Fw3vNTsTOBNC=Ypi=9-S07674yPxLU9i4akjA@mail.gmail.com/

^ permalink raw reply

* Re: [PATCH v2] usbcore: Add quirk for 255-bytes initial config read
From: Alan Stern @ 2026-06-24 14:00 UTC (permalink / raw)
  To: Nikhil Solanke
  Cc: linux-usb, gregkh, linux-kernel, michal.pecio, stable, corbet,
	skhan, linux-doc
In-Reply-To: <CAFgddhJ2HeJ=oTBX_axMJcgJq7GXH9abe+LH+x9NGekGO4BMyw@mail.gmail.com>

On Wed, Jun 24, 2026 at 01:36:28PM +0530, Nikhil Solanke wrote:
> > Actually, the best approach here would be to put this single change into
> > a separate patch that comes before the current one.  That removes issues
> > of making more than one functional change in one patch and improves
> > bisectability.
> 
> Before? Shouldn't it be after my changes? That would make it easier to
> justify the changes. And just to be sure, you did mention it does
> align with what the intention of USB_QUIRK_DELAY_INIT, but it does
> change its behavior when the quirk is not set. Atleast from what I
> understood from the documentation and an LLM's summary, the device
> needs time to prepare the full configuration set. So, does delaying
> before the first header read really work? I can't test this since I
> don't have a device that requires the quirk to be set.
> 
> I personally think adding a condition to check if the quirk is set and
> then delaying before sending the first request would be appropriate.
> What are your opinions on this.

Well, put it this way: If you change the existing behavior, that change 
belongs in a separate patch.  If you want to redo this patch so that it 
doesn't change anything when the quirk flag isn't set, that's fine.

> Also is it fine if the string lines exceed 100 columns?

In lines containing long strings, it's okay for the string to extend 
well beyond 80 columns.  But then you should break the line at some 
point closely following the end of the string.  I'm sure you can find 
examples of this if you look through some of the other source files.

> Also, is there a need to check for krealloc()'s return value? Since we
> are only shrinking the buffer, there won't be any moves or completely
> new blocks (at least as per my understanding). Do I still need to
> check its return value for completeness' sake?

It's a little tricky to track this down, but if you look in 
include/linux/slab.h you'll see that krealloc() is defined as 
krealloc_node(), which is defined as krealloc_node_align(), which is 
defined as krealloc_node_align_noprof(), which is declared with 
__must_check.  So yes, you need to check the return value from 
krealloc().

Of course, you could simply try not checking the return value and seeing 
if that provokes a warning or error from the compiler.

Alan Stern

^ permalink raw reply

* Re: [PATCH v8 07/46] KVM: Rename memory attribute APIs to prepare for in-place gmem conversion
From: Ackerley Tng @ 2026-06-24 13:44 UTC (permalink / raw)
  To: Binbin Wu
  Cc: aik, andrew.jones, brauner, chao.p.peng, david, jmattson,
	jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
	wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
	aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
	Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
	Baoquan He, Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <96fb369d-dbff-4ed6-b1f9-0ce63d7d4ed0@linux.intel.com>

Binbin Wu <binbin.wu@linux.intel.com> writes:

>
> [...snip...]
>
>> +static inline bool kvm_mem_range_is_private(struct kvm *kvm, gfn_t start,
>> +					    gfn_t end)
>> +{
>> +	return kvm_range_has_vm_memory_attributes(kvm, start, end,
>> +						  KVM_MEMORY_ATTRIBUTE_PRIVATE,
>> +						  KVM_MEMORY_ATTRIBUTE_PRIVATE);
>>  }
>
> This function is added, but never used in this patch series.
> Is it intended to be called only when CONFIG_KVM_VM_MEMORY_ATTRIBUTES is
> enabled?
>

Thank you for catching this! I think in some earlier revision this was
meant to be used from the guest_memfd populate flow.

I think the version of kvm_gmem_range_is_private in this revision is
good because it is symmetric. If conversion is enabled, call the gmem
range-has-attributes function, and if conversion is disabled, use the VM
range-has-attributes function.

Sean, if no new revision is needed would you be able to drop
kvm_mem_range_is_private() while you're pulling it in?

>>
>> [...snip...]
>>

^ permalink raw reply

* Re: [PATCH v12 07/12] static_call: Define EXPORT_STATIC_CALL_FOR_MODULES()
From: Sean Christopherson @ 2026-06-24 12:59 UTC (permalink / raw)
  To: Pawan Gupta
  Cc: x86, Jon Kohler, Nikolay Borisov, H. Peter Anvin, Josh Poimboeuf,
	David Kaplan, Borislav Petkov, Dave Hansen, Peter Zijlstra,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, KP Singh,
	Jiri Olsa, David S. Miller, David Laight, Andy Lutomirski,
	Thomas Gleixner, Ingo Molnar, David Ahern, Martin KaFai Lau,
	Eduard Zingerman, Song Liu, Yonghong Song, John Fastabend,
	Stanislav Fomichev, Hao Luo, Paolo Bonzini, Jonathan Corbet,
	Jason Baron, Alice Ryhl, Steven Rostedt, Ard Biesheuvel,
	Shuah Khan, linux-kernel, kvm, Asit Mallick, Tao Zhang, bpf,
	netdev, linux-doc
In-Reply-To: <20260622-vmscape-bhb-v12-7-76cbda0ae3e5@linux.intel.com>

[-- Attachment #1: Type: text/plain, Size: 3538 bytes --]

On Tue, Jun 23, 2026, Pawan Gupta wrote:
> There is EXPORT_STATIC_CALL_TRAMP() that hides the static key from all
> modules. But there is no equivalent of EXPORT_SYMBOL_FOR_MODULES() to
> restrict symbol visibility to only certain modules.
> 
> Add EXPORT_STATIC_CALL_FOR_MODULES(name, mods) that wraps both the key and
> the trampoline with EXPORT_SYMBOL_FOR_MODULES(), allowing only a limited
> set of modules to see and update the static key.
> 
> The immediate user is KVM, in the following commit.
> 
> checkpatch reported below warnings with this change that I believe don't
> apply in this case:
> 
>   include/linux/static_call.h:219: WARNING: Non-declarative macros with multiple statements should be enclosed in a do - while loop
>   include/linux/static_call.h:220: WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> ---
>  include/linux/static_call.h | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/include/linux/static_call.h b/include/linux/static_call.h
> index 78a77a4ae0ea..b610afd1ed55 100644
> --- a/include/linux/static_call.h
> +++ b/include/linux/static_call.h
> @@ -216,6 +216,9 @@ extern long __static_call_return0(void);
>  #define EXPORT_STATIC_CALL_GPL(name)					\
>  	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));			\
>  	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
> +#define EXPORT_STATIC_CALL_FOR_MODULES(name, mods)			\
> +	EXPORT_SYMBOL_FOR_MODULES(STATIC_CALL_KEY(name), mods);		\
> +	EXPORT_SYMBOL_FOR_MODULES(STATIC_CALL_TRAMP(name), mods)
>  
>  /* Leave the key unexported, so modules can't change static call targets: */
>  #define EXPORT_STATIC_CALL_TRAMP(name)					\
> @@ -276,6 +279,9 @@ extern long __static_call_return0(void);
>  #define EXPORT_STATIC_CALL_GPL(name)					\
>  	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));			\
>  	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
> +#define EXPORT_STATIC_CALL_FOR_MODULES(name, mods)			\
> +	EXPORT_SYMBOL_FOR_MODULES(STATIC_CALL_KEY(name), mods);		\
> +	EXPORT_SYMBOL_FOR_MODULES(STATIC_CALL_TRAMP(name), mods)
>  
>  /* Leave the key unexported, so modules can't change static call targets: */
>  #define EXPORT_STATIC_CALL_TRAMP(name)					\
> @@ -346,6 +352,8 @@ static inline int static_call_text_reserved(void *start, void *end)
>  
>  #define EXPORT_STATIC_CALL(name)	EXPORT_SYMBOL(STATIC_CALL_KEY(name))
>  #define EXPORT_STATIC_CALL_GPL(name)	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name))
> +#define EXPORT_STATIC_CALL_FOR_MODULES(name, mods)			\
> +	EXPORT_SYMBOL_FOR_MODULES(STATIC_CALL_KEY(name), mods)
>  
>  #endif /* CONFIG_HAVE_STATIC_CALL */

Drat, I forgot about this.  Exporting static call trampolines for KVM came up in
another conversation[*].  I had already put together patches to effectively default
to exporting only the trampoline, and also to deduplicate this code so that the
CONFIG_HAVE_STATIC_CALL_INLINE=y / CONFIG_HAVE_STATIC_CALL=y / CONFIG_HAVE_STATIC_CALL=n
implementations don't need to copy+paste the same lines of code.

The attached patches touch a lot more code, and will conflict mightily with KVM
changes I want to land in 7.3 (more use of a static_call in KVM).  But if we get
them applied (to tip tree) shortly after 7.2-rc1 and provide a topic branch/tag,
then there shouldn't be too much juggling needed?

If we want to go with the more aggressive cleanup, I'll formally post the patches.

[*] https://lore.kernel.org/all/ahhoDGUz39KSGZ6o@google.com

[-- Attachment #2: 0001-static_call-Add-stub-for-ARCH_ADD_TRAMP_KEY-if-not-p.patch --]
[-- Type: text/x-diff, Size: 1244 bytes --]

From 415eb214ed200ef82244468a0682ac884b14c051 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 28 May 2026 08:09:56 -0700
Subject: [PATCH 1/4] static_call: Add stub for ARCH_ADD_TRAMP_KEY if not
 provided by arch

Add a dummy #define for ARCH_ADD_TRAMP_KEY if one is not provided by arch
code so that EXPORT_STATIC_CALL_TRAMP{,_GPL} can be used in arch-neutral
code.

No functional change intended.

Fixes: 73f44fe19d35 ("static_call: Allow module use without exposing static_call_key")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/static_call.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 78a77a4ae0ea..7539c82dd35f 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -210,6 +210,10 @@ extern long __static_call_return0(void);
 
 #define static_call_cond(name)	(void)__static_call(name)
 
+#ifndef ARCH_ADD_TRAMP_KEY
+#define ARCH_ADD_TRAMP_KEY(name)
+#endif
+
 #define EXPORT_STATIC_CALL(name)					\
 	EXPORT_SYMBOL(STATIC_CALL_KEY(name));				\
 	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))

base-commit: 9d4853b044beefa21c4ee3e18c40653601a64ced
-- 
2.55.0.rc0.799.gd6f94ed593-goog


[-- Attachment #3: 0002-KVM-x86-Don-t-export-static-call-keys-to-vendor-modu.patch --]
[-- Type: text/x-diff, Size: 2823 bytes --]

From 0166d0df57a43f20a24f5b75a6c34929221c1f30 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 28 May 2026 07:01:36 -0700
Subject: [PATCH 2/4] KVM: x86: Don't export static call keys to vendor modules

Export only the trampoline, not the full trampoline+key, of KVM's static
calls that are also used by vendor modules, to harden KVM against unwanted
modifications of the static calls, i.e. to allow vendor code to invoke the
static call, but not redirect it.

Use static_call_mod() instead of the vanilla static_call(), which is
required by the objtool magic to glue things together when exporting only
the trampoline.

No functional change intended.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Closes: https://lore.kernel.org/all/20260528091357.GB343181@noisy.programming.kicks-ass.net
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/pmu.h              | 2 +-
 arch/x86/kvm/x86.c              | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index eee473717c0e..55d674c647e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2061,7 +2061,7 @@ extern bool __read_mostly enable_ipiv;
 extern bool __read_mostly enable_device_posted_irqs;
 extern struct kvm_x86_ops kvm_x86_ops;
 
-#define kvm_x86_call(func) static_call(kvm_x86_##func)
+#define kvm_x86_call(func) static_call_mod(kvm_x86_##func)
 
 #define KVM_X86_OP(func) \
 	DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index a5821d7c87f9..77cdee3e4aa6 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -54,7 +54,7 @@ struct kvm_pmu_ops {
 	const u32 MSR_STRIDE;
 };
 
-#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func)
+#define kvm_pmu_call(func) static_call_mod(kvm_x86_pmu_##func)
 
 #define KVM_X86_PMU_OP(func) \
 	DECLARE_STATIC_CALL(kvm_x86_pmu_##func, *(((struct kvm_pmu_ops *)0)->func));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d9d51803b7b2..792f402f493f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -148,9 +148,9 @@ struct kvm_x86_ops kvm_x86_ops __read_mostly;
 #define KVM_X86_OP_OPTIONAL KVM_X86_OP
 #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
 #include <asm/kvm-x86-ops.h>
-EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
-EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
-EXPORT_STATIC_CALL_GPL(kvm_x86_get_cpl);
+EXPORT_STATIC_CALL_TRAMP_GPL(kvm_x86_get_cs_db_l_bits);
+EXPORT_STATIC_CALL_TRAMP_GPL(kvm_x86_cache_reg);
+EXPORT_STATIC_CALL_TRAMP_GPL(kvm_x86_get_cpl);
 
 static bool __read_mostly ignore_msrs = 0;
 module_param(ignore_msrs, bool, 0644);
-- 
2.55.0.rc0.799.gd6f94ed593-goog


[-- Attachment #4: 0003-static_call-Restrict-exporting-of-static-call-key-to.patch --]
[-- Type: text/x-diff, Size: 10765 bytes --]

From 87068920d63a87235ae9a1e62529a4d897bd6c6c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 28 May 2026 07:03:24 -0700
Subject: [PATCH 3/4] static_call: Restrict exporting of static call *key* to
 tracepoints

Rename the export macros for static call trampolines and keys so that the
"default" EXPORT_STATIC_CALL{,_GPL}() exports only the trampoline, and full
exports of trampoline+key pairs is restricted to tracepoints (by naming
convention).  Most developers are blissfully unaware of the gory details of
static calls, and so don't understand the implications of using the
innocuous-looking "vanilla" macros.

Effectively defaulting to exporting the key is undesirable as there is no
known use case for allowing a module to change an export static call's
target, outside of tracepoints.

Opportunistically massage the macro magic to deduplicate the
CONFIG_HAVE_STATIC_CALL_INLINE=y vs. CONFIG_HAVE_STATIC_CALL=y vs.
CONFIG_HAVE_STATIC_CALL=n implementations.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/events/amd/brs.c      |  2 +-
 arch/x86/kernel/apic/init.c    |  4 +--
 arch/x86/kernel/cpu/mshyperv.c |  2 +-
 arch/x86/kernel/traps.c        |  2 +-
 arch/x86/kvm/x86.c             |  6 ++--
 arch/x86/xen/enlighten.c       |  2 +-
 include/linux/static_call.h    | 59 +++++++++++++---------------------
 include/linux/tracepoint.h     |  4 +--
 kernel/sched/core.c            |  8 ++---
 9 files changed, 38 insertions(+), 51 deletions(-)

diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index 06f35a6b58a5..b9a246989bd4 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -424,7 +424,7 @@ void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
 }
 
 DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
-EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
+EXPORT_STATIC_CALL_GPL(perf_lopwr_cb);
 
 void __init amd_brs_lopwr_init(void)
 {
diff --git a/arch/x86/kernel/apic/init.c b/arch/x86/kernel/apic/init.c
index 821e2e536f19..933b8d2d3af5 100644
--- a/arch/x86/kernel/apic/init.c
+++ b/arch/x86/kernel/apic/init.c
@@ -30,8 +30,8 @@ DEFINE_APIC_CALL(wakeup_secondary_cpu);
 DEFINE_APIC_CALL(wakeup_secondary_cpu_64);
 DEFINE_APIC_CALL(write);
 
-EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_mask);
-EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_self);
+EXPORT_STATIC_CALL_GPL(apic_call_send_IPI_mask);
+EXPORT_STATIC_CALL_GPL(apic_call_send_IPI_self);
 
 /* The container for function call overrides */
 struct apic_override __x86_apic_override __initdata;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index b5b6a58b67b0..9adfc12be1db 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -333,7 +333,7 @@ static void __init x86_setup_ops_for_tsc_pg_clock(void)
 
 #ifdef CONFIG_X86_64
 DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
-EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
+EXPORT_STATIC_CALL_GPL(hv_hypercall);
 #define hypercall_update(hc) static_call_update(hv_hypercall, hc)
 #endif
 #endif /* CONFIG_HYPERV */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0ca3912ecb7f..df05ad454414 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -218,7 +218,7 @@ static inline unsigned long pt_regs_val(struct pt_regs *regs, int nr)
 
 #ifdef HAVE_ARCH_BUG_FORMAT_ARGS
 DEFINE_STATIC_CALL(WARN_trap, __WARN_trap);
-EXPORT_STATIC_CALL_TRAMP(WARN_trap);
+EXPORT_STATIC_CALL(WARN_trap);
 
 /*
  * Create a va_list from an exception context.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 792f402f493f..d9d51803b7b2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -148,9 +148,9 @@ struct kvm_x86_ops kvm_x86_ops __read_mostly;
 #define KVM_X86_OP_OPTIONAL KVM_X86_OP
 #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
 #include <asm/kvm-x86-ops.h>
-EXPORT_STATIC_CALL_TRAMP_GPL(kvm_x86_get_cs_db_l_bits);
-EXPORT_STATIC_CALL_TRAMP_GPL(kvm_x86_cache_reg);
-EXPORT_STATIC_CALL_TRAMP_GPL(kvm_x86_get_cpl);
+EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
+EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
+EXPORT_STATIC_CALL_GPL(kvm_x86_get_cpl);
 
 static bool __read_mostly ignore_msrs = 0;
 module_param(ignore_msrs, bool, 0644);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 23b91bf9b663..ec14d2017909 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -23,7 +23,7 @@
 #include "xen-ops.h"
 
 DEFINE_STATIC_CALL(xen_hypercall, xen_hypercall_hvm);
-EXPORT_STATIC_CALL_TRAMP(xen_hypercall);
+EXPORT_STATIC_CALL(xen_hypercall);
 
 /*
  * Pointer to the xen_vcpu_info structure or
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 7539c82dd35f..c2c667baf8fe 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -26,7 +26,7 @@
  *   static_call_update(name, func);
  *   static_call_query(name);
  *
- *   EXPORT_STATIC_CALL{,_TRAMP}{,_GPL}()
+ *   EXPORT_STATIC_CALL{,_GPL}()
  *
  * Usage example:
  *
@@ -121,14 +121,6 @@
  *   completely eliding any function call overhead.
  *
  *   Notably argument setup is unconditional.
- *
- *
- * EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_TRAMP():
- *
- *   The difference is that the _TRAMP variant tries to only export the
- *   trampoline with the result that a module can use static_call{,_cond}() but
- *   not static_call_update().
- *
  */
 
 #include <linux/types.h>
@@ -214,19 +206,8 @@ extern long __static_call_return0(void);
 #define ARCH_ADD_TRAMP_KEY(name)
 #endif
 
-#define EXPORT_STATIC_CALL(name)					\
-	EXPORT_SYMBOL(STATIC_CALL_KEY(name));				\
-	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
-#define EXPORT_STATIC_CALL_GPL(name)					\
-	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));			\
-	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
-
-/* Leave the key unexported, so modules can't change static call targets: */
-#define EXPORT_STATIC_CALL_TRAMP(name)					\
-	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name));				\
-	ARCH_ADD_TRAMP_KEY(name)
-#define EXPORT_STATIC_CALL_TRAMP_GPL(name)				\
-	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name));			\
+#define __EXPORT_STATIC_CALL(name, scope)				\
+	EXPORT_SYMBOL##scope(STATIC_CALL_TRAMP(name));			\
 	ARCH_ADD_TRAMP_KEY(name)
 
 #elif defined(CONFIG_HAVE_STATIC_CALL)
@@ -274,18 +255,8 @@ static inline int static_call_text_reserved(void *start, void *end)
 
 extern long __static_call_return0(void);
 
-#define EXPORT_STATIC_CALL(name)					\
-	EXPORT_SYMBOL(STATIC_CALL_KEY(name));				\
-	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
-#define EXPORT_STATIC_CALL_GPL(name)					\
-	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));			\
-	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
-
-/* Leave the key unexported, so modules can't change static call targets: */
-#define EXPORT_STATIC_CALL_TRAMP(name)					\
-	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
-#define EXPORT_STATIC_CALL_TRAMP_GPL(name)				\
-	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
+#define __EXPORT_STATIC_CALL(name, scope)				\
+	EXPORT_SYMBOL##scope(STATIC_CALL_TRAMP(name))
 
 #else /* Generic implementation */
 
@@ -348,9 +319,25 @@ static inline int static_call_text_reserved(void *start, void *end)
 	return 0;
 }
 
-#define EXPORT_STATIC_CALL(name)	EXPORT_SYMBOL(STATIC_CALL_KEY(name))
-#define EXPORT_STATIC_CALL_GPL(name)	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name))
+#define __EXPORT_STATIC_CALL(name, scope)
 
 #endif /* CONFIG_HAVE_STATIC_CALL */
 
+#define __EXPORT_TRACEPOINT_STATIC_CALL(name, scope)			\
+	EXPORT_SYMBOL##scope(STATIC_CALL_KEY(name));			\
+	__EXPORT_STATIC_CALL(name, scope)
+#define EXPORT_TRACEPOINT_STATIC_CALL(name)				\
+	__EXPORT_TRACEPOINT_STATIC_CALL(name, )
+#define EXPORT_TRACEPOINT_STATIC_CALL_GPL(name)				\
+	__EXPORT_TRACEPOINT_STATIC_CALL(name, _GPL)
+
+/*
+ * For non-tracepoint usage, leave the key unexported, so modules can't change
+ * static call targets, i.e. can only invoke the static call.
+ */
+#define EXPORT_STATIC_CALL(name)					\
+	__EXPORT_STATIC_CALL(name, )
+#define EXPORT_STATIC_CALL_GPL(name)					\
+	__EXPORT_STATIC_CALL(name, _GPL)
+
 #endif /* _LINUX_STATIC_CALL_H */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2d2b9f8cdda4..1b64dcaf683e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -423,12 +423,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	TRACEPOINT_CHECK(name)						\
 	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
 	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
-	EXPORT_STATIC_CALL_GPL(tp_func_##name)
+	EXPORT_TRACEPOINT_STATIC_CALL_GPL(tp_func_##name)
 #define EXPORT_TRACEPOINT_SYMBOL(name)					\
 	TRACEPOINT_CHECK(name)						\
 	EXPORT_SYMBOL(__tracepoint_##name);				\
 	EXPORT_SYMBOL(__traceiter_##name);				\
-	EXPORT_STATIC_CALL(tp_func_##name)
+	EXPORT_TRACEPOINT_STATIC_CALL(tp_func_##name)
 
 
 #else /* !TRACEPOINTS_ENABLED */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8871449d3c6..c4d0db00d036 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7403,7 +7403,7 @@ EXPORT_SYMBOL(preempt_schedule);
 #   define preempt_schedule_dynamic_disabled	NULL
 #  endif
 DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
-EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
+EXPORT_STATIC_CALL(preempt_schedule);
 # elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
 static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule);
 void __sched notrace dynamic_preempt_schedule(void)
@@ -7476,7 +7476,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 #   define preempt_schedule_notrace_dynamic_disabled	NULL
 #  endif
 DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
-EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
+EXPORT_STATIC_CALL(preempt_schedule_notrace);
 # elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
 static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace);
 void __sched notrace dynamic_preempt_schedule_notrace(void)
@@ -7723,12 +7723,12 @@ EXPORT_SYMBOL(__cond_resched);
 #  define cond_resched_dynamic_enabled	__cond_resched
 #  define cond_resched_dynamic_disabled	((void *)&__static_call_return0)
 DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
-EXPORT_STATIC_CALL_TRAMP(cond_resched);
+EXPORT_STATIC_CALL(cond_resched);
 
 #  define might_resched_dynamic_enabled	__cond_resched
 #  define might_resched_dynamic_disabled ((void *)&__static_call_return0)
 DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
-EXPORT_STATIC_CALL_TRAMP(might_resched);
+EXPORT_STATIC_CALL(might_resched);
 # elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
 static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
 int __sched dynamic_cond_resched(void)
-- 
2.55.0.rc0.799.gd6f94ed593-goog


[-- Attachment #5: 0004-static_call-Add-FOR_MODULES-static-call-exports-use-.patch --]
[-- Type: text/x-diff, Size: 4948 bytes --]

From 05623900bddabdc47e6797b72a610b1d4f825e1d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 28 May 2026 07:32:41 -0700
Subject: [PATCH 4/4] static_call: Add FOR_MODULES static call exports, use 'em
 in KVM

Add EXPORT_STATIC_CALL_FOR_MODULES(), along with KVM-specific variants,
and use the KVM-internal variants to export KVM's internal static calls
only to KVM's vendor modules (if any exist).

For all intents and purposes, no functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/pmu.c          |  2 +-
 arch/x86/kvm/x86.c          |  6 +++---
 include/linux/kvm_types.h   |  8 ++++++++
 include/linux/static_call.h | 12 +++++++-----
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index b92dd2e58335..7e4f6e5ff436 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -100,7 +100,7 @@ static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;
 #define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
 #define KVM_X86_PMU_OP_OPTIONAL_RET0 KVM_X86_PMU_OP
 #include <asm/kvm-x86-pmu-ops.h>
-EXPORT_STATIC_CALL_GPL(kvm_x86_pmu_pmc_is_disabled_in_current_mode);
+EXPORT_STATIC_CALL_FOR_KVM_INTERNAL(kvm_x86_pmu_pmc_is_disabled_in_current_mode);
 
 void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d9d51803b7b2..6df084d827b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -148,9 +148,9 @@ struct kvm_x86_ops kvm_x86_ops __read_mostly;
 #define KVM_X86_OP_OPTIONAL KVM_X86_OP
 #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
 #include <asm/kvm-x86-ops.h>
-EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
-EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
-EXPORT_STATIC_CALL_GPL(kvm_x86_get_cpl);
+EXPORT_STATIC_CALL_FOR_KVM_INTERNAL(kvm_x86_get_cs_db_l_bits);
+EXPORT_STATIC_CALL_FOR_KVM_INTERNAL(kvm_x86_cache_reg);
+EXPORT_STATIC_CALL_FOR_KVM_INTERNAL(kvm_x86_get_cpl);
 
 static bool __read_mostly ignore_msrs = 0;
 module_param(ignore_msrs, bool, 0644);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a568d8e6f4e8..3bf9d113b001 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -13,8 +13,14 @@
 	EXPORT_SYMBOL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES))
 #define EXPORT_SYMBOL_FOR_KVM(symbol) \
 	EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm," __stringify(KVM_SUB_MODULES))
+#define EXPORT_STATIC_CALL_FOR_KVM_INTERNAL(symbol) \
+	EXPORT_STATIC_CALL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES))
+#define EXPORT_STATIC_CALL_FOR_KVM(symbol) \
+	EXPORT_STATIC_CALL_FOR_MODULES(symbol, "kvm," __stringify(KVM_SUB_MODULES))
 #else
 #define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol)
+#define EXPORT_STATIC_CALL_FOR_KVM_INTERNAL(symbol)
+
 /*
  * Allow architectures to provide a custom EXPORT_SYMBOL_FOR_KVM, but only
  * if there are no submodules, e.g. to allow suppressing exports if KVM=m, but
@@ -23,8 +29,10 @@
 #ifndef EXPORT_SYMBOL_FOR_KVM
 #if IS_MODULE(CONFIG_KVM)
 #define EXPORT_SYMBOL_FOR_KVM(symbol) EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm")
+#define EXPORT_STATIC_CALL_FOR_KVM(symbol) EXPORT_STATIC_CALL_FOR_MODULES(symbol, "kvm")
 #else
 #define EXPORT_SYMBOL_FOR_KVM(symbol)
+#define EXPORT_STATIC_CALL_FOR_KVM(symbol)
 #endif /* IS_MODULE(CONFIG_KVM) */
 #endif /* EXPORT_SYMBOL_FOR_KVM */
 #endif
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index c2c667baf8fe..9b38f82b35c4 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -206,8 +206,8 @@ extern long __static_call_return0(void);
 #define ARCH_ADD_TRAMP_KEY(name)
 #endif
 
-#define __EXPORT_STATIC_CALL(name, scope)				\
-	EXPORT_SYMBOL##scope(STATIC_CALL_TRAMP(name));			\
+#define __EXPORT_STATIC_CALL(name, scope, ...)				\
+	EXPORT_SYMBOL##scope(STATIC_CALL_TRAMP(name), ##__VA_ARGS__);	\
 	ARCH_ADD_TRAMP_KEY(name)
 
 #elif defined(CONFIG_HAVE_STATIC_CALL)
@@ -255,8 +255,8 @@ static inline int static_call_text_reserved(void *start, void *end)
 
 extern long __static_call_return0(void);
 
-#define __EXPORT_STATIC_CALL(name, scope)				\
-	EXPORT_SYMBOL##scope(STATIC_CALL_TRAMP(name))
+#define __EXPORT_STATIC_CALL(name, scope, ...)				\
+	EXPORT_SYMBOL##scope(STATIC_CALL_TRAMP(name), ##__VA_ARGS__)
 
 #else /* Generic implementation */
 
@@ -319,7 +319,7 @@ static inline int static_call_text_reserved(void *start, void *end)
 	return 0;
 }
 
-#define __EXPORT_STATIC_CALL(name, scope)
+#define __EXPORT_STATIC_CALL(name, scope, ...)
 
 #endif /* CONFIG_HAVE_STATIC_CALL */
 
@@ -339,5 +339,7 @@ static inline int static_call_text_reserved(void *start, void *end)
 	__EXPORT_STATIC_CALL(name, )
 #define EXPORT_STATIC_CALL_GPL(name)					\
 	__EXPORT_STATIC_CALL(name, _GPL)
+#define EXPORT_STATIC_CALL_FOR_MODULES(name, mods)				\
+	__EXPORT_STATIC_CALL(name, _FOR_MODULES, mods)
 
 #endif /* _LINUX_STATIC_CALL_H */
-- 
2.55.0.rc0.799.gd6f94ed593-goog


^ permalink raw reply related

* [PATCH 0/2] Document amd_pmc delay_suspend quirk
From: Daniel Gibson @ 2026-06-24 12:43 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Jonathan Corbet, Shuah Khan, linux-kernel,
	linux-doc, Ilpo Järvinen
  Cc: Daniel Gibson

Recently I submitted patches that work around an EC bug in some Lenovo
IdeaPad laptops by adding a quirk to amd_pmc that is enabled
automatically for detected devices and can be enforced (to test on
possibly affected devices that aren't detected yet) with the
delay_suspend module parameter. The quirk makes the kernel sleep
for 2.5 seconds before actually suspending.

Now that those patches are in mainline and their commit IDs are known,
I submit the documentation for them, which refers the commit IDs.

There's also another small change to the same documentation file that
clarifies how to identify kernel log lines with information from the
reset register, because so far it was only mentioned that this
information is logged, but not how to find it.

See also https://bugzilla.kernel.org/show_bug.cgi?id=221383 and
https://lore.kernel.org/platform-driver-x86/20260611150426.3683372-1-daniel@gibson.sh/T/#u

These documentation changes were last discussed here:
https://lore.kernel.org/platform-driver-x86/20260509013105.816339-5-daniel@gibson.sh/t/#u

Daniel Gibson (2):
  Documentation/arch/x86/amd-debugging: Add example for reset register
  Documentation/arch/x86/amd-debugging: Add section about delay_suspend

 Documentation/arch/x86/amd-debugging.rst | 30 ++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

-- 
2.48.1

^ permalink raw reply

* [PATCH 2/2] Documentation/arch/x86/amd-debugging: Add section about delay_suspend
From: Daniel Gibson @ 2026-06-24 12:43 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Jonathan Corbet, Shuah Khan, linux-kernel,
	linux-doc, Ilpo Järvinen
  Cc: Daniel Gibson
In-Reply-To: <20260624124326.746525-1-daniel@gibson.sh>

Some Lenovo IdeaPad Slim 3 devices and similar with AMD CPUs (so far
observed with Zen3 and Zen3+ CPUs) have a nonfunctional keyboard and
lid switch after s2idle.

It helps to delay suspend by 2.5 seconds so the EC has some time
to do whatever it needs to get done before suspend.

Devices known to be affected are matched automatically, others can
enforce the delay with a amd_pmc module parameter.

This is now documented in amd-debugging.rst

Signed-off-by: Daniel Gibson <daniel@gibson.sh>
---
 Documentation/arch/x86/amd-debugging.rst | 25 ++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/Documentation/arch/x86/amd-debugging.rst b/Documentation/arch/x86/amd-debugging.rst
index 3176a1240fee..3725adb42673 100644
--- a/Documentation/arch/x86/amd-debugging.rst
+++ b/Documentation/arch/x86/amd-debugging.rst
@@ -249,6 +249,31 @@ state entry.
 
 `commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
 
+Keyboard and Lid Switch stop working after resume
+-------------------------------------------------
+On various variants of the Lenovo IdeaPad Slim 3 with Barcelo and Rembrandt CPUs
+the lid switch and keyboard, or at least the Fn/Multimedia keys, stopped working
+after resume, until the next reboot.
+
+This was caused by buggy firmware having timing probles, the EC needed some idle
+time right before the CPU cores are suspended, or it got into an inconsistent state.
+
+For laptops that are known to be affected this workaround is enabled
+automatically, to test this workaround on other machines you can set the
+``delay_suspend`` parameter of the ``amd_pmc`` module.
+
+If you need to set the ``delay_suspend`` parameter to fix your machine, please
+report this at platform-driver-x86@vger.kernel.org for it to be added to the
+list of devices that need this workaround, so in future kernel versions it's
+enabled automatically.
+
+Note that the current workaround isn't perfect: On some devices the problems still
+happen if resume is triggered by a timer (wakealarm).
+
+`commit 9b9e60dd31da0 ("platform/x86/amd/pmc: Delay suspend for some Lenovo Laptops") <https://git.kernel.org/torvalds/c/9b9e60dd31da0>`_
+
+`commit 428b9fd2dce50 ("platform/x86/amd/pmc: Add delay_suspend module parameter") <https://git.kernel.org/torvalds/c/428b9fd2dce50>`_
+
 Runtime power consumption issues
 ================================
 
-- 
2.48.1


^ permalink raw reply related

* [PATCH 1/2] Documentation/arch/x86/amd-debugging: Add example for reset register
From: Daniel Gibson @ 2026-06-24 12:43 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Jonathan Corbet, Shuah Khan, linux-kernel,
	linux-doc, Ilpo Järvinen
  Cc: Daniel Gibson, Mario Limonciello (AMD)
In-Reply-To: <20260624124326.746525-1-daniel@gibson.sh>

The amd debugging documentation didn't state how to identify kernel log
lines with information from the reset register about the cause of a
previous random reboot.

The added example rectifies this.

Suggested-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Daniel Gibson <daniel@gibson.sh>
---
 Documentation/arch/x86/amd-debugging.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/arch/x86/amd-debugging.rst b/Documentation/arch/x86/amd-debugging.rst
index d92bf59d62c7..3176a1240fee 100644
--- a/Documentation/arch/x86/amd-debugging.rst
+++ b/Documentation/arch/x86/amd-debugging.rst
@@ -366,3 +366,8 @@ There are 6 classes of reasons for the reboot:
 This information is read by the kernel at bootup and printed into
 the syslog. When a random reboot occurs this message can be helpful
 to determine the next component to debug.
+
+For example, if bit 19 was set, you will get a message like this in the log on
+next bootup::
+
+  x86/amd: Previous system reset reason [0x00080000]: software wrote 0x6 to reset control register 0xCF9
-- 
2.48.1


^ permalink raw reply related

* [PATCH] docs: kdoc: fix troff output description typo
From: Yousef Alhouseen @ 2026-06-24 12:24 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: Shuah Khan, linux-doc, linux-kernel, Yousef Alhouseen

Fix a typo in the ManFormat documentation string that describes the
generated troff title header fields.

Signed-off-by: Yousef Alhouseen <alhouseenyousef@gmail.com>
---
 tools/lib/python/kdoc/kdoc_output.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index de107ab4a..618b0d765 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -624,7 +624,7 @@ class ManFormat(OutputFormat):
     ``manual``
         Defaults to ``Kernel API Manual``.
 
-    The above controls the output of teh corresponding fields on troff
+    The above controls the output of the corresponding fields on troff
     title headers, which will be filled like this::
 
         .TH "{name}" {section} "{date}" "{modulename}" "{manual}"
-- 
2.54.0


^ permalink raw reply related

* Re: [PATCH v12 02/12] x86/bhi: Make clear_bhb_loop() effective on newer CPUs
From: Nikolay Borisov @ 2026-06-24 12:12 UTC (permalink / raw)
  To: Pawan Gupta, x86, Jon Kohler, H. Peter Anvin, Josh Poimboeuf,
	David Kaplan, Sean Christopherson, Borislav Petkov, Dave Hansen,
	Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, KP Singh, Jiri Olsa, David S. Miller,
	David Laight, Andy Lutomirski, Thomas Gleixner, Ingo Molnar,
	David Ahern, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, John Fastabend, Stanislav Fomichev, Hao Luo,
	Paolo Bonzini, Jonathan Corbet, Jason Baron, Alice Ryhl,
	Steven Rostedt, Ard Biesheuvel, Shuah Khan
  Cc: linux-kernel, kvm, Asit Mallick, Tao Zhang, bpf, netdev,
	linux-doc
In-Reply-To: <20260622-vmscape-bhb-v12-2-76cbda0ae3e5@linux.intel.com>



On 23.06.26 г. 20:33 ч., Pawan Gupta wrote:
> As a mitigation for BHI, clear_bhb_loop() executes branches that overwrite
> the Branch History Buffer (BHB). On Alder Lake and newer parts this
> sequence is not sufficient because it doesn't clear enough entries. This
> was not an issue because these CPUs use the BHI_DIS_S hardware mitigation
> in the kernel.
> 
> Now with VMSCAPE (BHI variant) it is also required to isolate branch
> history between guests and userspace. Since BHI_DIS_S only protects the
> kernel, the newer CPUs also use IBPB.
> 
> A cheaper alternative to the current IBPB mitigation is clear_bhb_loop().
> But it currently does not clear enough BHB entries to be effective on newer
> CPUs with larger BHB. At boot, dynamically set the loop count of
> clear_bhb_loop() such that it is effective on newer CPUs too.
> 
> Introduce global loop counts, initializing them with appropriate value
> based on the hardware feature X86_FEATURE_BHI_CTRL.
> 
> Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
> Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>

Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>

Although AI brings up a valid argument about whether guests should be 
pessimized and fallback to the longer sequence ?

^ permalink raw reply

* Re: [PATCH v4 1/5] mm/zswap: Extend shrink_memcg() writeback capability
From: Hao Jia @ 2026-06-24 11:58 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: akpm, tj, hannes, shakeel.butt, mhocko, mkoutny, nphamcs,
	chengming.zhou, muchun.song, roman.gushchin, linux-mm,
	linux-kernel, linux-doc, Hao Jia
In-Reply-To: <CAO9r8zMgaqP=n6rmhnMU+qhp1Www1Y5kdbLTLX1v=fj_ybHyiw@mail.gmail.com>



On 2026/6/24 02:17, Yosry Ahmed wrote:
>> My initial thought was that if cold memory is evenly distributed across
>> nodes and we are doing a large writeback, it would be better to balance
>> the zswap entry writeback across all nodes rather than just draining
>> node 0 first. However, since we currently lack a proper metric to
>> represent hot/cold memory (such as age-based tracking), doing this
>> probably doesn't make much sense right now.
> 
> Yeah let's start simple and go from there.
> 
>>
>> So, perhaps we want something like this? Please correct me if I'm wrong.
>>
>> static long shrink_memcg(struct mem_cgroup *memcg,
>>          unsigned long nr_to_scan)
>> {
>>     struct zswap_shrink_walk_arg walk_arg = {
>>       .bytes_written = 0,
>>       .encountered_page_in_swapcache = false,
>>     };
>>     unsigned long nr_remaining = nr_to_scan;
>>     bool memcg_list_is_empty = true;
>>     int nid;
>>
>>     if (!mem_cgroup_zswap_writeback_enabled(memcg))
>>       return -ENOENT;
>>
>>     if (memcg && !mem_cgroup_online(memcg))
>>       return -ENOENT;
>>
>>     for_each_node_state(nid, N_NORMAL_MEMORY) {
>>       unsigned long nr_to_walk;
>>
>>       /*
>>        * Cap the per-node scan by the current LRU length. A referenced
>>        * entry is only rotated to the tail (second chance) and may be
>>        * revisited within a single walk; without this cap those rotated
>>        * entries could drain the shared scan budget on one node.
>>        */
> 
> The comment here is a bit misleading. It's not just about draining one
> node. One call to shrink_memcg() should only scan entries once. The
> caller can then choose to scan the memcg again, or scan a different
> one. In this case, the caller should iterate all memcgs first before
> retrying memcgs again and reclaiming rotated entries.

I have updated the comment. Please see below.
> 
>>       nr_to_walk = min(nr_remaining,
>>            list_lru_count_one(&zswap_list_lru, nid, memcg));
>>       if (!nr_to_walk)
>>         continue;
>>       memcg_list_is_empty = false;
>>
>>       nr_remaining -= nr_to_walk;
>>       list_lru_walk_one(&zswap_list_lru, nid, memcg,
>>             &shrink_memcg_cb, &walk_arg, &nr_to_walk);
>>       /* Return the unused share of the budget to the pool. */
>>       nr_remaining += nr_to_walk;
>>
>>       /* Bail out once the whole scan budget has been spent. */
> 
> The comment is unnecessary.

I'll do this, thanks.
> 
>>       if (!nr_remaining)
>>         break;
>>
>>       cond_resched();
> 
> Did you observe a problem here or did you just add this due to an
> abundance of caution?

The cond_resched() here was just out of caution. Given that both callers 
(shrink_worker() and zswap_proactive_writeback()) already have 
rescheduling checks, I suppose we can remove it from here."
> 
>>     }
>>
>>     if (memcg_list_is_empty)
> 
> Do we need memcg_list_is_empty? Can we just check if nr_remaining
> matches nr_to_scan?
> 

indeed.
>>       return -ENOENT;
>>
>>     return walk_arg.bytes_written;
>> }


/*
  * Scan up to @nr_to_scan pages across the per-node zswap LRUs of @memcg
  * and write back the reclaimable ones.
  *
  * Since the second-chance algorithm rotates referenced entries to the
  * LRU tail, the per-node scan is capped at the current LRU length so
  * each entry is scanned at most once per call. It is up to the caller
  * to handle retries, deciding whether to scan the next memcg to complete
  * the full iteration, or to rescan the current memcg to drain its zswap
  * entries.
  *
  * Return: The number of compressed bytes written back (>= 0), or -ENOENT
  * if @memcg has writeback disabled, is a zombie cgroup, or has empty
  * zswap LRUs.
  */
static long shrink_memcg(struct mem_cgroup *memcg, unsigned long nr_to_scan)
{
     struct zswap_shrink_walk_arg walk_arg = {
         .bytes_written = 0,
         .encountered_page_in_swapcache = false,
     };
     unsigned long nr_remaining = nr_to_scan;
     int nid;

     if (!mem_cgroup_zswap_writeback_enabled(memcg))
         return -ENOENT;

     /*
      * Skip zombies because their LRUs are reparented and we would be
      * reclaiming from the parent instead of the dead memcg.
      */
     if (memcg && !mem_cgroup_online(memcg))
         return -ENOENT;

     for_each_node_state(nid, N_NORMAL_MEMORY) {
         unsigned long nr_to_walk;

         /*
          * Cap the walk at the current LRU length to ensure each entry is
          * scanned at most once per call. Referenced entries are rotated
          * to the tail for a second chance, and this bound prevents them
          * from being revisited within a single call. Retries are left to
          * the caller, which can choose to rescan the current memcg or
          * move on to the next one.
          */
         nr_to_walk = min(nr_remaining,
                  list_lru_count_one(&zswap_list_lru, nid, memcg));
         if (!nr_to_walk)
             continue;

         nr_remaining -= nr_to_walk;
         list_lru_walk_one(&zswap_list_lru, nid, memcg, &shrink_memcg_cb,
                   &walk_arg, &nr_to_walk);
         /* Return the unused share of the budget to the pool. */
         nr_remaining += nr_to_walk;

         if (!nr_remaining)
             break;
     }

     /* Nothing was scanned: every LRU under @memcg was empty. */
     if (nr_remaining == nr_to_scan)
         return -ENOENT;

     return walk_arg.bytes_written;
}


Thanks,
Hao

^ permalink raw reply

* Re: [PATCH v4 2/5] mm/zswap: Factor writeback loop out of shrink_worker()
From: Hao Jia @ 2026-06-24 11:55 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: akpm, tj, hannes, shakeel.butt, mhocko, mkoutny, nphamcs,
	chengming.zhou, muchun.song, roman.gushchin, linux-mm,
	linux-kernel, linux-doc, Hao Jia
In-Reply-To: <ajnGTt8tkbAWX8Oc@google.com>



On 2026/6/23 07:36, Yosry Ahmed wrote:
>> +/*
>> + * Walk the memcg tree and write back zswap pages until the
>> + * (lower_pages, upper_pages) window closes, or abort encounter
>> + * MAX_RECLAIM_RETRIES times of the following conditions:
>> + * - No writeback-candidate memcgs found in a memcg tree walk.
>> + * - Shrinking a writeback-candidate memcg failed.
>> + *
>> + * For shrink_worker(), it passes lower=thr and upper=zswap_total_pages().
>> + * The @upper limit is refreshed in each iteration by re-evaluating
>> + * zswap_total_pages(), and the window closes once the total falls
>> + * below the threshold.
> 
> This is the wrong abstraction level, and it's obvious by the fact that
> the function calls zswap_total_pages() again to recalcualte
> 'upper_pages'. It gets much worse in the next patch as well.
> 
> The lower_pages and upper_pages thing is also unnecessarily hard to
> follow.
> 
> The core of the reuse here is the retry logic. So maybe keep the memcg
> iteration in the callers, and define a function that takes in one memcg
> and reclaims one batch from it? failures and attempts can be passed into
> the function to maintain the state across scans of different memcgs,
> like zswap_shrink_walk_arg?
> 
> WDYT?


Perhaps something like this?

struct zswap_shrink_state {
     int attempts;
     int failures;
     bool stop;
};

static bool zswap_shrink_no_candidate(struct zswap_shrink_state *s)
{
     if (!s->attempts && ++s->failures == MAX_RECLAIM_RETRIES)
         return true;

     s->attempts = 0;
     return false;
}

static long zswap_shrink_one(struct mem_cgroup *memcg,
                  struct zswap_shrink_state *s)
{
     long shrunk;

     shrunk = shrink_memcg(memcg, NR_ZSWAP_WB_BATCH);
     if (shrunk == -ENOENT)
         return 0;

     s->attempts++;
     if (shrunk <= 0 && ++s->failures == MAX_RECLAIM_RETRIES)
         s->stop = true;

     return shrunk;
}

static void shrink_worker(struct work_struct *w)
{
     struct zswap_shrink_state s = {};
     unsigned long thr;

     /* Reclaim down to the accept threshold */
     thr = zswap_accept_thr_pages();

     while (zswap_total_pages() > thr) {
         struct mem_cgroup *memcg;

         cond_resched();

         memcg = zswap_iter_global();
         if (!memcg) {
             if (zswap_shrink_no_candidate(&s))
                 break;
             continue;
         }

         zswap_shrink_one(memcg, &s);
         /* Drop the extra reference taken by the iterator. */
         mem_cgroup_put(memcg);
         if (s.stop)
             break;
     }
}

We could also fold the logic of zswap_shrink_no_candidate() into 
zswap_shrink_one(), but adding a !memcg check inside zswap_shrink_one() 
feels a bit awkward.

WDYT?

Thanks,
Hao

^ permalink raw reply

* Re: [RFC PATCH 0/6] mm/damon: hardware-sampled access reports
From: Zeng Heng @ 2026-06-24 11:23 UTC (permalink / raw)
  To: SeongJae Park
  Cc: Ravi Jonnalagadda, akinobu.mita, damon, linux-mm, linux-kernel,
	linux-doc, akpm, corbet, bijan311, ajayjoshi, honggyu.kim,
	yunjeong.mun
In-Reply-To: <20260624001425.77489-1-sj@kernel.org>

Hi SeongJae,

On 2026/6/24 8:14, SeongJae Park wrote:
> Hello Zeng,
> 
> On Tue, 23 Jun 2026 22:08:03 +0800 Zeng Heng <zengheng4@huawei.com> wrote:
> 
>> Hi Ravi,
>>
>> On 2026/5/30 0:56, Ravi Jonnalagadda wrote:
>>> This series introduces a vendor and PMU-agnostic substrate inside DAMON
>>> that consumes hardware-sampled access reports through the standard
>>> perf-event interface.  Userspace selects the PMU through sysfs (raw
>>> type/config knobs), driving either Intel PEBS L3-miss sampling or AMD
>>> IBS Op sampling.
>>>
>>
>> [...]
>>
>>>
>>> Ravi Jonnalagadda (6):
>>>     mm/damon: add struct damon_perf_event{,_attr} and per-ctx perf_events
>>>       list
>>>     mm/damon/sysfs-sample: expose perf_events configuration via sysfs
>>>     mm/damon/sysfs: install perf_events on apply
>>>     mm/damon/core: per-CPU SPSC ring drain and damon_perf_event lifecycle
>>>     mm/damon/vaddr: implement perf-event access check
>>>     mm/damon: add damos_node_eligible_mem_bp tracepoint
>>>
>>>    include/linux/damon.h        |  80 +++++
>>>    include/trace/events/damon.h |  49 +++
>>>    mm/damon/core.c              | 403 ++++++++++++++++++++----
>>>    mm/damon/ops-common.h        |  39 +++
>>>    mm/damon/sysfs-common.h      |   6 +
>>>    mm/damon/sysfs-sample.c      | 579 +++++++++++++++++++++++++++++++++++
>>>    mm/damon/sysfs.c             |   3 +
>>>    mm/damon/vaddr.c             | 267 ++++++++++++++++
>>>    8 files changed, 1370 insertions(+), 56 deletions(-)
>>>
>>>
>>> base-commit: 4c8ad15abf15eb480d3ad85f902001e35465ef18
>>
>> I wasn't able to apply this patch series to the linux (and linux-next)
>> mainline branch, and also had trouble identifying the source of the base
>> commit.
>>
>> Would you mind sharing where this baseline is from?
> 
> TLDR: I pushed [1] a tree having this series applied on top of the baseline to
> GitHub.  Please feel free to use it.
> 
> I think the baseline was a commit on damon/next tree [2].  Because damon/next
> is continuously rebased, we cannot get the commit in a simple way.  Fortunately
> the commit is still available on my local tree.  So I applied this patch series
> on top of the commit and pushed [1] to a branch of DAMON kernel tree at GitHub.
> 
> Note that the branch is not guaranteed to exist there for long term.  But
> hopefully this series will be merged into the mainline before that.
> 
> [1] https://github.com/damonitor/linux/tree/ravi_hw_sampled_access_reports_rfc_v1
> [2] https://origin.kernel.org/doc/html/latest/mm/damon/maintainer-profile.html#scm-trees
> 
> 
> Thanks,
> SJ
> 

Thanks a lot for providing the branch on GitHub. I've pulled it and
confirmed it builds cleanly on my end.

Appreciate the help!


Best Regards,
Zeng Heng

^ permalink raw reply

* Re: [PATCH] docs: kgdb: Fix path of driver options
From: Daniel Thompson @ 2026-06-24 10:40 UTC (permalink / raw)
  To: Zenghui Yu
  Cc: kgdb-bugreport, workflows, linux-doc, linux-kernel, jason.wessel,
	dianders, corbet, skhan, rdunlap
In-Reply-To: <20260620234035.9917-1-zenghui.yu@linux.dev>

On Sun, Jun 21, 2026 at 07:40:35AM +0800, Zenghui Yu wrote:
> The correct path of driver options should be
> /sys/module/<driver>/parameters/<option>. Fix it.
>
> Signed-off-by: Zenghui Yu <zenghui.yu@linux.dev>

Acked-by: Daniel Thompson (RISCstar) <danielt@kernel.org>


Daniel.

^ permalink raw reply

* Re: [RFC PATCH 0/2] kasan: hw_tags: Add option to tag only at allocation time
From: Catalin Marinas @ 2026-06-24 10:13 UTC (permalink / raw)
  To: Dev Jain
  Cc: Harry Yoo, ryabinin.a.a, akpm, corbet, glider, andreyknvl,
	dvyukov, vincenzo.frascino, kasan-dev, linux-mm, linux-kernel,
	skhan, workflows, linux-doc, linux-arm-kernel, ryan.roberts,
	anshuman.khandual, kaleshsingh, 21cnbao, david, will
In-Reply-To: <f5927785-d5d3-4e64-bbac-220d40718a1f@arm.com>

On Wed, Jun 24, 2026 at 09:44:49AM +0530, Dev Jain wrote:
> Anyhow someone needs to first test the current patchset to get some
> numbers, we would be wasting time on this if no one gets an improvement.

I agree. Something like iperf3 would be interesting, lots of skb
allocations.

-- 
Catalin

^ permalink raw reply

* [PATCH v3 7/7] net: wwan: t9xx: Add maintainers entry
From: Jack Wu via B4 Relay @ 2026-06-24 10:04 UTC (permalink / raw)
  To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Jack Wu, Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng,
	Matthias Brugger, AngeloGioacchino Del Regno, Simon Horman,
	Jonathan Corbet, Shuah Khan
  Cc: linux-kernel, netdev, linux-arm-kernel, linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-0-73ff03f60c48@compal.com>

From: Jack Wu <jackbb_wu@compal.com>

Add MAINTAINERS entry for the MediaTek T9XX 5G WWAN modem device
driver.

Signed-off-by: Jack Wu <jackbb_wu@compal.com>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 461a3eed6129..8155d26bff03 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16494,6 +16494,15 @@ L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/wwan/t7xx/
 
+MEDIATEK T9XX 5G WWAN MODEM DRIVER
+M:	Jack Wu <jackbb_wu@compal.com>
+R:	Wen-Zhi Huang <wen-zhi.huang@mediatek.com>
+R:	Shi-Wei Yeh <shi-wei.yeh@mediatek.com>
+R:	Minano Tseng <Minano.tseng@mediatek.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/wwan/t9xx/
+
 MEDIATEK USB3 DRD IP DRIVER
 M:	Chunfeng Yun <chunfeng.yun@mediatek.com>
 L:	linux-usb@vger.kernel.org

-- 
2.34.1



^ permalink raw reply related

* [PATCH v3 6/7] net: wwan: t9xx: Add AT & MBIM WWAN ports
From: Jack Wu via B4 Relay @ 2026-06-24 10:04 UTC (permalink / raw)
  To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Jack Wu, Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng,
	Matthias Brugger, AngeloGioacchino Del Regno, Simon Horman,
	Jonathan Corbet, Shuah Khan
  Cc: linux-kernel, netdev, linux-arm-kernel, linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-0-73ff03f60c48@compal.com>

From: Jack Wu <jackbb_wu@compal.com>

Add AT & MBIM ports to the port infrastructure.
The WWAN initialization method is responsible for creating the
corresponding ports using the WWAN framework infrastructure. The
implemented WWAN port operations are start, stop, tx, tx_blocking
and tx_poll.

Signed-off-by: Jack Wu <jackbb_wu@compal.com>
---
 drivers/net/wwan/t9xx/mtk_port.c               |  26 ++
 drivers/net/wwan/t9xx/mtk_port.h               |  13 +
 drivers/net/wwan/t9xx/mtk_port_io.c            | 336 ++++++++++++++++++++++++-
 drivers/net/wwan/t9xx/mtk_port_io.h            |   5 +
 drivers/net/wwan/t9xx/pcie/mtk_ctrl_cfg_m9xx.c |   8 +
 5 files changed, 387 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wwan/t9xx/mtk_port.c b/drivers/net/wwan/t9xx/mtk_port.c
index fbc2fb7bdc2d..9015be090023 100644
--- a/drivers/net/wwan/t9xx/mtk_port.c
+++ b/drivers/net/wwan/t9xx/mtk_port.c
@@ -819,6 +819,29 @@ int mtk_port_ch_disable(struct mtk_port *port)
 	return ret;
 }
 
+static int mtk_port_enable_by_type(struct mtk_port_mngr *port_mngr, int tbl_type)
+{
+	struct mtk_port **ports;
+	int ret, idx;
+
+	if (tbl_type < 0 || tbl_type >= PORT_TBL_MAX)
+		return -EINVAL;
+
+	ports = kcalloc(port_mngr->port_cnt, sizeof(struct mtk_port *), GFP_KERNEL);
+	if (!ports)
+		return -ENOMEM;
+
+	ret = radix_tree_gang_lookup(&port_mngr->port_tbl[tbl_type],
+				     (void **)ports, 0, port_mngr->port_cnt);
+	for (idx = 0; idx < ret; idx++) {
+		if (ports[idx]->enable)
+			ports_ops[ports[idx]->info.type]->enable(ports[idx]);
+	}
+
+	kfree(ports);
+	return 0;
+}
+
 static void mtk_port_disable(struct mtk_port_mngr *port_mngr)
 {
 	struct mtk_port **ports;
@@ -852,6 +875,9 @@ void mtk_port_mngr_fsm_state_handler(struct mtk_fsm_param *fsm_param, void *arg)
 	case FSM_STATE_OFF:
 		mtk_port_disable(port_mngr);
 		break;
+	case FSM_STATE_READY:
+		mtk_port_enable_by_type(port_mngr, PORT_TBL_MD);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/wwan/t9xx/mtk_port.h b/drivers/net/wwan/t9xx/mtk_port.h
index a201c0007878..4846354981d7 100644
--- a/drivers/net/wwan/t9xx/mtk_port.h
+++ b/drivers/net/wwan/t9xx/mtk_port.h
@@ -56,6 +56,10 @@ enum mtk_ccci_ch {
 	/* to MD */
 	CCCI_CONTROL_RX				= 0x2000,
 	CCCI_CONTROL_TX				= 0x2001,
+	CCCI_UART2_RX				= 0x200A,
+	CCCI_UART2_TX				= 0x200C,
+	CCCI_MBIM_RX				= 0x20D0,
+	CCCI_MBIM_TX				= 0x20D1,
 };
 
 enum mtk_port_flag {
@@ -73,6 +77,7 @@ enum mtk_port_tbl {
 
 enum mtk_port_type {
 	PORT_TYPE_INTERNAL,
+	PORT_TYPE_WWAN,
 	PORT_TYPE_MAX
 };
 
@@ -81,6 +86,13 @@ struct mtk_internal_port {
 	int (*recv_cb)(void *arg, struct sk_buff *skb);
 };
 
+struct mtk_wwan_port {
+	/* w_lock protects wwan_port when recv data and disable port at the same time */
+	struct mutex w_lock;
+	int w_type;
+	void *w_port;
+};
+
 struct mtk_port_cfg {
 	enum mtk_ccci_ch tx_ch;
 	enum mtk_ccci_ch rx_ch;
@@ -110,6 +122,7 @@ struct mtk_port {
 	char dev_str[MTK_DEV_STR_LEN];
 	struct mtk_port_mngr *port_mngr;
 	struct mtk_internal_port i_priv;
+	struct mtk_wwan_port w_priv;
 };
 
 struct mtk_port_mngr {
diff --git a/drivers/net/wwan/t9xx/mtk_port_io.c b/drivers/net/wwan/t9xx/mtk_port_io.c
index e3a2de6d2f29..882254b74026 100644
--- a/drivers/net/wwan/t9xx/mtk_port_io.c
+++ b/drivers/net/wwan/t9xx/mtk_port_io.c
@@ -3,6 +3,10 @@
  * Copyright (c) 2022, MediaTek Inc.
  */
 #include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/wwan.h>
 
 #include "mtk_port_io.h"
 
@@ -41,6 +45,145 @@ static void mtk_port_struct_init(struct mtk_port *port)
 	init_waitqueue_head(&port->rx_wq);
 }
 
+static int mtk_port_copy_data_from(void *to, union user_buf from, unsigned int len,
+				   unsigned int offset, bool from_user_space)
+{
+	if (from_user_space) {
+		if (copy_from_user(to, from.ubuf + offset, len))
+			return -EINVAL;
+	} else {
+		memcpy(to, from.kbuf + offset, len);
+	}
+
+	return 0;
+}
+
+static int mtk_port_common_write_frag_skb(struct mtk_port *port, struct sk_buff *skb,
+					  union user_buf buf, u32 packet_size,
+					  u32 cur_pos, bool from_user_space)
+{
+	struct sk_buff *frag_skb, *tmp = NULL;
+	u32 frag_size;
+	int ret;
+
+	frag_size = min(packet_size, port->tx_frag_size);
+	ret = mtk_port_copy_data_from(skb_put(skb, frag_size),
+				      buf, frag_size,
+				      cur_pos, from_user_space);
+	if (ret) {
+		dev_err(port->port_mngr->ctrl_blk->mdev->dev,
+			"Failed to copy skb for port(%s)\n", port->info.name);
+		goto err_reset_skb;
+	}
+	cur_pos += frag_size;
+	packet_size -= frag_size;
+	if (!packet_size)
+		return cur_pos;
+
+	while (packet_size > 0) {
+		frag_skb = __dev_alloc_skb(port->tx_mtu, GFP_KERNEL);
+		if (!frag_skb) {
+			ret = -ENOMEM;
+			goto err_free_frag_list;
+		}
+
+		frag_size = min(packet_size, port->tx_frag_size);
+		ret = mtk_port_copy_data_from(skb_put(frag_skb, frag_size),
+					      buf, frag_size,
+					      cur_pos, from_user_space);
+		if (ret) {
+			dev_err(port->port_mngr->ctrl_blk->mdev->dev,
+				"Failed to copy frag_skb for port(%s)\n", port->info.name);
+			dev_kfree_skb_any(frag_skb);
+			goto err_free_frag_list;
+		}
+		skb->data_len += frag_size;
+		skb->len += frag_size;
+		cur_pos += frag_size;
+		packet_size -= frag_size;
+		if (!tmp)
+			skb_shinfo(skb)->frag_list = frag_skb;
+		else
+			tmp->next = frag_skb;
+		tmp = frag_skb;
+	}
+	return cur_pos;
+
+err_free_frag_list:
+	frag_skb = skb_shinfo(skb)->frag_list;
+	while (frag_skb) {
+		tmp = frag_skb->next;
+		frag_skb->next = NULL;
+		dev_kfree_skb_any(frag_skb);
+		frag_skb = tmp;
+	}
+	skb_shinfo(skb)->frag_list = NULL;
+err_reset_skb:
+	skb->data_len = 0;
+	return ret;
+}
+
+static int mtk_port_common_write(struct mtk_port *port, union user_buf buf, unsigned int len,
+				 bool from_user_space)
+{
+	u32 packet_size, left_cnt = len, cur_pos;
+	struct sk_buff *skb;
+	int ret;
+
+	if (len == 0)
+		return -EINVAL;
+
+start_write:
+	ret = mtk_port_status_check(port);
+	if (ret)
+		goto end_write;
+
+	skb = __dev_alloc_skb(port->tx_mtu, GFP_KERNEL);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto end_write;
+	}
+
+	skb_reserve(skb, sizeof(struct mtk_ccci_header));
+
+	packet_size = min_t(u32, left_cnt, port->tx_mtu - sizeof(struct mtk_ccci_header));
+	cur_pos = len - left_cnt;
+	/* Support scatter gather transmission */
+	if (port->tx_mtu > port->tx_frag_size) {
+		ret = mtk_port_common_write_frag_skb(port, skb, buf, packet_size,
+						     cur_pos, from_user_space);
+		if (ret < 0)
+			goto err_free_skb;
+	} else {
+		ret = mtk_port_copy_data_from(skb_put(skb, packet_size),
+					      buf, packet_size,
+					      cur_pos, from_user_space);
+		if (ret) {
+			dev_err(port->port_mngr->ctrl_blk->mdev->dev,
+				"Failed to copy data for port(%s)\n", port->info.name);
+			goto err_free_skb;
+		}
+	}
+
+	ret = mtk_port_send_data(port, skb);
+	if (ret < 0) {
+		if (ret == -EINTR)
+			left_cnt -= packet_size;
+		goto end_write;
+	}
+
+	left_cnt -= ret;
+	if (left_cnt)
+		goto start_write;
+	else
+		goto end_write;
+
+err_free_skb:
+	dev_kfree_skb_any(skb);
+end_write:
+	return (len > left_cnt) ? (len - left_cnt) : ret;
+}
+
 static int mtk_port_internal_init(struct mtk_port *port)
 {
 	mtk_port_struct_init(port);
@@ -101,7 +244,6 @@ static int mtk_port_internal_recv(struct mtk_port *port, struct sk_buff *skb)
 	return ret;
 
 drop_data:
-	dev_kfree_skb_any(skb);
 	return ret;
 }
 
@@ -234,6 +376,198 @@ static const struct port_ops port_internal_ops = {
 	.recv = mtk_port_internal_recv,
 };
 
+static int mtk_port_wwan_open(struct wwan_port *w_port)
+{
+	struct mtk_port *port;
+	int ret;
+
+	port = wwan_port_get_drvdata(w_port);
+	ret = mtk_port_get_locked(port);
+	if (ret)
+		return ret;
+
+	ret = mtk_port_common_open(port);
+	if (ret)
+		mtk_port_put_locked(port);
+
+	return ret;
+}
+
+static void mtk_port_wwan_close(struct wwan_port *w_port)
+{
+	struct mtk_port *port = wwan_port_get_drvdata(w_port);
+
+	mtk_port_common_close(port);
+	mtk_port_put_locked(port);
+}
+
+static int mtk_port_wwan_write(struct wwan_port *w_port, struct sk_buff *skb)
+{
+	struct mtk_port *port = wwan_port_get_drvdata(w_port);
+	union user_buf user_buf;
+	int ret;
+
+	if (unlikely(!skb->len)) {
+		consume_skb(skb);
+		return 0;
+	}
+
+	port->info.flags &= ~PORT_F_BLOCKING;
+	user_buf.kbuf = (void *)skb->data;
+	ret = mtk_port_common_write(port, user_buf, skb->len, false);
+	if (ret < 0)
+		return ret;
+
+	consume_skb(skb);
+	return 0;
+}
+
+static int mtk_port_wwan_write_blocking(struct wwan_port *w_port, struct sk_buff *skb)
+{
+	struct mtk_port *port = wwan_port_get_drvdata(w_port);
+	union user_buf user_buf;
+	int ret;
+
+	if (unlikely(!skb->len)) {
+		consume_skb(skb);
+		return 0;
+	}
+
+	port->info.flags |= PORT_F_BLOCKING;
+	user_buf.kbuf = (void *)skb->data;
+	ret = mtk_port_common_write(port, user_buf, skb->len, false);
+	if (ret < 0)
+		return ret;
+
+	consume_skb(skb);
+	return 0;
+}
+
+static __poll_t mtk_port_wwan_poll(struct wwan_port *w_port, struct file *file,
+				   struct poll_table_struct *poll)
+{
+	struct mtk_port *port = wwan_port_get_drvdata(w_port);
+	union ctrl_hif_cmd_data hif_cmd;
+	struct mtk_ctrl_blk *ctrl_blk;
+	__poll_t mask = 0;
+
+	poll_wait(file, &port->trb_wq, poll);
+	if (mtk_port_status_check(port))
+		return EPOLLERR | EPOLLHUP;
+
+	ctrl_blk = port->port_mngr->ctrl_blk;
+	hif_cmd.rx_ch = port->info.rx_ch;
+	if (!ctrl_blk->ops->send_cmd(ctrl_blk->mdev, HIF_CTRL_CMD_CHECK_TX_FULL, &hif_cmd))
+		mask |= EPOLLOUT | EPOLLWRNORM;
+
+	return mask;
+}
+
+static const struct wwan_port_ops wwan_ops = {
+	.start = mtk_port_wwan_open,
+	.stop = mtk_port_wwan_close,
+	.tx = mtk_port_wwan_write,
+	.tx_blocking = mtk_port_wwan_write_blocking,
+	.tx_poll = mtk_port_wwan_poll,
+};
+
+static int mtk_port_wwan_init(struct mtk_port *port)
+{
+	mtk_port_struct_init(port);
+	port->enable = false;
+
+	mutex_init(&port->w_priv.w_lock);
+
+	switch (port->info.rx_ch) {
+	case CCCI_MBIM_RX:
+		port->w_priv.w_type = WWAN_PORT_MBIM;
+		break;
+	case CCCI_UART2_RX:
+		port->w_priv.w_type = WWAN_PORT_AT;
+		break;
+	default:
+		port->w_priv.w_type = WWAN_PORT_UNKNOWN;
+		break;
+	}
+
+	return 0;
+}
+
+static void mtk_port_wwan_exit(struct mtk_port *port)
+{
+	if (test_bit(PORT_S_ENABLE, &port->status))
+		ports_ops[port->info.type]->disable(port);
+}
+
+static void mtk_port_wwan_enable(struct mtk_port *port)
+{
+	struct mtk_port_mngr *port_mngr;
+	int ret;
+
+	port_mngr = port->port_mngr;
+
+	if (test_bit(PORT_S_ENABLE, &port->status))
+		return;
+
+	ret = mtk_port_ch_enable(port);
+	if (ret && ret != -EBUSY)
+		return;
+
+	port->w_priv.w_port = wwan_create_port(port_mngr->ctrl_blk->mdev->dev,
+					       port->w_priv.w_type,
+					       &wwan_ops, NULL, port);
+	if (IS_ERR(port->w_priv.w_port)) {
+		dev_warn(port_mngr->ctrl_blk->mdev->dev,
+			 "Failed to create wwan port for (%s)\n", port->info.name);
+		port->w_priv.w_port = NULL;
+		mtk_port_ch_disable(port);
+		return;
+	}
+
+	set_bit(PORT_S_WR, &port->status);
+	set_bit(PORT_S_ENABLE, &port->status);
+}
+
+static void mtk_port_wwan_disable(struct mtk_port *port)
+{
+	struct wwan_port *w_port;
+
+	if (!test_and_clear_bit(PORT_S_ENABLE, &port->status))
+		return;
+
+	clear_bit(PORT_S_WR, &port->status);
+	w_port = port->w_priv.w_port;
+	mutex_lock(&port->w_priv.w_lock);
+	port->w_priv.w_port = NULL;
+	mutex_unlock(&port->w_priv.w_lock);
+
+	mtk_port_ch_disable(port);
+	wwan_remove_port(w_port);
+}
+
+static int mtk_port_wwan_recv(struct mtk_port *port, struct sk_buff *skb)
+{
+	mutex_lock(&port->w_priv.w_lock);
+	if (!port->w_priv.w_port) {
+		mutex_unlock(&port->w_priv.w_lock);
+		return -ENXIO;
+	}
+
+	wwan_port_rx(port->w_priv.w_port, skb);
+	mutex_unlock(&port->w_priv.w_lock);
+	return 0;
+}
+
+static const struct port_ops port_wwan_ops = {
+	.init = mtk_port_wwan_init,
+	.exit = mtk_port_wwan_exit,
+	.reset = mtk_port_reset,
+	.enable = mtk_port_wwan_enable,
+	.disable = mtk_port_wwan_disable,
+	.recv = mtk_port_wwan_recv,
+};
+
 const struct port_ops *ports_ops[PORT_TYPE_MAX] = {
 	&port_internal_ops,
+	&port_wwan_ops,
 };
diff --git a/drivers/net/wwan/t9xx/mtk_port_io.h b/drivers/net/wwan/t9xx/mtk_port_io.h
index 0c10e893b7e0..ea92cd22dba0 100644
--- a/drivers/net/wwan/t9xx/mtk_port_io.h
+++ b/drivers/net/wwan/t9xx/mtk_port_io.h
@@ -23,6 +23,11 @@ struct port_ops {
 	int (*recv)(struct mtk_port *port, struct sk_buff *skb);
 };
 
+union user_buf {
+	void __user *ubuf;
+	void *kbuf;
+};
+
 void *mtk_port_internal_open(struct mtk_md_dev *mdev, char *name, int flag);
 int mtk_port_internal_close(void *i_port);
 int mtk_port_internal_write(void *i_port, struct sk_buff *skb);
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_ctrl_cfg_m9xx.c b/drivers/net/wwan/t9xx/pcie/mtk_ctrl_cfg_m9xx.c
index 8611561dd67c..aab09cab360c 100644
--- a/drivers/net/wwan/t9xx/pcie/mtk_ctrl_cfg_m9xx.c
+++ b/drivers/net/wwan/t9xx/pcie/mtk_ctrl_cfg_m9xx.c
@@ -16,6 +16,10 @@ static const int mtk_srv_cfg_m9xx[NR_CLDMA][HW_QUE_NUM] = {
 
 /* the number of RX GPDs should be at last two */
 static const struct queue_info mtk_queue_info_m9xx[] = {
+	{CCCI_UART2_TX, CCCI_UART2_RX, CLDMA1, TXQ(5), RXQ(5),
+	 Q_MTU_3_5K, Q_MTU_3_5K, TX_GPD_NUM, RX_GPD_NUM, Q_FRAG_3_5K, Q_FRAG_3_5K, 0},
+	{CCCI_MBIM_TX, CCCI_MBIM_RX, CLDMA1, TXQ(2), RXQ(2),
+	 Q_MTU_3_5K, Q_MTU_3_5K, TX_GPD_NUM, RX_GPD_NUM, Q_FRAG_3_5K, Q_FRAG_3_5K, 0},
 	{CCCI_CONTROL_TX, CCCI_CONTROL_RX, CLDMA1, TXQ(0), RXQ(0),
 	 Q_MTU_3_5K, Q_MTU_3_5K, TX_GPD_NUM, RX_GPD_NUM, Q_FRAG_3_5K, Q_FRAG_3_5K, 0},
 	{CCCI_SAP_CONTROL_TX, CCCI_SAP_CONTROL_RX, CLDMA0, TXQ(0), RXQ(0),
@@ -23,6 +27,10 @@ static const struct queue_info mtk_queue_info_m9xx[] = {
 };
 
 static const struct mtk_port_cfg port_cfg_m9xx[] = {
+	{CCCI_UART2_TX, CCCI_UART2_RX, PORT_TYPE_WWAN, "AT",
+		PORT_F_ALLOW_DROP},
+	{CCCI_MBIM_TX, CCCI_MBIM_RX, PORT_TYPE_WWAN, "MBIM",
+		PORT_F_ALLOW_DROP},
 	{CCCI_CONTROL_TX, CCCI_CONTROL_RX, PORT_TYPE_INTERNAL, "MDCTRL",
 		PORT_F_ALLOW_DROP},
 	{CCCI_SAP_CONTROL_TX, CCCI_SAP_CONTROL_RX, PORT_TYPE_INTERNAL, "SAPCTRL",

-- 
2.34.1



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox