Linux Trace Kernel

Linux Trace Kernel
 help / color / mirror / Atom feed

* [PATCH v2] bootconfig: Apply early options from embedded config
From: Breno Leitao @ 2026-03-25 10:05 UTC (permalink / raw)
  To: Masami Hiramatsu, Jonathan Corbet, Shuah Khan
  Cc: linux-kernel, linux-trace-kernel, linux-doc, oss, paulmck,
	rostedt, kernel-team, Breno Leitao

Bootconfig currently cannot be used to configure early kernel
parameters. For example, the "mitigations=" parameter must be passed
through traditional boot methods because bootconfig parsing happens
after these early parameters need to be processed.

This patch allows early options such as:

  kernel.mitigations = off

to be placed in the embedded bootconfig and take effect, without
requiring them to be on the kernel command line.

Add bootconfig_apply_early_params() which walks all kernel.* keys in the
parsed XBC tree and calls do_early_param() for each one. It is called
from setup_boot_config() immediately after a successful xbc_init() on
the embedded data, which happens before parse_early_param() runs in
start_kernel().

Early options in initrd bootconfig are still silently ignored, as the
initrd is only available after the early param window has closed.

Document this behaviour in both Kconfig and the admin guide.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
Changes in v2:
- Made val_buf static __initdata to keep 2KB off the stack
- Removed dead !val branch — xbc_node_find_next_key_value() returns "" for boolean keys, never NULL
- Added pr_warn + continue when strscpy truncates the value
- Link to v1: https://patch.msgid.link/20260324-early_bootconfig-v1-1-1c0e625aff06@debian.org
---
 Documentation/admin-guide/bootconfig.rst |  4 ++
 init/Kconfig                             |  6 +++
 init/main.c                              | 68 +++++++++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index f712758472d5c..e820f33d3ad16 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -169,6 +169,10 @@ Boot Kernel With a Boot Config
 There are two options to boot the kernel with bootconfig: attaching the
 bootconfig to the initrd image or embedding it in the kernel itself.
 
+Early options (those registered with ``early_param()``) may only be
+specified in the embedded bootconfig, because the initrd is not yet
+available when early parameters are processed.
+
 Attaching a Boot Config to Initrd
 ---------------------------------
 
diff --git a/init/Kconfig b/init/Kconfig
index 938fbe6a91e15..5e8057e73fe06 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1534,6 +1534,12 @@ config BOOT_CONFIG_EMBED
 	  image. But if the system doesn't support initrd, this option will
 	  help you by embedding a bootconfig file while building the kernel.
 
+	  Unlike bootconfig attached to initrd, the embedded bootconfig also
+	  supports early options (those registered with early_param()). Any
+	  kernel.* key in the embedded bootconfig is applied before
+	  parse_early_param() runs. Early options in initrd bootconfig will
+	  not be applied.
+
 	  If unsure, say N.
 
 config BOOT_CONFIG_EMBED_FILE
diff --git a/init/main.c b/init/main.c
index 453ac9dff2da0..14a04c283fa48 100644
--- a/init/main.c
+++ b/init/main.c
@@ -416,9 +416,64 @@ static int __init warn_bootconfig(char *str)
 	return 0;
 }
 
+/*
+ * do_early_param() is defined later in this file but called from
+ * bootconfig_apply_early_params() below, so we need a forward declaration.
+ */
+static int __init do_early_param(char *param, char *val,
+				 const char *unused, void *arg);
+
+/*
+ * bootconfig_apply_early_params - dispatch kernel.* keys from the embedded
+ * bootconfig as early_param() calls.
+ *
+ * early_param() handlers must run before most of the kernel initialises
+ * (e.g. before the GIC driver reads irqchip.gicv3_pseudo_nmi).  A bootconfig
+ * attached to the initrd arrives too late for this because the initrd is not
+ * mapped yet when early params are processed.  The embedded bootconfig lives
+ * in the kernel image itself (.init.data), so it is always reachable.
+ *
+ * This function is called from setup_boot_config() which runs in
+ * start_kernel() before parse_early_param(), making the timing correct.
+ */
+static void __init bootconfig_apply_early_params(void)
+{
+	static char val_buf[COMMAND_LINE_SIZE] __initdata;
+	struct xbc_node *knode, *root;
+	const char *val;
+	ssize_t ret;
+
+	root = xbc_find_node("kernel");
+	if (!root)
+		return;
+
+	/*
+	 * Keys that do not match any early_param() handler are silently
+	 * ignored — do_early_param() always returns 0.
+	 */
+	xbc_node_for_each_key_value(root, knode, val) {
+		if (xbc_node_compose_key_after(root, knode, xbc_namebuf, XBC_KEYLEN_MAX) < 0)
+			continue;
+
+		/*
+		 * We need to copy const char *val to a char pointer,
+		 * which is what do_early_param() need, given it might
+		 * call strsep(), strtok() later.
+		 */
+		ret = strscpy(val_buf, val, sizeof(val_buf));
+		if (ret < 0) {
+			pr_warn("ignoring bootconfig value '%s', too long\n",
+				xbc_namebuf);
+			continue;
+		}
+		do_early_param(xbc_namebuf, val_buf, NULL, NULL);
+	}
+}
+
 static void __init setup_boot_config(void)
 {
 	static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
+	bool using_embedded = false;
 	const char *msg, *data;
 	int pos, ret;
 	size_t size;
@@ -427,8 +482,17 @@ static void __init setup_boot_config(void)
 	/* Cut out the bootconfig data even if we have no bootconfig option */
 	data = get_boot_config_from_initrd(&size);
 	/* If there is no bootconfig in initrd, try embedded one. */
-	if (!data)
+	if (!data) {
 		data = xbc_get_embedded_bootconfig(&size);
+		/*
+		 * Record that we are using the embedded config so that
+		 * bootconfig_apply_early_params() is called below.
+		 * When CONFIG_BOOT_CONFIG_EMBED is not set,
+		 * xbc_get_embedded_bootconfig() is a stub returning NULL, so
+		 * data is always NULL here and using_embedded stays false.
+		 */
+		using_embedded = data;
+	}
 
 	strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
 	err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
@@ -466,6 +530,8 @@ static void __init setup_boot_config(void)
 	} else {
 		xbc_get_info(&ret, NULL);
 		pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
+		if (using_embedded)
+			bootconfig_apply_early_params();
 		/* keys starting with "kernel." are passed via cmdline */
 		extra_command_line = xbc_make_cmdline("kernel");
 		/* Also, "init." keys are init arguments */

---
base-commit: 785f0eb2f85decbe7c1ef9ae922931f0194ffc2e
change-id: 20260323-early_bootconfig-2efc4509af3d

Best regards,
--  
Breno Leitao <leitao@debian.org>


^ permalink raw reply related

* Re: [PATCH v4 0/5] mm: zone lock tracepoint instrumentation
From: Dmitry Ilvokhin @ 2026-03-25 12:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Steven Rostedt, Matthew Wilcox, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Masami Hiramatsu, Mathieu Desnoyers, Rafael J. Wysocki,
	Pavel Machek, Len Brown, Brendan Jackman, Johannes Weiner, Zi Yan,
	Oscar Salvador, Qi Zheng, Shakeel Butt, linux-kernel, linux-mm,
	linux-trace-kernel, linux-pm
In-Reply-To: <20260324163918.1a3c5c960d85a4243c9ae314@linux-foundation.org>

On Tue, Mar 24, 2026 at 04:39:18PM -0700, Andrew Morton wrote:
> On Thu, 19 Mar 2026 13:22:54 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> 
> > On Mon, Mar 16, 2026 at 05:40:50PM +0000, Dmitry Ilvokhin wrote:
> > 
> > [...]
> > 
> > > A possible generic solution is a trace_contended_release() for spin
> > > locks, for example:
> > > 
> > >     if (trace_contended_release_enabled() &&
> > >         atomic_read(&lock->val) & ~_Q_LOCKED_MASK)
> > >         trace_contended_release(lock);
> > > 
> > > This might work on x86, but could increase code size and regress
> > > performance on arches where spin_unlock() is inlined, such as arm64
> > > under !PREEMPTION.
> > 
> > I took a stab at this idea and submitted an RFC [1].
> > 
> > The implementation builds on your earlier observation from Matthew that
> > _raw_spin_unlock() is not inlined in most configurations. In those
> > cases, when the tracepoint is disabled, this adds a single NOP on the
> > fast path, with the conditional check staying out of line. The measured
> > text size increase in this configuration is +983 bytes.
> > 
> > For configurations where _raw_spin_unlock() is inlined, the
> > instrumentation does increase code size more noticeably
> > (+71 KB in my measurements), since the check and out of line call is
> > replicated at each call site.
> > 
> > This provides a generic release-side signal for contended locks,
> > allowing: correlation of lock holders with waiters and measurement of
> > contended hold times
> > 
> > This RFC addresses the same visibility gap without introducing per-lock
> > instrumentation.
> > 
> > If this tradeoff is acceptable, this could be a generic alternative to
> > lock-specific tracepoints.
> > 
> > [1]: https://lore.kernel.org/all/51aad0415b78c5a39f2029722118fa01eac77538.1773858853.git.d@ilvokhin.com 
> 
> That submission has met a disappointing response.
> 
> How should I proceed with this series "mm: zone lock tracepoint
> instrumentation"?  It's not urgent so I'm inclined to put this on hold
> while you pursue "locking: Add contended_release tracepoint to spinning
> locks"?

Thanks for the follow-up, Andrew.

My current plan is to focus on the "locking: Add contended_release
tracepoint to spinning locks" work and drive it to a clear conclusion:
either by getting feedback that it's not a good direction, or by getting
it into mainline.

In the meantime, it seems reasonable to drop the "mm: zone lock
tracepoint instrumentation" patchset from mm-new to avoid confusion
until the direction is clearer. I can revisit and respin it if the more
generic locking approach doesn't pan out.

> 
> Please send that v2 sometime and hopefully Steven can help push it along?

I'll send the next version of the generic locking series soon. Any help
in pushing it along would be appreciated.

^ permalink raw reply

* Re: [PATCH v4 0/5] mm: zone lock tracepoint instrumentation
From: Steven Rostedt @ 2026-03-25 14:19 UTC (permalink / raw)
  To: Dmitry Ilvokhin
  Cc: Andrew Morton, Matthew Wilcox, David Hildenbrand, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Masami Hiramatsu, Mathieu Desnoyers, Rafael J. Wysocki,
	Pavel Machek, Len Brown, Brendan Jackman, Johannes Weiner, Zi Yan,
	Oscar Salvador, Qi Zheng, Shakeel Butt, linux-kernel, linux-mm,
	linux-trace-kernel, linux-pm
In-Reply-To: <acPRq1YPeGR8EqMB@shell.ilvokhin.com>

On Wed, 25 Mar 2026 12:14:35 +0000
Dmitry Ilvokhin <d@ilvokhin.com> wrote:

> > Please send that v2 sometime and hopefully Steven can help push it along?  
> 
> I'll send the next version of the generic locking series soon. Any help
> in pushing it along would be appreciated.

I'll see what I can do when I see v2!

-- Steve

^ permalink raw reply

* Re: [PATCH v2] bootconfig: Apply early options from embedded config
From: Masami Hiramatsu @ 2026-03-25 14:22 UTC (permalink / raw)
  To: Breno Leitao
  Cc: Jonathan Corbet, Shuah Khan, linux-kernel, linux-trace-kernel,
	linux-doc, oss, paulmck, rostedt, kernel-team
In-Reply-To: <20260325-early_bootconfig-v2-1-6b05a36fbfb5@debian.org>

Hi Breno,

On Wed, 25 Mar 2026 03:05:38 -0700
Breno Leitao <leitao@debian.org> wrote:

> Bootconfig currently cannot be used to configure early kernel
> parameters. For example, the "mitigations=" parameter must be passed
> through traditional boot methods because bootconfig parsing happens
> after these early parameters need to be processed.
> 
> This patch allows early options such as:
> 
>   kernel.mitigations = off
> 
> to be placed in the embedded bootconfig and take effect, without
> requiring them to be on the kernel command line.
> 
> Add bootconfig_apply_early_params() which walks all kernel.* keys in the
> parsed XBC tree and calls do_early_param() for each one. It is called
> from setup_boot_config() immediately after a successful xbc_init() on
> the embedded data, which happens before parse_early_param() runs in
> start_kernel().
> 
> Early options in initrd bootconfig are still silently ignored, as the
> initrd is only available after the early param window has closed.
> 
> Document this behaviour in both Kconfig and the admin guide.

AI review made some comments. Some of the review comments seem
reasonable.

https://sashiko.dev/#/patchset/20260325-early_bootconfig-v2-1-6b05a36fbfb5%40debian.org

[..]
> 
> diff --git a/init/main.c b/init/main.c
> index 453ac9dff2da0..14a04c283fa48 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -416,9 +416,64 @@ static int __init warn_bootconfig(char *str)
>  	return 0;
>  }
>  
> +/*
> + * do_early_param() is defined later in this file but called from
> + * bootconfig_apply_early_params() below, so we need a forward declaration.
> + */
> +static int __init do_early_param(char *param, char *val,
> +				 const char *unused, void *arg);
> +
> +/*
> + * bootconfig_apply_early_params - dispatch kernel.* keys from the embedded
> + * bootconfig as early_param() calls.
> + *
> + * early_param() handlers must run before most of the kernel initialises
> + * (e.g. before the GIC driver reads irqchip.gicv3_pseudo_nmi).  A bootconfig
> + * attached to the initrd arrives too late for this because the initrd is not
> + * mapped yet when early params are processed.  The embedded bootconfig lives
> + * in the kernel image itself (.init.data), so it is always reachable.
> + *
> + * This function is called from setup_boot_config() which runs in
> + * start_kernel() before parse_early_param(), making the timing correct.
> + */
> +static void __init bootconfig_apply_early_params(void)

[sashiko comment]
| Does this run early enough for architectural parameters?
| While setup_boot_config() runs before parse_early_param() in start_kernel(),
| it runs after setup_arch(). setup_boot_config() relies on xbc_init() which
| uses the memblock allocator, requiring setup_arch() to have already
| initialized it.
| However, the kernel expects many early parameters (like mem=, earlycon,
| noapic, and iommu) to be parsed during setup_arch() via the architecture's
| call to parse_early_param(). Since setup_arch() completes before
| setup_boot_config() runs, will these architectural early parameters be
| silently ignored because the decisions they influence were already
| finalized?

This is the major reason that I did not support early parameters
in bootconfig. Some archs initialize kernel_cmdline in setup_arch()
and set up early parameters in it.
To fix this, we need to change setup_arch() for each architecture so
that it calls this bootconfig_apply_early_params().

> +{
> +	static char val_buf[COMMAND_LINE_SIZE] __initdata;

[sashiko comment]
| Can using a single shared static buffer cause data corruption for handlers
| that save the argument pointer?
| Several early_param handlers assume the passed string pointer is persistent
| (like the boot_command_line) and retain it internally. For example,
| setup_earlycon() calls register_earlycon(), which sets
| early_console_dev.con->options = options, where options is a pointer
| directly into the passed buffer.
| Because val_buf is overwritten on every loop iteration, the stored pointer
| will point to the value of the last bootconfig key processed.

Ah, good catch. Since we don't have any standard way to handle the
parameters, some of them do not copy the value but try to keep a
reference to the given string.

> +	struct xbc_node *knode, *root;
> +	const char *val;
> +	ssize_t ret;
> +
> +	root = xbc_find_node("kernel");
> +	if (!root)
> +		return;
> +
> +	/*
> +	 * Keys that do not match any early_param() handler are silently
> +	 * ignored — do_early_param() always returns 0.
> +	 */
> +	xbc_node_for_each_key_value(root, knode, val) {

[sashiko comment]
| Does this loop handle array values correctly?
| xbc_node_for_each_key_value() only assigns the first value of an array to
| the val pointer before advancing to the next key. It does not iterate over
| the child nodes of the array.
| If the bootconfig contains a multi-value key like
| kernel.console = "ttyS0", "tty0", will the subsequent values in the array
| be silently dropped instead of passed to the early_param handlers?

Also, good catch :) we need to use xbc_node_for_each_array_value()
for inner loop.

> +		if (xbc_node_compose_key_after(root, knode, xbc_namebuf, XBC_KEYLEN_MAX) < 0)
> +			continue;
> +
> +		/*
> +		 * We need to copy const char *val to a char pointer,
> +		 * which is what do_early_param() need, given it might
> +		 * call strsep(), strtok() later.
> +		 */
> +		ret = strscpy(val_buf, val, sizeof(val_buf));
> +		if (ret < 0) {
> +			pr_warn("ignoring bootconfig value '%s', too long\n",
> +				xbc_namebuf);
> +			continue;
> +		}
> +		do_early_param(xbc_namebuf, val_buf, NULL, NULL);

[sashiko comment]
| How does this handle valueless parameters (boolean flags)?
| When parsing the standard kernel command line, parse_args() passes a NULL
| value to the setup function for flags that lack an = sign (e.g., ro or
| earlycon).
| However, the bootconfig parser returns a zero-length string for valueless
| keys, which gets copied into val_buf as "" and passed to do_early_param().
| This semantic deviation breaks handlers that explicitly check if (!val).
| For instance, param_setup_earlycon() and parse_lapic() check for a NULL
| argument to enable features. Will passing "" instead of NULL prevent these
| handlers from working correctly?

See fs/proc/bootconfig.c. You can check whether the key has a value or
not by checking xbc_node_get_child(knode) != NULL.

Thank you,

> +	}
> +}
> +
>  static void __init setup_boot_config(void)
>  {
>  	static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
> +	bool using_embedded = false;
>  	const char *msg, *data;
>  	int pos, ret;
>  	size_t size;
> @@ -427,8 +482,17 @@ static void __init setup_boot_config(void)
>  	/* Cut out the bootconfig data even if we have no bootconfig option */
>  	data = get_boot_config_from_initrd(&size);
>  	/* If there is no bootconfig in initrd, try embedded one. */
> -	if (!data)
> +	if (!data) {
>  		data = xbc_get_embedded_bootconfig(&size);
> +		/*
> +		 * Record that we are using the embedded config so that
> +		 * bootconfig_apply_early_params() is called below.
> +		 * When CONFIG_BOOT_CONFIG_EMBED is not set,
> +		 * xbc_get_embedded_bootconfig() is a stub returning NULL, so
> +		 * data is always NULL here and using_embedded stays false.
> +		 */
> +		using_embedded = data;
> +	}
>  
>  	strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
>  	err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
> @@ -466,6 +530,8 @@ static void __init setup_boot_config(void)
>  	} else {
>  		xbc_get_info(&ret, NULL);
>  		pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
> +		if (using_embedded)
> +			bootconfig_apply_early_params();
>  		/* keys starting with "kernel." are passed via cmdline */
>  		extra_command_line = xbc_make_cmdline("kernel");
>  		/* Also, "init." keys are init arguments */
> 
> ---
> base-commit: 785f0eb2f85decbe7c1ef9ae922931f0194ffc2e
> change-id: 20260323-early_bootconfig-2efc4509af3d
> 
> Best regards,
> --  
> Breno Leitao <leitao@debian.org>
> 

-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH] tracing/osnoise: fix potential deadlock in cpu hotplug
From: Steven Rostedt @ 2026-03-25 14:34 UTC (permalink / raw)
  To: hu.shengming
  Cc: mhiramat, mathieu.desnoyers, linux-kernel, linux-trace-kernel,
	zhang.run, yang.tao172, ran.xiaokai, luo.haiyang
In-Reply-To: <20260325102542300G48VT-wLNp-dOgT_9Qi2f@zte.com.cn>

On Wed, 25 Mar 2026 10:25:42 +0800 (CST)
<hu.shengming@zte.com.cn> wrote:

> >On Tue, 24 Mar 2026 15:06:16 +0800 (CST)
> ><hu.shengming@zte.com.cn> wrote:
> >  
> >> From: luohaiyang10243395 <luo.haiyang@zte.com.cn>
> >> 
> >> The following sequence may lead to a deadlock in cpu hotplug:
> >> 
> >>   CPU0                        |  CPU1
> >>                               |  schedule_work_on
> >>                               |
> >>   _cpu_down//set CPU1 offline |
> >>   cpus_write_lock             |
> >>                               |  osnoise_hotplug_workfn
> >>                               |    mutex_lock(&interface_lock);
> >>                               |    cpus_read_lock();  //wait cpu_hotplug_lock
> >>                               |
> >>                               |  cpuhp/1
> >>                               |    osnoise_cpu_die
> >>                               |      kthread_stop
> >>                               |        wait_for_completion //wait osnoise/1 exit
> >>                               |
> >>                               |  osnoise/1
> >>                               |    osnoise_sleep
> >>                               |      mutex_lock(&interface_lock); //deadlock
> >> 
> >> Fix by swapping the order of cpus_read_lock() and mutex_lock(&interface_lock).  
> >
> >So the deadlock is due to the "wait_for_completion"?  
> 
> The osnoise_cpu_init callback returns directly, which may allow another CPU offline task to run, 
> the offline task holds the cpu_hotplug_lock while waiting for the osnoise task to exit. 
> osnoise_hotplug_workfn may acquire interface_lock first, causing the offline task to be blocked. 
> This is an ABBA deadlock.

Right, as I said, it is due to the "wait_for_completion" and not due to two
different locks. One is waiting for the osnoise task to exit (the
"wait_for_completion") but the osnoise task is blocked on the interface_lock().

Better to show it as:


    task1		task2		task3
    -----		-----		-----

 mutex_lock(&interface_lock)

		    [CPU GOING OFFLINE]

		    cpus_write_lock();
		    osnoise_cpu_die();
		      kthread_stop(task3);
		        wait_for_completion();

				      osnoise_sleep();
				        mutex_lock(&interface_lock);

 cpus_read_lock();

 [DEAD LOCK]

> 
> >How did you find this bug? Inspection, AI, triggered?
> >
> >Thanks,
> >
> >-- Steve  
> 
> We ran autotests on kernel-6.6, which reported the following hung task warning, and we think the
> same issue exists in linux-stable.

Thanks. It's usually good to state how a bug was discovered when fixing it.

Could you send a v2 with an updated change log?

-- Steve

^ permalink raw reply

* Re: [PATCHv4 bpf-next 00/25] bpf: tracing_multi link
From: Leon Hwang @ 2026-03-25 14:58 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: Hengqi Chen, bpf, linux-trace-kernel, Martin KaFai Lau,
	Eduard Zingerman, Song Liu, Yonghong Song, Menglong Dong,
	Steven Rostedt
In-Reply-To: <7fc7e5ad-ac42-4c7d-8314-bd252f8887a3@linux.dev>

On 2026/3/25 14:42, Leon Hwang wrote:
> Hi Jiri,
> 
> Nice version for tracing_multi link.
> 
> I hope I have time to add tracing_multi link support to bpfsnoop, and
> test this new tracing feature.
> 
> I left comments on patches #13, #24, and #25.
> 
Hmm, sashiko's reviews [1] cover my comments on patches #24 and #25. I
should check them first.

[1]
https://sashiko.dev/#/patchset/20260324081846.2334094-1-jolsa%40kernel.org

Thanks,
Leon

[...]


^ permalink raw reply

* Re: [PATCHv4 bpf-next 24/25] selftests/bpf: Add tracing multi attach benchmark test
From: Alexei Starovoitov @ 2026-03-25 15:11 UTC (permalink / raw)
  To: Leon Hwang
  Cc: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <7a119223-9994-4edc-af0b-f1ee9876cd20@linux.dev>

On Tue, Mar 24, 2026 at 11:45 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> > +
> > +     btf = btf__load_vmlinux_btf();
> > +     if (!ASSERT_OK_PTR(btf, "btf__load_vmlinux_btf"))
> > +             return;
> > +
> > +     skel = tracing_multi_bench__open_and_load();
> > +     if (!ASSERT_OK_PTR(skel, "tracing_multi_bench__open_and_load"))
> > +             goto cleanup;
> > +
> > +     if (!ASSERT_OK(bpf_get_ksyms(&ksyms, true), "get_syms"))
> > +             goto cleanup;
> > +
> > +     /* Get all ftrace 'safe' symbols.. */
> > +     for (i = 0; i < ksyms->filtered_cnt; i++) {
> > +             if (is_unsafe_function(ksyms->filtered_syms[i]))
> > +                     continue;
> > +             tsearch(&ksyms->filtered_syms[i], &root, compare);
> > +     }
> > +
> > +     /* ..and filter them through BTF and btf_type_is_traceable_func. */
> > +     nr = btf__type_cnt(btf);
> > +     for (type_id = 1; type_id < nr; type_id++) {
> > +             const struct btf_type *type;
> > +             const char *str;
> > +
> > +             type = btf__type_by_id(btf, type_id);
> > +             if (!type)
> > +                     break;
> > +
> > +             if (BTF_INFO_KIND(type->info) != BTF_KIND_FUNC)
> > +                     continue;
> > +
> > +             str = btf__name_by_offset(btf, type->name_off);
> > +             if (!str)
> > +                     break;
> > +
> > +             if (!tfind(&str, &root, compare))
> > +                     continue;
> > +
> > +             if (!btf_type_is_traceable_func(btf, type))
> > +                     continue;
> > +
> > +             err = libbpf_ensure_mem((void **) &ids, &cap, sizeof(*ids), cnt + 1);
> > +             if (err)
> > +                     goto cleanup;
> > +
> > +             ids[cnt++] = type_id;
> > +     }
> > +
> > +     opts.ids = ids;
> > +     opts.cnt = cnt;
> > +
> > +     attach_start_ns = get_time_ns();
> > +     link = bpf_program__attach_tracing_multi(skel->progs.bench, NULL, &opts);
> > +     attach_end_ns = get_time_ns();
> > +
> > +     if (!ASSERT_OK_PTR(link, "bpf_program__attach_tracing_multi"))
> > +             goto cleanup;
> > +
> > +     detach_start_ns = get_time_ns();
> > +     bpf_link__destroy(link);
> > +     detach_end_ns = get_time_ns();
> > +
> > +     attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
> > +     detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
> > +
> > +     printf("%s: found %lu functions\n", __func__, cnt);
> > +     printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
> > +     printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
> > +
> > +cleanup:
> > +     tracing_multi_bench__destroy(skel);
> > +     tdestroy(root, tdestroy_free_nop);
> > +     free_kallsyms_local(ksyms);
> > +     free(ids);
>
> Is btf__free(btf) missing here? 'btf' was allocated with calloc() inside
> btf__load_vmlinux_btf().

Good point.
Leon, please trim your replies. No need to quote the whole patch.

btw sashiko caught it too:
https://sashiko.dev/#/patchset/20260324081846.2334094-1-jolsa%40kernel.org
and many other bugs beyond what bpf CI could find.

Jiri, please address them all.

^ permalink raw reply

* Re: [PATCH v2 02/19] kernel: Use trace_call__##name() at guarded tracepoint call sites
From: Thomas Gleixner @ 2026-03-25 17:16 UTC (permalink / raw)
  To: Vineeth Pillai (Google)
  Cc: Vineeth Pillai (Google), Steven Rostedt, Peter Zijlstra,
	Tejun Heo, David Vernet, Andrea Righi, Changwoo Min, Ingo Molnar,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
	Mel Gorman, Valentin Schneider, Yury Norov [NVIDIA],
	Paul E. McKenney, Rik van Riel, Roman Kisel, Joel Fernandes,
	Rafael J. Wysocki, Ulf Hansson, linux-kernel, sched-ext,
	linux-trace-kernel
In-Reply-To: <20260323160052.17528-3-vineeth@bitbyteword.org>

On Mon, Mar 23 2026 at 12:00, Vineeth Pillai wrote:

> Replace trace_foo() with the new trace_call__foo() at sites already
> guarded by trace_foo_enabled(), avoiding a redundant
> static_branch_unlikely() re-evaluation inside the tracepoint.
> trace_call__foo() calls the tracepoint callbacks directly without
> utilizing the static branch again.
>
> Suggested-by: Steven Rostedt <rostedt@goodmis.org>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
> Assisted-by: Claude:claude-sonnet-4-6

Acked-by: Thomas Gleixner <tglx@kernel.org>

^ permalink raw reply

* Re: [PATCHv4 bpf-next 24/25] selftests/bpf: Add tracing multi attach benchmark test
From: Jiri Olsa @ 2026-03-25 21:48 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Leon Hwang, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <CAADnVQL3gO=kuvDHQNa8VfL_NWUMvBv6=vrXkOd=8Fe9-dcU3A@mail.gmail.com>

On Wed, Mar 25, 2026 at 08:11:00AM -0700, Alexei Starovoitov wrote:

SNIP

> > > +     attach_start_ns = get_time_ns();
> > > +     link = bpf_program__attach_tracing_multi(skel->progs.bench, NULL, &opts);
> > > +     attach_end_ns = get_time_ns();
> > > +
> > > +     if (!ASSERT_OK_PTR(link, "bpf_program__attach_tracing_multi"))
> > > +             goto cleanup;
> > > +
> > > +     detach_start_ns = get_time_ns();
> > > +     bpf_link__destroy(link);
> > > +     detach_end_ns = get_time_ns();
> > > +
> > > +     attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
> > > +     detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
> > > +
> > > +     printf("%s: found %lu functions\n", __func__, cnt);
> > > +     printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
> > > +     printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
> > > +
> > > +cleanup:
> > > +     tracing_multi_bench__destroy(skel);
> > > +     tdestroy(root, tdestroy_free_nop);
> > > +     free_kallsyms_local(ksyms);
> > > +     free(ids);
> >
> > Is btf__free(btf) missing here? Since 'btf' was calloc inner
> > btf__load_vmlinux_btf().
> 
> Good point.
> Leon, please trim your replies. No need to quote the whole patch.
> 
> btw sashiko caught it too:
> https://sashiko.dev/#/patchset/20260324081846.2334094-1-jolsa%40kernel.org
> and many other bugs beyond what bpf CI could find.
> 
> Jiri, please address them all.

ok, will check

jirka

^ permalink raw reply

* Re: [PATCHv4 bpf-next 24/25] selftests/bpf: Add tracing multi attach benchmark test
From: Jiri Olsa @ 2026-03-25 21:48 UTC (permalink / raw)
  To: Leon Hwang
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, bpf,
	linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <7a119223-9994-4edc-af0b-f1ee9876cd20@linux.dev>

On Wed, Mar 25, 2026 at 02:45:31PM +0800, Leon Hwang wrote:

SNIP

> > +
> > +	attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
> > +	detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
> > +
> > +	printf("%s: found %lu functions\n", __func__, cnt);
> > +	printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
> > +	printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
> > +
> > +cleanup:
> > +	tracing_multi_bench__destroy(skel);
> > +	tdestroy(root, tdestroy_free_nop);
> > +	free_kallsyms_local(ksyms);
> > +	free(ids);
> 
> Is btf__free(btf) missing here? Since 'btf' was calloc inner
> btf__load_vmlinux_btf().

ah yea, will add, thanks

jirka

^ permalink raw reply

* Re: [PATCHv4 bpf-next 13/25] bpf: Add support for tracing_multi link fdinfo
From: Jiri Olsa @ 2026-03-25 21:49 UTC (permalink / raw)
  To: Leon Hwang
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, bpf,
	linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <03163163-fae9-4980-8526-a1353cdabcfc@linux.dev>

On Wed, Mar 25, 2026 at 02:43:19PM +0800, Leon Hwang wrote:
> On 24/3/26 16:18, Jiri Olsa wrote:
> > Adding tracing_multi link fdinfo support with following output:
> > 
> > pos:    0
> > flags:  02000000
> > mnt_id: 19
> > ino:    3091
> > link_type:      tracing_multi
> > link_id:        382
> 
> Would it be better to add attach_type?
> 
> attach_type:	[fentry,fexit,fsession]_multi

that seems ok, will add

thanks,
jirka

^ permalink raw reply

* Re: [PATCHv4 bpf-next 25/25] selftests/bpf: Add tracing multi attach rollback tests
From: Jiri Olsa @ 2026-03-25 21:49 UTC (permalink / raw)
  To: Leon Hwang
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, bpf,
	linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <f222748c-69ce-4c63-826b-4f2d67fd4ec9@linux.dev>

On Wed, Mar 25, 2026 at 02:45:59PM +0800, Leon Hwang wrote:

SNIP

> > +static void test_rollback_unlink(void)
> > +{
> > +	struct tracing_multi_rollback *skel, *extra;
> > +	LIBBPF_OPTS(bpf_tracing_multi_opts, opts);
> > +	struct tracing_multi_rollback **fillers;
> > +	size_t cnt = FUNCS_CNT;
> > +	__u32 *ids = NULL;
> > +	int err, max;
> > +
> > +	max = get_bpf_max_tramp_links();
> > +	if (!ASSERT_GE(max, 1, "bpf_max_tramp_links"))
> > +		return;
> > +
> > +	/* Attach maximum allowed programs to bpf_fentry_test10 */
> > +	fillers = fillers_load_and_link(max);
> > +	if (!ASSERT_OK_PTR(fillers, "fillers_load_and_link"))
> > +		return;
> > +
> > +	extra = extra_load_and_link();
> > +	if (!ASSERT_OK_PTR(extra, "extra_load_and_link"))
> 
> Should we clean up fillers here?

yep, should jump to cleanup, thanks

jirka

^ permalink raw reply

* Re: [PATCH v2 01/19] tracepoint: Add trace_call__##name() API
From: Masami Hiramatsu @ 2026-03-26  1:28 UTC (permalink / raw)
  To: Vineeth Pillai (Google)
  Cc: Steven Rostedt, Peter Zijlstra, Dmitry Ilvokhin, Masami Hiramatsu,
	Mathieu Desnoyers, Ingo Molnar, Jens Axboe, io-uring,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Alexei Starovoitov, Daniel Borkmann, Marcelo Ricardo Leitner,
	Xin Long, Jon Maloy, Aaron Conole, Eelco Chaudron, Ilya Maximets,
	netdev, bpf, linux-sctp, tipc-discussion, dev, Jiri Pirko,
	Oded Gabbay, Koby Elbaz, dri-devel, Rafael J. Wysocki,
	Viresh Kumar, Gautham R. Shenoy, Huang Rui, Mario Limonciello,
	Len Brown, Srinivas Pandruvada, linux-pm, MyungJoo Ham,
	Kyungmin Park, Chanwoo Choi, Christian König, Sumit Semwal,
	linaro-mm-sig, Eddie James, Andrew Jeffery, Joel Stanley,
	linux-fsi, David Airlie, Simona Vetter, Alex Deucher,
	Danilo Krummrich, Matthew Brost, Philipp Stanner, Harry Wentland,
	Leo Li, amd-gfx, Jiri Kosina, Benjamin Tissoires, linux-input,
	Wolfram Sang, linux-i2c, Mark Brown, Michael Hennerich,
	Nuno Sá, linux-spi, James E.J. Bottomley, Martin K. Petersen,
	linux-scsi, Chris Mason, David Sterba, linux-btrfs,
	Thomas Gleixner, Andrew Morton, SeongJae Park, linux-mm,
	Borislav Petkov, Dave Hansen, x86, linux-trace-kernel,
	linux-kernel
In-Reply-To: <20260323160052.17528-2-vineeth@bitbyteword.org>

On Mon, 23 Mar 2026 12:00:20 -0400
"Vineeth Pillai (Google)" <vineeth@bitbyteword.org> wrote:

> Add trace_call__##name() as a companion to trace_##name().  When a
> caller already guards a tracepoint with an explicit enabled check:
> 
>   if (trace_foo_enabled() && cond)
>       trace_foo(args);
> 
> trace_foo() internally repeats the static_branch_unlikely() test, which
> the compiler cannot fold since static branches are patched binary
> instructions.  This results in two static-branch evaluations for every
> guarded call site.
> 
> trace_call__##name() calls __do_trace_##name() directly, skipping the
> redundant static-branch re-check.  This avoids leaking the internal
> __do_trace_##name() symbol into call sites while still eliminating the
> double evaluation:
> 
>   if (trace_foo_enabled() && cond)
>       trace_invoke_foo(args);   /* calls __do_trace_foo() directly */

nit: trace_call__foo() instead of trace_invoke_foo()?

Anyway looks good to me.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>


> 
> Three locations are updated:
> - __DECLARE_TRACE: invoke form omits static_branch_unlikely, retains
>   the LOCKDEP RCU-watching assertion.
> - __DECLARE_TRACE_SYSCALL: same, plus retains might_fault().
> - !TRACEPOINTS_ENABLED stub: empty no-op so callers compile cleanly
>   when tracepoints are compiled out.
> 
> Suggested-by: Steven Rostedt <rostedt@goodmis.org>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
> Assisted-by: Claude:claude-sonnet-4-6
> ---
>  include/linux/tracepoint.h | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 22ca1c8b54f32..ed969705341f1 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -294,6 +294,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>  			WARN_ONCE(!rcu_is_watching(),			\
>  				  "RCU not watching for tracepoint");	\
>  		}							\
> +	}								\
> +	static inline void trace_call__##name(proto)			\
> +	{								\
> +		__do_trace_##name(args);				\
>  	}
>  
>  #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto)		\
> @@ -313,6 +317,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>  			WARN_ONCE(!rcu_is_watching(),			\
>  				  "RCU not watching for tracepoint");	\
>  		}							\
> +	}								\
> +	static inline void trace_call__##name(proto)			\
> +	{								\
> +		might_fault();						\
> +		__do_trace_##name(args);				\
>  	}
>  
>  /*
> @@ -398,6 +407,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>  #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto)		\
>  	static inline void trace_##name(proto)				\
>  	{ }								\
> +	static inline void trace_call__##name(proto)			\
> +	{ }								\
>  	static inline int						\
>  	register_trace_##name(void (*probe)(data_proto),		\
>  			      void *data)				\
> -- 
> 2.53.0
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH v2 02/19] kernel: Use trace_call__##name() at guarded tracepoint call sites
From: Masami Hiramatsu @ 2026-03-26  1:29 UTC (permalink / raw)
  To: Vineeth Pillai (Google)
  Cc: Steven Rostedt, Peter Zijlstra, Tejun Heo, David Vernet,
	Andrea Righi, Changwoo Min, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Ben Segall, Mel Gorman,
	Valentin Schneider, Thomas Gleixner, Yury Norov [NVIDIA],
	Paul E. McKenney, Rik van Riel, Roman Kisel, Joel Fernandes,
	Rafael J. Wysocki, Ulf Hansson, linux-kernel, sched-ext,
	linux-trace-kernel
In-Reply-To: <20260323160052.17528-3-vineeth@bitbyteword.org>

On Mon, 23 Mar 2026 12:00:21 -0400
"Vineeth Pillai (Google)" <vineeth@bitbyteword.org> wrote:

> Replace trace_foo() with the new trace_call__foo() at sites already
> guarded by trace_foo_enabled(), avoiding a redundant
> static_branch_unlikely() re-evaluation inside the tracepoint.
> trace_call__foo() calls the tracepoint callbacks directly without
> utilizing the static branch again.

This looks good to me.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Thanks,

> 
> Suggested-by: Steven Rostedt <rostedt@goodmis.org>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
> Assisted-by: Claude:claude-sonnet-4-6
> ---
>  kernel/irq_work.c  | 2 +-
>  kernel/sched/ext.c | 2 +-
>  kernel/smp.c       | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/irq_work.c b/kernel/irq_work.c
> index 73f7e1fd4ab4d..120fd7365fbe2 100644
> --- a/kernel/irq_work.c
> +++ b/kernel/irq_work.c
> @@ -79,7 +79,7 @@ void __weak arch_irq_work_raise(void)
>  static __always_inline void irq_work_raise(struct irq_work *work)
>  {
>  	if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt())
> -		trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func);
> +		trace_call__ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func);
>  
>  	arch_irq_work_raise();
>  }
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 1594987d637b0..cfbac9cf62f84 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -4494,7 +4494,7 @@ static __printf(2, 3) void dump_line(struct seq_buf *s, const char *fmt, ...)
>  		vscnprintf(line_buf, sizeof(line_buf), fmt, args);
>  		va_end(args);
>  
> -		trace_sched_ext_dump(line_buf);
> +		trace_call__sched_ext_dump(line_buf);
>  	}
>  #endif
>  	/* @s may be zero sized and seq_buf triggers WARN if so */
> diff --git a/kernel/smp.c b/kernel/smp.c
> index f349960f79cad..537cf1f461d75 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -394,7 +394,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
>  		func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
>  			sched_ttwu_pending : csd->func;
>  
> -		trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
> +		trace_call__csd_queue_cpu(cpu, _RET_IP_, func, csd);
>  	}
>  
>  	/*
> -- 
> 2.53.0
> 
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* [PATCH v2] tracing/osnoise: fix potential deadlock in cpu hotplug
From: hu.shengming @ 2026-03-26  6:19 UTC (permalink / raw)
  To: rostedt, mhiramat, mathieu.desnoyers
  Cc: linux-kernel, linux-trace-kernel, zhang.run, yang.tao172,
	ran.xiaokai, luo.haiyang


[-- Attachment #1.1.1: Type: text/plain, Size: 2491 bytes --]

From: Luo Haiyang <luo.haiyang@zte.com.cn>

The following sequence may lead to a deadlock in CPU hotplug:

    task1        task2        task3
    -----        -----        -----

 mutex_lock(&interface_lock)

            [CPU GOING OFFLINE]

            cpus_write_lock();
            osnoise_cpu_die();
              kthread_stop(task3);
                wait_for_completion();

                      osnoise_sleep();
                        mutex_lock(&interface_lock);

 cpus_read_lock();

 [DEAD LOCK]

Fix by swapping the order of cpus_read_lock() and mutex_lock(&interface_lock).

Signed-off-by: Luo Haiyang <luo.haiyang@zte.com.cn>

---
Changes in v2:
- update change log
- Link to v1: https://lore.kernel.org/all/20260324150616953rMo1BWtAZ1nXTNrEFP6hr@zte.com.cn/
---
 kernel/trace/trace_osnoise.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index dee610e465b9..be6cf0bb3c03 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2073,8 +2073,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
 	if (!osnoise_has_registered_instances())
 		return;
 
-	guard(mutex)(&interface_lock);
 	guard(cpus_read_lock)();
+	guard(mutex)(&interface_lock);
 
 	if (!cpu_online(cpu))
 		return;
@@ -2237,11 +2237,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
 	if (running)
 		stop_per_cpu_kthreads();
 
-	mutex_lock(&interface_lock);
 	/*
 	 * avoid CPU hotplug operations that might read options.
 	 */
 	cpus_read_lock();
+	mutex_lock(&interface_lock);
 
 	retval = cnt;
 
@@ -2257,8 +2257,8 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
 			clear_bit(option, &osnoise_options);
 	}
 
-	cpus_read_unlock();
 	mutex_unlock(&interface_lock);
+	cpus_read_unlock();
 
 	if (running)
 		start_per_cpu_kthreads();
@@ -2345,16 +2345,16 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
 	if (running)
 		stop_per_cpu_kthreads();
 
-	mutex_lock(&interface_lock);
 	/*
 	 * osnoise_cpumask is read by CPU hotplug operations.
 	 */
 	cpus_read_lock();
+	mutex_lock(&interface_lock);
 
 	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
 
-	cpus_read_unlock();
 	mutex_unlock(&interface_lock);
+	cpus_read_unlock();
 
 	if (running)
 		start_per_cpu_kthreads();
-- 
2.25.1

[-- Attachment #1.1.2: Type: text/html , Size: 7713 bytes --]

^ permalink raw reply related

* Re: [PATCH v5 1/3] PCI: trace: Add PCI controller LTSSM transition tracepoint
From: Anand Moon @ 2026-03-26  7:13 UTC (permalink / raw)
  To: Shawn Lin
  Cc: Manivannan Sadhasivam, Bjorn Helgaas, linux-rockchip, linux-pci,
	linux-trace-kernel, linux-doc, Steven Rostedt
In-Reply-To: <1774403912-210670-2-git-send-email-shawn.lin@rock-chips.com>

Hi Shawn,

On Wed, 25 Mar 2026 at 07:28, Shawn Lin <shawn.lin@rock-chips.com> wrote:
>
> Some platforms may provide LTSSM trace functionality, recording historical
> LTSSM state transition information. This is very useful for debugging, such
> as when certain devices cannot be recognized or link broken during test.
> Implement the pci controller tracepoint for recording LTSSM and rate.
>
> Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
> ---
Tested-by: Anand Moon <linux.amoon@gmail.com>

Thanks
-Anand

^ permalink raw reply

* Re: [PATCH v5 3/3] PCI: dw-rockchip: Add pcie_ltssm_state_transition trace support
From: Anand Moon @ 2026-03-26  7:13 UTC (permalink / raw)
  To: Shawn Lin
  Cc: Manivannan Sadhasivam, Bjorn Helgaas, linux-rockchip, linux-pci,
	linux-trace-kernel, linux-doc, Steven Rostedt
In-Reply-To: <1774403912-210670-4-git-send-email-shawn.lin@rock-chips.com>

Hi Shawn,

On Wed, 25 Mar 2026 at 07:29, Shawn Lin <shawn.lin@rock-chips.com> wrote:
>
> Rockchip platforms provide a 64x4 bytes debug FIFO to trace the
> LTSSM history. Any LTSSM change will be recorded. It's useful
> for debug purpose, for example link failure, etc.
>
> Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
> ---
Tested-by: Anand Moon <linux.amoon@gmail.com>

Thanks
-Anand

^ permalink raw reply

* Re: [PATCH v2] tracing/osnoise: fix potential deadlock in cpu hotplug
From: Masami Hiramatsu @ 2026-03-26  7:18 UTC (permalink / raw)
  To: hu.shengming
  Cc: rostedt, mathieu.desnoyers, linux-kernel, linux-trace-kernel,
	zhang.run, yang.tao172, ran.xiaokai, luo.haiyang
In-Reply-To: <20260326141953414bVSj33dAYktqp9Oiyizq8@zte.com.cn>

On Thu, 26 Mar 2026 14:19:53 +0800 (CST)
<hu.shengming@zte.com.cn> wrote:

> From: Luo Haiyang <luo.haiyang@zte.com.cn>
> 
> The following sequence may lead to a deadlock in CPU hotplug:
> 
>     task1        task2        task3
>     -----        -----        -----
> 
>  mutex_lock(&interface_lock)
> 
>             [CPU GOING OFFLINE]
> 
>             cpus_write_lock();
>             osnoise_cpu_die();
>               kthread_stop(task3);
>                 wait_for_completion();
> 
>                       osnoise_sleep();
>                         mutex_lock(&interface_lock);
> 
>  cpus_read_lock();
> 
>  [DEAD LOCK]
> 
> Fix by swapping the order of cpus_read_lock() and mutex_lock(&interface_lock).
> 
> Signed-off-by: Luo Haiyang <luo.haiyang@zte.com.cn>
> 

This looks good to me.

Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Thanks,


> ---
> Changes in v2:
> - update change log
> - Link to v1: https://lore.kernel.org/all/20260324150616953rMo1BWtAZ1nXTNrEFP6hr@zte.com.cn/
> ---
>  kernel/trace/trace_osnoise.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
> index dee610e465b9..be6cf0bb3c03 100644
> --- a/kernel/trace/trace_osnoise.c
> +++ b/kernel/trace/trace_osnoise.c
> @@ -2073,8 +2073,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
>  	if (!osnoise_has_registered_instances())
>  		return;
>  
> -	guard(mutex)(&interface_lock);
>  	guard(cpus_read_lock)();
> +	guard(mutex)(&interface_lock);
>  
>  	if (!cpu_online(cpu))
>  		return;
> @@ -2237,11 +2237,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
>  	if (running)
>  		stop_per_cpu_kthreads();
>  
> -	mutex_lock(&interface_lock);
>  	/*
>  	 * avoid CPU hotplug operations that might read options.
>  	 */
>  	cpus_read_lock();
> +	mutex_lock(&interface_lock);
>  
>  	retval = cnt;
>  
> @@ -2257,8 +2257,8 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
>  			clear_bit(option, &osnoise_options);
>  	}
>  
> -	cpus_read_unlock();
>  	mutex_unlock(&interface_lock);
> +	cpus_read_unlock();
>  
>  	if (running)
>  		start_per_cpu_kthreads();
> @@ -2345,16 +2345,16 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
>  	if (running)
>  		stop_per_cpu_kthreads();
>  
> -	mutex_lock(&interface_lock);
>  	/*
>  	 * osnoise_cpumask is read by CPU hotplug operations.
>  	 */
>  	cpus_read_lock();
> +	mutex_lock(&interface_lock);
>  
>  	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
>  
> -	cpus_read_unlock();
>  	mutex_unlock(&interface_lock);
> +	cpus_read_unlock();
>  
>  	if (running)
>  		start_per_cpu_kthreads();
> -- 
> 2.25.1

-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH v2 06/19] cpufreq: Use trace_call__##name() at guarded tracepoint call sites
From: Rafael J. Wysocki @ 2026-03-26 10:24 UTC (permalink / raw)
  To: Vineeth Pillai (Google)
  Cc: Steven Rostedt, Peter Zijlstra, Huang Rui, Gautham R. Shenoy,
	Mario Limonciello, Perry Yuan, Rafael J. Wysocki, Viresh Kumar,
	Srinivas Pandruvada, Len Brown, linux-pm, linux-kernel,
	linux-trace-kernel
In-Reply-To: <20260323160052.17528-7-vineeth@bitbyteword.org>

On Mon, Mar 23, 2026 at 5:01 PM Vineeth Pillai (Google)
<vineeth@bitbyteword.org> wrote:
>
> Replace trace_foo() with the new trace_call__foo() at sites already
> guarded by trace_foo_enabled(), avoiding a redundant
> static_branch_unlikely() re-evaluation inside the tracepoint.
> trace_call__foo() calls the tracepoint callbacks directly without
> utilizing the static branch again.
>
> Suggested-by: Steven Rostedt <rostedt@goodmis.org>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
> Assisted-by: Claude:claude-sonnet-4-6

Acked-by: Rafael J. Wysocki (Intel) <rafael@kernel.org> # cpufreq core
& intel_pstate

> ---
>  drivers/cpufreq/amd-pstate.c   | 10 +++++-----
>  drivers/cpufreq/cpufreq.c      |  2 +-
>  drivers/cpufreq/intel_pstate.c |  2 +-
>  3 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 5aa9fcd80cf51..4c47324aa2f73 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -247,7 +247,7 @@ static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf,
>         if (trace_amd_pstate_epp_perf_enabled()) {
>                 union perf_cached perf = READ_ONCE(cpudata->perf);
>
> -               trace_amd_pstate_epp_perf(cpudata->cpu,
> +               trace_call__amd_pstate_epp_perf(cpudata->cpu,
>                                           perf.highest_perf,
>                                           epp,
>                                           min_perf,
> @@ -298,7 +298,7 @@ static int msr_set_epp(struct cpufreq_policy *policy, u8 epp)
>         if (trace_amd_pstate_epp_perf_enabled()) {
>                 union perf_cached perf = cpudata->perf;
>
> -               trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
> +               trace_call__amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
>                                           epp,
>                                           FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
>                                                     cpudata->cppc_req_cached),
> @@ -343,7 +343,7 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp)
>         if (trace_amd_pstate_epp_perf_enabled()) {
>                 union perf_cached perf = cpudata->perf;
>
> -               trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
> +               trace_call__amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
>                                           epp,
>                                           FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
>                                                     cpudata->cppc_req_cached),
> @@ -507,7 +507,7 @@ static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf,
>         if (trace_amd_pstate_epp_perf_enabled()) {
>                 union perf_cached perf = READ_ONCE(cpudata->perf);
>
> -               trace_amd_pstate_epp_perf(cpudata->cpu,
> +               trace_call__amd_pstate_epp_perf(cpudata->cpu,
>                                           perf.highest_perf,
>                                           epp,
>                                           min_perf,
> @@ -588,7 +588,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
>         }
>
>         if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
> -               trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
> +               trace_call__amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
>                         cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
>                                 cpudata->cpu, fast_switch);
>         }
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 277884d91913c..58901047eae5a 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -2222,7 +2222,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
>
>         if (trace_cpu_frequency_enabled()) {
>                 for_each_cpu(cpu, policy->cpus)
> -                       trace_cpu_frequency(freq, cpu);
> +                       trace_call__cpu_frequency(freq, cpu);
>         }
>
>         return freq;
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 11c58af419006..70be952209144 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -3132,7 +3132,7 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in
>                 return;
>
>         sample = &cpu->sample;
> -       trace_pstate_sample(trace_type,
> +       trace_call__pstate_sample(trace_type,
>                 0,
>                 old_pstate,
>                 cpu->pstate.current_pstate,
> --
> 2.53.0
>

^ permalink raw reply

* [syzbot] Monthly trace report (Mar 2026)
From: syzbot @ 2026-03-26 12:34 UTC (permalink / raw)
  To: linux-kernel, linux-trace-kernel, syzkaller-bugs

Hello trace maintainers/developers,

This is a 31-day syzbot report for the trace subsystem.
All related reports/information can be found at:
https://syzkaller.appspot.com/upstream/s/trace

During the period, 3 new issues were detected and 1 was fixed.
In total, 17 issues are still open and 64 have already been fixed.

Some of the still happening issues:

Ref  Crashes Repro Title
<1>  277     Yes   WARNING in tracepoint_probe_unregister (3)
                   https://syzkaller.appspot.com/bug?extid=a1d25e53cd4a10f7f2d3
<2>  107     Yes   WARNING in blk_register_tracepoints
                   https://syzkaller.appspot.com/bug?extid=c54ded83396afee31eb1
<3>  76      Yes   KASAN: slab-use-after-free Read in bpf_trace_run2 (3)
                   https://syzkaller.appspot.com/bug?extid=59701a78e84b0bccfe1b
<4>  75      Yes   INFO: task hung in blk_trace_ioctl (4)
                   https://syzkaller.appspot.com/bug?extid=ed812ed461471ab17a0c
<5>  46      Yes   INFO: task hung in blk_trace_startstop
                   https://syzkaller.appspot.com/bug?extid=774863666ef5b025c9d0
<6>  42      No    WARNING in ring_buffer_map_get_reader (2)
                   https://syzkaller.appspot.com/bug?extid=c7143161d8215214a993
<7>  27      Yes   INFO: task hung in relay_open (2)
                   https://syzkaller.appspot.com/bug?extid=16ea22f26882d7e46f35
<8>  27      Yes   WARNING in get_probe_ref
                   https://syzkaller.appspot.com/bug?extid=8672dcb9d10011c0a160
<9>  18      Yes   INFO: task hung in blk_trace_setup (4)
                   https://syzkaller.appspot.com/bug?extid=9c1ebb9957045e00ac63
<10> 2       Yes   possible deadlock in down_trylock (3)
                   https://syzkaller.appspot.com/bug?extid=c3740bc819eb55460ec3

---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

To disable reminders for individual bugs, reply with the following command:
#syz set <Ref> no-reminders

To change bug's subsystems, reply with:
#syz set <Ref> subsystems: new-subsystem

You may send multiple commands in a single email message.

^ permalink raw reply

* Re: [PATCH v2] bootconfig: Apply early options from embedded config
From: Masami Hiramatsu @ 2026-03-26 14:30 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Breno Leitao, Jonathan Corbet, Shuah Khan, linux-kernel,
	linux-trace-kernel, linux-doc, oss, paulmck, rostedt, kernel-team
In-Reply-To: <20260325232204.05edbb21c7602b6408ca007b@kernel.org>

On Wed, 25 Mar 2026 23:22:04 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> > +	/*
> > +	 * Keys that do not match any early_param() handler are silently
> > +	 * ignored — do_early_param() always returns 0.
> > +	 */
> > +	xbc_node_for_each_key_value(root, knode, val) {
> 
> [sashiko comment]
> | Does this loop handle array values correctly?
> | xbc_node_for_each_key_value() only assigns the first value of an array to
> | the val pointer before advancing to the next key. It does not iterate over
> | the child nodes of the array.
> | If the bootconfig contains a multi-value key like
> | kernel.console = "ttyS0", "tty0", will the subsequent values in the array
> | be silently dropped instead of passed to the early_param handlers?
> 
> Also, good catch :) we need to use xbc_node_for_each_array_value()
> for inner loop.

FYI, xbc_snprint_cmdline() translates an array parameter into
multiple parameters. For example,

foo = bar, buz;

will be converted to

foo=bar foo=buz

Thus, I think we should do the same thing below;

> 
> > +		if (xbc_node_compose_key_after(root, knode, xbc_namebuf, XBC_KEYLEN_MAX) < 0)
> > +			continue;
> > +
> > +		/*
> > +		 * We need to copy const char *val to a char pointer,
> > +		 * which is what do_early_param() need, given it might
> > +		 * call strsep(), strtok() later.
> > +		 */
> > +		ret = strscpy(val_buf, val, sizeof(val_buf));
> > +		if (ret < 0) {
> > +			pr_warn("ignoring bootconfig value '%s', too long\n",
> > +				xbc_namebuf);
> > +			continue;
> > +		}
> > +		do_early_param(xbc_namebuf, val_buf, NULL, NULL);

So instead of the above, something like this:

xbc_array_for_each_value(vnode, val) {
	do_early_param(xbc_namebuf, val, NULL, NULL);
}

Maybe it is a good time to reconsider unifying kernel cmdline and bootconfig
from an API viewpoint.

Thanks,

-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* [PATCH v4 5/5] locking: Add contended_release tracepoint to spinning locks
From: Dmitry Ilvokhin @ 2026-03-26 15:10 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long,
	Thomas Bogendoerfer, Juergen Gross, Ajay Kaher, Alexey Makhalov,
	Broadcom internal kernel review list, Thomas Gleixner,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Arnd Bergmann,
	Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers
  Cc: linux-kernel, linux-mips, virtualization, linux-arch, linux-mm,
	linux-trace-kernel, kernel-team, Dmitry Ilvokhin
In-Reply-To: <cover.1774536681.git.d@ilvokhin.com>

Extend the contended_release tracepoint to queued spinlocks and queued
rwlocks.

Use the arch-overridable queued_spin_release(), introduced in the
previous commit, to ensure the tracepoint works correctly across all
architectures, including those with custom unlock implementations (e.g.
x86 paravirt).

When the tracepoint is disabled, the only addition to the hot path is a
single NOP instruction (the static branch). When enabled, the contention
check, trace call, and unlock are combined in an out-of-line function to
minimize hot path impact, avoiding the compiler needing to preserve the
lock pointer in a callee-saved register across the trace call.

Binary size impact (x86_64, defconfig):
  uninlined unlock (common case): +983 bytes  (+0.00%)
  inlined unlock (worst case):    +58165 bytes (+0.24%)

The inlined unlock case could not be achieved through Kconfig options on
x86_64 as PREEMPT_BUILD unconditionally selects UNINLINE_SPIN_UNLOCK on
x86_64. The UNINLINE_SPIN_UNLOCK guards were manually inverted to force
inline the unlock path and estimate the worst case binary size increase.

Architectures with fully custom qspinlock implementations (e.g.
PowerPC) are not covered by this change.

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
 include/asm-generic/qrwlock.h   | 48 +++++++++++++++++++++++++++------
 include/asm-generic/qspinlock.h | 18 +++++++++++++
 kernel/locking/qrwlock.c        | 16 +++++++++++
 kernel/locking/qspinlock.c      |  8 ++++++
 4 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 75b8f4601b28..e24dc537fd66 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -14,6 +14,7 @@
 #define __ASM_GENERIC_QRWLOCK_H
 
 #include <linux/atomic.h>
+#include <linux/tracepoint-defs.h>
 #include <asm/barrier.h>
 #include <asm/processor.h>
 
@@ -35,6 +36,10 @@
  */
 extern void queued_read_lock_slowpath(struct qrwlock *lock);
 extern void queued_write_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_unlock_traced(struct qrwlock *lock);
+extern void queued_write_unlock_traced(struct qrwlock *lock);
+
+DECLARE_TRACEPOINT(contended_release);
 
 /**
  * queued_read_trylock - try to acquire read lock of a queued rwlock
@@ -102,10 +107,16 @@ static inline void queued_write_lock(struct qrwlock *lock)
 }
 
 /**
- * queued_read_unlock - release read lock of a queued rwlock
+ * queued_rwlock_is_contended - check if the lock is contended
  * @lock : Pointer to queued rwlock structure
+ * Return: 1 if lock contended, 0 otherwise
  */
-static inline void queued_read_unlock(struct qrwlock *lock)
+static inline int queued_rwlock_is_contended(struct qrwlock *lock)
+{
+	return arch_spin_is_locked(&lock->wait_lock);
+}
+
+static __always_inline void __queued_read_unlock(struct qrwlock *lock)
 {
 	/*
 	 * Atomically decrement the reader count
@@ -114,22 +125,43 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 }
 
 /**
- * queued_write_unlock - release write lock of a queued rwlock
+ * queued_read_unlock - release read lock of a queued rwlock
  * @lock : Pointer to queued rwlock structure
  */
-static inline void queued_write_unlock(struct qrwlock *lock)
+static inline void queued_read_unlock(struct qrwlock *lock)
+{
+	/*
+	 * Trace and unlock are combined in the traced unlock variant so
+	 * the compiler does not need to preserve the lock pointer across
+	 * the function call, avoiding callee-saved register save/restore
+	 * on the hot path.
+	 */
+	if (tracepoint_enabled(contended_release)) {
+		queued_read_unlock_traced(lock);
+		return;
+	}
+
+	__queued_read_unlock(lock);
+}
+
+static __always_inline void __queued_write_unlock(struct qrwlock *lock)
 {
 	smp_store_release(&lock->wlocked, 0);
 }
 
 /**
- * queued_rwlock_is_contended - check if the lock is contended
+ * queued_write_unlock - release write lock of a queued rwlock
  * @lock : Pointer to queued rwlock structure
- * Return: 1 if lock contended, 0 otherwise
  */
-static inline int queued_rwlock_is_contended(struct qrwlock *lock)
+static inline void queued_write_unlock(struct qrwlock *lock)
 {
-	return arch_spin_is_locked(&lock->wait_lock);
+	/* See comment in queued_read_unlock(). */
+	if (tracepoint_enabled(contended_release)) {
+		queued_write_unlock_traced(lock);
+		return;
+	}
+
+	__queued_write_unlock(lock);
 }
 
 /*
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index df76f34645a0..915a4c2777f6 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -41,6 +41,7 @@
 
 #include <asm-generic/qspinlock_types.h>
 #include <linux/atomic.h>
+#include <linux/tracepoint-defs.h>
 
 #ifndef queued_spin_is_locked
 /**
@@ -129,12 +130,29 @@ static __always_inline void queued_spin_release(struct qspinlock *lock)
 }
 #endif
 
+DECLARE_TRACEPOINT(contended_release);
+
+extern void queued_spin_release_traced(struct qspinlock *lock);
+
 /**
  * queued_spin_unlock - unlock a queued spinlock
  * @lock : Pointer to queued spinlock structure
+ *
+ * Generic tracing wrapper around the arch-overridable
+ * queued_spin_release().
  */
 static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 {
+	/*
+	 * Trace and release are combined in queued_spin_release_traced() so
+	 * the compiler does not need to preserve the lock pointer across the
+	 * function call, avoiding callee-saved register save/restore on the
+	 * hot path.
+	 */
+	if (tracepoint_enabled(contended_release)) {
+		queued_spin_release_traced(lock);
+		return;
+	}
 	queued_spin_release(lock);
 }
 
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index d2ef312a8611..5f7a0fc2b27a 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -90,3 +90,19 @@ void __lockfunc queued_write_lock_slowpath(struct qrwlock *lock)
 	trace_contention_end(lock, 0);
 }
 EXPORT_SYMBOL(queued_write_lock_slowpath);
+
+void __lockfunc queued_read_unlock_traced(struct qrwlock *lock)
+{
+	if (queued_rwlock_is_contended(lock))
+		trace_contended_release(lock);
+	__queued_read_unlock(lock);
+}
+EXPORT_SYMBOL(queued_read_unlock_traced);
+
+void __lockfunc queued_write_unlock_traced(struct qrwlock *lock)
+{
+	if (queued_rwlock_is_contended(lock))
+		trace_contended_release(lock);
+	__queued_write_unlock(lock);
+}
+EXPORT_SYMBOL(queued_write_unlock_traced);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index af8d122bb649..c72610980ec7 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -104,6 +104,14 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+void __lockfunc queued_spin_release_traced(struct qspinlock *lock)
+{
+	if (queued_spin_is_contended(lock))
+		trace_contended_release(lock);
+	queued_spin_release(lock);
+}
+EXPORT_SYMBOL(queued_spin_release_traced);
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
-- 
2.52.0


^ permalink raw reply related

* [PATCH v4 2/5] locking/percpu-rwsem: Extract __percpu_up_read()
From: Dmitry Ilvokhin @ 2026-03-26 15:10 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long,
	Thomas Bogendoerfer, Juergen Gross, Ajay Kaher, Alexey Makhalov,
	Broadcom internal kernel review list, Thomas Gleixner,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Arnd Bergmann,
	Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers
  Cc: linux-kernel, linux-mips, virtualization, linux-arch, linux-mm,
	linux-trace-kernel, kernel-team, Dmitry Ilvokhin, Usama Arif
In-Reply-To: <cover.1774536681.git.d@ilvokhin.com>

Move the percpu_up_read() slowpath out of the inline function into a new
__percpu_up_read() to avoid a binary size increase from adding a
tracepoint to an inlined function.

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Acked-by: Usama Arif <usama.arif@linux.dev>
---
 include/linux/percpu-rwsem.h  | 15 +++------------
 kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c8cb010d655e..39d5bf8e6562 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 	return ret;
 }
 
+extern void __percpu_up_read(struct percpu_rw_semaphore *sem);
+
 static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, _RET_IP_);
@@ -118,18 +120,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 	if (likely(rcu_sync_is_idle(&sem->rss))) {
 		this_cpu_dec(*sem->read_count);
 	} else {
-		/*
-		 * slowpath; reader will only ever wake a single blocked
-		 * writer.
-		 */
-		smp_mb(); /* B matches C */
-		/*
-		 * In other words, if they see our decrement (presumably to
-		 * aggregate zero, as that is the only time it matters) they
-		 * will also see our critical section.
-		 */
-		this_cpu_dec(*sem->read_count);
-		rcuwait_wake_up(&sem->writer);
+		__percpu_up_read(sem);
 	}
 	preempt_enable();
 }
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ef234469baac..f3ee7a0d6047 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -288,3 +288,21 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
 	rcu_sync_exit(&sem->rss);
 }
 EXPORT_SYMBOL_GPL(percpu_up_write);
+
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+	lockdep_assert_preemption_disabled();
+	/*
+	 * slowpath; reader will only ever wake a single blocked
+	 * writer.
+	 */
+	smp_mb(); /* B matches C */
+	/*
+	 * In other words, if they see our decrement (presumably to
+	 * aggregate zero, as that is the only time it matters) they
+	 * will also see our critical section.
+	 */
+	this_cpu_dec(*sem->read_count);
+	rcuwait_wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read);
-- 
2.52.0


^ permalink raw reply related

* [PATCH v4 4/5] locking: Factor out queued_spin_release()
From: Dmitry Ilvokhin @ 2026-03-26 15:10 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long,
	Thomas Bogendoerfer, Juergen Gross, Ajay Kaher, Alexey Makhalov,
	Broadcom internal kernel review list, Thomas Gleixner,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Arnd Bergmann,
	Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers
  Cc: linux-kernel, linux-mips, virtualization, linux-arch, linux-mm,
	linux-trace-kernel, kernel-team, Dmitry Ilvokhin
In-Reply-To: <cover.1774536681.git.d@ilvokhin.com>

Introduce queued_spin_release() as an arch-overridable unlock primitive,
and make queued_spin_unlock() a generic wrapper around it. This is a
preparatory refactoring for the next commit, which adds
contended_release tracepoint instrumentation to queued_spin_unlock().

Rename the existing arch-specific queued_spin_unlock() overrides on
x86 (paravirt) and MIPS to queued_spin_release().

No functional change.

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
 arch/mips/include/asm/spinlock.h         |  6 +++---
 arch/x86/include/asm/paravirt-spinlock.h |  6 +++---
 include/asm-generic/qspinlock.h          | 15 ++++++++++++---
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 6ce2117e49f6..c349162f15eb 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -13,12 +13,12 @@
 
 #include <asm-generic/qspinlock_types.h>
 
-#define	queued_spin_unlock queued_spin_unlock
+#define	queued_spin_release queued_spin_release
 /**
- * queued_spin_unlock - release a queued spinlock
+ * queued_spin_release - release a queued spinlock
  * @lock : Pointer to queued spinlock structure
  */
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static inline void queued_spin_release(struct qspinlock *lock)
 {
 	/* This could be optimised with ARCH_HAS_MMIOWB */
 	mmiowb();
diff --git a/arch/x86/include/asm/paravirt-spinlock.h b/arch/x86/include/asm/paravirt-spinlock.h
index 7beffcb08ed6..ac75e0736198 100644
--- a/arch/x86/include/asm/paravirt-spinlock.h
+++ b/arch/x86/include/asm/paravirt-spinlock.h
@@ -49,9 +49,9 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu)
 				ALT_NOT(X86_FEATURE_VCPUPREEMPT));
 }
 
-#define queued_spin_unlock queued_spin_unlock
+#define queued_spin_release queued_spin_release
 /**
- * queued_spin_unlock - release a queued spinlock
+ * queued_spin_release - release a queued spinlock
  * @lock : Pointer to queued spinlock structure
  *
  * A smp_store_release() on the least-significant byte.
@@ -66,7 +66,7 @@ static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	pv_queued_spin_lock_slowpath(lock, val);
 }
 
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static inline void queued_spin_release(struct qspinlock *lock)
 {
 	kcsan_release();
 	pv_queued_spin_unlock(lock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index bf47cca2c375..df76f34645a0 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -115,12 +115,12 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 }
 #endif
 
-#ifndef queued_spin_unlock
+#ifndef queued_spin_release
 /**
- * queued_spin_unlock - release a queued spinlock
+ * queued_spin_release - release a queued spinlock
  * @lock : Pointer to queued spinlock structure
  */
-static __always_inline void queued_spin_unlock(struct qspinlock *lock)
+static __always_inline void queued_spin_release(struct qspinlock *lock)
 {
 	/*
 	 * unlock() needs release semantics:
@@ -129,6 +129,15 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif
 
+/**
+ * queued_spin_unlock - unlock a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	queued_spin_release(lock);
+}
+
 #ifndef virt_spin_lock
 static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
-- 
2.52.0


^ permalink raw reply related

* [PATCH v4 1/5] tracing/lock: Remove unnecessary linux/sched.h include
From: Dmitry Ilvokhin @ 2026-03-26 15:10 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long,
	Thomas Bogendoerfer, Juergen Gross, Ajay Kaher, Alexey Makhalov,
	Broadcom internal kernel review list, Thomas Gleixner,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Arnd Bergmann,
	Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers
  Cc: linux-kernel, linux-mips, virtualization, linux-arch, linux-mm,
	linux-trace-kernel, kernel-team, Dmitry Ilvokhin
In-Reply-To: <cover.1774536681.git.d@ilvokhin.com>

None of the trace events in lock.h reference anything from
linux/sched.h. Remove the unnecessary include.

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
 include/trace/events/lock.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 8e89baa3775f..da978f2afb45 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -5,7 +5,6 @@
 #if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_LOCK_H
 
-#include <linux/sched.h>
 #include <linux/tracepoint.h>
 
 /* flags for lock:contention_begin */
-- 
2.52.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox