Linux Trace Kernel

Linux Trace Kernel
 help / color / mirror / Atom feed

* Re: [PATCHv3 bpf-next 23/24] selftests/bpf: Add tracing multi attach benchmark test
From: Leon Hwang @ 2026-03-17  3:09 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-24-jolsa@kernel.org>

On 16/3/26 15:51, Jiri Olsa wrote:
> Adding benchmark test that attaches to (almost) all allowed tracing
> functions and display attach/detach times.
> 
>   # ./test_progs -t tracing_multi_bench_attach -v
>   bpf_testmod.ko is already unloaded.
>   Loading bpf_testmod.ko...
>   Successfully loaded bpf_testmod.ko.
>   serial_test_tracing_multi_bench_attach:PASS:btf__load_vmlinux_btf 0 nsec
>   serial_test_tracing_multi_bench_attach:PASS:tracing_multi_bench__open_and_load 0 nsec
>   serial_test_tracing_multi_bench_attach:PASS:get_syms 0 nsec
>   serial_test_tracing_multi_bench_attach:PASS:bpf_program__attach_tracing_multi 0 nsec
>   serial_test_tracing_multi_bench_attach: found 51186 functions
>   serial_test_tracing_multi_bench_attach: attached in   1.295s
>   serial_test_tracing_multi_bench_attach: detached in   0.243s
>   #507     tracing_multi_bench_attach:OK
>   Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>   Successfully unloaded bpf_testmod.ko.
> 
> Exporting skip_entry as is_unsafe_function and usign it in the test.
                                                 ^ using

> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  .../selftests/bpf/prog_tests/tracing_multi.c  | 97 +++++++++++++++++++
>  .../selftests/bpf/progs/tracing_multi_bench.c | 13 +++
>  tools/testing/selftests/bpf/trace_helpers.c   |  6 +-
>  tools/testing/selftests/bpf/trace_helpers.h   |  1 +
>  4 files changed, 114 insertions(+), 3 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_bench.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> index 9f4c5af88e21..a0fcda51bb6c 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> @@ -9,6 +9,7 @@
>  #include "tracing_multi_intersect.skel.h"
>  #include "tracing_multi_session.skel.h"
>  #include "tracing_multi_fail.skel.h"
> +#include "tracing_multi_bench.skel.h"
>  #include "trace_helpers.h"
>  
>  static __u64 bpf_fentry_test_cookies[] = {
> @@ -552,6 +553,102 @@ static void test_attach_api_fails(void)
>  	tracing_multi_fail__destroy(skel);
>  }
>  
> +void serial_test_tracing_multi_bench_attach(void)
> +{
> +	LIBBPF_OPTS(bpf_tracing_multi_opts, opts);
> +	struct tracing_multi_bench *skel = NULL;
> +	long attach_start_ns, attach_end_ns;
> +	long detach_start_ns, detach_end_ns;
> +	double attach_delta, detach_delta;
> +	struct bpf_link *link = NULL;
> +	size_t i, cap = 0, cnt = 0;
> +	struct ksyms *ksyms = NULL;
> +	void *root = NULL;
> +	__u32 *ids = NULL;
> +	__u32 nr, type_id;
> +	struct btf *btf;
> +	int err;
> +
> +#ifndef __x86_64__
> +	test__skip();
> +	return;
> +#endif
> +
> +	btf = btf__load_vmlinux_btf();
> +	if (!ASSERT_OK_PTR(btf, "btf__load_vmlinux_btf"))
> +		return;
> +
> +	skel = tracing_multi_bench__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "tracing_multi_bench__open_and_load"))
> +		goto cleanup;
> +
> +	if (!ASSERT_OK(bpf_get_ksyms(&ksyms, true), "get_syms"))
> +		goto cleanup;
> +
> +	/* Get all ftrace 'safe' symbols.. */
> +	for (i = 0; i < ksyms->filtered_cnt; i++) {
> +		if (is_unsafe_function(ksyms->filtered_syms[i]))
> +			continue;
> +		tsearch(&ksyms->filtered_syms[i], &root, compare);
                ^ missing tdestroy() to free tree nodes?

> +	}
> +
> +	/* ..and filter them through BTF and btf_type_is_traceable_func. */
> +	nr = btf__type_cnt(btf);
> +	for (type_id = 1; type_id < nr; type_id++) {
> +		const struct btf_type *type;
> +		const char *str;
> +
> +		type = btf__type_by_id(btf, type_id);
> +		if (!type)
> +			break;
> +
> +		if (BTF_INFO_KIND(type->info) != BTF_KIND_FUNC)
> +			continue;
> +
> +		str = btf__name_by_offset(btf, type->name_off);
> +		if (!str)
> +			break;
> +
> +		if (!tfind(&str, &root, compare))
> +			continue;
> +
> +		if (!btf_type_is_traceable_func(btf, type))
> +			continue;
> +
> +		err = libbpf_ensure_mem((void **) &ids, &cap, sizeof(*ids), cnt + 1);
> +		if (err)
> +			goto cleanup;
> +
> +		ids[cnt++] = type_id;
> +	}
> +
> +	opts.ids = ids;
> +	opts.cnt = cnt;
> +
> +	attach_start_ns = get_time_ns();
> +	link = bpf_program__attach_tracing_multi(skel->progs.bench, NULL, &opts);
> +	attach_end_ns = get_time_ns();
> +
> +	if (!ASSERT_OK_PTR(link, "bpf_program__attach_tracing_multi"))
> +		goto cleanup;
> +
> +	detach_start_ns = get_time_ns();
> +	bpf_link__destroy(link);
> +	detach_end_ns = get_time_ns();
> +
> +	attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
> +	detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
> +
> +	printf("%s: found %lu functions\n", __func__, cnt);
> +	printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
> +	printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
> +
> +cleanup:
> +	tracing_multi_bench__destroy(skel);
> +	free_kallsyms_local(ksyms);
> +	free(ids);
> +}
> +
>  void test_tracing_multi_test(void)
>  {
>  #ifndef __x86_64__
> diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_bench.c b/tools/testing/selftests/bpf/progs/tracing_multi_bench.c
> new file mode 100644
> index 000000000000..067ba668489b
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/tracing_multi_bench.c
> @@ -0,0 +1,13 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stdbool.h>
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("fentry.multi")
> +int BPF_PROG(bench)
> +{
> +	return 0;
> +}
> diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
> index 0e63daf83ed5..3bf600f3271b 100644
> --- a/tools/testing/selftests/bpf/trace_helpers.c
> +++ b/tools/testing/selftests/bpf/trace_helpers.c
> @@ -548,7 +548,7 @@ static const char * const trace_blacklist[] = {
>  	"bpf_get_numa_node_id",
>  };
>  
> -static bool skip_entry(char *name)
> +bool is_unsafe_function(char *name)
NIT:                       ^ should this be const char *?

Thanks,
Leon

>  {
>  	int i;
>  
> @@ -651,7 +651,7 @@ int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel)
>  		free(name);
>  		if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
>  			continue;
> -		if (skip_entry(name))
> +		if (is_unsafe_function(name))
>  			continue;
>  
>  		ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
> @@ -728,7 +728,7 @@ int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
>  		free(name);
>  		if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
>  			continue;
> -		if (skip_entry(name))
> +		if (is_unsafe_function(name))
>  			continue;
>  
>  		if (cnt == max_cnt) {
> diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
> index d5bf1433675d..d93be322675d 100644
> --- a/tools/testing/selftests/bpf/trace_helpers.h
> +++ b/tools/testing/selftests/bpf/trace_helpers.h
> @@ -63,4 +63,5 @@ int read_build_id(const char *path, char *build_id, size_t size);
>  int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel);
>  int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
>  
> +bool is_unsafe_function(char *name);
>  #endif


^ permalink raw reply

* Re: [PATCHv3 bpf-next 22/24] selftests/bpf: Add tracing multi attach fails test
From: Leon Hwang @ 2026-03-17  3:06 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-23-jolsa@kernel.org>

On 16/3/26 15:51, Jiri Olsa wrote:
> Adding tests for attach fails on tracing multi link.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  .../selftests/bpf/prog_tests/tracing_multi.c  | 74 +++++++++++++++++++
>  .../selftests/bpf/progs/tracing_multi_fail.c  | 19 +++++
>  2 files changed, 93 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_fail.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> index 04d83c37495b..9f4c5af88e21 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> @@ -8,6 +8,7 @@
>  #include "tracing_multi_module.skel.h"
>  #include "tracing_multi_intersect.skel.h"
>  #include "tracing_multi_session.skel.h"
> +#include "tracing_multi_fail.skel.h"
>  #include "trace_helpers.h"
>  
>  static __u64 bpf_fentry_test_cookies[] = {
> @@ -480,6 +481,77 @@ static void test_session(void)
>  	tracing_multi_session__destroy(skel);
>  }
>  
> +static void test_attach_api_fails(void)
> +{
> +	LIBBPF_OPTS(bpf_tracing_multi_opts, opts);
> +	static const char * const func[] = {
> +		"bpf_fentry_test2",
> +	};
> +	struct tracing_multi_fail *skel = NULL;
> +	__u32 ids[2], *ids2;
> +	__u64 cookies[2];
> +
> +	skel = tracing_multi_fail__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "tracing_multi_fail__open_and_load"))
> +		return;
> +
> +	/* fail#1 pattern and opts NULL */
> +	skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry,
> +						NULL, NULL);
> +	if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi"))
> +		goto cleanup;
> +
> +	/* fail#2 pattern and ids */
> +	opts.ids = ids;
> +	opts.cnt = 2;
> +
> +	skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry,
> +						"bpf_fentry_test*", &opts);
> +	if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi"))
> +		goto cleanup;
> +
> +	/* fail#3 pattern and cookies */
> +	opts.ids = NULL;
> +	opts.cnt = 2;
> +	opts.cookies = cookies;
> +
> +	skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry,
> +						"bpf_fentry_test*", &opts);
> +	if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi"))
> +		goto cleanup;
> +
> +	/* fail#4 bogus pattern */
> +	skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry,
> +						"bpf_not_really_a_function*", NULL);
> +	if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi"))
> +		goto cleanup;
> +
> +	/* fail#5 abnormal cnt */
> +	opts.ids = ids;
> +	opts.cnt = INT_MAX;
> +
> +	skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry,
> +						NULL, &opts);
> +	if (!ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi"))
> +		goto cleanup;
> +
> +	/* fail#6 attach sleepable program to not-allowed function */
> +	ids2 = get_ids(func, 1, NULL);
> +	if (!ASSERT_OK_PTR(ids, "get_ids"))
                           ^ ids2 ?

> +		goto cleanup;
> +
> +	opts.ids = ids2;
> +	opts.cnt = 1;
> +
> +	skel->links.test_fentry_s = bpf_program__attach_tracing_multi(skel->progs.test_fentry_s,
> +						NULL, &opts);
> +	ASSERT_ERR_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi");
                                   ^ test_fentry_s ?

Thanks,
Leon

> +	free(ids2);
> +
> +cleanup:
> +	tracing_multi_fail__destroy(skel);
> +}
> +
>  void test_tracing_multi_test(void)
>  {
>  #ifndef __x86_64__
> @@ -505,4 +577,6 @@ void test_tracing_multi_test(void)
>  		test_link_api_ids(true);
>  	if (test__start_subtest("session"))
>  		test_session();
> +	if (test__start_subtest("attach_api_fails"))
> +		test_attach_api_fails();
>  }
> diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_fail.c b/tools/testing/selftests/bpf/progs/tracing_multi_fail.c
> new file mode 100644
> index 000000000000..8f769ddb9136
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/tracing_multi_fail.c
> @@ -0,0 +1,19 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stdbool.h>
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("fentry.multi")
> +int BPF_PROG(test_fentry)
> +{
> +	return 0;
> +}
> +
> +SEC("fentry.multi.s")
> +int BPF_PROG(test_fentry_s)
> +{
> +	return 0;
> +}


^ permalink raw reply

* Re: [PATCHv3 bpf-next 20/24] selftests/bpf: Add tracing multi cookies test
From: Leon Hwang @ 2026-03-17  3:06 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-21-jolsa@kernel.org>

On 16/3/26 15:51, Jiri Olsa wrote:
> Adding tests for using cookies on tracing multi link.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  .../selftests/bpf/prog_tests/tracing_multi.c  | 23 +++++++++++++++++--
>  .../selftests/bpf/progs/tracing_multi_check.c | 15 +++++++++++-
>  2 files changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> index b7818f438d6e..f14a936a4667 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> @@ -9,6 +9,19 @@
>  #include "tracing_multi_intersect.skel.h"
>  #include "trace_helpers.h"
>  
> +static __u64 bpf_fentry_test_cookies[] = {
> +	8,  /* bpf_fentry_test1 */
> +	9,  /* bpf_fentry_test2 */
> +	7,  /* bpf_fentry_test3 */
> +	5,  /* bpf_fentry_test4 */
> +	4,  /* bpf_fentry_test5 */
> +	2,  /* bpf_fentry_test6 */
> +	3,  /* bpf_fentry_test7 */
> +	1,  /* bpf_fentry_test8 */
> +	10, /* bpf_fentry_test9 */
> +	6,  /* bpf_fentry_test10 */
> +};
> +
>  static const char * const bpf_fentry_test[] = {
>  	"bpf_fentry_test1",
>  	"bpf_fentry_test2",
> @@ -204,7 +217,7 @@ static void test_link_api_pattern(void)
>  	tracing_multi__destroy(skel);
>  }
>  
> -static void test_link_api_ids(void)
> +static void test_link_api_ids(bool test_cookies)
>  {
>  	LIBBPF_OPTS(bpf_tracing_multi_opts, opts);
>  	struct tracing_multi *skel;
> @@ -216,6 +229,7 @@ static void test_link_api_ids(void)
>  		return;
>  
>  	skel->bss->pid = getpid();
> +	skel->bss->test_cookies = test_cookies;
>  
>  	ids = get_ids(bpf_fentry_test, cnt, NULL);
>  	if (!ASSERT_OK_PTR(ids, "get_ids"))
> @@ -224,6 +238,9 @@ static void test_link_api_ids(void)
>  	opts.ids = ids;
>  	opts.cnt = cnt;
>  
> +	if (test_cookies)
> +		opts.cookies = bpf_fentry_test_cookies;
> +
>  	skel->links.test_fentry = bpf_program__attach_tracing_multi(skel->progs.test_fentry,
>  						NULL, &opts);
>  	if (!ASSERT_OK_PTR(skel->links.test_fentry, "bpf_program__attach_tracing_multi"))
> @@ -437,7 +454,7 @@ void test_tracing_multi_test(void)
>  	if (test__start_subtest("link_api_pattern"))
>  		test_link_api_pattern();
>  	if (test__start_subtest("link_api_ids"))
> -		test_link_api_ids();
> +		test_link_api_ids(false);
>  	if (test__start_subtest("module_skel_api"))
>  		test_module_skel_api();
>  	if (test__start_subtest("module_link_api_pattern"))
> @@ -446,4 +463,6 @@ void test_tracing_multi_test(void)
>  		test_module_link_api_ids();
>  	if (test__start_subtest("intersect"))
>  		test_intersect();
> +	if (test__start_subtest("cookies"))
> +		test_link_api_ids(true);
>  }
> diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_check.c b/tools/testing/selftests/bpf/progs/tracing_multi_check.c
> index 0e3248312dd5..e6047d5a078a 100644
> --- a/tools/testing/selftests/bpf/progs/tracing_multi_check.c
> +++ b/tools/testing/selftests/bpf/progs/tracing_multi_check.c
> @@ -7,6 +7,7 @@
>  char _license[] SEC("license") = "GPL";
>  
>  int pid = 0;
> +bool test_cookies = false;
>  
>  extern const void bpf_fentry_test1 __ksym;
>  extern const void bpf_fentry_test2 __ksym;
> @@ -28,7 +29,7 @@ extern const void bpf_testmod_fentry_test11 __ksym;
>  void tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return)
>  {
>  	void *ip = (void *) bpf_get_func_ip(ctx);
> -	__u64 value = 0, ret = 0;
> +	__u64 value = 0, ret = 0, cookie = 0;
>  	long err = 0;
>  
>  	if (bpf_get_current_pid_tgid() >> 32 != pid)
> @@ -36,6 +37,8 @@ void tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return)
>  
>  	if (is_return)
>  		err |= bpf_get_func_ret(ctx, &ret);
> +	if (test_cookies)
> +		cookie = test_cookies ? bpf_get_attach_cookie(ctx) : 0;
                         ^ duplicate test_cookies check? This one can be dropped.

Thanks,
Leon

[...]


^ permalink raw reply

* Re: [PATCHv3 bpf-next 19/24] selftests/bpf: Add tracing multi intersect tests
From: Leon Hwang @ 2026-03-17  3:05 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-20-jolsa@kernel.org>

On 16/3/26 15:51, Jiri Olsa wrote:
> Adding tracing multi tests for intersecting attached functions.
> 
> Using bits from (from 1 to 16 values) to specify (up to 4) attached
> programs, and randomly choosing bpf_fentry_test* functions they are
> attached to.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  tools/testing/selftests/bpf/Makefile          |  4 +-
>  .../selftests/bpf/prog_tests/tracing_multi.c  | 99 +++++++++++++++++++
>  .../progs/tracing_multi_intersect_attach.c    | 42 ++++++++
>  3 files changed, 144 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c
> 
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index cf01a11d7803..e56e213441d8 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -486,7 +486,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
>  		linked_vars.skel.h linked_maps.skel.h 			\
>  		test_subskeleton.skel.h test_subskeleton_lib.skel.h	\
>  		test_usdt.skel.h tracing_multi.skel.h			\
> -		tracing_multi_module.skel.h
> +		tracing_multi_module.skel.h				\
> +		tracing_multi_intersect.skel.h
>  
>  LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c 	\
>  	core_kern.c core_kern_overflow.c test_ringbuf.c			\
> @@ -514,6 +515,7 @@ xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
>  xdp_features.skel.h-deps := xdp_features.bpf.o
>  tracing_multi.skel.h-deps := tracing_multi_attach.bpf.o tracing_multi_check.bpf.o
>  tracing_multi_module.skel.h-deps := tracing_multi_attach_module.bpf.o tracing_multi_check.bpf.o
> +tracing_multi_intersect.skel.h-deps := tracing_multi_intersect_attach.bpf.o tracing_multi_check.bpf.o
>  
>  LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps))
>  LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS))
> diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> index e9042d8d4760..b7818f438d6e 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> @@ -6,6 +6,7 @@
>  #include "bpf/libbpf_internal.h"
>  #include "tracing_multi.skel.h"
>  #include "tracing_multi_module.skel.h"
> +#include "tracing_multi_intersect.skel.h"
>  #include "trace_helpers.h"
>  
>  static const char * const bpf_fentry_test[] = {
> @@ -31,6 +32,20 @@ static const char * const bpf_testmod_fentry_test[] = {
>  
>  #define FUNCS_CNT (ARRAY_SIZE(bpf_fentry_test))
>  
> +static int get_random_funcs(const char **funcs)
> +{
> +	int i, cnt = 0;
> +
> +	for (i = 0; i < FUNCS_CNT; i++) {
> +		if (rand() % 2)
                    ^ srand() is missing for rand() ?

> +			funcs[cnt++] = bpf_fentry_test[i];
> +	}
> +	/* we always need at least one.. */
> +	if (!cnt)
> +		funcs[cnt++] = bpf_fentry_test[rand() % FUNCS_CNT];
> +	return cnt;
> +}
> +
>  static int compare(const void *ppa, const void *ppb)
>  {
>  	const char *pa = *(const char **) ppa;
> @@ -328,6 +343,88 @@ static void test_module_link_api_ids(void)
>  	free(ids);
>  }
>  
> +static bool is_set(__u32 mask, __u32 bit)
> +{
> +	return (1 << bit) & mask;
> +}
> +
> +static void __test_intersect(__u32 mask, const struct bpf_program *progs[4], __u64 *test_results[4])
> +{
> +	LIBBPF_OPTS(bpf_tracing_multi_opts, opts);
> +	LIBBPF_OPTS(bpf_test_run_opts, topts);
> +	struct bpf_link *links[4] = { NULL };
> +	const char *funcs[FUNCS_CNT];
> +	__u64 expected[4];
> +	__u32 *ids, i;
> +	int err, cnt;
> +
> +	/*
> +	 * We have 4 programs in progs and the mask bits pick which
> +	 * of them gets attached to randomly chosen functions.
> +	 */
> +	for (i = 0; i < 4; i++) {
> +		if (!is_set(mask, i))
> +			continue;
> +
> +		cnt = get_random_funcs(funcs);
> +		ids = get_ids(funcs, cnt, NULL);
> +		if (!ASSERT_OK_PTR(ids, "get_ids"))
> +			goto cleanup;
> +
> +		opts.ids = ids;
> +		opts.cnt = cnt;
> +		links[i] = bpf_program__attach_tracing_multi(progs[i], NULL, &opts);
> +		free(ids);
> +
> +		if (!ASSERT_OK_PTR(links[i], "bpf_program__attach_tracing_multi"))
> +			goto cleanup;
> +
> +		expected[i] = *test_results[i] + cnt;
> +	}
> +
> +	err = bpf_prog_test_run_opts(bpf_program__fd(progs[0]), &topts);
> +	ASSERT_OK(err, "test_run");
> +
> +	for (i = 0; i < 4; i++) {
> +		if (!is_set(mask, i))
> +			continue;
> +		ASSERT_EQ(*test_results[i], expected[i], "test_results");
> +	}
> +
> +cleanup:
> +	for (i = 0; i < 4; i++)
> +		bpf_link__destroy(links[i]);
> +}
> +
> +static void test_intersect(void)
> +{
> +	struct tracing_multi_intersect *skel;
> +	const struct bpf_program *progs[4];
> +	__u64 *test_results[4];
> +	__u32 i;
> +
> +	skel = tracing_multi_intersect__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "tracing_multi_intersect__open_and_load"))
> +		return;
> +
> +	skel->bss->pid = getpid();
> +
> +	progs[0] = skel->progs.fentry_1;
> +	progs[1] = skel->progs.fexit_1;
> +	progs[2] = skel->progs.fentry_2;
> +	progs[3] = skel->progs.fexit_2;
> +
> +	test_results[0] = &skel->bss->test_result_fentry_1;
> +	test_results[1] = &skel->bss->test_result_fexit_1;
> +	test_results[2] = &skel->bss->test_result_fentry_2;
> +	test_results[3] = &skel->bss->test_result_fexit_2;
> +
> +	for (i = 1; i < 16; i++)
> +		__test_intersect(i, progs, test_results);
> +
> +	tracing_multi_intersect__destroy(skel);
> +}
> +
>  void test_tracing_multi_test(void)
>  {
>  #ifndef __x86_64__
> @@ -347,4 +444,6 @@ void test_tracing_multi_test(void)
>  		test_module_link_api_pattern();
>  	if (test__start_subtest("module_link_api_ids"))
>  		test_module_link_api_ids();
> +	if (test__start_subtest("intersect"))
> +		test_intersect();
>  }
> diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c b/tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c
> new file mode 100644
> index 000000000000..b8aecbf44093
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c
> @@ -0,0 +1,42 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stdbool.h>
> +#include <linux/bpf.h>
NIT:         ^ vmlinux.h is better than stdbool.h + bpf.h.

Thanks,
Leon

> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +__hidden extern void tracing_multi_arg_check(__u64 *ctx, __u64 *test_result, bool is_return);
> +
> +__u64 test_result_fentry_1 = 0;
> +__u64 test_result_fentry_2 = 0;
> +__u64 test_result_fexit_1 = 0;
> +__u64 test_result_fexit_2 = 0;
> +
> +SEC("fentry.multi")
> +int BPF_PROG(fentry_1)
> +{
> +	tracing_multi_arg_check(ctx, &test_result_fentry_1, false);
> +	return 0;
> +}
> +
> +SEC("fentry.multi")
> +int BPF_PROG(fentry_2)
> +{
> +	tracing_multi_arg_check(ctx, &test_result_fentry_2, false);
> +	return 0;
> +}
> +
> +SEC("fexit.multi")
> +int BPF_PROG(fexit_1)
> +{
> +	tracing_multi_arg_check(ctx, &test_result_fexit_1, true);
> +	return 0;
> +}
> +
> +SEC("fexit.multi")
> +int BPF_PROG(fexit_2)
> +{
> +	tracing_multi_arg_check(ctx, &test_result_fexit_2, true);
> +	return 0;
> +}


^ permalink raw reply

* Re: [PATCHv3 bpf-next 17/24] selftests/bpf: Add tracing multi skel/pattern/ids attach tests
From: Leon Hwang @ 2026-03-17  3:04 UTC (permalink / raw)
  To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-18-jolsa@kernel.org>

On 16/3/26 15:51, Jiri Olsa wrote:
> Adding tests for tracing_multi link attachment via all possible
> libbpf apis - skeleton, function pattern and btf ids.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  tools/testing/selftests/bpf/Makefile          |   3 +-
>  .../selftests/bpf/prog_tests/tracing_multi.c  | 245 ++++++++++++++++++
>  .../bpf/progs/tracing_multi_attach.c          |  40 +++
>  .../selftests/bpf/progs/tracing_multi_check.c | 150 +++++++++++
>  4 files changed, 437 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_multi.c
>  create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach.c
>  create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_check.c
> 
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 869b582b1d1f..e09beba5674e 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -485,7 +485,7 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
>  LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
>  		linked_vars.skel.h linked_maps.skel.h 			\
>  		test_subskeleton.skel.h test_subskeleton_lib.skel.h	\
> -		test_usdt.skel.h
> +		test_usdt.skel.h tracing_multi.skel.h
>  
>  LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c 	\
>  	core_kern.c core_kern_overflow.c test_ringbuf.c			\
> @@ -511,6 +511,7 @@ test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
>  xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
>  xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
>  xdp_features.skel.h-deps := xdp_features.bpf.o
> +tracing_multi.skel.h-deps := tracing_multi_attach.bpf.o tracing_multi_check.bpf.o
>  
>  LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps))
>  LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS))
> diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> new file mode 100644
> index 000000000000..cebf4eb68f18
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi.c
> @@ -0,0 +1,245 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <test_progs.h>
> +#include <bpf/btf.h>
> +#include <search.h>
> +#include "bpf/libbpf_internal.h"
> +#include "tracing_multi.skel.h"
> +#include "trace_helpers.h"
> +
> +static const char * const bpf_fentry_test[] = {
> +	"bpf_fentry_test1",
> +	"bpf_fentry_test2",
> +	"bpf_fentry_test3",
> +	"bpf_fentry_test4",
> +	"bpf_fentry_test5",
> +	"bpf_fentry_test6",
> +	"bpf_fentry_test7",
> +	"bpf_fentry_test8",
> +	"bpf_fentry_test9",
> +	"bpf_fentry_test10",
> +};
> +
> +#define FUNCS_CNT (ARRAY_SIZE(bpf_fentry_test))
> +
> +static int compare(const void *ppa, const void *ppb)
> +{
> +	const char *pa = *(const char **) ppa;
> +	const char *pb = *(const char **) ppb;
> +
> +	return strcmp(pa, pb);
> +}
> +
> +static __u32 *get_ids(const char * const funcs[], int funcs_cnt, const char *mod)
> +{
> +	struct btf *btf, *vmlinux_btf;
> +	__u32 nr, type_id, cnt = 0;
> +	void *root = NULL;
> +	__u32 *ids = NULL;
> +	int i, err = 0;
> +
> +	btf = btf__load_vmlinux_btf();
> +	if (!ASSERT_OK_PTR(btf, "btf__load_vmlinux_btf"))
> +		return NULL;
> +
> +	if (mod) {
> +		vmlinux_btf = btf;
> +		btf = btf__load_module_btf(mod, vmlinux_btf);
> +		if (!ASSERT_OK_PTR(btf, "btf__load_module_btf"))
> +			return NULL;
                        ^ vmlinux_btf does not get released.

> +	}
> +
> +	ids = calloc(funcs_cnt, sizeof(ids[0]));
> +	if (!ids)
> +		goto out;
> +
> +	/*
> +	 * We sort function names by name and search them
> +	 * below for each function.
> +	 */
> +	for (i = 0; i < funcs_cnt; i++)
> +		tsearch(&funcs[i], &root, compare);
                ^ tdestroy() is missing to free tree nodes?

Thanks,
Leon

[...]


^ permalink raw reply

* Re: [PATCH v6 14/17] lib/bootconfig: narrow offset type in xbc_init_node()
From: Masami Hiramatsu @ 2026-03-17  0:55 UTC (permalink / raw)
  To: Josh Law; +Cc: Andrew Morton, Steven Rostedt, linux-kernel, linux-trace-kernel
In-Reply-To: <20260315122015.55965-15-objecting@objecting.org>

On Sun, 15 Mar 2026 12:20:12 +0000
Josh Law <objecting@objecting.org> wrote:

>   lib/bootconfig.c:415:32: warning: conversion to 'long unsigned int'
>   from 'long int' may change the sign of the result [-Wsign-conversion]
> 
> Pointer subtraction yields ptrdiff_t (signed long), which was stored in
> unsigned long.  The offset is immediately checked against XBC_DATA_MAX
> (32767) and then truncated to uint16_t, so unsigned int is sufficient.
> Add an explicit cast on the subtraction to suppress the sign-conversion
> warning.
> 
> Signed-off-by: Josh Law <objecting@objecting.org>
> ---
>  lib/bootconfig.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/bootconfig.c b/lib/bootconfig.c
> index 995c2ec94cbe..7296df003459 100644
> --- a/lib/bootconfig.c
> +++ b/lib/bootconfig.c
> @@ -412,7 +412,7 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
>  
>  static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
>  {
> -	unsigned long offset = data - xbc_data;
> +	unsigned int offset = (unsigned int)(data - xbc_data);
>  
>  	if (WARN_ON(offset >= XBC_DATA_MAX))

OK, then this can be changed to

	long offset = data - xbc_data;

	if (WARN_ON(offset < 0 || offset >= XBC_DATA_MAX))

The original code is meant to handle the data < xbc_data case (in that
case the offset wraps to a value above LONG_MAX, so offset >= XBC_DATA_MAX
is also true). Note that this check exists to catch a broken pointer,
i.e. a program bug (WARN_ON is used for such cases).

Thank you,

>  		return -EINVAL;
> -- 
> 2.34.1
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH v6 12/17] lib/bootconfig: fix signed comparison in xbc_node_get_data()
From: Masami Hiramatsu @ 2026-03-16 23:57 UTC (permalink / raw)
  To: Josh Law; +Cc: Andrew Morton, Steven Rostedt, linux-kernel, linux-trace-kernel
In-Reply-To: <20260315122015.55965-13-objecting@objecting.org>

On Sun, 15 Mar 2026 12:20:10 +0000
Josh Law <objecting@objecting.org> wrote:

>   lib/bootconfig.c:188:28: warning: comparison of integer expressions
>   of different signedness: 'int' and 'size_t' [-Wsign-compare]
> 
> The local variable 'offset' is declared as int, but xbc_data_size is
> size_t.  Using ~XBC_VALUE as the mask also involves integer promotion
> rules that obscure intent.
> 
> Change the type to unsigned int and mask with XBC_DATA_MAX (which is
> the 15-bit data mask) instead of ~XBC_VALUE, making the expression
> self-documenting and eliminating the signed/unsigned comparison.

Please follow the warning message and use size_t instead.

Thanks,

> 
> Signed-off-by: Josh Law <objecting@objecting.org>
> ---
>  lib/bootconfig.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/bootconfig.c b/lib/bootconfig.c
> index 182d9d9bc5a6..806a8f038d24 100644
> --- a/lib/bootconfig.c
> +++ b/lib/bootconfig.c
> @@ -183,7 +183,7 @@ struct xbc_node * __init xbc_node_get_next(struct xbc_node *node)
>   */
>  const char * __init xbc_node_get_data(struct xbc_node *node)
>  {
> -	int offset = node->data & ~XBC_VALUE;
> +	unsigned int offset = node->data & XBC_DATA_MAX;
>  
>  	if (WARN_ON(offset >= xbc_data_size))
>  		return NULL;
> -- 
> 2.34.1
> 
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH v9 0/4] ring-buffer: Making persistent ring buffers robust
From: Masami Hiramatsu @ 2026-03-16 23:21 UTC (permalink / raw)
  To: Masami Hiramatsu (Google)
  Cc: Steven Rostedt, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers
In-Reply-To: <177319273059.130641.10882692460536780093.stgit@mhiramat.tok.corp.google.com>

On Wed, 11 Mar 2026 10:32:11 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> Hi,
> 
> Here is the 9th version of improvement patches for making persistent
> ring buffers robust to failures.
> The previous version is here:
> 
> https://lore.kernel.org/all/177303264034.767813.5345788067082238396.stgit@mhiramat.tok.corp.google.com/
> 
> In this version, I fixed bugs/typos in [2/4][3/4] and add a bugfix patch
> [1/4] and a test[4/4]. Also, add a meta->subbuf_size validation[3/4].

Hmm, the test case fails if rewinding happens, because the
data_page validation fails during rewinding and stops the rewinding.
The test may need to be designed more carefully.
Everything else looks good to me.

Thanks,

> 
> Thank you,
> 
> ---
> 
> Masami Hiramatsu (Google) (4):
>       ring-buffer: Fix to update per-subbuf entries of persistent ring buffer
>       ring-buffer: Flush and stop persistent ring buffer on panic
>       ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
>       ring-buffer: Add persistent ring buffer selftest
> 
> 
>  arch/alpha/include/asm/Kbuild        |    1 
>  arch/arc/include/asm/Kbuild          |    1 
>  arch/arm/include/asm/Kbuild          |    1 
>  arch/arm64/include/asm/ring_buffer.h |   10 ++
>  arch/csky/include/asm/Kbuild         |    1 
>  arch/hexagon/include/asm/Kbuild      |    1 
>  arch/loongarch/include/asm/Kbuild    |    1 
>  arch/m68k/include/asm/Kbuild         |    1 
>  arch/microblaze/include/asm/Kbuild   |    1 
>  arch/mips/include/asm/Kbuild         |    1 
>  arch/nios2/include/asm/Kbuild        |    1 
>  arch/openrisc/include/asm/Kbuild     |    1 
>  arch/parisc/include/asm/Kbuild       |    1 
>  arch/powerpc/include/asm/Kbuild      |    1 
>  arch/riscv/include/asm/Kbuild        |    1 
>  arch/s390/include/asm/Kbuild         |    1 
>  arch/sh/include/asm/Kbuild           |    1 
>  arch/sparc/include/asm/Kbuild        |    1 
>  arch/um/include/asm/Kbuild           |    1 
>  arch/x86/include/asm/Kbuild          |    1 
>  arch/xtensa/include/asm/Kbuild       |    1 
>  include/asm-generic/ring_buffer.h    |   13 +++
>  include/linux/ring_buffer.h          |    1 
>  kernel/trace/Kconfig                 |   15 +++
>  kernel/trace/ring_buffer.c           |  169 ++++++++++++++++++++++++++--------
>  kernel/trace/trace.c                 |    4 +
>  26 files changed, 192 insertions(+), 40 deletions(-)
>  create mode 100644 arch/arm64/include/asm/ring_buffer.h
>  create mode 100644 include/asm-generic/ring_buffer.h
> 
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH 46/61] vfio: Prefer IS_ERR_OR_NULL over manual NULL check
From: Alex Williamson @ 2026-03-16 22:10 UTC (permalink / raw)
  To: Philipp Hahn
  Cc: amd-gfx, apparmor, bpf, ceph-devel, cocci, dm-devel, dri-devel,
	gfs2, intel-gfx, intel-wired-lan, iommu, kvm, linux-arm-kernel,
	linux-block, linux-bluetooth, linux-btrfs, linux-cifs, linux-clk,
	linux-erofs, linux-ext4, linux-fsdevel, linux-gpio, linux-hyperv,
	linux-input, linux-kernel, linux-leds, linux-media, linux-mips,
	linux-mm, linux-modules, linux-mtd, linux-nfs, linux-omap,
	linux-phy, linux-pm, linux-rockchip, linux-s390, linux-scsi,
	linux-sctp, linux-security-module, linux-sh, linux-sound,
	linux-stm32, linux-trace-kernel, linux-usb, linux-wireless,
	netdev, ntfs3, samba-technical, sched-ext, target-devel,
	tipc-discussion, v9fs, alex
In-Reply-To: <20260310-b4-is_err_or_null-v1-46-bd63b656022d@avm.de>

On Tue, 10 Mar 2026 12:49:12 +0100
Philipp Hahn <phahn-oss@avm.de> wrote:

> Prefer using IS_ERR_OR_NULL() over using IS_ERR() and a manual NULL
> check.
> 
> Change generated with coccinelle.
> 
> To: Alex Williamson <alex@shazbot.org>
> Cc: kvm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Philipp Hahn <phahn-oss@avm.de>
> ---
>  drivers/vfio/vfio_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 742477546b15d4dbaf9ebcfb2e67627db71521e0..d71922dfde5885967398deddec3e9e04b05adfec 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -923,7 +923,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
>  
>  	/* Handle the VFIO_DEVICE_FEATURE_SET */
>  	filp = device->mig_ops->migration_set_state(device, mig.device_state);
> -	if (IS_ERR(filp) || !filp)
> +	if (IS_ERR_OR_NULL(filp))
>  		goto out_copy;
>  
>  	return vfio_ioct_mig_return_fd(filp, arg, &mig);
> 

As others have expressed in general, this doesn't seem to be cleaner
and tends to mask that we consider IS_ERR() and NULL as separate cases
in the goto.  This code looks like it could use some refactoring, and
likely that refactoring should handle the IS_ERR() and NULL cases
separately, but conflating them here is not an improvement.  Thanks,

Alex

^ permalink raw reply

* Re: [PATCHv3 bpf-next 02/24] bpf: Use mutex lock pool for bpf trampolines
From: Jiri Olsa @ 2026-03-16 21:16 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: ast, daniel, andrii, bpf, linux-trace-kernel, kafai, eddyz87,
	songliubraving, yhs, menglong8.dong, rostedt, martin.lau,
	yonghong.song, clm, ihor.solodrai
In-Reply-To: <31ae46274a3157f2b9840e1a09b2698d1ec0cfd461737ff460c2d3349a9f0054@mail.kernel.org>

On Mon, Mar 16, 2026 at 08:35:14AM +0000, bot+bpf-ci@kernel.org wrote:
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index f02254a21585..9923703a1544 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> 
> [ ... ]
> 
> > +/*
> > + * We keep 32 trampoline locks (5 bits) in the pool, because there
> > + * is 48 (MAX_LOCK_DEPTH) locks limit allowed to be simultaneously
> > + * held by task.
> > + */
> > +#define TRAMPOLINE_LOCKS_BITS 5
> > +#define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)
> > +
> > +static struct {
> > +	struct mutex mutex;
> > +	struct lock_class_key key;
> > +} trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];
> 
> During v1 review, Alexei asked to add a comment explaining both the
> pool size of 32 and why per-lock class keys are needed:
> 
> https://lore.kernel.org/bpf/CAADnVQL_Jpe_7a55HTn5CyYoxWcHahiK3+CDeOeURqZAuf+teA@mail.gmail.com/
> 
> The comment explains the 32 count (MAX_LOCK_DEPTH limit), but does
> it also need to mention why each lock has its own lock_class_key?
> Without that, it is not obvious that distinct keys are required to
> avoid lockdep "recursive locking" warnings when
> trampoline_lock_all() acquires all 32 pool mutexes simultaneously.

yep, will add

jirka

> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/23133791558


^ permalink raw reply

* Re: [PATCHv3 bpf-next 08/24] bpf: Add bpf_trampoline_multi_attach/detach functions
From: Jiri Olsa @ 2026-03-16 21:16 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: ast, daniel, andrii, bpf, linux-trace-kernel, kafai, eddyz87,
	songliubraving, yhs, menglong8.dong, rostedt, martin.lau,
	yonghong.song, clm, ihor.solodrai
In-Reply-To: <d97442169a77b667c7f12664cd684f51a8914d4e2c05ef64344aacaae11c3415@mail.kernel.org>

On Mon, Mar 16, 2026 at 08:35:15AM +0000, bot+bpf-ci@kernel.org wrote:
> > The detach works as follows:
> > - we lock all the needed trampolines
> > - remove the program from each (__bpf_trampoline_unlink_prog)
> > - the trampoline_multi_ops passed in __bpf_trampoline_link_prog gathers
> >   ftrace_hash (ip -> trampoline) objects
> 
> The detach description references __bpf_trampoline_link_prog, but the
> detach path calls __bpf_trampoline_unlink_prog.

ack

> 
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index d55651b13511..9331cca8c0b4 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> 
> [ ... ]
> 
> > +static int modify_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
> > +			       void *new_addr, bool lock_direct_mutex, void *ptr)
> > +{
> > +	unsigned long addr = (unsigned long) new_addr;
> > +	unsigned long ip = ftrace_location(tr->ip);
> > +	struct fentry_multi_data *data = ptr;
> > +
> > +	if (bpf_trampoline_use_jmp(tr->flags))
> > +		addr = ftrace_jmp_set(addr);
> > +	return add_ftrace_hash_entry_direct(data->modify, ip, addr) ? 0 : -ENOMEM;
> > +}
> 
> Unlike the standard modify_fentry() which calls direct_ops_mod() ->
> update_ftrace_direct_mod() to update ftrace immediately,
> modify_fentry_multi() only adds a hash entry.  The actual ftrace
> update is deferred until after the link loop.
> 
> This matters because bpf_trampoline_update() unconditionally calls
> bpf_tramp_image_put() on the old cur_image after a successful
> modify_fentry callback:
> 
>     bpf_trampoline_update() {
>         ...
>         err = ops->modify_fentry(tr, ...);
>         ...
>         if (tr->cur_image)
>             bpf_tramp_image_put(tr->cur_image);
>         tr->cur_image = im;
>     }
> 
> With the standard ops, the old image is already unreachable when put.
> With multi ops, ftrace still points to the old image when it is put.
> bpf_tramp_image_put() text-pokes the old image to skip fexit programs
> and starts deferred freeing.
> 
> In the success path, the window between image_put and the batched
> update_ftrace_direct_mod is brief (bounded by trampoline_lock_all).
> 
> However, in the rollback path below, ftrace is never updated at all:
> 
> [ ... ]
> 
> > +	trampoline_lock_all();
> > +
> > +	for (i = 0; i < cnt; i++) {
> > +		mnode = &link->nodes[i];
> > +		err = __bpf_trampoline_link_prog(&mnode->node, mnode->trampoline, NULL,
> > +						 &trampoline_multi_ops, &data);
> > +		if (err)
> > +			goto rollback_unlink;
> > +	}
> 
> [ ... ]
> 
> > +rollback_unlink:
> > +	for (j = 0; j < i; j++) {
> > +		mnode = &link->nodes[j];
> > +		WARN_ON_ONCE(__bpf_trampoline_unlink_prog(&mnode->node, mnode->trampoline,
> > +					NULL, &trampoline_multi_ops, &data));
> > +	}
> > +	trampoline_unlock_all();
> > +
> > +	i = cnt;
> 
> If a trampoline at index 0 already had programs attached through the
> standard single-trampoline path, and __bpf_trampoline_link_prog
> succeeds for it but fails for a later trampoline, the rollback path
> restores tr->cur_image to a correct new image but never calls
> update_ftrace_direct_mod.
> 
> At that point, ftrace still routes calls through the old image that
> was put during the link phase.  bpf_tramp_image_put() has already
> patched that old image to skip fexit programs and started the deferred
> freeing sequence (percpu_ref_kill).
> 
> Could this leave existing fexit/fmod_ret programs on that trampoline
> non-functional?  And once the percpu_ref drains during a quiet period,
> would the old image be freed while ftrace still references it?

good catch, we need the update_ftrace_direct_mod call after the unlink,
I changed the rollback test to hit the described issue, will fix

> 
> [ ... ]
> 
> > +int bpf_trampoline_multi_detach(struct bpf_prog *prog, struct bpf_tracing_multi_link *link)
> > +{
> > +	struct bpf_tracing_multi_node *mnode;
> > +	struct fentry_multi_data data = {};
> > +	int i, cnt = link->nodes_cnt;
> > +
> > +	data.unreg  = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> > +	data.modify = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> > +
> > +	if (!data.unreg || !data.modify) {
> > +		free_fentry_multi_data(&data);
> > +		return -ENOMEM;
> > +	}
> 
> bpf_trampoline_multi_detach can return -ENOMEM here without detaching
> anything.  In e80a839e3d2c ("bpf: Add support for tracing multi
> link"), this is called from bpf_tracing_multi_link_release():
> 
>     WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link));
> 
> If detach fails, the programs remain linked to the trampolines via
> mnode->node.tramp_hlist, but the link (and its embedded nodes array)
> is freed by bpf_tracing_multi_link_dealloc().  Would this leave
> dangling hlist entries in each trampoline's progs_hlist?

not sure there's anything useful we could do if allocation fails

jirka

^ permalink raw reply

* Re: [PATCHv3 bpf-next 14/24] libbpf: Add bpf_link_create support for tracing_multi link
From: Jiri Olsa @ 2026-03-16 21:16 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: ast, daniel, andrii, bpf, linux-trace-kernel, kafai, eddyz87,
	songliubraving, yhs, menglong8.dong, rostedt, martin.lau,
	yonghong.song, clm, ihor.solodrai
In-Reply-To: <a34d2de2bc6db3cad91a50bb298bd687fa6ffa8701a14d6e7a07a29e7c34ffc1@mail.kernel.org>

On Mon, Mar 16, 2026 at 08:35:10AM +0000, bot+bpf-ci@kernel.org wrote:
> > diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> > --- a/tools/lib/bpf/bpf.h
> > +++ b/tools/lib/bpf/bpf.h
> > @@ -454,6 +454,11 @@ struct bpf_link_create_opts {
> >  			__u64 expected_revision;
> >  		} cgroup;
> > +		struct {
> > +			__u32 *ids;
> > +			__u64 *cookies;
> > +			__u32 cnt;
> > +		} tracing_multi;
> >  	};
> 
> Should the ids and cookies pointers be const-qualified here?
> Every other multi-attach member in this union uses const for
> its pointer fields, for example in kprobe_multi:
> 
>     const char **syms;
>     const unsigned long *addrs;
>     const __u64 *cookies;
> 
> and in uprobe_multi:
> 
>     const unsigned long *offsets;
>     const unsigned long *ref_ctr_offsets;
>     const __u64 *cookies;
> 
> These arrays are input-only (passed straight through to the
> kernel via ptr_to_u64), so marking them const would be
> consistent with the existing convention and would avoid
> compiler warnings for callers passing const arrays.

yep, will make it const

jirka

> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/23133791558


^ permalink raw reply

* Re: [PATCHv3 bpf-next 16/24] libbpf: Add support to create tracing multi link
From: Jiri Olsa @ 2026-03-16 21:16 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: ast, daniel, andrii, bpf, linux-trace-kernel, kafai, eddyz87,
	songliubraving, yhs, menglong8.dong, rostedt, martin.lau,
	yonghong.song, clm, ihor.solodrai
In-Reply-To: <46d7ff451871969354eeff0215fec41d41f1587deb6b0944f8f38184b715e2f3@mail.kernel.org>

On Mon, Mar 16, 2026 at 08:35:12AM +0000, bot+bpf-ci@kernel.org wrote:
> > The provided <pattern> is used as 'pattern' argument in
> > bpf_program__attach_kprobe_multi_opts function.
> 
> Should this say bpf_program__attach_tracing_multi instead of
> bpf_program__attach_kprobe_multi_opts?  The auto-attach path in
> attach_tracing_multi() calls bpf_program__attach_tracing_multi(),
> not the kprobe variant.

no, it's a reference to the similar kprobe function

jirka

> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/23133791558


^ permalink raw reply

* Re: [PATCH v3] tracing: Generate undef symbols allowlist for simple_ring_buffer
From: Arnd Bergmann @ 2026-03-16 20:48 UTC (permalink / raw)
  To: Vincent Donnefort, Marc Zyngier
  Cc: Steven Rostedt, Nathan Chancellor, linux-trace-kernel, kvmarm,
	kernel-team
In-Reply-To: <6d40c8c0-f00a-4031-bd94-7c0a417eb0cf@app.fastmail.com>

On Mon, Mar 16, 2026, at 21:47, Arnd Bergmann wrote:
>
> This needs "__kmsan" as well, for these symbols:
>

"__msan" of course, not "__kmsan".

^ permalink raw reply

* Re: [PATCH v3] tracing: Generate undef symbols allowlist for simple_ring_buffer
From: Arnd Bergmann @ 2026-03-16 20:47 UTC (permalink / raw)
  To: Vincent Donnefort, Marc Zyngier
  Cc: Steven Rostedt, Nathan Chancellor, linux-trace-kernel, kvmarm,
	kernel-team
In-Reply-To: <20260316092845.3367411-1-vdonnefort@google.com>

On Mon, Mar 16, 2026, at 10:28, Vincent Donnefort wrote:
> Compiler and tooling-generated symbols are difficult to maintain
> across all supported architectures. Make the allowlist more robust by
> replacing the hardcoded list with a mechanism that automatically detects
> these symbols.
>
> This mechanism generates a C function designed to trigger common
> compiler-inserted symbols.
>
> Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
> Reviewed-by: Nathan Chancellor <nathan@kernel.org>
> Tested-by: Nathan Chancellor <nathan@kernel.org>

Tested-by: Arnd Bergmann <arnd@arndb.de>

A few hundred randconfig builds in, I came across a single build failure
that you missed:

> +UNDEFINED_ALLOWLIST = __asan __gcov __kasan __kcsan __hwasan __sancov 
> __sanitizer __tsan __ubsan __x86_indirect_thunk \
> +		      simple_ring_buffer \

This needs "__kmsan" as well, for these symbols:

                 U __msan_chain_origin
                 U __msan_get_context_state
                 U __msan_instrument_asm_store
                 U __msan_metadata_ptr_for_load_4
                 U __msan_metadata_ptr_for_load_8
                 U __msan_metadata_ptr_for_store_4
                 U __msan_metadata_ptr_for_store_8
                 U __msan_warning

     Arnd

^ permalink raw reply

* Re: [PATCH v3 4/8] dma-mapping: Introduce DMA require coherency attribute
From: Leon Romanovsky @ 2026-03-16 20:39 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel, Will Deacon,
	Andrew Morton, iommu, linux-kernel, linux-doc, virtualization,
	linux-rdma, linux-trace-kernel, linux-mm
In-Reply-To: <659bd750-c67a-4290-8c2d-58bc13c9e2a6@infradead.org>

On Mon, Mar 16, 2026 at 12:17:39PM -0700, Randy Dunlap wrote:
> 
> 
> On 3/16/26 12:06 PM, Leon Romanovsky wrote:
> > diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
> > index 48cfe86cc06d7..441bdc9d08318 100644
> > --- a/Documentation/core-api/dma-attributes.rst
> > +++ b/Documentation/core-api/dma-attributes.rst
> > @@ -163,3 +163,19 @@ data corruption.
> >  
> >  All mappings that share a cache line must set this attribute to suppress DMA
> >  debug warnings about overlapping mappings.
> > +
> > +DMA_ATTR_REQUIRE_COHERENT
> > +-------------------------
> > +
> > +DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any
> > +system where SWIOTLB or cache management is required. This should only
> > +be used to support uAPI designs that require continuous HW DMA
> > +coherence with userspace processes, for example RDMA and DRM. At a
> > +minimum the memory being mapped must be userspace memory from
> > +pin_user_pages() or similar.
> > +
> > +Drivers should consider using dma_mmap_pages() instead of this
> > +interface when building their uAPIs, when possible.
> > +
> > +It must never be used in an in-kernel driver that only works with
> > +kernal memory.
> 
>    kernel

Thanks, let's hope that it is the only comment :).

> 
> -- 
> ~Randy
> 

^ permalink raw reply

* Re: [PATCH v3 4/8] dma-mapping: Introduce DMA require coherency attribute
From: Randy Dunlap @ 2026-03-16 19:17 UTC (permalink / raw)
  To: Leon Romanovsky, Marek Szyprowski, Robin Murphy,
	Michael S. Tsirkin, Petr Tesarik, Jonathan Corbet, Shuah Khan,
	Jason Wang, Xuan Zhuo, Eugenio Pérez, Jason Gunthorpe,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-4-1dde90a7f08b@nvidia.com>



On 3/16/26 12:06 PM, Leon Romanovsky wrote:
> diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
> index 48cfe86cc06d7..441bdc9d08318 100644
> --- a/Documentation/core-api/dma-attributes.rst
> +++ b/Documentation/core-api/dma-attributes.rst
> @@ -163,3 +163,19 @@ data corruption.
>  
>  All mappings that share a cache line must set this attribute to suppress DMA
>  debug warnings about overlapping mappings.
> +
> +DMA_ATTR_REQUIRE_COHERENT
> +-------------------------
> +
> +DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any
> +system where SWIOTLB or cache management is required. This should only
> +be used to support uAPI designs that require continuous HW DMA
> +coherence with userspace processes, for example RDMA and DRM. At a
> +minimum the memory being mapped must be userspace memory from
> +pin_user_pages() or similar.
> +
> +Drivers should consider using dma_mmap_pages() instead of this
> +interface when building their uAPIs, when possible.
> +
> +It must never be used in an in-kernel driver that only works with
> +kernal memory.

   kernel

-- 
~Randy


^ permalink raw reply

* [PATCH v3 8/8] mm/hmm: Indicate that HMM requires DMA coherency
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm, Jason Gunthorpe
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

HMM is fundamentally about allowing a sophisticated device to perform DMA
directly to a process’s memory while the CPU accesses that same memory at
the same time. It is similar to SVA but does not rely on IOMMU support.
Because the entire model depends on concurrent access to shared memory, it
fails as a uAPI if SWIOTLB substitutes the memory or if the CPU caches are
not coherent with DMA.

Until now, there has been no reliable way to report this, and various
approximations have been used:

int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
                      size_t nr_entries, size_t dma_entry_size)
{
<...>
        /*
         * The HMM API violates our normal DMA buffer ownership rules and can't
         * transfer buffer ownership.  The dma_addressing_limited() check is a
         * best approximation to ensure no swiotlb buffering happens.
         */
        dma_need_sync = !dev->dma_skip_sync;
        if (dma_need_sync || dma_addressing_limited(dev))
                return -EOPNOTSUPP;

So let's mark mapped buffers with the DMA_ATTR_REQUIRE_COHERENT attribute
to prevent silent data corruption if someone tries to use HMM in a system
with SWIOTLB or incoherent DMA.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 mm/hmm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index f6c4ddff4bd61..5955f2f0c83db 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -778,7 +778,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
 	struct page *page = hmm_pfn_to_page(pfns[idx]);
 	phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]);
 	size_t offset = idx * map->dma_entry_size;
-	unsigned long attrs = 0;
+	unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT;
 	dma_addr_t dma_addr;
 	int ret;
 
@@ -871,7 +871,7 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx)
 	struct dma_iova_state *state = &map->state;
 	dma_addr_t *dma_addrs = map->dma_list;
 	unsigned long *pfns = map->pfn_list;
-	unsigned long attrs = 0;
+	unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT;
 
 	if ((pfns[idx] & valid_dma) != valid_dma)
 		return false;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 5/8] dma-direct: prevent SWIOTLB path when DMA_ATTR_REQUIRE_COHERENT is set
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

DMA_ATTR_REQUIRE_COHERENT indicates that SWIOTLB must not be used.
Ensure the SWIOTLB path is declined whenever the DMA direct path is
selected.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 kernel/dma/direct.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index e89f175e9c2d0..6184ff303f080 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -84,7 +84,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
 	dma_addr_t dma_addr;
 
 	if (is_swiotlb_force_bounce(dev)) {
-		if (attrs & DMA_ATTR_MMIO)
+		if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
 			return DMA_MAPPING_ERROR;
 
 		return swiotlb_map(dev, phys, size, dir, attrs);
@@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
 		dma_addr = phys_to_dma(dev, phys);
 		if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
 		    dma_kmalloc_needs_bounce(dev, size, dir)) {
-			if (is_swiotlb_active(dev))
+			if (is_swiotlb_active(dev) &&
+			    !(attrs & DMA_ATTR_REQUIRE_COHERENT))
 				return swiotlb_map(dev, phys, size, dir, attrs);
 
 			goto err_overflow;
@@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
 {
 	phys_addr_t phys;
 
-	if (attrs & DMA_ATTR_MMIO)
+	if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
 		/* nothing to do: uncached and no swiotlb */
 		return;
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 7/8] RDMA/umem: Tell DMA mapping that UMEM requires coherency
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm, Jason Gunthorpe
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

The RDMA subsystem exposes DMA regions through the verbs interface, which
assumes a coherent system. Use the DMA_ATTR_REQUIRE_COHERENT attribute
to ensure coherency and avoid taking the SWIOTLB path.

The RDMA verbs programming model resembles HMM and assumes concurrent DMA
and CPU access to userspace memory. The hardware and programming model
support "one-sided" operations initiated remotely without any local CPU
involvement or notification. These include ATOMIC compare/swap, READ, and
WRITE. A remote CPU can use these operations to traverse data structures,
manipulate locks, and perform similar tasks without the host CPU’s
awareness. If SWIOTLB substitutes memory or DMA is not cache coherent,
these use cases break entirely.

In-kernel RDMA is fine with incoherent mappings because kernel users do
not rely on one-sided operations in ways that would expose these issues.

A given region may also be exported multiple times, which can trigger
warnings about cacheline overlaps. These warnings are suppressed when the
new attribute is used.

infiniband rocep8s0f0: mlx5_ib_reg_user_mr:1592:(pid 5812): start 0x2b28c000, iova 0x2b28c000, length 0x1000, access_flags 0x1
infiniband rocep8s0f0: mlx5_ib_reg_user_mr:1592:(pid 5812): start 0x2b28c001, iova 0x2b28c001, length 0xfff, access_flags 0x1
 ------------[ cut here ]------------
 DMA-API: mlx5_core 0000:08:00.0: cacheline tracking EEXIST, overlapping mappings aren't supported
 WARNING: kernel/dma/debug.c:620 at add_dma_entry+0x1bb/0x280, CPU#6: ibv_rc_pingpong/5812
 Modules linked in: veth xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat nf_nat xt_addrtype br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_fwctl zram zsmalloc mlx5_ib fuse rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_core ib_core
 CPU: 6 UID: 2733 PID: 5812 Comm: ibv_rc_pingpong Tainted: G        W           6.19.0+ #129 PREEMPT
 Tainted: [W]=WARN
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:add_dma_entry+0x1be/0x280
 Code: 8b 7b 10 48 85 ff 0f 84 c3 00 00 00 48 8b 6f 50 48 85 ed 75 03 48 8b 2f e8 ff 8e 6a 00 48 89 c6 48 8d 3d 55 ef 2d 01 48 89 ea <67> 48 0f b9 3a 48 85 db 74 1a 48 c7 c7 b0 00 2b 82 e8 9c 25 fd ff
 RSP: 0018:ff11000138717978 EFLAGS: 00010286
 RAX: ffffffffa02d7831 RBX: ff1100010246de00 RCX: 0000000000000000
 RDX: ff110001036fac30 RSI: ffffffffa02d7831 RDI: ffffffff82678650
 RBP: ff110001036fac30 R08: ff11000110dcb4a0 R09: ff11000110dcb478
 R10: 0000000000000000 R11: ffffffff824b30a8 R12: 0000000000000000
 R13: 00000000ffffffef R14: 0000000000000202 R15: ff1100010246de00
 FS:  00007f59b411c740(0000) GS:ff110008dcc99000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007ffe538f7000 CR3: 000000010e066005 CR4: 0000000000373eb0
 Call Trace:
  <TASK>
  debug_dma_map_sg+0x1b4/0x390
  __dma_map_sg_attrs+0x6d/0x1a0
  dma_map_sgtable+0x19/0x30
  ib_umem_get+0x254/0x380 [ib_uverbs]
  mlx5_ib_reg_user_mr+0x68/0x2a0 [mlx5_ib]
  ib_uverbs_reg_mr+0x17f/0x2a0 [ib_uverbs]
  ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xc2/0x130 [ib_uverbs]
  ib_uverbs_cmd_verbs+0xa0b/0xae0 [ib_uverbs]
  ? ib_uverbs_handler_UVERBS_METHOD_QUERY_PORT_SPEED+0xe0/0xe0 [ib_uverbs]
  ? mmap_region+0x7a/0xb0
  ? do_mmap+0x3b8/0x5c0
  ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs]
  __x64_sys_ioctl+0x14f/0x8b0
  ? ksys_mmap_pgoff+0xc5/0x190
  do_syscall_64+0x8c/0xbf0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53
 RIP: 0033:0x7f59b430aeed
 Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
 RSP: 002b:00007ffe538f9430 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
 RAX: ffffffffffffffda RBX: 00007ffe538f94c0 RCX: 00007f59b430aeed
 RDX: 00007ffe538f94e0 RSI: 00000000c0181b01 RDI: 0000000000000003
 RBP: 00007ffe538f9480 R08: 0000000000000028 R09: 00007ffe538f9684
 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffe538f9684
 R13: 000000000000000c R14: 000000002b28d170 R15: 000000000000000c
  </TASK>
 ---[ end trace 0000000000000000 ]---

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/umem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index cff4fcca2c345..edc34c69f0f23 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
 	if (dirty)
 		ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
-					   DMA_BIDIRECTIONAL, 0);
+					   DMA_BIDIRECTIONAL,
+					   DMA_ATTR_REQUIRE_COHERENT);
 
 	for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
 		unpin_user_page_range_dirty_lock(sg_page(sg),
@@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	unsigned long lock_limit;
 	unsigned long new_pinned;
 	unsigned long cur_base;
-	unsigned long dma_attr = 0;
+	unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT;
 	struct mm_struct *mm;
 	unsigned long npages;
 	int pinned, ret;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 6/8] iommu/dma: add support for DMA_ATTR_REQUIRE_COHERENT attribute
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Add support for the DMA_ATTR_REQUIRE_COHERENT attribute to the exported
functions. This attribute indicates that the SWIOTLB path must not be
used and that no sync operations should be performed.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/iommu/dma-iommu.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dac64be61bb2..94d5141696424 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 	 */
 	if (dev_use_swiotlb(dev, size, dir) &&
 	    iova_unaligned(iovad, phys, size)) {
-		if (attrs & DMA_ATTR_MMIO)
+		if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
 			return DMA_MAPPING_ERROR;
 
 		phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
@@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 		arch_sync_dma_for_device(phys, size, dir);
 
 	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
-	if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
+	if (iova == DMA_MAPPING_ERROR &&
+	    !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)))
 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 	return iova;
 }
@@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
 {
 	phys_addr_t phys;
 
-	if (attrs & DMA_ATTR_MMIO) {
+	if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) {
 		__iommu_dma_unmap(dev, dma_handle, size);
 		return;
 	}
@@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
 	if (WARN_ON_ONCE(iova_start_pad && offset > 0))
 		return -EIO;
 
+	/*
+	 * DMA_IOVA_USE_SWIOTLB is set on state after some entry
+	 * took SWIOTLB path, which we were supposed to prevent
+	 * for DMA_ATTR_REQUIRE_COHERENT attribute.
+	 */
+	if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) &&
+			 (attrs & DMA_ATTR_REQUIRE_COHERENT)))
+		return -EOPNOTSUPP;
+
+	if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+		return -EOPNOTSUPP;
+
 	if (dev_use_swiotlb(dev, size, dir) &&
 	    iova_unaligned(iovad, phys, size)) {
-		if (attrs & DMA_ATTR_MMIO)
+		if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
 			return -EPERM;
 
 		return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 3/8] dma-mapping: Clarify valid conditions for CPU cache line overlap
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Rename the DMA_ATTR_CPU_CACHE_CLEAN attribute to better reflect that it
is a debugging aid to inform DMA core code that CPU cache line overlaps are
allowed, and refine the documentation describing its use.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 Documentation/core-api/dma-attributes.rst | 22 ++++++++++++++--------
 drivers/virtio/virtio_ring.c              | 10 +++++-----
 include/linux/dma-mapping.h               |  8 ++++----
 include/trace/events/dma.h                |  2 +-
 kernel/dma/debug.c                        |  2 +-
 5 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 1d7bfad73b1c7..48cfe86cc06d7 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -149,11 +149,17 @@ For architectures that require cache flushing for DMA coherence
 DMA_ATTR_MMIO will not perform any cache flushing. The address
 provided must never be mapped cacheable into the CPU.
 
-DMA_ATTR_CPU_CACHE_CLEAN
-------------------------
-
-This attribute indicates the CPU will not dirty any cacheline overlapping this
-DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
-multiple small buffers to safely share a cacheline without risk of data
-corruption, suppressing DMA debug warnings about overlapping mappings.
-All mappings sharing a cacheline should have this attribute.
+DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
+------------------------------------
+
+This attribute indicates that CPU cache lines may overlap for buffers mapped
+with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
+
+Such overlap may occur when callers map multiple small buffers that reside
+within the same cache line. In this case, callers must guarantee that the CPU
+will not dirty these cache lines after the mappings are established. When this
+condition is met, multiple buffers can safely share a cache line without risking
+data corruption.
+
+All mappings that share a cache line must set this attribute to suppress DMA
+debug warnings about overlapping mappings.
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 335692d41617a..fbca7ce1c6bf0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
- * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate
- * that the CPU will not dirty any cacheline overlapping this buffer while it
- * is available, and to suppress overlapping cacheline warnings in DMA debug
- * builds.
+ * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
+ * to indicate that the CPU will not dirty any cacheline overlapping this buffer
+ * while it is available, and to suppress overlapping cacheline warnings in DMA
+ * debug builds.
  *
  * Caller must ensure we don't call this with other virtqueue operations
  * at the same time (except where noted).
@@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
 				    gfp_t gfp)
 {
 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
-			     DMA_ATTR_CPU_CACHE_CLEAN);
+			     DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29973baa05816..da44394b3a1a7 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -80,11 +80,11 @@
 #define DMA_ATTR_MMIO		(1UL << 10)
 
 /*
- * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
- * overlapping this buffer while it is mapped for DMA. All mappings sharing
- * a cacheline must have this attribute for this to be considered safe.
+ * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be
+ * overlapped. All mappings sharing a cacheline must have this attribute for
+ * this to be considered safe.
  */
-#define DMA_ATTR_CPU_CACHE_CLEAN	(1UL << 11)
+#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES	(1UL << 11)
 
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 69cb3805ee81c..8c64bc0721fe4 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -33,7 +33,7 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
 		{ DMA_ATTR_MMIO, "MMIO" }, \
-		{ DMA_ATTR_CPU_CACHE_CLEAN, "CACHE_CLEAN" })
+		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index be207be749968..83e1cfe05f08d 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -601,7 +601,7 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
 	unsigned long flags;
 	int rc;
 
-	entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
+	entry->is_cache_clean = attrs & DMA_ATTR_DEBUGGING_IGNORE_CACHELINES;
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 4/8] dma-mapping: Introduce DMA require coherency attribute
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Mapping buffers that carry this attribute require a DMA-coherent system.
This means that they can't take the SWIOTLB path, may overlap CPU cache
lines, and don't perform cache flushing.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 Documentation/core-api/dma-attributes.rst | 16 ++++++++++++++++
 include/linux/dma-mapping.h               |  7 +++++++
 include/trace/events/dma.h                |  3 ++-
 kernel/dma/debug.c                        |  3 ++-
 kernel/dma/mapping.c                      |  6 ++++++
 5 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 48cfe86cc06d7..441bdc9d08318 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -163,3 +163,19 @@ data corruption.
 
 All mappings that share a cache line must set this attribute to suppress DMA
 debug warnings about overlapping mappings.
+
+DMA_ATTR_REQUIRE_COHERENT
+-------------------------
+
+DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT attribute fail on
+any system where SWIOTLB or cache management is required. This should only
+be used to support uAPI designs that require continuous HW DMA
+coherence with userspace processes, for example RDMA and DRM. At a
+minimum the memory being mapped must be userspace memory from
+pin_user_pages() or similar.
+
+Drivers should consider using dma_mmap_pages() instead of this
+interface when building their uAPIs, when possible.
+
+It must never be used in an in-kernel driver that only works with
+kernel memory.
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index da44394b3a1a7..482b919f040f7 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -86,6 +86,13 @@
  */
 #define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES	(1UL << 11)
 
+/*
+ * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required.
+ * All mappings that carry this attribute can't work with SWIOTLB and cache
+ * flushing.
+ */
+#define DMA_ATTR_REQUIRE_COHERENT	(1UL << 12)
+
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
  * be given to a device to use as a DMA source or target.  It is specific to a
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 8c64bc0721fe4..63597b0044247 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -33,7 +33,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
 		{ DMA_ATTR_MMIO, "MMIO" }, \
-		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" })
+		{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
+		{ DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 83e1cfe05f08d..0677918f06a80 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
 	unsigned long flags;
 	int rc;
 
-	entry->is_cache_clean = attrs & DMA_ATTR_DEBUGGING_IGNORE_CACHELINES;
+	entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES |
+					 DMA_ATTR_REQUIRE_COHERENT);
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 3928a509c44c2..6d3dd0bd3a886 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 	if (WARN_ON_ONCE(!dev->dma_mask))
 		return DMA_MAPPING_ERROR;
 
+	if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+		return DMA_MAPPING_ERROR;
+
 	if (dma_map_direct(dev, ops) ||
 	    (!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
 		addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
@@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
 
 	BUG_ON(!valid_dma_direction(dir));
 
+	if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+		return -EOPNOTSUPP;
+
 	if (WARN_ON_ONCE(!dev->dma_mask))
 		return 0;
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 2/8] dma-mapping: handle DMA_ATTR_CPU_CACHE_CLEAN in trace output
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Tracing prints decoded DMA attribute flags, but it does not yet
include the recently added DMA_ATTR_CPU_CACHE_CLEAN. Add support
for decoding and displaying this attribute in the trace output.

Fixes: 61868dc55a11 ("dma-mapping: add DMA_ATTR_CPU_CACHE_CLEAN")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/trace/events/dma.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 33e99e792f1aa..69cb3805ee81c 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -32,7 +32,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
 		{ DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \
 		{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
 		{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
-		{ DMA_ATTR_MMIO, "MMIO" })
+		{ DMA_ATTR_MMIO, "MMIO" }, \
+		{ DMA_ATTR_CPU_CACHE_CLEAN, "CACHE_CLEAN" })
 
 DECLARE_EVENT_CLASS(dma_map,
 	TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 1/8] dma-debug: Allow multiple invocations of overlapping entries
From: Leon Romanovsky @ 2026-03-16 19:06 UTC (permalink / raw)
  To: Marek Szyprowski, Robin Murphy, Michael S. Tsirkin, Petr Tesarik,
	Jonathan Corbet, Shuah Khan, Jason Wang, Xuan Zhuo,
	Eugenio Pérez, Jason Gunthorpe, Leon Romanovsky,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Joerg Roedel,
	Will Deacon, Andrew Morton
  Cc: iommu, linux-kernel, linux-doc, virtualization, linux-rdma,
	linux-trace-kernel, linux-mm
In-Reply-To: <20260316-dma-debug-overlap-v3-0-1dde90a7f08b@nvidia.com>

From: Leon Romanovsky <leonro@nvidia.com>

Repeated DMA mappings with DMA_ATTR_CPU_CACHE_CLEAN trigger the
following splat. This prevents using the attribute in cases where a DMA
region is shared and reused more than seven times.

 ------------[ cut here ]------------
 DMA-API: exceeded 7 overlapping mappings of cacheline 0x000000000438c440
 WARNING: kernel/dma/debug.c:467 at add_dma_entry+0x219/0x280, CPU#4: ibv_rc_pingpong/1644
 Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat nf_nat xt_addrtype br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_fwctl zram zsmalloc mlx5_ib fuse rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_core ib_core
 CPU: 4 UID: 2733 PID: 1644 Comm: ibv_rc_pingpong Not tainted 6.19.0+ #129 PREEMPT
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:add_dma_entry+0x221/0x280
 Code: c0 0f 84 f2 fe ff ff 83 e8 01 89 05 6d 99 11 01 e9 e4 fe ff ff 0f 8e 1f ff ff ff 48 8d 3d 07 ef 2d 01 be 07 00 00 00 48 89 e2 <67> 48 0f b9 3a e9 06 ff ff ff 48 c7 c7 98 05 2b 82 c6 05 72 92 28
 RSP: 0018:ff1100010e657970 EFLAGS: 00010002
 RAX: 0000000000000007 RBX: ff1100010234eb00 RCX: 0000000000000000
 RDX: ff1100010e657970 RSI: 0000000000000007 RDI: ffffffff82678660
 RBP: 000000000438c440 R08: 0000000000000228 R09: 0000000000000000
 R10: 00000000000001be R11: 000000000000089d R12: 0000000000000800
 R13: 00000000ffffffef R14: 0000000000000202 R15: ff1100010234eb00
 FS:  00007fb15f3f6740(0000) GS:ff110008dcc19000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fb15f32d3a0 CR3: 0000000116f59001 CR4: 0000000000373eb0
 Call Trace:
  <TASK>
  debug_dma_map_sg+0x1b4/0x390
  __dma_map_sg_attrs+0x6d/0x1a0
  dma_map_sgtable+0x19/0x30
  ib_umem_get+0x284/0x3b0 [ib_uverbs]
  mlx5_ib_reg_user_mr+0x68/0x2a0 [mlx5_ib]
  ib_uverbs_reg_mr+0x17f/0x2a0 [ib_uverbs]
  ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xc2/0x130 [ib_uverbs]
  ib_uverbs_cmd_verbs+0xa0b/0xae0 [ib_uverbs]
  ? ib_uverbs_handler_UVERBS_METHOD_QUERY_PORT_SPEED+0xe0/0xe0 [ib_uverbs]
  ? mmap_region+0x7a/0xb0
  ? do_mmap+0x3b8/0x5c0
  ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs]
  __x64_sys_ioctl+0x14f/0x8b0
  ? ksys_mmap_pgoff+0xc5/0x190
  do_syscall_64+0x8c/0xbf0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53
 RIP: 0033:0x7fb15f5e4eed
 Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
 RSP: 002b:00007ffe09a5c540 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
 RAX: ffffffffffffffda RBX: 00007ffe09a5c5d0 RCX: 00007fb15f5e4eed
 RDX: 00007ffe09a5c5f0 RSI: 00000000c0181b01 RDI: 0000000000000003
 RBP: 00007ffe09a5c590 R08: 0000000000000028 R09: 00007ffe09a5c794
 R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffe09a5c794
 R13: 000000000000000c R14: 0000000025a49170 R15: 000000000000000c
  </TASK>
 ---[ end trace 0000000000000000 ]---

Fixes: 61868dc55a11 ("dma-mapping: add DMA_ATTR_CPU_CACHE_CLEAN")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 kernel/dma/debug.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 86f87e43438c3..be207be749968 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
 	return overlap;
 }
 
-static void active_cacheline_inc_overlap(phys_addr_t cln)
+static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean)
 {
 	int overlap = active_cacheline_read_overlap(cln);
 
@@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
 	/* If we overflowed the overlap counter then we're potentially
 	 * leaking dma-mappings.
 	 */
-	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+	WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
 		  pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
 		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
 }
@@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry,
 	if (rc == -EEXIST) {
 		struct dma_debug_entry *existing;
 
-		active_cacheline_inc_overlap(cln);
+		active_cacheline_inc_overlap(cln, entry->is_cache_clean);
 		existing = radix_tree_lookup(&dma_active_cacheline, cln);
 		/* A lookup failure here after we got -EEXIST is unexpected. */
 		WARN_ON(!existing);

-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox