public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore()
@ 2025-03-04 16:36 Amery Hung
  2025-03-04 16:36 ` [PATCH bpf-next v4 2/3] selftests/bpf: Allow assigning traffic monitor print function Amery Hung
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Amery Hung @ 2025-03-04 16:36 UTC (permalink / raw)
  To: bpf; +Cc: daniel, andrii, alexei.starovoitov, martin.lau, ameryhung,
	kernel-team

There is no need to call a bunch of stdio_restore() in test_progs if the
scope of stdio redirection is reduced to what it needs to be: only
hijacking tests/subtests' stdio.

Also remove an unnecessary check of env.stdout_saved in the crash handler.

Signed-off-by: Amery Hung <ameryhung@gmail.com>
---
 tools/testing/selftests/bpf/test_progs.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0cb759632225..ab0f2fed3c58 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -474,8 +474,6 @@ static void dump_test_log(const struct prog_test_def *test,
 	print_test_result(test, test_state);
 }
 
-static void stdio_restore(void);
-
 /* A bunch of tests set custom affinity per-thread and/or per-process. Reset
  * it after each test/sub-test.
  */
@@ -490,13 +488,11 @@ static void reset_affinity(void)
 
 	err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
 	if (err < 0) {
-		stdio_restore();
 		fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
 		exit(EXIT_ERR_SETUP_INFRA);
 	}
 	err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
 	if (err < 0) {
-		stdio_restore();
 		fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
 		exit(EXIT_ERR_SETUP_INFRA);
 	}
@@ -514,7 +510,6 @@ static void save_netns(void)
 static void restore_netns(void)
 {
 	if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
-		stdio_restore();
 		perror("setns(CLONE_NEWNS)");
 		exit(EXIT_ERR_SETUP_INFRA);
 	}
@@ -1270,8 +1265,7 @@ void crash_handler(int signum)
 
 	sz = backtrace(bt, ARRAY_SIZE(bt));
 
-	if (env.stdout_saved)
-		stdio_restore();
+	stdio_restore();
 	if (env.test) {
 		env.test_state->error_cnt++;
 		dump_test_log(env.test, env.test_state, true, false, NULL);
@@ -1400,6 +1394,8 @@ static void run_one_test(int test_num)
 
 	state->tested = true;
 
+	stdio_restore();
+
 	if (verbose() && env.worker_id == -1)
 		print_test_result(test, state);
 
@@ -1408,7 +1404,6 @@ static void run_one_test(int test_num)
 	if (test->need_cgroup_cleanup)
 		cleanup_cgroup_environment();
 
-	stdio_restore();
 	free(stop_libbpf_log_capture());
 
 	dump_test_log(test, state, false, false, NULL);
@@ -1943,6 +1938,9 @@ int main(int argc, char **argv)
 
 	sigaction(SIGSEGV, &sigact, NULL);
 
+	env.stdout_saved = stdout;
+	env.stderr_saved = stderr;
+
 	env.secs_till_notify = 10;
 	env.secs_till_kill = 120;
 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -1969,9 +1967,6 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
-	env.stdout_saved = stdout;
-	env.stderr_saved = stderr;
-
 	env.has_testmod = true;
 	if (!env.list_test_names) {
 		/* ensure previous instance of the module is unloaded */
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH bpf-next v4 2/3] selftests/bpf: Allow assigning traffic monitor print function
  2025-03-04 16:36 [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Amery Hung
@ 2025-03-04 16:36 ` Amery Hung
  2025-03-04 16:36 ` [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread Amery Hung
  2025-03-04 21:48 ` [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Eduard Zingerman
  2 siblings, 0 replies; 10+ messages in thread
From: Amery Hung @ 2025-03-04 16:36 UTC (permalink / raw)
  To: bpf; +Cc: daniel, andrii, alexei.starovoitov, martin.lau, ameryhung,
	kernel-team

Allow users to change traffic monitor's print function. If not provided,
traffic monitor will print to stdout by default.

Signed-off-by: Amery Hung <ameryhung@gmail.com>
---
 tools/testing/selftests/bpf/network_helpers.c | 69 ++++++++++++++-----
 tools/testing/selftests/bpf/network_helpers.h |  8 +++
 2 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 737a952dcf80..11f07c82166b 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -750,6 +750,36 @@ struct tmonitor_ctx {
 	int pcap_fd;
 };
 
+static int __base_pr(const char *format, va_list args)
+{
+	return vfprintf(stdout, format, args);
+}
+
+static tm_print_fn_t __tm_pr = __base_pr;
+
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+	tm_print_fn_t old_print_fn;
+
+	old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
+
+	return old_print_fn;
+}
+
+void tm_print(const char *format, ...)
+{
+	tm_print_fn_t print_fn;
+	va_list args;
+
+	print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
+	if (!print_fn)
+		return;
+
+	va_start(args, format);
+	__tm_pr(format, args);
+	va_end(args);
+}
+
 /* Is this packet captured with a Ethernet protocol type? */
 static bool is_ethernet(const u_char *packet)
 {
@@ -767,7 +797,7 @@ static bool is_ethernet(const u_char *packet)
 	case 770: /* ARPHRD_FRAD */
 	case 778: /* ARPHDR_IPGRE */
 	case 803: /* ARPHRD_IEEE80211_RADIOTAP */
-		printf("Packet captured: arphdr_type=%d\n", arphdr_type);
+		tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
 		return false;
 	}
 	return true;
@@ -817,19 +847,19 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex,
 		dst_port = ntohs(tcp->dest);
 		transport_str = "TCP";
 	} else if (proto == IPPROTO_ICMP) {
-		printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
-		       ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
-		       packet[0], packet[1]);
+		tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
+			 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+			 packet[0], packet[1]);
 		return;
 	} else if (proto == IPPROTO_ICMPV6) {
-		printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
-		       ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
-		       packet[0], packet[1]);
+		tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
+			 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+			 packet[0], packet[1]);
 		return;
 	} else {
-		printf("%-7s %-3s %s %s > %s: protocol %d\n",
-		       ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
-		       src_addr, dst_addr, proto);
+		tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
+			 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
+			 src_addr, dst_addr, proto);
 		return;
 	}
 
@@ -843,13 +873,13 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex,
 			 tcp->ack ? ", ACK" : "");
 
 	if (ipv6)
-		printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
-		       ifname, pkt_type_str(pkt_type), src_addr, src_port,
-		       dst_addr, dst_port, transport_str, len, flags);
+		tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
+			 ifname, pkt_type_str(pkt_type), src_addr, src_port,
+			 dst_addr, dst_port, transport_str, len, flags);
 	else
-		printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
-		       ifname, pkt_type_str(pkt_type), src_addr, src_port,
-		       dst_addr, dst_port, transport_str, len, flags);
+		tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
+			 ifname, pkt_type_str(pkt_type), src_addr, src_port,
+			 dst_addr, dst_port, transport_str, len, flags);
 }
 
 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
@@ -964,8 +994,8 @@ static void *traffic_monitor_thread(void *arg)
 				ifname = _ifname;
 			}
 
-			printf("%-7s %-3s Unknown network protocol type 0x%x\n",
-			       ifname, pkt_type_str(ptype), proto);
+			tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
+				 ifname, pkt_type_str(ptype), proto);
 		}
 	}
 
@@ -1165,8 +1195,9 @@ void traffic_monitor_stop(struct tmonitor_ctx *ctx)
 	write(ctx->wake_fd, &w, sizeof(w));
 	pthread_join(ctx->thread, NULL);
 
-	printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+	tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
 
 	traffic_monitor_release(ctx);
 }
+
 #endif /* TRAFFIC_MONITOR */
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 9f6e05d886c5..2d726f8f6327 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -249,10 +249,13 @@ static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
 
 struct tmonitor_ctx;
 
+typedef int (*tm_print_fn_t)(const char *format, va_list args);
+
 #ifdef TRAFFIC_MONITOR
 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
 					   const char *subtest_name);
 void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn);
 #else
 static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
 							 const char *subtest_name)
@@ -263,6 +266,11 @@ static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, cons
 static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
 {
 }
+
+static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+	return NULL;
+}
 #endif
 
 #endif
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread
  2025-03-04 16:36 [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Amery Hung
  2025-03-04 16:36 ` [PATCH bpf-next v4 2/3] selftests/bpf: Allow assigning traffic monitor print function Amery Hung
@ 2025-03-04 16:36 ` Amery Hung
  2025-03-05  0:12   ` Eduard Zingerman
  2025-03-05  1:36   ` Martin KaFai Lau
  2025-03-04 21:48 ` [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Eduard Zingerman
  2 siblings, 2 replies; 10+ messages in thread
From: Amery Hung @ 2025-03-04 16:36 UTC (permalink / raw)
  To: bpf; +Cc: daniel, andrii, alexei.starovoitov, martin.lau, ameryhung,
	kernel-team

Traffic monitor thread may see dangling stdout as the main thread closes
and reassigns stdout without protection. This happens when the main thread
finishes one subtest and moves to another one in the same netns_new()
scope.

The issue can be reproduced by running test_progs repeatedly with traffic
monitor enabled:

for ((i=1;i<=100;i++)); do
   ./test_progs -a flow_dissector_skb* -m '*'
done

Fix it by first consolidating stdout assignment into stdio_restore().
stdout will be restored to env.stdout_saved when a test ends or running
in the crash handler and to test_state.stdout_saved otherwise.
Then, protect use/close/reassignment of stdout with a lock. The locking
in the main thread is always performed regradless of whether traffic
monitor is running or not for simplicity. It won't have any side-effect.
stdio_restore() is kept in the crash handler instead of making all print
functions in the crash handler use env.stdout_saved to make it less
error-prone.

Signed-off-by: Amery Hung <ameryhung@gmail.com>
---
 tools/testing/selftests/bpf/test_progs.c | 59 ++++++++++++++++--------
 1 file changed, 39 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index ab0f2fed3c58..5b89f6ca5a0a 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -88,7 +88,11 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
 #endif
 }
 
-static void stdio_restore_cleanup(void)
+static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static bool in_crash_handler(void);
+
+static void stdio_restore(void)
 {
 #ifdef __GLIBC__
 	if (verbose() && env.worker_id == -1) {
@@ -98,34 +102,34 @@ static void stdio_restore_cleanup(void)
 
 	fflush(stdout);
 
-	if (env.subtest_state) {
+	pthread_mutex_lock(&stdout_lock);
+
+	if (!env.subtest_state || in_crash_handler()) {
+		if (stdout == env.stdout_saved)
+			goto out;
+
+		fclose(env.test_state->stdout_saved);
+		env.test_state->stdout_saved = NULL;
+		stdout = env.stdout_saved;
+		stderr = env.stderr_saved;
+	} else {
 		fclose(env.subtest_state->stdout_saved);
 		env.subtest_state->stdout_saved = NULL;
 		stdout = env.test_state->stdout_saved;
 		stderr = env.test_state->stdout_saved;
-	} else {
-		fclose(env.test_state->stdout_saved);
-		env.test_state->stdout_saved = NULL;
 	}
+out:
+	pthread_mutex_unlock(&stdout_lock);
 #endif
 }
 
-static void stdio_restore(void)
+static int traffic_monitor_print_fn(const char *format, va_list args)
 {
-#ifdef __GLIBC__
-	if (verbose() && env.worker_id == -1) {
-		/* nothing to do, output to stdout by default */
-		return;
-	}
-
-	if (stdout == env.stdout_saved)
-		return;
-
-	stdio_restore_cleanup();
+	pthread_mutex_lock(&stdout_lock);
+	vfprintf(stdout, format, args);
+	pthread_mutex_unlock(&stdout_lock);
 
-	stdout = env.stdout_saved;
-	stderr = env.stderr_saved;
-#endif
+	return 0;
 }
 
 /* Adapted from perf/util/string.c */
@@ -536,7 +540,8 @@ void test__end_subtest(void)
 				   test_result(subtest_state->error_cnt,
 					       subtest_state->skipped));
 
-	stdio_restore_cleanup();
+	stdio_restore();
+
 	env.subtest_state = NULL;
 }
 
@@ -1276,6 +1281,18 @@ void crash_handler(int signum)
 	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
 }
 
+static bool in_crash_handler(void)
+{
+	struct sigaction sigact;
+
+	/* sa_handler will be cleared if invoked since crash_handler is
+	 * registered with SA_RESETHAND
+	 */
+	sigaction(SIGSEGV, NULL, &sigact);
+
+	return sigact.sa_handler != crash_handler;
+}
+
 void hexdump(const char *prefix, const void *buf, size_t len)
 {
 	for (int i = 0; i < len; i++) {
@@ -1957,6 +1974,8 @@ int main(int argc, char **argv)
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 	libbpf_set_print(libbpf_print_fn);
 
+	traffic_monitor_set_print(traffic_monitor_print_fn);
+
 	srand(time(NULL));
 
 	env.jit_enabled = is_jit_enabled();
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore()
  2025-03-04 16:36 [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Amery Hung
  2025-03-04 16:36 ` [PATCH bpf-next v4 2/3] selftests/bpf: Allow assigning traffic monitor print function Amery Hung
  2025-03-04 16:36 ` [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread Amery Hung
@ 2025-03-04 21:48 ` Eduard Zingerman
  2025-03-04 23:39   ` Amery Hung
  2 siblings, 1 reply; 10+ messages in thread
From: Eduard Zingerman @ 2025-03-04 21:48 UTC (permalink / raw)
  To: Amery Hung, bpf
  Cc: daniel, andrii, alexei.starovoitov, martin.lau, kernel-team

On Tue, 2025-03-04 at 08:36 -0800, Amery Hung wrote:
> There is no need to call a bunch of stdio_restore() in test_progs if the
> scope of stdio redirection is reduced to what it needs to be: only
> hijacking tests/subtests' stdio.
> 
> Also remove an unnecessary check of env.stdout_saved in the crash handler.
> 
> Signed-off-by: Amery Hung <ameryhung@gmail.com>
> ---

If anyone else would look at this commit, here is an alternative
description:
- functions reset_affinity() and restore_netns() are only called from
  run_one_test();
- beside other places stdio_restore() is called from reset_affinity(),
  restore_netns() and run_one_test() itself;
- this commit moves stdio_restore() call in run_one_test() so that
  it executes before reset_affinity() and restore_netns().

Acked-by: Eduard Zingerman <eddyz87@gmail.com>

[...]

> @@ -1943,6 +1938,9 @@ int main(int argc, char **argv)
>  
>  	sigaction(SIGSEGV, &sigact, NULL);
>  
> +	env.stdout_saved = stdout;
> +	env.stderr_saved = stderr;
> +

Nit: why moving these?

>  	env.secs_till_notify = 10;
>  	env.secs_till_kill = 120;
>  	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
> @@ -1969,9 +1967,6 @@ int main(int argc, char **argv)
>  		return -1;
>  	}
>  
> -	env.stdout_saved = stdout;
> -	env.stderr_saved = stderr;
> -
>  	env.has_testmod = true;
>  	if (!env.list_test_names) {
>  		/* ensure previous instance of the module is unloaded */



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore()
  2025-03-04 21:48 ` [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Eduard Zingerman
@ 2025-03-04 23:39   ` Amery Hung
  2025-03-05  0:14     ` Eduard Zingerman
  0 siblings, 1 reply; 10+ messages in thread
From: Amery Hung @ 2025-03-04 23:39 UTC (permalink / raw)
  To: Eduard Zingerman
  Cc: bpf, daniel, andrii, alexei.starovoitov, martin.lau, kernel-team

On Tue, Mar 4, 2025 at 1:48 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2025-03-04 at 08:36 -0800, Amery Hung wrote:
> > There is no need to call a bunch of stdio_restore() in test_progs if the
> > scope of stdio redirection is reduced to what it needs to be: only
> > hijacking tests/subtests' stdio.
> >
> > Also remove an unnecessary check of env.stdout_saved in the crash handler.
> >
> > Signed-off-by: Amery Hung <ameryhung@gmail.com>
> > ---
>
> If anyone else would look at this commit, here is an alternative
> description:
> - functions reset_affinity() and restore_netns() are only called from
>   run_one_test();
> - beside other places stdio_restore() is called from reset_affinity(),
>   restore_netns() and run_one_test() itself;
> - this commit moves stdio_restore() call in run_one_test() so that
>   it executes before reset_affinity() and restore_netns().
>
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
>

I can improve the commit message in the next respin.

Thanks,
Amery

> [...]
>
> > @@ -1943,6 +1938,9 @@ int main(int argc, char **argv)
> >
> >       sigaction(SIGSEGV, &sigact, NULL);
> >
> > +     env.stdout_saved = stdout;
> > +     env.stderr_saved = stderr;
> > +
>
> Nit: why moving these?

If we assign env.stdout_saved at the very beginning, crash_handler()
can just call stdio_restore() without checking if env.stdout_saved is
set or not.

>
> >       env.secs_till_notify = 10;
> >       env.secs_till_kill = 120;
> >       err = argp_parse(&argp, argc, argv, 0, NULL, &env);
> > @@ -1969,9 +1967,6 @@ int main(int argc, char **argv)
> >               return -1;
> >       }
> >
> > -     env.stdout_saved = stdout;
> > -     env.stderr_saved = stderr;
> > -
> >       env.has_testmod = true;
> >       if (!env.list_test_names) {
> >               /* ensure previous instance of the module is unloaded */
>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread
  2025-03-04 16:36 ` [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread Amery Hung
@ 2025-03-05  0:12   ` Eduard Zingerman
  2025-03-05 16:52     ` Amery Hung
  2025-03-05  1:36   ` Martin KaFai Lau
  1 sibling, 1 reply; 10+ messages in thread
From: Eduard Zingerman @ 2025-03-05  0:12 UTC (permalink / raw)
  To: Amery Hung, bpf
  Cc: daniel, andrii, alexei.starovoitov, martin.lau, kernel-team

On Tue, 2025-03-04 at 08:36 -0800, Amery Hung wrote:
> Traffic monitor thread may see dangling stdout as the main thread closes
> and reassigns stdout without protection. This happens when the main thread
> finishes one subtest and moves to another one in the same netns_new()
> scope.
> The issue can be reproduced by running test_progs repeatedly with traffic
> monitor enabled:
> 
> for ((i=1;i<=100;i++)); do
>    ./test_progs -a flow_dissector_skb* -m '*'
> done
> 
> Fix it by first consolidating stdout assignment into stdio_restore().
> stdout will be restored to env.stdout_saved when a test ends or running
> in the crash handler and to test_state.stdout_saved otherwise.
> Then, protect use/close/reassignment of stdout with a lock. The locking
> in the main thread is always performed regradless of whether traffic
> monitor is running or not for simplicity. It won't have any side-effect.
> stdio_restore() is kept in the crash handler instead of making all print
> functions in the crash handler use env.stdout_saved to make it less
> error-prone.
> 
> Signed-off-by: Amery Hung <ameryhung@gmail.com>
> ---

This patch fixes the error for me.

Tested-by: Eduard Zingerman <eddyz87@gmail.com>

>  tools/testing/selftests/bpf/test_progs.c | 59 ++++++++++++++++--------
>  1 file changed, 39 insertions(+), 20 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index ab0f2fed3c58..5b89f6ca5a0a 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -88,7 +88,11 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
>  #endif
>  }
>  
> -static void stdio_restore_cleanup(void)
> +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +static bool in_crash_handler(void);
> +
> +static void stdio_restore(void)
>  {
>  #ifdef __GLIBC__
>  	if (verbose() && env.worker_id == -1) {
> @@ -98,34 +102,34 @@ static void stdio_restore_cleanup(void)
>  
>  	fflush(stdout);
>  
> -	if (env.subtest_state) {
> +	pthread_mutex_lock(&stdout_lock);
> +
> +	if (!env.subtest_state || in_crash_handler()) {
> +		if (stdout == env.stdout_saved)
> +			goto out;
> +
> +		fclose(env.test_state->stdout_saved);
> +		env.test_state->stdout_saved = NULL;
> +		stdout = env.stdout_saved;
> +		stderr = env.stderr_saved;
> +	} else {
>  		fclose(env.subtest_state->stdout_saved);
>  		env.subtest_state->stdout_saved = NULL;
>  		stdout = env.test_state->stdout_saved;
>  		stderr = env.test_state->stdout_saved;
> -	} else {
> -		fclose(env.test_state->stdout_saved);
> -		env.test_state->stdout_saved = NULL;
>  	}
> +out:
> +	pthread_mutex_unlock(&stdout_lock);
>  #endif
>  }

stdio_restore_cleanup() did not reset stderr/stdout when
env.subtest_state was NULL, but this difference does not seem to
matter, stdio_restore_cleanup() was called from:
- test__start_subtest(), where stdio_hijack_init() would override
  stderr/stdout anyways.
- run_one_test(), where it is followed by call to stdio_restore().

I think this change is Ok.

[...]

> @@ -1276,6 +1281,18 @@ void crash_handler(int signum)
>  	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
>  }
>  
> +static bool in_crash_handler(void)
> +{
> +	struct sigaction sigact;
> +
> +	/* sa_handler will be cleared if invoked since crash_handler is
> +	 * registered with SA_RESETHAND
> +	 */
> +	sigaction(SIGSEGV, NULL, &sigact);
> +
> +	return sigact.sa_handler != crash_handler;
> +}
> +

The patch would be simpler w/o this function. I double checked
functions called from crash_handler() and two 'fprintf(stderr, ...)'
there are the only places where stderr/stdout is used instead of
*_saved versions. It is already a prevalent pattern to do
'fprintf(env.stderr_saved, ...)' in this file.
Or pass a flag as in v3?

>  void hexdump(const char *prefix, const void *buf, size_t len)
>  {
>  	for (int i = 0; i < len; i++) {
> @@ -1957,6 +1974,8 @@ int main(int argc, char **argv)

[...]


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore()
  2025-03-04 23:39   ` Amery Hung
@ 2025-03-05  0:14     ` Eduard Zingerman
  0 siblings, 0 replies; 10+ messages in thread
From: Eduard Zingerman @ 2025-03-05  0:14 UTC (permalink / raw)
  To: Amery Hung
  Cc: bpf, daniel, andrii, alexei.starovoitov, martin.lau, kernel-team

On Tue, 2025-03-04 at 15:39 -0800, Amery Hung wrote:
> On Tue, Mar 4, 2025 at 1:48 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > On Tue, 2025-03-04 at 08:36 -0800, Amery Hung wrote:
> > > There is no need to call a bunch of stdio_restore() in test_progs if the
> > > scope of stdio redirection is reduced to what it needs to be: only
> > > hijacking tests/subtests' stdio.
> > > 
> > > Also remove an unnecessary check of env.stdout_saved in the crash handler.
> > > 
> > > Signed-off-by: Amery Hung <ameryhung@gmail.com>
> > > ---
> > 
> > If anyone else would look at this commit, here is an alternative
> > description:
> > - functions reset_affinity() and restore_netns() are only called from
> >   run_one_test();
> > - beside other places stdio_restore() is called from reset_affinity(),
> >   restore_netns() and run_one_test() itself;
> > - this commit moves stdio_restore() call in run_one_test() so that
> >   it executes before reset_affinity() and restore_netns().
> > 
> > Acked-by: Eduard Zingerman <eddyz87@gmail.com>
> > 
> 
> I can improve the commit message in the next respin.

If there would be a respin, otherwise no need to worry.

[...]

> > > @@ -1943,6 +1938,9 @@ int main(int argc, char **argv)
> > > 
> > >       sigaction(SIGSEGV, &sigact, NULL);
> > > 
> > > +     env.stdout_saved = stdout;
> > > +     env.stderr_saved = stderr;
> > > +
> > 
> > Nit: why moving these?
> 
> If we assign env.stdout_saved at the very beginning, crash_handler()
> can just call stdio_restore() without checking if env.stdout_saved is
> set or not.

Understood, thank you.

[...]


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread
  2025-03-04 16:36 ` [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread Amery Hung
  2025-03-05  0:12   ` Eduard Zingerman
@ 2025-03-05  1:36   ` Martin KaFai Lau
  2025-03-05 16:52     ` Amery Hung
  1 sibling, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2025-03-05  1:36 UTC (permalink / raw)
  To: Amery Hung
  Cc: daniel, andrii, alexei.starovoitov, martin.lau, kernel-team, bpf

On 3/4/25 8:36 AM, Amery Hung wrote:
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index ab0f2fed3c58..5b89f6ca5a0a 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -88,7 +88,11 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
>   #endif
>   }
>   
> -static void stdio_restore_cleanup(void)
> +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +static bool in_crash_handler(void);
> +
> +static void stdio_restore(void)
>   {
>   #ifdef __GLIBC__
>   	if (verbose() && env.worker_id == -1) {
> @@ -98,34 +102,34 @@ static void stdio_restore_cleanup(void)
>   
>   	fflush(stdout);
>   
> -	if (env.subtest_state) {
> +	pthread_mutex_lock(&stdout_lock);
> +
> +	if (!env.subtest_state || in_crash_handler()) {

Can the stdio restore be done in the crash_handler() itself instead of having a 
special case here and adding another in_crash_handler()?

Theoretically, the crash_handler() only needs to
fflush(stdout /* whatever the current stdout is */) and...

> +		if (stdout == env.stdout_saved)
> +			goto out;
> +
> +		fclose(env.test_state->stdout_saved);
> +		env.test_state->stdout_saved = NULL;
> +		stdout = env.stdout_saved;
> +		stderr = env.stderr_saved;

... restore std{out,err} = env.std{out,err}_saved.

At the crash point, it does not make a big difference to 
fclose(evn.test_state->stdout_saved) or not?

If the crash_handler() does not close the stdout that the traffic monitor might 
potentially be using, then crash_handler() does not need to take mutex, right?

> +	} else {
>   		fclose(env.subtest_state->stdout_saved);
>   		env.subtest_state->stdout_saved = NULL;
>   		stdout = env.test_state->stdout_saved;
>   		stderr = env.test_state->stdout_saved;
> -	} else {
> -		fclose(env.test_state->stdout_saved);
> -		env.test_state->stdout_saved = NULL;
>   	}
> +out:
> +	pthread_mutex_unlock(&stdout_lock);
>   #endif
>   }
>   

[ ... ]

> +static bool in_crash_handler(void)
> +{
> +	struct sigaction sigact;
> +
> +	/* sa_handler will be cleared if invoked since crash_handler is
> +	 * registered with SA_RESETHAND
> +	 */
> +	sigaction(SIGSEGV, NULL, &sigact);
> +
> +	return sigact.sa_handler != crash_handler;
> +}
> +

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread
  2025-03-05  1:36   ` Martin KaFai Lau
@ 2025-03-05 16:52     ` Amery Hung
  0 siblings, 0 replies; 10+ messages in thread
From: Amery Hung @ 2025-03-05 16:52 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: daniel, andrii, alexei.starovoitov, martin.lau, kernel-team, bpf

On Tue, Mar 4, 2025 at 5:36 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 3/4/25 8:36 AM, Amery Hung wrote:
> > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> > index ab0f2fed3c58..5b89f6ca5a0a 100644
> > --- a/tools/testing/selftests/bpf/test_progs.c
> > +++ b/tools/testing/selftests/bpf/test_progs.c
> > @@ -88,7 +88,11 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
> >   #endif
> >   }
> >
> > -static void stdio_restore_cleanup(void)
> > +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
> > +
> > +static bool in_crash_handler(void);
> > +
> > +static void stdio_restore(void)
> >   {
> >   #ifdef __GLIBC__
> >       if (verbose() && env.worker_id == -1) {
> > @@ -98,34 +102,34 @@ static void stdio_restore_cleanup(void)
> >
> >       fflush(stdout);
> >
> > -     if (env.subtest_state) {
> > +     pthread_mutex_lock(&stdout_lock);
> > +
> > +     if (!env.subtest_state || in_crash_handler()) {
>
> Can the stdio restore be done in the crash_handler() itself instead of having a
> special case here and adding another in_crash_handler()?
>
> Theoretically, the crash_handler() only needs to
> fflush(stdout /* whatever the current stdout is */) and...
>
> > +             if (stdout == env.stdout_saved)
> > +                     goto out;
> > +
> > +             fclose(env.test_state->stdout_saved);
> > +             env.test_state->stdout_saved = NULL;
> > +             stdout = env.stdout_saved;
> > +             stderr = env.stderr_saved;
>
> ... restore std{out,err} = env.std{out,err}_saved.
>
> At the crash point, it does not make a big difference to
> fclose(evn.test_state->stdout_saved) or not?
>
> If the crash_handler() does not close the stdout that the traffic monitor might
> potentially be using, then crash_handler() does not need to take mutex, right?
>

You are right. I think it is simpler to not let stdio_restore() handle
the crash case, and just do the following in the crash handler.

fflush(stdout);
stdout = env.stdout_saved;
stderr = env.stderr_saved;


> > +     } else {
> >               fclose(env.subtest_state->stdout_saved);
> >               env.subtest_state->stdout_saved = NULL;
> >               stdout = env.test_state->stdout_saved;
> >               stderr = env.test_state->stdout_saved;
> > -     } else {
> > -             fclose(env.test_state->stdout_saved);
> > -             env.test_state->stdout_saved = NULL;
> >       }
> > +out:
> > +     pthread_mutex_unlock(&stdout_lock);
> >   #endif
> >   }
> >
>
> [ ... ]
>
> > +static bool in_crash_handler(void)
> > +{
> > +     struct sigaction sigact;
> > +
> > +     /* sa_handler will be cleared if invoked since crash_handler is
> > +      * registered with SA_RESETHAND
> > +      */
> > +     sigaction(SIGSEGV, NULL, &sigact);
> > +
> > +     return sigact.sa_handler != crash_handler;
> > +}
> > +

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread
  2025-03-05  0:12   ` Eduard Zingerman
@ 2025-03-05 16:52     ` Amery Hung
  0 siblings, 0 replies; 10+ messages in thread
From: Amery Hung @ 2025-03-05 16:52 UTC (permalink / raw)
  To: Eduard Zingerman
  Cc: bpf, daniel, andrii, alexei.starovoitov, martin.lau, kernel-team

On Tue, Mar 4, 2025 at 4:12 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2025-03-04 at 08:36 -0800, Amery Hung wrote:
> > Traffic monitor thread may see dangling stdout as the main thread closes
> > and reassigns stdout without protection. This happens when the main thread
> > finishes one subtest and moves to another one in the same netns_new()
> > scope.
> > The issue can be reproduced by running test_progs repeatedly with traffic
> > monitor enabled:
> >
> > for ((i=1;i<=100;i++)); do
> >    ./test_progs -a flow_dissector_skb* -m '*'
> > done
> >
> > Fix it by first consolidating stdout assignment into stdio_restore().
> > stdout will be restored to env.stdout_saved when a test ends or running
> > in the crash handler and to test_state.stdout_saved otherwise.
> > Then, protect use/close/reassignment of stdout with a lock. The locking
> > in the main thread is always performed regradless of whether traffic
> > monitor is running or not for simplicity. It won't have any side-effect.
> > stdio_restore() is kept in the crash handler instead of making all print
> > functions in the crash handler use env.stdout_saved to make it less
> > error-prone.
> >
> > Signed-off-by: Amery Hung <ameryhung@gmail.com>
> > ---
>
> This patch fixes the error for me.
>
> Tested-by: Eduard Zingerman <eddyz87@gmail.com>
>
> >  tools/testing/selftests/bpf/test_progs.c | 59 ++++++++++++++++--------
> >  1 file changed, 39 insertions(+), 20 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> > index ab0f2fed3c58..5b89f6ca5a0a 100644
> > --- a/tools/testing/selftests/bpf/test_progs.c
> > +++ b/tools/testing/selftests/bpf/test_progs.c
> > @@ -88,7 +88,11 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
> >  #endif
> >  }
> >
> > -static void stdio_restore_cleanup(void)
> > +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
> > +
> > +static bool in_crash_handler(void);
> > +
> > +static void stdio_restore(void)
> >  {
> >  #ifdef __GLIBC__
> >       if (verbose() && env.worker_id == -1) {
> > @@ -98,34 +102,34 @@ static void stdio_restore_cleanup(void)
> >
> >       fflush(stdout);
> >
> > -     if (env.subtest_state) {
> > +     pthread_mutex_lock(&stdout_lock);
> > +
> > +     if (!env.subtest_state || in_crash_handler()) {
> > +             if (stdout == env.stdout_saved)
> > +                     goto out;
> > +
> > +             fclose(env.test_state->stdout_saved);
> > +             env.test_state->stdout_saved = NULL;
> > +             stdout = env.stdout_saved;
> > +             stderr = env.stderr_saved;
> > +     } else {
> >               fclose(env.subtest_state->stdout_saved);
> >               env.subtest_state->stdout_saved = NULL;
> >               stdout = env.test_state->stdout_saved;
> >               stderr = env.test_state->stdout_saved;
> > -     } else {
> > -             fclose(env.test_state->stdout_saved);
> > -             env.test_state->stdout_saved = NULL;
> >       }
> > +out:
> > +     pthread_mutex_unlock(&stdout_lock);
> >  #endif
> >  }
>
> stdio_restore_cleanup() did not reset stderr/stdout when
> env.subtest_state was NULL, but this difference does not seem to
> matter, stdio_restore_cleanup() was called from:
> - test__start_subtest(), where stdio_hijack_init() would override
>   stderr/stdout anyways.
> - run_one_test(), where it is followed by call to stdio_restore().
>
> I think this change is Ok.
>
> [...]
>
> > @@ -1276,6 +1281,18 @@ void crash_handler(int signum)
> >       backtrace_symbols_fd(bt, sz, STDERR_FILENO);
> >  }
> >
> > +static bool in_crash_handler(void)
> > +{
> > +     struct sigaction sigact;
> > +
> > +     /* sa_handler will be cleared if invoked since crash_handler is
> > +      * registered with SA_RESETHAND
> > +      */
> > +     sigaction(SIGSEGV, NULL, &sigact);
> > +
> > +     return sigact.sa_handler != crash_handler;
> > +}
> > +
>
> The patch would be simpler w/o this function. I double checked
> functions called from crash_handler() and two 'fprintf(stderr, ...)'
> there are the only places where stderr/stdout is used instead of
> *_saved versions. It is already a prevalent pattern to do
> 'fprintf(env.stderr_saved, ...)' in this file.
> Or pass a flag as in v3?

While that is a common pattern, I think restoring stdio there can make
sure people don't accidentally use stdout in the crash handler and
find that it does not work occasionally.

I will remove the in_crash_handler special case handling per Martin's
suggestion.


>
> >  void hexdump(const char *prefix, const void *buf, size_t len)
> >  {
> >       for (int i = 0; i < len; i++) {
> > @@ -1957,6 +1974,8 @@ int main(int argc, char **argv)
>
> [...]
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-03-05 16:52 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-04 16:36 [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Amery Hung
2025-03-04 16:36 ` [PATCH bpf-next v4 2/3] selftests/bpf: Allow assigning traffic monitor print function Amery Hung
2025-03-04 16:36 ` [PATCH bpf-next v4 3/3] selftests/bpf: Fix dangling stdout seen by traffic monitor thread Amery Hung
2025-03-05  0:12   ` Eduard Zingerman
2025-03-05 16:52     ` Amery Hung
2025-03-05  1:36   ` Martin KaFai Lau
2025-03-05 16:52     ` Amery Hung
2025-03-04 21:48 ` [PATCH bpf-next v4 1/3] selftests/bpf: Clean up call sites of stdio_restore() Eduard Zingerman
2025-03-04 23:39   ` Amery Hung
2025-03-05  0:14     ` Eduard Zingerman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox