public inbox for dtrace@lists.linux.dev
 help / color / mirror / Atom feed
* [PATCH 1/2] Fix probe discovery loop
@ 2026-03-01 22:39 eugene.loh
  2026-03-01 22:39 ` [PATCH 2/2] test: Adjust sync timing eugene.loh
  0 siblings, 1 reply; 2+ messages in thread
From: eugene.loh @ 2026-03-01 22:39 UTC (permalink / raw)
  To: dtrace, dtrace-devel

From: Eugene Loh <eugene.loh@oracle.com>

In dt_provider_discover(), we loop over providers to discover new
probes.  For each existing provider whose implementation has a discovery
function, we call the function, thereby possibly adding new providers.

This has at least two problems:

  x The iterator is not designed to handle new table elements
    being added during the loop.

  x New providers that are added will presumably use the same
    implementation that discovered them.  So, visiting them in the
    same loop simply calls the same discovery function again,
    redundantly.

While neither of these problems apparently causes any errors, the result
is a messy loop that potentially leads to unnecessary probe discovery.

Add dtp->dt_discs, a linked list of provider discovery functions.

Add functions to init and free this linked list.

Replace the iteration over a growing list of providers with an iteration
over the small, fixed list of provider discovery functions.

Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
---
 libdtrace/dt_impl.h     |  1 +
 libdtrace/dt_open.c     |  6 ++++++
 libdtrace/dt_provider.c | 45 +++++++++++++++++++++++++++++++++++------
 libdtrace/dt_provider.h |  2 ++
 4 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 5282efbdb..ff730319b 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -333,6 +333,7 @@ struct dtrace_hdl {
 	struct dt_probe *dt_error; /* ERROR probe */
 
 	dt_htab_t *dt_provs;	/* hash table of dt_provider_t's */
+	dt_list_t dt_discs;	/* linked list of provider discovery functions */
 	const struct dt_provider *dt_prov_pid; /* PID provider */
 	int dt_proc_signal;	/* signal used to interrupt monitoring threads */
 	struct sigaction dt_proc_oact;
diff --git a/libdtrace/dt_open.c b/libdtrace/dt_open.c
index a45908047..224f60cf7 100644
--- a/libdtrace/dt_open.c
+++ b/libdtrace/dt_open.c
@@ -1250,6 +1250,11 @@ dtrace_init(dtrace_hdl_t *dtp)
 			   dt_providers[i]->name);
 	}
 
+	/*
+	 * Initialize the list of provider discovery functions.
+	 */
+	dt_provider_discs_init(dtp);
+
 	return 0;
 }
 
@@ -1342,6 +1347,7 @@ dtrace_close(dtrace_hdl_t *dtp)
 	dt_probe_fini(dtp);
 
 	dt_htab_destroy(dtp->dt_provs);
+	dt_provider_discs_free(dtp);
 
 	for (i = 1; i < dtp->dt_cpp_argc; i++)
 		free(dtp->dt_cpp_argv[i]);
diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
index 079265b72..f7f76f9c2 100644
--- a/libdtrace/dt_provider.c
+++ b/libdtrace/dt_provider.c
@@ -231,18 +231,51 @@ dt_provider_xref(dtrace_hdl_t *dtp, dt_provider_t *pvp, id_t id)
 	return 0;
 }
 
-int
-dt_provider_discover(dtrace_hdl_t *dtp)
+typedef struct list_disc {
+	dt_list_t	list;
+	int		(*disc)(dtrace_hdl_t *dtp);
+} list_disc_t;
+
+void
+dt_provider_discs_init(dtrace_hdl_t *dtp)
 {
-	int		prid = dtp->dt_probe_id;
 	dt_htab_next_t	*it = NULL;
 	dt_provider_t	*pvp;
 
-	/* Discover new probes. */
 	while ((pvp = dt_htab_next(dtp->dt_provs, &it)) != NULL) {
-		if (pvp->impl->discover && pvp->impl->discover(dtp) < 0)
-			return -1;        /* errno is already set */
+		list_disc_t *disc;
+
+		if (pvp->impl->discover == NULL)
+			continue;
+		disc = dt_alloc(dtp, sizeof(list_disc_t));
+		if (disc == NULL)
+			return;
+		disc->disc = pvp->impl->discover;
+		dt_list_append(&dtp->dt_discs, disc);
 	}
+}
+
+void
+dt_provider_discs_free(dtrace_hdl_t *dtp)
+{
+	list_disc_t *disc;
+
+	while ((disc = dt_list_next(&dtp->dt_discs)) != NULL) {
+		dt_list_delete(&dtp->dt_discs, disc);
+		dt_free(dtp, disc);
+	}
+}
+
+int
+dt_provider_discover(dtrace_hdl_t *dtp)
+{
+	int		prid = dtp->dt_probe_id;
+	list_disc_t	*disc;
+
+	/* Discover new probes. */
+	for (disc = dt_list_next(&dtp->dt_discs); disc; disc = dt_list_next(disc))
+		if (disc->disc(dtp) < 0)
+			return -1;
 
 	/* Add them. */
 	for ( ; prid < dtp->dt_probe_id; prid++) {
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index dc1266711..0f8c13a00 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -120,6 +120,8 @@ extern dt_provider_t *dt_provider_create(dtrace_hdl_t *, const char *,
 					 const dtrace_pattr_t *, void *);
 extern int dt_provider_xref(dtrace_hdl_t *, dt_provider_t *, id_t);
 extern int dt_provider_discover(dtrace_hdl_t *dtp);
+extern void dt_provider_discs_init(dtrace_hdl_t *dtp);
+extern void dt_provider_discs_free(dtrace_hdl_t *dtp);
 
 #ifdef	__cplusplus
 }
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/2] test: Adjust sync timing
  2026-03-01 22:39 [PATCH 1/2] Fix probe discovery loop eugene.loh
@ 2026-03-01 22:39 ` eugene.loh
  0 siblings, 0 replies; 2+ messages in thread
From: eugene.loh @ 2026-03-01 22:39 UTC (permalink / raw)
  To: dtrace, dtrace-devel

From: Eugene Loh <eugene.loh@oracle.com>

The nusdtprobes test was failing rather consistently on systems with
very large CPU counts.

The main problem was that dtrace programs with aggregations take a long
time to start up on such systems.  In particular, gmap_create_aggs()
tries to create an aggs_$cpu BPF map for each CPU, spending (in
measurements on one system) about 0.004 sec for each dt_bpf_map_create()
and 0.011 sec for each dt_bpf_map_update().  When there are hundreds of
CPUs, the time to start the job up increases by, for example, 5-6
seconds.  The test waits for dtrace to start up by checking "-e" on the
output file, but the aggs_$cpu delay occurs after the file is created.

Add a BEGIN clause to write to the output file.  Replace the "-e" test
with "-s".

The test has a sleep before starting each team of processes.  There is
perhaps no reason for this wait.  Nonetheless, we leave that sleep,
simply reducing it from 2 seconds to 1 second.

There is also a delay between launching processes and tracing them:

  x dtprobed sees newly launched processes rather quickly
  x dtrace tries discovery 1x/second
  x discovery is rather fast
  x starting a newly discovered probe is ~ 0.011sec (on one system)

For 40 probes, that last step is about 0.5 sec, but the total "sleep 3"
in the test before killing the new processes is apparently sufficient.

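Make the corresponding "-e" to "-s" change in similar tests, even if
they have not shown the failure pattern.  In those tests, also tighten
the awk filters that post-process dtrace.out so that the extra "BEGIN"
output line is ignored.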

Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
---
 test/unittest/usdt/tst.defer-Z-basic.sh | 10 +++++++---
 test/unittest/usdt/tst.defer-Z.sh       | 10 +++++++---
 test/unittest/usdt/tst.defer.sh         | 10 +++++++---
 test/unittest/usdt/tst.nusdtprobes.sh   | 10 +++++++---
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/test/unittest/usdt/tst.defer-Z-basic.sh b/test/unittest/usdt/tst.defer-Z-basic.sh
index 61adb4601..fad91e5fe 100755
--- a/test/unittest/usdt/tst.defer-Z-basic.sh
+++ b/test/unittest/usdt/tst.defer-Z-basic.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 #
@@ -22,6 +22,10 @@ cp $trigger main
 
 # Start dtrace.
 $dtrace $dt_flags -Zq -o dtrace.out -n '
+BEGIN
+{
+	printf("BEGIN\n");
+}
 testprov*:::foo,
 testprov*:::bar
 {
@@ -33,7 +37,7 @@ dtpid=$!
 iter=$((timeout / 2))
 while [ $iter -gt 0 ]; do
 	sleep 1
-	if [ -e dtrace.out ]; then
+	if [ -s dtrace.out ]; then
 		break
 	fi
 	iter=$((iter - 1))
@@ -79,7 +83,7 @@ if ! diff -q main.out.post main.out.expected; then
 fi
 
 # Regularize the DTrace output, and check it.
-awk 'NF > 0 { map[$2 " " $1]++; }
+awk 'NF > 1 { map[$2 " " $1]++; }
      END { for (i in map) printf "%s %d\n", i, map[i]; }' dtrace.out > dtrace.out.post
 
 echo "$tpid main:bar 10" > dtrace.out.expected
diff --git a/test/unittest/usdt/tst.defer-Z.sh b/test/unittest/usdt/tst.defer-Z.sh
index ff2c5cbf1..db49cd583 100755
--- a/test/unittest/usdt/tst.defer-Z.sh
+++ b/test/unittest/usdt/tst.defer-Z.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 #
@@ -29,6 +29,10 @@ cp $trigger main
 # Start dtrace.
 
 $dtrace $dt_flags -Zwq -o dtrace.out -n '
+BEGIN
+{
+	printf("BEGIN\n");
+}
 testprov*:::foo
 {
 	raise(SIGUSR1);
@@ -51,7 +55,7 @@ dtpid=$!
 iter=$((timeout / 2))
 while [ $iter -gt 0 ]; do
 	sleep 1
-	if [ -e dtrace.out ]; then
+	if [ -s dtrace.out ]; then
 		break
 	fi
 	iter=$((iter - 1))
@@ -128,7 +132,7 @@ done
 # Check the dtrace output.
 
 #     regularize the dtrace output
-awk 'NF != 0 { print $1, $2, $3 }' dtrace.out | sort > dtrace.out.post
+awk 'NF == 3 { print $1, $2, $3 }' dtrace.out | sort > dtrace.out.post
 
 #     determine what to expect
 
diff --git a/test/unittest/usdt/tst.defer.sh b/test/unittest/usdt/tst.defer.sh
index 073af12d5..a1c29571c 100755
--- a/test/unittest/usdt/tst.defer.sh
+++ b/test/unittest/usdt/tst.defer.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 #
@@ -39,6 +39,10 @@ lastdigit=$((${pids[0]} % 10))
 # Start dtrace.
 
 $dtrace $dt_flags -wq -o dtrace.out -n '
+BEGIN
+{
+	printf("BEGIN\n");
+}
 testprov*:::foo
 {
 	raise(SIGUSR1);
@@ -57,7 +61,7 @@ dtpid=$!
 iter=$((timeout / 2))
 while [ $iter -gt 0 ]; do
 	sleep 1
-	if [ -e dtrace.out ]; then
+	if [ -s dtrace.out ]; then
 		break
 	fi
 	iter=$((iter - 1))
@@ -135,7 +139,7 @@ done
 # Check the dtrace output.
 
 #     regularize the dtrace output
-awk 'NF != 0 { print $1, $2, $3 }' dtrace.out | sort > dtrace.out.post
+awk 'NF == 3 { print $1, $2, $3 }' dtrace.out | sort > dtrace.out.post
 
 #     determine what to expect
 
diff --git a/test/unittest/usdt/tst.nusdtprobes.sh b/test/unittest/usdt/tst.nusdtprobes.sh
index 93c56e382..5e3a49724 100755
--- a/test/unittest/usdt/tst.nusdtprobes.sh
+++ b/test/unittest/usdt/tst.nusdtprobes.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Oracle Linux DTrace.
-# Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at
 # http://oss.oracle.com/licenses/upl.
 #
@@ -100,6 +100,10 @@ for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
 
 	rm -f dtrace.out
 	$dtrace $dt_flags $nusdt -Zq -o dtrace.out -n '
+	BEGIN
+	{
+		printf("BEGIN\n");
+	}
 	testprov*:::
 	{
 		@[probeprov, probemod, probefunc, probename] = count();
@@ -111,7 +115,7 @@ for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
 	iter=$((timeout / 4))
 	while [ $iter -gt 0 ]; do
 		sleep 1
-		if [ -e dtrace.out ]; then
+		if [ -s dtrace.out ]; then
 			break
 		fi
 		iter=$((iter - 1))
@@ -127,7 +131,7 @@ for nusdt in "" "-xnusdtprobes=40" "-xnusdtprobes=39"; do
 	rm -f check.txt
 	for (( iteam = 0; iteam < $nteams; iteam++ )); do
 		# Start the team, writing out expected output.
-		sleep 2
+		sleep 1
 		for (( immbr = 0; immbr < $nmmbrs; immbr++ )); do
 			./main &
 			pids[$immbr]=$!
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-03-01 22:39 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-01 22:39 [PATCH 1/2] Fix probe discovery loop eugene.loh
2026-03-01 22:39 ` [PATCH 2/2] test: Adjust sync timing eugene.loh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox