* [PATCH v3 2/2] Extend the USDT bit mask to multiple words
@ 2025-07-23 0:53 eugene.loh
2025-08-19 21:07 ` Kris Van Hees
0 siblings, 1 reply; 2+ messages in thread
From: eugene.loh @ 2025-07-23 0:53 UTC (permalink / raw)
To: dtrace, dtrace-devel
From: Eugene Loh <eugene.loh@oracle.com>
Currently, USDT is limited to 64 probe descriptions since the
underlying probe uses a 64-bit mask to decide which probes to execute.
Change to a multi-word bit mask that can be extended to however many
probe descriptions there are.
Also, change the mask words to be 32-bit rather than 64-bit. The reason
is that, commonly, there will be fewer than 32 probe descriptions. In
this case, we shorten the value of the "USDT prids" BPF map from 16 bytes
uint32_t prid;
long long mask[1];
down to 8 bytes
uint32_t prid;
uint32_t mask[1];
(The second member is smaller and no longer costs extra padding.)
We also add an
int dt_usdt_mask_bytes;
to denote how many bytes will be needed for the mask. This value is
computed by usdt_mask_bytes_init(). Currently, this function is
awkwardly called in gmap_create_usdt(), just before the value is needed.
Such a call to a provider-specific function is clumsy, but there are no
other calls to the provider between compilation (where the number of
statements is determined) and this map creation.
Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
---
libdtrace/dt_bpf.c | 6 +-
libdtrace/dt_bpf_maps.h | 4 +-
libdtrace/dt_impl.h | 1 +
libdtrace/dt_prov_uprobe.c | 87 +++++++++---
.../unittest/usdt/tst.manyprobedescriptions.r | 1 +
.../usdt/tst.manyprobedescriptions.sh | 64 +++++++++
.../usdt/tst.manyprobedescriptions2.r | 1 +
.../usdt/tst.manyprobedescriptions2.sh | 127 ++++++++++++++++++
8 files changed, 267 insertions(+), 24 deletions(-)
create mode 100644 test/unittest/usdt/tst.manyprobedescriptions.r
create mode 100755 test/unittest/usdt/tst.manyprobedescriptions.sh
create mode 100644 test/unittest/usdt/tst.manyprobedescriptions2.r
create mode 100755 test/unittest/usdt/tst.manyprobedescriptions2.sh
diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
index ddd849d0b..fcb53f044 100644
--- a/libdtrace/dt_bpf.c
+++ b/libdtrace/dt_bpf.c
@@ -967,6 +967,7 @@ gmap_create_probes(dtrace_hdl_t *dtp)
return 0;
}
+void usdt_mask_bytes_init(dtrace_hdl_t *dtp);
/*
* Create the 'usdt_names' and 'usdt_prids' BPF maps.
*
@@ -992,8 +993,11 @@ gmap_create_usdt(dtrace_hdl_t *dtp)
if (dtp->dt_usdt_namesmap_fd == -1)
return -1;
+ usdt_mask_bytes_init(dtp);
+
dtp->dt_usdt_pridsmap_fd = create_gmap(dtp, "usdt_prids", BPF_MAP_TYPE_HASH,
- sizeof(usdt_prids_map_key_t), sizeof(usdt_prids_map_val_t), nusdtprobes);
+ sizeof(usdt_prids_map_key_t),
+ sizeof(usdt_prids_map_val_t) + dtp->dt_usdt_mask_bytes, nusdtprobes);
if (dtp->dt_usdt_pridsmap_fd == -1)
return -1;
diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
index 884dc3983..ec5d4d7b1 100644
--- a/libdtrace/dt_bpf_maps.h
+++ b/libdtrace/dt_bpf_maps.h
@@ -1,6 +1,6 @@
/*
* Oracle Linux DTrace.
- * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
@@ -48,7 +48,7 @@ typedef struct usdt_prids_map_key {
} usdt_prids_map_key_t;
typedef struct usdt_prids_map_val {
uint32_t prid; /* should be dtrace_id_t, sys/dtrace_types.h */
- long long mask;
+ uint32_t mask[];
} usdt_prids_map_val_t;
#ifdef __cplusplus
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index 2adc1252b..8bbc4dc1f 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -397,6 +397,7 @@ struct dtrace_hdl {
int dt_cpumap_fd; /* file descriptor for the 'cpuinfo' BPF map */
int dt_usdt_pridsmap_fd; /* file descriptor for the 'usdt_prids' BPF map */
int dt_usdt_namesmap_fd; /* file descriptor for the 'usdt_names' BPF map */
+ int dt_usdt_mask_bytes; /* size of USDT mask in bytes */
dtrace_handle_err_f *dt_errhdlr; /* error handler, if any */
void *dt_errarg; /* error handler argument */
dtrace_handle_drop_f *dt_drophdlr; /* drop handler, if any */
diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
index e8f9f8c98..8b55fe319 100644
--- a/libdtrace/dt_prov_uprobe.c
+++ b/libdtrace/dt_prov_uprobe.c
@@ -403,7 +403,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
int fdprids = dtp->dt_usdt_pridsmap_fd;
int fdnames = dtp->dt_usdt_namesmap_fd;
usdt_prids_map_key_t key, nxt;
- usdt_prids_map_val_t val;
+ usdt_prids_map_val_t *val = alloca(sizeof(usdt_prids_map_val_t) + dtp->dt_usdt_mask_bytes);
list_key_t keys_to_delete, *elem, *elem_next;
dt_probe_t *prp, *prp_next;
@@ -418,7 +418,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
while (dt_bpf_map_next_key(fdprids, &key, &nxt) == 0) {
memcpy(&key, &nxt, sizeof(usdt_prids_map_key_t));
- if (dt_bpf_map_lookup(fdprids, &key, &val) == -1)
+ if (dt_bpf_map_lookup(fdprids, &key, val) == -1)
return dt_set_errno(dtp, EDT_BPF);
/* Check if the process is still running. */
@@ -431,7 +431,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
* we might delete the same usdt_names entry
* multiple times. That's okay.
*/
- dt_bpf_map_delete(fdnames, &val.prid);
+ dt_bpf_map_delete(fdnames, &val->prid);
/*
* Delete the usdt_prids entry.
@@ -452,7 +452,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
* FIXME. There might be another case, where the process
* is still running, but some of its USDT probes are gone?
* So maybe we have to check for the existence of one of
- * dtrace_probedesc_t *pdp = dtp->dt_probes[val.prid]->desc;
+ * dtrace_probedesc_t *pdp = dtp->dt_probes[val->prid]->desc;
* char *prv = ...pdp->prv minus the numerial part;
*
* /run/dtrace/probes/$pid/$pdp->prv/$pdp->mod/$pdp->fun/$pdp->prb
@@ -590,6 +590,31 @@ static void usdt_error(dt_pcb_t *pcb, const char *fmt, ...)
longjmp(pcb->pcb_jmpbuf, EDT_COMPILER);
}
+void usdt_mask_bytes_init(dtrace_hdl_t *dtp)
+{
+ int i, n = 0, w = sizeof(((usdt_prids_map_val_t *)0)->mask[0]);
+
+ /* Count how many statements cannot be ignored, regardless of uprp. */
+ for (i = 0; i < dtp->dt_stmt_nextid; i++) {
+ dtrace_stmtdesc_t *stp;
+
+ stp = dtp->dt_stmts[i];
+ if (stp == NULL || ignore_clause(dtp, i, NULL))
+ continue;
+
+ n++;
+ }
+
+ /* Determine how many bytes are needed for this many bits. */
+ n = (n + CHAR_BIT - 1) / CHAR_BIT;
+
+ /* Determine how many words are needed for this many bytes. */
+ n = (n + w - 1) / w;
+
+ /* Determine how many bytes are needed. */
+ dtp->dt_usdt_mask_bytes = (n ? n : 1) * w;
+}
+
static int add_probe_uprobe(dtrace_hdl_t *dtp, dt_probe_t *prp)
{
dtrace_difo_t *dp;
@@ -651,6 +676,7 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
int fd = dtp->dt_usdt_namesmap_fd;
pid_t pid;
list_probe_t *pup;
+ usdt_prids_map_val_t *val;
/* Add probe name elements to usdt_names map. */
p = probnam;
@@ -686,11 +712,11 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
}
/* Add prid and bit mask to usdt_prids map. */
+ val = alloca(sizeof(usdt_prids_map_val_t) + dtp->dt_usdt_mask_bytes);
for (pup = prp->prv_data; pup != NULL; pup = dt_list_next(pup)) {
dt_probe_t *uprp = pup->probe;
- long long mask = 0, bit = 1;
+ uint32_t iword = 0, mask = 0, bit = 1;
usdt_prids_map_key_t key;
- usdt_prids_map_val_t val;
dt_uprobe_t *upp = uprp->prv_data;
/*
@@ -704,15 +730,24 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
if (uprp->prov->impl == &dt_uprobe && !(upp->flags & PP_IS_ENABLED)) {
int n;
+ /*
+ * The loop over n to dtp->dt_stmt_nextid, skipping
+ * ignore_clause(), should be the same here as in
+ * the trampoline.
+ */
for (n = 0; n < dtp->dt_stmt_nextid; n++) {
dtrace_stmtdesc_t *stp;
stp = dtp->dt_stmts[n];
- if (stp == NULL)
+ if (stp == NULL || ignore_clause(dtp, n, uprp))
continue;
- if (ignore_clause(dtp, n, uprp))
- continue;
+ if (bit == 0) {
+ val->mask[iword] = mask;
+ mask = 0;
+ iword++;
+ bit = 1;
+ }
if (dt_gmatch(prp->desc->prv, stp->dtsd_ecbdesc->dted_probe.prv) &&
dt_gmatch(prp->desc->mod, stp->dtsd_ecbdesc->dted_probe.mod) &&
@@ -727,11 +762,11 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
key.pid = pid;
key.uprid = uprp->desc->id;
- val.prid = prp->desc->id;
- val.mask = mask;
+ val->prid = prp->desc->id;
+ val->mask[iword] = mask;
// FIXME Check return value, but how should errors be handled?
- dt_bpf_map_update(dtp->dt_usdt_pridsmap_fd, &key, &val);
+ dt_bpf_map_update(dtp->dt_usdt_pridsmap_fd, &key, val);
}
return 0;
@@ -1452,7 +1487,7 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
const list_probe_t *pop;
uint_t lbl_exit = pcb->pcb_exitlbl;
dt_ident_t *usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
- int n;
+ int n, ibit, w = CHAR_BIT * sizeof(((usdt_prids_map_val_t *)0)->mask[0]);
assert(usdt_prids != NULL);
@@ -1539,7 +1574,8 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
*/
assert(sizeof(usdt_prids_map_key_t) <= DT_STK_SLOT_SZ);
emit(dlp, BPF_STORE(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), BPF_REG_0));
- emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0) + (int)sizeof(pid_t), uprp->desc->id));
+ emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP,
+ DT_TRAMP_SP_SLOT(0) + (int)sizeof(pid_t), uprp->desc->id));
dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_SLOT(0)));
@@ -1573,8 +1609,8 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
emit(dlp, BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
emit(dlp, BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
- /* Read the bit mask from the table lookup in %r6. */ // FIXME someday, extend this past 64 bits
- emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
+ /* Store the value key for reuse. */
+ emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), BPF_REG_0));
/*
* Apply arg mappings, if needed.
@@ -1588,21 +1624,29 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
/*
* Hold the bit mask in %r6 between clause calls.
*/
- for (n = 0; n < dtp->dt_stmt_nextid; n++) {
+ /*
+ * The loop over n to dtp->dt_stmt_nextid, skipping
+ * ignore_clause(), should be the same here as in
+ * add_probe_usdt().
+ */
+ for (ibit = n = 0; n < dtp->dt_stmt_nextid; n++) {
dtrace_stmtdesc_t *stp;
dt_ident_t *idp;
uint_t lbl_next;
stp = dtp->dt_stmts[n];
- if (stp == NULL)
- continue;
-
- if (ignore_clause(dtp, n, uprp))
+ if (stp == NULL || ignore_clause(dtp, n, uprp))
continue;
idp = stp->dtsd_clause;
lbl_next = dt_irlist_label(dlp);
+ /* Load the next word of the bit mask into %r6. */
+ if (ibit % w == 0) {
+ emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
+ emit(dlp, BPF_LOAD(BPF_W, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask[ibit / w])));
+ }
+
/* If the lowest %r6 bit is 0, skip over this clause. */
emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
@@ -1630,6 +1674,7 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
/* Right-shift %r6. */
emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
+ ibit++;
}
out:
diff --git a/test/unittest/usdt/tst.manyprobedescriptions.r b/test/unittest/usdt/tst.manyprobedescriptions.r
new file mode 100644
index 000000000..2e9ba477f
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprobedescriptions.r
@@ -0,0 +1 @@
+success
diff --git a/test/unittest/usdt/tst.manyprobedescriptions.sh b/test/unittest/usdt/tst.manyprobedescriptions.sh
new file mode 100755
index 000000000..92a61d5b7
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprobedescriptions.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+dtrace=$1
+TRIGGER=$PWD/test/triggers/usdt-tst-args
+
+DIRNAME="$tmpdir/usdt-many_probe_descriptions.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# Construct the D scripts and output files.
+# We stick 80 probe descriptions in each of 3 scripts to test
+# USDT's ability to handle hundreds of probe descriptions.
+for d in 0 1 2; do
+for x in 00 01 02 03 04 05 06 07 08 09 \
+ 10 11 12 13 14 15 16 17 18 19 \
+ 20 21 22 23 24 25 26 27 28 29 \
+ 30 31 32 33 34 35 36 37 38 39 \
+ 40 41 42 43 44 45 46 47 48 49 \
+ 50 51 52 53 54 55 56 57 58 59 \
+ 60 61 62 63 64 65 66 67 68 69 \
+ 70 71 72 73 74 75 76 77 78 79 \
+; do
+ echo 'test_prov$target:::place { printf("'$d$x'\n"); }' >> D$d.d
+ echo $d$x >> expect.txt
+done
+done
+echo 'test_prov$target:::place { exit(0); }' >> D$d.d
+echo >> expect.txt
+
+# Run DTrace.
+
+$dtrace $dt_flags -c $TRIGGER -q -s D0.d -s D1.d -s D2.d >& actual.txt
+if [ $? -eq 0 ]; then
+ if diff -q expect.txt actual.txt > /dev/null; then
+ echo success
+ exit 0
+ else
+ echo ERROR: did not get expected results
+ echo === expect.txt
+ cat expect.txt
+ echo === actual.txt
+ cat actual.txt
+ echo === diff
+ diff expect.txt actual.txt
+ fi
+else
+ echo ERROR: dtrace error
+ echo ==== output
+ cat actual.txt
+fi
+
+echo ==== script D0.d
+cat D0.d
+echo ==== script D1.d
+cat D1.d
+echo ==== script D2.d
+cat D2.d
+
+exit 1
diff --git a/test/unittest/usdt/tst.manyprobedescriptions2.r b/test/unittest/usdt/tst.manyprobedescriptions2.r
new file mode 100644
index 000000000..2e9ba477f
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprobedescriptions2.r
@@ -0,0 +1 @@
+success
diff --git a/test/unittest/usdt/tst.manyprobedescriptions2.sh b/test/unittest/usdt/tst.manyprobedescriptions2.sh
new file mode 100755
index 000000000..8001cec0b
--- /dev/null
+++ b/test/unittest/usdt/tst.manyprobedescriptions2.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+
+# This test uses many probes and probe descriptions. Therefore, the
+# number of BPF programs to load into the kernel -- dt_bpf_load_prog()
+# calling prp->prov->impl->load_prog(), which is dt_bpf_prog_load() --
+# and the duration of each load are both increasing.
+# @@timeout: 400
+
+dtrace=$1
+
+DIRNAME="$tmpdir/usdt-many_probe_descriptions2.$$.$RANDOM"
+mkdir -p $DIRNAME
+cd $DIRNAME
+
+# Set the lists.
+# - The probes will be foo$x$y.
+# - The probe descriptions will be foo$x* and foo*$y, for each $d.
+# So if there are nx items in xlist, ny in ylist, and nd in dlist,
+# - there will be roughly nx*ny probes
+# - there will be roughly (nx+ny)*nd probe descriptions
+
+xlist="a b c d e f g h i j k l m"
+ylist="n o p q r s t u v w x y z"
+dlist="0 1 2 3 4 5 6 7 8"
+
+# Make the trigger: Preambles.
+
+echo "provider testprov {" > prov.d
+
+echo '#include "prov.h"' > main.c
+echo 'int main(int argc, char **argv) {' >> main.c
+
+# Make the trigger: Loop over the probes.
+
+for x in $xlist; do
+for y in $ylist; do
+ echo "probe foo$x$y();" >> prov.d
+ echo "TESTPROV_FOO$x$y();" | awk '{ print(toupper($1)) }' >> main.c
+done
+done
+
+# Make the trigger: Epilogues.
+
+echo "};" >> prov.d
+echo "return 0; }" >> main.c
+
+# Build the trigger.
+
+$dtrace $dt_flags -h -s prov.d
+if [ $? -ne 0 ]; then
+ echo "failed to generate header file" >&2
+ cat prov.d
+ exit 1
+fi
+$CC $test_cppflags -c main.c
+if [ $? -ne 0 ]; then
+ echo "failed to compile test" >&2
+ cat main.c
+ exit 1
+fi
+$dtrace $dt_flags -G -64 -s prov.d main.o
+if [ $? -ne 0 ]; then
+ echo "failed to create DOF" >&2
+ exit 1
+fi
+$CC $test_ldflags -o main main.o prov.o
+if [ $? -ne 0 ]; then
+ echo "failed to link final executable" >&2
+ exit 1
+fi
+
+# Prepare the D script, generating the probe descriptions.
+
+rm -f D.d
+for d in $dlist; do
+ for x in $xlist; do
+ echo 'testprov$target:::foo'$x'* { printf("'$d' '$x'* %s\n", probename) }' >> D.d
+ done
+ for y in $ylist; do
+ echo 'testprov$target:::foo*'$y' { printf("'$d' *'$y' %s\n", probename) }' >> D.d
+ done
+done
+
+# Prepare the expected output.
+
+for x in $xlist; do
+for y in $ylist; do
+for d in $dlist; do
+ echo $d $x'*' foo$x$y >> expect.txt
+ echo $d '*'$y foo$x$y >> expect.txt
+done
+done
+done
+echo >> expect.txt
+
+# Run DTrace.
+
+$dtrace $dt_flags -c ./main -qs D.d >& actual.txt
+if [ $? -ne 0 ]; then
+ echo ERROR: dtrace error
+ echo "==== D script"
+ cat D.d
+ echo "==== output"
+ cat actual.txt
+ exit 1
+fi
+
+# Check results.
+
+if diff -q expect.txt actual.txt; then
+ echo success
+ exit 0
+else
+ echo ERROR: unexpected results
+ echo "==== expect"
+ cat expect.txt
+ echo "==== actual"
+ cat actual.txt
+ echo "==== diff"
+ diff expect.txt actual.txt
+ exit 1
+fi
--
2.43.5
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v3 2/2] Extend the USDT bit mask to multiple words
2025-07-23 0:53 [PATCH v3 2/2] Extend the USDT bit mask to multiple words eugene.loh
@ 2025-08-19 21:07 ` Kris Van Hees
0 siblings, 0 replies; 2+ messages in thread
From: Kris Van Hees @ 2025-08-19 21:07 UTC (permalink / raw)
To: eugene.loh; +Cc: dtrace, dtrace-devel
I would like to hold off on this patch a bit longer. I don't think it is too
likely people are running into trouble due to too many clauses, and it would
be nice to avoid adding more provider-specific logic in the core of DTrace.
My suggestion is that we move the creation of the USDT-specific maps to the
uprobe provider. Since those maps are only ever used by the trampolines
created by this provider, the fds and other data can be stored in the
provider-specific private data. Upon the first call to generate a trampoline,
we can check whether the maps have been initialized and, if not, create and
populate them.
This way all the USDT-specific handling remains within the uprobe provider,
and we actually fix the earlier inclusion of USDT map creation in the
generic code.
On Tue, Jul 22, 2025 at 08:53:04PM -0400, eugene.loh@oracle.com wrote:
> From: Eugene Loh <eugene.loh@oracle.com>
>
> Currently, USDT is limited to 64 probe descriptions since the
> underlying probe uses a 64-bit mask to decide which probes to execute.
>
> Change to a multi-word bit mask that can be extended to however many
> probe descriptions there are.
>
> Also, change the mask words to be 32-bit rather than 64-bit. The reason
> is that, commonly, there will be fewer than 32 probe descriptions. In
> this case, we shorten the value of the "USDT prids" BPF map from 16 bytes
> uint32_t prid;
> long long mask[1];
> down to 8 bytes
> uint32_t prid;
> uint32_t mask[1];
> (The second member is smaller and no longer costs extra padding.)
>
> We also add an
> int dt_usdt_mask_bytes;
> to denote how many bytes will be needed for the mask. This value is
> computed by usdt_mask_bytes_init(). Currently, this function is
> awkwardly called in gmap_create_usdt(), just before the value is needed.
> Such a call to a provider-specific function is clumsy, but there are no
> other calls to the provider between compilation (where the number of
> statements is determined) and this map creation.
>
> Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
> ---
> libdtrace/dt_bpf.c | 6 +-
> libdtrace/dt_bpf_maps.h | 4 +-
> libdtrace/dt_impl.h | 1 +
> libdtrace/dt_prov_uprobe.c | 87 +++++++++---
> .../unittest/usdt/tst.manyprobedescriptions.r | 1 +
> .../usdt/tst.manyprobedescriptions.sh | 64 +++++++++
> .../usdt/tst.manyprobedescriptions2.r | 1 +
> .../usdt/tst.manyprobedescriptions2.sh | 127 ++++++++++++++++++
> 8 files changed, 267 insertions(+), 24 deletions(-)
> create mode 100644 test/unittest/usdt/tst.manyprobedescriptions.r
> create mode 100755 test/unittest/usdt/tst.manyprobedescriptions.sh
> create mode 100644 test/unittest/usdt/tst.manyprobedescriptions2.r
> create mode 100755 test/unittest/usdt/tst.manyprobedescriptions2.sh
>
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index ddd849d0b..fcb53f044 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -967,6 +967,7 @@ gmap_create_probes(dtrace_hdl_t *dtp)
> return 0;
> }
>
> +void usdt_mask_bytes_init(dtrace_hdl_t *dtp);
> /*
> * Create the 'usdt_names' and 'usdt_prids' BPF maps.
> *
> @@ -992,8 +993,11 @@ gmap_create_usdt(dtrace_hdl_t *dtp)
> if (dtp->dt_usdt_namesmap_fd == -1)
> return -1;
>
> + usdt_mask_bytes_init(dtp);
> +
> dtp->dt_usdt_pridsmap_fd = create_gmap(dtp, "usdt_prids", BPF_MAP_TYPE_HASH,
> - sizeof(usdt_prids_map_key_t), sizeof(usdt_prids_map_val_t), nusdtprobes);
> + sizeof(usdt_prids_map_key_t),
> + sizeof(usdt_prids_map_val_t) + dtp->dt_usdt_mask_bytes, nusdtprobes);
> if (dtp->dt_usdt_pridsmap_fd == -1)
> return -1;
>
> diff --git a/libdtrace/dt_bpf_maps.h b/libdtrace/dt_bpf_maps.h
> index 884dc3983..ec5d4d7b1 100644
> --- a/libdtrace/dt_bpf_maps.h
> +++ b/libdtrace/dt_bpf_maps.h
> @@ -1,6 +1,6 @@
> /*
> * Oracle Linux DTrace.
> - * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> @@ -48,7 +48,7 @@ typedef struct usdt_prids_map_key {
> } usdt_prids_map_key_t;
> typedef struct usdt_prids_map_val {
> uint32_t prid; /* should be dtrace_id_t, sys/dtrace_types.h */
> - long long mask;
> + uint32_t mask[];
> } usdt_prids_map_val_t;
>
> #ifdef __cplusplus
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index 2adc1252b..8bbc4dc1f 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -397,6 +397,7 @@ struct dtrace_hdl {
> int dt_cpumap_fd; /* file descriptor for the 'cpuinfo' BPF map */
> int dt_usdt_pridsmap_fd; /* file descriptor for the 'usdt_prids' BPF map */
> int dt_usdt_namesmap_fd; /* file descriptor for the 'usdt_names' BPF map */
> + int dt_usdt_mask_bytes; /* size of USDT mask in bytes */
> dtrace_handle_err_f *dt_errhdlr; /* error handler, if any */
> void *dt_errarg; /* error handler argument */
> dtrace_handle_drop_f *dt_drophdlr; /* drop handler, if any */
> diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
> index e8f9f8c98..8b55fe319 100644
> --- a/libdtrace/dt_prov_uprobe.c
> +++ b/libdtrace/dt_prov_uprobe.c
> @@ -403,7 +403,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
> int fdprids = dtp->dt_usdt_pridsmap_fd;
> int fdnames = dtp->dt_usdt_namesmap_fd;
> usdt_prids_map_key_t key, nxt;
> - usdt_prids_map_val_t val;
> + usdt_prids_map_val_t *val = alloca(sizeof(usdt_prids_map_val_t) + dtp->dt_usdt_mask_bytes);
> list_key_t keys_to_delete, *elem, *elem_next;
> dt_probe_t *prp, *prp_next;
>
> @@ -418,7 +418,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
> while (dt_bpf_map_next_key(fdprids, &key, &nxt) == 0) {
> memcpy(&key, &nxt, sizeof(usdt_prids_map_key_t));
>
> - if (dt_bpf_map_lookup(fdprids, &key, &val) == -1)
> + if (dt_bpf_map_lookup(fdprids, &key, val) == -1)
> return dt_set_errno(dtp, EDT_BPF);
>
> /* Check if the process is still running. */
> @@ -431,7 +431,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
> * we might delete the same usdt_names entry
> * multiple times. That's okay.
> */
> - dt_bpf_map_delete(fdnames, &val.prid);
> + dt_bpf_map_delete(fdnames, &val->prid);
>
> /*
> * Delete the usdt_prids entry.
> @@ -452,7 +452,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
> * FIXME. There might be another case, where the process
> * is still running, but some of its USDT probes are gone?
> * So maybe we have to check for the existence of one of
> - * dtrace_probedesc_t *pdp = dtp->dt_probes[val.prid]->desc;
> + * dtrace_probedesc_t *pdp = dtp->dt_probes[val->prid]->desc;
> * char *prv = ...pdp->prv minus the numerial part;
> *
> * /run/dtrace/probes/$pid/$pdp->prv/$pdp->mod/$pdp->fun/$pdp->prb
> @@ -590,6 +590,31 @@ static void usdt_error(dt_pcb_t *pcb, const char *fmt, ...)
> longjmp(pcb->pcb_jmpbuf, EDT_COMPILER);
> }
>
> +void usdt_mask_bytes_init(dtrace_hdl_t *dtp)
> +{
> + int i, n = 0, w = sizeof(((usdt_prids_map_val_t *)0)->mask[0]);
> +
> + /* Count how many statements cannot be ignored, regardless of uprp. */
> + for (i = 0; i < dtp->dt_stmt_nextid; i++) {
> + dtrace_stmtdesc_t *stp;
> +
> + stp = dtp->dt_stmts[i];
> + if (stp == NULL || ignore_clause(dtp, i, NULL))
> + continue;
> +
> + n++;
> + }
> +
> + /* Determine how many bytes are needed for this many bits. */
> + n = (n + CHAR_BIT - 1) / CHAR_BIT;
> +
> + /* Determine how many words are needed for this many bytes. */
> + n = (n + w - 1) / w;
> +
> + /* Determine how many bytes are needed. */
> + dtp->dt_usdt_mask_bytes = (n ? n : 1) * w;
> +}
> +
> static int add_probe_uprobe(dtrace_hdl_t *dtp, dt_probe_t *prp)
> {
> dtrace_difo_t *dp;
> @@ -651,6 +676,7 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> int fd = dtp->dt_usdt_namesmap_fd;
> pid_t pid;
> list_probe_t *pup;
> + usdt_prids_map_val_t *val;
>
> /* Add probe name elements to usdt_names map. */
> p = probnam;
> @@ -686,11 +712,11 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> }
>
> /* Add prid and bit mask to usdt_prids map. */
> + val = alloca(sizeof(usdt_prids_map_val_t) + dtp->dt_usdt_mask_bytes);
> for (pup = prp->prv_data; pup != NULL; pup = dt_list_next(pup)) {
> dt_probe_t *uprp = pup->probe;
> - long long mask = 0, bit = 1;
> + uint32_t iword = 0, mask = 0, bit = 1;
> usdt_prids_map_key_t key;
> - usdt_prids_map_val_t val;
> dt_uprobe_t *upp = uprp->prv_data;
>
> /*
> @@ -704,15 +730,24 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> if (uprp->prov->impl == &dt_uprobe && !(upp->flags & PP_IS_ENABLED)) {
> int n;
>
> + /*
> + * The loop over n to dtp->dt_stmt_nextid, skipping
> + * ignore_clause(), should be the same here as in
> + * the trampoline.
> + */
> for (n = 0; n < dtp->dt_stmt_nextid; n++) {
> dtrace_stmtdesc_t *stp;
>
> stp = dtp->dt_stmts[n];
> - if (stp == NULL)
> + if (stp == NULL || ignore_clause(dtp, n, uprp))
> continue;
>
> - if (ignore_clause(dtp, n, uprp))
> - continue;
> + if (bit == 0) {
> + val->mask[iword] = mask;
> + mask = 0;
> + iword++;
> + bit = 1;
> + }
>
> if (dt_gmatch(prp->desc->prv, stp->dtsd_ecbdesc->dted_probe.prv) &&
> dt_gmatch(prp->desc->mod, stp->dtsd_ecbdesc->dted_probe.mod) &&
> @@ -727,11 +762,11 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
> key.pid = pid;
> key.uprid = uprp->desc->id;
>
> - val.prid = prp->desc->id;
> - val.mask = mask;
> + val->prid = prp->desc->id;
> + val->mask[iword] = mask;
>
> // FIXME Check return value, but how should errors be handled?
> - dt_bpf_map_update(dtp->dt_usdt_pridsmap_fd, &key, &val);
> + dt_bpf_map_update(dtp->dt_usdt_pridsmap_fd, &key, val);
> }
>
> return 0;
> @@ -1452,7 +1487,7 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> const list_probe_t *pop;
> uint_t lbl_exit = pcb->pcb_exitlbl;
> dt_ident_t *usdt_prids = dt_dlib_get_map(dtp, "usdt_prids");
> - int n;
> + int n, ibit, w = CHAR_BIT * sizeof(((usdt_prids_map_val_t *)0)->mask[0]);
>
> assert(usdt_prids != NULL);
>
> @@ -1539,7 +1574,8 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> */
> assert(sizeof(usdt_prids_map_key_t) <= DT_STK_SLOT_SZ);
> emit(dlp, BPF_STORE(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), BPF_REG_0));
> - emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP, DT_TRAMP_SP_SLOT(0) + (int)sizeof(pid_t), uprp->desc->id));
> + emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_FP,
> + DT_TRAMP_SP_SLOT(0) + (int)sizeof(pid_t), uprp->desc->id));
> dt_cg_xsetx(dlp, usdt_prids, DT_LBL_NONE, BPF_REG_1, usdt_prids->di_id);
> emit(dlp, BPF_MOV_REG(BPF_REG_2, BPF_REG_FP));
> emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, DT_TRAMP_SP_SLOT(0)));
> @@ -1573,8 +1609,8 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> emit(dlp, BPF_LOAD(BPF_W, BPF_REG_1, BPF_REG_0, 0));
> emit(dlp, BPF_STORE(BPF_W, BPF_REG_7, DMST_PRID, BPF_REG_1));
>
> - /* Read the bit mask from the table lookup in %r6. */ // FIXME someday, extend this past 64 bits
> - emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask)));
> + /* Store the value key for reuse. */
> + emit(dlp, BPF_STORE(BPF_DW, BPF_REG_FP, DT_TRAMP_SP_SLOT(0), BPF_REG_0));
>
> /*
> * Apply arg mappings, if needed.
> @@ -1588,21 +1624,29 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> /*
> * Hold the bit mask in %r6 between clause calls.
> */
> - for (n = 0; n < dtp->dt_stmt_nextid; n++) {
> + /*
> + * The loop over n to dtp->dt_stmt_nextid, skipping
> + * ignore_clause(), should be the same here as in
> + * add_probe_usdt().
> + */
> + for (ibit = n = 0; n < dtp->dt_stmt_nextid; n++) {
> dtrace_stmtdesc_t *stp;
> dt_ident_t *idp;
> uint_t lbl_next;
>
> stp = dtp->dt_stmts[n];
> - if (stp == NULL)
> - continue;
> -
> - if (ignore_clause(dtp, n, uprp))
> + if (stp == NULL || ignore_clause(dtp, n, uprp))
> continue;
>
> idp = stp->dtsd_clause;
> lbl_next = dt_irlist_label(dlp);
>
> + /* Load the next word of the bit mask into %r6. */
> + if (ibit % w == 0) {
> + emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_0, BPF_REG_FP, DT_TRAMP_SP_SLOT(0)));
> + emit(dlp, BPF_LOAD(BPF_W, BPF_REG_6, BPF_REG_0, offsetof(usdt_prids_map_val_t, mask[ibit / w])));
> + }
> +
> /* If the lowest %r6 bit is 0, skip over this clause. */
> emit(dlp, BPF_MOV_REG(BPF_REG_1, BPF_REG_6));
> emit(dlp, BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 1));
> @@ -1630,6 +1674,7 @@ static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
>
> /* Right-shift %r6. */
> emit(dlp, BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1));
> + ibit++;
> }
>
> out:
> diff --git a/test/unittest/usdt/tst.manyprobedescriptions.r b/test/unittest/usdt/tst.manyprobedescriptions.r
> new file mode 100644
> index 000000000..2e9ba477f
> --- /dev/null
> +++ b/test/unittest/usdt/tst.manyprobedescriptions.r
> @@ -0,0 +1 @@
> +success
> diff --git a/test/unittest/usdt/tst.manyprobedescriptions.sh b/test/unittest/usdt/tst.manyprobedescriptions.sh
> new file mode 100755
> index 000000000..92a61d5b7
> --- /dev/null
> +++ b/test/unittest/usdt/tst.manyprobedescriptions.sh
> @@ -0,0 +1,64 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +dtrace=$1
> +TRIGGER=$PWD/test/triggers/usdt-tst-args
> +
> +DIRNAME="$tmpdir/usdt-many_probe_descriptions.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +# Construct the D scripts and output files.
> +# We stick 80 probe descriptions in each of 3 scripts to test
> +# USDT's ability to handle hundreds of probe descriptions.
> +for d in 0 1 2; do
> +for x in 00 01 02 03 04 05 06 07 08 09 \
> + 10 11 12 13 14 15 16 17 18 19 \
> + 20 21 22 23 24 25 26 27 28 29 \
> + 30 31 32 33 34 35 36 37 38 39 \
> + 40 41 42 43 44 45 46 47 48 49 \
> + 50 51 52 53 54 55 56 57 58 59 \
> + 60 61 62 63 64 65 66 67 68 69 \
> + 70 71 72 73 74 75 76 77 78 79 \
> +; do
> + echo 'test_prov$target:::place { printf("'$d$x'\n"); }' >> D$d.d
> + echo $d$x >> expect.txt
> +done
> +done
> +echo 'test_prov$target:::place { exit(0); }' >> D$d.d
> +echo >> expect.txt
> +
> +# Run DTrace.
> +
> +$dtrace $dt_flags -c $TRIGGER -q -s D0.d -s D1.d -s D2.d >& actual.txt
> +if [ $? -eq 0 ]; then
> + if diff -q expect.txt actual.txt > /dev/null; then
> + echo success
> + exit 0
> + else
> + echo ERROR: did not get expected results
> + echo === expect.txt
> + cat expect.txt
> + echo === actual.txt
> + cat actual.txt
> + echo === diff
> + diff expect.txt actual.txt
> + fi
> +else
> + echo ERROR: dtrace error
> + echo ==== output
> + cat actual.txt
> +fi
> +
> +echo ==== script D0.d
> +cat D0.d
> +echo ==== script D1.d
> +cat D1.d
> +echo ==== script D2.d
> +cat D2.d
> +
> +exit 1
> diff --git a/test/unittest/usdt/tst.manyprobedescriptions2.r b/test/unittest/usdt/tst.manyprobedescriptions2.r
> new file mode 100644
> index 000000000..2e9ba477f
> --- /dev/null
> +++ b/test/unittest/usdt/tst.manyprobedescriptions2.r
> @@ -0,0 +1 @@
> +success
> diff --git a/test/unittest/usdt/tst.manyprobedescriptions2.sh b/test/unittest/usdt/tst.manyprobedescriptions2.sh
> new file mode 100755
> index 000000000..8001cec0b
> --- /dev/null
> +++ b/test/unittest/usdt/tst.manyprobedescriptions2.sh
> @@ -0,0 +1,127 @@
> +#!/bin/bash
> +#
> +# Oracle Linux DTrace.
> +# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
> +# Licensed under the Universal Permissive License v 1.0 as shown at
> +# http://oss.oracle.com/licenses/upl.
> +
> +# This test uses many probes and probe descriptions. Therefore, the
> +# number of BPF programs to load into the kernel -- dt_bpf_load_prog()
> +# calling prp->prov->impl->load_prog(), which is dt_bpf_prog_load() --
> +# and the duration of each load are both increasing.
> +# @@timeout: 400
> +
> +dtrace=$1
> +
> +DIRNAME="$tmpdir/usdt-many_probe_descriptions2.$$.$RANDOM"
> +mkdir -p $DIRNAME
> +cd $DIRNAME
> +
> +# Set the lists.
> +# - The probes will be foo$x$y.
> +# - The probe descriptions will be foo$x* and foo*$y, for each $d.
> +# So if there are nx items in xlist, ny in ylist, and nd in dlist,
> +# - there will be roughly nx*ny probes
> +# - there will be roughly (nx+ny)*nd probe descriptions
> +
> +xlist="a b c d e f g h i j k l m"
> +ylist="n o p q r s t u v w x y z"
> +dlist="0 1 2 3 4 5 6 7 8"
> +
> +# Make the trigger: Preambles.
> +
> +echo "provider testprov {" > prov.d
> +
> +echo '#include "prov.h"' > main.c
> +echo 'int main(int argc, char **argv) {' >> main.c
> +
> +# Make the trigger: Loop over the probes.
> +
> +for x in $xlist; do
> +for y in $ylist; do
> + echo "probe foo$x$y();" >> prov.d
> + echo "TESTPROV_FOO$x$y();" | awk '{ print(toupper($1)) }' >> main.c
> +done
> +done
> +
> +# Make the trigger: Epilogues.
> +
> +echo "};" >> prov.d
> +echo "return 0; }" >> main.c
> +
> +# Build the trigger.
> +
> +$dtrace $dt_flags -h -s prov.d
> +if [ $? -ne 0 ]; then
> + echo "failed to generate header file" >&2
> + cat prov.d
> + exit 1
> +fi
> +$CC $test_cppflags -c main.c
> +if [ $? -ne 0 ]; then
> + echo "failed to compile test" >&2
> + cat main.c
> + exit 1
> +fi
> +$dtrace $dt_flags -G -64 -s prov.d main.o
> +if [ $? -ne 0 ]; then
> + echo "failed to create DOF" >&2
> + exit 1
> +fi
> +$CC $test_ldflags -o main main.o prov.o
> +if [ $? -ne 0 ]; then
> + echo "failed to link final executable" >&2
> + exit 1
> +fi
> +
> +# Prepare the D script, generating the probe descriptions.
> +
> +rm -f D.d
> +for d in $dlist; do
> + for x in $xlist; do
> + echo 'testprov$target:::foo'$x'* { printf("'$d' '$x'* %s\n", probename) }' >> D.d
> + done
> + for y in $ylist; do
> + echo 'testprov$target:::foo*'$y' { printf("'$d' *'$y' %s\n", probename) }' >> D.d
> + done
> +done
> +
> +# Prepare the expected output.
> +
> +for x in $xlist; do
> +for y in $ylist; do
> +for d in $dlist; do
> + echo $d $x'*' foo$x$y >> expect.txt
> + echo $d '*'$y foo$x$y >> expect.txt
> +done
> +done
> +done
> +echo >> expect.txt
> +
> +# Run DTrace.
> +
> +$dtrace $dt_flags -c ./main -qs D.d >& actual.txt
> +if [ $? -ne 0 ]; then
> + echo ERROR: dtrace error
> + echo "==== D script"
> + cat D.d
> + echo "==== output"
> + cat actual.txt
> + exit 1
> +fi
> +
> +# Check results.
> +
> +if diff -q expect.txt actual.txt; then
> + echo success
> + exit 0
> +else
> + echo ERROR: unexpected results
> + echo "==== expect"
> + cat expect.txt
> + echo "==== actual"
> + cat actual.txt
> + echo "==== diff"
> + diff expect.txt actual.txt
> + exit 1
> +fi
> --
> 2.43.5
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2025-08-19 21:07 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-07-23 0:53 [PATCH v3 2/2] Extend the USDT bit mask to multiple words eugene.loh
2025-08-19 21:07 ` Kris Van Hees
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).