From: Alan Maguire <alan.maguire@oracle.com>
To: dtrace@lists.linux.dev
Cc: dtrace-devel@oss.oracle.com, Alan Maguire <alan.maguire@oracle.com>
Subject: [PATCH v3 2/9] stapsdt provider: support systemwide probing
Date: Tue, 13 Jan 2026 16:51:25 +0000 [thread overview]
Message-ID: <20260113165132.2454591-3-alan.maguire@oracle.com> (raw)
In-Reply-To: <20260113165132.2454591-1-alan.maguire@oracle.com>
For stapsdt probes we can do systemwide probing by having
the kernel insert traps into the VMAs associated with a file.
The key problem for DTrace is how to specify a file path in
a module in a provider:module:function:probe specification.
Here the approach (also used by libbpf) is to support both
absolute paths and bare binary/library names, expanding the
latter into full paths using [LD_LIBRARY_]PATH; so
specifying
myprov*:myprog::myprobe
causes us to search /usr/bin, /usr/sbin and PATH directories
to find myprog to instrument it. If the module contains .so
we check /usr/lib, /usr/lib64 and LD_LIBRARY_PATH. This is
beneficial as it allows scripts to be interoperable across
distros that use different directories for locating binaries.
The other part that was needed was to fix up probe offsets using
the section address optionally provided by .stapsdt.base and the
base address recorded in the note (addrs[1]).
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
libdtrace/dt_pid.c | 177 ++++++++++++++++++++++++++++---------
libdtrace/dt_prov_uprobe.c | 17 ++--
2 files changed, 148 insertions(+), 46 deletions(-)
diff --git a/libdtrace/dt_pid.c b/libdtrace/dt_pid.c
index 36a5883b..bd352455 100644
--- a/libdtrace/dt_pid.c
+++ b/libdtrace/dt_pid.c
@@ -7,7 +7,9 @@
#include <sys/ioctl.h>
#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <unistd.h>
#include <stddef.h>
#include <assert.h>
#include <ctype.h>
@@ -34,11 +36,13 @@
#include <dt_impl.h>
#include <dt_program.h>
+#include <dt_probe.h>
#include <dt_provider.h>
#include <dt_pid.h>
#include <dt_string.h>
#define SEC_STAPSDT_NOTE ".note.stapsdt"
+#define SEC_STAPSDT_BASE ".stapsdt.base"
#define NAME_STAPSDT_NOTE "stapsdt"
/*
@@ -1267,9 +1271,10 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
unsigned long addr_start)
{
size_t shstrndx, noff, doff, off, n;
+ Elf_Scn *scn = NULL, *nscn = NULL;
const prmap_t *pmp = NULL;
+ unsigned long base = 0;
char *mapfile = NULL;
- Elf_Scn *scn = NULL;
Elf *elf = NULL;
GElf_Shdr shdr;
GElf_Ehdr ehdr;
@@ -1287,11 +1292,16 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
path, strerror(errno));
return -1;
}
- mod = strrchr(path, '/');
- if (mod)
- mod++;
- else
- mod = path;
+
+ if (strlen(pdp->mod) == 0) {
+ mod = strrchr(path, '/');
+ if (mod)
+ mod++;
+ else
+ mod = path;
+ } else {
+ mod = (char *)pdp->mod;
+ }
elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); // ELF_C_READ ?
@@ -1323,12 +1333,14 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
secname = elf_strptr(elf, shstrndx, shdr.sh_name);
if (strcmp(secname, SEC_STAPSDT_NOTE) == 0 &&
shdr.sh_type == SHT_NOTE)
- break;
+ nscn = scn;
+ if (strcmp(secname, SEC_STAPSDT_BASE) == 0)
+ base = shdr.sh_addr;
}
/* No ELF notes, just bail. */
- if (scn == NULL)
+ if (nscn == NULL)
goto out;
- data = elf_getdata(scn, 0);
+ data = elf_getdata(nscn, 0);
for (off = 0;
(off = gelf_getnote(data, off, &nhdr, &noff, &doff)) > 0;) {
char prvname[DTRACE_PROVNAMELEN];
@@ -1385,38 +1397,59 @@ dt_stapsdt_parse(dtrace_hdl_t *dtp, dt_proc_t *dpr, dtrace_probedesc_t *pdp,
psp.pps_refcntr_off = addrs[2] - phdr.p_vaddr + phdr.p_offset;
}
+ /* readjust based on optional .stapsdt.base, note base addr. */
+ if (base && addrs[1])
+ psp.pps_off += base - addrs[1];
+
if (!psp.pps_off)
continue;
psp.pps_nameoff = 0;
- if (!pmp)
- pmp = Paddr_to_map(dpr->dpr_proc, addr_start + addrs[0]);
- if (!pmp) {
- dt_dprintf("%i: cannot determine 0x%lx's mapping\n",
- Pgetpid(dpr->dpr_proc), psp.pps_off);
- continue;
- }
- if (!mapfile)
- mapfile = Pmap_mapfile_name(dpr->dpr_proc, pmp);
+ if (dpr) {
+ if (!pmp)
+ pmp = Paddr_to_map(dpr->dpr_proc, addr_start + addrs[0]);
+ if (!pmp) {
+ dt_dprintf("%i: cannot determine 0x%lx's mapping\n",
+ Pgetpid(dpr->dpr_proc), psp.pps_off);
+ continue;
+ }
+ if (!mapfile)
+ mapfile = Pmap_mapfile_name(dpr->dpr_proc, pmp);
- if (!mapfile) {
- dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
- "Cannot get name of mapping containing probe %s for pid %d\n",
- psp.pps_prb, dpr->dpr_pid);
- err = -1;
- break;
- }
- psp.pps_fn = mapfile;
- if (dt_Plookup_by_addr(dtp, dpr->dpr_pid, addr_start + addrs[0],
- &fun, &sym) == 0)
- psp.pps_fun = (char *)fun;
- else
- psp.pps_fun = no_fun;
- psp.pps_dev = pmp->pr_dev;
- psp.pps_inum = pmp->pr_inum;
- psp.pps_pid = dpr->dpr_pid;
- psp.pps_nameoff = 0;
+ if (!mapfile) {
+ dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+ "Cannot get name of mapping containing probe %s for pid %d\n",
+ psp.pps_prb, dpr->dpr_pid);
+ err = -1;
+ break;
+ }
+ psp.pps_fn = mapfile;
+ if (dt_Plookup_by_addr(dtp, dpr->dpr_pid, addr_start + addrs[0],
+ &fun, &sym) == 0)
+ psp.pps_fun = (char *)fun;
+ else
+ psp.pps_fun = no_fun;
+ psp.pps_dev = pmp->pr_dev;
+ psp.pps_inum = pmp->pr_inum;
+ psp.pps_pid = dpr->dpr_pid;
+ psp.pps_nameoff = 0;
+ } else {
+ struct stat stats = {};
+ if (stat(path, &stats)) {
+ dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
+ "failed to stat() %s", path);
+ dtrace_errmsg(dtp, dtrace_errno(dtp));
+ err = -1;
+ break;
+ }
+ psp.pps_mod = mod;
+ psp.pps_dev = stats.st_dev;
+ psp.pps_inum = stats.st_ino;
+ psp.pps_fn = path;
+ psp.pps_fun = no_fun;
+ psp.pps_pid = -1;
+ }
if (pvp->impl->provide_probe(dtp, &psp) < 0) {
dt_pid_error(dtp, pcb, dpr, D_PROC_USDT,
"failed to instantiate probe %s for pid %d: %s",
@@ -1506,6 +1539,52 @@ dt_pid_create_stapsdt_probes_proc(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp,
fclose(fp);
}
+/*
+ * Expand the module name of a probe description into the path of the object
+ * to instrument.
+ *
+ * An absolute module name is copied to 'path' as-is, without checking that it
+ * exists.  Any other name is looked up in colon-separated directory lists:
+ * names containing ".so" are searched for in LD_LIBRARY_PATH, then /usr/lib64
+ * and /usr/lib, and must be readable; all other names are searched for in
+ * PATH, then /usr/bin and /usr/sbin, and must be readable and executable.
+ * The first candidate that is an accessible regular file wins.
+ *
+ * Returns 0 with the result in 'path' (at most 'pathsz' bytes, including the
+ * terminating NUL), -ENAMETOOLONG if an absolute name does not fit in 'path',
+ * or -ENOENT if the name is empty or no candidate was found.  On failure the
+ * contents of 'path' are unspecified.
+ */
+static int expand_modpath(const char *mod, char *path, size_t pathsz)
+{
+	const char *searches[2] = {};
+	size_t i;
+	int perm;
+
+	/* An empty name would otherwise match the first search directory. */
+	if (mod[0] == '\0')
+		return -ENOENT;
+
+	if (mod[0] == '/') {
+		/* Never hand back a silently truncated path. */
+		if (strlcpy(path, mod, pathsz) >= pathsz)
+			return -ENAMETOOLONG;
+		return 0;
+	}
+	if (strstr(mod, ".so")) {
+		searches[0] = getenv("LD_LIBRARY_PATH");
+		searches[1] = "/usr/lib64:/usr/lib";
+		perm = R_OK;
+	} else {
+		searches[0] = getenv("PATH");
+		searches[1] = "/usr/bin:/usr/sbin";
+		perm = R_OK | X_OK;
+	}
+
+	for (i = 0; i < sizeof(searches) / sizeof(searches[0]); i++) {
+		const char *s, *n;
+
+		/* The environment variable may be unset. */
+		if (!searches[i])
+			continue;
+
+		for (s = searches[i]; s != NULL; s = n ? n + 1 : NULL) {
+			struct stat st;
+			size_t len;
+			int ret;
+
+			n = strchr(s, ':');
+			len = n ? (size_t)(n - s) : strlen(s);
+
+			/*
+			 * Skip empty components (they would turn into a
+			 * lookup in the root directory) and components that
+			 * cannot possibly fit in the output buffer.
+			 */
+			if (len == 0 || len >= pathsz)
+				continue;
+
+			ret = snprintf(path, pathsz, "%.*s/%s", (int)len, s, mod);
+			if (ret < 0 || (size_t)ret >= pathsz)
+				continue;
+
+			/* Directories pass the access check too; skip them. */
+			if (stat(path, &st) < 0 || !S_ISREG(st.st_mode))
+				continue;
+			/* make sure accessible */
+			if (faccessat(AT_FDCWD, path, perm, AT_EACCESS) < 0)
+				continue;
+			dt_dprintf("%s: found full path '%s'\n", mod, path);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
static int
dt_pid_create_stapsdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb)
{
@@ -1522,14 +1601,25 @@ dt_pid_create_stapsdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_
pidstr = &pdp->prv[len];
- while (isdigit(*(pidstr - 1)))
- pidstr--;
- if (strlen(pidstr) == 0)
- return 0;
-
pvp = dt_provider_lookup(dtp, "stapsdt");
assert(pvp != NULL);
+ while (isdigit(*(pidstr - 1)))
+ pidstr--;
+ if (strlen(pidstr) == 0) {
+ char m[PATH_MAX];
+
+ /* only full pid wildcards are supported. */
+ if (*(pidstr - 1) != '*')
+ return 0;
+ if (isdigit(*(pidstr - 2)))
+ return 0;
+ if (dt_probe_lookup(dtp, pdp) != NULL)
+ return 0;
+ if (expand_modpath(pdp->mod, m, sizeof(m)))
+ return 0;
+ return dt_stapsdt_parse(dtp, NULL, pdp, pcb, pvp, m, 0);
+ }
pid = atoll(pidstr);
if (pid <= 0)
return 0;
@@ -1612,8 +1702,13 @@ dt_pid_create_usdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *
free(globpat);
globfree(&globbuf);
- if (err == 0)
+ if (err == 0) {
err = dt_pid_create_stapsdt_probes(pdp, dtp, pcb);
+ if (err != 0) {
+ dt_dprintf("stapsdt probe creation %s:%s:%s:%s failed: %d\n",
+ pdp->prv, pdp->mod, pdp->fun, pdp->prb, err);
+ }
+ }
/* If no errors, report success. */
if (err == 0)
diff --git a/libdtrace/dt_prov_uprobe.c b/libdtrace/dt_prov_uprobe.c
index e94827f2..7b41270a 100644
--- a/libdtrace/dt_prov_uprobe.c
+++ b/libdtrace/dt_prov_uprobe.c
@@ -507,7 +507,7 @@ clean_usdt_probes(dtrace_hdl_t *dtp)
list_probe_t *pup = prp->prv_data;
dt_uprobe_t *upp = pup->probe->prv_data;
- if (Pexists(upp->pid))
+ if (upp->pid == -1 || Pexists(upp->pid))
continue;
}
@@ -629,7 +629,7 @@ static int add_probe_usdt(dtrace_hdl_t *dtp, dt_probe_t *prp)
assert(0); // FIXME do something here
/* Even though we just enabled this, check it's still live. */
- if (!Pexists(pid)) {
+ if (pid != -1 && !Pexists(pid)) {
probe_disable(dtp, prp);
dt_bpf_map_delete(fd, &pdp->id);
@@ -919,7 +919,10 @@ static int provide_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp,
dt_probe_t *prp, *uprp;
list_probe_t *pop, *pup;
- snprintf(prv, sizeof(prv), "%s%d", psp->pps_prv, psp->pps_pid);
+ if (psp->pps_pid == -1)
+ snprintf(prv, sizeof(prv), "%s*", psp->pps_prv);
+ else
+ snprintf(prv, sizeof(prv), "%s%d", psp->pps_prv, psp->pps_pid);
pd.id = DTRACE_IDNONE;
pd.prv = prv;
@@ -944,6 +947,7 @@ static int provide_probe(dtrace_hdl_t *dtp, const pid_probespec_t *psp,
return -1;
upp = uprp->prv_data;
+ upp->pid = psp->pps_pid;
upp->flags |= flags;
/* Look up the overlying probe. */
@@ -1552,7 +1556,7 @@ static int uprobe_create(dtrace_hdl_t *dtp, const dt_uprobe_t *upp,
attr.uprobe_path = (uint64_t)upp->fn;
attr.probe_offset = upp->off;
- return dt_perf_event_open(&attr, upp->pid, -1, -1, 0);
+ return dt_perf_event_open(&attr, upp->pid, upp->pid == -1 ? 0 : -1, -1, 0);
}
static int attach(dtrace_hdl_t *dtp, const dt_probe_t *uprp, int bpf_fd)
@@ -1563,7 +1567,10 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *uprp, int bpf_fd)
assert(upp->fn != NULL);
upp->fd = uprobe_create(dtp, upp, upp->refcntr_off);
-
+ if (upp->fd < 0) {
+ dt_dprintf("uprobe_create failed: %d\n", upp->fd);
+ return upp->fd;
+ }
/* attach BPF program to the probe */
if (ioctl(upp->fd, PERF_EVENT_IOC_SET_BPF, bpf_fd) < 0)
return -errno;
--
2.43.5
next prev parent reply other threads:[~2026-01-13 16:51 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-13 16:51 [PATCH v3 0/9] stapsdt provider: simple system-wide probing Alan Maguire
2026-01-13 16:51 ` [PATCH v3 1/9] dt_lex: support '/' in probe descriptors Alan Maguire
2026-01-15 22:48 ` Kris Van Hees
2026-01-16 10:14 ` Alan Maguire
2026-01-13 16:51 ` Alan Maguire [this message]
2026-01-13 16:51 ` [PATCH v3 3/9] test: add systemwide stapsdt note test Alan Maguire
2026-01-13 16:51 ` [PATCH v3 4/9] test: add systemwide stapsdt note test using absolute path Alan Maguire
2026-01-13 16:51 ` [PATCH v3 5/9] test: add systemwide stapsdt note test for library Alan Maguire
2026-01-13 16:51 ` [PATCH v3 6/9] stapsdt: add test for listing systemwide probes in object Alan Maguire
2026-01-13 16:51 ` [PATCH v3 7/9] stapsdt: add test for listing systemwide probes in absolute path object Alan Maguire
2026-01-13 16:51 ` [PATCH v3 8/9] stapsdt: add systemwide test for is-enabled probes Alan Maguire
2026-01-13 16:51 ` [PATCH v3 9/9] documentation: update stapsdt docs to describe wildcard support Alan Maguire
2026-01-16 14:48 ` [PATCH v3 0/9] stapsdt provider: simple system-wide probing Kris Van Hees
2026-01-16 15:10 ` Alan Maguire
2026-01-16 15:33 ` Kris Van Hees
2026-01-16 15:36 ` Alan Maguire
2026-01-16 16:22 ` Kris Van Hees
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260113165132.2454591-3-alan.maguire@oracle.com \
--to=alan.maguire@oracle.com \
--cc=dtrace-devel@oss.oracle.com \
--cc=dtrace@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox