* [PATCH v2 1/2] Omit an aggregation record if [u][sym|mod] translation fails
@ 2025-06-03 21:08 eugene.loh
2025-08-12 14:06 ` Kris Van Hees
0 siblings, 1 reply; 3+ messages in thread
From: eugene.loh @ 2025-06-03 21:08 UTC (permalink / raw)
To: dtrace, dtrace-devel
From: Eugene Loh <eugene.loh@oracle.com>
An aggregation key can be a sym(), mod(), usym(), or umod()
translation of an address. It is passed from producer to user
space as an address, and the consumer must translate the address.
It is possible for the translation to fail.
Omit a record if the translation fails. This addresses failures
seen before OL9 in
test/unittest/profile-n/tst.ufunc.sh
test/unittest/profile-n/tst.usym.sh
The problem was that the kernel's aggregation buffers are snapshot
multiple times. If a translation ever fails, the raw address is
used instead. Later on, when the aggregation is printed, if the
translation is successful, the raw key will report a count of 0.
E.g., we snapshot an aggregation. The translation of address
0xabcedf fails and so the key remains 0xabcdef. Later the
aggregation is printed and, therefore, again snapshot. The
values are cleared. This time the translation of the address
-- say to the function starting at 0xabcd00 -- is successful.
That aggregation key is successfully and correctly reported, but
the the raw 0xabcdef will mysteriously be reported with a count 0.
It may be argued that some users would like to see addresses that
could not be translated, but typically those unsuccessful addresses
are not very meaningful.
Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
---
libdtrace/dt_aggregate.c | 51 ++++++++++++++++++++++++++++------------
1 file changed, 36 insertions(+), 15 deletions(-)
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
index a18de3a75..32b0faced 100644
--- a/libdtrace/dt_aggregate.c
+++ b/libdtrace/dt_aggregate.c
@@ -317,61 +317,76 @@ dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs)
return 0;
}
-static void
+static int
dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data)
{
uint64_t tgid = data[0];
uint64_t *pc = &data[1];
pid_t pid;
GElf_Sym sym;
+ int rc = 0;
if (dtp->dt_vector != NULL)
- return;
+ return -1;
pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING |
DTRACE_PROC_SHORTLIVED);
if (pid < 0)
- return;
+ return -1;
if (dt_Plookup_by_addr(dtp, pid, *pc, NULL, &sym) == 0)
*pc = sym.st_value;
+ else
+ rc = -1;
dt_proc_release_unlock(dtp, pid);
+
+ return rc;
}
-static void
+static int
dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data)
{
uint64_t tgid = data[0];
uint64_t *pc = &data[1];
pid_t pid;
const prmap_t *map;
+ int rc = 0;
if (dtp->dt_vector != NULL)
- return;
+ return -1;
pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING |
DTRACE_PROC_SHORTLIVED);
if (pid < 0)
- return;
+ return -1;
if ((map = dt_Paddr_to_map(dtp, pid, *pc)) != NULL)
*pc = map->pr_vaddr;
+ else
+ rc = -1;
dt_proc_release_unlock(dtp, pid);
+
+ return rc;
}
-static void
+static int
dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data)
{
GElf_Sym sym;
uint64_t *pc = data;
+ int rc = 0;
if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0)
*pc = sym.st_value;
+ else
+ rc = -1;
+
+ return rc;
}
-static void
+static int
dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
{
dt_module_t *dmp;
@@ -385,7 +400,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
* appear more than once in aggregation output). It seems
* unlikely that anyone will ever notice or care...
*/
- return;
+ return -1;
}
for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL;
@@ -400,7 +415,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
dtrace_addr_range_cmp) != NULL) {
*addr = dmp->dm_text_addrs[0].dar_va;
- return;
+ return 0;
}
if (dmp->dm_data_addrs != NULL &&
@@ -413,9 +428,11 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr)
else
*addr = dmp->dm_data_addrs[0].dar_va;
- return;
+ return 0;
}
}
+
+ return -1;
}
static dtrace_aggid_t
@@ -574,16 +591,20 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key,
switch(agg->dtagd_krecs[i].dtrd_action) {
case DTRACEACT_USYM:
- dt_aggregate_usym(dtp, p);
+ if (dt_aggregate_usym(dtp, p) == -1)
+ return 0;
break;
case DTRACEACT_UMOD:
- dt_aggregate_umod(dtp, p);
+ if (dt_aggregate_umod(dtp, p) == -1)
+ return 0;
break;
case DTRACEACT_SYM:
- dt_aggregate_sym(dtp, p);
+ if (dt_aggregate_sym(dtp, p) == -1)
+ return 0;
break;
case DTRACEACT_MOD:
- dt_aggregate_mod(dtp, p);
+ if (dt_aggregate_mod(dtp, p) == -1)
+ return 0;
break;
default:
break;
--
2.43.5
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH v2 1/2] Omit an aggregation record if [u][sym|mod] translation fails 2025-06-03 21:08 [PATCH v2 1/2] Omit an aggregation record if [u][sym|mod] translation fails eugene.loh @ 2025-08-12 14:06 ` Kris Van Hees 2025-10-30 0:16 ` Eugene Loh 0 siblings, 1 reply; 3+ messages in thread From: Kris Van Hees @ 2025-08-12 14:06 UTC (permalink / raw) To: eugene.loh; +Cc: dtrace, dtrace-devel On Tue, Jun 03, 2025 at 05:08:34PM -0400, eugene.loh@oracle.com wrote: > From: Eugene Loh <eugene.loh@oracle.com> > > An aggregation key can be a sym(), mod(), usym(), or umod() > translation of an address. It is passed from producer to user > space as an address, and the consumer must translate the address. > It is possible for the translation to fail. > > Omit a record if the translation fails. This addresses failures > seen before OL9 in > test/unittest/profile-n/tst.ufunc.sh > test/unittest/profile-n/tst.usym.sh > > The problem was that the kernel's aggregation buffers are snapshot > multiple times. If a translation ever fails, the raw address is > used instead. Later on, when the aggregation is printed, if the > translation is successful, the raw key will report a count of 0. > > E.g., we snapshot an aggregation. The translation of address > 0xabcedf fails and so the key remains 0xabcdef. Later the > aggregation is printed and, therefore, again snapshot. The > values are cleared. This time the translation of the address > -- say to the function starting at 0xabcd00 -- is successful. > That aggregation key is successfully and correctly reported, but > the the raw 0xabcdef will mysteriously be reported with a count 0. More in detail: whenever a snapshot of the aggregation data is taken, its key/value pairs are stored in a hashtable. For keys that contain address resolution elements like usym(), ufunc(), ... the address in the key is replaced with the canonical address for the resolving function (start address of a function for ufunc(), ...). Therefore, if the resolving does not work (no symbol, module, ... found), the key with the reported address is used, and a hash entry is added for it. If, at a later snapshot, the address *can* be resolved, the ley gets modified with the canonical address in place of the reported address, and the data gets hashed using that key. Before every snapshot is taken, all aggregation data is cleared -- but the hash table entries remain -- it is just their data that is cleared. So, the later snapshot that managed to get the address resolved will cause the hash entry with the unresolved address to never get data populated, and that is why is remains 0 from that point forward. > It may be argued that some users would like to see addresses that > could not be translated, but typically those unsuccessful addresses > are not very meaningful. The problem with this approach is that you drop data. If e.g. you were to collect this aggregation for a task that does not have symbolic address data, all addresses would remain unresolved, and you would be ignoring them all. That is certainly not an acceptable approach. With the on-demand snapshot code, I expect that this problem is less likely to occur. But it is certainly still a possibility - incidentally, this has been a problem with the DTrace implementation from the Solaris days. It has the same potential problem. Two approaches come to mind: (1) account for aggregation values to be 0 (or MIN or MAX for their respective agg funcs), and omit those entries when the aggregation data is printed (2) detect this situation (hash entry exists for both unresolved and resolved key), and if so, remove the unresolved hash entry Both have some performance impact and/or make for more complex code. In the end, perhaps we could choose not to do anything about this and allow it to be an artifact of address resolution issues on the system? If anything, they should be easy to spot because they are always going to be entries that have (essentially) no value. > Signed-off-by: Eugene Loh <eugene.loh@oracle.com> > --- > libdtrace/dt_aggregate.c | 51 ++++++++++++++++++++++++++++------------ > 1 file changed, 36 insertions(+), 15 deletions(-) > > diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c > index a18de3a75..32b0faced 100644 > --- a/libdtrace/dt_aggregate.c > +++ b/libdtrace/dt_aggregate.c > @@ -317,61 +317,76 @@ dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs) > return 0; > } > > -static void > +static int > dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data) > { > uint64_t tgid = data[0]; > uint64_t *pc = &data[1]; > pid_t pid; > GElf_Sym sym; > + int rc = 0; > > if (dtp->dt_vector != NULL) > - return; > + return -1; > > pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING | > DTRACE_PROC_SHORTLIVED); > if (pid < 0) > - return; > + return -1; > > if (dt_Plookup_by_addr(dtp, pid, *pc, NULL, &sym) == 0) > *pc = sym.st_value; > + else > + rc = -1; > > dt_proc_release_unlock(dtp, pid); > + > + return rc; > } > > -static void > +static int > dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data) > { > uint64_t tgid = data[0]; > uint64_t *pc = &data[1]; > pid_t pid; > const prmap_t *map; > + int rc = 0; > > if (dtp->dt_vector != NULL) > - return; > + return -1; > > pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING | > DTRACE_PROC_SHORTLIVED); > if (pid < 0) > - return; > + return -1; > > if ((map = dt_Paddr_to_map(dtp, pid, *pc)) != NULL) > *pc = map->pr_vaddr; > + else > + rc = -1; > > dt_proc_release_unlock(dtp, pid); > + > + return rc; > } > > -static void > +static int > dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data) > { > GElf_Sym sym; > uint64_t *pc = data; > + int rc = 0; > > if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0) > *pc = sym.st_value; > + else > + rc = -1; > + > + return rc; > } > > -static void > +static int > dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) > { > dt_module_t *dmp; > @@ -385,7 +400,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) > * appear more than once in aggregation output). It seems > * unlikely that anyone will ever notice or care... > */ > - return; > + return -1; > } > > for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL; > @@ -400,7 +415,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) > dtrace_addr_range_cmp) != NULL) { > > *addr = dmp->dm_text_addrs[0].dar_va; > - return; > + return 0; > } > > if (dmp->dm_data_addrs != NULL && > @@ -413,9 +428,11 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) > else > *addr = dmp->dm_data_addrs[0].dar_va; > > - return; > + return 0; > } > } > + > + return -1; > } > > static dtrace_aggid_t > @@ -574,16 +591,20 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key, > > switch(agg->dtagd_krecs[i].dtrd_action) { > case DTRACEACT_USYM: > - dt_aggregate_usym(dtp, p); > + if (dt_aggregate_usym(dtp, p) == -1) > + return 0; > break; > case DTRACEACT_UMOD: > - dt_aggregate_umod(dtp, p); > + if (dt_aggregate_umod(dtp, p) == -1) > + return 0; > break; > case DTRACEACT_SYM: > - dt_aggregate_sym(dtp, p); > + if (dt_aggregate_sym(dtp, p) == -1) > + return 0; > break; > case DTRACEACT_MOD: > - dt_aggregate_mod(dtp, p); > + if (dt_aggregate_mod(dtp, p) == -1) > + return 0; > break; > default: > break; > -- > 2.43.5 > ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2 1/2] Omit an aggregation record if [u][sym|mod] translation fails 2025-08-12 14:06 ` Kris Van Hees @ 2025-10-30 0:16 ` Eugene Loh 0 siblings, 0 replies; 3+ messages in thread From: Eugene Loh @ 2025-10-30 0:16 UTC (permalink / raw) To: Kris Van Hees; +Cc: dtrace, dtrace-devel Thanks for the review. Yes, ignoring untranslated addresses that, nonetheless, are interesting is a problem. Since related patches have relieved the need for this solution, I'm withdrawing this patch. On 8/12/25 10:06, Kris Van Hees wrote: > On Tue, Jun 03, 2025 at 05:08:34PM -0400, eugene.loh@oracle.com wrote: >> From: Eugene Loh <eugene.loh@oracle.com> >> >> An aggregation key can be a sym(), mod(), usym(), or umod() >> translation of an address. It is passed from producer to user >> space as an address, and the consumer must translate the address. >> It is possible for the translation to fail. >> >> Omit a record if the translation fails. This addresses failures >> seen before OL9 in >> test/unittest/profile-n/tst.ufunc.sh >> test/unittest/profile-n/tst.usym.sh >> >> The problem was that the kernel's aggregation buffers are snapshot >> multiple times. If a translation ever fails, the raw address is >> used instead. Later on, when the aggregation is printed, if the >> translation is successful, the raw key will report a count of 0. >> >> E.g., we snapshot an aggregation. The translation of address >> 0xabcedf fails and so the key remains 0xabcdef. Later the >> aggregation is printed and, therefore, again snapshot. The >> values are cleared. This time the translation of the address >> -- say to the function starting at 0xabcd00 -- is successful. >> That aggregation key is successfully and correctly reported, but >> the the raw 0xabcdef will mysteriously be reported with a count 0. > More in detail: whenever a snapshot of the aggregation data is taken, its > key/value pairs are stored in a hashtable. For keys that contain address > resolution elements like usym(), ufunc(), ... the address in the key is > replaced with the canonical address for the resolving function (start > address of a function for ufunc(), ...). > > Therefore, if the resolving does not work (no symbol, module, ... found), > the key with the reported address is used, and a hash entry is added for > it. > > If, at a later snapshot, the address *can* be resolved, the ley gets > modified with the canonical address in place of the reported address, and > the data gets hashed using that key. > > Before every snapshot is taken, all aggregation data is cleared -- but the > hash table entries remain -- it is just their data that is cleared. So, > the later snapshot that managed to get the address resolved will cause the > hash entry with the unresolved address to never get data populated, and that > is why is remains 0 from that point forward. > >> It may be argued that some users would like to see addresses that >> could not be translated, but typically those unsuccessful addresses >> are not very meaningful. > The problem with this approach is that you drop data. If e.g. you were to > collect this aggregation for a task that does not have symbolic address > data, all addresses would remain unresolved, and you would be ignoring them > all. That is certainly not an acceptable approach. > > With the on-demand snapshot code, I expect that this problem is less likely > to occur. But it is certainly still a possibility - incidentally, this has > been a problem with the DTrace implementation from the Solaris days. It has > the same potential problem. > > Two approaches come to mind: > (1) account for aggregation values to be 0 (or MIN or MAX for their respective > agg funcs), and omit those entries when the aggregation data is printed > (2) detect this situation (hash entry exists for both unresolved and resolved > key), and if so, remove the unresolved hash entry > > Both have some performance impact and/or make for more complex code. In the > end, perhaps we could choose not to do anything about this and allow it to be > an artifact of address resolution issues on the system? If anything, they > should be easy to spot because they are always going to be entries that have > (essentially) no value. > >> Signed-off-by: Eugene Loh <eugene.loh@oracle.com> >> --- >> libdtrace/dt_aggregate.c | 51 ++++++++++++++++++++++++++++------------ >> 1 file changed, 36 insertions(+), 15 deletions(-) >> >> diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c >> index a18de3a75..32b0faced 100644 >> --- a/libdtrace/dt_aggregate.c >> +++ b/libdtrace/dt_aggregate.c >> @@ -317,61 +317,76 @@ dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs) >> return 0; >> } >> >> -static void >> +static int >> dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data) >> { >> uint64_t tgid = data[0]; >> uint64_t *pc = &data[1]; >> pid_t pid; >> GElf_Sym sym; >> + int rc = 0; >> >> if (dtp->dt_vector != NULL) >> - return; >> + return -1; >> >> pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING | >> DTRACE_PROC_SHORTLIVED); >> if (pid < 0) >> - return; >> + return -1; >> >> if (dt_Plookup_by_addr(dtp, pid, *pc, NULL, &sym) == 0) >> *pc = sym.st_value; >> + else >> + rc = -1; >> >> dt_proc_release_unlock(dtp, pid); >> + >> + return rc; >> } >> >> -static void >> +static int >> dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data) >> { >> uint64_t tgid = data[0]; >> uint64_t *pc = &data[1]; >> pid_t pid; >> const prmap_t *map; >> + int rc = 0; >> >> if (dtp->dt_vector != NULL) >> - return; >> + return -1; >> >> pid = dt_proc_grab_lock(dtp, tgid, DTRACE_PROC_WAITING | >> DTRACE_PROC_SHORTLIVED); >> if (pid < 0) >> - return; >> + return -1; >> >> if ((map = dt_Paddr_to_map(dtp, pid, *pc)) != NULL) >> *pc = map->pr_vaddr; >> + else >> + rc = -1; >> >> dt_proc_release_unlock(dtp, pid); >> + >> + return rc; >> } >> >> -static void >> +static int >> dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data) >> { >> GElf_Sym sym; >> uint64_t *pc = data; >> + int rc = 0; >> >> if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0) >> *pc = sym.st_value; >> + else >> + rc = -1; >> + >> + return rc; >> } >> >> -static void >> +static int >> dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) >> { >> dt_module_t *dmp; >> @@ -385,7 +400,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) >> * appear more than once in aggregation output). It seems >> * unlikely that anyone will ever notice or care... >> */ >> - return; >> + return -1; >> } >> >> for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL; >> @@ -400,7 +415,7 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) >> dtrace_addr_range_cmp) != NULL) { >> >> *addr = dmp->dm_text_addrs[0].dar_va; >> - return; >> + return 0; >> } >> >> if (dmp->dm_data_addrs != NULL && >> @@ -413,9 +428,11 @@ dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *addr) >> else >> *addr = dmp->dm_data_addrs[0].dar_va; >> >> - return; >> + return 0; >> } >> } >> + >> + return -1; >> } >> >> static dtrace_aggid_t >> @@ -574,16 +591,20 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key, >> >> switch(agg->dtagd_krecs[i].dtrd_action) { >> case DTRACEACT_USYM: >> - dt_aggregate_usym(dtp, p); >> + if (dt_aggregate_usym(dtp, p) == -1) >> + return 0; >> break; >> case DTRACEACT_UMOD: >> - dt_aggregate_umod(dtp, p); >> + if (dt_aggregate_umod(dtp, p) == -1) >> + return 0; >> break; >> case DTRACEACT_SYM: >> - dt_aggregate_sym(dtp, p); >> + if (dt_aggregate_sym(dtp, p) == -1) >> + return 0; >> break; >> case DTRACEACT_MOD: >> - dt_aggregate_mod(dtp, p); >> + if (dt_aggregate_mod(dtp, p) == -1) >> + return 0; >> break; >> default: >> break; >> -- >> 2.43.5 >> ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-10-30 0:16 UTC | newest] Thread overview: 3+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-06-03 21:08 [PATCH v2 1/2] Omit an aggregation record if [u][sym|mod] translation fails eugene.loh 2025-08-12 14:06 ` Kris Van Hees 2025-10-30 0:16 ` Eugene Loh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox