* [PATCHv2] trace: allocate fields with elt struct
@ 2026-05-22 21:26 Rosen Penev
2026-05-26 4:43 ` Masami Hiramatsu
0 siblings, 1 reply; 3+ messages in thread
From: Rosen Penev @ 2026-05-22 21:26 UTC (permalink / raw)
To: linux-trace-kernel
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
open list:TRACING
Use a flexible array member to embed the fields array in the
tracing_map_elt allocation, reducing the number of allocations
per element.
Since the fields are now embedded in the struct, taking the address
of a field through a const-qualified elt pointer yields a
const-qualified pointer. Rather than adding casts, switch the
comparison functions to take const void * parameters. These are
all read-only operations.
Assisted-by: OpenCode:BigPickle
Signed-off-by: Rosen Penev <rosenp@gmail.com>
---
v2: Rebased.
kernel/trace/tracing_map.c | 41 ++++++++++++++++----------------------
kernel/trace/tracing_map.h | 8 ++++----
2 files changed, 21 insertions(+), 28 deletions(-)
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index d7922f40dbe2..928948b50737 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -125,32 +125,32 @@ u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
return (u64)atomic64_read(&elt->vars[i]);
}
-int tracing_map_cmp_string(void *val_a, void *val_b)
+int tracing_map_cmp_string(const void *val_a, const void *val_b)
{
- char *a = val_a;
- char *b = val_b;
+ const char *a = val_a;
+ const char *b = val_b;
return strcmp(a, b);
}
-int tracing_map_cmp_none(void *val_a, void *val_b)
+int tracing_map_cmp_none(const void *val_a, const void *val_b)
{
return 0;
}
-static int tracing_map_cmp_atomic64(void *val_a, void *val_b)
+static int tracing_map_cmp_atomic64(const void *val_a, const void *val_b)
{
- u64 a = atomic64_read((atomic64_t *)val_a);
- u64 b = atomic64_read((atomic64_t *)val_b);
+ u64 a = atomic64_read((const atomic64_t *)val_a);
+ u64 b = atomic64_read((const atomic64_t *)val_b);
return (a > b) ? 1 : ((a < b) ? -1 : 0);
}
#define DEFINE_TRACING_MAP_CMP_FN(type) \
-static int tracing_map_cmp_##type(void *val_a, void *val_b) \
+static int tracing_map_cmp_##type(const void *val_a, const void *val_b) \
{ \
- type a = (type)(*(u64 *)val_a); \
- type b = (type)(*(u64 *)val_b); \
+ type a = (type)(*(const u64 *)val_a); \
+ type b = (type)(*(const u64 *)val_b); \
\
return (a > b) ? 1 : ((a < b) ? -1 : 0); \
}
@@ -383,7 +383,6 @@ static void __tracing_map_elt_free(struct tracing_map_elt *elt)
if (!elt)
return;
- kfree(elt->fields);
kfree(elt->vars);
kfree(elt->var_set);
kfree(elt->key);
@@ -406,7 +405,7 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map)
struct tracing_map_elt *elt;
int err = 0;
- elt = kzalloc_obj(*elt);
+ elt = kzalloc_flex(*elt, fields, map->n_fields);
if (!elt)
return ERR_PTR(-ENOMEM);
@@ -418,12 +417,6 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map)
goto free;
}
- elt->fields = kzalloc_objs(*elt->fields, map->n_fields);
- if (!elt->fields) {
- err = -ENOMEM;
- goto free;
- }
-
elt->vars = kzalloc_objs(*elt->vars, map->n_vars);
if (!elt->vars) {
err = -ENOMEM;
@@ -857,10 +850,10 @@ static int cmp_entries_sum(const void *A, const void *B)
{
const struct tracing_map_elt *elt_a, *elt_b;
const struct tracing_map_sort_entry *a, *b;
- struct tracing_map_sort_key *sort_key;
- struct tracing_map_field *field;
+ const struct tracing_map_sort_key *sort_key;
+ const struct tracing_map_field *field;
tracing_map_cmp_fn_t cmp_fn;
- void *val_a, *val_b;
+ const void *val_a, *val_b;
int ret = 0;
a = *(const struct tracing_map_sort_entry **)A;
@@ -888,10 +881,10 @@ static int cmp_entries_key(const void *A, const void *B)
{
const struct tracing_map_elt *elt_a, *elt_b;
const struct tracing_map_sort_entry *a, *b;
- struct tracing_map_sort_key *sort_key;
- struct tracing_map_field *field;
+ const struct tracing_map_sort_key *sort_key;
+ const struct tracing_map_field *field;
tracing_map_cmp_fn_t cmp_fn;
- void *val_a, *val_b;
+ const void *val_a, *val_b;
int ret = 0;
a = *(const struct tracing_map_sort_entry **)A;
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
index 18a02959d77b..90a7fde5dd02 100644
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -13,7 +13,7 @@
#define TRACING_MAP_VARS_MAX 16
#define TRACING_MAP_SORT_KEYS_MAX 2
-typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
+typedef int (*tracing_map_cmp_fn_t) (const void *val_a, const void *val_b);
/*
* This is an overview of the tracing_map data structures and how they
@@ -137,11 +137,11 @@ struct tracing_map_field {
struct tracing_map_elt {
struct tracing_map *map;
- struct tracing_map_field *fields;
atomic64_t *vars;
bool *var_set;
void *key;
void *private_data;
+ struct tracing_map_field fields[];
};
struct tracing_map_entry {
@@ -260,8 +260,8 @@ tracing_map_lookup(struct tracing_map *map, void *key);
extern tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size,
int field_is_signed);
-extern int tracing_map_cmp_string(void *val_a, void *val_b);
-extern int tracing_map_cmp_none(void *val_a, void *val_b);
+extern int tracing_map_cmp_string(const void *val_a, const void *val_b);
+extern int tracing_map_cmp_none(const void *val_a, const void *val_b);
extern void tracing_map_update_sum(struct tracing_map_elt *elt,
unsigned int i, u64 n);
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCHv2] trace: allocate fields with elt struct
2026-05-22 21:26 [PATCHv2] trace: allocate fields with elt struct Rosen Penev
@ 2026-05-26 4:43 ` Masami Hiramatsu
2026-05-28 20:22 ` Steven Rostedt
0 siblings, 1 reply; 3+ messages in thread
From: Masami Hiramatsu @ 2026-05-26 4:43 UTC (permalink / raw)
To: Rosen Penev, Steven Rostedt
Cc: linux-trace-kernel, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, open list:TRACING
Hi,
Sashiko made 2 comments.
https://sashiko.dev/#/patchset/20260522212627.12406-1-rosenp%40gmail.com
On Fri, 22 May 2026 14:26:27 -0700
Rosen Penev <rosenp@gmail.com> wrote:
> > diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
> > index 0dd7927df22ac..196a7b0be99df 100644
> > --- a/kernel/trace/tracing_map.c
> > +++ b/kernel/trace/tracing_map.c
> [ ... ]
> > @@ -125,32 +125,32 @@ u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
> [ ... ]
> > #define DEFINE_TRACING_MAP_CMP_FN(type) \
> > -static int tracing_map_cmp_##type(void *val_a, void *val_b) \
> > +static int tracing_map_cmp_##type(const void *val_a, const void *val_b) \
> > { \
> > - type a = (type)(*(u64 *)val_a); \
> > - type b = (type)(*(u64 *)val_b); \
> > + type a = (type)(*(const u64 *)val_a); \
> > + type b = (type)(*(const u64 *)val_b); \
> > \
> > return (a > b) ? 1 : ((a < b) ? -1 : 0); \
> > }
> This is a pre-existing issue, but does unconditionally reading 8 bytes
> via the u64 cast cause unaligned access exceptions on architectures that
> do not support them?
> Additionally, for fields near the end of the dynamically allocated elt->key
> buffer, can this trigger KASAN slab-out-of-bounds reads?
> Also, on big-endian architectures, reading a smaller integer as a 64-bit
> value and casting it down extracts the least-significant bytes rather than
> the correct field value. Could this result in completely incorrect sorting
> for small types?
Steve, it seems this comes from your commit 106f41f5a302 ("tracing: Have
the histogram compare functions convert to u64 first").
I think neither of them is a problem, but could you check it?
> > diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
> > index 99c37eeebc161..ed64136782d80 100644
> > --- a/kernel/trace/tracing_map.h
> > +++ b/kernel/trace/tracing_map.h
> [ ... ]
> > @@ -137,11 +137,11 @@ struct tracing_map_field {
> >
> > struct tracing_map_elt {
> > struct tracing_map *map;
> > - struct tracing_map_field *fields;
> > atomic64_t *vars;
> > bool *var_set;
> > void *key;
> > void *private_data;
> > + struct tracing_map_field fields[];
> > };
>
> Is it safe to add a flexible array member to struct tracing_map_elt when it
> is embedded by value in the middle of struct track_data?
> kernel/trace/trace_events_hist.c:struct track_data {
> ...
> struct tracing_map_elt elt;
> ...
> struct action_data *action_data;
> struct hist_trigger_data *hist_data;
> };
> Embedding a struct with a flexible array member in the middle of another
> struct violates C standard constraints. Does this trigger compiler
> warnings (such as -Wflex-array-member-not-at-end on modern compilers) or
> break bounds computations for FORTIFY_SOURCE?
Yeah, from this reason, this is is not acceptable.
To fix this issue, we need to refactor the trace_events_hist.c,
because track_data only uses tracing_map_elt as a placeholder
of private_data.
Thank you,
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCHv2] trace: allocate fields with elt struct
2026-05-26 4:43 ` Masami Hiramatsu
@ 2026-05-28 20:22 ` Steven Rostedt
0 siblings, 0 replies; 3+ messages in thread
From: Steven Rostedt @ 2026-05-28 20:22 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: Rosen Penev, linux-trace-kernel, Mathieu Desnoyers,
open list:TRACING
On Tue, 26 May 2026 13:43:17 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> > > #define DEFINE_TRACING_MAP_CMP_FN(type) \
> > > -static int tracing_map_cmp_##type(void *val_a, void *val_b) \
> > > +static int tracing_map_cmp_##type(const void *val_a, const void *val_b) \
> > > { \
> > > - type a = (type)(*(u64 *)val_a); \
> > > - type b = (type)(*(u64 *)val_b); \
> > > + type a = (type)(*(const u64 *)val_a); \
> > > + type b = (type)(*(const u64 *)val_b); \
> > > \
> > > return (a > b) ? 1 : ((a < b) ? -1 : 0); \
> > > }
> > This is a pre-existing issue, but does unconditionally reading 8 bytes
> > via the u64 cast cause unaligned access exceptions on architectures that
> > do not support them?
> > Additionally, for fields near the end of the dynamically allocated elt->key
> > buffer, can this trigger KASAN slab-out-of-bounds reads?
> > Also, on big-endian architectures, reading a smaller integer as a 64-bit
> > value and casting it down extracts the least-significant bytes rather than
> > the correct field value. Could this result in completely incorrect sorting
> > for small types?
>
> Steve, it seems this comes from your commit 106f41f5a302 ("tracing: Have
> the histogram compare functions convert to u64 first").
>
> I think neither of them is a problem, but could you check it?
This should not be a problem because the pointer being passed in was a
number to begin with. In fact that commit you shared was to fix this
compare on big endian machines. The typecast was specifically made to allow
big endian to work here.
The value is already in a 8 byte (64bit) memory location. It is copied into
it as a 64 bit number. Hence it has to be read as a 64 bit number for the
conversions.
A short would be copied into the location via:
u64 location;
location = short_word;
On big endian, for a short word of 0xabcd, it would be in the memory as:
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xab 0xcd
on little endian, it would be:
0xcd 0xab 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
For big endian to work, it would need to read that first into a 64 bit word
and then convert it back to short.
Thus, Sashiko doesn't know enough here to comment.
-- Steve
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-05-28 20:21 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-22 21:26 [PATCHv2] trace: allocate fields with elt struct Rosen Penev
2026-05-26 4:43 ` Masami Hiramatsu
2026-05-28 20:22 ` Steven Rostedt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox