* [PATCH v3 1/7] list: Add mutable iterator variants
From: Kaitao Cheng @ 2026-06-22 4:05 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Jens Axboe, Tejun Heo,
Alexander Viro, Christian Brauner, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Johannes Weiner, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim,
Thomas Gleixner, Juri Lelli, Vincent Guittot, Paul Moore,
Andy Shevchenko, Paul E. McKenney, Shakeel Butt,
Christian König
Cc: David Howells, Simona Vetter, Randy Dunlap, Luca Ceresoli,
Philipp Stanner, linux-block, linux-kernel, cgroups,
linux-ntfs-dev, linux-fsdevel, io-uring, audit, bpf, netdev,
dri-devel, linux-perf-users, linux-trace-kernel, kexec,
live-patching, linux-modules, linux-crypto, linux-pm, rcu,
sched-ext, linux-mm, virtualization, damon, llvm, Kaitao Cheng
In-Reply-To: <20260622040533.29824-1-kaitao.cheng@linux.dev>
From: Kaitao Cheng <chengkaitao@kylinos.cn>
The list_for_each*_safe() helpers are used when the loop body may
remove the current entry. Their API exposes the temporary cursor at
every call site, even though most users only need it for the iterator
implementation and never reference it in the loop body.
Add *_mutable() variants for list and hlist iteration. The new helpers
support both forms: callers may keep passing an explicit temporary cursor
when they need to inspect or reset it, or omit it and let the helper use
a unique internal cursor.
This makes call sites that only mutate the list through the current entry
less noisy, while keeping the existing *_safe() helpers available for
compatibility.
Signed-off-by: Kaitao Cheng <chengkaitao@kylinos.cn>
---
include/linux/list.h | 269 +++++++++++++++++++++++++++++++++++++------
1 file changed, 231 insertions(+), 38 deletions(-)
diff --git a/include/linux/list.h b/include/linux/list.h
index 09d979976b3b..1081def7cea9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -7,6 +7,7 @@
#include <linux/stddef.h>
#include <linux/poison.h>
#include <linux/const.h>
+#include <linux/args.h>
#include <asm/barrier.h>
@@ -763,28 +764,72 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_prev(pos, head) \
for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos: the &struct list_head to use as a loop cursor.
- * @n: another &struct list_head to use as temporary storage
- * @head: the head for your list.
+/*
+ * list_for_each_safe is an old interface, use list_for_each_mutable instead.
*/
#define list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; \
!list_is_head(pos, (head)); \
pos = n, n = pos->next)
+#define __list_for_each_mutable_internal(pos, tmp, head) \
+ for (typeof(pos) tmp = (pos = (head)->next)->next; \
+ !list_is_head(pos, (head)); \
+ pos = tmp, tmp = pos->next)
+
+#define __list_for_each_mutable1(pos, head) \
+ __list_for_each_mutable_internal(pos, __UNIQUE_ID(next), head)
+
+#define __list_for_each_mutable2(pos, next, head) \
+ list_for_each_safe(pos, next, head)
+
/**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
+ * list_for_each_mutable - iterate over a list safe against entry removal
* @pos: the &struct list_head to use as a loop cursor.
- * @n: another &struct list_head to use as temporary storage
- * @head: the head for your list.
+ * @...: either (head) or (next, head)
+ *
+ * next: another &struct list_head to use as optional temporary storage.
+ * The temporary cursor is internal unless explicitly supplied by
+ * the caller.
+ * head: the head for your list.
+ */
+#define list_for_each_mutable(pos, ...) \
+ CONCATENATE(__list_for_each_mutable, COUNT_ARGS(__VA_ARGS__)) \
+ (pos, __VA_ARGS__)
+
+/*
+ * list_for_each_prev_safe is an old interface, use list_for_each_prev_mutable instead.
*/
#define list_for_each_prev_safe(pos, n, head) \
for (pos = (head)->prev, n = pos->prev; \
!list_is_head(pos, (head)); \
pos = n, n = pos->prev)
+#define __list_for_each_prev_mutable_internal(pos, tmp, head) \
+ for (typeof(pos) tmp = (pos = (head)->prev)->prev; \
+ !list_is_head(pos, (head)); \
+ pos = tmp, tmp = pos->prev)
+
+#define __list_for_each_prev_mutable1(pos, head) \
+ __list_for_each_prev_mutable_internal(pos, __UNIQUE_ID(prev), head)
+
+#define __list_for_each_prev_mutable2(pos, prev, head) \
+ list_for_each_prev_safe(pos, prev, head)
+
+/**
+ * list_for_each_prev_mutable - iterate over a list backwards safe against entry removal
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @...: either (head) or (prev, head)
+ *
+ * prev: another &struct list_head to use as optional temporary storage.
+ * The temporary cursor is internal unless explicitly supplied by
+ * the caller.
+ * head: the head for your list.
+ */
+#define list_for_each_prev_mutable(pos, ...) \
+ CONCATENATE(__list_for_each_prev_mutable, COUNT_ARGS(__VA_ARGS__)) \
+ (pos, __VA_ARGS__)
+
/**
* list_count_nodes - count nodes in the list
* @head: the head for your list.
@@ -895,12 +940,8 @@ static inline size_t list_count_nodes(struct list_head *head)
for (; !list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos: the type * to use as a loop cursor.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_head within the struct.
+/*
+ * list_for_each_entry_safe is an old interface, use list_for_each_entry_mutable instead.
*/
#define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member), \
@@ -908,15 +949,36 @@ static inline size_t list_count_nodes(struct list_head *head)
!list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
+#define __list_for_each_entry_mutable_internal(pos, tmp, head, member) \
+ for (typeof(pos) tmp = list_next_entry(pos = \
+ list_first_entry(head, typeof(*pos), member), member); \
+ !list_entry_is_head(pos, head, member); \
+ pos = tmp, tmp = list_next_entry(tmp, member))
+
+#define __list_for_each_entry_mutable2(pos, head, member) \
+ __list_for_each_entry_mutable_internal(pos, __UNIQUE_ID(next), head, member)
+
+#define __list_for_each_entry_mutable3(pos, next, head, member) \
+ list_for_each_entry_safe(pos, next, head, member)
+
/**
- * list_for_each_entry_safe_continue - continue list iteration safe against removal
+ * list_for_each_entry_mutable - iterate over a list safe against entry removal
* @pos: the type * to use as a loop cursor.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_head within the struct.
+ * @...: either (head, member) or (next, head, member)
*
- * Iterate over list of given type, continuing after current point,
- * safe against removal of list entry.
+ * next: another type * to use as optional temporary storage. The
+ * temporary cursor is internal unless explicitly supplied by the
+ * caller.
+ * head: the head for your list.
+ * member: the name of the list_head within the struct.
+ */
+#define list_for_each_entry_mutable(pos, ...) \
+ CONCATENATE(__list_for_each_entry_mutable, COUNT_ARGS(__VA_ARGS__)) \
+ (pos, __VA_ARGS__)
+
+/*
+ * list_for_each_entry_safe_continue is an old interface,
+ * use list_for_each_entry_mutable_continue instead.
*/
#define list_for_each_entry_safe_continue(pos, n, head, member) \
for (pos = list_next_entry(pos, member), \
@@ -924,30 +986,79 @@ static inline size_t list_count_nodes(struct list_head *head)
!list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
+#define __list_for_each_entry_mutable_continue_internal(pos, tmp, head, member) \
+ for (typeof(pos) tmp = list_next_entry(pos = \
+ list_next_entry(pos, member), member); \
+ !list_entry_is_head(pos, head, member); \
+ pos = tmp, tmp = list_next_entry(tmp, member))
+
+#define __list_for_each_entry_mutable_continue2(pos, head, member) \
+ __list_for_each_entry_mutable_continue_internal(pos, \
+ __UNIQUE_ID(next), head, member)
+
+#define __list_for_each_entry_mutable_continue3(pos, next, head, member) \
+ list_for_each_entry_safe_continue(pos, next, head, member)
+
/**
- * list_for_each_entry_safe_from - iterate over list from current point safe against removal
+ * list_for_each_entry_mutable_continue - continue list iteration safe against removal
* @pos: the type * to use as a loop cursor.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_head within the struct.
+ * @...: either (head, member) or (next, head, member)
*
- * Iterate over list of given type from current point, safe against
- * removal of list entry.
+ * next: another type * to use as optional temporary storage. The
+ * temporary cursor is internal unless explicitly supplied by the
+ * caller.
+ * head: the head for your list.
+ * member: the name of the list_head within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_mutable_continue(pos, ...) \
+ CONCATENATE(__list_for_each_entry_mutable_continue, \
+ COUNT_ARGS(__VA_ARGS__))(pos, __VA_ARGS__)
+
+/*
+ * list_for_each_entry_safe_from is an old interface,
+ * use list_for_each_entry_mutable_from instead.
*/
#define list_for_each_entry_safe_from(pos, n, head, member) \
for (n = list_next_entry(pos, member); \
!list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
+#define __list_for_each_entry_mutable_from_internal(pos, tmp, head, member) \
+ for (typeof(pos) tmp = list_next_entry(pos, member); \
+ !list_entry_is_head(pos, head, member); \
+ pos = tmp, tmp = list_next_entry(tmp, member))
+
+#define __list_for_each_entry_mutable_from2(pos, head, member) \
+ __list_for_each_entry_mutable_from_internal(pos, \
+ __UNIQUE_ID(next), head, member)
+
+#define __list_for_each_entry_mutable_from3(pos, next, head, member) \
+ list_for_each_entry_safe_from(pos, next, head, member)
+
/**
- * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * list_for_each_entry_mutable_from - iterate over list from current point safe against removal
* @pos: the type * to use as a loop cursor.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_head within the struct.
+ * @...: either (head, member) or (next, head, member)
*
- * Iterate backwards over list of given type, safe against removal
- * of list entry.
+ * next: another type * to use as optional temporary storage. The
+ * temporary cursor is internal unless explicitly supplied by the
+ * caller.
+ * head: the head for your list.
+ * member: the name of the list_head within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_mutable_from(pos, ...) \
+ CONCATENATE(__list_for_each_entry_mutable_from, \
+ COUNT_ARGS(__VA_ARGS__))(pos, __VA_ARGS__)
+
+/*
+ * list_for_each_entry_safe_reverse is an old interface,
+ * use list_for_each_entry_mutable_reverse instead.
*/
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
for (pos = list_last_entry(head, typeof(*pos), member), \
@@ -955,6 +1066,37 @@ static inline size_t list_count_nodes(struct list_head *head)
!list_entry_is_head(pos, head, member); \
pos = n, n = list_prev_entry(n, member))
+#define __list_for_each_entry_mutable_reverse_internal(pos, tmp, head, member) \
+ for (typeof(pos) tmp = list_prev_entry(pos = \
+ list_last_entry(head, typeof(*pos), member), member); \
+ !list_entry_is_head(pos, head, member); \
+ pos = tmp, tmp = list_prev_entry(tmp, member))
+
+#define __list_for_each_entry_mutable_reverse2(pos, head, member) \
+ __list_for_each_entry_mutable_reverse_internal(pos, \
+ __UNIQUE_ID(prev), head, member)
+
+#define __list_for_each_entry_mutable_reverse3(pos, prev, head, member) \
+ list_for_each_entry_safe_reverse(pos, prev, head, member)
+
+/**
+ * list_for_each_entry_mutable_reverse - iterate backwards over list safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @...: either (head, member) or (prev, head, member)
+ *
+ * prev: another type * to use as optional temporary storage. The
+ * temporary cursor is internal unless explicitly supplied by the
+ * caller.
+ * head: the head for your list.
+ * member: the name of the list_head within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_mutable_reverse(pos, ...) \
+ CONCATENATE(__list_for_each_entry_mutable_reverse, \
+ COUNT_ARGS(__VA_ARGS__))(pos, __VA_ARGS__)
+
/**
* list_safe_reset_next - reset a stale list_for_each_entry_safe loop
* @pos: the loop cursor used in the list_for_each_entry_safe loop
@@ -1189,6 +1331,31 @@ static inline void hlist_splice_init(struct hlist_head *from,
for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
pos = n)
+#define __hlist_for_each_mutable_internal(pos, tmp, head) \
+ for (typeof(pos) tmp = (pos = (head)->first) ? pos->next : NULL; \
+ pos; \
+ pos = tmp, tmp = pos ? pos->next : NULL)
+
+#define __hlist_for_each_mutable1(pos, head) \
+ __hlist_for_each_mutable_internal(pos, __UNIQUE_ID(next), head)
+
+#define __hlist_for_each_mutable2(pos, next, head) \
+ hlist_for_each_safe(pos, next, head)
+
+/**
+ * hlist_for_each_mutable - iterate over a hlist safe against entry removal
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @...: either (head) or (next, head)
+ *
+ * next: another &struct hlist_node to use as optional temporary storage.
+ * The temporary cursor is internal unless explicitly supplied by
+ * the caller.
+ * head: the head for your hlist.
+ */
+#define hlist_for_each_mutable(pos, ...) \
+ CONCATENATE(__hlist_for_each_mutable, COUNT_ARGS(__VA_ARGS__)) \
+ (pos, __VA_ARGS__)
+
#define hlist_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? hlist_entry(____ptr, type, member) : NULL; \
@@ -1224,18 +1391,44 @@ static inline void hlist_splice_init(struct hlist_head *from,
for (; pos; \
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
-/**
- * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos: the type * to use as a loop cursor.
- * @n: a &struct hlist_node to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the hlist_node within the struct.
+/*
+ * hlist_for_each_entry_safe is an old interface, use hlist_for_each_entry_mutable instead.
*/
#define hlist_for_each_entry_safe(pos, n, head, member) \
for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\
pos && ({ n = pos->member.next; 1; }); \
pos = hlist_entry_safe(n, typeof(*pos), member))
+#define __hlist_for_each_entry_mutable_internal(pos, tmp, head, member) \
+ for (struct hlist_node *tmp = (pos = \
+ hlist_entry_safe((head)->first, typeof(*pos), member)) ? \
+ pos->member.next : NULL; \
+ pos; \
+ pos = hlist_entry_safe((tmp), typeof(*pos), member), \
+ tmp = pos ? pos->member.next : NULL)
+
+#define __hlist_for_each_entry_mutable2(pos, head, member) \
+ __hlist_for_each_entry_mutable_internal(pos, \
+ __UNIQUE_ID(next), head, member)
+
+#define __hlist_for_each_entry_mutable3(pos, next, head, member) \
+ hlist_for_each_entry_safe(pos, next, head, member)
+
+/**
+ * hlist_for_each_entry_mutable - iterate over hlist safe against entry removal
+ * @pos: the type * to use as a loop cursor.
+ * @...: either (head, member) or (next, head, member)
+ *
+ * next: a &struct hlist_node to use as optional temporary storage. The
+ * temporary cursor is internal unless explicitly supplied by the
+ * caller.
+ * head: the head for your hlist.
+ * member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_mutable(pos, ...) \
+ CONCATENATE(__hlist_for_each_entry_mutable, \
+ COUNT_ARGS(__VA_ARGS__))(pos, __VA_ARGS__)
+
/**
* hlist_count_nodes - count nodes in the hlist
* @head: the head for your hlist.
--
2.43.0
^ permalink raw reply related
* [PATCH v3 0/7] Prepare mutable list iterators to cache cursor state
From: Kaitao Cheng @ 2026-06-22 4:05 UTC (permalink / raw)
To: Andrew Morton, David Hildenbrand, Jens Axboe, Tejun Heo,
Alexander Viro, Christian Brauner, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Johannes Weiner, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim,
Thomas Gleixner, Juri Lelli, Vincent Guittot, Paul Moore,
Andy Shevchenko, Paul E. McKenney, Shakeel Butt,
Christian König
Cc: David Howells, Simona Vetter, Randy Dunlap, Luca Ceresoli,
Philipp Stanner, linux-block, linux-kernel, cgroups,
linux-ntfs-dev, linux-fsdevel, io-uring, audit, bpf, netdev,
dri-devel, linux-perf-users, linux-trace-kernel, kexec,
live-patching, linux-modules, linux-crypto, linux-pm, rcu,
sched-ext, linux-mm, virtualization, damon, llvm, chengkaitao
From: chengkaitao <chengkaitao@kylinos.cn>
The list_for_each*_safe() helpers are used when the loop body may remove
the current entry. Their current interface, however, forces every caller
to define a temporary cursor outside the macro and pass it in, even when
the caller never uses that cursor directly. For most call sites this
extra cursor is just boilerplate required by the macro implementation.
This is awkward because the saved next pointer is an internal detail of
the iteration. Callers that only remove or move the current entry do not
need to spell it out.
The _safe() suffix has also caused confusion. Christian Koenig pointed
out that the name is easy to read as a thread-safe variant, especially
for beginners, even though it only means that the iterator keeps enough
state to tolerate removal of the current entry. He suggested _mutable()
as a clearer description of what the loop permits.
Add *_mutable() iterator variants for list, hlist and llist. The new
helpers are variadic and support both forms. In the common case, the
caller omits the temporary cursor and the macro creates a unique internal
cursor with typeof(pos) and __UNIQUE_ID(). If a loop really needs an
explicit temporary cursor, the caller can still pass it and the helper
keeps the existing *_safe() behaviour.
For example, a call site may use the shorter form:
list_for_each_entry_mutable(pos, head, member)
or keep the explicit temporary cursor form:
list_for_each_entry_mutable(pos, tmp, head, member)
The existing *_safe() helpers remain available for compatibility. This
series only converts users in mm, block, kernel, init and io_uring. If
this approach looks acceptable, the remaining users can be converted in
follow-up series.
Changes in v3 (Christian König, Andy Shevchenko):
- Convert safe list walks to mutable iterators
Changes in v2 (Muchun Song, Andy Shevchenko):
- Drop the list_for_each_entry_mutable*() helpers from v1 and make the
cursor change directly in the existing list_for_each_entry*() helpers.
- Open-code special list walks that rely on updating the loop cursor in
the body, preserving their existing traversal semantics.
Link to v2:
https://lore.kernel.org/all/20260609061347.93688-1-kaitao.cheng@linux.dev/
Link to v1:
https://lore.kernel.org/all/20260529082149.76764-1-kaitao.cheng@linux.dev/
Kaitao Cheng (7):
list: Add mutable iterator variants
llist: Add mutable iterator variants
mm: Use mutable list iterators
block: Use mutable list iterators
kernel: Use mutable list iterators
initramfs: Use mutable list iterator
io_uring: Use mutable list iterators
block/bfq-iosched.c | 17 +-
block/blk-cgroup.c | 12 +-
block/blk-flush.c | 4 +-
block/blk-iocost.c | 18 +-
block/blk-mq.c | 8 +-
block/blk-throttle.c | 4 +-
block/kyber-iosched.c | 4 +-
block/partitions/ldm.c | 8 +-
block/sed-opal.c | 4 +-
include/linux/list.h | 269 ++++++++++++++++++++++++----
include/linux/llist.h | 81 +++++++--
init/initramfs.c | 5 +-
io_uring/cancel.c | 6 +-
io_uring/poll.c | 3 +-
io_uring/rw.c | 4 +-
io_uring/timeout.c | 8 +-
io_uring/uring_cmd.c | 3 +-
kernel/audit_tree.c | 4 +-
kernel/audit_watch.c | 16 +-
kernel/auditfilter.c | 4 +-
kernel/auditsc.c | 4 +-
kernel/bpf/arena.c | 10 +-
kernel/bpf/arraymap.c | 8 +-
kernel/bpf/bpf_local_storage.c | 3 +-
kernel/bpf/bpf_lru_list.c | 25 ++-
kernel/bpf/btf.c | 18 +-
kernel/bpf/cgroup.c | 7 +-
kernel/bpf/cpumap.c | 4 +-
kernel/bpf/devmap.c | 10 +-
kernel/bpf/helpers.c | 8 +-
kernel/bpf/local_storage.c | 4 +-
kernel/bpf/memalloc.c | 16 +-
kernel/bpf/offload.c | 8 +-
kernel/bpf/states.c | 4 +-
kernel/bpf/stream.c | 4 +-
kernel/bpf/verifier.c | 6 +-
kernel/cgroup/cgroup-v1.c | 4 +-
kernel/cgroup/cgroup.c | 54 +++---
kernel/cgroup/dmem.c | 12 +-
kernel/cgroup/rdma.c | 8 +-
kernel/events/core.c | 44 +++--
kernel/events/uprobes.c | 12 +-
kernel/exit.c | 8 +-
kernel/fail_function.c | 4 +-
kernel/gcov/clang.c | 4 +-
kernel/irq_work.c | 4 +-
kernel/kexec_core.c | 4 +-
kernel/kprobes.c | 16 +-
kernel/livepatch/core.c | 4 +-
kernel/livepatch/core.h | 4 +-
kernel/liveupdate/kho_block.c | 4 +-
kernel/liveupdate/luo_flb.c | 4 +-
kernel/locking/rwsem.c | 2 +-
kernel/locking/test-ww_mutex.c | 2 +-
kernel/module/main.c | 11 +-
kernel/padata.c | 4 +-
kernel/power/snapshot.c | 8 +-
kernel/power/wakelock.c | 4 +-
kernel/printk/printk.c | 11 +-
kernel/ptrace.c | 4 +-
kernel/rcu/rcutorture.c | 3 +-
kernel/rcu/tasks.h | 9 +-
kernel/rcu/tree.c | 6 +-
kernel/resource.c | 4 +-
kernel/sched/core.c | 4 +-
kernel/sched/ext.c | 22 +--
kernel/sched/fair.c | 28 +--
kernel/sched/topology.c | 4 +-
kernel/sched/wait.c | 4 +-
kernel/seccomp.c | 4 +-
kernel/signal.c | 11 +-
kernel/smp.c | 4 +-
kernel/taskstats.c | 8 +-
kernel/time/clockevents.c | 6 +-
kernel/time/clocksource.c | 4 +-
kernel/time/posix-cpu-timers.c | 4 +-
kernel/time/posix-timers.c | 3 +-
kernel/torture.c | 3 +-
kernel/trace/bpf_trace.c | 4 +-
kernel/trace/ftrace.c | 49 +++--
kernel/trace/ring_buffer.c | 25 ++-
kernel/trace/trace.c | 12 +-
kernel/trace/trace_dynevent.c | 6 +-
kernel/trace/trace_dynevent.h | 5 +-
kernel/trace/trace_events.c | 35 ++--
kernel/trace/trace_events_filter.c | 4 +-
kernel/trace/trace_events_hist.c | 8 +-
kernel/trace/trace_events_trigger.c | 17 +-
kernel/trace/trace_events_user.c | 16 +-
kernel/trace/trace_stat.c | 4 +-
kernel/user-return-notifier.c | 3 +-
kernel/workqueue.c | 16 +-
mm/backing-dev.c | 8 +-
mm/balloon.c | 8 +-
mm/cma.c | 4 +-
mm/compaction.c | 4 +-
mm/damon/core.c | 4 +-
mm/damon/sysfs-schemes.c | 4 +-
mm/dmapool.c | 4 +-
mm/huge_memory.c | 8 +-
mm/hugetlb.c | 56 +++---
mm/hugetlb_vmemmap.c | 16 +-
mm/khugepaged.c | 14 +-
mm/kmemleak.c | 7 +-
mm/ksm.c | 25 +--
mm/list_lru.c | 4 +-
mm/memcontrol-v1.c | 8 +-
mm/memory-failure.c | 12 +-
mm/memory-tiers.c | 4 +-
mm/migrate.c | 23 ++-
mm/mmu_notifier.c | 9 +-
mm/page_alloc.c | 8 +-
mm/page_reporting.c | 2 +-
mm/percpu.c | 11 +-
mm/pgtable-generic.c | 4 +-
mm/rmap.c | 10 +-
mm/shmem.c | 9 +-
mm/slab_common.c | 14 +-
mm/slub.c | 33 ++--
mm/swapfile.c | 4 +-
mm/userfaultfd.c | 12 +-
mm/vmalloc.c | 24 +--
mm/vmscan.c | 7 +-
mm/zsmalloc.c | 4 +-
124 files changed, 875 insertions(+), 681 deletions(-)
--
2.43.0
^ permalink raw reply
* Re: [PATCH v2] tracing: eprobe: read the complete FILTER_PTR_STRING pointer
From: Masami Hiramatsu @ 2026-06-22 3:59 UTC (permalink / raw)
To: Martin Kaiser; +Cc: Steven Rostedt, linux-trace-kernel, linux-kernel
In-Reply-To: <20260620145339.3234726-1-martin@kaiser.cx>
On Sat, 20 Jun 2026 16:48:59 +0200
Martin Kaiser <martin@kaiser.cx> wrote:
> For a char * element in an event, the FILTER_PTR_STRING filter type is
> used. When the event occurs, a pointer is stored in the ringbuffer.
>
> If an eprobe references such a char * element of a "base event", the
> stored pointer is truncated when it's read from the ringbuffer.
>
> $ cd /sys/kernel/tracing
> $ echo 'e rcu.rcu_utilization $s:x64 $s:string' > dynamic_events
> $ echo 1 > tracing_on
> $ echo 1 > events/eprobes/enable
> $ sleep 1
> $ echo 0 > events/eprobes/enable
> $ cat trace
> <idle>-0 ...: (rcu.rcu_utilization) arg1=0x4f arg2=(fault)
> <idle>-0 ...: (rcu.rcu_utilization) arg1=0x2 arg2=(fault)
>
> The problem is in get_event_field
>
> val = (unsigned long)(*(char *)addr);
>
> addr points to the position in the ringbuffer where the pointer was
> stored. The assignment reads only the lowest byte of the pointer.
>
> Fix the cast to read the whole pointer. The output of the test above
> is now
>
> <idle>-0 ... arg1=0xffffffff81c7d3f3 arg2="Start scheduler-tick"
> <idle>-0 ... arg1=0xffffffff81c57340 arg2="End scheduler-tick"
This looks good to me. Let me pick it.
Thanks!
>
> Fixes: f04dec93466a ("tracing/eprobes: Fix reading of string fields")
> Signed-off-by: Martin Kaiser <martin@kaiser.cx>
> ---
> v2
> - use rcu event as an example, don't dereference a user pointer
> - add Fixes tag
>
> kernel/trace/trace_eprobe.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
> index b66d6196338d..50518b071414 100644
> --- a/kernel/trace/trace_eprobe.c
> +++ b/kernel/trace/trace_eprobe.c
> @@ -315,7 +315,7 @@ get_event_field(struct fetch_insn *code, void *rec)
> val = (unsigned long)addr;
> break;
> case FILTER_PTR_STRING:
> - val = (unsigned long)(*(char *)addr);
> + val = *(unsigned long *)addr;
> break;
> default:
> WARN_ON_ONCE(1);
> --
> 2.43.7
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH] tracing: eprobe: read the complete FILTER_PTR_STRING pointer
From: Masami Hiramatsu @ 2026-06-22 3:58 UTC (permalink / raw)
To: Martin Kaiser; +Cc: Steven Rostedt, linux-trace-kernel, linux-kernel
In-Reply-To: <ajasLMUt_AMM3ztH@akranes.kaiser.cx>
On Sat, 20 Jun 2026 17:05:16 +0200
Martin Kaiser <martin@kaiser.cx> wrote:
> Thus wrote Masami Hiramatsu (mhiramat@kernel.org):
>
> > Ah, OK. I understand the problem.
>
> > - ring buffer and its records should be self-contained.
> > - In most cases, events use __data_loc/__rel_loc or fixed array to store
> > strings.
> > - only syscall events exposes the char *, which is not recommended but
> > important to debug user space. (not for dereference)
>
> > The example usage of FILTER_PTR_STRING is actually using FILTER_STATIC_STRING
> > now, so FILTER_PTR_STRING is left broken. (hmm, but there are many
> > "const char *" are used especially under rcu events...)
>
> > OK, can you update your patch description to use rcu events?
>
> I've just sent a v2 with an rcu event as an example.
OK, let me check.
>
> > BTW, I think those also should be decoded from enum value in the events,
> > or use __rel_loc. Since it is not self-contained. (it's a TODO item)
>
> That makes sense. But it needs a bit more space in the ringbuffer for each
> event.
Yeah, but it just exposes the raw kernel address to users. That's not good.
>
> > > > I think better solution is fixing sycall tracer.
>
> > > I would say that syscall trace is doing the right thing. The ringbuffer entry
> > > is a struct syscall_trace_enter, the syscall arguments are unsigned longs.
> > > They are written in ftrace_syscall_enter, this looks correct to me.
>
> > OK, I thought the filename points the ringbuffer, but it actually points
> > the user space. (saving a raw parameter values) So it is OK.
>
> > For eprobe users, it should not access to the user space data directly
> > because it can cause page fault in the kernel without fixup. It may work
> > on x86, but it doesn't work on other architecture which has separated
> > address space for user space. To avoid such mistake, it saves actual
> > string in the ringbuffer as __filename_val.
>
> > Hmm, this must be documented in eprobe example code...
>
> Could we use is_kernel() from kallsyms.h to check the address?
No, it is hard to tell whether a given unsigned long value is an address
in kernel space or not.
>
> Or should we forbid string and ustring fetch types in eprobes if the
> base field is a FILTER_PTR_STRING?
No, it is up to the user whether to use string or ustring. The "ustring"
fetch type can handle it correctly.
The problem is that the event does not provide the information that
the string is in user space or not. But actually, for syscall events
all data pointed by syscall parameter should be in the user space.
So feel free to add a new FILTER type to syscall events and use that
to check it should use ustring type or not, if you want.
Thank you,
>
> Best regards,
> Martin
>
> > > A const char * syscall argument is using FILTER_PTR_STRING, the unsigned long
> > > argument from the ringbuffer is read as a char and then converted to a
> > > truncated pointer.
>
>
> > Thanks,
>
> > --
> > Masami Hiramatsu (Google) <mhiramat@kernel.org>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH v6 1/8] tracing/probes: Support dumping fetcharg program for debugging dynamic events
From: Masami Hiramatsu @ 2026-06-22 0:01 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: Steven Rostedt, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178196863297.560995.16891637449659873905.stgit@devnote2>
Hi
Sashiko found another bug in FETCH_OP_TP_ARG, which seems to be using
the fetch_insn parameter wrongly (using different parameters).
I need to fix it first.
We also may need to rename FETCH_OP_DATA to FETCH_OP_IMMSTR.
Thanks,
On Sun, 21 Jun 2026 00:17:13 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
> For debugging probe events, it is helpful to verify the compiled
> fetch instructions for each probe argument. This introduces a new
> kernel config CONFIG_PROBE_EVENTS_DUMP_FETCHARG to decode the
> instruction sequence of each argument and display it under a
> commented line starting with '#' immediately following the dynamic
> event definition (such as in dynamic_events, kprobe_events,
> uprobe_events, etc.).
>
> For example:
> /sys/kernel/tracing # cat dynamic_events
> p:kprobes/p_vfs_read_0 vfs_read arg1=+0(file):ustring arg2=%ax:x16
> # arg1: ARG(0) -> ST_USTRING(offset=0,size=4) -> END
> # arg2: REG(80) -> ST_RAW(size=2) -> END
>
> Assisted-by: Antigravity:gemini-3.5-flash
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> ---
> Changes in v6:
> - Newly added.
> ---
> kernel/trace/Kconfig | 11 +++++
> kernel/trace/trace_eprobe.c | 2 +
> kernel/trace/trace_fprobe.c | 2 +
> kernel/trace/trace_kprobe.c | 2 +
> kernel/trace/trace_probe.c | 90 +++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_probe.h | 77 ++++++++++++++++++++++---------------
> kernel/trace/trace_uprobe.c | 3 +
> 7 files changed, 157 insertions(+), 30 deletions(-)
>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index e130da35808f..ed83fbfb4b7c 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -779,6 +779,17 @@ config PROBE_EVENTS_BTF_ARGS
> kernel function entry or a tracepoint.
> This is available only if BTF (BPF Type Format) support is enabled.
>
> +config PROBE_EVENTS_DUMP_FETCHARG
> + depends on PROBE_EVENTS
> + bool "Dump of dynamic probe event fetch-arguments"
> + default n
> + help
> + This shows the dump of fetch-arguments of dynamic probe events
> + alongside their event definitions in the dynamic_events file
> + as comment lines. This is useful to debug the probe events.
> +
> + If unsure, say N.
> +
> config KPROBE_EVENTS
> depends on KPROBES
> depends on HAVE_REGS_AND_STACK_ACCESS_API
> diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
> index b66d6196338d..fdb4ce993cad 100644
> --- a/kernel/trace/trace_eprobe.c
> +++ b/kernel/trace/trace_eprobe.c
> @@ -87,6 +87,8 @@ static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
> seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
> seq_putc(m, '\n');
>
> + trace_probe_dump_args(m, &ep->tp);
> +
> return 0;
> }
>
> diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> index 4d1abbf66229..536781cd4c47 100644
> --- a/kernel/trace/trace_fprobe.c
> +++ b/kernel/trace/trace_fprobe.c
> @@ -1449,6 +1449,8 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
> seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm);
> seq_putc(m, '\n');
>
> + trace_probe_dump_args(m, &tf->tp);
> +
> return 0;
> }
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index a8420e6abb56..cfa807d8e760 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -1320,6 +1320,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
> seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
> seq_putc(m, '\n');
>
> + trace_probe_dump_args(m, &tk->tp);
> +
> return 0;
> }
>
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 98532c503d02..9d174cd1fb1c 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -2393,3 +2393,93 @@ int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_a
> }
> return 0;
> }
> +
> +#ifdef CONFIG_PROBE_EVENTS_DUMP_FETCHARG
> +
> +struct fetch_op_decode {
> + const char *name;
> + void (*decode)(struct seq_file *m, struct fetch_insn *insn);
> +};
> +
> +static const struct fetch_op_decode fetch_op_decode[];
> +
> +static void fetcharg_decode_none(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_puts(m, fetch_op_decode[insn->op].name);
> +}
> +
> +static void fetcharg_decode_param(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_printf(m, "%s(%u)", fetch_op_decode[insn->op].name, insn->param);
> +}
> +
> +static void fetcharg_decode_imm(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_printf(m, "%s(0x%lx)", fetch_op_decode[insn->op].name, insn->immediate);
> +}
> +
> +static void fetcharg_decode_ptr(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_printf(m, "%s(%p)", fetch_op_decode[insn->op].name, insn->data);
> +}
> +
> +static void fetcharg_decode_symbol(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, (char *)insn->data);
> +}
> +
> +static void fetcharg_decode_offset(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_printf(m, "%s(offset=%d)", fetch_op_decode[insn->op].name, insn->offset);
> +}
> +
> +static void fetcharg_decode_store(struct seq_file *m, struct fetch_insn *insn)
> +{
> + if (insn->op == FETCH_OP_ST_RAW)
> + seq_printf(m, "%s(size=%u)", fetch_op_decode[insn->op].name, insn->size);
> + else
> + seq_printf(m, "%s(offset=%d,size=%u)", fetch_op_decode[insn->op].name, insn->offset, insn->size);
> +}
> +
> +static void fetcharg_decode_bf(struct seq_file *m, struct fetch_insn *insn)
> +{
> + seq_printf(m, "%s(basesize=%u,lshift=%u,rshift=%u)",
> + fetch_op_decode[insn->op].name, insn->basesize, insn->lshift, insn->rshift);
> +}
> +
> +#define FETCH_OP(opname, decode_fn) \
> + [FETCH_OP_##opname] = { .name = #opname, .decode = fetcharg_decode_##decode_fn },
> +
> +static const struct fetch_op_decode fetch_op_decode[] = {
> + FETCH_OP_LIST
> +};
> +#undef FETCH_OP
> +
> +static void trace_probe_dump_arg(struct seq_file *m, struct probe_arg *parg)
> +{
> + int i;
> +
> + seq_printf(m, "# %s: ", parg->name);
> + for (i = 0; i < FETCH_INSN_MAX; i++) {
> + struct fetch_insn *insn = parg->code + i;
> +
> + if (insn->op >= ARRAY_SIZE(fetch_op_decode) || !fetch_op_decode[insn->op].decode)
> + seq_printf(m, "unknown(%d)", insn->op);
> + else
> + fetch_op_decode[insn->op].decode(m, insn);
> +
> + if (insn->op == FETCH_OP_END)
> + break;
> + seq_puts(m, " -> ");
> + }
> + seq_putc(m, '\n');
> +}
> +
> +void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
> +{
> + int i;
> +
> + for (i = 0; i < tp->nr_args; i++)
> + trace_probe_dump_arg(m, &tp->args[i]);
> +}
> +#endif /* CONFIG_PROBE_EVENTS_DUMP_FETCHARG */
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 0f09f7aaf93f..b428ef42b229 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -83,38 +83,47 @@ static nokprobe_inline u32 update_data_loc(u32 loc, int consumed)
> /* Printing function type */
> typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
>
> +#define FETCH_OP_LIST \
> + /* Stage 1 (load) ops */ \
> + FETCH_OP(NOP, none) /* NOP */ \
> + FETCH_OP(REG, param) /* Register: .param = offset */ \
> + FETCH_OP(STACK, param) /* Stack: .param = index */ \
> + FETCH_OP(STACKP, none) /* Stack pointer */ \
> + FETCH_OP(RETVAL, none) /* Return value */ \
> + FETCH_OP(IMM, imm) /* Immediate: .immediate */ \
> + FETCH_OP(COMM, none) /* Current comm */ \
> + FETCH_OP(ARG, param) /* Argument: .param = index */ \
> + FETCH_OP(FOFFS, imm) /* File offset: .immediate */ \
> + FETCH_OP(DATA, ptr) /* Allocated data: .data */ \
> + FETCH_OP(EDATA, offset) /* Entry data: .offset */ \
> + FETCH_OP(TP_ARG, param) /* Tracepoint argument: .data */\
> + /* Stage 2 (dereference) ops */ \
> + FETCH_OP(DEREF, offset) /* Dereference: .offset */ \
> + FETCH_OP(UDEREF, offset) /* User-space dereference: .offset */\
> + /* Stage 3 (store) ops */ \
> + FETCH_OP(ST_RAW, store) /* Raw value: .size */ \
> + FETCH_OP(ST_MEM, store) /* Memory: .offset, .size */ \
> + FETCH_OP(ST_UMEM, store) /* User memory: .offset, .size */\
> + FETCH_OP(ST_STRING, store) /* String: .offset, .size */ \
> + FETCH_OP(ST_USTRING, store) /* User string: .offset, .size */\
> + FETCH_OP(ST_SYMSTR, store) /* Symbol name: .offset, .size */\
> + FETCH_OP(ST_EDATA, offset) /* Entry data: .offset */ \
> + /* Stage 4 (modify) op */ \
> + FETCH_OP(MOD_BF, bf) /* Bitfield: .basesize, .lshift, .rshift*/\
> + /* Stage 5 (loop) op */ \
> + FETCH_OP(LP_ARRAY, param) /* Loop array: .param = count */\
> + /* End */ \
> + FETCH_OP(END, none) \
> + /* Unresolved Symbol holder */ \
> + FETCH_OP(NOP_SYMBOL, symbol) /* Non loaded symbol: .data = symbol name */
> +
> +#define FETCH_OP(opname, decode_fn) FETCH_OP_##opname,
> enum fetch_op {
> - FETCH_OP_NOP = 0,
> - // Stage 1 (load) ops
> - FETCH_OP_REG, /* Register : .param = offset */
> - FETCH_OP_STACK, /* Stack : .param = index */
> - FETCH_OP_STACKP, /* Stack pointer */
> - FETCH_OP_RETVAL, /* Return value */
> - FETCH_OP_IMM, /* Immediate : .immediate */
> - FETCH_OP_COMM, /* Current comm */
> - FETCH_OP_ARG, /* Function argument : .param */
> - FETCH_OP_FOFFS, /* File offset: .immediate */
> - FETCH_OP_DATA, /* Allocated data: .data */
> - FETCH_OP_EDATA, /* Entry data: .offset */
> - // Stage 2 (dereference) op
> - FETCH_OP_DEREF, /* Dereference: .offset */
> - FETCH_OP_UDEREF, /* User-space Dereference: .offset */
> - // Stage 3 (store) ops
> - FETCH_OP_ST_RAW, /* Raw: .size */
> - FETCH_OP_ST_MEM, /* Mem: .offset, .size */
> - FETCH_OP_ST_UMEM, /* Mem: .offset, .size */
> - FETCH_OP_ST_STRING, /* String: .offset, .size */
> - FETCH_OP_ST_USTRING, /* User String: .offset, .size */
> - FETCH_OP_ST_SYMSTR, /* Kernel Symbol String: .offset, .size */
> - FETCH_OP_ST_EDATA, /* Store Entry Data: .offset */
> - // Stage 4 (modify) op
> - FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
> - // Stage 5 (loop) op
> - FETCH_OP_LP_ARRAY, /* Array: .param = loop count */
> - FETCH_OP_TP_ARG, /* Trace Point argument */
> - FETCH_OP_END,
> - FETCH_NOP_SYMBOL, /* Unresolved Symbol holder */
> + FETCH_OP_LIST
> };
> +#undef FETCH_OP
> +
> +#define FETCH_NOP_SYMBOL FETCH_OP_NOP_SYMBOL
>
> struct fetch_insn {
> enum fetch_op op;
> @@ -370,6 +379,14 @@ bool trace_probe_match_command_args(struct trace_probe *tp,
> int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
> int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> u8 *data, void *field);
> +#ifdef CONFIG_PROBE_EVENTS_DUMP_FETCHARG
> +void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp);
> +#else
> +static inline void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
> +{
> + return;
> +}
> +#endif
>
> #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
> int traceprobe_get_entry_data_size(struct trace_probe *tp);
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index c274346853d1..b2e264a4b96c 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -765,6 +765,9 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
> seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
>
> seq_putc(m, '\n');
> +
> + trace_probe_dump_args(m, &tu->tp);
> +
> return 0;
> }
>
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH] tracing: Use seq_buf for string concatenation
From: Jori Koolstra @ 2026-06-21 17:16 UTC (permalink / raw)
To: Woradorn Laodhanadhaworn, rostedt
Cc: mhiramat, mathieu.desnoyers, linux-kernel, linux-trace-kernel,
linux-hardening, linux-kernel-mentees, shuah, skhan, me
In-Reply-To: <20260620175441.223342-1-woradorn.laon@gmail.com>
> Op 20-06-2026 19:54 CEST schreef Woradorn Laodhanadhaworn <woradorn.laon@gmail.com>:
>
>
> In preparation for removing the strlcat API[1],
> replace the string concatenation logic with a struct seq_buf,
> which tracks the current position and the remaining space internally.
>
> The backing buffer bootup_event_buf allocation is unchanged.
> Use seq_buf_str() to NUL-terminate before passing to early_enable_events().
>
> Link: https://github.com/KSPP/linux/issues/370 [1]
>
> Signed-off-by: Woradorn Laodhanadhaworn <woradorn.laon@gmail.com>
> ---
> kernel/trace/trace_events.c | 21 ++++++++++++++++-----
> 1 file changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index c46e623e7e0d..15164723e028 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -22,6 +22,7 @@
> #include <linux/sort.h>
> #include <linux/slab.h>
> #include <linux/delay.h>
> +#include <linux/seq_buf.h>
>
> #include <trace/events/sched.h>
> #include <trace/syscall.h>
> @@ -4501,13 +4502,23 @@ extern struct trace_event_call *__start_ftrace_events[];
> extern struct trace_event_call *__stop_ftrace_events[];
>
> static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
Isn't this now unused?
> +static struct seq_buf bootup_event_seq;
> +static bool bootup_event_seq_initialized;
>
I think this can be refactored to avoid the bool. And should bootup_event_seq not be
__initdata?
> static __init int setup_trace_event(char *str)
> {
> - if (bootup_event_buf[0] != '\0')
> - strlcat(bootup_event_buf, ",", COMMAND_LINE_SIZE);
> + if (!bootup_event_seq_initialized) {
> + seq_buf_init(&bootup_event_seq, bootup_event_buf, COMMAND_LINE_SIZE);
> + bootup_event_seq_initialized = true;
> + }
> +
> + if (seq_buf_used(&bootup_event_seq) > 0)
> + seq_buf_puts(&bootup_event_seq, ",");
>
> - strlcat(bootup_event_buf, str, COMMAND_LINE_SIZE);
> + seq_buf_puts(&bootup_event_seq, str);
> +
> + if (seq_buf_has_overflowed(&bootup_event_seq))
> + return -ENOMEM;
>
> trace_set_ring_buffer_expanded(NULL);
> disable_tracing_selftest("running event tracing");
> @@ -4766,7 +4777,7 @@ static __init int event_trace_enable(void)
> */
> __trace_early_add_events(tr);
>
> - early_enable_events(tr, bootup_event_buf, false);
> + early_enable_events(tr, (char *)seq_buf_str(&bootup_event_seq), false);
What if trace_event is empty? Then setup_trace_event does not run AFAIK. See the
WARN_ON in seq_buf_str too. Have you tested this?
>
> trace_printk_start_comm();
>
> @@ -4794,7 +4805,7 @@ static __init int event_trace_enable_again(void)
> if (!tr)
> return -ENODEV;
>
> - early_enable_events(tr, bootup_event_buf, true);
> + early_enable_events(tr, (char *)seq_buf_str(&bootup_event_seq), true);
>
> return 0;
> }
> --
> 2.43.0
Thanks,
Jori.
^ permalink raw reply
* [RFC PATCH v1.2 05/17] mm/damon/core: use damon_nr_accesses_mvsum() for damos region tracing
From: SeongJae Park @ 2026-06-21 15:57 UTC (permalink / raw)
Cc: SeongJae Park, Andrew Morton, Masami Hiramatsu, Mathieu Desnoyers,
Steven Rostedt, damon, linux-kernel, linux-mm, linux-trace-kernel
In-Reply-To: <20260621155715.87932-1-sj@kernel.org>
damon_nr_accesses_mvsum() returns the same value as nr_accesses_bp. Also
the function is simpler and therefore more tolerant to errors.
Execution of the function would be more expensive than the simple read
of the field, but because the function is quite simple, the overhead
should be negligible. Use it in the DAMON region exporting trace points
instead of the nr_accesses_bp.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
include/trace/events/damon.h | 8 +++++---
mm/damon/core.c | 5 +++--
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
index 78388538acf44..8851727ae1627 100644
--- a/include/trace/events/damon.h
+++ b/include/trace/events/damon.h
@@ -78,9 +78,11 @@ TRACE_EVENT_CONDITION(damos_before_apply,
TP_PROTO(unsigned int context_idx, unsigned int scheme_idx,
unsigned int target_idx, struct damon_region *r,
- unsigned int nr_regions, bool do_trace),
+ unsigned int nr_accesses, unsigned int nr_regions,
+ bool do_trace),
- TP_ARGS(context_idx, scheme_idx, target_idx, r, nr_regions, do_trace),
+ TP_ARGS(context_idx, scheme_idx, target_idx, r, nr_accesses,
+ nr_regions, do_trace),
TP_CONDITION(do_trace),
@@ -101,7 +103,7 @@ TRACE_EVENT_CONDITION(damos_before_apply,
__entry->target_idx = target_idx;
__entry->start = r->ar.start;
__entry->end = r->ar.end;
- __entry->nr_accesses = r->nr_accesses_bp / 10000;
+ __entry->nr_accesses = nr_accesses;
__entry->age = r->age;
__entry->nr_regions = nr_regions;
),
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 8f845bf698b2d..91f137901e726 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -2439,7 +2439,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
struct damos *siter; /* schemes iterator */
unsigned int sidx = 0;
struct damon_target *titer; /* targets iterator */
- unsigned int tidx = 0;
+ unsigned int tidx = 0, nr_accesses = 0;
bool do_trace = false;
/* get indices for trace_damos_before_apply() */
@@ -2454,6 +2454,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
break;
tidx++;
}
+ nr_accesses = damon_nr_accesses_mvsum(r, c);
do_trace = true;
}
@@ -2469,7 +2470,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
if (damos_core_filter_out(c, t, r, s))
return;
ktime_get_coarse_ts64(&begin);
- trace_damos_before_apply(cidx, sidx, tidx, r,
+ trace_damos_before_apply(cidx, sidx, tidx, r, nr_accesses,
damon_nr_regions(t), do_trace);
sz_applied = c->ops.apply_scheme(c, t, r, s,
&sz_ops_filter_passed);
--
2.47.3
^ permalink raw reply related
* [RFC PATCH v1.2 00/17] mm/damon: optimize out nr_accesses_bp
From: SeongJae Park @ 2026-06-21 15:56 UTC (permalink / raw)
Cc: SeongJae Park, Andrew Morton, Brendan Higgins, David Gow,
Masami Hiramatsu, Mathieu Desnoyers, Shuah Khan, Steven Rostedt,
damon, kunit-dev, linux-kernel, linux-kselftest, linux-mm,
linux-trace-kernel
TLDR: Replace damon_region->nr_accesses_bp, which is easy to get wrong,
with a simpler on-demand moving sum function, damon_nr_accesses_mvsum().
Background
==========
DAMON's monitoring output (access pattern snapshot, or more technically
speaking, damon_region->nr_accesses) is completed once per aggregation
interval, which is 100 ms by default. Users can arbitrarily increase
the interval for demand. Under the suggested intervals auto-tuning
setup, it can span up to 200 seconds. If the aggregation interval is
too long, the snapshot users cannot use it in reasonable time. To
mitigate this, we introduced a new field of damon_region, namely
nr_accesses_bp. It contains a pseudo moving sum of nr_accesses in bp
units and is updated for each sampling interval.
It turned out keeping it correctly updated every sampling interval is
not that easy. From online parameter update feature development and
more experimental hacks, we found it is easy to be corrupted. Once it
is corrupted, DAMON's monitoring outputs become quite insane. Hence we
added a few validation checks. It is easy to be corrupted because it
requires every update per sampling interval to be correct.
Solution
========
There is no real reason to keep it updated every sampling interval. Due
to the simple pseudo-moving sum mechanism and existing helper field
(last_nr_accesses), we can also calculate the pseudo moving sum on
demand in a much simpler way.
Implement a function for getting the pseudo moving sum on demand, and
replace nr_accesses_bp uses with the new function. Also remove no more
needed tests for nr_accesses_bp and the per-sampling interval update
functions. Finally, remove the nr_accesses_bp. The new function is
quite simple.
Discussion
==========
Depending on the use case, multiple nr_accesses readers could be
executed in the same kdamond_fn() main loop iteration, which is executed
once per sampling interval. Such readers include DAMON region exporting
tracepoints (damon_[region_]aggregated and damos_before_apply), DAMOS,
and DAMON sysfs interface logic for update_schemes_tried_regions
command. In this case, the new function will be called multiple times
and this could be overhead compared to the old logic, which simply reads
the field without any additional work. Nonetheless, the new function is
quite simple. And the new approach does nothing while there is no need
to read. The old approach had to execute its update function for each
region for every sampling interval. Hence the new approach is believed
to be even more lightweight in common case, and the overhead is anyway
negligible.
One more advantage of this change is that one field from the
damon_region struct is removed. On setups that uses a high number of
DAMON regions, this could be a potential memory space benefit.
Patches Sequence
================
Patch 1 introduces the new function for getting the pseudo moving sum of
nr_accesses on demand. Patch 2 implements a unit test for the new
function's internal logic. Patch 3 updates monitoring logic to ready
for safe use of the new function. Patches 4-6 replace uses of
nr_accesses_bp in DAMOS, tracepoints and DAMON sysfs interface with the
new function, respectively. Patches 7-9 remove nr_accesses_bp
validation functions in DAMON core, one by one. Patches 10 and 11
further remove tests and test helper for nr_accesses_bp, respectively.
Patch 12 removes the setups and updates of the nr_accesses_bp field.
Patches 13-15 clean up function parameters that are no longer being used
due to the previous patch. Patch 16 removes the function that was used
for updating nr_accesses_bp field with its unit test, which is the
single remaining caller of the function. Finally, patch 17 removes
damon_region->nr_accesses_bp field.
Changes from RFC v1.1
- RFC v1.1: https://lore.kernel.org/20260620172244.90953-1-sj@kernel.org
- Handle next_aggregation_sis < passed_sample_intervals in
nr_accesses_mvsum().
- Always rescale ->last_nr_accesses for intervals change.
- Remove unused attrs params from damon_update_region_access_rate() and
its callers.
Changes from RFC v1
- RFC v1: https://lore.kernel.org/20260619193415.73833-1-sj@kernel.org
- Avoid divide-by-zero from zero aggregation interval.
- Call damon_nr_accesses_mvsum() for damos tracing only when it is enabled.
- Remove obsolete mentions of nr_accesses_bp in comments.
SeongJae Park (17):
mm/damon: introduce damon_nr_accesses_mvsum()
mm/damon/tests/core-kunit: test damon_mvsum()
mm/damon/core: always update ->last_nr_accesses for intervals change
mm/damon/core: use damon_nr_accesses_mvsum() in __damos_valid_target()
mm/damon/core: use damon_nr_accesses_mvsum() for damos region tracing
mm/damon/sysfs-schemes: use damon_nr_accesses_mvsum() for damo regions
mm/damon/core: remove damon_warn_fix_nr_accesses_corruption()
mm/damon/core: remove damon_verify_reset_aggregated()
mm/damon/core: remove damon_verify_merge_regions_of()
mm/damon/tests/core-kunit: remove nr_accesses_bp setup and tests
selftests/damon/drgn_dump_damon_status: do not dump nr_accesses_bp
mm/damon/core: remove nr_accesses_bp setups and updates
mm/damon/core: remove attrs param from
damon_update_region_access_rate()
mm/damon/paddr: remove attrs param from __damon_pa_check_access()
mm/damon/vaddr: remove attrs param from __damon_va_check_access()
mm/damon/core: remove damon_moving_sum() and its unit test
mm/damon: remove damon_region->nr_accesses_bp
include/linux/damon.h | 15 +-
include/trace/events/damon.h | 8 +-
mm/damon/core.c | 198 +++++++-----------
mm/damon/paddr.c | 9 +-
mm/damon/sysfs-schemes.c | 6 +-
mm/damon/tests/core-kunit.h | 37 ++--
mm/damon/vaddr.c | 12 +-
.../selftests/damon/drgn_dump_damon_status.py | 1 -
8 files changed, 116 insertions(+), 170 deletions(-)
base-commit: 6b4f924f70f92679c81959ad6b9234242aab74a7
--
2.47.3
^ permalink raw reply
* Re: [PATCH 1/2] tracing: Move non-trace_printk prototypes back to kernel.h
From: Steven Rostedt @ 2026-06-21 13:24 UTC (permalink / raw)
To: Yury Norov, Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu, Mark Rutland,
Mathieu Desnoyers, Andrew Morton, Linus Torvalds,
Sebastian Andrzej Siewior, John Ogness, Thomas Gleixner,
Peter Zijlstra, Julia Lawall, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <ajfiVTlCIVlqW3sh@yury>
On June 21, 2026 2:08:37 PM GMT+01:00, Yury Norov <yury.norov@gmail.com> wrote:
>On Sun, Jun 21, 2026 at 05:34:31AM -0400, Steven Rostedt wrote:
>> From: Steven Rostedt <rostedt@goodmis.org>
>>
>> In order to remove the include to trace_printk.h from kernel.h the tracing
>> control prototypes need to be moved back into kernel.h. That's because
>
>Please don't. Instead, you can split them out to trace_control.h, and
>include where needed. I actually have a prototype for it, FYI:
>
>https://github.com/norov/linux/tree/trace_pritk3
>
Sure, I have no problem adding another header for this.
>> they are used in other common header files like rcu.h. There's no point in
>> removing trace_printk.h from kernel.h if it just gets added back to other
>> common headers.
>>
>> Prototypes are very cheap for the compiler and should not be an issue.
>
>It's not about cost, it's about mess. kernel.h is included everywhere.
>Is that API needed everywhere? No, it's needed in literally 10 files.
>So, no place in kernel.h.
>
Well one of those files is rcu.h which is also pretty much included everywhere. But OK.
-- Steve
>>
>> 2.53.0
>>
^ permalink raw reply
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Yury Norov @ 2026-06-21 13:57 UTC (permalink / raw)
To: Yury Norov
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel,
Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Thomas Gleixner, Peter Zijlstra, Julia Lawall, linux-doc,
linux-kbuild, linuxppc-dev, dri-devel, linux-stm32,
linux-arm-kernel, linux-rdma, linux-usb, linux-ext4, linux-nfs,
kvm, intel-gfx
In-Reply-To: <ajfphe4Z8BrfYoUX@yury>
On Sun, Jun 21, 2026 at 09:39:17AM -0400, Yury Norov wrote:
> On Sun, Jun 21, 2026 at 05:47:21AM -0400, Steven Rostedt wrote:
> > On Sun, 21 Jun 2026 05:34:32 -0400
> > Steven Rostedt <rostedt@kernel.org> wrote:
> >
> > > Instead of having trace_printk.h included in kernel.h, create a config
> > > TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
> > > Makefile to allow developers to add trace_printk() without the need to add
> > > the include for it. Having it included in the Makefile keeps it from being
> > > in the dependency chain and it will not waste extra CPU cycles for those
> > > building the kernel without using trace_printk.
> >
> > Bah, I only tested with the config option enabled, and missed some
> > dependencies with it disabled.
>
> Yes you did.
>
> > For instance, rcu.h also uses ftrace_dump() so that too needs to go
> > into kernel.h.
>
> No, it shouldn't.
>
> > I also need to add a few more includes to trace_printk.h.
>
> > OK, I need to run this through all my tests to find where else I missed
> > adding the includes. But the idea should hopefully satisfy everyone.
>
> If you include it under config in kernel.h, to make the kernel buildable,
I mean: in kernel.h or in Makefile.
> you need to include trace_printk.h explicitly where it's actually used.
> IOW, apply my patch v4-7.
>
> Then, developers who use trace_printk() on their development machine,
> will be really frustrated when their debugging code will break client
> build just because CONFIG_TRACE_PRINTK_DEBUGGING is disabled there.
> They will spend a day, at best, communicating with remote managers,
> and end up with adding #include <linux/trace_printk.h> in the files
> they touch. Is that your plan?
>
> If I was one of those developers, the solution would be simple for me:
> don't use trace_printk() at all.
>
> Thanks,
> Yury
^ permalink raw reply
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Yury Norov @ 2026-06-21 13:39 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu, Mark Rutland,
Mathieu Desnoyers, Andrew Morton, Linus Torvalds,
Sebastian Andrzej Siewior, John Ogness, Thomas Gleixner,
Peter Zijlstra, Julia Lawall, Yury Norov, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <20260621054721.7cde38f0@fedora>
On Sun, Jun 21, 2026 at 05:47:21AM -0400, Steven Rostedt wrote:
> On Sun, 21 Jun 2026 05:34:32 -0400
> Steven Rostedt <rostedt@kernel.org> wrote:
>
> > Instead of having trace_printk.h included in kernel.h, create a config
> > TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
> > Makefile to allow developers to add trace_printk() without the need to add
> > the include for it. Having it included in the Makefile keeps it from being
> > in the dependency chain and it will not waste extra CPU cycles for those
> > building the kernel without using trace_printk.
>
> Bah, I only tested with the config option enabled, and missed some
> dependencies with it disabled.
Yes you did.
> For instance, rcu.h also uses ftrace_dump() so that too needs to go
> into kernel.h.
No, it shouldn't.
> I also need to add a few more includes to trace_printk.h.
> OK, I need to run this through all my tests to find where else I missed
> adding the includes. But the idea should hopefully satisfy everyone.
If you include it under config in kernel.h, to make the kernel buildable,
you need to include trace_printk.h explicitly where it's actually used.
IOW, apply my patch v4-7.
Then, developers who use trace_printk() on their development machine,
will be really frustrated when their debugging code will break client
build just because CONFIG_TRACE_PRINTK_DEBUGGING is disabled there.
They will spend a day, at best, communicating with remote managers,
and end up with adding #include <linux/trace_printk.h> in the files
they touch. Is that your plan?
If I was one of those developers, the solution would be simple for me:
don't use trace_printk() at all.
Thanks,
Yury
^ permalink raw reply
* Re: [PATCH] kprobes: Replace __ASSEMBLY__ with __ASSEMBLER__ in header file
From: Masami Hiramatsu @ 2026-06-21 13:27 UTC (permalink / raw)
To: Thomas Huth
Cc: Naveen N Rao, David S. Miller, linux-kernel, linux-trace-kernel,
linux-arch
In-Reply-To: <20260619161434.88270-1-thuth@redhat.com>
On Fri, 19 Jun 2026 18:14:34 +0200
Thomas Huth <thuth@redhat.com> wrote:
> From: Thomas Huth <thuth@redhat.com>
>
> While the GCC and Clang compilers already define __ASSEMBLER__
> automatically when compiling assembly code, __ASSEMBLY__ is a
> macro that only gets defined by the Makefiles in the kernel.
> This can be very confusing when switching between userspace
> and kernelspace coding, or when dealing with uapi headers that
> rather should use __ASSEMBLER__ instead. So let's standardize now
> on the __ASSEMBLER__ macro that is provided by the compilers.
>
Oops, good catch!
Let me pick it.
Thanks!
> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---
> Note: This patch has been split from an earlier bigger patch of
> mine to ease reviewing.
>
> include/asm-generic/kprobes.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h
> index 5290a2b2e15a0..16f16963d503d 100644
> --- a/include/asm-generic/kprobes.h
> +++ b/include/asm-generic/kprobes.h
> @@ -2,7 +2,7 @@
> #ifndef _ASM_GENERIC_KPROBES_H
> #define _ASM_GENERIC_KPROBES_H
>
> -#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
> +#if defined(__KERNEL__) && !defined(__ASSEMBLER__)
> #ifdef CONFIG_KPROBES
> /*
> * Blacklist ganerating macro. Specify functions which is not probed
> @@ -21,6 +21,6 @@ static unsigned long __used \
> # define __kprobes
> # define nokprobe_inline inline
> #endif
> -#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
> +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */
>
> #endif /* _ASM_GENERIC_KPROBES_H */
> --
> 2.54.0
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH 1/2] tracing: Move non-trace_printk prototypes back to kernel.h
From: Yury Norov @ 2026-06-21 13:08 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu, Mark Rutland,
Mathieu Desnoyers, Andrew Morton, Linus Torvalds,
Sebastian Andrzej Siewior, John Ogness, Thomas Gleixner,
Peter Zijlstra, Julia Lawall, Yury Norov, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <20260621093811.007634476@kernel.org>
On Sun, Jun 21, 2026 at 05:34:31AM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> In order to remove the include to trace_printk.h from kernel.h the tracing
> control prototypes need to be moved back into kernel.h. That's because
Please don't. Instead, you can split them out to trace_control.h, and
include where needed. I actually have a prototype for it, FYI:
https://github.com/norov/linux/tree/trace_pritk3
> they are used in other common header files like rcu.h. There's no point in
> removing trace_printk.h from kernel.h if it just gets added back to other
> common headers.
>
> Prototypes are very cheap for the compiler and should not be an issue.
It's not about cost, it's about mess. kernel.h is included everywhere.
Is that API needed everywhere? No, it's needed in literally 10 files.
So, no place in kernel.h.
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---
> include/linux/kernel.h | 18 ++++++++++++++++++
> include/linux/trace_printk.h | 17 -----------------
> 2 files changed, 18 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index e5570a16cbb1..c3c68128827c 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -194,4 +194,22 @@ extern enum system_states system_state;
> # define REBUILD_DUE_TO_DYNAMIC_FTRACE
> #endif
>
> +#ifdef CONFIG_TRACING
> +void tracing_on(void);
> +void tracing_off(void);
> +int tracing_is_on(void);
> +void tracing_snapshot(void);
> +void tracing_snapshot_alloc(void);
> +void tracing_start(void);
> +void tracing_stop(void);
> +#else
> +static inline void tracing_start(void) { }
> +static inline void tracing_stop(void) { }
> +static inline void tracing_on(void) { }
> +static inline void tracing_off(void) { }
> +static inline int tracing_is_on(void) { return 0; }
> +static inline void tracing_snapshot(void) { }
> +static inline void tracing_snapshot_alloc(void) { }
> +#endif
> +
> #endif
> diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h
> index 3d54f440dccf..879fed0805fd 100644
> --- a/include/linux/trace_printk.h
> +++ b/include/linux/trace_printk.h
> @@ -35,15 +35,6 @@ enum ftrace_dump_mode {
> };
>
> #ifdef CONFIG_TRACING
> -void tracing_on(void);
> -void tracing_off(void);
> -int tracing_is_on(void);
> -void tracing_snapshot(void);
> -void tracing_snapshot_alloc(void);
> -
> -extern void tracing_start(void);
> -extern void tracing_stop(void);
> -
> static inline __printf(1, 2)
> void ____trace_printk_check_format(const char *fmt, ...)
> {
> @@ -176,16 +167,8 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
>
> extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
> #else
> -static inline void tracing_start(void) { }
> -static inline void tracing_stop(void) { }
> static inline void trace_dump_stack(int skip) { }
>
> -static inline void tracing_on(void) { }
> -static inline void tracing_off(void) { }
> -static inline int tracing_is_on(void) { return 0; }
> -static inline void tracing_snapshot(void) { }
> -static inline void tracing_snapshot_alloc(void) { }
> -
> static inline __printf(1, 2)
> int trace_printk(const char *fmt, ...)
> {
> --
> 2.53.0
>
^ permalink raw reply
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Steven Rostedt @ 2026-06-21 13:03 UTC (permalink / raw)
To: David Laight
Cc: Thomas Gleixner, linux-kernel, linux-trace-kernel,
Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Peter Zijlstra, Julia Lawall, Yury Norov, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <20260621135531.243375d9@pumpkin>
On Sun, 21 Jun 2026 13:55:31 +0100
David Laight <david.laight.linux@gmail.com> wrote:
> Indeed...
> Isn't trace_printk() just an extern?
> Having it defined somewhere isn't going to make any difference to build times.
No it is not. It is a macro to cut as many nanoseconds as possible as
trace_printk() was created to debug tight race conditions and any added
latency can make the race go away.
-- Steve
^ permalink raw reply
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: David Laight @ 2026-06-21 12:55 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel,
Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Peter Zijlstra, Julia Lawall, Yury Norov, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <87ik7cmcb7.ffs@fw13>
On Sun, 21 Jun 2026 12:13:00 +0200
Thomas Gleixner <tglx@kernel.org> wrote:
> On Sun, Jun 21 2026 at 05:34, Steven Rostedt wrote:
> > Instead of having trace_printk.h included in kernel.h, create a config
> > TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
> > Makefile to allow developers to add trace_printk() without the need to add
> > the include for it. Having it included in the Makefile keeps it from being
> > in the dependency chain and it will not waste extra CPU cycles for those
> > building the kernel without using trace_printk.
>
> IOW, you make it worse just because.
>
> With the header being separate I add the three trace_printk()s and the
> include to the source file I'm investigating. The recompile will build
> exactly this source file.
>
> Having to enable the config knob will result in a full kernel rebuild
> for no value.
Indeed...
Isn't trace_printk() just an extern?
Having it defined somewhere isn't going to make any difference to build times.
David
>
> Seriously?
>
> Thanks,
>
> tglx
>
>
>
^ permalink raw reply
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Steven Rostedt @ 2026-06-21 10:38 UTC (permalink / raw)
To: Thomas Gleixner, Steven Rostedt, linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Peter Zijlstra, Julia Lawall, Yury Norov, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <87ik7cmcb7.ffs@fw13>
On June 21, 2026 11:13:00 AM GMT+01:00, Thomas Gleixner <tglx@kernel.org> wrote:
>On Sun, Jun 21 2026 at 05:34, Steven Rostedt wrote:
>> Instead of having trace_printk.h included in kernel.h, create a config
>> TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
>> Makefile to allow developers to add trace_printk() without the need to add
>> the include for it. Having it included in the Makefile keeps it from being
>> in the dependency chain and it will not waste extra CPU cycles for those
>> building the kernel without using trace_printk.
>
>IOW, you make it worse just because.
>
>With the header being separate I add the three trace_printk()s and the
>include to the source file I'm investigating. The recompile will build
>exactly this source file.
>
>Having to enable the config knob will result in a full kernel rebuild
>for no value.
>
>Seriously?
Like having lockdep enabled, this would always be set in the development environment. It's not something to only enable when you need to add a trace_printk. If you don't want to rebuild everything, by all means add the include file by file. There's nothing preventing you from doing that with this solution.
-- Steve
P.S. I'm replying on my phone as I'm in the London Tube. Thus why I'm not trimming my email.
>
>Thanks,
>
> tglx
>
^ permalink raw reply
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Thomas Gleixner @ 2026-06-21 10:13 UTC (permalink / raw)
To: Steven Rostedt, linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Peter Zijlstra, Julia Lawall, Yury Norov, linux-doc, linux-kbuild,
linuxppc-dev, dri-devel, linux-stm32, linux-arm-kernel,
linux-rdma, linux-usb, linux-ext4, linux-nfs, kvm, intel-gfx
In-Reply-To: <20260621093811.168514984@kernel.org>
On Sun, Jun 21 2026 at 05:34, Steven Rostedt wrote:
> Instead of having trace_printk.h included in kernel.h, create a config
> TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
> Makefile to allow developers to add trace_printk() without the need to add
> the include for it. Having it included in the Makefile keeps it from being
> in the dependency chain and it will not waste extra CPU cycles for those
> building the kernel without using trace_printk.
IOW, you make it worse just because.
With the header being separate I add the three trace_printk()s and the
include to the source file I'm investigating. The recompile will build
exactly this source file.
Having to enable the config knob will result in a full kernel rebuild
for no value.
Seriously?
Thanks,
tglx
^ permalink raw reply
* [PATCH] samples: ftrace: fix typos in benchmark comment
From: Yudistira Putra @ 2026-06-21 9:51 UTC (permalink / raw)
To: rostedt, mhiramat
Cc: mark.rutland, linux-trace-kernel, linux-kernel, Yudistira Putra
Fix two typos in the ftrace operations sample benchmark comment.
Signed-off-by: Yudistira Putra <pyudistira519@gmail.com>
---
samples/ftrace/ftrace-ops.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c
index 68d6685c80bd..152ffc1a30b6 100644
--- a/samples/ftrace/ftrace-ops.c
+++ b/samples/ftrace/ftrace-ops.c
@@ -232,8 +232,8 @@ static int __init ftrace_ops_sample_init(void)
ops_destroy(ops_irrelevant, nr_ops_irrelevant);
/*
- * The benchmark completed sucessfully, but there's no reason to keep
- * the module around. Return an error do the user doesn't have to
+ * The benchmark completed successfully, but there's no reason to keep
+ * the module around. Return an error so the user doesn't have to
* manually unload the module.
*/
return -EINVAL;
--
2.43.0
^ permalink raw reply related
* Re: [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Steven Rostedt @ 2026-06-21 9:47 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Thomas Gleixner, Peter Zijlstra, Julia Lawall, Yury Norov,
linux-doc, linux-kbuild, linuxppc-dev, dri-devel, linux-stm32,
linux-arm-kernel, linux-rdma, linux-usb, linux-ext4, linux-nfs,
kvm, intel-gfx
In-Reply-To: <20260621093811.168514984@kernel.org>
On Sun, 21 Jun 2026 05:34:32 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
> Instead of having trace_printk.h included in kernel.h, create a config
> TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
> Makefile to allow developers to add trace_printk() without the need to add
> the include for it. Having it included in the Makefile keeps it from being
> in the dependency chain and it will not waste extra CPU cycles for those
> building the kernel without using trace_printk.
Bah, I only tested with the config option enabled, and missed some
dependencies with it disabled.
For instance, rcu.h also uses ftrace_dump() so that too needs to go
into kernel.h. I also need to add a few more includes to trace_printk.h.
OK, I need to run this through all my tests to find where else I missed
adding the includes. But the idea should hopefully satisfy everyone.
-- Steve
^ permalink raw reply
* [PATCH 2/2] tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
From: Steven Rostedt @ 2026-06-21 9:34 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Thomas Gleixner, Peter Zijlstra, Julia Lawall, Yury Norov,
linux-doc, linux-kbuild, linuxppc-dev, dri-devel, linux-stm32,
linux-arm-kernel, linux-rdma, linux-usb, linux-ext4, linux-nfs,
kvm, intel-gfx
In-Reply-To: <20260621093430.264983361@kernel.org>
From: Steven Rostedt <rostedt@goodmis.org>
Instead of having trace_printk.h included in kernel.h, create a config
TRACE_PRINTK_DEBUGGING that when set will update the CFLAGS in the
Makefile to allow developers to add trace_printk() without the need to add
the include for it. Having it included in the Makefile keeps it from being
in the dependency chain and it will not waste extra CPU cycles for those
building the kernel without using trace_printk.
Link: https://lore.kernel.org/all/CAHk-=wikCBeVFjVXiY4o-oepdbjAoir5+TcAgtL12c4u1TpZLQ@mail.gmail.com/
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
.../debugging/driver_development_debugging_guide.rst | 2 +-
Makefile | 5 +++++
arch/powerpc/kvm/book3s_xics.c | 1 +
drivers/gpu/drm/i915/gt/intel_gtt.h | 1 +
drivers/gpu/drm/i915/i915_gem.h | 1 +
drivers/hwtracing/stm/dummy_stm.c | 4 ++++
drivers/infiniband/hw/hfi1/trace_dbg.h | 1 +
drivers/usb/early/xhci-dbc.c | 1 +
fs/ext4/inline.c | 1 +
include/linux/kernel.h | 1 -
include/linux/sunrpc/debug.h | 1 +
include/linux/trace_printk.h | 5 +++--
kernel/trace/Kconfig | 10 ++++++++++
kernel/trace/ring_buffer_benchmark.c | 1 +
kernel/trace/trace.h | 1 +
samples/fprobe/fprobe_example.c | 1 +
samples/ftrace/ftrace-direct-modify.c | 1 +
samples/ftrace/ftrace-direct-multi-modify.c | 1 +
samples/ftrace/ftrace-direct-multi.c | 2 +-
samples/ftrace/ftrace-direct-too.c | 2 +-
samples/ftrace/ftrace-direct.c | 2 +-
21 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/Documentation/process/debugging/driver_development_debugging_guide.rst b/Documentation/process/debugging/driver_development_debugging_guide.rst
index aca08f457793..3c87aa03622f 100644
--- a/Documentation/process/debugging/driver_development_debugging_guide.rst
+++ b/Documentation/process/debugging/driver_development_debugging_guide.rst
@@ -52,7 +52,7 @@ For the full documentation see :doc:`/core-api/printk-basics`
Trace_printk
~~~~~~~~~~~~
-Prerequisite: ``CONFIG_DYNAMIC_FTRACE`` & ``#include <linux/ftrace.h>``
+Prerequisite: ``CONFIG_TRACE_PRINTK_DEBUGGING``
It is a tiny bit less comfortable to use than printk(), because you will have
to read the messages from the trace file (See: :ref:`read_ftrace_log`
diff --git a/Makefile b/Makefile
index d1c595db55c9..2f5923d5393b 100644
--- a/Makefile
+++ b/Makefile
@@ -840,6 +840,11 @@ ifdef CONFIG_FUNCTION_TRACER
CC_FLAGS_FTRACE := -pg
endif
+ifdef CONFIG_TRACE_PRINTK_DEBUGGING
+ # Allow trace_printk() to be used anywhere without including the header.
+ LINUXINCLUDE += -include $(srctree)/include/linux/trace_printk.h
+endif
+
ifdef CONFIG_TRACEPOINTS
# To check for unused tracepoints (tracepoints that are defined but never
# called), run with:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 74a44fa702b0..ef5eb596a56e 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -26,6 +26,7 @@
#if 1
#define XICS_DBG(fmt...) do { } while (0)
#else
+#include <linux/trace_printk.h>
#define XICS_DBG(fmt...) trace_printk(fmt)
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b54ee4f25af1..f6f223090760 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -35,6 +35,7 @@
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#include <linux/trace_printk.h>
#define GTT_TRACE(...) trace_printk(__VA_ARGS__)
#else
#define GTT_TRACE(...)
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 20b3cb29cfff..5cab1836dc1d 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -116,6 +116,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
#endif
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
+#include <linux/trace_printk.h>
#define GEM_TRACE(...) trace_printk(__VA_ARGS__)
#define GEM_TRACE_ERR(...) do { \
pr_err(__VA_ARGS__); \
diff --git a/drivers/hwtracing/stm/dummy_stm.c b/drivers/hwtracing/stm/dummy_stm.c
index 38528ffdc0b3..784f9af7ccba 100644
--- a/drivers/hwtracing/stm/dummy_stm.c
+++ b/drivers/hwtracing/stm/dummy_stm.c
@@ -14,6 +14,10 @@
#include <linux/stm.h>
#include <uapi/linux/stm.h>
+#ifdef DEBUG
+#include <linux/trace_printk.h>
+#endif
+
static ssize_t notrace
dummy_stm_packet(struct stm_data *stm_data, unsigned int master,
unsigned int channel, unsigned int packet, unsigned int flags,
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 58304b91380f..30df5e246586 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -103,6 +103,7 @@ __hfi1_trace_def(IOCTL);
*/
#ifdef HFI1_EARLY_DBG
+#include <linux/trace_printk.h>
#define hfi1_dbg_early(fmt, ...) \
trace_printk(fmt, ##__VA_ARGS__)
#else
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index 41118bba9197..955c73bd601f 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -30,6 +30,7 @@ static struct xdbc_state xdbc;
static bool early_console_keep;
#ifdef XDBC_TRACE
+#include <linux/trace_printk.h>
#define xdbc_trace trace_printk
#else
static inline void xdbc_trace(const char *fmt, ...) { }
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8045e4ff270c..0eff4a0c6a6c 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -934,6 +934,7 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
}
#ifdef INLINE_DIR_DEBUG
+#include <linux/trace_printk.h>
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
void *inline_start, int inline_size)
{
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c3c68128827c..538655385089 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -31,7 +31,6 @@
#include <linux/build_bug.h>
#include <linux/sprintf.h>
#include <linux/static_call_types.h>
-#include <linux/trace_printk.h>
#include <linux/util_macros.h>
#include <linux/wordpart.h>
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index ab61bed2f7af..7524f5d82fba 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -29,6 +29,7 @@ extern unsigned int nlm_debug;
# define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac))
# if IS_ENABLED(CONFIG_SUNRPC_DEBUG_TRACE)
+# include <linux/trace_printk.h>
# define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__)
# else
# define __sunrpc_printk(fmt, ...) printk(KERN_DEFAULT fmt, ##__VA_ARGS__)
diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h
index 879fed0805fd..66edec6d5dbf 100644
--- a/include/linux/trace_printk.h
+++ b/include/linux/trace_printk.h
@@ -1,11 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TRACE_PRINTK_H
#define _LINUX_TRACE_PRINTK_H
+#if !defined(__ASSEMBLY__) && !defined(__GENKSYMS__) && !defined(BUILD_VDSO)
-#include <linux/compiler_attributes.h>
#include <linux/instruction_pointer.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
+#include <linux/stdarg.h>
/*
* General tracing related utility functions - trace_printk(),
@@ -181,5 +182,5 @@ ftrace_vprintk(const char *fmt, va_list ap)
}
static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#endif /* CONFIG_TRACING */
-
+#endif /* !defined(__ASSEMBLY__) && !defined(__GENKSYMS__) && !defined(BUILD_VDSO) */
#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 084f34dc6c9f..ffbd1b0ce66e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -210,6 +210,16 @@ menuconfig FTRACE
if FTRACE
+config TRACE_PRINTK_DEBUGGING
+ bool "Debug with trace_printk()"
+ help
+ If you need to debug with trace_printk(), instead of adding
+ #include <linux/trace_printk.h> to every file you add a trace_printk()
+ to, select this option and it will add trace_printk.h to all code
+ to allow tracing with trace_printk().
+
+ If in doubt, select N
+
config TRACEFS_AUTOMOUNT_DEPRECATED
bool "Automount tracefs on debugfs [DEPRECATED]"
depends on TRACING
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 593e3b59e42e..2bb25caebb75 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -5,6 +5,7 @@
* Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ring_buffer.h>
+#include <linux/trace_printk.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 80fe152af1dd..580a3deab1e9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,6 +13,7 @@
#include <linux/ftrace.h>
#include <linux/trace.h>
#include <linux/hw_breakpoint.h>
+#include <linux/trace_printk.h>
#include <linux/trace_seq.h>
#include <linux/trace_events.h>
#include <linux/compiler.h>
diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
index bfe98ce826f3..de81b9b4ca7d 100644
--- a/samples/fprobe/fprobe_example.c
+++ b/samples/fprobe/fprobe_example.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) "%s: " fmt, __func__
+#include <linux/trace_printk.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fprobe.h>
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index 1ba1927b548e..30d0f8e644c8 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/trace_printk.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/ftrace.h>
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index 7a7822dfeb50..f64b929e19ec 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/trace_printk.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/ftrace.h>
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index 3fe6ddaf0b69..d32644a49554 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/trace_printk.h>
#include <linux/module.h>
-
#include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h>
#include <linux/sched/stat.h>
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index bf2411aa6fd7..266fcb233301 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/trace_printk.h>
#include <linux/module.h>
-
#include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h>
#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index 5368c8c39cbb..85e0dff9b691 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/trace_printk.h>
#include <linux/module.h>
-
#include <linux/sched.h> /* for wake_up_process() */
#include <linux/ftrace.h>
#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
--
2.53.0
^ permalink raw reply related
* [PATCH 1/2] tracing: Move non-trace_printk prototypes back to kernel.h
From: Steven Rostedt @ 2026-06-21 9:34 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Thomas Gleixner, Peter Zijlstra, Julia Lawall, Yury Norov,
linux-doc, linux-kbuild, linuxppc-dev, dri-devel, linux-stm32,
linux-arm-kernel, linux-rdma, linux-usb, linux-ext4, linux-nfs,
kvm, intel-gfx
In-Reply-To: <20260621093430.264983361@kernel.org>
From: Steven Rostedt <rostedt@goodmis.org>
In order to remove the include to trace_printk.h from kernel.h the tracing
control prototypes need to be moved back into kernel.h. That's because
they are used in other common header files like rcu.h. There's no point in
removing trace_printk.h from kernel.h if it just gets added back to other
common headers.
Prototypes are very cheap for the compiler and should not be an issue.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/kernel.h | 18 ++++++++++++++++++
include/linux/trace_printk.h | 17 -----------------
2 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e5570a16cbb1..c3c68128827c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -194,4 +194,22 @@ extern enum system_states system_state;
# define REBUILD_DUE_TO_DYNAMIC_FTRACE
#endif
+#ifdef CONFIG_TRACING
+void tracing_on(void);
+void tracing_off(void);
+int tracing_is_on(void);
+void tracing_snapshot(void);
+void tracing_snapshot_alloc(void);
+void tracing_start(void);
+void tracing_stop(void);
+#else
+static inline void tracing_start(void) { }
+static inline void tracing_stop(void) { }
+static inline void tracing_on(void) { }
+static inline void tracing_off(void) { }
+static inline int tracing_is_on(void) { return 0; }
+static inline void tracing_snapshot(void) { }
+static inline void tracing_snapshot_alloc(void) { }
+#endif
+
#endif
diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h
index 3d54f440dccf..879fed0805fd 100644
--- a/include/linux/trace_printk.h
+++ b/include/linux/trace_printk.h
@@ -35,15 +35,6 @@ enum ftrace_dump_mode {
};
#ifdef CONFIG_TRACING
-void tracing_on(void);
-void tracing_off(void);
-int tracing_is_on(void);
-void tracing_snapshot(void);
-void tracing_snapshot_alloc(void);
-
-extern void tracing_start(void);
-extern void tracing_stop(void);
-
static inline __printf(1, 2)
void ____trace_printk_check_format(const char *fmt, ...)
{
@@ -176,16 +167,8 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
#else
-static inline void tracing_start(void) { }
-static inline void tracing_stop(void) { }
static inline void trace_dump_stack(int skip) { }
-static inline void tracing_on(void) { }
-static inline void tracing_off(void) { }
-static inline int tracing_is_on(void) { return 0; }
-static inline void tracing_snapshot(void) { }
-static inline void tracing_snapshot_alloc(void) { }
-
static inline __printf(1, 2)
int trace_printk(const char *fmt, ...)
{
--
2.53.0
^ permalink raw reply related
* [PATCH 0/2] tracing: Move trace_printk.h out of kernel.h
From: Steven Rostedt @ 2026-06-21 9:34 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, Andrew Morton,
Linus Torvalds, Sebastian Andrzej Siewior, John Ogness,
Thomas Gleixner, Peter Zijlstra, Julia Lawall, Yury Norov,
linux-doc, linux-kbuild, linuxppc-dev, dri-devel, linux-stm32,
linux-arm-kernel, linux-rdma, linux-usb, linux-ext4, linux-nfs,
kvm, intel-gfx
There have been complaints about trace_printk() being defined in kernel.h as it
can increase the compilation time. As it is only used by some developers for
debugging purposes, it should not be in kernel.h causing lots of wasted CPU
cycles for those that do not ever care about it.
Instead, add a CONFIG_TRACE_PRINTK_DEBUGGING option. Developers who use
trace_printk() can set it, so that they do not have to remember to add
#include <linux/trace_printk.h> to every file they add trace_printk() to while
debugging. It also means that those who do not have that config set will not
have to worry about wasted CPU cycles, as it is only included in the CFLAGS
when the option is set, and it's completely ignored otherwise.
Steven Rostedt (2):
tracing: Move non-trace_printk prototypes back to kernel.h
tracing: Add CONFIG_TRACE_PRINTK_DEBUGGING to clean up kernel.h
----
.../driver_development_debugging_guide.rst | 2 +-
Makefile | 5 +++++
arch/powerpc/kvm/book3s_xics.c | 1 +
drivers/gpu/drm/i915/gt/intel_gtt.h | 1 +
drivers/gpu/drm/i915/i915_gem.h | 1 +
drivers/hwtracing/stm/dummy_stm.c | 4 ++++
drivers/infiniband/hw/hfi1/trace_dbg.h | 1 +
drivers/usb/early/xhci-dbc.c | 1 +
fs/ext4/inline.c | 1 +
include/linux/kernel.h | 19 ++++++++++++++++++-
include/linux/sunrpc/debug.h | 1 +
include/linux/trace_printk.h | 22 +++-------------------
kernel/trace/Kconfig | 10 ++++++++++
kernel/trace/ring_buffer_benchmark.c | 1 +
kernel/trace/trace.h | 1 +
samples/fprobe/fprobe_example.c | 1 +
samples/ftrace/ftrace-direct-modify.c | 1 +
samples/ftrace/ftrace-direct-multi-modify.c | 1 +
samples/ftrace/ftrace-direct-multi.c | 2 +-
samples/ftrace/ftrace-direct-too.c | 2 +-
samples/ftrace/ftrace-direct.c | 2 +-
21 files changed, 56 insertions(+), 24 deletions(-)
^ permalink raw reply
* Re: [PATCH 3/3] rv/reactors: add KUnit tests for reactor_panic
From: Wen Yang @ 2026-06-21 3:34 UTC (permalink / raw)
To: XIAO WU, Gabriele Monaco; +Cc: Nam Cao, linux-trace-kernel, linux-kernel
In-Reply-To: <tencent_913194D0B2365EB0E404E464443D38FDC607@qq.com>
On 6/21/26 07:30, XIAO WU wrote:
> Hi Wen,
>
> I came across a Sashiko AI code review [1] that flagged a potential NULL
> pointer dereference in the `test_panic_register_unregister()` test case
> added by this patch (commit 8655782285e2). The review's analysis seemed
> plausible, so I spun up a QEMU environment to see whether it could be
> reproduced in practice.
>
> The short version: yes, it triggers a real kernel BUG + Oops. See below
> for the crash log and the reproduction approach.
>
> On Tue, 16 Jun 2026 at 00:44, Wen Yang wrote:
> > Add KUnit tests for the panic reactor covering:
> > - Reactor registration and unregistration lifecycle
> > - Panic notifier chain reachability
> ...
> > +static void test_panic_register_unregister(struct kunit *test)
> > +{
> > + int ret;
> > +
> > + ret = rv_register_reactor(&mock_panic_reactor);
> > + KUNIT_EXPECT_EQ(test, ret, 0);
> > + KUNIT_EXPECT_STREQ(test, mock_panic_reactor.name, "test_panic");
> > +
> > + rv_unregister_reactor(&mock_panic_reactor);
>
> This is the function the review highlighted. The issue is:
>
> - `KUNIT_EXPECT_EQ()` does *not* abort the test on failure.
> - If `rv_register_reactor()` fails (e.g. because another reactor
> named "test_panic" was already registered), the .list node of the
> statically-allocated `mock_panic_reactor` is never added to any
> list — it remains zero-initialized (prev = NULL, next = NULL).
> - `rv_unregister_reactor()` then unconditionally calls `list_del()`
> on this uninitialized list_head, which hits the NULL pointers.
>
> I was able to reproduce this reliably. The trigger condition is
> surprisingly simple: if any code path registers a reactor named
> "test_panic" before the KUnit suite runs, the test crashes the kernel.
>
> [Reproduction approach]
>
> I rebuilt the kernel with a small late_initcall in rv_reactors.c that
> pre-registers "test_panic" (simulating what would happen if, say, a
> kernel module or another subsystem registered a reactor with the same
> name before the KUnit tests execute):
>
> static int __init prereg_test_panic(void)
> {
> static struct rv_reactor prereg = {
> .name = "test_panic",
> .description = "pre-registered to simulate name collision",
> };
> return rv_register_reactor(&prereg);
> }
> late_initcall(prereg_test_panic);
>
> The KUnit tests then auto-run at boot (kunit_run_all_tests). The
> test_panic_register_unregister case fails registration with -EINVAL due
> to the duplicate name, the KUNIT_EXPECT_EQ does not abort, and
> rv_unregister_reactor() crashes on the uninitialized list.
>
> [Crash log — kernel 7.1.0-next-20260615, CONFIG_DEBUG_LIST=y]
>
> Reactor test_panic is already registered
> # test_panic_register_unregister: EXPECTATION FAILED at
> kernel/trace/rv/reactor_panic_kunit.c:68
> Expected ret == 0, but
> ret == -22 (0xffffffffffffffea)
> list_del corruption, ffffffff8ecce2f8->next is NULL
> ------------[ cut here ]------------
> kernel BUG at lib/list_debug.c:52!
> Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
> CPU: 1 UID: 0 PID: 5028 Comm: kunit_try_catch Tainted: G N
> RIP: 0010:__list_del_entry_valid_or_report+0xf2/0x200
> Call Trace:
> <TASK>
> rv_unregister_reactor+0x37/0x190
> test_panic_register_unregister+0x1de/0x2e0
> kunit_try_run_case+0x1d2/0x520
> kunit_generic_run_threadfn_adapter+0x89/0x100
> kthread+0x387/0x4a0
> ret_from_fork+0xb2c/0xdd0
> </TASK>
> Kernel panic - not syncing: Fatal exception
>
> The crash is in `rv_unregister_reactor()`, called from
> `test_panic_register_unregister()`. The `list_del()` in
> `rv_unregister_reactor()` has no guard against a list node that was
> never added to any list. With CONFIG_DEBUG_LIST=y the corruption is
> caught explicitly; without it this would be a silent NULL dereference.
>
> [Suggested fix]
>
> The most straightforward fix is to use `KUNIT_ASSERT_EQ()` instead of
> `KUNIT_EXPECT_EQ()` for the registration result, so the test aborts
> before reaching `rv_unregister_reactor()` on a failed registration:
>
> static void test_panic_register_unregister(struct kunit *test)
> {
> int ret;
>
> ret = rv_register_reactor(&mock_panic_reactor);
> - KUNIT_EXPECT_EQ(test, ret, 0);
> + KUNIT_ASSERT_EQ(test, ret, 0);
> KUNIT_EXPECT_STREQ(test, mock_panic_reactor.name, "test_panic");
>
> rv_unregister_reactor(&mock_panic_reactor);
> }
>
> An alternative (or complementary) approach would be to add a guard in
> `rv_unregister_reactor()` itself — e.g. checking whether the reactor is
> actually on the list before calling `list_del()`. That would make the
> API more robust against future callers making the same mistake.
>
> The same pattern likely applies to the printk reactor tests in patch
> 2/3, though I haven't tested those.
>
Okay, thank you.
We've noted that this is related to a KUnit test. We'll incorporate the
improvement in v2.
Thanks again.
--
Wen
> Full PoC code follows.
>
> [PoC part 1 — Kernel-space: late_initcall to create the name collision]
>
> This is what was added to kernel/trace/rv/rv_reactors.c (or could be
> built as a standalone kernel module — see preregister.c below). It
> pre-registers "test_panic" before KUnit auto-runs, so the test's own
> rv_register_reactor() fails with -EINVAL:
>
> static int __init prereg_test_panic(void)
> {
> static struct rv_reactor prereg = {
> .name = "test_panic",
> .description = "pre-registered to simulate name collision",
> };
> return rv_register_reactor(&prereg);
> }
> late_initcall(prereg_test_panic);
>
> [PoC part 2 — Userspace: trigger the KUnit test via debugfs]
>
> poc.c:
> ---8<----------------------------------------------------------------
> /*
> * POC: NULL pointer dereference in rv_unregister_reactor()
> *
> * Bug location: kernel/trace/rv/reactor_panic_kunit.c
> * test_panic_register_unregister()
> *
> * Bug: When rv_register_reactor() fails (because "test_panic" is already
> * registered), the test calls rv_unregister_reactor() unconditionally.
> * This performs list_del() on a zero-initialized list_head (never added
> * to any list), causing a NULL pointer dereference crash.
> *
> * Trigger: With a kernel that has pre-registered "test_panic" reactor,
> * simply trigger the KUnit test via debugfs "run" file. The test's
> * rv_register_reactor() fails with -EINVAL (duplicate), and the
> subsequent
> * rv_unregister_reactor() crashes on the uninitialized list.
> */
>
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <errno.h>
>
> #define KUNIT_RUN_PATH "/sys/kernel/debug/kunit/rv_reactor_panic/run"
>
> int main(int argc, char **argv)
> {
> int fd, ret;
>
> setbuf(stdout, NULL);
>
> printf("[+] POC: Triggering NULL deref in rv_unregister_reactor\n");
> printf("[+] Target: %s\n\n", KUNIT_RUN_PATH);
>
> /* Mount debugfs if needed */
> if (access("/sys/kernel/debug", F_OK) != 0) {
> printf("[*] Mounting debugfs...\n");
> ret = system("mount -t debugfs none /sys/kernel/debug/
> 2>/dev/null");
> (void)ret;
> }
>
> /* Verify KUnit path exists */
> if (access(KUNIT_RUN_PATH, W_OK) != 0) {
> printf("[-] Cannot access %s: %m\n", KUNIT_RUN_PATH);
> printf("[*] Available KUnit suites:\n");
> fflush(stdout);
> system("ls -la /sys/kernel/debug/kunit/ 2>&1");
> return 1;
> }
>
> printf("[*] Test_panic reactor should be pre-registered at boot\n");
> printf("[*] Triggering KUnit test suite...\n\n");
>
> /*
> * Write to the KUnit run file. This executes
> * __kunit_test_suites_init() -> kunit_run_tests() which
> * runs the reactor_panic_kunit test cases including
> * test_panic_register_unregister.
> *
> * With "test_panic" pre-registered:
> * 1. rv_register_reactor() returns -EINVAL (duplicate)
> * 2. KUNIT_EXPECT_EQ doesn't abort
> * 3. rv_unregister_reactor() calls list_del() on NULL list
> * 4. BOOM: list corruption / NULL deref / kernel crash
> */
> fd = open(KUNIT_RUN_PATH, O_WRONLY);
> if (fd < 0) {
> printf("[-] open failed: %m\n");
> return 1;
> }
>
> printf("[!] Writing to %s - triggering the crash now...\n",
> KUNIT_RUN_PATH);
> fflush(stdout);
>
> ret = write(fd, "1", 1);
> if (ret < 0) {
> printf("[-] write failed: %m\n");
> } else {
> printf("[+] Write succeeded (ret=%d)\n", ret);
> }
>
> close(fd);
>
> /*
> * If we reach here without crashing, let the user know
> */
> printf("\n[*] If the system is still alive, check dmesg:\n");
> printf(" dmesg | grep -i -E
> 'list_del|list_add|list_corrupt|NULL|BUG|oops\n");
> printf("\n[*] dmesg output:\n");
> fflush(stdout);
> system("dmesg | tail -60");
>
> printf("\n[+] POC completed.\n");
>
> return 0;
> }
> ---8<----------------------------------------------------------------
>
> Compile with:
> gcc -o poc poc.c -static
>
> [PoC part 3 — Kernel module alternative (standalone)]
>
> If you prefer not to modify rv_reactors.c directly, the same name
> collision can be created by loading this module before running the
> KUnit test. Note: this requires rv_register_reactor() to be exported
> (or resolved via kallsyms), which it may not be in the current tree.
> In that case the late_initcall approach above is the way to go.
>
> preregister.c:
> ---8<----------------------------------------------------------------
> #include <linux/module.h>
> #include <linux/kernel.h>
> #include <linux/rv.h>
>
> static struct rv_reactor prereg_reactor = {
> .name = "test_panic",
> .description = "pre-registered to trigger KUnit bug",
> };
>
> static int __init prereg_init(void)
> {
> int ret;
> ret = rv_register_reactor(&prereg_reactor);
> if (ret < 0) {
> pr_err("preregister: rv_register_reactor failed: %d\n", ret);
> return ret;
> }
> pr_info("preregister: registered 'test_panic' reactor\n");
> return 0;
> }
>
> static void __exit prereg_exit(void)
> {
> rv_unregister_reactor(&prereg_reactor);
> pr_info("preregister: unregistered 'test_panic' reactor\n");
> }
>
> module_init(prereg_init);
> module_exit(prereg_exit);
> MODULE_LICENSE("GPL");
> ---8<----------------------------------------------------------------
>
>
> [1]
> https://sashiko.dev/#/patchset/cover.1781541556.git.wen.yang%40linux.dev
> (Sashiko AI code review — "Null Pointer Dereference", Severity: High)
>
> Thanks,
> XIAO
>
^ permalink raw reply
* [RESEND PATCH v6 8/8] tracing/probes: Add a new testcase for BTF typecasts
From: Masami Hiramatsu (Google) @ 2026-06-21 3:27 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178201238795.570818.15573963115625446598.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
With the introduction of container_of-style BTF typecasting and
per-CPU variable access support in trace probes, we need a way to
verify their functionality and prevent regressions.
Add a new ftrace kselftest and update the trace event sample module
to test and validate these features.
Specifically, update the trace-events-sample module to set up a
periodic timer whose callback accesses a per-CPU counter. Introduce
a new sample trace event, foo_timer_fn, to trace this callback
and log the current counter value.
Then, add a new test case, btf_probe_event.tc, which defines a
dynamic probe on the timer callback. The probe uses BTF typecasting
to recover the parent structure from the timer argument and
this_cpu_read() to fetch the per-CPU counter. The test verifies
the integrity of the implementation by ensuring the values
recorded by the dynamic probe match those from the static tracepoint.
Assisted-by: Antigravity:gemini-3.5-flash
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v6:
- Update testcase according to changes.
Changes in v5:
- Add more syntax test cases.
Changes in v4:
- Fix uprobe $current test.
Changes in v3:
- Add syntax test case.
- Update testcase to use this_cpu_read()
Changes in v2:
- Use timer_shutdown_sync() instead of timer_delete_sync() for teardown.
---
samples/trace_events/trace-events-sample.c | 40 +++++++++++++++-
samples/trace_events/trace-events-sample.h | 34 ++++++++++++-
.../ftrace/test.d/dynevent/btf_probe_event.tc | 51 ++++++++++++++++++++
.../ftrace/test.d/dynevent/fprobe_syntax_errors.tc | 11 ++++
.../ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 11 ++++
.../ftrace/test.d/kprobe/uprobe_syntax_errors.tc | 5 ++
6 files changed, 147 insertions(+), 5 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 0b7a6efdb247..ca5d98c360cb 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -94,6 +94,20 @@ static int simple_thread_fn(void *arg)
static DEFINE_MUTEX(thread_mutex);
static int simple_thread_cnt;
+static struct foo_timer_data *foo_timer_data;
+
+static void sample_timer_cb(struct timer_list *t)
+{
+ struct foo_timer_data *data = container_of(t, struct foo_timer_data, timer);
+
+ get_cpu();
+ trace_foo_timer_fn(data);
+ (*this_cpu_ptr(data->counter))++;
+ put_cpu();
+
+ mod_timer(t, jiffies + HZ);
+}
+
int foo_bar_reg(void)
{
mutex_lock(&thread_mutex);
@@ -132,9 +146,27 @@ void foo_bar_unreg(void)
static int __init trace_event_init(void)
{
+ foo_timer_data = kzalloc_obj(*foo_timer_data, GFP_KERNEL);
+ if (!foo_timer_data)
+ return -ENOMEM;
+
+ foo_timer_data->name = "sample_timer_counter";
+ foo_timer_data->counter = alloc_percpu(int);
+ if (!foo_timer_data->counter) {
+ kfree(foo_timer_data);
+ return -ENOMEM;
+ }
+
+ timer_setup(&foo_timer_data->timer, sample_timer_cb, 0);
+ mod_timer(&foo_timer_data->timer, jiffies + HZ);
+
simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
- if (IS_ERR(simple_tsk))
- return -1;
+ if (IS_ERR(simple_tsk)) {
+ timer_shutdown_sync(&foo_timer_data->timer);
+ free_percpu(foo_timer_data->counter);
+ kfree(foo_timer_data);
+ return PTR_ERR(simple_tsk);
+ }
return 0;
}
@@ -147,6 +179,10 @@ static void __exit trace_event_exit(void)
kthread_stop(simple_tsk_fn);
simple_tsk_fn = NULL;
mutex_unlock(&thread_mutex);
+
+ timer_shutdown_sync(&foo_timer_data->timer);
+ free_percpu(foo_timer_data->counter);
+ kfree(foo_timer_data);
}
module_init(trace_event_init);
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 1a05fc153353..816848a456a2 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -247,12 +247,14 @@
*/
/*
- * It is OK to have helper functions in the file, but they need to be protected
- * from being defined more than once. Remember, this file gets included more
- * than once.
+ * It is OK to have helper functions and data structures in the file, but they
+ * need to be protected from being defined more than once. Remember, this file
+ * gets included more than once.
*/
#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
+#include <linux/timer.h>
+
static inline int __length_of(const int *list)
{
int i;
@@ -270,6 +272,13 @@ enum {
TRACE_SAMPLE_BAR = 4,
TRACE_SAMPLE_ZOO = 8,
};
+
+struct foo_timer_data {
+ const char *name;
+ struct timer_list timer;
+ int __percpu *counter;
+};
+
#endif
/*
@@ -595,6 +604,25 @@ TRACE_EVENT(foo_rel_loc,
__get_rel_bitmask(bitmask),
__get_rel_cpumask(cpumask))
);
+
+TRACE_EVENT(foo_timer_fn,
+
+ TP_PROTO(struct foo_timer_data *data),
+
+ TP_ARGS(data),
+
+ TP_STRUCT__entry(
+ __string( name, data->name )
+ __field( int, count )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ __entry->count = *this_cpu_ptr(data->counter);
+ ),
+
+ TP_printk("name=%s count=%d", __get_str(name), __entry->count)
+);
#endif
/***** NOTICE! The #if protection ends here. *****/
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
new file mode 100644
index 000000000000..96791e120b7d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: BTF event with typecast and percpu access
+# requires: dynamic_events "this_cpu_read(<fetcharg>)":README "[(structname[,field])]<argname>[->field[->field|.field...]]":README
+
+# Check if the sample module is loaded
+if ! lsmod | grep -q trace_events_sample; then
+ modprobe trace-events-sample || exit_unsupported
+fi
+
+echo 0 > events/enable
+echo > dynamic_events
+
+# The sample_timer_cb(struct timer_list *t) is called.
+# We want to check (STRUCT,FIELD)VAR typecast and this_cpu_read() access.
+# (foo_timer_data,timer)t converts t to struct foo_timer_data * using container_of.
+# data->counter is a per-cpu pointer to int.
+# this_cpu_read(data->counter) should give the value of the counter.
+
+echo 'f:mysample/myevent sample_timer_cb name=(foo_timer_data,timer)t->name:string count=this_cpu_read((foo_timer_data,timer)t->counter)' >> dynamic_events
+
+echo 1 > events/mysample/myevent/enable
+echo 1 > events/sample-trace/foo_timer_fn/enable
+
+sleep 2
+
+echo 0 > events/mysample/myevent/enable
+echo 0 > events/sample-trace/foo_timer_fn/enable
+
+# Compare the values.
+MATCH=0
+while read line; do
+ if echo $line | grep -q "foo_timer_fn:"; then
+ NAME=`echo $line | sed 's/.*name=\([^ ]*\) .*/\1/'`
+ COUNT=`echo $line | sed 's/.*count=\([^ ]*\).*/\1/'`
+ if grep -q "myevent:.*name=\"${NAME}\" count=$COUNT" trace; then
+ MATCH=$((MATCH+1))
+ fi
+ fi
+done < trace
+
+if [ $MATCH -eq 0 ]; then
+ echo "No matching events found"
+ exit_fail
+fi
+
+# Clean up
+echo 0 > events/mysample/myevent/enable
+echo 0 > events/sample-trace/foo_timer_fn/enable
+echo > dynamic_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index fee479295e2f..e111d426a984 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -112,6 +112,17 @@ check_error 'f vfs_read%return $retval->^foo' # NO_PTR_STRCT
check_error 'f vfs_read file->^foo' # NO_BTF_FIELD
check_error 'f vfs_read file^-.foo' # BAD_HYPHEN
check_error 'f vfs_read ^file:string' # BAD_TYPE4STR
+if grep -qF "[(structname" README ; then
+check_error 'f vfs_read arg1=(task_struct)file^' # TYPECAST_REQ_FIELD
+check_error 'f vfs_read arg1=(a)((b)((c)(^(d)file->d)->c)->b)->a' # TOO_MANY_NESTED
+check_error 'f vfs_read arg1=(task_struct,^in_execve)file->comm' # TYPECAST_NOT_ALIGNED
+check_error 'f vfs_read arg1=(task_struct,^foo_bar)file->pid' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(^task_struct1234)file->pid' # NO_PTR_STRCT
+check_error 'f vfs_read arg1=(task_struct,se^->group_node)file->comm' # TYPECAST_BAD_ARROW
+check_error 'f vfs_read arg1=(task_struct,^->pid)file->comm' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(task_struct,^.pid)file->comm' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(task_struct,^.)file->comm' # NO_BTF_FIELD
+fi
fi
else
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index 8f1c58f0c239..626adeb2e840 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -115,6 +115,17 @@ check_error 'p vfs_read+20 ^$arg*' # NOFENTRY_ARGS
check_error 'p vfs_read ^hoge' # NO_BTFARG
check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
check_error 'r kfree ^$retval' # NO_RETVAL
+if grep -qF "[(structname" README ; then
+check_error 'p vfs_read arg1=(task_struct)file^' # TYPECAST_REQ_FIELD
+check_error 'p vfs_read arg1=(a)((b)((c)(^(d)file->d)->c)->b)->a' # TOO_MANY_NESTED
+check_error 'p vfs_read arg1=(task_struct,^in_execve)file->comm' # TYPECAST_NOT_ALIGNED
+check_error 'p vfs_read arg1=(task_struct,^foo_bar)file->pid' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(^task_struct1234)file->pid' # NO_PTR_STRCT
+check_error 'p vfs_read arg1=(task_struct,se^->group_node)file->comm' # TYPECAST_BAD_ARROW
+check_error 'p vfs_read arg1=(task_struct,^->pid)file->comm' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(task_struct,^.pid)file->comm' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(task_struct,^.)file->comm' # NO_BTF_FIELD
+fi
else
check_error 'p vfs_read ^$arg*' # NOSUP_BTFARG
fi
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index c817158b99db..e12dc967ec76 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -28,4 +28,9 @@ if grep -q ".*symstr.*" README; then
check_error 'p /bin/sh:10 $stack0:^symstr' # BAD_TYPE
fi
+# $current is not supported by uprobe
+if grep -q "\$current.*" README; then
+check_error 'p /bin/sh:10 ^$current:u8' # BAD_VAR
+fi
+
exit 0
^ permalink raw reply related
* [RESEND PATCH v6 7/8] tracing/probes: Add this_cpu_read() and this_cpu_ptr() dereference method to fetcharg
From: Masami Hiramatsu (Google) @ 2026-06-21 3:27 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178201238795.570818.15573963115625446598.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
When tracing kernel local variables, we sometimes need to read per-CPU
variables. The current simple dereference is not enough to access
them.
Thus, introduce a special this_cpu_read() dereference to access a per-CPU
variable on the current CPU (accessing another CPU's variable may race with
updates on other CPUs). Also introduce this_cpu_ptr() for getting the
address of a per-CPU variable on the current CPU.
These work the same way as the kernel percpu macros.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v6:
- Rebased on dump fetcharg patch.
- Fix to fetch static percpu variable with @SYM correctly.
Changes in v5:
- Simplify this_cpu_read() into +0(this_cpu_ptr()).
Changes in v3:
- Remove NULL check for percpu var because it is just an offset, could be 0.
- Simplify process_fetch_insn_bottom() code.
- If the last operation is this_cpu_read(), read only memory of the specific
size (of type).
Changes in v2:
- Drop +CPU/+PCPU and introduce this_cpu_read() and this_cpu_ptr().
- Support these methods with BTF typecast.
- Just check the base address is NOT NULL instead of is_kernel_percpu_address().
---
Documentation/trace/eprobetrace.rst | 2
Documentation/trace/fprobetrace.rst | 2
Documentation/trace/kprobetrace.rst | 2
kernel/trace/trace.c | 1
kernel/trace/trace_probe.c | 143 ++++++++++++++++++++++++++---------
kernel/trace/trace_probe.h | 1
kernel/trace/trace_probe_tmpl.h | 22 ++++-
7 files changed, 129 insertions(+), 44 deletions(-)
diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst
index 680e0af43d5d..279396951b34 100644
--- a/Documentation/trace/eprobetrace.rst
+++ b/Documentation/trace/eprobetrace.rst
@@ -39,6 +39,8 @@ Synopsis of eprobe_events
@SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
$comm : Fetch current task comm.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
+ this_cpu_read(FETCHARG) : Read the value of the per-CPU variable FETCHARG on the current CPU.
+ this_cpu_ptr(FETCHARG) : Get the address of the per-CPU variable FETCHARG on the current CPU.
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index 3392cab016b3..3439bc9bd351 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -52,6 +52,8 @@ Synopsis of fprobe-events
$comm : Fetch current task comm.
$current : Fetch the address of the current task_struct.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*4)(\*5)
+ this_cpu_read(FETCHARG) : Read the value of the per-CPU variable FETCHARG on the current CPU.
+ this_cpu_ptr(FETCHARG) : Get the address of the per-CPU variable FETCHARG on the current CPU.
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 81e4fe38791d..9ae330eb0a52 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -55,6 +55,8 @@ Synopsis of kprobe_events
$comm : Fetch current task comm.
$current : Fetch the address of the current task_struct.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
+ this_cpu_read(FETCHARG) : Read the value of the per-CPU variable FETCHARG on the current CPU.
+ this_cpu_ptr(FETCHARG) : Get the address of the per-CPU variable FETCHARG on the current CPU.
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7a5676524f1a..d4121acc2938 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4332,6 +4332,7 @@ static const char readme_msg[] =
"\t $stack<index>, $stack, $retval, $comm, $current\n"
#endif
"\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
+ "\t this_cpu_read(<fetcharg>), this_cpu_ptr(<fetcharg>)\n"
"\t kernel return probes support: $retval, $arg<N>, $comm\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
"\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1ddd0a804e39..a64edb0c6baa 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -349,6 +349,100 @@ static int parse_trace_event(char *arg, struct fetch_insn *code,
return -EINVAL;
}
+/* this_cpu_* parser */
+#define THIS_CPU_PTR_PREFIX "this_cpu_ptr("
+#define THIS_CPU_READ_PREFIX "this_cpu_read("
+#define THIS_CPU_PTR_LEN (sizeof(THIS_CPU_PTR_PREFIX) - 1)
+#define THIS_CPU_READ_LEN (sizeof(THIS_CPU_READ_PREFIX) - 1)
+
+static int
+parse_probe_arg(char *arg, const struct fetch_type *type,
+ struct fetch_insn **pcode, struct fetch_insn *end,
+ struct traceprobe_parse_context *ctx);
+
+/* handle dereference nested call */
+static inline int handle_dereference(char *arg, struct fetch_insn **pcode,
+ struct fetch_insn *end, struct traceprobe_parse_context *ctx,
+ int deref, long offset)
+{
+ const struct fetch_type *type = find_fetch_type(NULL, ctx->flags);
+ struct fetch_insn *code = *pcode;
+ int cur_offs = ctx->offset;
+ char *tmp;
+ int ret;
+
+ tmp = strrchr(arg, ')');
+ if (!tmp) {
+ trace_probe_log_err(ctx->offset + strlen(arg),
+ DEREF_OPEN_BRACE);
+ return -EINVAL;
+ }
+
+ *tmp = '\0';
+ ret = parse_probe_arg(arg, type, &code, end, ctx);
+ if (ret)
+ return ret;
+ ctx->offset = cur_offs;
+ if (code->op == FETCH_OP_COMM || code->op == FETCH_OP_DATA) {
+ trace_probe_log_err(ctx->offset, COMM_CANT_DEREF);
+ return -EINVAL;
+ }
+
+ /*
+ * this_cpu_ptr(@SYM) does not use SYM value, but use SYM address.
+ * So we overwrite the last FETCH_OP_DEREF with FETCH_OP_CPU_PTR.
+ */
+ if (!(deref == FETCH_OP_CPU_PTR && *arg == '@')) {
+ code++;
+ if (code == end) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ *pcode = code;
+ }
+
+ code->op = deref;
+ code->offset = offset;
+ /* Reset the last type if used */
+ ctx->last_type = NULL;
+ return 0;
+}
+
+static int parse_this_cpu(char *arg, struct fetch_insn **pcode,
+ struct fetch_insn *end,
+ struct traceprobe_parse_context *ctx)
+{
+ struct fetch_insn *code;
+ bool is_ptr = false;
+ int ret;
+
+ if (str_has_prefix(arg, THIS_CPU_PTR_PREFIX)) {
+ arg += THIS_CPU_PTR_LEN;
+ ctx->offset += THIS_CPU_PTR_LEN;
+ is_ptr = true;
+ } else if (str_has_prefix(arg, THIS_CPU_READ_PREFIX)) {
+ arg += THIS_CPU_READ_LEN;
+ ctx->offset += THIS_CPU_READ_LEN;
+ } else
+ return -EINVAL;
+
+ ret = handle_dereference(arg, pcode, end, ctx, FETCH_OP_CPU_PTR, 0);
+ if (ret || is_ptr)
+ return ret;
+
+ /* this_cpu_read(VAR) -> +0(this_cpu_ptr(VAR)) */
+ code = *pcode;
+ code++;
+ if (code == end) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ code->op = FETCH_OP_DEREF;
+ code->offset = 0;
+ *pcode = code;
+ return 0;
+}
+
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
static u32 btf_type_int(const struct btf_type *t)
@@ -925,11 +1019,6 @@ static char *find_matched_close_paren(char *s)
return NULL;
}
-static int
-parse_probe_arg(char *arg, const struct fetch_type *type,
- struct fetch_insn **pcode, struct fetch_insn *end,
- struct traceprobe_parse_context *ctx);
-
static int handle_typecast(char *arg, struct fetch_insn **pcode,
struct fetch_insn *end,
struct traceprobe_parse_context *ctx)
@@ -982,7 +1071,9 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
/* Skip '(' */
ctx->offset += 1;
tmp++;
- } else if (*tmp == '+' || *tmp == '-') {
+ } else if (*tmp == '+' || *tmp == '-' ||
+ str_has_prefix(tmp, THIS_CPU_PTR_PREFIX) ||
+ str_has_prefix(tmp, THIS_CPU_READ_PREFIX)) {
/* Dereference can have another field access inside it. */
char *open = strchr(tmp + 1, '(');
@@ -1486,36 +1577,9 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
}
ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0);
arg = tmp + 1;
- tmp = strrchr(arg, ')');
- if (!tmp) {
- trace_probe_log_err(ctx->offset + strlen(arg),
- DEREF_OPEN_BRACE);
- return -EINVAL;
- } else {
- const struct fetch_type *t2 = find_fetch_type(NULL, ctx->flags);
- int cur_offs = ctx->offset;
-
- *tmp = '\0';
- ret = parse_probe_arg(arg, t2, &code, end, ctx);
- if (ret)
- break;
- ctx->offset = cur_offs;
- if (code->op == FETCH_OP_COMM ||
- code->op == FETCH_OP_DATA) {
- trace_probe_log_err(ctx->offset, COMM_CANT_DEREF);
- return -EINVAL;
- }
- if (++code == end) {
- trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
- return -EINVAL;
- }
- *pcode = code;
-
- code->op = deref;
- code->offset = offset;
- /* Reset the last type if used */
- ctx->last_type = NULL;
- }
+ ret = handle_dereference(arg, pcode, end, ctx, deref, offset);
+ if (ret < 0)
+ return ret;
break;
case '\\': /* Immediate value */
if (arg[1] == '"') { /* Immediate string */
@@ -1536,15 +1600,18 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
ret = handle_typecast(arg, pcode, end, ctx);
break;
default:
- if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */
+ if (str_has_prefix(arg, THIS_CPU_PTR_PREFIX) ||
+ str_has_prefix(arg, THIS_CPU_READ_PREFIX)) {
+ ret = parse_this_cpu(arg, pcode, end, ctx);
+ } else if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */
if (!tparg_is_function_entry(ctx->flags) &&
!tparg_is_function_return(ctx->flags)) {
trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
return -EINVAL;
}
ret = parse_btf_arg(arg, pcode, end, ctx);
- break;
}
+ break;
}
if (!ret && code->op == FETCH_OP_NOP) {
/* Parsed, but do not find fetch method */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 488d6790b5ef..e79e019b922d 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -101,6 +101,7 @@ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
/* Stage 2 (dereference) ops */ \
FETCH_OP(DEREF, offset) /* Dereference: .offset */ \
FETCH_OP(UDEREF, offset) /* User-space dereference: .offset */\
+ FETCH_OP(CPU_PTR, none) /* Per-CPU pointer: no parameter */ \
/* Stage 3 (store) ops */ \
FETCH_OP(ST_RAW, store) /* Raw value: .size */ \
FETCH_OP(ST_MEM, store) /* Memory: .offset, .size */ \
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index f630930288d2..9265b03cf19d 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -129,25 +129,35 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
struct fetch_insn *s3 = NULL;
int total = 0, ret = 0, i = 0;
u32 loc = 0;
- unsigned long lval = val;
+ unsigned long lval, llval = val;
stage2:
/* 2nd stage: dereference memory if needed */
do {
- if (code->op == FETCH_OP_DEREF) {
- lval = val;
+ lval = val;
+ switch (code->op) {
+ case FETCH_OP_DEREF:
ret = probe_mem_read(&val, (void *)val + code->offset,
sizeof(val));
- } else if (code->op == FETCH_OP_UDEREF) {
- lval = val;
+ break;
+ case FETCH_OP_UDEREF:
ret = probe_mem_read_user(&val,
(void *)val + code->offset, sizeof(val));
- } else
break;
+ case FETCH_OP_CPU_PTR:
+ val = (unsigned long)this_cpu_ptr((void __percpu *)val);
+ ret = 0;
+ break;
+ default:
+ lval = llval;
+ goto out;
+ }
if (ret)
return ret;
+ llval = lval;
code++;
} while (1);
+out:
s3 = code;
stage3:
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox