* [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
@ 2025-11-20 21:00 Steven Rostedt
2025-11-21 23:53 ` kernel test robot
0 siblings, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2025-11-20 21:00 UTC (permalink / raw)
To: LKML, Linux Trace Kernel; +Cc: Masami Hiramatsu, Mathieu Desnoyers, Tom Zanussi
From: Steven Rostedt <rostedt@goodmis.org>
Currently a trigger can only be added to individual events. Some triggers
(like stacktrace) can be useful to add as a bulk trigger for a set of
system events (like interrupt or scheduling).
Add a trigger file to the system directories:
/sys/kernel/tracing/events/*/trigger
And allow stacktrace trigger to be enabled for all those events.
Writing into the system/trigger file acts the same as writing into each of
the system's events' trigger files individually.
This also allows removing a trigger from all events in a subsystem (even
if it's not a subsystem trigger!).
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Note, this is based on top of:
https://patchwork.kernel.org/project/linux-trace-kernel/cover/20251120205600.570673392@kernel.org/
Documentation/trace/events.rst | 25 ++++
kernel/trace/trace.c | 11 +-
kernel/trace/trace.h | 15 +-
kernel/trace/trace_events.c | 70 +++++-----
kernel/trace/trace_events_trigger.c | 205 +++++++++++++++++++++++++++-
5 files changed, 284 insertions(+), 42 deletions(-)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 18d112963dec..caa4958af43a 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -416,6 +416,31 @@ way, so beware about making generalizations between the two.
can also enable triggers that are written into
/sys/kernel/tracing/events/ftrace/print/trigger
+The system directory also has a trigger file that allows some triggers to be
+set for all the system's events. This is limited to only a small subset of the
+triggers and does not allow for the count parameter. But it does allow for
+filters. Writing into this file is the same as writing into each of the
+system's events' trigger files individually. Although only a subset of
+triggers may use this file for enabling, all triggers may use this file for
+disabling::
+
+ cd /sys/kernel/tracing
+ cat events/sched/trigger
+ # Available system triggers:
+ # stacktrace
+
+ echo stacktrace > events/sched/trigger
+ cat events/sched/sched_switch/trigger
+ stacktrace:unlimited
+
+ echo snapshot > events/sched/sched_waking/trigger
+ cat events/sched/sched_waking/trigger
+ snapshot:unlimited
+ echo '!snapshot' > events/sched/trigger
+ cat events/sched/sched_waking/trigger
+ # Available triggers:
+ # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist
+
6.1 Expression syntax
---------------------
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 59cd4ed8af6d..d400c013d42b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -592,11 +592,12 @@ void trace_set_ring_buffer_expanded(struct trace_array *tr)
LIST_HEAD(ftrace_trace_arrays);
-int trace_array_get(struct trace_array *this_tr)
+int __trace_array_get(struct trace_array *this_tr)
{
struct trace_array *tr;
- guard(mutex)(&trace_types_lock);
+ lockdep_assert_held(&trace_types_lock);
+
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (tr == this_tr) {
tr->ref++;
@@ -607,6 +608,12 @@ int trace_array_get(struct trace_array *this_tr)
return -ENODEV;
}
+int trace_array_get(struct trace_array *tr)
+{
+ guard(mutex)(&trace_types_lock);
+ return __trace_array_get(tr);
+}
+
static void __trace_array_put(struct trace_array *this_tr)
{
WARN_ON(!this_tr->ref);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd5a6daa6c25..7379763a057d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -469,10 +469,14 @@ extern struct list_head ftrace_trace_arrays;
extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
+extern int __trace_array_get(struct trace_array *tr);
extern int tracing_check_open_get_tr(struct trace_array *tr);
extern struct trace_array *trace_array_find(const char *instance);
extern struct trace_array *trace_array_find_get(const char *instance);
+extern struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode);
+void trace_put_system_dir(struct trace_subsystem_dir *dir);
+
extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe);
extern int tracing_set_filter_buffering(struct trace_array *tr, bool set);
extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -1774,6 +1778,7 @@ static inline struct trace_event_file *event_file_file(struct file *filp)
}
extern const struct file_operations event_trigger_fops;
+extern const struct file_operations event_system_trigger_fops;
extern const struct file_operations event_hist_fops;
extern const struct file_operations event_hist_debug_fops;
extern const struct file_operations event_inject_fops;
@@ -2057,10 +2062,16 @@ struct event_command {
* regardless of whether or not it has a filter associated with
* it (filters make a trigger require access to the trace record
* but are not always present).
+ *
+ * @SYSTEM: A flag that says whether or not this command can be used
+ * at the event system level. For example, can it be written into
+ * events/sched/trigger file where it will be enabled for all
+ * sched events?
*/
enum event_command_flags {
- EVENT_CMD_FL_POST_TRIGGER = 1,
- EVENT_CMD_FL_NEEDS_REC = 2,
+ EVENT_CMD_FL_POST_TRIGGER = BIT(1),
+ EVENT_CMD_FL_NEEDS_REC = BIT(2),
+ EVENT_CMD_FL_SYSTEM = BIT(3),
};
static inline bool event_command_post_trigger(struct event_command *cmd_ops)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b07ad9eb284..f00b41f73fc2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2168,51 +2168,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
static LIST_HEAD(event_subsystems);
-static int subsystem_open(struct inode *inode, struct file *filp)
+struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode)
{
- struct trace_subsystem_dir *dir = NULL, *iter_dir;
- struct trace_array *tr = NULL, *iter_tr;
- struct event_subsystem *system = NULL;
- int ret;
+ struct trace_subsystem_dir *dir;
+ struct trace_array *tr = NULL;
- if (tracing_is_disabled())
- return -ENODEV;
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
/* Make sure the system still exists */
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
- list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
- list_for_each_entry(iter_dir, &iter_tr->systems, list) {
- if (iter_dir == inode->i_private) {
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ list_for_each_entry(dir, &tr->systems, list) {
+ if (dir == inode->i_private) {
/* Don't open systems with no events */
- tr = iter_tr;
- dir = iter_dir;
- if (dir->nr_events) {
- __get_system_dir(dir);
- system = dir->subsystem;
- }
- goto exit_loop;
+ if (!dir->nr_events)
+ return NULL;
+ if (__trace_array_get(tr) < 0)
+ return NULL;
+ __get_system_dir(dir);
+ return dir;
}
}
}
- exit_loop:
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
+ return NULL;
+}
- if (!system)
+void trace_put_system_dir(struct trace_subsystem_dir *dir)
+{
+ trace_array_put(dir->tr);
+ put_system(dir);
+}
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+ struct trace_subsystem_dir *dir;
+ int ret;
+
+ if (tracing_is_disabled())
return -ENODEV;
- /* Still need to increment the ref count of the system */
- if (trace_array_get(tr) < 0) {
- put_system(dir);
+ dir = trace_get_system_dir(inode);
+ if (!dir)
return -ENODEV;
- }
ret = tracing_open_generic(inode, filp);
- if (ret < 0) {
- trace_array_put(tr);
- put_system(dir);
- }
+ if (ret < 0)
+ trace_put_system_dir(dir);
return ret;
}
@@ -2761,6 +2762,9 @@ static int system_callback(const char *name, umode_t *mode, void **data,
else if (strcmp(name, "enable") == 0)
*fops = &ftrace_system_enable_fops;
+ else if (strcmp(name, "trigger") == 0)
+ *fops = &event_system_trigger_fops;
+
else
return 0;
@@ -2784,6 +2788,10 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
{
.name = "enable",
.callback = system_callback,
+ },
+ {
+ .name = "trigger",
+ .callback = system_callback,
}
};
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 1dfe69146a81..b621406054b7 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -329,21 +329,28 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
return -EINVAL;
}
+static char *get_user_buf(const char __user *ubuf, size_t cnt)
+{
+ if (!cnt)
+ return NULL;
+
+ if (cnt >= PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ return memdup_user_nul(ubuf, cnt);
+}
+
static ssize_t event_trigger_regex_write(struct file *file,
const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_event_file *event_file;
ssize_t ret;
- char *buf __free(kfree) = NULL;
+ char *buf __free(kfree) = get_user_buf(ubuf, cnt);
- if (!cnt)
+ if (!buf)
return 0;
- if (cnt >= PAGE_SIZE)
- return -EINVAL;
-
- buf = memdup_user_nul(ubuf, cnt);
if (IS_ERR(buf))
return PTR_ERR(buf);
@@ -397,6 +404,190 @@ const struct file_operations event_trigger_fops = {
.release = event_trigger_release,
};
+static ssize_t
+event_system_trigger_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ char *buf __free(kfree) = kmalloc(SZ_4K, GFP_KERNEL);
+ struct event_command *p;
+ struct seq_buf s;
+ int len;
+
+ if (!buf)
+ return -ENOMEM;
+
+ seq_buf_init(&s, buf, SZ_4K);
+
+ seq_buf_puts(&s, "# Available system triggers:\n");
+ seq_buf_putc(&s, '#');
+
+ guard(mutex)(&trigger_cmd_mutex);
+ list_for_each_entry_reverse(p, &trigger_commands, list) {
+ if (p->flags & EVENT_CMD_FL_SYSTEM)
+ seq_buf_printf(&s, " %s", p->name);
+ }
+ seq_buf_putc(&s, '\n');
+
+ len = seq_buf_used(&s);
+
+ if (*ppos >= len)
+ return 0;
+
+ len -= *ppos;
+
+ if (count > len)
+ count = len;
+
+ if (copy_to_user(ubuf, buf + *ppos, count))
+ return -EFAULT;
+
+ *ppos += count;
+
+ return count;
+}
+
+static int process_system_events(struct trace_subsystem_dir *dir,
+ struct event_command *p, char *buff,
+ char *command, char *next)
+{
+ struct event_subsystem *system = dir->subsystem;
+ struct trace_event_file *file;
+ struct trace_array *tr = dir->tr;
+ bool remove = false;
+ int ret = 0;
+
+ if (buff[0] == '!')
+ remove = true;
+
+ lockdep_assert_held(&event_mutex);
+
+ list_for_each_entry(file, &tr->events, list) {
+
+ if (strcmp(system->name, file->event_call->class->system) != 0)
+ continue;
+
+ ret = p->parse(p, file, buff, command, next);
+
+ /* Removals and existing events do not error */
+ if (ret < 0 && ret != -EEXIST && !remove) {
+ pr_warn("Failed adding trigger %s on %s\n",
+ command, trace_event_name(file->event_call));
+ }
+ }
+ return 0;
+}
+
+static ssize_t
+event_system_trigger_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_subsystem_dir *dir = filp->private_data;
+ struct event_command *p;
+ char *command, *next;
+ char *buf __free(kfree) = get_user_buf(ubuf, cnt);
+ bool remove = false;
+ bool found = false;
+ ssize_t ret;
+ int len;
+
+ if (!buf)
+ return 0;
+
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ /* system triggers are not allowed to have counters */
+ if (strchr(buf, ':'))
+ return -EINVAL;
+
+ /* If opened for read too, dir is in the seq_file descriptor */
+ if (filp->f_mode & FMODE_READ) {
+ struct seq_file *m = filp->private_data;
+ dir = m->private;
+ }
+
+ /* Skip added space at beginning of buf */
+ next = buf;
+ strim(next);
+
+ command = strsep(&next, " \t");
+ if (next) {
+ next = skip_spaces(next);
+ if (!*next)
+ next = NULL;
+ }
+ if (command[0] == '!') {
+ remove = true;
+ command++;
+ }
+
+ len = strlen(command);
+ if (next)
+ len += strlen(next) + 1;
+
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trigger_cmd_mutex);
+
+ list_for_each_entry(p, &trigger_commands, list) {
+ /* Allow to remove any trigger */
+ if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
+ continue;
+ if (strcmp(p->name, command) == 0) {
+ found = true;
+ ret = process_system_events(dir, p, buf, command, next);
+ break;
+ }
+ }
+
+ if (!found)
+ ret = -ENODEV;
+
+ if (!ret)
+ *ppos += cnt;
+
+ if (remove || ret < 0)
+ return ret ? : cnt;
+
+ return cnt;
+}
+
+static int
+event_system_trigger_open(struct inode *inode, struct file *file)
+{
+ struct trace_subsystem_dir *dir;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
+ dir = trace_get_system_dir(inode);
+ if (!dir)
+ return -ENODEV;
+
+ file->private_data = dir;
+
+ return ret;
+}
+
+static int
+event_system_trigger_release(struct inode *inode, struct file *file)
+{
+ struct trace_subsystem_dir *dir = inode->i_private;
+
+ trace_put_system_dir(dir);
+
+ return 0;
+}
+
+const struct file_operations event_system_trigger_fops = {
+ .open = event_system_trigger_open,
+ .read = event_system_trigger_read,
+ .write = event_system_trigger_write,
+ .llseek = tracing_lseek,
+ .release = event_system_trigger_release,
+};
+
/*
* Currently we only register event commands from __init, so mark this
* __init too.
@@ -1587,7 +1778,7 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
- .flags = EVENT_CMD_FL_POST_TRIGGER,
+ .flags = EVENT_CMD_FL_POST_TRIGGER | EVENT_CMD_FL_SYSTEM,
.parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
--
2.51.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
2025-11-20 21:00 [PATCH] tracing: Add system trigger file to enable triggers for all the system's events Steven Rostedt
@ 2025-11-21 23:53 ` kernel test robot
0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2025-11-21 23:53 UTC (permalink / raw)
To: Steven Rostedt, LKML, Linux Trace Kernel
Cc: llvm, oe-kbuild-all, Masami Hiramatsu, Mathieu Desnoyers,
Tom Zanussi
Hi Steven,
kernel test robot noticed the following build warnings:
[auto build test WARNING on trace/for-next]
[also build test WARNING on linus/master v6.18-rc6 next-20251121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/tracing-Add-system-trigger-file-to-enable-triggers-for-all-the-system-s-events/20251121-050454
base: https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link: https://lore.kernel.org/r/20251120160003.2fa33d80%40gandalf.local.home
patch subject: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
config: sparc64-defconfig (https://download.01.org/0day-ci/archive/20251122/202511220730.PQFWRcck-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251122/202511220730.PQFWRcck-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511220730.PQFWRcck-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> kernel/trace/trace_events_trigger.c:432:6: warning: variable 'len' set but not used [-Wunused-but-set-variable]
432 | int len;
| ^
1 warning generated.
vim +/len +432 kernel/trace/trace_events_trigger.c
420
421 static ssize_t
422 event_system_trigger_write(struct file *filp, const char __user *ubuf,
423 size_t cnt, loff_t *ppos)
424 {
425 struct trace_subsystem_dir *dir = filp->private_data;
426 struct event_command *p;
427 char *command, *next;
428 char *buf __free(kfree) = get_user_buf(ubuf, cnt);
429 bool remove = false;
430 bool found = false;
431 ssize_t ret;
> 432 int len;
433
434 if (!buf)
435 return 0;
436
437 if (IS_ERR(buf))
438 return PTR_ERR(buf);
439
440 /* system triggers are not allowed to have counters */
441 if (strchr(buf, ':'))
442 return -EINVAL;
443
444 /* If opened for read too, dir is in the seq_file descriptor */
445 if (filp->f_mode & FMODE_READ) {
446 struct seq_file *m = filp->private_data;
447 dir = m->private;
448 }
449
450 /* Skip added space at beginning of buf */
451 next = buf;
452 strim(next);
453
454 command = strsep(&next, " \t");
455 if (next) {
456 next = skip_spaces(next);
457 if (!*next)
458 next = NULL;
459 }
460 if (command[0] == '!') {
461 remove = true;
462 command++;
463 }
464
465 len = strlen(command);
466 if (next)
467 len += strlen(next) + 1;
468
469 guard(mutex)(&event_mutex);
470 guard(mutex)(&trigger_cmd_mutex);
471
472 list_for_each_entry(p, &trigger_commands, list) {
473 /* Allow to remove any trigger */
474 if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
475 continue;
476 if (strcmp(p->name, command) == 0) {
477 found = true;
478 ret = process_system_events(dir, p, buf, command, next);
479 break;
480 }
481 }
482
483 if (!found)
484 ret = -ENODEV;
485
486 if (!ret)
487 *ppos += cnt;
488
489 if (remove || ret < 0)
490 return ret ? : cnt;
491
492 return cnt;
493 }
494
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
@ 2025-11-26 20:24 Steven Rostedt
2025-12-01 22:56 ` Tom Zanussi
0 siblings, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2025-11-26 20:24 UTC (permalink / raw)
To: LKML, Linux Trace Kernel; +Cc: Masami Hiramatsu, Mathieu Desnoyers, Tom Zanussi
From: Steven Rostedt <rostedt@goodmis.org>
Currently a trigger can only be added to individual events. Some triggers
(like stacktrace) can be useful to add as a bulk trigger for a set of
system events (like interrupt or scheduling).
Add a trigger file to the system directories:
/sys/kernel/tracing/events/*/trigger
And allow stacktrace trigger to be enabled for all those events.
Writing into the system/trigger file acts the same as writing into each of
the system's events' trigger files individually.
This also allows removing a trigger from all events in a subsystem (even
if it's not a subsystem trigger!).
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Changes since v2: https://patch.msgid.link/20251125200837.31aae207@gandalf.local.home
- Removed unneeded NULL initialization of tr (Masami Hiramatsu)
Documentation/trace/events.rst | 25 ++++
kernel/trace/trace.c | 11 +-
kernel/trace/trace.h | 15 ++-
kernel/trace/trace_events.c | 70 +++++-----
kernel/trace/trace_events_trigger.c | 199 +++++++++++++++++++++++++++-
5 files changed, 278 insertions(+), 42 deletions(-)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 18d112963dec..caa4958af43a 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -416,6 +416,31 @@ way, so beware about making generalizations between the two.
can also enable triggers that are written into
/sys/kernel/tracing/events/ftrace/print/trigger
+The system directory also has a trigger file that allows some triggers to be
+set for all the system's events. This is limited to only a small subset of the
+triggers and does not allow for the count parameter. But it does allow for
+filters. Writing into this file is the same as writing into each of the
+system's events' trigger files individually. Although only a subset of
+triggers may use this file for enabling, all triggers may use this file for
+disabling::
+
+ cd /sys/kernel/tracing
+ cat events/sched/trigger
+ # Available system triggers:
+ # stacktrace
+
+ echo stacktrace > events/sched/trigger
+ cat events/sched/sched_switch/trigger
+ stacktrace:unlimited
+
+ echo snapshot > events/sched/sched_waking/trigger
+ cat events/sched/sched_waking/trigger
+ snapshot:unlimited
+ echo '!snapshot' > events/sched/trigger
+ cat events/sched/sched_waking/trigger
+ # Available triggers:
+ # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist
+
6.1 Expression syntax
---------------------
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73f8b79f1b0c..f59645ab5140 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -592,11 +592,12 @@ void trace_set_ring_buffer_expanded(struct trace_array *tr)
LIST_HEAD(ftrace_trace_arrays);
-int trace_array_get(struct trace_array *this_tr)
+int __trace_array_get(struct trace_array *this_tr)
{
struct trace_array *tr;
- guard(mutex)(&trace_types_lock);
+ lockdep_assert_held(&trace_types_lock);
+
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (tr == this_tr) {
tr->ref++;
@@ -607,6 +608,12 @@ int trace_array_get(struct trace_array *this_tr)
return -ENODEV;
}
+int trace_array_get(struct trace_array *tr)
+{
+ guard(mutex)(&trace_types_lock);
+ return __trace_array_get(tr);
+}
+
static void __trace_array_put(struct trace_array *this_tr)
{
WARN_ON(!this_tr->ref);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c2b61bcd912f..c4d6074e184c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -471,10 +471,14 @@ extern struct list_head ftrace_trace_arrays;
extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
+extern int __trace_array_get(struct trace_array *tr);
extern int tracing_check_open_get_tr(struct trace_array *tr);
extern struct trace_array *trace_array_find(const char *instance);
extern struct trace_array *trace_array_find_get(const char *instance);
+extern struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode);
+void trace_put_system_dir(struct trace_subsystem_dir *dir);
+
extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe);
extern int tracing_set_filter_buffering(struct trace_array *tr, bool set);
extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -1777,6 +1781,7 @@ static inline struct trace_event_file *event_file_file(struct file *filp)
}
extern const struct file_operations event_trigger_fops;
+extern const struct file_operations event_system_trigger_fops;
extern const struct file_operations event_hist_fops;
extern const struct file_operations event_hist_debug_fops;
extern const struct file_operations event_inject_fops;
@@ -2060,10 +2065,16 @@ struct event_command {
* regardless of whether or not it has a filter associated with
* it (filters make a trigger require access to the trace record
* but are not always present).
+ *
+ * @SYSTEM: A flag that says whether or not this command can be used
+ * at the event system level. For example, can it be written into
+ * events/sched/trigger file where it will be enabled for all
+ * sched events?
*/
enum event_command_flags {
- EVENT_CMD_FL_POST_TRIGGER = 1,
- EVENT_CMD_FL_NEEDS_REC = 2,
+ EVENT_CMD_FL_POST_TRIGGER = BIT(1),
+ EVENT_CMD_FL_NEEDS_REC = BIT(2),
+ EVENT_CMD_FL_SYSTEM = BIT(3),
};
static inline bool event_command_post_trigger(struct event_command *cmd_ops)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b07ad9eb284..5cbbcd86cef0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2168,51 +2168,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
static LIST_HEAD(event_subsystems);
-static int subsystem_open(struct inode *inode, struct file *filp)
+struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode)
{
- struct trace_subsystem_dir *dir = NULL, *iter_dir;
- struct trace_array *tr = NULL, *iter_tr;
- struct event_subsystem *system = NULL;
- int ret;
+ struct trace_subsystem_dir *dir;
+ struct trace_array *tr;
- if (tracing_is_disabled())
- return -ENODEV;
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
/* Make sure the system still exists */
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
- list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
- list_for_each_entry(iter_dir, &iter_tr->systems, list) {
- if (iter_dir == inode->i_private) {
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ list_for_each_entry(dir, &tr->systems, list) {
+ if (dir == inode->i_private) {
/* Don't open systems with no events */
- tr = iter_tr;
- dir = iter_dir;
- if (dir->nr_events) {
- __get_system_dir(dir);
- system = dir->subsystem;
- }
- goto exit_loop;
+ if (!dir->nr_events)
+ return NULL;
+ if (__trace_array_get(tr) < 0)
+ return NULL;
+ __get_system_dir(dir);
+ return dir;
}
}
}
- exit_loop:
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
+ return NULL;
+}
- if (!system)
+void trace_put_system_dir(struct trace_subsystem_dir *dir)
+{
+ trace_array_put(dir->tr);
+ put_system(dir);
+}
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+ struct trace_subsystem_dir *dir;
+ int ret;
+
+ if (tracing_is_disabled())
return -ENODEV;
- /* Still need to increment the ref count of the system */
- if (trace_array_get(tr) < 0) {
- put_system(dir);
+ dir = trace_get_system_dir(inode);
+ if (!dir)
return -ENODEV;
- }
ret = tracing_open_generic(inode, filp);
- if (ret < 0) {
- trace_array_put(tr);
- put_system(dir);
- }
+ if (ret < 0)
+ trace_put_system_dir(dir);
return ret;
}
@@ -2761,6 +2762,9 @@ static int system_callback(const char *name, umode_t *mode, void **data,
else if (strcmp(name, "enable") == 0)
*fops = &ftrace_system_enable_fops;
+ else if (strcmp(name, "trigger") == 0)
+ *fops = &event_system_trigger_fops;
+
else
return 0;
@@ -2784,6 +2788,10 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
{
.name = "enable",
.callback = system_callback,
+ },
+ {
+ .name = "trigger",
+ .callback = system_callback,
}
};
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 96aad82b1628..b69b906fb620 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -328,21 +328,28 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
return -EINVAL;
}
+static char *get_user_buf(const char __user *ubuf, size_t cnt)
+{
+ if (!cnt)
+ return NULL;
+
+ if (cnt >= PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ return memdup_user_nul(ubuf, cnt);
+}
+
static ssize_t event_trigger_regex_write(struct file *file,
const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_event_file *event_file;
ssize_t ret;
- char *buf __free(kfree) = NULL;
+ char *buf __free(kfree) = get_user_buf(ubuf, cnt);
- if (!cnt)
+ if (!buf)
return 0;
- if (cnt >= PAGE_SIZE)
- return -EINVAL;
-
- buf = memdup_user_nul(ubuf, cnt);
if (IS_ERR(buf))
return PTR_ERR(buf);
@@ -396,6 +403,184 @@ const struct file_operations event_trigger_fops = {
.release = event_trigger_release,
};
+static ssize_t
+event_system_trigger_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ char *buf __free(kfree) = kmalloc(SZ_4K, GFP_KERNEL);
+ struct event_command *p;
+ struct seq_buf s;
+ int len;
+
+ if (!buf)
+ return -ENOMEM;
+
+ seq_buf_init(&s, buf, SZ_4K);
+
+ seq_buf_puts(&s, "# Available system triggers:\n");
+ seq_buf_putc(&s, '#');
+
+ guard(mutex)(&trigger_cmd_mutex);
+ list_for_each_entry_reverse(p, &trigger_commands, list) {
+ if (p->flags & EVENT_CMD_FL_SYSTEM)
+ seq_buf_printf(&s, " %s", p->name);
+ }
+ seq_buf_putc(&s, '\n');
+
+ len = seq_buf_used(&s);
+
+ if (*ppos >= len)
+ return 0;
+
+ len -= *ppos;
+
+ if (count > len)
+ count = len;
+
+ if (copy_to_user(ubuf, buf + *ppos, count))
+ return -EFAULT;
+
+ *ppos += count;
+
+ return count;
+}
+
+static int process_system_events(struct trace_subsystem_dir *dir,
+ struct event_command *p, char *buff,
+ char *command, char *next)
+{
+ struct event_subsystem *system = dir->subsystem;
+ struct trace_event_file *file;
+ struct trace_array *tr = dir->tr;
+ bool remove = false;
+ int ret = 0;
+
+ if (buff[0] == '!')
+ remove = true;
+
+ lockdep_assert_held(&event_mutex);
+
+ list_for_each_entry(file, &tr->events, list) {
+
+ if (strcmp(system->name, file->event_call->class->system) != 0)
+ continue;
+
+ ret = p->parse(p, file, buff, command, next);
+
+ /* Removals and existing events do not error */
+ if (ret < 0 && ret != -EEXIST && !remove) {
+ pr_warn("Failed adding trigger %s on %s\n",
+ command, trace_event_name(file->event_call));
+ }
+ }
+ return 0;
+}
+
+static ssize_t
+event_system_trigger_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_subsystem_dir *dir = filp->private_data;
+ struct event_command *p;
+ char *command, *next;
+ char *buf __free(kfree) = get_user_buf(ubuf, cnt);
+ bool remove = false;
+ bool found = false;
+ ssize_t ret;
+
+ if (!buf)
+ return 0;
+
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ /* system triggers are not allowed to have counters */
+ if (strchr(buf, ':'))
+ return -EINVAL;
+
+ /* If opened for read too, dir is in the seq_file descriptor */
+ if (filp->f_mode & FMODE_READ) {
+ struct seq_file *m = filp->private_data;
+ dir = m->private;
+ }
+
+ /* Skip added space at beginning of buf */
+ next = strim(buf);
+
+ command = strsep(&next, " \t");
+ if (next) {
+ next = skip_spaces(next);
+ if (!*next)
+ next = NULL;
+ }
+ if (command[0] == '!') {
+ remove = true;
+ command++;
+ }
+
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trigger_cmd_mutex);
+
+ list_for_each_entry(p, &trigger_commands, list) {
+ /* Allow to remove any trigger */
+ if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
+ continue;
+ if (strcmp(p->name, command) == 0) {
+ found = true;
+ ret = process_system_events(dir, p, buf, command, next);
+ break;
+ }
+ }
+
+ if (!found)
+ ret = -ENODEV;
+
+ if (!ret)
+ *ppos += cnt;
+
+ if (remove || ret < 0)
+ return ret ? : cnt;
+
+ return cnt;
+}
+
+static int
+event_system_trigger_open(struct inode *inode, struct file *file)
+{
+ struct trace_subsystem_dir *dir;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
+ dir = trace_get_system_dir(inode);
+ if (!dir)
+ return -ENODEV;
+
+ file->private_data = dir;
+
+ return ret;
+}
+
+static int
+event_system_trigger_release(struct inode *inode, struct file *file)
+{
+ struct trace_subsystem_dir *dir = inode->i_private;
+
+ trace_put_system_dir(dir);
+
+ return 0;
+}
+
+const struct file_operations event_system_trigger_fops = {
+ .open = event_system_trigger_open,
+ .read = event_system_trigger_read,
+ .write = event_system_trigger_write,
+ .llseek = tracing_lseek,
+ .release = event_system_trigger_release,
+};
+
/*
* Currently we only register event commands from __init, so mark this
* __init too.
@@ -1586,7 +1771,7 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
- .flags = EVENT_CMD_FL_POST_TRIGGER,
+ .flags = EVENT_CMD_FL_POST_TRIGGER | EVENT_CMD_FL_SYSTEM,
.parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
--
2.51.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
2025-11-26 20:24 Steven Rostedt
@ 2025-12-01 22:56 ` Tom Zanussi
0 siblings, 0 replies; 4+ messages in thread
From: Tom Zanussi @ 2025-12-01 22:56 UTC (permalink / raw)
To: Steven Rostedt, LKML, Linux Trace Kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers
Hi Steve,
On Wed, 2025-11-26 at 15:24 -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> Currently a trigger can only be added to individual events. Some triggers
> (like stacktrace) can be useful to add as a bulk trigger for a set of
> system events (like interrupt or scheduling).
>
> Add a trigger file to the system directories:
>
> /sys/kernel/tracing/events/*/trigger
>
> And allow stacktrace trigger to be enabled for all those events.
>
> Writing into the system/trigger file acts the same as writing into each of
> the system event's trigger files individually.
>
> This also allows to remove a trigger from all events in a subsystem (even
> if it's not a subsystem trigger!).
>
This looks very useful! Just a couple comments below..
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
> Changes since v2: https://patch.msgid.link/20251125200837.31aae207@gandalf.local.home
>
> - Removed unneeded NULL initialization of tr (Masami Hiramatsu)
>
>
>
[ ... ]
> +static ssize_t
> +event_system_trigger_write(struct file *filp, const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> +{
> + struct trace_subsystem_dir *dir = filp->private_data;
> + struct event_command *p;
> + char *command, *next;
> + char *buf __free(kfree) = get_user_buf(ubuf, cnt);
nit: I think you could move this up a couple lines and keep the reverse
Christmas tree.
> + bool remove = false;
> + bool found = false;
> + ssize_t ret;
> +
> + if (!buf)
> + return 0;
> +
> + if (IS_ERR(buf))
> + return PTR_ERR(buf);
> +
> + /* system triggers are not allowed to have counters */
> + if (strchr(buf, ':'))
> + return -EINVAL;
> +
As mentioned by Masami, I think this would preclude the hist triggers
from having this enabled.
I'm guessing you didn't see the hist commands as one of the triggers
that would be useful as system triggers, but there might be a couple
cases where they could be if using the common fields e.g.
echo hist:keys=common_pid.execname:vals=hitcount > events/syscalls/trigger
or anywhere just
echo hist:keys=common_stacktrace > trigger
Just about everything else would error out, but these might be worth having.
Maybe you could just change this check to look for :<number> at the end of
the command?
Thanks,
Tom
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-12-01 22:56 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-11-20 21:00 [PATCH] tracing: Add system trigger file to enable triggers for all the system's events Steven Rostedt
2025-11-21 23:53 ` kernel test robot
-- strict thread matches above, loose matches on Subject: below --
2025-11-26 20:24 Steven Rostedt
2025-12-01 22:56 ` Tom Zanussi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).