linux-trace-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
@ 2025-11-20 21:00 Steven Rostedt
  2025-11-21 23:53 ` kernel test robot
  0 siblings, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2025-11-20 21:00 UTC (permalink / raw)
  To: LKML, Linux Trace Kernel; +Cc: Masami Hiramatsu, Mathieu Desnoyers, Tom Zanussi

From: Steven Rostedt <rostedt@goodmis.org>

Currently a trigger can only be added to individual events. Some triggers
(like stacktrace) can be useful to add as a bulk trigger for a set of
system events (like interrupt or scheduling).

Add a trigger file to the system directories:

   /sys/kernel/tracing/events/*/trigger

And allow stacktrace trigger to be enabled for all those events.

Writing into the system/trigger file acts the same as writing into each of
the system's events' trigger files individually.

This also allows removing a trigger from all events in a subsystem (even
if it's not a trigger that can be enabled at the subsystem level!).

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Note, this is based on top of:

  https://patchwork.kernel.org/project/linux-trace-kernel/cover/20251120205600.570673392@kernel.org/

 Documentation/trace/events.rst      |  25 ++++
 kernel/trace/trace.c                |  11 +-
 kernel/trace/trace.h                |  15 +-
 kernel/trace/trace_events.c         |  70 +++++-----
 kernel/trace/trace_events_trigger.c | 205 +++++++++++++++++++++++++++-
 5 files changed, 284 insertions(+), 42 deletions(-)

diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 18d112963dec..caa4958af43a 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -416,6 +416,31 @@ way, so beware about making generalizations between the two.
      can also enable triggers that are written into
      /sys/kernel/tracing/events/ftrace/print/trigger
 
+The system directory also has a trigger file that allows some triggers to be
+set for all the system's events. This is limited to only a small subset of the
+triggers and does not allow for the count parameter, but it does allow for
+filters. Writing into this file is the same as writing into each of the
+system's events' trigger files individually. Although only a subset of
+triggers may use this file for enabling, all triggers may use this file for
+disabling::
+
+	cd /sys/kernel/tracing
+	cat events/sched/trigger
+	# Available system triggers:
+	# stacktrace
+
+	echo stacktrace > events/sched/trigger
+	cat events/sched/sched_switch/trigger
+	stacktrace:unlimited
+
+	echo snapshot > events/sched/sched_waking/trigger
+	cat events/sched/sched_waking/trigger
+	snapshot:unlimited
+	echo '!snapshot' > events/sched/trigger
+	cat events/sched/sched_waking/trigger
+	# Available triggers:
+	# traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist
+
 6.1 Expression syntax
 ---------------------
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 59cd4ed8af6d..d400c013d42b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -592,11 +592,12 @@ void trace_set_ring_buffer_expanded(struct trace_array *tr)
 
 LIST_HEAD(ftrace_trace_arrays);
 
-int trace_array_get(struct trace_array *this_tr)
+int __trace_array_get(struct trace_array *this_tr)
 {
 	struct trace_array *tr;
 
-	guard(mutex)(&trace_types_lock);
+	lockdep_assert_held(&trace_types_lock);
+
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 		if (tr == this_tr) {
 			tr->ref++;
@@ -607,6 +608,12 @@ int trace_array_get(struct trace_array *this_tr)
 	return -ENODEV;
 }
 
+int trace_array_get(struct trace_array *tr)
+{
+	guard(mutex)(&trace_types_lock);
+	return __trace_array_get(tr);
+}
+
 static void __trace_array_put(struct trace_array *this_tr)
 {
 	WARN_ON(!this_tr->ref);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd5a6daa6c25..7379763a057d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -469,10 +469,14 @@ extern struct list_head ftrace_trace_arrays;
 extern struct mutex trace_types_lock;
 
 extern int trace_array_get(struct trace_array *tr);
+extern int __trace_array_get(struct trace_array *tr);
 extern int tracing_check_open_get_tr(struct trace_array *tr);
 extern struct trace_array *trace_array_find(const char *instance);
 extern struct trace_array *trace_array_find_get(const char *instance);
 
+extern struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode);
+void trace_put_system_dir(struct trace_subsystem_dir *dir);
+
 extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe);
 extern int tracing_set_filter_buffering(struct trace_array *tr, bool set);
 extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -1774,6 +1778,7 @@ static inline struct trace_event_file *event_file_file(struct file *filp)
 }
 
 extern const struct file_operations event_trigger_fops;
+extern const struct file_operations event_system_trigger_fops;
 extern const struct file_operations event_hist_fops;
 extern const struct file_operations event_hist_debug_fops;
 extern const struct file_operations event_inject_fops;
@@ -2057,10 +2062,16 @@ struct event_command {
  *	regardless of whether or not it has a filter associated with
  *	it (filters make a trigger require access to the trace record
  *	but are not always present).
+ *
+ * @SYSTEM: A flag that says whether or not this command can be used
+ *	at the event system level. For example, can it be written into
+ *	events/sched/trigger file where it will be enabled for all
+ *	sched events?
  */
 enum event_command_flags {
-	EVENT_CMD_FL_POST_TRIGGER	= 1,
-	EVENT_CMD_FL_NEEDS_REC		= 2,
+	EVENT_CMD_FL_POST_TRIGGER	= BIT(1),
+	EVENT_CMD_FL_NEEDS_REC		= BIT(2),
+	EVENT_CMD_FL_SYSTEM		= BIT(3),
 };
 
 static inline bool event_command_post_trigger(struct event_command *cmd_ops)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b07ad9eb284..f00b41f73fc2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2168,51 +2168,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
 static LIST_HEAD(event_subsystems);
 
-static int subsystem_open(struct inode *inode, struct file *filp)
+struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode)
 {
-	struct trace_subsystem_dir *dir = NULL, *iter_dir;
-	struct trace_array *tr = NULL, *iter_tr;
-	struct event_subsystem *system = NULL;
-	int ret;
+	struct trace_subsystem_dir *dir;
+	struct trace_array *tr = NULL;
 
-	if (tracing_is_disabled())
-		return -ENODEV;
+	guard(mutex)(&event_mutex);
+	guard(mutex)(&trace_types_lock);
 
 	/* Make sure the system still exists */
-	mutex_lock(&event_mutex);
-	mutex_lock(&trace_types_lock);
-	list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
-		list_for_each_entry(iter_dir, &iter_tr->systems, list) {
-			if (iter_dir == inode->i_private) {
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		list_for_each_entry(dir, &tr->systems, list) {
+			if (dir == inode->i_private) {
 				/* Don't open systems with no events */
-				tr = iter_tr;
-				dir = iter_dir;
-				if (dir->nr_events) {
-					__get_system_dir(dir);
-					system = dir->subsystem;
-				}
-				goto exit_loop;
+				if (!dir->nr_events)
+					return NULL;
+				if (__trace_array_get(tr) < 0)
+					return NULL;
+				__get_system_dir(dir);
+				return dir;
 			}
 		}
 	}
- exit_loop:
-	mutex_unlock(&trace_types_lock);
-	mutex_unlock(&event_mutex);
+	return NULL;
+}
 
-	if (!system)
+void trace_put_system_dir(struct trace_subsystem_dir *dir)
+{
+	trace_array_put(dir->tr);
+	put_system(dir);
+}
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+	struct trace_subsystem_dir *dir;
+	int ret;
+
+	if (tracing_is_disabled())
 		return -ENODEV;
 
-	/* Still need to increment the ref count of the system */
-	if (trace_array_get(tr) < 0) {
-		put_system(dir);
+	dir = trace_get_system_dir(inode);
+	if (!dir)
 		return -ENODEV;
-	}
 
 	ret = tracing_open_generic(inode, filp);
-	if (ret < 0) {
-		trace_array_put(tr);
-		put_system(dir);
-	}
+	if (ret < 0)
+		trace_put_system_dir(dir);
 
 	return ret;
 }
@@ -2761,6 +2762,9 @@ static int system_callback(const char *name, umode_t *mode, void **data,
 	else if (strcmp(name, "enable") == 0)
 		*fops = &ftrace_system_enable_fops;
 
+	else if (strcmp(name, "trigger") == 0)
+		*fops = &event_system_trigger_fops;
+
 	else
 		return 0;
 
@@ -2784,6 +2788,10 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
 		{
 			.name		= "enable",
 			.callback	= system_callback,
+		},
+		{
+			.name		= "trigger",
+			.callback	= system_callback,
 		}
 	};
 
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 1dfe69146a81..b621406054b7 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -329,21 +329,28 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
 	return -EINVAL;
 }
 
+static char *get_user_buf(const char __user *ubuf, size_t cnt)
+{
+	if (!cnt)
+		return NULL;
+
+	if (cnt >= PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	return memdup_user_nul(ubuf, cnt);
+}
+
 static ssize_t event_trigger_regex_write(struct file *file,
 					 const char __user *ubuf,
 					 size_t cnt, loff_t *ppos)
 {
 	struct trace_event_file *event_file;
 	ssize_t ret;
-	char *buf __free(kfree) = NULL;
+	char *buf __free(kfree) = get_user_buf(ubuf, cnt);
 
-	if (!cnt)
+	if (!buf)
 		return 0;
 
-	if (cnt >= PAGE_SIZE)
-		return -EINVAL;
-
-	buf = memdup_user_nul(ubuf, cnt);
 	if (IS_ERR(buf))
 		return PTR_ERR(buf);
 
@@ -397,6 +404,190 @@ const struct file_operations event_trigger_fops = {
 	.release = event_trigger_release,
 };
 
+static ssize_t
+event_system_trigger_read(struct file *filp, char __user *ubuf,
+			  size_t count, loff_t *ppos)
+{
+	char *buf __free(kfree) = kmalloc(SZ_4K, GFP_KERNEL);
+	struct event_command *p;
+	struct seq_buf s;
+	int len;
+
+	if (!buf)
+		return -ENOMEM;
+
+	seq_buf_init(&s, buf, SZ_4K);
+
+	seq_buf_puts(&s, "# Available system triggers:\n");
+	seq_buf_putc(&s, '#');
+
+	guard(mutex)(&trigger_cmd_mutex);
+	list_for_each_entry_reverse(p, &trigger_commands, list) {
+		if (p->flags & EVENT_CMD_FL_SYSTEM)
+			seq_buf_printf(&s, " %s", p->name);
+	}
+	seq_buf_putc(&s, '\n');
+
+	len = seq_buf_used(&s);
+
+	if (*ppos >= len)
+		return 0;
+
+	len -= *ppos;
+
+	if (count > len)
+		count = len;
+
+	if (copy_to_user(ubuf, buf + *ppos, count))
+		return -EFAULT;
+
+	*ppos += count;
+
+	return count;
+}
+
+static int process_system_events(struct trace_subsystem_dir *dir,
+				 struct event_command *p, char *buff,
+				 char *command, char *next)
+{
+	struct event_subsystem *system = dir->subsystem;
+	struct trace_event_file *file;
+	struct trace_array *tr = dir->tr;
+	bool remove = false;
+	int ret = 0;
+
+	if (buff[0] == '!')
+		remove = true;
+
+	lockdep_assert_held(&event_mutex);
+
+	list_for_each_entry(file, &tr->events, list) {
+
+		if (strcmp(system->name, file->event_call->class->system) != 0)
+			continue;
+
+		ret = p->parse(p, file, buff, command, next);
+
+		/* Removals and existing events do not error */
+		if (ret < 0 && ret != -EEXIST && !remove) {
+			pr_warn("Failed adding trigger %s on %s\n",
+				command, trace_event_name(file->event_call));
+		}
+	}
+	return 0;
+}
+
+static ssize_t
+event_system_trigger_write(struct file *filp, const char __user *ubuf,
+		    size_t cnt, loff_t *ppos)
+{
+	struct trace_subsystem_dir *dir = filp->private_data;
+	struct event_command *p;
+	char *command, *next;
+	char *buf __free(kfree) = get_user_buf(ubuf, cnt);
+	bool remove = false;
+	bool found = false;
+	ssize_t ret;
+	int len;
+
+	if (!buf)
+		return 0;
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	/* system triggers are not allowed to have counters */
+	if (strchr(buf, ':'))
+		return -EINVAL;
+
+	/* If opened for read too, dir is in the seq_file descriptor */
+	if (filp->f_mode & FMODE_READ) {
+		struct seq_file *m = filp->private_data;
+		dir = m->private;
+	}
+
+	/* Skip added space at beginning of buf */
+	next = buf;
+	strim(next);
+
+	command = strsep(&next, " \t");
+	if (next) {
+		next = skip_spaces(next);
+		if (!*next)
+			next = NULL;
+	}
+	if (command[0] == '!') {
+		remove = true;
+		command++;
+	}
+
+	len = strlen(command);
+	if (next)
+		len += strlen(next) + 1;
+
+	guard(mutex)(&event_mutex);
+	guard(mutex)(&trigger_cmd_mutex);
+
+	list_for_each_entry(p, &trigger_commands, list) {
+		/* Allow to remove any trigger */
+		if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
+			continue;
+		if (strcmp(p->name, command) == 0) {
+			found = true;
+			ret = process_system_events(dir, p, buf, command, next);
+			break;
+		}
+	}
+
+	if (!found)
+		ret = -ENODEV;
+
+	if (!ret)
+		*ppos += cnt;
+
+	if (remove || ret < 0)
+		return ret ? : cnt;
+
+	return cnt;
+}
+
+static int
+event_system_trigger_open(struct inode *inode, struct file *file)
+{
+	struct trace_subsystem_dir *dir;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
+	dir = trace_get_system_dir(inode);
+	if (!dir)
+		return -ENODEV;
+
+	file->private_data = dir;
+
+	return ret;
+}
+
+static int
+event_system_trigger_release(struct inode *inode, struct file *file)
+{
+	struct trace_subsystem_dir *dir = inode->i_private;
+
+	trace_put_system_dir(dir);
+
+	return 0;
+}
+
+const struct file_operations event_system_trigger_fops = {
+	.open = event_system_trigger_open,
+	.read = event_system_trigger_read,
+	.write = event_system_trigger_write,
+	.llseek = tracing_lseek,
+	.release = event_system_trigger_release,
+};
+
 /*
  * Currently we only register event commands from __init, so mark this
  * __init too.
@@ -1587,7 +1778,7 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
 static struct event_command trigger_stacktrace_cmd = {
 	.name			= "stacktrace",
 	.trigger_type		= ETT_STACKTRACE,
-	.flags			= EVENT_CMD_FL_POST_TRIGGER,
+	.flags			= EVENT_CMD_FL_POST_TRIGGER | EVENT_CMD_FL_SYSTEM,
 	.parse			= event_trigger_parse,
 	.reg			= register_trigger,
 	.unreg			= unregister_trigger,
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
  2025-11-20 21:00 [PATCH] tracing: Add system trigger file to enable triggers for all the system's events Steven Rostedt
@ 2025-11-21 23:53 ` kernel test robot
  0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2025-11-21 23:53 UTC (permalink / raw)
  To: Steven Rostedt, LKML, Linux Trace Kernel
  Cc: llvm, oe-kbuild-all, Masami Hiramatsu, Mathieu Desnoyers,
	Tom Zanussi

Hi Steven,

kernel test robot noticed the following build warnings:

[auto build test WARNING on trace/for-next]
[also build test WARNING on linus/master v6.18-rc6 next-20251121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/tracing-Add-system-trigger-file-to-enable-triggers-for-all-the-system-s-events/20251121-050454
base:   https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link:    https://lore.kernel.org/r/20251120160003.2fa33d80%40gandalf.local.home
patch subject: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
config: sparc64-defconfig (https://download.01.org/0day-ci/archive/20251122/202511220730.PQFWRcck-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251122/202511220730.PQFWRcck-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511220730.PQFWRcck-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/trace/trace_events_trigger.c:432:6: warning: variable 'len' set but not used [-Wunused-but-set-variable]
     432 |         int len;
         |             ^
   1 warning generated.


vim +/len +432 kernel/trace/trace_events_trigger.c

   420	
   421	static ssize_t
   422	event_system_trigger_write(struct file *filp, const char __user *ubuf,
   423			    size_t cnt, loff_t *ppos)
   424	{
   425		struct trace_subsystem_dir *dir = filp->private_data;
   426		struct event_command *p;
   427		char *command, *next;
   428		char *buf __free(kfree) = get_user_buf(ubuf, cnt);
   429		bool remove = false;
   430		bool found = false;
   431		ssize_t ret;
 > 432		int len;
   433	
   434		if (!buf)
   435			return 0;
   436	
   437		if (IS_ERR(buf))
   438			return PTR_ERR(buf);
   439	
   440		/* system triggers are not allowed to have counters */
   441		if (strchr(buf, ':'))
   442			return -EINVAL;
   443	
   444		/* If opened for read too, dir is in the seq_file descriptor */
   445		if (filp->f_mode & FMODE_READ) {
   446			struct seq_file *m = filp->private_data;
   447			dir = m->private;
   448		}
   449	
   450		/* Skip added space at beginning of buf */
   451		next = buf;
   452		strim(next);
   453	
   454		command = strsep(&next, " \t");
   455		if (next) {
   456			next = skip_spaces(next);
   457			if (!*next)
   458				next = NULL;
   459		}
   460		if (command[0] == '!') {
   461			remove = true;
   462			command++;
   463		}
   464	
   465		len = strlen(command);
   466		if (next)
   467			len += strlen(next) + 1;
   468	
   469		guard(mutex)(&event_mutex);
   470		guard(mutex)(&trigger_cmd_mutex);
   471	
   472		list_for_each_entry(p, &trigger_commands, list) {
   473			/* Allow to remove any trigger */
   474			if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
   475				continue;
   476			if (strcmp(p->name, command) == 0) {
   477				found = true;
   478				ret = process_system_events(dir, p, buf, command, next);
   479				break;
   480			}
   481		}
   482	
   483		if (!found)
   484			ret = -ENODEV;
   485	
   486		if (!ret)
   487			*ppos += cnt;
   488	
   489		if (remove || ret < 0)
   490			return ret ? : cnt;
   491	
   492		return cnt;
   493	}
   494	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
@ 2025-11-26 20:24 Steven Rostedt
  2025-12-01 22:56 ` Tom Zanussi
  0 siblings, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2025-11-26 20:24 UTC (permalink / raw)
  To: LKML, Linux Trace Kernel; +Cc: Masami Hiramatsu, Mathieu Desnoyers, Tom Zanussi

From: Steven Rostedt <rostedt@goodmis.org>

Currently a trigger can only be added to individual events. Some triggers
(like stacktrace) can be useful to add as a bulk trigger for a set of
system events (like interrupt or scheduling).

Add a trigger file to the system directories:

   /sys/kernel/tracing/events/*/trigger

And allow stacktrace trigger to be enabled for all those events.

Writing into the system/trigger file acts the same as writing into each of
the system's events' trigger files individually.

This also allows removing a trigger from all events in a subsystem (even
if it's not a trigger that can be enabled at the subsystem level!).

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Changes since v2: https://patch.msgid.link/20251125200837.31aae207@gandalf.local.home

- Removed unneeded NULL initialization of tr (Masami Hiramatsu)

 Documentation/trace/events.rst      |  25 ++++
 kernel/trace/trace.c                |  11 +-
 kernel/trace/trace.h                |  15 ++-
 kernel/trace/trace_events.c         |  70 +++++-----
 kernel/trace/trace_events_trigger.c | 199 +++++++++++++++++++++++++++-
 5 files changed, 278 insertions(+), 42 deletions(-)

diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 18d112963dec..caa4958af43a 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -416,6 +416,31 @@ way, so beware about making generalizations between the two.
      can also enable triggers that are written into
      /sys/kernel/tracing/events/ftrace/print/trigger
 
+The system directory also has a trigger file that allows some triggers to be
+set for all the system's events. This is limited to only a small subset of the
+triggers and does not allow for the count parameter, but it does allow for
+filters. Writing into this file is the same as writing into each of the
+system's events' trigger files individually. Although only a subset of
+triggers may use this file for enabling, all triggers may use this file for
+disabling::
+
+	cd /sys/kernel/tracing
+	cat events/sched/trigger
+	# Available system triggers:
+	# stacktrace
+
+	echo stacktrace > events/sched/trigger
+	cat events/sched/sched_switch/trigger
+	stacktrace:unlimited
+
+	echo snapshot > events/sched/sched_waking/trigger
+	cat events/sched/sched_waking/trigger
+	snapshot:unlimited
+	echo '!snapshot' > events/sched/trigger
+	cat events/sched/sched_waking/trigger
+	# Available triggers:
+	# traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist
+
 6.1 Expression syntax
 ---------------------
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73f8b79f1b0c..f59645ab5140 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -592,11 +592,12 @@ void trace_set_ring_buffer_expanded(struct trace_array *tr)
 
 LIST_HEAD(ftrace_trace_arrays);
 
-int trace_array_get(struct trace_array *this_tr)
+int __trace_array_get(struct trace_array *this_tr)
 {
 	struct trace_array *tr;
 
-	guard(mutex)(&trace_types_lock);
+	lockdep_assert_held(&trace_types_lock);
+
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 		if (tr == this_tr) {
 			tr->ref++;
@@ -607,6 +608,12 @@ int trace_array_get(struct trace_array *this_tr)
 	return -ENODEV;
 }
 
+int trace_array_get(struct trace_array *tr)
+{
+	guard(mutex)(&trace_types_lock);
+	return __trace_array_get(tr);
+}
+
 static void __trace_array_put(struct trace_array *this_tr)
 {
 	WARN_ON(!this_tr->ref);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c2b61bcd912f..c4d6074e184c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -471,10 +471,14 @@ extern struct list_head ftrace_trace_arrays;
 extern struct mutex trace_types_lock;
 
 extern int trace_array_get(struct trace_array *tr);
+extern int __trace_array_get(struct trace_array *tr);
 extern int tracing_check_open_get_tr(struct trace_array *tr);
 extern struct trace_array *trace_array_find(const char *instance);
 extern struct trace_array *trace_array_find_get(const char *instance);
 
+extern struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode);
+void trace_put_system_dir(struct trace_subsystem_dir *dir);
+
 extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe);
 extern int tracing_set_filter_buffering(struct trace_array *tr, bool set);
 extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -1777,6 +1781,7 @@ static inline struct trace_event_file *event_file_file(struct file *filp)
 }
 
 extern const struct file_operations event_trigger_fops;
+extern const struct file_operations event_system_trigger_fops;
 extern const struct file_operations event_hist_fops;
 extern const struct file_operations event_hist_debug_fops;
 extern const struct file_operations event_inject_fops;
@@ -2060,10 +2065,16 @@ struct event_command {
  *	regardless of whether or not it has a filter associated with
  *	it (filters make a trigger require access to the trace record
  *	but are not always present).
+ *
+ * @SYSTEM: A flag that says whether or not this command can be used
+ *	at the event system level. For example, can it be written into
+ *	events/sched/trigger file where it will be enabled for all
+ *	sched events?
  */
 enum event_command_flags {
-	EVENT_CMD_FL_POST_TRIGGER	= 1,
-	EVENT_CMD_FL_NEEDS_REC		= 2,
+	EVENT_CMD_FL_POST_TRIGGER	= BIT(1),
+	EVENT_CMD_FL_NEEDS_REC		= BIT(2),
+	EVENT_CMD_FL_SYSTEM		= BIT(3),
 };
 
 static inline bool event_command_post_trigger(struct event_command *cmd_ops)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b07ad9eb284..5cbbcd86cef0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2168,51 +2168,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
 static LIST_HEAD(event_subsystems);
 
-static int subsystem_open(struct inode *inode, struct file *filp)
+struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode)
 {
-	struct trace_subsystem_dir *dir = NULL, *iter_dir;
-	struct trace_array *tr = NULL, *iter_tr;
-	struct event_subsystem *system = NULL;
-	int ret;
+	struct trace_subsystem_dir *dir;
+	struct trace_array *tr;
 
-	if (tracing_is_disabled())
-		return -ENODEV;
+	guard(mutex)(&event_mutex);
+	guard(mutex)(&trace_types_lock);
 
 	/* Make sure the system still exists */
-	mutex_lock(&event_mutex);
-	mutex_lock(&trace_types_lock);
-	list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
-		list_for_each_entry(iter_dir, &iter_tr->systems, list) {
-			if (iter_dir == inode->i_private) {
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		list_for_each_entry(dir, &tr->systems, list) {
+			if (dir == inode->i_private) {
 				/* Don't open systems with no events */
-				tr = iter_tr;
-				dir = iter_dir;
-				if (dir->nr_events) {
-					__get_system_dir(dir);
-					system = dir->subsystem;
-				}
-				goto exit_loop;
+				if (!dir->nr_events)
+					return NULL;
+				if (__trace_array_get(tr) < 0)
+					return NULL;
+				__get_system_dir(dir);
+				return dir;
 			}
 		}
 	}
- exit_loop:
-	mutex_unlock(&trace_types_lock);
-	mutex_unlock(&event_mutex);
+	return NULL;
+}
 
-	if (!system)
+void trace_put_system_dir(struct trace_subsystem_dir *dir)
+{
+	trace_array_put(dir->tr);
+	put_system(dir);
+}
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+	struct trace_subsystem_dir *dir;
+	int ret;
+
+	if (tracing_is_disabled())
 		return -ENODEV;
 
-	/* Still need to increment the ref count of the system */
-	if (trace_array_get(tr) < 0) {
-		put_system(dir);
+	dir = trace_get_system_dir(inode);
+	if (!dir)
 		return -ENODEV;
-	}
 
 	ret = tracing_open_generic(inode, filp);
-	if (ret < 0) {
-		trace_array_put(tr);
-		put_system(dir);
-	}
+	if (ret < 0)
+		trace_put_system_dir(dir);
 
 	return ret;
 }
@@ -2761,6 +2762,9 @@ static int system_callback(const char *name, umode_t *mode, void **data,
 	else if (strcmp(name, "enable") == 0)
 		*fops = &ftrace_system_enable_fops;
 
+	else if (strcmp(name, "trigger") == 0)
+		*fops = &event_system_trigger_fops;
+
 	else
 		return 0;
 
@@ -2784,6 +2788,10 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
 		{
 			.name		= "enable",
 			.callback	= system_callback,
+		},
+		{
+			.name		= "trigger",
+			.callback	= system_callback,
 		}
 	};
 
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 96aad82b1628..b69b906fb620 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -328,21 +328,28 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
 	return -EINVAL;
 }
 
+static char *get_user_buf(const char __user *ubuf, size_t cnt)
+{
+	if (!cnt)
+		return NULL;
+
+	if (cnt >= PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	return memdup_user_nul(ubuf, cnt);
+}
+
 static ssize_t event_trigger_regex_write(struct file *file,
 					 const char __user *ubuf,
 					 size_t cnt, loff_t *ppos)
 {
 	struct trace_event_file *event_file;
 	ssize_t ret;
-	char *buf __free(kfree) = NULL;
+	char *buf __free(kfree) = get_user_buf(ubuf, cnt);
 
-	if (!cnt)
+	if (!buf)
 		return 0;
 
-	if (cnt >= PAGE_SIZE)
-		return -EINVAL;
-
-	buf = memdup_user_nul(ubuf, cnt);
 	if (IS_ERR(buf))
 		return PTR_ERR(buf);
 
@@ -396,6 +403,184 @@ const struct file_operations event_trigger_fops = {
 	.release = event_trigger_release,
 };
 
+static ssize_t
+event_system_trigger_read(struct file *filp, char __user *ubuf,
+			  size_t count, loff_t *ppos)
+{
+	char *buf __free(kfree) = kmalloc(SZ_4K, GFP_KERNEL);
+	struct event_command *p;
+	struct seq_buf s;
+	int len;
+
+	if (!buf)
+		return -ENOMEM;
+
+	seq_buf_init(&s, buf, SZ_4K);
+
+	seq_buf_puts(&s, "# Available system triggers:\n");
+	seq_buf_putc(&s, '#');
+
+	guard(mutex)(&trigger_cmd_mutex);
+	list_for_each_entry_reverse(p, &trigger_commands, list) {
+		if (p->flags & EVENT_CMD_FL_SYSTEM)
+			seq_buf_printf(&s, " %s", p->name);
+	}
+	seq_buf_putc(&s, '\n');
+
+	len = seq_buf_used(&s);
+
+	if (*ppos >= len)
+		return 0;
+
+	len -= *ppos;
+
+	if (count > len)
+		count = len;
+
+	if (copy_to_user(ubuf, buf + *ppos, count))
+		return -EFAULT;
+
+	*ppos += count;
+
+	return count;
+}
+
+static int process_system_events(struct trace_subsystem_dir *dir,
+				 struct event_command *p, char *buff,
+				 char *command, char *next)
+{
+	struct event_subsystem *system = dir->subsystem;
+	struct trace_event_file *file;
+	struct trace_array *tr = dir->tr;
+	bool remove = false;
+	int ret = 0;
+
+	if (buff[0] == '!')
+		remove = true;
+
+	lockdep_assert_held(&event_mutex);
+
+	list_for_each_entry(file, &tr->events, list) {
+
+		if (strcmp(system->name, file->event_call->class->system) != 0)
+			continue;
+
+		ret = p->parse(p, file, buff, command, next);
+
+		/* Removals and existing events do not error */
+		if (ret < 0 && ret != -EEXIST && !remove) {
+			pr_warn("Failed adding trigger %s on %s\n",
+				command, trace_event_name(file->event_call));
+		}
+	}
+	return 0;
+}
+
+static ssize_t
+event_system_trigger_write(struct file *filp, const char __user *ubuf,
+		    size_t cnt, loff_t *ppos)
+{
+	struct trace_subsystem_dir *dir = filp->private_data;
+	struct event_command *p;
+	char *command, *next;
+	char *buf __free(kfree) = get_user_buf(ubuf, cnt);
+	bool remove = false;
+	bool found = false;
+	ssize_t ret;
+
+	if (!buf)
+		return 0;
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	/* system triggers are not allowed to have counters */
+	if (strchr(buf, ':'))
+		return -EINVAL;
+
+	/* If opened for read too, dir is in the seq_file descriptor */
+	if (filp->f_mode & FMODE_READ) {
+		struct seq_file *m = filp->private_data;
+		dir = m->private;
+	}
+
+	/* Skip added space at beginning of buf */
+	next = strim(buf);
+
+	command = strsep(&next, " \t");
+	if (next) {
+		next = skip_spaces(next);
+		if (!*next)
+			next = NULL;
+	}
+	if (command[0] == '!') {
+		remove = true;
+		command++;
+	}
+
+	guard(mutex)(&event_mutex);
+	guard(mutex)(&trigger_cmd_mutex);
+
+	list_for_each_entry(p, &trigger_commands, list) {
+		/* Allow to remove any trigger */
+		if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
+			continue;
+		if (strcmp(p->name, command) == 0) {
+			found = true;
+			ret = process_system_events(dir, p, buf, command, next);
+			break;
+		}
+	}
+
+	if (!found)
+		ret = -ENODEV;
+
+	if (!ret)
+		*ppos += cnt;
+
+	if (remove || ret < 0)
+		return ret ? : cnt;
+
+	return cnt;
+}
+
+static int
+event_system_trigger_open(struct inode *inode, struct file *file)
+{
+	struct trace_subsystem_dir *dir;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
+	dir = trace_get_system_dir(inode);
+	if (!dir)
+		return -ENODEV;
+
+	file->private_data = dir;
+
+	return ret;
+}
+
+static int
+event_system_trigger_release(struct inode *inode, struct file *file)
+{
+	struct trace_subsystem_dir *dir = inode->i_private;
+
+	trace_put_system_dir(dir);
+
+	return 0;
+}
+
+const struct file_operations event_system_trigger_fops = {
+	.open = event_system_trigger_open,
+	.read = event_system_trigger_read,
+	.write = event_system_trigger_write,
+	.llseek = tracing_lseek,
+	.release = event_system_trigger_release,
+};
+
 /*
  * Currently we only register event commands from __init, so mark this
  * __init too.
@@ -1586,7 +1771,7 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
 static struct event_command trigger_stacktrace_cmd = {
 	.name			= "stacktrace",
 	.trigger_type		= ETT_STACKTRACE,
-	.flags			= EVENT_CMD_FL_POST_TRIGGER,
+	.flags			= EVENT_CMD_FL_POST_TRIGGER | EVENT_CMD_FL_SYSTEM,
 	.parse			= event_trigger_parse,
 	.reg			= register_trigger,
 	.unreg			= unregister_trigger,
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
  2025-11-26 20:24 Steven Rostedt
@ 2025-12-01 22:56 ` Tom Zanussi
  0 siblings, 0 replies; 4+ messages in thread
From: Tom Zanussi @ 2025-12-01 22:56 UTC (permalink / raw)
  To: Steven Rostedt, LKML, Linux Trace Kernel
  Cc: Masami Hiramatsu, Mathieu Desnoyers

Hi Steve,

On Wed, 2025-11-26 at 15:24 -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
> 
> Currently a trigger can only be added to individual events. Some triggers
> (like stacktrace) can be useful to add as a bulk trigger for a set of
> system events (like interrupt or scheduling).
> 
> Add a trigger file to the system directories:
> 
>    /sys/kernel/tracing/events/*/trigger
> 
> And allow stacktrace trigger to be enabled for all those events.
> 
> Writing into the system/trigger file acts the same as writing into each of
> the system event's trigger files individually.
> 
> This also allows to remove a trigger from all events in a subsystem (even
> if it's not a subsystem trigger!).
> 

This looks very useful! Just a couple of comments below.

> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
> Changes since v2: https://patch.msgid.link/20251125200837.31aae207@gandalf.local.home
> 
> - Removed unneeded NULL initialization of tr (Masami Hiramatsu)
> 
> 
> 

[ ... ]

> +static ssize_t
> +event_system_trigger_write(struct file *filp, const char __user *ubuf,
> +		    size_t cnt, loff_t *ppos)
> +{
> +	struct trace_subsystem_dir *dir = filp->private_data;
> +	struct event_command *p;
> +	char *command, *next;
> +	char *buf __free(kfree) = get_user_buf(ubuf, cnt);

nit: I think you could move this up a couple lines and keep the reverse
Christmas tree.

> +	bool remove = false;
> +	bool found = false;
> +	ssize_t ret;
> +
> +	if (!buf)
> +		return 0;
> +
> +	if (IS_ERR(buf))
> +		return PTR_ERR(buf);
> +
> +	/* system triggers are not allowed to have counters */
> +	if (strchr(buf, ':'))
> +		return -EINVAL;
> +

As mentioned by Masami, I think this would preclude the hist triggers
from having this enabled.

I'm guessing you didn't see the hist commands as one of the triggers
that would be useful as system triggers, but there might be a couple of
cases where they could be, if using the common fields, e.g.

echo hist:keys=common_pid.execname:vals=hitcount > events/syscalls/trigger 

or anywhere just

echo hist:keys=common_stacktrace > trigger

Just about everything else would error out, but these might be worth having.

Maybe you could just change this check to look for :<number> at the end of
the command?

Thanks,

Tom



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-12-01 22:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-11-20 21:00 [PATCH] tracing: Add system trigger file to enable triggers for all the system's events Steven Rostedt
2025-11-21 23:53 ` kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2025-11-26 20:24 Steven Rostedt
2025-12-01 22:56 ` Tom Zanussi

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).