All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Tom Zanussi <zanussi@kernel.org>
Subject: [for-next][PATCH 07/31] tracing: Move histogram trigger variables from stack to per CPU structure
Date: Fri, 09 May 2025 09:12:56 -0400	[thread overview]
Message-ID: <20250509131314.611110671@goodmis.org> (raw)
In-Reply-To: 20250509131249.340302366@goodmis.org

From: Steven Rostedt <rostedt@goodmis.org>

The histogram trigger has three somewhat large arrays on the kernel stack:

	unsigned long entries[HIST_STACKTRACE_DEPTH];
	u64 var_ref_vals[TRACING_MAP_VARS_MAX];
	char compound_key[HIST_KEY_SIZE_MAX];

Checking the stack frame size of event_hist_trigger() shows that it
currently uses 816 bytes due to these variables!

Instead, allocate a per CPU structure that holds these arrays for each
context level (normal, softirq, irq and NMI). That is, each CPU will have
4 of these structures. This will be allocated when the first histogram
trigger is enabled and freed when the last is disabled. When the
histogram callback triggers, it will request this structure. The request
will disable preemption, use the current CPU's counter as the index to
select one of that CPU's structures, and then increment that counter.

The callback will use the arrays in this structure to perform its work and
then release the structure. That in turn will simply decrement the per CPU
index and enable preemption.

Moving the variables from the kernel stack to the per CPU structure brings
the stack frame of event_hist_trigger() down to just 112 bytes.

Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Tom Zanussi <zanussi@kernel.org>
Link: https://lore.kernel.org/20250407123851.74ea8d58@gandalf.local.home
Fixes: 067fe038e70f6 ("tracing: Add variable reference handling to hist triggers")
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 120 +++++++++++++++++++++++++++----
 1 file changed, 105 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index e139b58c3a43..e85bc59c0421 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5244,17 +5244,94 @@ hist_trigger_actions(struct hist_trigger_data *hist_data,
 	}
 }
 
+/*
+ * The hist_pad structure is used to save information to create
+ * a histogram from the histogram trigger. It's too big to store
+ * on the stack, so when the histogram trigger is initialized
+ * a percpu array of 4 hist_pad structures is allocated.
+ * This will cover every context from normal, softirq, irq and NMI
+ * in the very unlikely event that a trigger happens at each of
+ * these contexts and interrupts a currently active trigger.
+ */
+struct hist_pad {
+	unsigned long		entries[HIST_STACKTRACE_DEPTH];
+	u64			var_ref_vals[TRACING_MAP_VARS_MAX];
+	char			compound_key[HIST_KEY_SIZE_MAX];
+};
+
+static struct hist_pad __percpu *hist_pads;
+static DEFINE_PER_CPU(int, hist_pad_cnt);
+static refcount_t hist_pad_ref;
+
+/* One hist_pad for every context (normal, softirq, irq, NMI) */
+#define MAX_HIST_CNT 4
+
+static int alloc_hist_pad(void)
+{
+	lockdep_assert_held(&event_mutex);
+
+	if (refcount_read(&hist_pad_ref)) {
+		refcount_inc(&hist_pad_ref);
+		return 0;
+	}
+
+	hist_pads = __alloc_percpu(sizeof(struct hist_pad) * MAX_HIST_CNT,
+				   __alignof__(struct hist_pad));
+	if (!hist_pads)
+		return -ENOMEM;
+
+	refcount_set(&hist_pad_ref, 1);
+	return 0;
+}
+
+static void free_hist_pad(void)
+{
+	lockdep_assert_held(&event_mutex);
+
+	if (!refcount_dec_and_test(&hist_pad_ref))
+		return;
+
+	free_percpu(hist_pads);
+	hist_pads = NULL;
+}
+
+static struct hist_pad *get_hist_pad(void)
+{
+	struct hist_pad *hist_pad;
+	int cnt;
+
+	if (WARN_ON_ONCE(!hist_pads))
+		return NULL;
+
+	preempt_disable();
+
+	hist_pad = per_cpu_ptr(hist_pads, smp_processor_id());
+
+	if (this_cpu_read(hist_pad_cnt) == MAX_HIST_CNT) {
+		preempt_enable();
+		return NULL;
+	}
+
+	cnt = this_cpu_inc_return(hist_pad_cnt) - 1;
+
+	return &hist_pad[cnt];
+}
+
+static void put_hist_pad(void)
+{
+	this_cpu_dec(hist_pad_cnt);
+	preempt_enable();
+}
+
 static void event_hist_trigger(struct event_trigger_data *data,
 			       struct trace_buffer *buffer, void *rec,
 			       struct ring_buffer_event *rbe)
 {
 	struct hist_trigger_data *hist_data = data->private_data;
 	bool use_compound_key = (hist_data->n_keys > 1);
-	unsigned long entries[HIST_STACKTRACE_DEPTH];
-	u64 var_ref_vals[TRACING_MAP_VARS_MAX];
-	char compound_key[HIST_KEY_SIZE_MAX];
 	struct tracing_map_elt *elt = NULL;
 	struct hist_field *key_field;
+	struct hist_pad *hist_pad;
 	u64 field_contents;
 	void *key = NULL;
 	unsigned int i;
@@ -5262,12 +5339,18 @@ static void event_hist_trigger(struct event_trigger_data *data,
 	if (unlikely(!rbe))
 		return;
 
-	memset(compound_key, 0, hist_data->key_size);
+	hist_pad = get_hist_pad();
+	if (!hist_pad)
+		return;
+
+	memset(hist_pad->compound_key, 0, hist_data->key_size);
 
 	for_each_hist_key_field(i, hist_data) {
 		key_field = hist_data->fields[i];
 
 		if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
+			unsigned long *entries = hist_pad->entries;
+
 			memset(entries, 0, HIST_STACKTRACE_SIZE);
 			if (key_field->field) {
 				unsigned long *stack, n_entries;
@@ -5291,26 +5374,31 @@ static void event_hist_trigger(struct event_trigger_data *data,
 		}
 
 		if (use_compound_key)
-			add_to_key(compound_key, key, key_field, rec);
+			add_to_key(hist_pad->compound_key, key, key_field, rec);
 	}
 
 	if (use_compound_key)
-		key = compound_key;
+		key = hist_pad->compound_key;
 
 	if (hist_data->n_var_refs &&
-	    !resolve_var_refs(hist_data, key, var_ref_vals, false))
-		return;
+	    !resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, false))
+		goto out;
 
 	elt = tracing_map_insert(hist_data->map, key);
 	if (!elt)
-		return;
+		goto out;
 
-	hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, var_ref_vals);
+	hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, hist_pad->var_ref_vals);
 
-	if (resolve_var_refs(hist_data, key, var_ref_vals, true))
-		hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals);
+	if (resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, true)) {
+		hist_trigger_actions(hist_data, elt, buffer, rec, rbe,
+				     key, hist_pad->var_ref_vals);
+	}
 
 	hist_poll_wakeup();
+
+ out:
+	put_hist_pad();
 }
 
 static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -6155,6 +6243,9 @@ static int event_hist_trigger_init(struct event_trigger_data *data)
 {
 	struct hist_trigger_data *hist_data = data->private_data;
 
+	if (alloc_hist_pad() < 0)
+		return -ENOMEM;
+
 	if (!data->ref && hist_data->attrs->name)
 		save_named_trigger(hist_data->attrs->name, data);
 
@@ -6199,6 +6290,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data)
 
 		destroy_hist_data(hist_data);
 	}
+	free_hist_pad();
 }
 
 static const struct event_trigger_ops event_hist_trigger_ops = {
@@ -6214,9 +6306,7 @@ static int event_hist_trigger_named_init(struct event_trigger_data *data)
 
 	save_named_trigger(data->named_data->name, data);
 
-	event_hist_trigger_init(data->named_data);
-
-	return 0;
+	return event_hist_trigger_init(data->named_data);
 }
 
 static void event_hist_trigger_named_free(struct event_trigger_data *data)
-- 
2.47.2



  parent reply	other threads:[~2025-05-09 13:12 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-09 13:12 [for-next][PATCH 00/31] tracing: Updates for v6.16 Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 01/31] tracing: Update function trace addresses with module addresses Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 02/31] tracing: Show function names when possible when listing fields Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 03/31] tracing: Only return an adjusted address if it matches the kernel address Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 04/31] tracing: Adjust addresses for printing out fields Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 05/31] tracing: Show preempt and irq events callsites from the offsets in field print Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 06/31] tracing: Always use memcpy() in histogram add_to_key() Steven Rostedt
2025-05-09 13:12 ` Steven Rostedt [this message]
2025-05-09 13:12 ` [for-next][PATCH 08/31] tracing: Add common_comm to histograms Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 09/31] ftrace: Show subops in enabled_functions Steven Rostedt
2025-05-09 13:12 ` [for-next][PATCH 10/31] ftrace: Expose call graph depth as unsigned int Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 11/31] ftrace: Comment that ftrace_func_mapper is freed with free_ftrace_hash() Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 12/31] tracing/osnoise: Allow arbitrarily long CPU string Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 13/31] tracing/mmiotrace: Remove reference to unused per CPU data pointer Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 14/31] ftrace: Do not bother checking per CPU "disabled" flag Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 15/31] tracing: Just use this_cpu_read() to access ignore_pid Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 16/31] tracing: Add tracer_tracing_disable/enable() functions Steven Rostedt
2025-05-09 15:49   ` Doug Anderson
2025-05-09 13:13 ` [for-next][PATCH 17/31] tracing: Use tracer_tracing_disable() instead of "disabled" field for ftrace_dump_one() Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 18/31] tracing: kdb: Use tracer_tracing_on/off() instead of setting per CPU disabled Steven Rostedt
2025-05-09 15:49   ` Doug Anderson
2025-05-09 15:57     ` Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 19/31] ftrace: Do not disabled function graph based on "disabled" field Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 20/31] tracing: Do not use per CPU array_buffer.data->disabled for cpumask Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 21/31] ring-buffer: Add ring_buffer_record_is_on_cpu() Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 22/31] tracing: branch: Use trace_tracing_is_on_cpu() instead of "disabled" field Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 23/31] tracing: Convert the per CPU "disabled" counter to local from atomic Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 24/31] tracing: Use atomic_inc_return() for updating "disabled" counter in irqsoff tracer Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 25/31] tracing: Remove unused buffer_page field from trace_array_cpu structure Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 26/31] tracing: Replace deprecated strncpy() with strscpy() for stack_trace_filter_buf Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 27/31] tracing: Rename event_trigger_alloc() to trigger_data_alloc() Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 28/31] tracing: Fix error handling in event_trigger_parse() Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 29/31] tracing: Remove unnecessary "goto out" that simply returns ret is trigger code Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 30/31] tracing: Add a helper function to handle the dereference arg in verifier Steven Rostedt
2025-05-09 13:13 ` [for-next][PATCH 31/31] tracing: Allow the top level trace_marker to write into another instances Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250509131314.611110671@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=zanussi@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.