From: Wander Lairson Costa <wander@redhat.com>
To: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	David Woodhouse <dwmw@amazon.co.uk>,
	Thomas Gleixner <tglx@linutronix.de>,
	Wander Lairson Costa <wander@redhat.com>,
	Boqun Feng <boqun.feng@gmail.com>,
	linux-kernel@vger.kernel.org (open list),
	linux-trace-kernel@vger.kernel.org (open list:TRACING)
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Clark Williams <williams@redhat.com>,
	Gabriele Monaco <gmonaco@redhat.com>
Subject: [PATCH v3 2/2] tracing/preemptirq: Optimize preempt_disable/enable() tracepoint overhead
Date: Fri,  4 Jul 2025 14:07:43 -0300
Message-ID: <20250704170748.97632-3-wander@redhat.com>
In-Reply-To: <20250704170748.97632-1-wander@redhat.com>

Similar to the IRQ tracepoints, the preempt tracepoints are typically
disabled in production systems due to the significant overhead they
introduce even when not in use.

The overhead comes primarily from two sources. First, when the
tracepoints are compiled in, preempt_count_add() and preempt_count_sub()
become external function calls rather than inlined operations. Second,
these functions perform preempt_count() checks that are unnecessary
while the tracepoint itself is disabled.

This patch adds an early, inline check of the tracepoint static key,
which allows both the function-call overhead and the redundant
preempt_count() checks to be skipped when tracing is disabled. All
existing functionality is preserved when tracing is active, while
overhead is significantly reduced in the common case where tracing is
inactive.

Signed-off-by: Wander Lairson Costa <wander@redhat.com>
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Clark Williams <williams@redhat.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
---
 include/linux/preempt.h         | 35 ++++++++++++++++++++++++++++++---
 kernel/sched/core.c             | 12 +----------
 kernel/trace/trace_preemptirq.c | 19 ++++++++++++++++++
 3 files changed, 52 insertions(+), 14 deletions(-)
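
Note for reviewers: tracepoint_enabled() is a static-key (jump label)
test, so when tracing is disabled the check compiles down to a
patched-in NOP rather than a load and a branch. Paraphrased from
<linux/tracepoint-defs.h> (verify against the tree this is applied to):

	#define DECLARE_TRACEPOINT(tp) \
		extern struct tracepoint __tracepoint_##tp

	#ifdef CONFIG_TRACEPOINTS
	# define tracepoint_enabled(tp) \
		static_key_false(&(__tracepoint_##tp).key)
	#else
	# define tracepoint_enabled(tracepoint) false
	#endif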

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index b0af8d4ef6e6..d13c755cd934 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,6 +10,7 @@
 #include <linux/linkage.h>
 #include <linux/cleanup.h>
 #include <linux/types.h>
+#include <linux/tracepoint-defs.h>
 
 /*
  * We put the hardirq and softirq counter into the preemption
@@ -191,17 +192,45 @@ static __always_inline unsigned char interrupt_context_level(void)
  */
 #define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
 
-#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
+#if defined(CONFIG_DEBUG_PREEMPT)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
-#define preempt_count_dec_and_test() \
-	({ preempt_count_sub(1); should_resched(0); })
+#elif defined(CONFIG_TRACE_PREEMPT_TOGGLE)
+extern void __trace_preempt_on(void);
+extern void __trace_preempt_off(void);
+
+DECLARE_TRACEPOINT(preempt_enable);
+DECLARE_TRACEPOINT(preempt_disable);
+
+#define __preempt_trace_enabled(type, val) \
+	(tracepoint_enabled(preempt_##type) && preempt_count() == (val))
+
+static inline void preempt_count_add(int val)
+{
+	__preempt_count_add(val);
+
+	if (__preempt_trace_enabled(disable, val))
+		__trace_preempt_off();
+}
+
+static inline void preempt_count_sub(int val)
+{
+	if (__preempt_trace_enabled(enable, val))
+		__trace_preempt_on();
+
+	__preempt_count_sub(val);
+}
 #else
 #define preempt_count_add(val)	__preempt_count_add(val)
 #define preempt_count_sub(val)	__preempt_count_sub(val)
 #define preempt_count_dec_and_test() __preempt_count_dec_and_test()
 #endif
 
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
+#define preempt_count_dec_and_test() \
+	({ preempt_count_sub(1); should_resched(0); })
+#endif
+
 #define __preempt_count_inc() __preempt_count_add(1)
 #define __preempt_count_dec() __preempt_count_sub(1)
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8988d38d46a3..4feba4738d79 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5840,8 +5840,7 @@ static inline void sched_tick_start(int cpu) { }
 static inline void sched_tick_stop(int cpu) { }
 #endif
 
-#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
-				defined(CONFIG_TRACE_PREEMPT_TOGGLE))
+#if defined(CONFIG_PREEMPTION) && defined(CONFIG_DEBUG_PREEMPT)
 /*
  * If the value passed in is equal to the current preempt count
  * then we just disabled preemption. Start timing the latency.
@@ -5850,30 +5849,24 @@ static inline void preempt_latency_start(int val)
 {
 	if (preempt_count() == val) {
 		unsigned long ip = get_lock_parent_ip();
-#ifdef CONFIG_DEBUG_PREEMPT
 		current->preempt_disable_ip = ip;
-#endif
 		trace_preempt_off(CALLER_ADDR0, ip);
 	}
 }
 
 void preempt_count_add(int val)
 {
-#ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
 	 */
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
-#endif
 	__preempt_count_add(val);
-#ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
 	 */
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
-#endif
 	preempt_latency_start(val);
 }
 EXPORT_SYMBOL(preempt_count_add);
@@ -5891,7 +5884,6 @@ static inline void preempt_latency_stop(int val)
 
 void preempt_count_sub(int val)
 {
-#ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
 	 */
@@ -5903,14 +5895,12 @@ void preempt_count_sub(int val)
 	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
 			!(preempt_count() & PREEMPT_MASK)))
 		return;
-#endif
 
 	preempt_latency_stop(val);
 	__preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
 NOKPROBE_SYMBOL(preempt_count_sub);
-
 #else
 static inline void preempt_latency_start(int val) { }
 static inline void preempt_latency_stop(int val) { }
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 90ee65db4516..deb2428b34a2 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -118,6 +118,25 @@ EXPORT_TRACEPOINT_SYMBOL(irq_enable);
 
 #ifdef CONFIG_TRACE_PREEMPT_TOGGLE
 
+#if !defined(CONFIG_DEBUG_PREEMPT)
+EXPORT_SYMBOL(__tracepoint_preempt_disable);
+EXPORT_SYMBOL(__tracepoint_preempt_enable);
+
+void __trace_preempt_on(void)
+{
+	trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+}
+EXPORT_SYMBOL(__trace_preempt_on);
+NOKPROBE_SYMBOL(__trace_preempt_on);
+
+void __trace_preempt_off(void)
+{
+	trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
+}
+EXPORT_SYMBOL(__trace_preempt_off);
+NOKPROBE_SYMBOL(__trace_preempt_off);
+#endif /* !CONFIG_DEBUG_PREEMPT */
+
 void trace_preempt_on(unsigned long a0, unsigned long a1)
 {
 	trace(preempt_enable, TP_ARGS(a0, a1));
-- 
2.50.0

