linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Daniel Bristot de Oliveira <bristot@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Ian Rogers <irogers@google.com>, Ingo Molnar <mingo@redhat.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Kan Liang  <kan.liang@linux.intel.com>,
	Marco Elver <elver@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: [PATCH v4 4/6] perf: Move swevent_htable::recursion into task_struct.
Date: Mon, 24 Jun 2024 17:15:17 +0200	[thread overview]
Message-ID: <20240624152732.1231678-5-bigeasy@linutronix.de> (raw)
In-Reply-To: <20240624152732.1231678-1-bigeasy@linutronix.de>

The swevent_htable::recursion counter is used to avoid creating an
swevent while an event is processed to avoid recursion. The counter is
per-CPU and preemption must be disabled to have a stable counter.
perf_pending_task() disables preemption to access the counter and then
signal. This is problematic on PREEMPT_RT because sending a signal uses
a spinlock_t which must not be acquired in atomic on PREEMPT_RT because
it becomes a sleeping lock.

The atomic context can be avoided by moving the counter into the
task_struct. There is a 4 byte hole between futex_state (usually always
on) and the following perf pointer (perf_event_ctxp). After the
recursion lost some weight it fits perfectly.

Move swevent_htable::recursion into task_struct.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/perf_event.h |  6 ------
 include/linux/sched.h      |  7 +++++++
 kernel/events/core.c       | 13 +++----------
 kernel/events/internal.h   |  2 +-
 4 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ea0d82418d854..99a7ea1d29ed5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -970,12 +970,6 @@ struct perf_event_context {
 	local_t				nr_pending;
 };
 
-/*
- * Number of contexts where an event can trigger:
- *	task, softirq, hardirq, nmi.
- */
-#define PERF_NR_CONTEXTS	4
-
 struct perf_cpu_pmu_context {
 	struct perf_event_pmu_context	epc;
 	struct perf_event_pmu_context	*task_epc;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6d..afb1087f5831b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,12 @@ enum perf_event_task_context {
 	perf_nr_task_contexts,
 };
 
+/*
+ * Number of contexts where an event can trigger:
+ *      task, softirq, hardirq, nmi.
+ */
+#define PERF_NR_CONTEXTS	4
+
 struct wake_q_node {
 	struct wake_q_node *next;
 };
@@ -1256,6 +1262,7 @@ struct task_struct {
 	unsigned int			futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
+	u8				perf_recursion[PERF_NR_CONTEXTS];
 	struct perf_event_context	*perf_event_ctxp;
 	struct mutex			perf_event_mutex;
 	struct list_head		perf_event_list;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f48ce05907042..fc9a78e1fb4aa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9779,11 +9779,7 @@ struct swevent_htable {
 	struct swevent_hlist		*swevent_hlist;
 	struct mutex			hlist_mutex;
 	int				hlist_refcount;
-
-	/* Recursion avoidance in each contexts */
-	u8				recursion[PERF_NR_CONTEXTS];
 };
-
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
 
 /*
@@ -9981,17 +9977,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
 
 int perf_swevent_get_recursion_context(void)
 {
-	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
-
-	return get_recursion_context(swhash->recursion);
+	return get_recursion_context(current->perf_recursion);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
 void perf_swevent_put_recursion_context(int rctx)
 {
-	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
-
-	put_recursion_context(swhash->recursion, rctx);
+	put_recursion_context(current->perf_recursion, rctx);
 }
 
 void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
@@ -13658,6 +13650,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
 {
 	int ret;
 
+	memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
 	child->perf_event_ctxp = NULL;
 	mutex_init(&child->perf_event_mutex);
 	INIT_LIST_HEAD(&child->perf_event_list);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index f9a3244206b20..f0daaa6f2a33b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -221,7 +221,7 @@ static inline int get_recursion_context(u8 *recursion)
 	return rctx;
 }
 
-static inline void put_recursion_context(u8 *recursion, int rctx)
+static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
 {
 	barrier();
 	recursion[rctx]--;
-- 
2.45.2


  parent reply	other threads:[~2024-06-24 15:27 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-24 15:15 [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 1/6] perf: Move irq_work_queue() where the event is prepared Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work Sebastian Andrzej Siewior
2024-07-01 12:28   ` Peter Zijlstra
2024-07-01 13:27     ` Sebastian Andrzej Siewior
2024-07-02  9:01       ` Peter Zijlstra
2024-10-28  8:30   ` Lai, Yi
2024-10-28 12:21     ` Frederic Weisbecker
2024-10-29 17:21       ` Sebastian Andrzej Siewior
2024-10-30 14:07         ` Sebastian Andrzej Siewior
2024-10-30 15:46           ` Frederic Weisbecker
2024-11-07 14:46             ` Sebastian Andrzej Siewior
2024-11-08 13:11               ` Frederic Weisbecker
2024-11-08 19:08                 ` Oleg Nesterov
2024-11-08 22:26                   ` Frederic Weisbecker
2024-11-11 12:08                     ` Sebastian Andrzej Siewior
2024-12-04  3:02                       ` Lai, Yi
2024-12-04 13:48                       ` Oleg Nesterov
2024-12-05  0:19                         ` Frederic Weisbecker
2024-12-05  9:20                           ` Oleg Nesterov
2024-12-05 10:05                             ` Frederic Weisbecker
2024-12-05 10:28                               ` Oleg Nesterov
2024-12-13 22:52                                 ` Frederic Weisbecker
2024-12-16 19:19                                   ` Oleg Nesterov
2024-06-24 15:15 ` [PATCH v4 3/6] perf: Shrink the size of the recursion counter Sebastian Andrzej Siewior
2024-07-01 12:31   ` Peter Zijlstra
2024-07-01 12:56     ` Sebastian Andrzej Siewior
2024-07-01 13:10       ` Peter Zijlstra
2024-06-24 15:15 ` Sebastian Andrzej Siewior [this message]
2024-06-24 15:15 ` [PATCH v4 5/6] perf: Don't disable preemption in perf_pending_task() Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 6/6] perf: Split __perf_pending_irq() out of perf_pending_irq() Sebastian Andrzej Siewior
2024-06-25 13:42 ` [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Marco Elver

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240624152732.1231678-5-bigeasy@linutronix.de \
    --to=bigeasy@linutronix.de \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bristot@kernel.org \
    --cc=elver@google.com \
    --cc=frederic@kernel.org \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).