[RFC][PATCH 6/7] perf: Fix race between event install and jump_labels

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: peterz@infradead.org, mingo@kernel.org,
	alexander.shishkin@linux.intel.com, eranian@google.com
Cc: linux-kernel@vger.kernel.org, vince@deater.net,
	dvyukov@google.com, andi@firstfloor.org, jolsa@redhat.com,
	sasha.levin@oracle.com, oleg@redhat.com
Subject: [RFC][PATCH 6/7] perf: Fix race between event install and jump_labels
Date: Fri, 19 Feb 2016 15:37:49 +0100	[thread overview]
Message-ID: <20160219144132.170234098@infradead.org> (raw)
In-Reply-To: 20160219143743.692339502@infradead.org

[-- Attachment #1: peterz-perf-more-fixes-6.patch --]
[-- Type: text/plain, Size: 4579 bytes --]

perf_install_in_context() relies upon the context switch hooks to have
scheduled in events when the IPI misses its target -- after all, if
the task has moved from the CPU (or wasn't running at all), it will
have to context switch to run elsewhere.

This however doesn't appear to be happening.

It is possible for the IPI to not happen (task wasn't running) only to
later observe the task running with an inactive context.

The only possible explanation is that the context switch hooks are not
called. Therefore put in a sync_sched() after toggling the jump_label
to guarantee all CPUs will have them enabled before we install an
event.

A simple if (0->1) sync_sched() will not in fact work, because any
further increment can race and complete before the sync_sched().
Therefore we must jump through some hoops.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/perf_event.h |    6 ++---
 kernel/events/core.c       |   49 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 44 insertions(+), 11 deletions(-)

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -906,7 +906,7 @@ perf_sw_event_sched(u32 event_id, u64 nr
 	}
 }
 
-extern struct static_key_deferred perf_sched_events;
+extern struct static_key_false perf_sched_events;
 
 static __always_inline bool
 perf_sw_migrate_enabled(void)
@@ -925,7 +925,7 @@ static inline void perf_event_task_migra
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
-	if (static_key_false(&perf_sched_events.key))
+	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_in(prev, task);
 
 	if (perf_sw_migrate_enabled() && task->sched_migrated) {
@@ -942,7 +942,7 @@ static inline void perf_event_task_sched
 {
 	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
-	if (static_key_false(&perf_sched_events.key))
+	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
 }
 
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -321,7 +321,13 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct static_key_deferred perf_sched_events __read_mostly;
+
+static void perf_sched_delayed(struct work_struct *work);
+DEFINE_STATIC_KEY_FALSE(perf_sched_events);
+static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
+static DEFINE_MUTEX(perf_sched_mutex);
+static atomic_t perf_sched_count;
+
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
@@ -3542,12 +3548,22 @@ static void unaccount_event(struct perf_
 	if (has_branch_stack(event))
 		dec = true;
 
-	if (dec)
-		static_key_slow_dec_deferred(&perf_sched_events);
+	if (dec) {
+		if (!atomic_add_unless(&perf_sched_count, -1, 1))
+			schedule_delayed_work(&perf_sched_work, HZ);
+	}
 
 	unaccount_event_cpu(event, event->cpu);
 }
 
+static void perf_sched_delayed(struct work_struct *work)
+{
+	mutex_lock(&perf_sched_mutex);
+	if (atomic_dec_and_test(&perf_sched_count))
+		static_branch_disable(&perf_sched_events);
+	mutex_unlock(&perf_sched_mutex);
+}
+
 /*
  * The following implement mutual exclusion of events on "exclusive" pmus
  * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
@@ -7786,8 +7802,28 @@ static void account_event(struct perf_ev
 	if (is_cgroup_event(event))
 		inc = true;
 
-	if (inc)
-		static_key_slow_inc(&perf_sched_events.key);
+	if (inc) {
+		if (atomic_inc_not_zero(&perf_sched_count))
+			goto enabled;
+
+		mutex_lock(&perf_sched_mutex);
+		if (!atomic_read(&perf_sched_count)) {
+			static_branch_enable(&perf_sched_events);
+			/*
+			 * Guarantee that all CPUs observe they key change and
+			 * call the perf scheduling hooks before proceeding to
+			 * install events that need them.
+			 */
+			synchronize_sched();
+		}
+		/*
+		 * Now that we have waited for the sync_sched(), allow further
+		 * increments to by-pass the mutex.
+		 */
+		atomic_inc(&perf_sched_count);
+		mutex_unlock(&perf_sched_mutex);
+	}
+enabled:
 
 	account_event_cpu(event, event->cpu);
 }
@@ -9352,9 +9388,6 @@ void __init perf_event_init(void)
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 
-	/* do not patch jump label more than once per second */
-	jump_label_rate_limit(&perf_sched_events, HZ);
-
 	/*
 	 * Build time assertion that we keep the data_head at the intended
 	 * location.  IOW, validation we got the __reserved[] size right.

next prev parent reply	other threads:[~2016-02-19 14:51 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-19 14:37 [RFC][PATCH 0/7] perf: more fuzzer inspired patches Peter Zijlstra
2016-02-19 14:37 ` [RFC][PATCH 1/7] perf: Close install vs exit race Peter Zijlstra
2016-02-19 14:37 ` [RFC][PATCH 2/7] perf: Do not double free Peter Zijlstra
2016-02-19 14:37 ` [RFC][PATCH 3/7] perf: Allow perf_release() with !event->ctx Peter Zijlstra
2016-02-19 14:37 ` [RFC][PATCH 4/7] perf: Fix scaling vs enable_on_exec Peter Zijlstra
2016-02-23 15:27   ` Peter Zijlstra
2016-02-23 15:48     ` Jiri Olsa
2016-02-23 16:35       ` Pratyush Anand
2016-02-23 17:47         ` Peter Zijlstra
2016-02-24 11:53           ` Peter Zijlstra
2016-02-24 14:02             ` Peter Zijlstra
2016-02-24 16:02               ` Peter Zijlstra
2016-02-25  4:07                 ` Pratyush Anand
2016-02-23 21:44       ` Jiri Olsa
2016-02-26  2:25         ` Oleg Nesterov
2016-02-19 14:37 ` [RFC][PATCH 5/7] perf: Fix cloning Peter Zijlstra
2016-02-19 14:37 ` Peter Zijlstra [this message]
2016-02-19 14:37 ` [RFC][PATCH 7/7] perf: Cure event->pending_disable race Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160219144132.170234098@infradead.org \
    --to=peterz@infradead.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andi@firstfloor.org \
    --cc=dvyukov@google.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=sasha.levin@oracle.com \
    --cc=vince@deater.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).