From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, alexander.shishkin@linux.intel.com, eranian@google.com
Cc: linux-kernel@vger.kernel.org, vince@deater.net,
dvyukov@google.com, andi@firstfloor.org, jolsa@redhat.com,
peterz@infradead.org
Subject: [RFC][PATCH 12/12] perf: Collapse and fix event_function_call() users
Date: Mon, 11 Jan 2016 17:25:10 +0100 [thread overview]
Message-ID: <20160111163229.411314288@infradead.org> (raw)
In-Reply-To: 20160111162458.427203780@infradead.org
[-- Attachment #1: peterz-perf-fixes-13.patch --]
[-- Type: text/plain, Size: 15526 bytes --]
There is one common bug left in all the event_function_call() users,
between loading ctx->task and getting to the remote_function(),
ctx->task can already have been changed.
Therefore we need to double check and retry if ctx->task != current.
Insert another trampoline specific to event_function_call() that
checks for this and further validates state. This also allows getting
rid of the active/inactive functions.
Note: Stephane, can you please look at __perf_event_enable()?
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/perf_event.h | 2
kernel/events/core.c | 362 ++++++++++++++++++------------------------
kernel/events/hw_breakpoint.c | 2
3 files changed, 164 insertions(+), 202 deletions(-)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1044,7 +1044,7 @@ extern void perf_swevent_put_recursion_c
extern u64 perf_swevent_set_period(struct perf_event *event);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_task_tick(void);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -126,6 +126,28 @@ static int cpu_function_call(int cpu, re
return data.ret;
}
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+ return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx)
+{
+ raw_spin_lock(&cpuctx->ctx.lock);
+ if (ctx)
+ raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx)
+{
+ if (ctx)
+ raw_spin_unlock(&ctx->lock);
+ raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
/*
* On task ctx scheduling...
*
@@ -158,21 +180,96 @@ static int cpu_function_call(int cpu, re
* If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
*/
-static void event_function_call(struct perf_event *event,
- int (*active)(void *),
- void (*inactive)(void *),
- void *data)
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+ struct perf_event_context *, void *);
+
+struct event_function_struct {
+ struct perf_event *event;
+ event_f func;
+ void *data;
+};
+
+static int event_function(void *info)
+{
+ struct event_function_struct *efs = info;
+ struct perf_event *event = efs->event;
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+ struct perf_event_context *task_ctx = cpuctx->task_ctx;
+
+ WARN_ON_ONCE(!irqs_disabled());
+
+ /*
+ * Since we do the IPI call without holding ctx->lock things can have
+ * changed, double check we hit the task we set out to hit.
+ *
+ * If ctx->task == current, we know things must remain valid because
+ * we have IRQs disabled so we cannot schedule.
+ */
+ if (ctx->task) {
+ if (ctx->task != current)
+ return -EAGAIN;
+
+ WARN_ON_ONCE(task_ctx != ctx);
+ } else {
+ WARN_ON_ONCE(&cpuctx->ctx != ctx);
+ }
+
+ perf_ctx_lock(cpuctx, task_ctx);
+ /*
+ * Now that we hold locks, double check state. Paranoia pays.
+ */
+ if (task_ctx) {
+ WARN_ON_ONCE(task_ctx->task != current);
+ /*
+ * We only use event_function_call() on established contexts,
+ * and event_function() is only ever called when active (or
+ * rather, we'll have bailed in task_function_call() or the
+ * above ctx->task != current test), therefore we must have
+ * ctx->is_active here.
+ */
+ WARN_ON_ONCE(!ctx->is_active);
+ /*
+ * And since we have ctx->is_active, cpuctx->task_ctx must
+ * match.
+ */
+ WARN_ON_ONCE(cpuctx->task_ctx != task_ctx);
+ }
+ efs->func(event, cpuctx, ctx, efs->data);
+ perf_ctx_unlock(cpuctx, task_ctx);
+
+ return 0;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+ struct event_function_struct efs = {
+ .event = event,
+ .func = func,
+ .data = data,
+ };
+
+ int ret = event_function(&efs);
+ WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
+ struct event_function_struct efs = {
+ .event = event,
+ .func = func,
+ .data = data,
+ };
if (!task) {
- cpu_function_call(event->cpu, active, data);
+ cpu_function_call(event->cpu, event_function, &efs);
return;
}
again:
- if (!task_function_call(task, active, data))
+ if (!task_function_call(task, event_function, &efs))
return;
raw_spin_lock_irq(&ctx->lock);
@@ -185,7 +282,7 @@ static void event_function_call(struct p
raw_spin_unlock_irq(&ctx->lock);
goto again;
}
- inactive(data);
+ func(event, NULL, ctx, data);
raw_spin_unlock_irq(&ctx->lock);
}
@@ -400,28 +497,6 @@ static inline u64 perf_event_clock(struc
return event->clock();
}
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
- return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- raw_spin_lock(&cpuctx->ctx.lock);
- if (ctx)
- raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- if (ctx)
- raw_spin_unlock(&ctx->lock);
- raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
#ifdef CONFIG_CGROUP_PERF
static inline bool
@@ -1684,38 +1759,22 @@ group_sched_out(struct perf_event *group
cpuctx->exclusive = 0;
}
-struct remove_event {
- struct perf_event *event;
- bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
- struct remove_event *re = info;
- struct perf_event *event = re->event;
- struct perf_event_context *ctx = event->ctx;
-
- if (re->detach_group)
- perf_group_detach(event);
- list_del_event(event, ctx);
-}
-
/*
* Cross CPU call to remove a performance event
*
* We disable the event on the hardware level first. After that we
* remove it from the context list.
*/
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx,
+ void *info)
{
- struct remove_event *re = info;
- struct perf_event *event = re->event;
- struct perf_event_context *ctx = event->ctx;
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+ bool detach_group = (unsigned long)info;
- raw_spin_lock(&ctx->lock);
event_sched_out(event, cpuctx, ctx);
- if (re->detach_group)
+ if (detach_group)
perf_group_detach(event);
list_del_event(event, ctx);
@@ -1726,17 +1785,11 @@ static int __perf_remove_from_context(vo
cpuctx->task_ctx = NULL;
}
}
- raw_spin_unlock(&ctx->lock);
-
- return 0;
}
/*
* Remove the event from a task's (or a CPU's) list of events.
*
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
* If event->ctx is a cloned context, callers must make sure that
* every task struct that event->ctx->task could possibly point to
* remains valid. This is OK when called from perf_release since
@@ -1746,71 +1799,31 @@ static int __perf_remove_from_context(vo
*/
static void perf_remove_from_context(struct perf_event *event, bool detach_group)
{
- struct perf_event_context *ctx = event->ctx;
- struct remove_event re = {
- .event = event,
- .detach_group = detach_group,
- };
-
- lockdep_assert_held(&ctx->mutex);
+ lockdep_assert_held(&event->ctx->mutex);
event_function_call(event, __perf_remove_from_context,
- ___perf_remove_from_context, &re);
+ (void *)(unsigned long)detach_group);
}
/*
* Cross CPU call to disable a performance event
*/
-int __perf_event_disable(void *info)
-{
- struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
- /*
- * If this is a per-task event, need to check whether this
- * event's task is the current task on this cpu.
- *
- * Can trigger due to concurrent perf_event_context_sched_out()
- * flipping contexts around.
- */
- if (ctx->task && cpuctx->task_ctx != ctx)
- return -EINVAL;
-
- raw_spin_lock(&ctx->lock);
-
- /*
- * If the event is on, turn it off.
- * If it is in error state, leave it in error state.
- */
- if (event->state >= PERF_EVENT_STATE_INACTIVE) {
- update_context_time(ctx);
- update_cgrp_time_from_event(event);
- update_group_times(event);
- if (event == event->group_leader)
- group_sched_out(event, cpuctx, ctx);
- else
- event_sched_out(event, cpuctx, ctx);
- event->state = PERF_EVENT_STATE_OFF;
- }
-
- raw_spin_unlock(&ctx->lock);
-
- return 0;
-}
-
-void ___perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx,
+ void *info)
{
- struct perf_event *event = info;
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ return;
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE) {
- update_group_times(event);
- event->state = PERF_EVENT_STATE_OFF;
- }
+ update_context_time(ctx);
+ update_cgrp_time_from_event(event);
+ update_group_times(event);
+ if (event == event->group_leader)
+ group_sched_out(event, cpuctx, ctx);
+ else
+ event_sched_out(event, cpuctx, ctx);
+ event->state = PERF_EVENT_STATE_OFF;
}
/*
@@ -1837,8 +1850,12 @@ static void _perf_event_disable(struct p
}
raw_spin_unlock_irq(&ctx->lock);
- event_function_call(event, __perf_event_disable,
- ___perf_event_disable, event);
+ event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+ event_function_local(event, __perf_event_disable, NULL);
}
/*
@@ -2202,44 +2219,28 @@ static void __perf_event_mark_enabled(st
/*
* Cross CPU call to enable a performance event
*/
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx,
+ void *info)
{
- struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
struct perf_event *leader = event->group_leader;
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
- struct perf_event_context *task_ctx = cpuctx->task_ctx;
-
- /*
- * There's a time window between 'ctx->is_active' check
- * in perf_event_enable function and this place having:
- * - IRQs on
- * - ctx->lock unlocked
- *
- * where the task could be killed and 'ctx' deactivated
- * by perf_event_exit_task.
- */
- if (!ctx->is_active)
- return -EINVAL;
-
- perf_ctx_lock(cpuctx, task_ctx);
- WARN_ON_ONCE(&cpuctx->ctx != ctx && task_ctx != ctx);
- update_context_time(ctx);
if (event->state >= PERF_EVENT_STATE_INACTIVE)
- goto unlock;
-
- /*
- * set current task's cgroup time reference point
- */
- perf_cgroup_set_timestamp(current, ctx);
+ return;
+ update_context_time(ctx);
__perf_event_mark_enabled(event);
+ if (!ctx->is_active)
+ return;
+
if (!event_filter_match(event)) {
- if (is_cgroup_event(event))
+ if (is_cgroup_event(event)) {
+ perf_cgroup_set_timestamp(current, ctx); // XXX ?
perf_cgroup_defer_enabled(event);
- goto unlock;
+ }
+ return;
}
/*
@@ -2247,19 +2248,9 @@ static int __perf_event_enable(void *inf
* then don't put it on unless the group is on.
*/
if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
- goto unlock;
-
- ctx_resched(cpuctx, task_ctx);
-
-unlock:
- perf_ctx_unlock(cpuctx, task_ctx);
-
- return 0;
-}
+ return;
-void ___perf_event_enable(void *info)
-{
- __perf_event_mark_enabled((struct perf_event *)info);
+ ctx_resched(cpuctx, ctx);
}
/*
@@ -2292,8 +2283,7 @@ static void _perf_event_enable(struct pe
event->state = PERF_EVENT_STATE_OFF;
raw_spin_unlock_irq(&ctx->lock);
- event_function_call(event, __perf_event_enable,
- ___perf_event_enable, event);
+ event_function_call(event, __perf_event_enable, NULL);
}
/*
@@ -4095,36 +4085,14 @@ static void perf_event_for_each(struct p
perf_event_for_each_child(sibling, func);
}
-struct period_event {
- struct perf_event *event;
- u64 value;
-};
-
-static void ___perf_event_period(void *info)
-{
- struct period_event *pe = info;
- struct perf_event *event = pe->event;
- u64 value = pe->value;
-
- if (event->attr.freq) {
- event->attr.sample_freq = value;
- } else {
- event->attr.sample_period = value;
- event->hw.sample_period = value;
- }
-
- local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx,
+ void *info)
{
- struct period_event *pe = info;
- struct perf_event *event = pe->event;
- struct perf_event_context *ctx = event->ctx;
- u64 value = pe->value;
+ u64 value = *((u64 *)info);
bool active;
- raw_spin_lock(&ctx->lock);
if (event->attr.freq) {
event->attr.sample_freq = value;
} else {
@@ -4144,14 +4112,10 @@ static int __perf_event_period(void *inf
event->pmu->start(event, PERF_EF_RELOAD);
perf_pmu_enable(ctx->pmu);
}
- raw_spin_unlock(&ctx->lock);
-
- return 0;
}
static int perf_event_period(struct perf_event *event, u64 __user *arg)
{
- struct period_event pe = { .event = event, };
u64 value;
if (!is_sampling_event(event))
@@ -4166,10 +4130,7 @@ static int perf_event_period(struct perf
if (event->attr.freq && value > sysctl_perf_event_sample_rate)
return -EINVAL;
- pe.value = value;
-
- event_function_call(event, __perf_event_period,
- ___perf_event_period, &pe);
+ event_function_call(event, __perf_event_period, &value);
return 0;
}
@@ -4941,7 +4902,7 @@ static void perf_pending_event(struct ir
if (event->pending_disable) {
event->pending_disable = 0;
- __perf_event_disable(event);
+ perf_event_disable_local(event);
}
if (event->pending_wakeup) {
@@ -9239,13 +9200,14 @@ static void perf_event_init_cpu(int cpu)
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void __perf_event_exit_context(void *__info)
{
- struct remove_event re = { .detach_group = true };
struct perf_event_context *ctx = __info;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+ struct perf_event *event;
- rcu_read_lock();
- list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
- __perf_remove_from_context(&re);
- rcu_read_unlock();
+ raw_spin_lock(&ctx->lock);
+ list_for_each_entry(event, &ctx->event_list, event_entry)
+ __perf_remove_from_context(event, cpuctx, ctx, (void *)(unsigned long)true);
+ raw_spin_unlock(&ctx->lock);
}
static void perf_event_exit_cpu_context(int cpu)
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct per
* current task.
*/
if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
- __perf_event_disable(bp);
+ perf_event_disable_local(bp);
else
perf_event_disable(bp);
next prev parent reply other threads:[~2016-01-11 16:37 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-11 16:24 [RFC][PATCH 00/12] various perf fixes Peter Zijlstra
2016-01-11 16:24 ` [RFC][PATCH 01/12] perf: Add lockdep assertions Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 02/12] perf: Fix cgroup event scheduling Peter Zijlstra
2016-01-11 19:43 ` Stephane Eranian
2016-01-11 22:03 ` Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 03/12] perf: Fix cgroup scheduling in enable_on_exec Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 04/12] perf: Remove stale comment Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 05/12] perf: Fix enable_on_exec event scheduling Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 06/12] perf: Use task_ctx_sched_out() Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 07/12] perf: Simplify/fix perf_event_enable() event scheduling Peter Zijlstra
2016-03-08 10:04 ` James Morse
2016-03-08 10:26 ` Peter Zijlstra
2016-03-08 10:42 ` James Morse
2016-03-08 11:29 ` Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 08/12] perf: Optimize perf_sched_events usage Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 09/12] perf: Make ctx->is_active and cpuctx->task_ctx consistent Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 10/12] perf: Fix task context scheduling Peter Zijlstra
2016-01-11 16:25 ` [RFC][PATCH 11/12] perf: Specialize perf_event_exit_task() Peter Zijlstra
2016-01-11 16:25 ` Peter Zijlstra [this message]
2016-01-13 10:50 ` [RFC][PATCH 12/12] perf: Collapse and fix event_function_call() users Peter Zijlstra
2016-01-13 13:46 ` Alexander Shishkin
2016-01-13 17:30 ` Peter Zijlstra
2016-01-13 15:00 ` Alexander Shishkin
2016-01-13 15:38 ` Alexander Shishkin
2016-01-13 18:10 ` Peter Zijlstra
2016-01-13 20:44 ` Peter Zijlstra
2016-01-14 10:44 ` Peter Zijlstra
2016-01-14 16:30 ` Peter Zijlstra
2016-02-17 22:38 ` Sasha Levin
2016-02-18 7:46 ` Peter Zijlstra
2016-02-18 12:37 ` Peter Zijlstra
2016-01-11 18:44 ` [RFC][PATCH 00/12] various perf fixes Dmitry Vyukov
2016-01-11 19:56 ` Andi Kleen
2016-01-11 22:00 ` Peter Zijlstra
2016-01-12 9:59 ` Ingo Molnar
2016-01-12 10:11 ` Ingo Molnar
2016-01-12 10:57 ` Dmitry Vyukov
2016-01-12 11:00 ` Dmitry Vyukov
2016-01-12 11:01 ` Dmitry Vyukov
2016-01-12 11:26 ` Dmitry Vyukov
2016-01-12 11:35 ` Dmitry Vyukov
2016-01-12 12:01 ` Peter Zijlstra
2016-01-13 15:18 ` Alexander Shishkin
2016-01-13 15:22 ` Dmitry Vyukov
2016-01-13 15:35 ` Alexander Shishkin
2016-01-14 9:35 ` Peter Zijlstra
2016-01-14 10:05 ` Dmitry Vyukov
2016-02-15 15:04 ` Dmitry Vyukov
2016-02-15 15:38 ` Peter Zijlstra
2016-02-15 15:47 ` Dmitry Vyukov
2016-02-15 16:01 ` Vince Weaver
2016-02-15 16:17 ` Peter Zijlstra
2016-02-15 16:29 ` Dmitry Vyukov
2016-02-15 16:29 ` Peter Zijlstra
2016-02-15 16:35 ` Dmitry Vyukov
2016-02-15 16:38 ` Dmitry Vyukov
2016-02-15 16:52 ` Peter Zijlstra
2016-02-15 17:04 ` Dmitry Vyukov
2016-02-15 17:07 ` Peter Zijlstra
2016-02-15 17:45 ` Dmitry Vyukov
2016-02-15 18:01 ` Peter Zijlstra
2016-02-15 18:33 ` Dmitry Vyukov
2016-02-15 16:41 ` Peter Zijlstra
2016-02-15 16:54 ` Dmitry Vyukov
2016-02-15 16:59 ` Peter Zijlstra
2016-01-12 13:13 ` Peter Zijlstra
2016-01-12 13:31 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160111163229.411314288@infradead.org \
--to=peterz@infradead.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=andi@firstfloor.org \
--cc=dvyukov@google.com \
--cc=eranian@google.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=vince@deater.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.