From: Peter Zijlstra <peterz@infradead.org>
To: Jiri Olsa <jolsa@redhat.com>
Cc: Vince Weaver <vince@deater.net>, Ingo Molnar <mingo@redhat.com>,
Andi Kleen <ak@linux.intel.com>,
linux-kernel@vger.kernel.org, mark.rutland@arm.com,
Linus Torvalds <torvalds@linux-foundation.org>
Subject: Re: perf fuzzer crash [PATCH] perf: Get group events reference before moving the group
Date: Fri, 16 Jan 2015 15:11:04 +0100 [thread overview]
Message-ID: <20150116141104.GD21958@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <20150116104644.GW23965@worktop.programming.kicks-ass.net>
On Fri, Jan 16, 2015 at 11:46:44AM +0100, Peter Zijlstra wrote:
> Its a bandaid at best :/ The problem is (again) that we changes
> event->ctx without any kind of serialization.
>
> The issue came up before:
>
> https://lkml.org/lkml/2014/9/5/397
>
> and I've not been able to come up with anything much saner.
A little something like the below is the best I could come up with; I
know Linus hated it, but I figure we ought to do something to stop
crashing.
---
init/Kconfig | 1 +
kernel/events/core.c | 126 ++++++++++++++++++++++++++++++++++++++++----------
2 files changed, 103 insertions(+), 24 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index 9afb971..ebc8522 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1595,6 +1595,7 @@ config PERF_EVENTS
depends on HAVE_PERF_EVENTS
select ANON_INODES
select IRQ_WORK
+ select PERCPU_RWSEM
help
Enable kernel support for various performance events provided
by software and hardware.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c10124b..fb3971d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -42,6 +42,7 @@
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
+#include <linux/percpu-rwsem.h>
#include "internal.h"
@@ -122,6 +123,42 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
return data.ret;
}
+/*
+ * Required to migrate events between contexts.
+ *
+ * Migrating events between contexts is rather tricky; there is no real
+ * serialization around the perf_event::ctx pointer.
+ *
+ * So what we do is hold this rwsem over the remove_from_context and
+ * install_in_context. The remove_from_context ensures the event is inactive
+ * and will not be used from IRQ/NMI context anymore, and the remaining
+ * sites can acquire the rwsem read side.
+ */
+static struct percpu_rw_semaphore perf_rwsem;
+
+static inline struct perf_event_context *perf_event_ctx(struct perf_event *event)
+{
+#ifdef CONFIG_LOCKDEP
+ /*
+ * Assert the locking rules outlined above; in order to dereference
+ * event->ctx we must either be attached to the context or hold
+ * perf_rwsem.
+ *
+ * XXX not usable from IPIs because the lockdep held lock context
+ * will be wrong; maybe add trylock variants to the percpu_rw_semaphore
+ */
+ WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_CONTEXT) ||
+ (debug_locks && !lockdep_is_held(&perf_rwsem.rw_sem)));
+#endif
+
+ return event->ctx;
+}
+
+static inline struct perf_event_context *__perf_event_ctx(struct perf_event *event)
+{
+ return event->ctx;
+}
+
#define EVENT_OWNER_KERNEL ((void *) -1)
static bool is_kernel_event(struct perf_event *event)
@@ -380,7 +417,7 @@ perf_cgroup_from_task(struct task_struct *task)
static inline bool
perf_cgroup_match(struct perf_event *event)
{
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
/* @event doesn't care about cgroup */
@@ -1054,7 +1091,7 @@ static void update_context_time(struct perf_event_context *ctx)
static u64 perf_event_time(struct perf_event *event)
{
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
if (is_cgroup_event(event))
return perf_cgroup_event_time(event);
@@ -1068,7 +1105,7 @@ static u64 perf_event_time(struct perf_event *event)
*/
static void update_event_times(struct perf_event *event)
{
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
u64 run_end;
if (event->state < PERF_EVENT_STATE_INACTIVE ||
@@ -1518,7 +1555,7 @@ static int __perf_remove_from_context(void *info)
{
struct remove_event *re = info;
struct perf_event *event = re->event;
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
raw_spin_lock(&ctx->lock);
@@ -1551,7 +1588,7 @@ static int __perf_remove_from_context(void *info)
*/
static void perf_remove_from_context(struct perf_event *event, bool detach_group)
{
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = perf_event_ctx(event);
struct task_struct *task = ctx->task;
struct remove_event re = {
.event = event,
@@ -1606,7 +1643,7 @@ retry:
int __perf_event_disable(void *info)
{
struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
/*
@@ -1656,20 +1693,24 @@ int __perf_event_disable(void *info)
*/
void perf_event_disable(struct perf_event *event)
{
- struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
+ struct perf_event_context *ctx;
+ struct task_struct *task;
+
+ percpu_down_read(&perf_rwsem);
+ ctx = perf_event_ctx(event);
+ task = ctx->task;
if (!task) {
/*
* Disable the event on the cpu that it's on
*/
cpu_function_call(event->cpu, __perf_event_disable, event);
- return;
+ goto unlock;
}
retry:
if (!task_function_call(task, __perf_event_disable, event))
- return;
+ goto unlock;
raw_spin_lock_irq(&ctx->lock);
/*
@@ -1694,6 +1735,8 @@ retry:
event->state = PERF_EVENT_STATE_OFF;
}
raw_spin_unlock_irq(&ctx->lock);
+unlock:
+ percpu_up_read(&perf_rwsem);
}
EXPORT_SYMBOL_GPL(perf_event_disable);
@@ -1937,7 +1980,7 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
static int __perf_install_in_context(void *info)
{
struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct perf_event_context *task_ctx = cpuctx->task_ctx;
struct task_struct *task = current;
@@ -2076,7 +2119,7 @@ static void __perf_event_mark_enabled(struct perf_event *event)
static int __perf_event_enable(void *info)
{
struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
struct perf_event *leader = event->group_leader;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
@@ -2160,15 +2203,19 @@ unlock:
*/
void perf_event_enable(struct perf_event *event)
{
- struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
+ struct perf_event_context *ctx;
+ struct task_struct *task;
+
+ percpu_down_read(&perf_rwsem);
+ ctx = perf_event_ctx(event);
+ task = ctx->task;
if (!task) {
/*
* Enable the event on the cpu that it's on
*/
cpu_function_call(event->cpu, __perf_event_enable, event);
- return;
+ goto unlock;
}
raw_spin_lock_irq(&ctx->lock);
@@ -2194,7 +2241,7 @@ retry:
raw_spin_unlock_irq(&ctx->lock);
if (!task_function_call(task, __perf_event_enable, event))
- return;
+ goto unlock;
raw_spin_lock_irq(&ctx->lock);
@@ -2213,6 +2260,8 @@ retry:
out:
raw_spin_unlock_irq(&ctx->lock);
+unlock:
+ percpu_up_read(&perf_rwsem);
}
EXPORT_SYMBOL_GPL(perf_event_enable);
@@ -3076,7 +3125,7 @@ void perf_event_exec(void)
static void __perf_event_read(void *info)
{
struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = __perf_event_ctx(event);
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
/*
@@ -3115,7 +3164,7 @@ static u64 perf_event_read(struct perf_event *event)
smp_call_function_single(event->oncpu,
__perf_event_read, event, 1);
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = perf_event_ctx(event);
unsigned long flags;
raw_spin_lock_irqsave(&ctx->lock, flags);
@@ -3440,7 +3489,7 @@ static void perf_remove_from_owner(struct perf_event *event)
*/
static void put_event(struct perf_event *event)
{
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx;
if (!atomic_long_dec_and_test(&event->refcount))
return;
@@ -3448,6 +3497,8 @@ static void put_event(struct perf_event *event)
if (!is_kernel_event(event))
perf_remove_from_owner(event);
+ percpu_down_read(&perf_rwsem);
+ ctx = perf_event_ctx(event);
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
@@ -3464,6 +3515,7 @@ static void put_event(struct perf_event *event)
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
+ percpu_up_read(&perf_rwsem);
_free_event(event);
}
@@ -3647,11 +3699,13 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
if (count < event->read_size)
return -ENOSPC;
- WARN_ON_ONCE(event->ctx->parent_ctx);
+ percpu_down_read(&perf_rwsem);
+ WARN_ON_ONCE(perf_event_ctx(event)->parent_ctx);
if (read_format & PERF_FORMAT_GROUP)
ret = perf_event_read_group(event, read_format, buf);
else
ret = perf_event_read_one(event, read_format, buf);
+ percpu_up_read(&perf_rwsem);
return ret;
}
@@ -3689,9 +3743,11 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
static void perf_event_reset(struct perf_event *event)
{
+ percpu_down_read(&perf_rwsem);
(void)perf_event_read(event);
local64_set(&event->count, 0);
perf_event_update_userpage(event);
+ percpu_up_read(&perf_rwsem);
}
/*
@@ -3705,7 +3761,7 @@ static void perf_event_for_each_child(struct perf_event *event,
{
struct perf_event *child;
- WARN_ON_ONCE(event->ctx->parent_ctx);
+ WARN_ON_ONCE(__perf_event_ctx(event)->parent_ctx);
mutex_lock(&event->child_mutex);
func(event);
list_for_each_entry(child, &event->child_list, child_list)
@@ -3716,6 +3772,14 @@ static void perf_event_for_each_child(struct perf_event *event,
static void perf_event_for_each(struct perf_event *event,
void (*func)(struct perf_event *))
{
+ /*
+ * XXX broken
+ *
+ * lock inversion and recursion issues; ctx->mutex must nest inside
+ * perf_rwsem, but func() will take perf_rwsem again.
+ *
+ * Cure with ugly.
+ */
struct perf_event_context *ctx = event->ctx;
struct perf_event *sibling;
@@ -3731,7 +3795,7 @@ static void perf_event_for_each(struct perf_event *event,
static int perf_event_period(struct perf_event *event, u64 __user *arg)
{
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx;
int ret = 0, active;
u64 value;
@@ -3744,6 +3808,8 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (!value)
return -EINVAL;
+ percpu_down_read(&perf_rwsem);
+ ctx = perf_event_ctx(event);
raw_spin_lock_irq(&ctx->lock);
if (event->attr.freq) {
if (value > sysctl_perf_event_sample_rate) {
@@ -3772,6 +3838,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
unlock:
raw_spin_unlock_irq(&ctx->lock);
+ percpu_up_read(&perf_rwsem);
return ret;
}
@@ -7229,11 +7296,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (output_event->cpu != event->cpu)
goto out;
+ percpu_down_read(&perf_rwsem);
/*
* If its not a per-cpu rb, it must be the same task.
*/
- if (output_event->cpu == -1 && output_event->ctx != event->ctx)
- goto out;
+ if (output_event->cpu == -1 &&
+ perf_event_ctx(output_event) != perf_event_ctx(event))
+ goto unlock_rwsem;
set:
mutex_lock(&event->mmap_mutex);
@@ -7253,6 +7322,8 @@ set:
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
+unlock_rwsem:
+ percpu_up_read(&perf_rwsem);
out:
return ret;
@@ -7461,6 +7532,7 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
struct perf_event_context *gctx = group_leader->ctx;
+ percpu_down_write(&perf_rwsem);
mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader, false);
@@ -7498,6 +7570,9 @@ SYSCALL_DEFINE5(perf_event_open,
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
+ if (move_group)
+ percpu_up_write(&perf_rwsem);
+
put_online_cpus();
event->owner = current;
@@ -7600,6 +7675,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
struct perf_event *event, *tmp;
LIST_HEAD(events);
+ percpu_down_write(&perf_rwsem);
src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
@@ -7625,6 +7701,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
get_ctx(dst_ctx);
}
mutex_unlock(&dst_ctx->mutex);
+ percpu_up_write(&perf_rwsem);
}
EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
@@ -8261,6 +8338,7 @@ void __init perf_event_init(void)
idr_init(&pmu_idr);
+ percpu_init_rwsem(&perf_rwsem);
perf_event_init_all_cpus();
init_srcu_struct(&pmus_srcu);
perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
next prev parent reply other threads:[~2015-01-16 14:11 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-16 7:57 perf fuzzer crash [PATCH] perf: Get group events reference before moving the group Jiri Olsa
2015-01-16 10:46 ` Peter Zijlstra
2015-01-16 14:11 ` Peter Zijlstra [this message]
2015-01-16 18:54 ` Vince Weaver
2015-01-19 3:49 ` Vince Weaver
2015-01-18 14:13 ` Ingo Molnar
2015-01-19 14:40 ` Mark Rutland
2015-01-19 17:40 ` Mark Rutland
2015-01-20 13:39 ` Mark Rutland
2015-01-20 14:35 ` Mark Rutland
2015-01-21 1:00 ` Paul E. McKenney
2015-01-21 12:08 ` Mark Rutland
2015-01-21 20:07 ` Paul E. McKenney
-- strict thread matches above, loose matches on Subject: below --
2015-01-19 18:09 Vince Weaver
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150116141104.GD21958@worktop.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=ak@linux.intel.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=torvalds@linux-foundation.org \
--cc=vince@deater.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.