From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@redhat.com, hpa@zytor.com, eranian@google.com,
linux-kernel@vger.kernel.org, tglx@linutronix.de, mingo@elte.hu
Cc: linux-tip-commits@vger.kernel.org
Subject: Re: [tip:perf/core] perf: Add cgroup support
Date: Wed, 16 Feb 2011 17:57:32 +0100 [thread overview]
Message-ID: <1297875452.2413.453.camel@twins> (raw)
In-Reply-To: <tip-e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25@git.kernel.org>
On Wed, 2011-02-16 at 13:46 +0000, tip-bot for Stephane Eranian wrote:
> +static inline struct perf_cgroup *
> +perf_cgroup_from_task(struct task_struct *task)
> +{
> + return container_of(task_subsys_state(task, perf_subsys_id),
> + struct perf_cgroup, css);
> +}
===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
include/linux/cgroup.h:547 invoked rcu_dereference_check() without protection!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 1
1 lock held by perf/1774:
#0: (&ctx->lock){......}, at: [<ffffffff810afb91>] ctx_sched_in+0x2a/0x37b
stack backtrace:
Pid: 1774, comm: perf Not tainted 2.6.38-rc5-tip+ #94017
Call Trace:
[<ffffffff81070932>] ? lockdep_rcu_dereference+0x9d/0xa5
[<ffffffff810afc4e>] ? ctx_sched_in+0xe7/0x37b
[<ffffffff810aff37>] ? perf_event_context_sched_in+0x55/0xa3
[<ffffffff810b0203>] ? __perf_event_task_sched_in+0x20/0x5b
[<ffffffff81035714>] ? finish_task_switch+0x49/0xf4
[<ffffffff81340d60>] ? schedule+0x9cc/0xa85
[<ffffffff8110a84c>] ? vfsmount_lock_global_unlock_online+0x9e/0xb0
[<ffffffff8110b556>] ? mntput_no_expire+0x4e/0xc1
[<ffffffff8110b5ef>] ? mntput+0x26/0x28
[<ffffffff810f2add>] ? fput+0x1a0/0x1af
[<ffffffff81002eb9>] ? int_careful+0xb/0x2c
[<ffffffff813432bf>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[<ffffffff81002ec7>] ? int_careful+0x19/0x2c
The simple fix seemed to be to add:
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a0a6987..e739e6f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -204,7 +204,8 @@ __get_cpu_context(struct perf_event_context *ctx)
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task)
{
- return container_of(task_subsys_state(task, perf_subsys_id),
+ return container_of(task_subsys_state_check(task, perf_subsys_id,
+ lockdep_is_held(&ctx->lock)),
struct perf_cgroup, css);
}
This works because all callers _should_ hold ctx->lock, and ctx->lock is
acquired during ->attach/->exit, so holding that lock will pin the cgroup.
However, not all update_context_time()/update_cgrp_time_from_event()
callers actually hold ctx->lock, which is a bug because that lock also
serializes the timestamps.
The most notable offender is task_clock_event_read(), which leads us to:
@@ -5794,9 +5795,14 @@ static void task_clock_event_read(struct perf_event *event)
u64 time;
if (!in_nmi()) {
- update_context_time(event->ctx);
+ struct perf_event_context *ctx = event->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ update_context_time(ctx);
update_cgrp_time_from_event(event);
- time = event->ctx->time;
+ time = ctx->time;
+ spin_unlock_irqrestore(&ctx->lock, flags);
} else {
u64 now = perf_clock();
u64 delta = now - event->ctx->timestamp;
I then realized that the events themselves pin the cgroup, so it's all
cosmetic at best, but then I already had the below patch...
Thoughts?
---
kernel/perf_event.c | 30 ++++++++++++++++++------------
1 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a0a6987..810ee49 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -202,9 +202,10 @@ __get_cpu_context(struct perf_event_context *ctx)
#ifdef CONFIG_CGROUP_PERF
static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
+perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
{
- return container_of(task_subsys_state(task, perf_subsys_id),
+ return container_of(task_subsys_state_check(task, perf_subsys_id,
+ lockdep_is_held(&ctx->lock)),
struct perf_cgroup, css);
}
@@ -268,7 +269,7 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
- struct perf_cgroup *cgrp = perf_cgroup_from_task(current);
+ struct perf_cgroup *cgrp = perf_cgroup_from_task(current, event->ctx);
/*
* do not update time when cgroup is not active
*/
@@ -279,7 +280,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
}
static inline void
-perf_cgroup_set_timestamp(struct task_struct *task, u64 now)
+perf_cgroup_set_timestamp(struct task_struct *task, struct perf_event_context *ctx)
{
struct perf_cgroup *cgrp;
struct perf_cgroup_info *info;
@@ -287,9 +288,9 @@ perf_cgroup_set_timestamp(struct task_struct *task, u64 now)
if (!task)
return;
- cgrp = perf_cgroup_from_task(task);
+ cgrp = perf_cgroup_from_task(task, ctx);
info = this_cpu_ptr(cgrp->info);
- info->timestamp = now;
+ info->timestamp = ctx->timestamp;
}
#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
@@ -349,7 +350,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
* allow event_filter_match() to not
* have to pass task around
*/
- cpuctx->cgrp = perf_cgroup_from_task(task);
+ cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx);
cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
}
}
@@ -494,7 +495,7 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
}
static inline void
-perf_cgroup_set_timestamp(struct task_struct *task, u64 now)
+perf_cgroup_set_timestamp(struct task_struct *task, struct perf_event_context *ctx)
{
}
@@ -1613,7 +1614,7 @@ static int __perf_event_enable(void *info)
/*
* set current task's cgroup time reference point
*/
- perf_cgroup_set_timestamp(current, perf_clock());
+ perf_cgroup_set_timestamp(current, ctx);
__perf_event_mark_enabled(event, ctx);
@@ -2048,7 +2049,7 @@ ctx_sched_in(struct perf_event_context *ctx,
now = perf_clock();
ctx->timestamp = now;
- perf_cgroup_set_timestamp(task, now);
+ perf_cgroup_set_timestamp(task, ctx);
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
@@ -5794,9 +5795,14 @@ static void task_clock_event_read(struct perf_event *event)
u64 time;
if (!in_nmi()) {
- update_context_time(event->ctx);
+ struct perf_event_context *ctx = event->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ update_context_time(ctx);
update_cgrp_time_from_event(event);
- time = event->ctx->time;
+ time = ctx->time;
+ spin_unlock_irqrestore(&ctx->lock, flags);
} else {
u64 now = perf_clock();
u64 delta = now - event->ctx->timestamp;
next prev parent reply other threads:[~2011-02-16 16:57 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-02-14 9:20 [PATCH 1/2] perf_events: add cgroup support (v9) Stephane Eranian
2011-02-15 14:55 ` Peter Zijlstra
2011-02-15 15:01 ` stephane eranian
2011-02-16 13:46 ` [tip:perf/core] perf: Add cgroup support tip-bot for Stephane Eranian
2011-02-16 16:57 ` Peter Zijlstra [this message]
2011-02-17 11:16 ` Stephane Eranian
2011-02-17 11:36 ` Peter Zijlstra
2011-02-17 14:45 ` Stephane Eranian
2011-02-17 15:50 ` Peter Zijlstra
2011-02-17 16:01 ` Stephane Eranian
2011-02-17 16:05 ` Peter Zijlstra
2011-02-17 16:13 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1297875452.2413.453.camel@twins \
--to=a.p.zijlstra@chello.nl \
--cc=eranian@google.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox