public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@redhat.com, hpa@zytor.com, eranian@google.com,
	linux-kernel@vger.kernel.org, tglx@linutronix.de, mingo@elte.hu
Cc: linux-tip-commits@vger.kernel.org
Subject: Re: [tip:perf/core] perf: Add cgroup support
Date: Wed, 16 Feb 2011 17:57:32 +0100	[thread overview]
Message-ID: <1297875452.2413.453.camel@twins> (raw)
In-Reply-To: <tip-e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25@git.kernel.org>

On Wed, 2011-02-16 at 13:46 +0000, tip-bot for Stephane Eranian wrote:
> +static inline struct perf_cgroup *
> +perf_cgroup_from_task(struct task_struct *task)
> +{
> +       return container_of(task_subsys_state(task, perf_subsys_id),
> +                       struct perf_cgroup, css);
> +} 

===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
include/linux/cgroup.h:547 invoked rcu_dereference_check() without protection!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 1
1 lock held by perf/1774:
 #0:  (&ctx->lock){......}, at: [<ffffffff810afb91>] ctx_sched_in+0x2a/0x37b
stack backtrace:
Pid: 1774, comm: perf Not tainted 2.6.38-rc5-tip+ #94017
Call Trace:
 [<ffffffff81070932>] ? lockdep_rcu_dereference+0x9d/0xa5
 [<ffffffff810afc4e>] ? ctx_sched_in+0xe7/0x37b
 [<ffffffff810aff37>] ? perf_event_context_sched_in+0x55/0xa3
 [<ffffffff810b0203>] ? __perf_event_task_sched_in+0x20/0x5b
 [<ffffffff81035714>] ? finish_task_switch+0x49/0xf4
 [<ffffffff81340d60>] ? schedule+0x9cc/0xa85
 [<ffffffff8110a84c>] ? vfsmount_lock_global_unlock_online+0x9e/0xb0
 [<ffffffff8110b556>] ? mntput_no_expire+0x4e/0xc1
 [<ffffffff8110b5ef>] ? mntput+0x26/0x28
 [<ffffffff810f2add>] ? fput+0x1a0/0x1af
 [<ffffffff81002eb9>] ? int_careful+0xb/0x2c
 [<ffffffff813432bf>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff81002ec7>] ? int_careful+0x19/0x2c


The simple fix seemed to be to add:

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a0a6987..e739e6f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -204,7 +204,8 @@ __get_cpu_context(struct perf_event_context *ctx)
 static inline struct perf_cgroup *
 perf_cgroup_from_task(struct task_struct *task)
 {
-	return container_of(task_subsys_state(task, perf_subsys_id),
+	return container_of(task_subsys_state_check(task, perf_subsys_id,
+				lockdep_is_held(&ctx->lock)),
 			struct perf_cgroup, css);
 }

This works because all callers _should_ hold ctx->lock, and ctx->lock is
acquired during ->attach/->exit, so holding that lock will pin the cgroup.

However, not all update_context_time()/update_cgrp_time_from_event()
callers actually hold ctx->lock, which is a bug because that lock also
serializes the timestamps.

Most notably, task_clock_event_read(), which leads us to:

@@ -5794,9 +5795,14 @@ static void task_clock_event_read(struct perf_event *event)
        u64 time;
 
        if (!in_nmi()) {
-               update_context_time(event->ctx);
+               struct perf_event_context *ctx = event->ctx;
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->lock, flags);
+               update_context_time(ctx);
                update_cgrp_time_from_event(event);
-               time = event->ctx->time;
+               time = ctx->time;
+               spin_unlock_irqrestore(&ctx->lock, flags);
        } else {
                u64 now = perf_clock();
                u64 delta = now - event->ctx->timestamp;


I then realized that the events themselves pin the cgroup, so it's all
cosmetic at best, but then I already had the below patch...

Thoughts?

---
 kernel/perf_event.c |   30 ++++++++++++++++++------------
 1 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a0a6987..810ee49 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -202,9 +202,10 @@ __get_cpu_context(struct perf_event_context *ctx)
 #ifdef CONFIG_CGROUP_PERF
 
 static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
+perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
 {
-	return container_of(task_subsys_state(task, perf_subsys_id),
+	return container_of(task_subsys_state_check(task, perf_subsys_id,
+				lockdep_is_held(&ctx->lock)),
 			struct perf_cgroup, css);
 }
 
@@ -268,7 +269,7 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
 
 static inline void update_cgrp_time_from_event(struct perf_event *event)
 {
-	struct perf_cgroup *cgrp = perf_cgroup_from_task(current);
+	struct perf_cgroup *cgrp = perf_cgroup_from_task(current, event->ctx);
 	/*
 	 * do not update time when cgroup is not active
 	 */
@@ -279,7 +280,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 }
 
 static inline void
-perf_cgroup_set_timestamp(struct task_struct *task, u64 now)
+perf_cgroup_set_timestamp(struct task_struct *task, struct perf_event_context *ctx)
 {
 	struct perf_cgroup *cgrp;
 	struct perf_cgroup_info *info;
@@ -287,9 +288,9 @@ perf_cgroup_set_timestamp(struct task_struct *task, u64 now)
 	if (!task)
 		return;
 
-	cgrp = perf_cgroup_from_task(task);
+	cgrp = perf_cgroup_from_task(task, ctx);
 	info = this_cpu_ptr(cgrp->info);
-	info->timestamp = now;
+	info->timestamp = ctx->timestamp;
 }
 
 #define PERF_CGROUP_SWOUT	0x1 /* cgroup switch out every event */
@@ -349,7 +350,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 				 * allow event_filter_match() to not
 				 * have to pass task around
 				 */
-				cpuctx->cgrp = perf_cgroup_from_task(task);
+				cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx);
 				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
 			}
 		}
@@ -494,7 +495,7 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
 }
 
 static inline void
-perf_cgroup_set_timestamp(struct task_struct *task, u64 now)
+perf_cgroup_set_timestamp(struct task_struct *task, struct perf_event_context *ctx)
 {
 }
 
@@ -1613,7 +1614,7 @@ static int __perf_event_enable(void *info)
 	/*
 	 * set current task's cgroup time reference point
 	 */
-	perf_cgroup_set_timestamp(current, perf_clock());
+	perf_cgroup_set_timestamp(current, ctx);
 
 	__perf_event_mark_enabled(event, ctx);
 
@@ -2048,7 +2049,7 @@ ctx_sched_in(struct perf_event_context *ctx,
 
 	now = perf_clock();
 	ctx->timestamp = now;
-	perf_cgroup_set_timestamp(task, now);
+	perf_cgroup_set_timestamp(task, ctx);
 	/*
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
@@ -5794,9 +5795,14 @@ static void task_clock_event_read(struct perf_event *event)
 	u64 time;
 
 	if (!in_nmi()) {
-		update_context_time(event->ctx);
+		struct perf_event_context *ctx = event->ctx;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->lock, flags);
+		update_context_time(ctx);
 		update_cgrp_time_from_event(event);
-		time = event->ctx->time;
+		time = ctx->time;
+		spin_unlock_irqrestore(&ctx->lock, flags);
 	} else {
 		u64 now = perf_clock();
 		u64 delta = now - event->ctx->timestamp;


  reply	other threads:[~2011-02-16 16:57 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-14  9:20 [PATCH 1/2] perf_events: add cgroup support (v9) Stephane Eranian
2011-02-15 14:55 ` Peter Zijlstra
2011-02-15 15:01   ` stephane eranian
2011-02-16 13:46 ` [tip:perf/core] perf: Add cgroup support tip-bot for Stephane Eranian
2011-02-16 16:57   ` Peter Zijlstra [this message]
2011-02-17 11:16     ` Stephane Eranian
2011-02-17 11:36       ` Peter Zijlstra
2011-02-17 14:45         ` Stephane Eranian
2011-02-17 15:50           ` Peter Zijlstra
2011-02-17 16:01             ` Stephane Eranian
2011-02-17 16:05               ` Peter Zijlstra
2011-02-17 16:13             ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1297875452.2413.453.camel@twins \
    --to=a.p.zijlstra@chello.nl \
    --cc=eranian@google.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox