* [PATCH 1/6] percpu: {get,put}_cpu_ptr
2010-09-17 9:28 [PATCH 0/6] Various perf fixes Peter Zijlstra
@ 2010-09-17 9:28 ` Peter Zijlstra
2010-09-19 15:09 ` Tejun Heo
2010-09-21 14:13 ` [tip:perf/core] percpu: Add {get,put}_cpu_ptr tip-bot for Peter Zijlstra
2010-09-17 9:28 ` [PATCH 2/6] perf: Avoid RCU vs preemption assumptions Peter Zijlstra
` (4 subsequent siblings)
5 siblings, 2 replies; 14+ messages in thread
From: Peter Zijlstra @ 2010-09-17 9:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Stephane Eranian, Robert Richter, Tejun Heo,
Peter Zijlstra
[-- Attachment #1: percpu-get-cpu-ptr.patch --]
[-- Type: text/plain, Size: 769 bytes --]
Similar to {get,put}_cpu_var() except for dynamically allocated per-cpu memory.
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/percpu.h | 9 +++++++++
1 file changed, 9 insertions(+)
Index: linux-2.6/include/linux/percpu.h
===================================================================
--- linux-2.6.orig/include/linux/percpu.h
+++ linux-2.6/include/linux/percpu.h
@@ -39,6 +39,15 @@
preempt_enable(); \
} while (0)
+#define get_cpu_ptr(var) ({ \
+ preempt_disable(); \
+ this_cpu_ptr(var); })
+
+#define put_cpu_ptr(var) do { \
+ (void)(var); \
+ preempt_enable(); \
+} while (0)
+
#ifdef CONFIG_SMP
/* minimum unit size, also is the maximum supported allocation size */
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 1/6] percpu: {get,put}_cpu_ptr
2010-09-17 9:28 ` [PATCH 1/6] percpu: {get,put}_cpu_ptr Peter Zijlstra
@ 2010-09-19 15:09 ` Tejun Heo
2010-09-21 14:13 ` [tip:perf/core] percpu: Add {get,put}_cpu_ptr tip-bot for Peter Zijlstra
1 sibling, 0 replies; 14+ messages in thread
From: Tejun Heo @ 2010-09-19 15:09 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, linux-kernel, Stephane Eranian, Robert Richter
On 09/17/2010 11:28 AM, Peter Zijlstra wrote:
> Similar to {get,put}_cpu_var() except for dynamically allocated per-cpu memory.
>
> Cc: Tejun Heo <tj@kernel.org>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Tejun Heo <tj@kernel.org>
How do you want to route this? Given that perf is the only user at
this point, feel free to push this with other perf changes.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perf/core] percpu: Add {get,put}_cpu_ptr
2010-09-17 9:28 ` [PATCH 1/6] percpu: {get,put}_cpu_ptr Peter Zijlstra
2010-09-19 15:09 ` Tejun Heo
@ 2010-09-21 14:13 ` tip-bot for Peter Zijlstra
1 sibling, 0 replies; 14+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-09-21 14:13 UTC (permalink / raw)
To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, a.p.zijlstra, tj, tglx, mingo
Commit-ID: 8b8e2ec1eeca7f6941bc81cefc9663018d6ceb57
Gitweb: http://git.kernel.org/tip/8b8e2ec1eeca7f6941bc81cefc9663018d6ceb57
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 16 Sep 2010 19:21:28 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 21 Sep 2010 13:55:43 +0200
percpu: Add {get,put}_cpu_ptr
These are similar to {get,put}_cpu_var() except for dynamically
allocated per-cpu memory.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <20100917093009.252867712@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/percpu.h | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 49466b1..0eb5083 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -39,6 +39,15 @@
preempt_enable(); \
} while (0)
+#define get_cpu_ptr(var) ({ \
+ preempt_disable(); \
+ this_cpu_ptr(var); })
+
+#define put_cpu_ptr(var) do { \
+ (void)(var); \
+ preempt_enable(); \
+} while (0)
+
#ifdef CONFIG_SMP
/* minimum unit size, also is the maximum supported allocation size */
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/6] perf: Avoid RCU vs preemption assumptions
2010-09-17 9:28 [PATCH 0/6] Various perf fixes Peter Zijlstra
2010-09-17 9:28 ` [PATCH 1/6] percpu: {get,put}_cpu_ptr Peter Zijlstra
@ 2010-09-17 9:28 ` Peter Zijlstra
2010-09-21 14:13 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-09-17 9:28 ` [PATCH 3/6] perf_events: Fix broken event grouping Peter Zijlstra
` (3 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2010-09-17 9:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Stephane Eranian, Robert Richter, Tejun Heo,
Peter Zijlstra
[-- Attachment #1: perf-fix-preempt-iteration.patch --]
[-- Type: text/plain, Size: 2377 bytes --]
The per-pmu per-cpu context patch converted things from get_cpu_var() to
this_cpu_ptr(), but that only works if rcu_read_lock() actually disables
preemption, and since there is no such guarantee, we need to fix that.
Use the newly introduced {get,put}_cpu_ptr().
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_event.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -3814,18 +3814,20 @@ static void perf_event_task_event(struct
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
perf_event_task_ctx(&cpuctx->ctx, task_event);
ctx = task_event->task_ctx;
if (!ctx) {
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
- continue;
+ goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
}
if (ctx)
perf_event_task_ctx(ctx, task_event);
+next:
+ put_cpu_ptr(pmu->pmu_cpu_context);
}
rcu_read_unlock();
}
@@ -3947,16 +3949,18 @@ static void perf_event_comm_event(struct
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
perf_event_comm_ctx(&cpuctx->ctx, comm_event);
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
- continue;
+ goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx)
perf_event_comm_ctx(ctx, comm_event);
+next:
+ put_cpu_ptr(pmu->pmu_cpu_context);
}
rcu_read_unlock();
}
@@ -4130,19 +4134,21 @@ static void perf_event_mmap_event(struct
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
vma->vm_flags & VM_EXEC);
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
- continue;
+ goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx) {
perf_event_mmap_ctx(ctx, mmap_event,
vma->vm_flags & VM_EXEC);
}
+next:
+ put_cpu_ptr(pmu->pmu_cpu_context);
}
rcu_read_unlock();
^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perf/core] perf: Avoid RCU vs preemption assumptions
2010-09-17 9:28 ` [PATCH 2/6] perf: Avoid RCU vs preemption assumptions Peter Zijlstra
@ 2010-09-21 14:13 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-09-21 14:13 UTC (permalink / raw)
To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, a.p.zijlstra, tj, tglx, mingo
Commit-ID: 41945f6ccf1e86f87fddf6b32db9cf431c05fb54
Gitweb: http://git.kernel.org/tip/41945f6ccf1e86f87fddf6b32db9cf431c05fb54
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Thu, 16 Sep 2010 19:17:24 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 21 Sep 2010 13:55:44 +0200
perf: Avoid RCU vs preemption assumptions
The per-pmu per-cpu context patch converted things from
get_cpu_var() to this_cpu_ptr(), but that only works if
rcu_read_lock() actually disables preemption, and since
there is no such guarantee, we need to fix that.
Use the newly introduced {get,put}_cpu_ptr().
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tejun Heo <tj@kernel.org>
LKML-Reference: <20100917093009.308453028@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_event.c | 18 ++++++++++++------
1 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index baae136..c16158c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3836,18 +3836,20 @@ static void perf_event_task_event(struct perf_task_event *task_event)
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
perf_event_task_ctx(&cpuctx->ctx, task_event);
ctx = task_event->task_ctx;
if (!ctx) {
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
- continue;
+ goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
}
if (ctx)
perf_event_task_ctx(ctx, task_event);
+next:
+ put_cpu_ptr(pmu->pmu_cpu_context);
}
rcu_read_unlock();
}
@@ -3969,16 +3971,18 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
perf_event_comm_ctx(&cpuctx->ctx, comm_event);
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
- continue;
+ goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx)
perf_event_comm_ctx(ctx, comm_event);
+next:
+ put_cpu_ptr(pmu->pmu_cpu_context);
}
rcu_read_unlock();
}
@@ -4152,19 +4156,21 @@ got_name:
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
vma->vm_flags & VM_EXEC);
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
- continue;
+ goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx) {
perf_event_mmap_ctx(ctx, mmap_event,
vma->vm_flags & VM_EXEC);
}
+next:
+ put_cpu_ptr(pmu->pmu_cpu_context);
}
rcu_read_unlock();
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/6] perf_events: Fix broken event grouping
2010-09-17 9:28 [PATCH 0/6] Various perf fixes Peter Zijlstra
2010-09-17 9:28 ` [PATCH 1/6] percpu: {get,put}_cpu_ptr Peter Zijlstra
2010-09-17 9:28 ` [PATCH 2/6] perf: Avoid RCU vs preemption assumptions Peter Zijlstra
@ 2010-09-17 9:28 ` Peter Zijlstra
2010-09-17 11:27 ` [tip:perf/core] " tip-bot for Stephane Eranian
2010-09-17 9:28 ` [PATCH 4/6] perf: Complete software pmu grouping Peter Zijlstra
` (2 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2010-09-17 9:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Stephane Eranian, Robert Richter, Peter Zijlstra
[-- Attachment #1: stephane-perf_events-fix_broken_event_grouping.patch --]
[-- Type: text/plain, Size: 1746 bytes --]
Author: Stephane Eranian <eranian@google.com>
Events were not grouped anymore. The reason was that in
perf_event_open(), the field event->group_leader was
initialized before the function looked up the group_fd
to find the event leader. This patch fixes this by
reordering the code correctly.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4c90ea16.21edd80a.4b94.2495@mx.google.com>
---
kernel/perf_event.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -5543,17 +5543,11 @@ SYSCALL_DEFINE5(perf_event_open,
if (event_fd < 0)
return event_fd;
- event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
- if (IS_ERR(event)) {
- err = PTR_ERR(event);
- goto err_fd;
- }
-
if (group_fd != -1) {
group_leader = perf_fget_light(group_fd, &fput_needed);
if (IS_ERR(group_leader)) {
err = PTR_ERR(group_leader);
- goto err_alloc;
+ goto err_fd;
}
group_file = group_leader->filp;
if (flags & PERF_FLAG_FD_OUTPUT)
@@ -5562,6 +5556,12 @@ SYSCALL_DEFINE5(perf_event_open,
group_leader = NULL;
}
+ event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
+ if (IS_ERR(event)) {
+ err = PTR_ERR(event);
+ goto err_fd;
+ }
+
/*
* Special case software events and allow them to be part of
* any hardware group.
@@ -5643,7 +5643,6 @@ SYSCALL_DEFINE5(perf_event_open,
put_ctx(ctx);
err_group_fd:
fput_light(group_file, fput_needed);
-err_alloc:
free_event(event);
err_fd:
put_unused_fd(event_fd);
^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perf/core] perf_events: Fix broken event grouping
2010-09-17 9:28 ` [PATCH 3/6] perf_events: Fix broken event grouping Peter Zijlstra
@ 2010-09-17 11:27 ` tip-bot for Stephane Eranian
0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Stephane Eranian @ 2010-09-17 11:27 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, robert.richter, a.p.zijlstra,
tglx, mingo
Commit-ID: d14b12d7adbf214f33eb59f800b5c3d5ed9268e8
Gitweb: http://git.kernel.org/tip/d14b12d7adbf214f33eb59f800b5c3d5ed9268e8
Author: Stephane Eranian <eranian@google.com>
AuthorDate: Fri, 17 Sep 2010 11:28:47 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 17 Sep 2010 12:48:47 +0200
perf_events: Fix broken event grouping
Events were not grouped anymore. The reason was that in
perf_event_open(), the field event->group_leader was
initialized before the function looked up the group_fd
to find the event leader. This patch fixes this by
reordering the code correctly.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <20100917093009.360420946@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_event.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 86f394e..ce95617 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5550,17 +5550,11 @@ SYSCALL_DEFINE5(perf_event_open,
if (event_fd < 0)
return event_fd;
- event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
- if (IS_ERR(event)) {
- err = PTR_ERR(event);
- goto err_fd;
- }
-
if (group_fd != -1) {
group_leader = perf_fget_light(group_fd, &fput_needed);
if (IS_ERR(group_leader)) {
err = PTR_ERR(group_leader);
- goto err_alloc;
+ goto err_fd;
}
group_file = group_leader->filp;
if (flags & PERF_FLAG_FD_OUTPUT)
@@ -5569,6 +5563,12 @@ SYSCALL_DEFINE5(perf_event_open,
group_leader = NULL;
}
+ event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
+ if (IS_ERR(event)) {
+ err = PTR_ERR(event);
+ goto err_fd;
+ }
+
/*
* Special case software events and allow them to be part of
* any hardware group.
@@ -5653,7 +5653,6 @@ err_context:
put_ctx(ctx);
err_group_fd:
fput_light(group_file, fput_needed);
-err_alloc:
free_event(event);
err_fd:
put_unused_fd(event_fd);
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/6] perf: Complete software pmu grouping
2010-09-17 9:28 [PATCH 0/6] Various perf fixes Peter Zijlstra
` (2 preceding siblings ...)
2010-09-17 9:28 ` [PATCH 3/6] perf_events: Fix broken event grouping Peter Zijlstra
@ 2010-09-17 9:28 ` Peter Zijlstra
2010-09-17 11:28 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-09-17 9:28 ` [PATCH 5/6] perf: Fix perf_event_exit_cpu_context() Peter Zijlstra
2010-09-17 9:28 ` [PATCH 6/6] perf: Undo the per cpu-context timer stuff Peter Zijlstra
5 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2010-09-17 9:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Stephane Eranian, Robert Richter, Paul Mackerras,
Peter Zijlstra
[-- Attachment #1: perf-cross-pmu-group.patch --]
[-- Type: text/plain, Size: 4722 bytes --]
Aside from allowing software events into a !software group, allow
adding !software events to pure software groups.
Once we've moved the software group and attached the first !software
event, the group will no longer be a pure software group and hence no
longer be eligible for movement, at which point the straight ctx
comparison is correct again.
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 6 ++++
kernel/perf_event.c | 65 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 66 insertions(+), 5 deletions(-)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -5190,6 +5190,7 @@ int perf_pmu_register(struct pmu *pmu)
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
__perf_event_init_context(&cpuctx->ctx);
+ cpuctx->ctx.type = cpu_context;
cpuctx->ctx.pmu = pmu;
cpuctx->timer_interval = TICK_NSEC;
hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -5523,7 +5524,8 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
- struct perf_event *event, *group_leader = NULL, *output_event = NULL;
+ struct perf_event *group_leader = NULL, *output_event = NULL;
+ struct perf_event *event, *sibling;
struct perf_event_attr attr;
struct perf_event_context *ctx;
struct file *event_file = NULL;
@@ -5531,6 +5533,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct task_struct *task = NULL;
struct pmu *pmu;
int event_fd;
+ int move_group = 0;
int fput_needed = 0;
int err;
@@ -5580,8 +5583,29 @@ SYSCALL_DEFINE5(perf_event_open,
* any hardware group.
*/
pmu = event->pmu;
- if ((pmu->task_ctx_nr == perf_sw_context) && group_leader)
- pmu = group_leader->pmu;
+
+ if (group_leader &&
+ (is_software_event(event) != is_software_event(group_leader))) {
+ if (is_software_event(event)) {
+ /*
+ * If event and group_leader are not both a software
+ * event, and event is, then group leader is not.
+ *
+ * Allow the addition of software events to !software
+ * groups, this is safe because software events never
+ * fail to schedule.
+ */
+ pmu = group_leader->pmu;
+ } else if (is_software_event(group_leader) &&
+ (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
+ /*
+ * In case the group is a pure software group, and we
+ * try to add a hardware event, move the whole group to
+ * the hardware context.
+ */
+ move_group = 1;
+ }
+ }
if (pid != -1)
task = find_lively_task_by_vpid(pid);
@@ -5611,8 +5635,14 @@ SYSCALL_DEFINE5(perf_event_open,
* Do not allow to attach to a group in a different
* task or CPU context:
*/
- if (group_leader->ctx != ctx)
- goto err_context;
+ if (move_group) {
+ if (group_leader->ctx->type != ctx->type)
+ goto err_context;
+ } else {
+ if (group_leader->ctx != ctx)
+ goto err_context;
+ }
+
/*
* Only a group leader can be exclusive or pinned
*/
@@ -5632,9 +5662,34 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;
}
+ if (move_group) {
+ struct perf_event_context *gctx = group_leader->ctx;
+
+ mutex_lock(&gctx->mutex);
+ perf_event_remove_from_context(group_leader);
+ list_for_each_entry(sibling, &group_leader->sibling_list,
+ group_entry) {
+ perf_event_remove_from_context(sibling);
+ put_ctx(gctx);
+ }
+ mutex_unlock(&gctx->mutex);
+ put_ctx(gctx);
+ }
+
event->filp = event_file;
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
+
+ if (move_group) {
+ perf_install_in_context(ctx, group_leader, cpu);
+ get_ctx(ctx);
+ list_for_each_entry(sibling, &group_leader->sibling_list,
+ group_entry) {
+ perf_install_in_context(ctx, sibling, cpu);
+ get_ctx(ctx);
+ }
+ }
+
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
mutex_unlock(&ctx->mutex);
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -804,12 +804,18 @@ struct perf_event {
#endif /* CONFIG_PERF_EVENTS */
};
+enum perf_event_context_type {
+ task_context,
+ cpu_context,
+};
+
/**
* struct perf_event_context - event context structure
*
* Used as a container for task events and CPU events as well:
*/
struct perf_event_context {
+ enum perf_event_context_type type;
struct pmu *pmu;
/*
* Protect the states of the events in the list,
^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perf/core] perf: Complete software pmu grouping
2010-09-17 9:28 ` [PATCH 4/6] perf: Complete software pmu grouping Peter Zijlstra
@ 2010-09-17 11:28 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-09-17 11:28 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, eranian, hpa, mingo, a.p.zijlstra,
robert.richter, tglx, mingo
Commit-ID: b04243ef7006cda301819f54ee7ce0a3632489e3
Gitweb: http://git.kernel.org/tip/b04243ef7006cda301819f54ee7ce0a3632489e3
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 17 Sep 2010 11:28:48 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 17 Sep 2010 12:48:48 +0200
perf: Complete software pmu grouping
Aside from allowing software events into a !software group,
allow adding !software events to pure software groups.
Once we've moved the software group and attached the first
!software event, the group will no longer be a pure software
group and hence no longer be eligible for movement, at which
point the straight ctx comparison is correct again.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20100917093009.410784731@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_event.h | 6 ++++
kernel/perf_event.c | 65 ++++++++++++++++++++++++++++++++++++++++---
2 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 39d8860..165287f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -804,12 +804,18 @@ struct perf_event {
#endif /* CONFIG_PERF_EVENTS */
};
+enum perf_event_context_type {
+ task_context,
+ cpu_context,
+};
+
/**
* struct perf_event_context - event context structure
*
* Used as a container for task events and CPU events as well:
*/
struct perf_event_context {
+ enum perf_event_context_type type;
struct pmu *pmu;
/*
* Protect the states of the events in the list,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ce95617..6d7eef5 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5184,6 +5184,7 @@ int perf_pmu_register(struct pmu *pmu)
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
__perf_event_init_context(&cpuctx->ctx);
+ cpuctx->ctx.type = cpu_context;
cpuctx->ctx.pmu = pmu;
cpuctx->timer_interval = TICK_NSEC;
hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -5517,7 +5518,8 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
- struct perf_event *event, *group_leader = NULL, *output_event = NULL;
+ struct perf_event *group_leader = NULL, *output_event = NULL;
+ struct perf_event *event, *sibling;
struct perf_event_attr attr;
struct perf_event_context *ctx;
struct file *event_file = NULL;
@@ -5525,6 +5527,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct task_struct *task = NULL;
struct pmu *pmu;
int event_fd;
+ int move_group = 0;
int fput_needed = 0;
int err;
@@ -5574,8 +5577,29 @@ SYSCALL_DEFINE5(perf_event_open,
* any hardware group.
*/
pmu = event->pmu;
- if ((pmu->task_ctx_nr == perf_sw_context) && group_leader)
- pmu = group_leader->pmu;
+
+ if (group_leader &&
+ (is_software_event(event) != is_software_event(group_leader))) {
+ if (is_software_event(event)) {
+ /*
+ * If event and group_leader are not both a software
+ * event, and event is, then group leader is not.
+ *
+ * Allow the addition of software events to !software
+ * groups, this is safe because software events never
+ * fail to schedule.
+ */
+ pmu = group_leader->pmu;
+ } else if (is_software_event(group_leader) &&
+ (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
+ /*
+ * In case the group is a pure software group, and we
+ * try to add a hardware event, move the whole group to
+ * the hardware context.
+ */
+ move_group = 1;
+ }
+ }
if (pid != -1)
task = find_lively_task_by_vpid(pid);
@@ -5605,8 +5629,14 @@ SYSCALL_DEFINE5(perf_event_open,
* Do not allow to attach to a group in a different
* task or CPU context:
*/
- if (group_leader->ctx != ctx)
- goto err_context;
+ if (move_group) {
+ if (group_leader->ctx->type != ctx->type)
+ goto err_context;
+ } else {
+ if (group_leader->ctx != ctx)
+ goto err_context;
+ }
+
/*
* Only a group leader can be exclusive or pinned
*/
@@ -5626,9 +5656,34 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;
}
+ if (move_group) {
+ struct perf_event_context *gctx = group_leader->ctx;
+
+ mutex_lock(&gctx->mutex);
+ perf_event_remove_from_context(group_leader);
+ list_for_each_entry(sibling, &group_leader->sibling_list,
+ group_entry) {
+ perf_event_remove_from_context(sibling);
+ put_ctx(gctx);
+ }
+ mutex_unlock(&gctx->mutex);
+ put_ctx(gctx);
+ }
+
event->filp = event_file;
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
+
+ if (move_group) {
+ perf_install_in_context(ctx, group_leader, cpu);
+ get_ctx(ctx);
+ list_for_each_entry(sibling, &group_leader->sibling_list,
+ group_entry) {
+ perf_install_in_context(ctx, sibling, cpu);
+ get_ctx(ctx);
+ }
+ }
+
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
mutex_unlock(&ctx->mutex);
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/6] perf: Fix perf_event_exit_cpu_context()
2010-09-17 9:28 [PATCH 0/6] Various perf fixes Peter Zijlstra
` (3 preceding siblings ...)
2010-09-17 9:28 ` [PATCH 4/6] perf: Complete software pmu grouping Peter Zijlstra
@ 2010-09-17 9:28 ` Peter Zijlstra
2010-09-17 11:28 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-09-17 9:28 ` [PATCH 6/6] perf: Undo the per cpu-context timer stuff Peter Zijlstra
5 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2010-09-17 9:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Stephane Eranian, Robert Richter, Peter Zijlstra
[-- Attachment #1: perf-fix-unplug.patch --]
[-- Type: text/plain, Size: 865 bytes --]
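Use the right cpu-context: the one of the CPU being unplugged, via
per_cpu_ptr(pmu->pmu_cpu_context, cpu), instead of the context of the CPU
running this code, via this_cpu_ptr(). Spotted by a preempt warning on
hot-unplug.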
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_event.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -6259,14 +6259,13 @@ static void perf_event_exit_cpu_context(
idx = srcu_read_lock(&pmus_srcu);
list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &this_cpu_ptr(pmu->pmu_cpu_context)->ctx;
+ ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
mutex_unlock(&ctx->mutex);
}
srcu_read_unlock(&pmus_srcu, idx);
-
}
static void perf_event_exit_cpu(int cpu)
^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perf/core] perf: Fix perf_event_exit_cpu_context()
2010-09-17 9:28 ` [PATCH 5/6] perf: Fix perf_event_exit_cpu_context() Peter Zijlstra
@ 2010-09-17 11:28 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-09-17 11:28 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, robert.richter, a.p.zijlstra,
tglx, mingo
Commit-ID: 917bdd1c9b7b0f4c22f2504c2f0c1074c8ab9df7
Gitweb: http://git.kernel.org/tip/917bdd1c9b7b0f4c22f2504c2f0c1074c8ab9df7
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 17 Sep 2010 11:28:49 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 17 Sep 2010 12:48:48 +0200
perf: Fix perf_event_exit_cpu_context()
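Use the right cpu-context: the one of the CPU being unplugged,
via per_cpu_ptr(pmu->pmu_cpu_context, cpu), instead of the
context of the CPU running this code, via this_cpu_ptr().
Spotted by a preempt warning on hot-unplug.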
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <20100917093009.461794357@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_event.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6d7eef5..27332e5 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -6269,14 +6269,13 @@ static void perf_event_exit_cpu_context(int cpu)
idx = srcu_read_lock(&pmus_srcu);
list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &this_cpu_ptr(pmu->pmu_cpu_context)->ctx;
+ ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
mutex_unlock(&ctx->mutex);
}
srcu_read_unlock(&pmus_srcu, idx);
-
}
static void perf_event_exit_cpu(int cpu)
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/6] perf: Undo the per cpu-context timer stuff
2010-09-17 9:28 [PATCH 0/6] Various perf fixes Peter Zijlstra
` (4 preceding siblings ...)
2010-09-17 9:28 ` [PATCH 5/6] perf: Fix perf_event_exit_cpu_context() Peter Zijlstra
@ 2010-09-17 9:28 ` Peter Zijlstra
2010-09-17 11:29 ` [tip:perf/core] " tip-bot for Peter Zijlstra
5 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2010-09-17 9:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Stephane Eranian, Robert Richter, Yinghai Lu,
Peter Zijlstra
[-- Attachment #1: perf-fix-rotation-timers.patch --]
[-- Type: text/plain, Size: 6736 bytes --]
Revert the per cpu-context timers because of an unfortunate nohz
interaction. Fixing that would have been somewhat ugly, so go back to
driving things from the regular tick. Provide a jiffies interval
feature for people who want slower rotations.
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 6 ++-
kernel/perf_event.c | 79 +++++++++++++++++++++++++++------------------
kernel/sched.c | 2 +
3 files changed, 55 insertions(+), 32 deletions(-)
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -77,23 +77,22 @@ void perf_pmu_enable(struct pmu *pmu)
pmu->pmu_enable(pmu);
}
+static DEFINE_PER_CPU(struct list_head, rotation_list);
+
+/*
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
+ */
static void perf_pmu_rotate_start(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ struct list_head *head = &__get_cpu_var(rotation_list);
- if (hrtimer_active(&cpuctx->timer))
- return;
+ WARN_ON(!irqs_disabled());
- __hrtimer_start_range_ns(&cpuctx->timer,
- ns_to_ktime(cpuctx->timer_interval), 0,
- HRTIMER_MODE_REL_PINNED, 0);
-}
-
-static void perf_pmu_rotate_stop(struct pmu *pmu)
-{
- struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
- hrtimer_cancel(&cpuctx->timer);
+ if (list_empty(&cpuctx->rotation_list))
+ list_add(&cpuctx->rotation_list, head);
}
static void get_ctx(struct perf_event_context *ctx)
@@ -1607,36 +1606,33 @@ static void rotate_ctx(struct perf_event
}
/*
- * Cannot race with ->pmu_rotate_start() because this is ran from hardirq
- * context, and ->pmu_rotate_start() is called with irqs disabled (both are
- * cpu affine, so there are no SMP races).
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
*/
-static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer)
+static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- struct perf_cpu_context *cpuctx;
+ u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
struct perf_event_context *ctx = NULL;
- int rotate = 0;
-
- cpuctx = container_of(timer, struct perf_cpu_context, timer);
+ int rotate = 0, remove = 1;
if (cpuctx->ctx.nr_events) {
- restart = HRTIMER_RESTART;
+ remove = 0;
if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
}
ctx = cpuctx->task_ctx;
if (ctx && ctx->nr_events) {
- restart = HRTIMER_RESTART;
+ remove = 0;
if (ctx->nr_events != ctx->nr_active)
rotate = 1;
}
perf_pmu_disable(cpuctx->ctx.pmu);
- perf_ctx_adjust_freq(&cpuctx->ctx, cpuctx->timer_interval);
+ perf_ctx_adjust_freq(&cpuctx->ctx, interval);
if (ctx)
- perf_ctx_adjust_freq(ctx, cpuctx->timer_interval);
+ perf_ctx_adjust_freq(ctx, interval);
if (!rotate)
goto done;
@@ -1654,10 +1650,24 @@ static enum hrtimer_restart perf_event_c
task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
done:
+ if (remove)
+ list_del_init(&cpuctx->rotation_list);
+
perf_pmu_enable(cpuctx->ctx.pmu);
- hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval));
+}
+
+void perf_event_task_tick(void)
+{
+ struct list_head *head = &__get_cpu_var(rotation_list);
+ struct perf_cpu_context *cpuctx, *tmp;
- return restart;
+ WARN_ON(!irqs_disabled());
+
+ list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+ if (cpuctx->jiffies_interval == 1 ||
+ !(jiffies % cpuctx->jiffies_interval))
+ perf_rotate_context(cpuctx);
+ }
}
static int event_enable_on_exec(struct perf_event *event,
@@ -5180,9 +5190,8 @@ int perf_pmu_register(struct pmu *pmu)
__perf_event_init_context(&cpuctx->ctx);
cpuctx->ctx.type = cpu_context;
cpuctx->ctx.pmu = pmu;
- cpuctx->timer_interval = TICK_NSEC;
- hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- cpuctx->timer.function = perf_event_context_tick;
+ cpuctx->jiffies_interval = 1;
+ INIT_LIST_HEAD(&cpuctx->rotation_list);
}
got_cpu_context:
@@ -6219,6 +6228,7 @@ static void __init perf_event_init_all_c
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
+ INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
}
}
@@ -6238,6 +6248,15 @@ static void __cpuinit perf_event_init_cp
}
#ifdef CONFIG_HOTPLUG_CPU
+static void perf_pmu_rotate_stop(struct pmu *pmu)
+{
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ WARN_ON(!irqs_disabled());
+
+ list_del_init(&cpuctx->rotation_list);
+}
+
static void __perf_event_exit_context(void *__info)
{
struct perf_event_context *ctx = __info;
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -870,8 +870,8 @@ struct perf_cpu_context {
struct perf_event_context *task_ctx;
int active_oncpu;
int exclusive;
- u64 timer_interval;
- struct hrtimer timer;
+ struct list_head rotation_list;
+ int jiffies_interval;
};
struct perf_output_handle {
@@ -1065,6 +1065,7 @@ extern int perf_swevent_get_recursion_co
extern void perf_swevent_put_recursion_context(int rctx);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
+extern void perf_event_task_tick(void);
#else
static inline void
perf_event_task_sched_in(struct task_struct *task) { }
@@ -1099,6 +1100,7 @@ static inline int perf_swevent_get_recu
static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline void perf_event_enable(struct perf_event *event) { }
static inline void perf_event_disable(struct perf_event *event) { }
+static inline void perf_event_task_tick(void) { }
#endif
#define perf_output_put(handle, x) \
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -3581,6 +3581,8 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
+ perf_event_task_tick();
+
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);
^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perf/core] perf: Undo the per cpu-context timer stuff
2010-09-17 9:28 ` [PATCH 6/6] perf: Undo the per cpu-context timer stuff Peter Zijlstra
@ 2010-09-17 11:29 ` tip-bot for Peter Zijlstra
0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-09-17 11:29 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, yinghai, a.p.zijlstra,
robert.richter, tglx, mingo
Commit-ID: e9d2b064149ff7ef4acbc65a1b9374ac8b218d3e
Gitweb: http://git.kernel.org/tip/e9d2b064149ff7ef4acbc65a1b9374ac8b218d3e
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 17 Sep 2010 11:28:50 +0200
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 17 Sep 2010 12:48:48 +0200
perf: Undo the per cpu-context timer stuff
Revert the per cpu-context timers because of an unfortunate
nohz interaction. Fixing that would have been somewhat ugly, so
go back to driving things from the regular tick. Provide a
jiffies interval feature for people who want slower rotations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <20100917093009.519845633@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_event.h | 6 ++-
kernel/perf_event.c | 79 +++++++++++++++++++++++++++----------------
kernel/sched.c | 2 +
3 files changed, 55 insertions(+), 32 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 165287f..61b1e2d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -870,8 +870,8 @@ struct perf_cpu_context {
struct perf_event_context *task_ctx;
int active_oncpu;
int exclusive;
- u64 timer_interval;
- struct hrtimer timer;
+ struct list_head rotation_list;
+ int jiffies_interval;
};
struct perf_output_handle {
@@ -1065,6 +1065,7 @@ extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
+extern void perf_event_task_tick(void);
#else
static inline void
perf_event_task_sched_in(struct task_struct *task) { }
@@ -1099,6 +1100,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; }
static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline void perf_event_enable(struct perf_event *event) { }
static inline void perf_event_disable(struct perf_event *event) { }
+static inline void perf_event_task_tick(void) { }
#endif
#define perf_output_put(handle, x) \
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 27332e5..baae136 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -77,23 +77,22 @@ void perf_pmu_enable(struct pmu *pmu)
pmu->pmu_enable(pmu);
}
+static DEFINE_PER_CPU(struct list_head, rotation_list);
+
+/*
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
+ */
static void perf_pmu_rotate_start(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ struct list_head *head = &__get_cpu_var(rotation_list);
- if (hrtimer_active(&cpuctx->timer))
- return;
+ WARN_ON(!irqs_disabled());
- __hrtimer_start_range_ns(&cpuctx->timer,
- ns_to_ktime(cpuctx->timer_interval), 0,
- HRTIMER_MODE_REL_PINNED, 0);
-}
-
-static void perf_pmu_rotate_stop(struct pmu *pmu)
-{
- struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
- hrtimer_cancel(&cpuctx->timer);
+ if (list_empty(&cpuctx->rotation_list))
+ list_add(&cpuctx->rotation_list, head);
}
static void get_ctx(struct perf_event_context *ctx)
@@ -1607,36 +1606,33 @@ static void rotate_ctx(struct perf_event_context *ctx)
}
/*
- * Cannot race with ->pmu_rotate_start() because this is ran from hardirq
- * context, and ->pmu_rotate_start() is called with irqs disabled (both are
- * cpu affine, so there are no SMP races).
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
*/
-static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer)
+static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- struct perf_cpu_context *cpuctx;
+ u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
struct perf_event_context *ctx = NULL;
- int rotate = 0;
-
- cpuctx = container_of(timer, struct perf_cpu_context, timer);
+ int rotate = 0, remove = 1;
if (cpuctx->ctx.nr_events) {
- restart = HRTIMER_RESTART;
+ remove = 0;
if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
}
ctx = cpuctx->task_ctx;
if (ctx && ctx->nr_events) {
- restart = HRTIMER_RESTART;
+ remove = 0;
if (ctx->nr_events != ctx->nr_active)
rotate = 1;
}
perf_pmu_disable(cpuctx->ctx.pmu);
- perf_ctx_adjust_freq(&cpuctx->ctx, cpuctx->timer_interval);
+ perf_ctx_adjust_freq(&cpuctx->ctx, interval);
if (ctx)
- perf_ctx_adjust_freq(ctx, cpuctx->timer_interval);
+ perf_ctx_adjust_freq(ctx, interval);
if (!rotate)
goto done;
@@ -1654,10 +1650,24 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer)
task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
done:
+ if (remove)
+ list_del_init(&cpuctx->rotation_list);
+
perf_pmu_enable(cpuctx->ctx.pmu);
- hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval));
+}
+
+void perf_event_task_tick(void)
+{
+ struct list_head *head = &__get_cpu_var(rotation_list);
+ struct perf_cpu_context *cpuctx, *tmp;
- return restart;
+ WARN_ON(!irqs_disabled());
+
+ list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+ if (cpuctx->jiffies_interval == 1 ||
+ !(jiffies % cpuctx->jiffies_interval))
+ perf_rotate_context(cpuctx);
+ }
}
static int event_enable_on_exec(struct perf_event *event,
@@ -5186,9 +5196,8 @@ int perf_pmu_register(struct pmu *pmu)
__perf_event_init_context(&cpuctx->ctx);
cpuctx->ctx.type = cpu_context;
cpuctx->ctx.pmu = pmu;
- cpuctx->timer_interval = TICK_NSEC;
- hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- cpuctx->timer.function = perf_event_context_tick;
+ cpuctx->jiffies_interval = 1;
+ INIT_LIST_HEAD(&cpuctx->rotation_list);
}
got_cpu_context:
@@ -6229,6 +6238,7 @@ static void __init perf_event_init_all_cpus(void)
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
+ INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
}
}
@@ -6248,6 +6258,15 @@ static void __cpuinit perf_event_init_cpu(int cpu)
}
#ifdef CONFIG_HOTPLUG_CPU
+static void perf_pmu_rotate_stop(struct pmu *pmu)
+{
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ WARN_ON(!irqs_disabled());
+
+ list_del_init(&cpuctx->rotation_list);
+}
+
static void __perf_event_exit_context(void *__info)
{
struct perf_event_context *ctx = __info;
diff --git a/kernel/sched.c b/kernel/sched.c
index 1c3ea7a..794819e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3584,6 +3584,8 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
+ perf_event_task_tick();
+
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);
^ permalink raw reply related [flat|nested] 14+ messages in thread