public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Oleg Nesterov <oleg@redhat.com>, Jiri Olsa <jolsa@redhat.com>,
	Ingo Molnar <mingo@elte.hu>
Cc: linux-kernel@vger.kernel.org,
	Stephane Eranian <eranian@google.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 3/9] perf: Change event scheduling locking
Date: Sat, 09 Apr 2011 21:17:42 +0200	[thread overview]
Message-ID: <20110409192141.769881865@chello.nl> (raw)
In-Reply-To: 20110409191739.813727025@chello.nl

[-- Attachment #1: perf-change-locking.patch --]
[-- Type: text/plain, Size: 7071 bytes --]

Currently we only hold one ctx->lock at a time, which results in us
flipping back and forth between cpuctx->ctx.lock and task_ctx->lock.

Avoid this and gain large atomic regions by holding both locks. We
nest the task lock inside the cpu lock, since with task scheduling we
might have to change task ctx while holding the cpu ctx lock.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/perf_event.c |   63 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 27 deletions(-)

Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -200,6 +200,22 @@ __get_cpu_context(struct perf_event_cont
 	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
 }
 
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+			  struct perf_event_context *ctx)
+{
+	raw_spin_lock(&cpuctx->ctx.lock);
+	if (ctx)
+		raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+			    struct perf_event_context *ctx)
+{
+	if (ctx)
+		raw_spin_unlock(&ctx->lock);
+	raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
@@ -340,11 +356,8 @@ void perf_cgroup_switch(struct task_stru
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
-
 		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-		perf_pmu_disable(cpuctx->ctx.pmu);
-
 		/*
 		 * perf_cgroup_events says at least one
 		 * context on this CPU has cgroup events.
@@ -353,6 +366,8 @@ void perf_cgroup_switch(struct task_stru
 		 * events for a context.
 		 */
 		if (cpuctx->ctx.nr_cgroups > 0) {
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+			perf_pmu_disable(cpuctx->ctx.pmu);
 
 			if (mode & PERF_CGROUP_SWOUT) {
 				cpu_ctx_sched_out(cpuctx, EVENT_ALL);
@@ -371,9 +386,9 @@ void perf_cgroup_switch(struct task_stru
 				cpuctx->cgrp = perf_cgroup_from_task(task);
 				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
 			}
+			perf_pmu_enable(cpuctx->ctx.pmu);
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 		}
-
-		perf_pmu_enable(cpuctx->ctx.pmu);
 	}
 
 	rcu_read_unlock();
@@ -1766,15 +1781,14 @@ static void ctx_sched_out(struct perf_ev
 {
 	struct perf_event *event;
 
-	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_events))
-		goto out;
+		return;
+
 	update_context_time(ctx);
 	update_cgrp_time_from_cpuctx(cpuctx);
-
 	if (!ctx->nr_active)
-		goto out;
+		return;
 
 	perf_pmu_disable(ctx->pmu);
 	if (event_type & EVENT_PINNED) {
@@ -1787,8 +1801,6 @@ static void ctx_sched_out(struct perf_ev
 			group_sched_out(event, cpuctx, ctx);
 	}
 	perf_pmu_enable(ctx->pmu);
-out:
-	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -1936,8 +1948,10 @@ static void perf_event_context_sched_out
 	rcu_read_unlock();
 
 	if (do_switch) {
+		raw_spin_lock(&ctx->lock);
 		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
 		cpuctx->task_ctx = NULL;
+		raw_spin_unlock(&ctx->lock);
 	}
 }
 
@@ -2063,10 +2077,9 @@ ctx_sched_in(struct perf_event_context *
 {
 	u64 now;
 
-	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	if (likely(!ctx->nr_events))
-		goto out;
+		return;
 
 	now = perf_clock();
 	ctx->timestamp = now;
@@ -2081,9 +2094,6 @@ ctx_sched_in(struct perf_event_context *
 	/* Then walk through the lower prio flexible groups */
 	if (event_type & EVENT_FLEXIBLE)
 		ctx_flexible_sched_in(ctx, cpuctx);
-
-out:
-	raw_spin_unlock(&ctx->lock);
 }
 
 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
@@ -2117,6 +2127,7 @@ static void perf_event_context_sched_in(
 	if (cpuctx->task_ctx == ctx)
 		return;
 
+	perf_ctx_lock(cpuctx, ctx);
 	perf_pmu_disable(ctx->pmu);
 	/*
 	 * We want to keep the following priority order:
@@ -2131,12 +2142,14 @@ static void perf_event_context_sched_in(
 
 	cpuctx->task_ctx = ctx;
 
+	perf_pmu_enable(ctx->pmu);
+	perf_ctx_unlock(cpuctx, ctx);
+
 	/*
 	 * Since these rotations are per-cpu, we need to ensure the
 	 * cpu-context we got scheduled on is actually rotating.
 	 */
 	perf_pmu_rotate_start(ctx->pmu);
-	perf_pmu_enable(ctx->pmu);
 }
 
 /*
@@ -2276,7 +2289,6 @@ static void perf_ctx_adjust_freq(struct 
 	u64 interrupts, now;
 	s64 delta;
 
-	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2308,7 +2320,6 @@ static void perf_ctx_adjust_freq(struct 
 		if (delta > 0)
 			perf_adjust_period(event, period, delta);
 	}
-	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2316,16 +2327,12 @@ static void perf_ctx_adjust_freq(struct 
  */
 static void rotate_ctx(struct perf_event_context *ctx)
 {
-	raw_spin_lock(&ctx->lock);
-
 	/*
 	 * Rotate the first entry last of non-pinned groups. Rotation might be
 	 * disabled by the inheritance code.
 	 */
 	if (!ctx->rotate_disable)
 		list_rotate_left(&ctx->flexible_groups);
-
-	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2352,6 +2359,7 @@ static void perf_rotate_context(struct p
 			rotate = 1;
 	}
 
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 	perf_ctx_adjust_freq(&cpuctx->ctx, interval);
 	if (ctx)
@@ -2377,6 +2385,7 @@ static void perf_rotate_context(struct p
 		list_del_init(&cpuctx->rotation_list);
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
 void perf_event_task_tick(void)
@@ -2423,9 +2432,8 @@ static void perf_event_enable_on_exec(st
 	if (!ctx || !ctx->nr_events)
 		goto out;
 
-	task_ctx_sched_out(ctx, EVENT_ALL);
-
 	raw_spin_lock(&ctx->lock);
+	task_ctx_sched_out(ctx, EVENT_ALL);
 
 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 		ret = event_enable_on_exec(event, ctx);
@@ -2444,7 +2452,6 @@ static void perf_event_enable_on_exec(st
 	 */
 	if (enabled)
 		unclone_ctx(ctx);
-
 	raw_spin_unlock(&ctx->lock);
 
 	perf_event_context_sched_in(ctx, ctx->task);
@@ -5978,6 +5985,7 @@ static int pmu_dev_alloc(struct pmu *pmu
 }
 
 static struct lock_class_key cpuctx_mutex;
+static struct lock_class_key cpuctx_lock;
 
 int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
@@ -6028,6 +6036,7 @@ int perf_pmu_register(struct pmu *pmu, c
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		__perf_event_init_context(&cpuctx->ctx);
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
+		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.type = cpu_context;
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
@@ -6772,7 +6781,6 @@ static void perf_event_exit_task_context
 	 * our context.
 	 */
 	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
-	task_ctx_sched_out(child_ctx, EVENT_ALL);
 
 	/*
 	 * Take the context lock here so that if find_get_context is
@@ -6780,6 +6788,7 @@ static void perf_event_exit_task_context
 	 * incremented the context's refcount before we do put_ctx below.
 	 */
 	raw_spin_lock(&child_ctx->lock);
+	task_ctx_sched_out(child_ctx, EVENT_ALL);
 	child->perf_event_ctxp[ctxn] = NULL;
 	/*
 	 * If this context is a clone; unclone it so it can't get



  parent reply	other threads:[~2011-04-09 19:23 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-09 19:17 [RFC][PATCH 0/9] perf: Rework event scheduling Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 1/9] perf: Optimize ctx_sched_out Peter Zijlstra
2011-05-28 16:38   ` [tip:perf/core] perf: Optimize ctx_sched_out() tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 2/9] perf: Clean up ctx reference counting Peter Zijlstra
2011-04-11  6:05   ` Lin Ming
2011-04-11  8:35     ` Peter Zijlstra
2011-05-28 16:39   ` [tip:perf/core] perf: Clean up 'ctx' " tip-bot for Peter Zijlstra
2011-04-09 19:17 ` Peter Zijlstra [this message]
2011-05-28 16:39   ` [tip:perf/core] perf: Optimize event scheduling locking tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 4/9] perf: Remove task_ctx_sched_in Peter Zijlstra
2011-05-28 16:39   ` [tip:perf/core] perf: Remove task_ctx_sched_in() tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 5/9] perf: Simplify and fix __perf_install_in_context Peter Zijlstra
2011-04-10  8:13   ` Peter Zijlstra
2011-04-11  8:44     ` Lin Ming
2011-04-11  8:50       ` Peter Zijlstra
2011-04-11  8:12   ` Lin Ming
2011-05-28 16:40   ` [tip:perf/core] perf: Simplify and fix __perf_install_in_context() tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 6/9] perf: Change ctx::is_active semantics Peter Zijlstra
2011-05-28 16:40   ` [tip:perf/core] perf: Change and simplify " tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 7/9] perf: Collect the schedule in rules in one function Peter Zijlstra
2011-05-28 16:41   ` [tip:perf/core] perf: Collect the schedule-in " tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 8/9] perf: Change close() semantics for group events Peter Zijlstra
2011-05-28 16:41   ` [tip:perf/core] " tip-bot for Peter Zijlstra
2011-04-09 19:17 ` [RFC][PATCH 9/9] perf: De-schedule a task context when removing the last event Peter Zijlstra
2011-05-28 16:42   ` [tip:perf/core] " tip-bot for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110409192141.769881865@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox.