From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754460AbcBXLyR (ORCPT ); Wed, 24 Feb 2016 06:54:17 -0500 Received: from bombadil.infradead.org ([198.137.202.9]:59195 "EHLO bombadil.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754157AbcBXLyN (ORCPT ); Wed, 24 Feb 2016 06:54:13 -0500 Date: Wed, 24 Feb 2016 12:53:51 +0100 From: Peter Zijlstra To: Pratyush Anand Cc: Jiri Olsa , mingo@kernel.org, alexander.shishkin@linux.intel.com, eranian@google.com, linux-kernel@vger.kernel.org, vince@deater.net, dvyukov@google.com, andi@firstfloor.org, sasha.levin@oracle.com, oleg@redhat.com Subject: Re: [RFC][PATCH 4/7] perf: Fix scaling vs enable_on_exec Message-ID: <20160224115351.GS6375@twins.programming.kicks-ass.net> References: <20160219143743.692339502@infradead.org> <20160219144132.043429490@infradead.org> <20160223152729.GT6357@twins.programming.kicks-ass.net> <20160223154849.GC9102@krava.redhat.com> <20160223163550.GC11724@dhcppc3.redhat.com> <20160223174741.GL6356@twins.programming.kicks-ass.net> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20160223174741.GL6356@twins.programming.kicks-ass.net> User-Agent: Mutt/1.5.21 (2012-12-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Tue, Feb 23, 2016 at 06:47:41PM +0100, Peter Zijlstra wrote: > On Tue, Feb 23, 2016 at 10:05:50PM +0530, Pratyush Anand wrote: > > Its better with this patch, still count is 1 more in case of higher probe hits ( > > like 65535 times). > > Ah, ok, I'll go try again. OK, so the below seems to cure this for me, but now I'm hurting my head to make the same true for perf_install_in_context(), because 'tricky' :/ (the below is a fold of 2 patches, I'll send out a new series once I get my head around the install_in_context muck :/) --- kernel/events/core.c | 84 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 94c47e3f9a0a..8326a7f5729c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -314,7 +314,8 @@ static void event_function_call(struct perf_event *event, event_f func, void *da enum event_type_t { EVENT_FLEXIBLE = 0x1, EVENT_PINNED = 0x2, - EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, + EVENT_TIME = 0x4, + EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED | EVENT_TIME, }; /* @@ -1288,16 +1289,18 @@ static u64 perf_event_time(struct perf_event *event) /* * Update the total_time_enabled and total_time_running fields for a event. - * The caller of this function needs to hold the ctx->lock. */ static void update_event_times(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; u64 run_end; + lockdep_assert_held(&ctx->lock); + if (event->state < PERF_EVENT_STATE_INACTIVE || event->group_leader->state < PERF_EVENT_STATE_INACTIVE) return; + /* * in cgroup mode, time_enabled represents * the time the event was enabled AND active @@ -2063,14 +2066,27 @@ static void add_event_to_ctx(struct perf_event *event, event->tstamp_stopped = tstamp; } -static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx); +static void ctx_sched_out(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + enum event_type_t event_type); static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type, struct task_struct *task); +static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + if (!cpuctx->task_ctx) + return; + + if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) + return; + + ctx_sched_out(ctx, cpuctx, EVENT_ALL); +} + static void perf_event_sched_in(struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, struct task_struct *task) @@ -2219,17 +2235,17 @@ static void __perf_event_enable(struct perf_event *event, event->state <= PERF_EVENT_STATE_ERROR) return; - update_context_time(ctx); + ctx_sched_out(ctx, cpuctx, EVENT_TIME); + __perf_event_mark_enabled(event); if (!ctx->is_active) return; if (!event_filter_match(event)) { - if (is_cgroup_event(event)) { - perf_cgroup_set_timestamp(current, ctx); // XXX ? + if (is_cgroup_event(event)) perf_cgroup_defer_enabled(event); - } + ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); return; } @@ -2237,8 +2253,10 @@ static void __perf_event_enable(struct perf_event *event, * If the event is in a group and isn't the group leader, * then don't put it on unless the group is on. */ - if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) + if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) { + ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); return; + } task_ctx = cpuctx->task_ctx; if (ctx->task) @@ -2344,24 +2362,33 @@ static void ctx_sched_out(struct perf_event_context *ctx, } ctx->is_active &= ~event_type; + if (!(ctx->is_active & (EVENT_FLEXIBLE | EVENT_PINNED))) + ctx->is_active = 0; + if (ctx->task) { WARN_ON_ONCE(cpuctx->task_ctx != ctx); if (!ctx->is_active) cpuctx->task_ctx = NULL; } - update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + is_active ^= ctx->is_active; /* changed bits */ + + if (is_active & EVENT_TIME) { + /* update (and stop) ctx time */ + update_context_time(ctx); + update_cgrp_time_from_cpuctx(cpuctx); + } + if (!ctx->nr_active) return; perf_pmu_disable(ctx->pmu); - if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) { + if (is_active & EVENT_PINNED) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) { + if (is_active & EVENT_FLEXIBLE) { list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } @@ -2641,18 +2668,6 @@ void __perf_event_task_sched_out(struct task_struct *task, perf_cgroup_sched_out(task, next); } -static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - if (!cpuctx->task_ctx) - return; - - if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) - return; - - ctx_sched_out(ctx, cpuctx, EVENT_ALL); -} - /* * Called with IRQs disabled */ @@ -2735,7 +2750,7 @@ ctx_sched_in(struct perf_event_context *ctx, if (likely(!ctx->nr_events)) return; - ctx->is_active |= event_type; + ctx->is_active |= (event_type | EVENT_TIME); if (ctx->task) { if (!is_active) cpuctx->task_ctx = ctx; @@ -2743,18 +2758,24 @@ ctx_sched_in(struct perf_event_context *ctx, WARN_ON_ONCE(cpuctx->task_ctx != ctx); } - now = perf_clock(); - ctx->timestamp = now; - perf_cgroup_set_timestamp(task, ctx); + is_active ^= ctx->is_active; /* changed bits */ + + if (is_active & EVENT_TIME) { + /* start ctx time */ + now = perf_clock(); + ctx->timestamp = now; + perf_cgroup_set_timestamp(task, ctx); + } + /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) + if (is_active & EVENT_PINNED) ctx_pinned_sched_in(ctx, cpuctx); /* Then walk through the lower prio flexible groups */ - if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) + if (is_active & EVENT_FLEXIBLE) ctx_flexible_sched_in(ctx, cpuctx); } @@ -3120,6 +3141,7 @@ static void perf_event_enable_on_exec(int ctxn) cpuctx = __get_cpu_context(ctx); perf_ctx_lock(cpuctx, ctx); + ctx_sched_out(ctx, cpuctx, EVENT_TIME); list_for_each_entry(event, &ctx->event_list, event_entry) enabled |= event_enable_on_exec(event, ctx);