From: "Liang, Kan" <kan.liang@linux.intel.com>
To: Namhyung Kim <namhyung@kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
Jiri Olsa <jolsa@redhat.com>, LKML <linux-kernel@vger.kernel.org>,
Stephane Eranian <eranian@google.com>,
Andi Kleen <ak@linux.intel.com>, Ian Rogers <irogers@google.com>,
Gabriel Marin <gmx@google.com>
Subject: Re: [RFC 2/2] perf/core: Invoke pmu::sched_task callback for per-cpu events
Date: Thu, 5 Nov 2020 09:48:06 -0500 [thread overview]
Message-ID: <03ba0d07-45bb-0849-1843-915b6873478f@linux.intel.com> (raw)
In-Reply-To: <20201102145221.309001-3-namhyung@kernel.org>
On 11/2/2020 9:52 AM, Namhyung Kim wrote:
> The commit 44fae179ce73 ("perf/core: Pull pmu::sched_task() into
> perf_event_context_sched_out()") moved the pmu::sched_task callback to
> be called for task event context. But it missed to call it for
> per-cpu events to flush PMU internal buffers (i.e. for PEBS, ...).
>
> This commit basically reverts the commit but keeps the optimization
> for task events and only add missing calls for cpu events.
>
> Fixes: 44fae179ce73 ("perf/core: Pull pmu::sched_task() into perf_event_context_sched_out()")
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> include/linux/perf_event.h | 1 +
> kernel/events/core.c | 38 ++++++++++++++++++++++++++++++++++++--
> 2 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 0defb526cd0c..abb70a557cb5 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -872,6 +872,7 @@ struct perf_cpu_context {
> struct list_head cgrp_cpuctx_entry;
> #endif
>
> + struct list_head sched_cb_entry;
> int sched_cb_usage;
>
> int online;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index aaa0155c4142..c04d9a913537 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -383,6 +383,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
> static atomic_t perf_sched_count;
>
> static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
> +static DEFINE_PER_CPU(int, perf_sched_cb_usages);
> static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
>
> static atomic_t nr_mmap_events __read_mostly;
> @@ -3480,11 +3481,16 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
> }
> }
>
> +static DEFINE_PER_CPU(struct list_head, sched_cb_list);
> +
> void perf_sched_cb_dec(struct pmu *pmu)
> {
> struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
>
> - --cpuctx->sched_cb_usage;
> + this_cpu_dec(perf_sched_cb_usages);
> +
> + if (!--cpuctx->sched_cb_usage)
> + list_del(&cpuctx->sched_cb_entry);
> }
>
>
> @@ -3492,7 +3498,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
> {
> struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
>
> - cpuctx->sched_cb_usage++;
> + if (!cpuctx->sched_cb_usage++)
> + list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
> +
> + this_cpu_inc(perf_sched_cb_usages);
> }
>
> /*
> @@ -3521,6 +3530,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
> perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> }
>
> +static void perf_pmu_sched_task(struct task_struct *prev,
> + struct task_struct *next,
> + bool sched_in)
> +{
> + struct perf_cpu_context *cpuctx;
> +
> + if (prev == next)
> + return;
> +
> + list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
> + /* will be handled in perf_event_context_sched_in/out */
> + if (cpuctx->task_ctx)
> + continue;
> +
> + __perf_pmu_sched_task(cpuctx, sched_in);
> + }
> +}
> +
> static void perf_event_switch(struct task_struct *task,
> struct task_struct *next_prev, bool sched_in);
>
> @@ -3543,6 +3570,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
> {
> int ctxn;
>
> + if (__this_cpu_read(perf_sched_cb_usages))
> + perf_pmu_sched_task(task, next, false);
> +
> if (atomic_read(&nr_switch_events))
> perf_event_switch(task, next, false);
>
> @@ -3850,6 +3880,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
>
> if (atomic_read(&nr_switch_events))
> perf_event_switch(task, prev, true);
> +
> + if (__this_cpu_read(perf_sched_cb_usages))
> + perf_pmu_sched_task(prev, task, true);
> }
It looks like the ("perf/core: Pull pmu::sched_task() into
perf_event_context_sched_in()") is also reverted in the patch.
>
> static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
> @@ -12999,6 +13032,7 @@ static void __init perf_event_init_all_cpus(void)
> #ifdef CONFIG_CGROUP_PERF
> INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
> #endif
> + INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
> }
> }
>
>
Can we update the perf_sched_cb_usages and sched_cb_list only for
per-cpu events, as in the below patch (not tested)?
If the user only uses per-process events, we don't need to go through
the list.
diff --git a/arch/powerpc/perf/core-book3s.c
b/arch/powerpc/perf/core-book3s.c
index 6586f7e71cfb..63c9b87cab5e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -380,7 +380,7 @@ static void power_pmu_bhrb_enable(struct perf_event
*event)
cpuhw->bhrb_context = event->ctx;
}
cpuhw->bhrb_users++;
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->ctx->pmu, !(event->attach_state &
PERF_ATTACH_TASK));
}
static void power_pmu_bhrb_disable(struct perf_event *event)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 444e5f061d04..a34b90c7fa6d 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1022,9 +1022,9 @@ pebs_update_state(bool needed_cb, struct
cpu_hw_events *cpuc,
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
if (!needed_cb)
- perf_sched_cb_inc(pmu);
+ perf_sched_cb_inc(pmu, !(event->attach_state & PERF_ATTACH_TASK));
else
- perf_sched_cb_dec(pmu);
+ perf_sched_cb_dec(pmu, !(event->attach_state & PERF_ATTACH_TASK));
update = true;
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8961653c5dd2..8d4d02cde3d4 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -693,7 +693,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
*/
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
cpuc->lbr_pebs_users++;
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->ctx->pmu, !(event->attach_state &
PERF_ATTACH_TASK));
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
@@ -740,7 +740,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->ctx->pmu, !(event->attach_state &
PERF_ATTACH_TASK));
}
static inline bool vlbr_exclude_host(void)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index abb70a557cb5..5a02239ca8fd 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -966,8 +966,8 @@ extern const struct perf_event_attr
*perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
-extern void perf_sched_cb_dec(struct pmu *pmu);
-extern void perf_sched_cb_inc(struct pmu *pmu);
+extern void perf_sched_cb_dec(struct pmu *pmu, bool systemwide);
+extern void perf_sched_cb_inc(struct pmu *pmu, bool systemwide);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e6b98507734a..2d7c07af02f8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3484,25 +3484,29 @@ static void perf_event_context_sched_out(struct
task_struct *task, int ctxn,
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
-void perf_sched_cb_dec(struct pmu *pmu)
+void perf_sched_cb_dec(struct pmu *pmu, bool systemwide)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- this_cpu_dec(perf_sched_cb_usages);
+ --cpuctx->sched_cb_usage;
- if (!--cpuctx->sched_cb_usage)
+ if (systemwide) {
+ this_cpu_dec(perf_sched_cb_usages);
list_del(&cpuctx->sched_cb_entry);
+ }
}
-void perf_sched_cb_inc(struct pmu *pmu)
+void perf_sched_cb_inc(struct pmu *pmu, bool systemwide)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- if (!cpuctx->sched_cb_usage++)
- list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+ cpuctx->sched_cb_usage++;
- this_cpu_inc(perf_sched_cb_usages);
+ if (systemwide) {
+ this_cpu_inc(perf_sched_cb_usages);
+ list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+ }
}
/*
Thanks,
Kan
next prev parent reply other threads:[~2020-11-05 14:48 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-02 14:52 [RFC 0/2] perf/core: Invoke pmu::sched_task callback for cpu events Namhyung Kim
2020-11-02 14:52 ` [RFC 1/2] perf/core: Enable sched_task callbacks if PMU has it Namhyung Kim
2020-11-05 14:47 ` Liang, Kan
2020-11-05 15:45 ` Namhyung Kim
2020-11-05 19:01 ` Liang, Kan
2020-11-06 0:53 ` Namhyung Kim
2020-11-06 16:11 ` Liang, Kan
2020-11-02 14:52 ` [RFC 2/2] perf/core: Invoke pmu::sched_task callback for per-cpu events Namhyung Kim
2020-11-05 14:48 ` Liang, Kan [this message]
2020-11-05 15:54 ` Namhyung Kim
2020-11-05 19:39 ` Liang, Kan
2020-11-05 21:15 ` Stephane Eranian
2020-11-05 23:12 ` Liang, Kan
2020-11-05 8:29 ` [RFC 0/2] perf/core: Invoke pmu::sched_task callback for cpu events Stephane Eranian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=03ba0d07-45bb-0849-1843-915b6873478f@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=eranian@google.com \
--cc=gmx@google.com \
--cc=irogers@google.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox