[PATCH 09/20] perf/x86/intel: Factor out common functions to process PEBS groups

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Dapeng Mi <dapeng1.mi@intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>
Subject: [PATCH 09/20] perf/x86/intel: Factor out common functions to process PEBS groups
Date: Thu, 23 Jan 2025 14:07:10 +0000	[thread overview]
Message-ID: <20250123140721.2496639-10-dapeng1.mi@linux.intel.com> (raw)
In-Reply-To: <20250123140721.2496639-1-dapeng1.mi@linux.intel.com>

Adaptive PEBS and arch-PEBS share lots of same code to process these
PEBS groups, like basic, GPR and meminfo groups. Extract these shared
code to common functions to avoid duplicated code.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 arch/x86/events/intel/ds.c | 239 ++++++++++++++++++-------------------
 1 file changed, 119 insertions(+), 120 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 680637d63679..dce2b6ee8bd1 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2061,6 +2061,91 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
 
 #define PEBS_LATENCY_MASK			0xffff
 
+static inline void __setup_perf_sample_data(struct perf_event *event,
+					    struct pt_regs *iregs,
+					    struct perf_sample_data *data)
+{
+	perf_sample_data_init(data, 0, event->hw.last_period);
+	data->period = event->hw.last_period;
+
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happened between the record and
+	 * PMI.
+	 */
+	perf_sample_save_callchain(data, event, iregs);
+}
+
+static inline void __setup_pebs_basic_group(struct perf_event *event,
+					    struct pt_regs *regs,
+					    struct perf_sample_data *data,
+					    u64 sample_type, u64 ip,
+					    u64 tsc, u16 retire)
+{
+	/* The ip in basic is EventingIP */
+	set_linear_ip(regs, ip);
+	regs->flags = PERF_EFLAGS_EXACT;
+	setup_pebs_time(event, data, tsc);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+		data->weight.var3_w = retire;
+}
+
+static inline void __setup_pebs_gpr_group(struct perf_event *event,
+					  struct pt_regs *regs,
+					  struct pebs_gprs *gprs,
+					  u64 sample_type)
+{
+	if (event->attr.precise_ip < 2) {
+		set_linear_ip(regs, gprs->ip);
+		regs->flags &= ~PERF_EFLAGS_EXACT;
+	}
+
+	if (sample_type & PERF_SAMPLE_REGS_INTR)
+		adaptive_pebs_save_regs(regs, gprs);
+}
+
+static inline void __setup_pebs_meminfo_group(struct perf_event *event,
+					      struct perf_sample_data *data,
+					      u64 sample_type, u64 latency,
+					      u16 instr_latency, u64 address,
+					      u64 aux, u64 tsx_tuning, u64 ax)
+{
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+		u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
+
+		data->weight.var2_w = instr_latency;
+
+		/*
+		 * Although meminfo::latency is defined as a u64,
+		 * only the lower 32 bits include the valid data
+		 * in practice on Ice Lake and earlier platforms.
+		 */
+		if (sample_type & PERF_SAMPLE_WEIGHT)
+			data->weight.full = latency ?: tsx_latency;
+		else
+			data->weight.var1_dw = (u32)latency ?: tsx_latency;
+
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC) {
+		data->data_src.val = get_data_src(event, aux);
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
+		data->addr = address;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION) {
+		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
+		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+	}
+}
+
 /*
  * With adaptive PEBS the layout depends on what fields are configured.
  */
@@ -2070,12 +2155,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 					    struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 sample_type = event->attr.sample_type;
 	struct pebs_basic *basic = __pebs;
 	void *next_record = basic + 1;
-	u64 sample_type, format_group;
 	struct pebs_meminfo *meminfo = NULL;
 	struct pebs_gprs *gprs = NULL;
 	struct x86_perf_regs *perf_regs;
+	u64 format_group;
+	u16 retire;
 
 	if (basic == NULL)
 		return;
@@ -2083,32 +2170,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	perf_regs = container_of(regs, struct x86_perf_regs, regs);
 	perf_regs->xmm_regs = NULL;
 
-	sample_type = event->attr.sample_type;
 	format_group = basic->format_group;
-	perf_sample_data_init(data, 0, event->hw.last_period);
-	data->period = event->hw.last_period;
 
-	setup_pebs_time(event, data, basic->tsc);
-
-	/*
-	 * We must however always use iregs for the unwinder to stay sane; the
-	 * record BP,SP,IP can point into thin air when the record is from a
-	 * previous PMI context or an (I)RET happened between the record and
-	 * PMI.
-	 */
-	perf_sample_save_callchain(data, event, iregs);
+	__setup_perf_sample_data(event, iregs, data);
 
 	*regs = *iregs;
-	/* The ip in basic is EventingIP */
-	set_linear_ip(regs, basic->ip);
-	regs->flags = PERF_EFLAGS_EXACT;
 
-	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
-		if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
-			data->weight.var3_w = basic->retire_latency;
-		else
-			data->weight.var3_w = 0;
-	}
+	/* basic group */
+	retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
+			basic->retire_latency : 0;
+	__setup_pebs_basic_group(event, regs, data, sample_type,
+				 basic->ip, basic->tsc, retire);
 
 	/*
 	 * The record for MEMINFO is in front of GP
@@ -2124,54 +2196,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 		gprs = next_record;
 		next_record = gprs + 1;
 
-		if (event->attr.precise_ip < 2) {
-			set_linear_ip(regs, gprs->ip);
-			regs->flags &= ~PERF_EFLAGS_EXACT;
-		}
-
-		if (sample_type & PERF_SAMPLE_REGS_INTR)
-			adaptive_pebs_save_regs(regs, gprs);
+		__setup_pebs_gpr_group(event, regs, gprs, sample_type);
 	}
 
 	if (format_group & PEBS_DATACFG_MEMINFO) {
-		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
-			u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
-					meminfo->cache_latency : meminfo->mem_latency;
-
-			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
-				data->weight.var2_w = meminfo->instr_latency;
-
-			/*
-			 * Although meminfo::latency is defined as a u64,
-			 * only the lower 32 bits include the valid data
-			 * in practice on Ice Lake and earlier platforms.
-			 */
-			if (sample_type & PERF_SAMPLE_WEIGHT) {
-				data->weight.full = latency ?:
-					intel_get_tsx_weight(meminfo->tsx_tuning);
-			} else {
-				data->weight.var1_dw = (u32)latency ?:
-					intel_get_tsx_weight(meminfo->tsx_tuning);
-			}
-
-			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
-		}
-
-		if (sample_type & PERF_SAMPLE_DATA_SRC) {
-			data->data_src.val = get_data_src(event, meminfo->aux);
-			data->sample_flags |= PERF_SAMPLE_DATA_SRC;
-		}
+		u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+				meminfo->cache_latency : meminfo->mem_latency;
+		u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+				meminfo->instr_latency : 0;
+		u64 ax = gprs ? gprs->ax : 0;
 
-		if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
-			data->addr = meminfo->address;
-			data->sample_flags |= PERF_SAMPLE_ADDR;
-		}
-
-		if (sample_type & PERF_SAMPLE_TRANSACTION) {
-			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
-							  gprs ? gprs->ax : 0);
-			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
-		}
+		__setup_pebs_meminfo_group(event, data, sample_type, latency,
+					   instr_latency, meminfo->address,
+					   meminfo->aux, meminfo->tsx_tuning,
+					   ax);
 	}
 
 	if (format_group & PEBS_DATACFG_XMMS) {
@@ -2234,13 +2272,13 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
 					struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 sample_type = event->attr.sample_type;
 	struct arch_pebs_header *header = NULL;
 	struct arch_pebs_aux *meminfo = NULL;
 	struct arch_pebs_gprs *gprs = NULL;
 	struct x86_perf_regs *perf_regs;
 	void *next_record;
 	void *at = __pebs;
-	u64 sample_type;
 
 	if (at == NULL)
 		return;
@@ -2248,18 +2286,7 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
 	perf_regs = container_of(regs, struct x86_perf_regs, regs);
 	perf_regs->xmm_regs = NULL;
 
-	sample_type = event->attr.sample_type;
-	perf_sample_data_init(data, 0, event->hw.last_period);
-	data->period = event->hw.last_period;
-
-	/*
-	 * We must however always use iregs for the unwinder to stay sane; the
-	 * record BP,SP,IP can point into thin air when the record is from a
-	 * previous PMI context or an (I)RET happened between the record and
-	 * PMI.
-	 */
-	if (sample_type & PERF_SAMPLE_CALLCHAIN)
-		perf_sample_save_callchain(data, event, iregs);
+	__setup_perf_sample_data(event, iregs, data);
 
 	*regs = *iregs;
 
@@ -2268,16 +2295,14 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
 	next_record = at + sizeof(struct arch_pebs_header);
 	if (header->basic) {
 		struct arch_pebs_basic *basic = next_record;
+		u16 retire = 0;
 
-		/* The ip in basic is EventingIP */
-		set_linear_ip(regs, basic->ip);
-		regs->flags = PERF_EFLAGS_EXACT;
-		setup_pebs_time(event, data, basic->tsc);
+		next_record = basic + 1;
 
 		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
-			data->weight.var3_w = basic->valid ? basic->retire : 0;
-
-		next_record = basic + 1;
+			retire = basic->valid ? basic->retire : 0;
+		__setup_pebs_basic_group(event, regs, data, sample_type,
+				 basic->ip, basic->tsc, retire);
 	}
 
 	/*
@@ -2294,44 +2319,18 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
 		gprs = next_record;
 		next_record = gprs + 1;
 
-		if (event->attr.precise_ip < 2) {
-			set_linear_ip(regs, gprs->ip);
-			regs->flags &= ~PERF_EFLAGS_EXACT;
-		}
-
-		if (sample_type & PERF_SAMPLE_REGS_INTR)
-			adaptive_pebs_save_regs(regs, (struct pebs_gprs *)gprs);
+		__setup_pebs_gpr_group(event, regs, (struct pebs_gprs *)gprs,
+				       sample_type);
 	}
 
 	if (header->aux) {
-		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
-			u16 latency = meminfo->cache_latency;
-			u64 tsx_latency = intel_get_tsx_weight(meminfo->tsx_tuning);
+		u64 ax = gprs ? gprs->ax : 0;
 
-			data->weight.var2_w = meminfo->instr_latency;
-
-			if (sample_type & PERF_SAMPLE_WEIGHT)
-				data->weight.full = latency ?: tsx_latency;
-			else
-				data->weight.var1_dw = latency ?: (u32)tsx_latency;
-			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
-		}
-
-		if (sample_type & PERF_SAMPLE_DATA_SRC) {
-			data->data_src.val = get_data_src(event, meminfo->aux);
-			data->sample_flags |= PERF_SAMPLE_DATA_SRC;
-		}
-
-		if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
-			data->addr = meminfo->address;
-			data->sample_flags |= PERF_SAMPLE_ADDR;
-		}
-
-		if (sample_type & PERF_SAMPLE_TRANSACTION) {
-			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
-							  gprs ? gprs->ax : 0);
-			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
-		}
+		__setup_pebs_meminfo_group(event, data, sample_type,
+					   meminfo->cache_latency,
+					   meminfo->instr_latency,
+					   meminfo->address, meminfo->aux,
+					   meminfo->tsx_tuning, ax);
 	}
 
 	if (header->xmm) {
-- 
2.40.1

next prev parent reply	other threads:[~2025-01-23  6:20 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-23 14:07 [PATCH 00/20] Arch-PEBS and PMU supports for Clearwater Forest Dapeng Mi
2025-01-23 14:07 ` [PATCH 01/20] perf/x86/intel: Add PMU support " Dapeng Mi
2025-01-27 16:26   ` Peter Zijlstra
2025-02-06  1:31     ` Mi, Dapeng
2025-02-06  7:53       ` Peter Zijlstra
2025-02-06  9:35         ` Mi, Dapeng
2025-02-06  9:39           ` Peter Zijlstra
2025-01-23 14:07 ` [PATCH 02/20] perf/x86/intel: Fix ARCH_PERFMON_NUM_COUNTER_LEAF Dapeng Mi
2025-01-27 16:29   ` Peter Zijlstra
2025-01-27 16:43     ` Liang, Kan
2025-01-27 21:29       ` Peter Zijlstra
2025-01-28  0:28         ` Liang, Kan
2025-01-23 14:07 ` [PATCH 03/20] perf/x86/intel: Parse CPUID archPerfmonExt leaves for non-hybrid CPUs Dapeng Mi
2025-01-23 18:58   ` Andi Kleen
2025-01-27 15:19     ` Liang, Kan
2025-01-27 16:44       ` Peter Zijlstra
2025-02-06  2:09         ` Mi, Dapeng
2025-01-23 14:07 ` [PATCH 04/20] perf/x86/intel: Decouple BTS initialization from PEBS initialization Dapeng Mi
2025-01-23 14:07 ` [PATCH 05/20] perf/x86/intel: Rename x86_pmu.pebs to x86_pmu.ds_pebs Dapeng Mi
2025-01-23 14:07 ` [PATCH 06/20] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2025-01-28 11:22   ` Peter Zijlstra
2025-02-06  2:25     ` Mi, Dapeng
2025-01-23 14:07 ` [PATCH 07/20] perf/x86/intel/ds: Factor out common PEBS processing code to functions Dapeng Mi
2025-01-23 14:07 ` [PATCH 08/20] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2025-01-23 14:07 ` Dapeng Mi [this message]
2025-01-23 14:07 ` [PATCH 10/20] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-01-23 14:07 ` [PATCH 11/20] perf/x86/intel: Setup PEBS constraints base on counter & pdist map Dapeng Mi
2025-01-27 16:07   ` Liang, Kan
2025-02-06  2:47     ` Mi, Dapeng
2025-02-06 15:01       ` Liang, Kan
2025-02-07  1:27         ` Mi, Dapeng
2025-01-23 14:07 ` [PATCH 12/20] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2025-01-23 14:07 ` [PATCH 13/20] perf/x86/intel: Add SSP register support for arch-PEBS Dapeng Mi
2025-01-24  5:16   ` Andi Kleen
2025-01-27 15:38     ` Liang, Kan
2025-01-23 14:07 ` [PATCH 14/20] perf/x86/intel: Add counter group " Dapeng Mi
2025-01-23 14:07 ` [PATCH 15/20] perf/core: Support to capture higher width vector registers Dapeng Mi
2025-01-23 14:07 ` [PATCH 16/20] perf/x86/intel: Support arch-PEBS vector registers group capturing Dapeng Mi
2025-01-23 14:07 ` [PATCH 17/20] perf tools: Support to show SSP register Dapeng Mi
2025-01-23 16:15   ` Ian Rogers
2025-02-06  2:57     ` Mi, Dapeng
2025-01-23 14:07 ` [PATCH 18/20] perf tools: Support to capture more vector registers (common part) Dapeng Mi
2025-01-23 16:42   ` Ian Rogers
2025-01-27 15:50     ` Liang, Kan
2025-02-06  3:12       ` Mi, Dapeng
2025-01-23 14:07 ` [PATCH 19/20] perf tools: Support to capture more vector registers (x86/Intel part) Dapeng Mi
2025-01-23 14:07 ` [PATCH 20/20] perf tools/tests: Add vector registers PEBS sampling test Dapeng Mi

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:680637d6367 dfblob:dce2b6ee8bd )
 OR (
bs:"[PATCH 09/20] perf/x86/intel: Factor out common functions to process PEBS groups" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250123140721.2496639-10-dapeng1.mi@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox