All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Dapeng Mi <dapeng1.mi@intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>
Subject: [Patch v3 09/22] perf/x86/intel/ds: Factor out PEBS group processing code to functions
Date: Tue, 15 Apr 2025 11:44:15 +0000	[thread overview]
Message-ID: <20250415114428.341182-10-dapeng1.mi@linux.intel.com> (raw)
In-Reply-To: <20250415114428.341182-1-dapeng1.mi@linux.intel.com>

Adaptive PEBS and arch-PEBS share a lot of the same code for processing
PEBS groups, such as the basic, GPR and meminfo groups. Extract this
shared code into generic functions to avoid duplication.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 arch/x86/events/intel/ds.c | 172 ++++++++++++++++++++++---------------
 1 file changed, 105 insertions(+), 67 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 22831ef003d0..6c872bf2e916 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2073,6 +2073,91 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
 
 #define PEBS_LATENCY_MASK			0xffff
 
+static inline void __setup_perf_sample_data(struct perf_event *event,
+					    struct pt_regs *iregs,
+					    struct perf_sample_data *data)
+{
+	perf_sample_data_init(data, 0, event->hw.last_period);
+	data->period = event->hw.last_period;
+
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happened between the record and
+	 * PMI.
+	 */
+	perf_sample_save_callchain(data, event, iregs);
+}
+
+static inline void __setup_pebs_basic_group(struct perf_event *event,
+					    struct pt_regs *regs,
+					    struct perf_sample_data *data,
+					    u64 sample_type, u64 ip,
+					    u64 tsc, u16 retire)
+{
+	/* The ip in basic is EventingIP */
+	set_linear_ip(regs, ip);
+	regs->flags = PERF_EFLAGS_EXACT;
+	setup_pebs_time(event, data, tsc);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+		data->weight.var3_w = retire;
+}
+
+static inline void __setup_pebs_gpr_group(struct perf_event *event,
+					  struct pt_regs *regs,
+					  struct pebs_gprs *gprs,
+					  u64 sample_type)
+{
+	if (event->attr.precise_ip < 2) {
+		set_linear_ip(regs, gprs->ip);
+		regs->flags &= ~PERF_EFLAGS_EXACT;
+	}
+
+	if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
+		adaptive_pebs_save_regs(regs, gprs);
+}
+
+static inline void __setup_pebs_meminfo_group(struct perf_event *event,
+					      struct perf_sample_data *data,
+					      u64 sample_type, u64 latency,
+					      u16 instr_latency, u64 address,
+					      u64 aux, u64 tsx_tuning, u64 ax)
+{
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+		u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
+
+		data->weight.var2_w = instr_latency;
+
+		/*
+		 * Although meminfo::latency is defined as a u64,
+		 * only the lower 32 bits include the valid data
+		 * in practice on Ice Lake and earlier platforms.
+		 */
+		if (sample_type & PERF_SAMPLE_WEIGHT)
+			data->weight.full = latency ?: tsx_latency;
+		else
+			data->weight.var1_dw = (u32)latency ?: tsx_latency;
+
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC) {
+		data->data_src.val = get_data_src(event, aux);
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
+		data->addr = address;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION) {
+		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
+		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+	}
+}
+
 /*
  * With adaptive PEBS the layout depends on what fields are configured.
  */
@@ -2082,12 +2167,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 					    struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 sample_type = event->attr.sample_type;
 	struct pebs_basic *basic = __pebs;
 	void *next_record = basic + 1;
-	u64 sample_type, format_group;
 	struct pebs_meminfo *meminfo = NULL;
 	struct pebs_gprs *gprs = NULL;
 	struct x86_perf_regs *perf_regs;
+	u64 format_group;
+	u16 retire;
 
 	if (basic == NULL)
 		return;
@@ -2095,32 +2182,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	perf_regs = container_of(regs, struct x86_perf_regs, regs);
 	perf_regs->xmm_regs = NULL;
 
-	sample_type = event->attr.sample_type;
 	format_group = basic->format_group;
-	perf_sample_data_init(data, 0, event->hw.last_period);
-	data->period = event->hw.last_period;
 
-	setup_pebs_time(event, data, basic->tsc);
-
-	/*
-	 * We must however always use iregs for the unwinder to stay sane; the
-	 * record BP,SP,IP can point into thin air when the record is from a
-	 * previous PMI context or an (I)RET happened between the record and
-	 * PMI.
-	 */
-	perf_sample_save_callchain(data, event, iregs);
+	__setup_perf_sample_data(event, iregs, data);
 
 	*regs = *iregs;
-	/* The ip in basic is EventingIP */
-	set_linear_ip(regs, basic->ip);
-	regs->flags = PERF_EFLAGS_EXACT;
 
-	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
-		if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
-			data->weight.var3_w = basic->retire_latency;
-		else
-			data->weight.var3_w = 0;
-	}
+	/* basic group */
+	retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
+			basic->retire_latency : 0;
+	__setup_pebs_basic_group(event, regs, data, sample_type,
+				 basic->ip, basic->tsc, retire);
 
 	/*
 	 * The record for MEMINFO is in front of GP
@@ -2136,54 +2208,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 		gprs = next_record;
 		next_record = gprs + 1;
 
-		if (event->attr.precise_ip < 2) {
-			set_linear_ip(regs, gprs->ip);
-			regs->flags &= ~PERF_EFLAGS_EXACT;
-		}
-
-		if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
-			adaptive_pebs_save_regs(regs, gprs);
+		__setup_pebs_gpr_group(event, regs, gprs, sample_type);
 	}
 
 	if (format_group & PEBS_DATACFG_MEMINFO) {
-		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
-			u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
-					meminfo->cache_latency : meminfo->mem_latency;
-
-			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
-				data->weight.var2_w = meminfo->instr_latency;
-
-			/*
-			 * Although meminfo::latency is defined as a u64,
-			 * only the lower 32 bits include the valid data
-			 * in practice on Ice Lake and earlier platforms.
-			 */
-			if (sample_type & PERF_SAMPLE_WEIGHT) {
-				data->weight.full = latency ?:
-					intel_get_tsx_weight(meminfo->tsx_tuning);
-			} else {
-				data->weight.var1_dw = (u32)latency ?:
-					intel_get_tsx_weight(meminfo->tsx_tuning);
-			}
-
-			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
-		}
-
-		if (sample_type & PERF_SAMPLE_DATA_SRC) {
-			data->data_src.val = get_data_src(event, meminfo->aux);
-			data->sample_flags |= PERF_SAMPLE_DATA_SRC;
-		}
-
-		if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
-			data->addr = meminfo->address;
-			data->sample_flags |= PERF_SAMPLE_ADDR;
-		}
-
-		if (sample_type & PERF_SAMPLE_TRANSACTION) {
-			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
-							  gprs ? gprs->ax : 0);
-			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
-		}
+		u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+				meminfo->cache_latency : meminfo->mem_latency;
+		u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+				meminfo->instr_latency : 0;
+		u64 ax = gprs ? gprs->ax : 0;
+
+		__setup_pebs_meminfo_group(event, data, sample_type, latency,
+					   instr_latency, meminfo->address,
+					   meminfo->aux, meminfo->tsx_tuning,
+					   ax);
 	}
 
 	if (format_group & PEBS_DATACFG_XMMS) {
-- 
2.40.1


  parent reply	other threads:[~2025-04-15  8:23 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-15 11:44 [Patch v3 00/22] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Dapeng Mi
2025-04-15 11:44 ` [Patch v3 01/22] perf/x86/intel: Add Panther Lake support Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Kan Liang
2025-04-15 11:44 ` [Patch v3 02/22] perf/x86/intel: Add PMU support for Clearwater Forest Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 03/22] perf/x86/intel: Parse CPUID archPerfmonExt leaves for non-hybrid CPUs Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 04/22] perf/x86/intel: Decouple BTS initialization from PEBS initialization Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 05/22] perf/x86/intel: Rename x86_pmu.pebs to x86_pmu.ds_pebs Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 06/22] perf/x86/intel: Introduce pairs of PEBS static calls Dapeng Mi
2025-04-17 13:00   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 07/22] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2025-04-15 11:44 ` [Patch v3 08/22] perf/x86/intel/ds: Factor out PEBS record processing code to functions Dapeng Mi
2025-04-15 11:44 ` Dapeng Mi [this message]
2025-04-15 11:44 ` [Patch v3 10/22] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2025-04-15 13:57   ` Peter Zijlstra
2025-04-15 16:09     ` Liang, Kan
2025-04-15 11:44 ` [Patch v3 11/22] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-04-15 13:45   ` Peter Zijlstra
2025-04-16  0:59     ` Mi, Dapeng
2025-04-15 13:48   ` Peter Zijlstra
2025-04-16  1:03     ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 12/22] perf/x86/intel: Update dyn_constranit base on PEBS event precise level Dapeng Mi
2025-04-15 13:53   ` Peter Zijlstra
2025-04-15 16:31     ` Liang, Kan
2025-04-16  1:46       ` Mi, Dapeng
2025-04-16 13:59         ` Liang, Kan
2025-04-17  1:15           ` Mi, Dapeng
2025-04-16 15:32       ` Peter Zijlstra
2025-04-16 19:45         ` Liang, Kan
2025-04-16 19:56           ` Peter Zijlstra
2025-04-22 22:50             ` Liang, Kan
2025-04-15 11:44 ` [Patch v3 13/22] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2025-04-15 11:44 ` [Patch v3 14/22] perf/x86/intel: Add counter group support for arch-PEBS Dapeng Mi
2025-04-15 11:44 ` [Patch v3 15/22] perf/x86/intel: Support SSP register capturing " Dapeng Mi
2025-04-15 14:07   ` Peter Zijlstra
2025-04-16  5:49     ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 16/22] perf/core: Support to capture higher width vector registers Dapeng Mi
2025-04-15 14:36   ` Peter Zijlstra
2025-04-16  6:42     ` Mi, Dapeng
2025-04-16 15:53       ` Peter Zijlstra
2025-04-17  2:00         ` Mi, Dapeng
2025-04-22  3:05         ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 17/22] perf/x86/intel: Support arch-PEBS vector registers group capturing Dapeng Mi
2025-04-15 11:44 ` [Patch v3 18/22] perf tools: Support to show SSP register Dapeng Mi
2025-04-15 11:44 ` [Patch v3 19/22] perf tools: Enhance arch__intr/user_reg_mask() helpers Dapeng Mi
2025-04-15 11:44 ` [Patch v3 20/22] perf tools: Enhance sample_regs_user/intr to capture more registers Dapeng Mi
2025-04-15 11:44 ` [Patch v3 21/22] perf tools: Support to capture more vector registers (x86/Intel) Dapeng Mi
2025-04-15 11:44 ` [Patch v3 22/22] perf tools/tests: Add vector registers PEBS sampling test Dapeng Mi
2025-04-15 15:21 ` [Patch v3 00/22] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Liang, Kan
2025-04-16  7:42   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250415114428.341182-10-dapeng1.mi@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.