From: "Liang, Kan" <kan.liang@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: acme@kernel.org, mingo@redhat.com, linux-kernel@vger.kernel.org,
tglx@linutronix.de, jolsa@kernel.org, eranian@google.com,
alexander.shishkin@linux.intel.com, ak@linux.intel.com
Subject: Re: [PATCH V5 00/12] perf: Add Icelake support (kernel only, except Topdown)
Date: Mon, 8 Apr 2019 12:06:31 -0400 [thread overview]
Message-ID: <ec9e3a07-4b29-b698-83d7-5b8bbcd48dc4@linux.intel.com> (raw)
In-Reply-To: <20190408154112.GW12232@hirez.programming.kicks-ass.net>
On 4/8/2019 11:41 AM, Peter Zijlstra wrote:
>
> I currently have something like the below on top, is that correct?
Yes, it's correct.
>
> If so, I'll fold it back in.
Thanks. It's really appreciated.
Kan
>
>
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -563,16 +563,17 @@ int x86_pmu_hw_config(struct perf_event
> /* sample_regs_user never support XMM registers */
> if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
> return -EINVAL;
> +
> /*
> * Besides the general purpose registers, XMM registers may
> * be collected in PEBS on some platforms, e.g. Icelake
> */
> if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
> - if (!is_sampling_event(event) ||
> - !event->attr.precise_ip ||
> - x86_pmu.pebs_no_xmm_regs)
> + if (x86_pmu.pebs_no_xmm_regs)
> return -EINVAL;
>
> + if (!event->attr.precise_ip)
> + return -EINVAL;
> }
>
> return x86_setup_perfctr(event);
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3428,7 +3428,7 @@ icl_get_event_constraints(struct cpu_hw_
> * Force instruction:ppp in Fixed counter 0
> */
> if ((event->attr.precise_ip == 3) &&
> - ((event->hw.config & X86_RAW_EVENT_MASK) == 0x00c0))
> + (event->hw.config == X86_CONFIG(.event=0xc0)))
> return &fixed_counter0_constraint;
>
> return hsw_get_event_constraints(cpuc, idx, event);
> @@ -4810,7 +4810,7 @@ __init int intel_pmu_init(void)
> hsw_format_attr : nhm_format_attr;
> extra_attr = merge_attr(extra_attr, skl_format_attr);
> x86_pmu.cpu_events = get_icl_events_attrs();
> - x86_pmu.force_gpr_event = 0x2ca;
> + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
> x86_pmu.lbr_pt_coexist = true;
> intel_pmu_pebs_data_source_skl(false);
> pr_cont("Icelake events, ");
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -853,13 +853,13 @@ struct event_constraint intel_icl_pebs_e
> INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
> INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
>
> - INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
> - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
> + INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
> + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
>
> INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
>
> - INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
>
> /*
> * Everything else is handled by PMU_FL_PEBS_ALL, because we
> @@ -963,40 +963,42 @@ static u64 pebs_update_adaptive_cfg(stru
> u64 pebs_data_cfg = 0;
> bool gprs, tsx_weight;
>
> - if ((sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) ||
> - attr->precise_ip < 2) {
> + if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
> + attr->precise_ip > 1)
> + return pebs_data_cfs;
>
> - if (sample_type & PERF_PEBS_MEMINFO_TYPE)
> - pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
> + if (sample_type & PERF_PEBS_MEMINFO_TYPE)
> + pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
>
> + /*
> + * We need GPRs when:
> + * + user requested them
> + * + precise_ip < 2 for the non event IP
> + * + For RTM TSX weight we need GPRs for the abort code.
> + */
> + gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
> + (attr->sample_regs_intr & PEBS_GPRS_REGS);
> +
> + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
> + ((attr->config & INTEL_ARCH_EVENT_MASK) ==
> + x86_pmu.rtm_abort_event);
> +
> + if (gprs || (attr->precise_ip < 2) || tsx_weight)
> + pebs_data_cfg |= PEBS_DATACFG_GPRS;
> +
> + if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
> + (attr->sample_regs_intr & PERF_XMM_REGS))
> + pebs_data_cfg |= PEBS_DATACFG_XMMS;
> +
> + if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
> /*
> - * Cases we need the registers:
> - * + user requested registers
> - * + precise_ip < 2 for the non event IP
> - * + For RTM TSX weight we need GPRs too for the abort
> - * code. But we don't want to force GPRs for all other
> - * weights. So add it only collectfor the RTM abort event.
> + * For now always log all LBRs. Could configure this
> + * later.
> */
> - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
> - (attr->sample_regs_intr & 0xffffffff);
> - tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
> - ((attr->config & 0xffff) == x86_pmu.force_gpr_event);
> - if (gprs || (attr->precise_ip < 2) || tsx_weight)
> - pebs_data_cfg |= PEBS_DATACFG_GPRS;
> -
> - if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
> - (attr->sample_regs_intr >> 32))
> - pebs_data_cfg |= PEBS_DATACFG_XMMS;
> -
> - if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
> - /*
> - * For now always log all LBRs. Could configure this
> - * later.
> - */
> - pebs_data_cfg |= PEBS_DATACFG_LBRS |
> - ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
> - }
> + pebs_data_cfg |= PEBS_DATACFG_LBRS |
> + ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
> }
> +
> return pebs_data_cfg;
> }
>
> @@ -1022,13 +1024,8 @@ pebs_update_state(bool needed_cb, struct
> }
>
> /*
> - * The PEBS record doesn't shrink on the del. Because to get
> - * an accurate config needs to go through all the existing pebs events.
> - * It's not necessary.
> - * There is no harmful for a bigger PEBS record, except little
> - * performance impacts.
> - * Also, for most cases, the same pebs config is applied for all
> - * pebs events.
> + * The PEBS record doesn't shrink on pmu::del(). Doing so would require
> + * iterating all remaining PEBS events to reconstruct the config.
> */
> if (x86_pmu.intel_cap.pebs_baseline && add) {
> u64 pebs_data_cfg;
> @@ -1076,8 +1073,7 @@ void intel_pmu_pebs_enable(struct perf_e
>
> cpuc->pebs_enabled |= 1ULL << hwc->idx;
>
> - if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
> - (x86_pmu.version < 5))
> + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
> cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
> else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
> cpuc->pebs_enabled |= 1ULL << 63;
> @@ -1766,8 +1762,7 @@ static void intel_pmu_drain_pebs_core(st
> setup_pebs_fixed_sample_data);
> }
>
> -static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc,
> - int size)
> +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
> {
> struct perf_event *event;
> int bit;
> @@ -1826,8 +1821,7 @@ static void intel_pmu_drain_pebs_nhm(str
>
> /* PEBS v3 has more accurate status bits */
> if (x86_pmu.intel_cap.pebs_format >= 3) {
> - for_each_set_bit(bit, (unsigned long *)&pebs_status,
> - size)
> + for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
> counts[bit]++;
>
> continue;
> @@ -1866,8 +1860,7 @@ static void intel_pmu_drain_pebs_nhm(str
> * If collision happened, the record will be dropped.
> */
> if (p->status != (1ULL << bit)) {
> - for_each_set_bit(i, (unsigned long *)&pebs_status,
> - x86_pmu.max_pebs_events)
> + for_each_set_bit(i, (unsigned long *)&pebs_status, size)
> error[i]++;
> continue;
> }
> @@ -1875,7 +1868,7 @@ static void intel_pmu_drain_pebs_nhm(str
> counts[bit]++;
> }
>
> - for (bit = 0; bit < size; bit++) {
> + for_each_set_bit(bit, (unsigned long *)&mask, size) {
> if ((counts[bit] == 0) && (error[bit] == 0))
> continue;
>
> @@ -1939,7 +1932,7 @@ static void intel_pmu_drain_pebs_icl(str
> counts[bit]++;
> }
>
> - for (bit = 0; bit < size; bit++) {
> + for_each_set_bit(bit, (unsigned long *)mask, size) {
> if (counts[bit] == 0)
> continue;
>
> @@ -1980,6 +1973,9 @@ void __init intel_ds_init(void)
> char *pebs_qual = "";
> int format = x86_pmu.intel_cap.pebs_format;
>
> + if (format < 4)
> + x86_pmu.intel_cap.pebs_baseline = 0;
> +
> switch (format) {
> case 0:
> pr_cont("PEBS fmt0%c, ", pebs_type);
> @@ -2042,8 +2038,6 @@ void __init intel_ds_init(void)
> pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
> x86_pmu.pebs = 0;
> }
> - if (format != 4)
> - x86_pmu.intel_cap.pebs_baseline = 0;
> }
> }
>
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -303,8 +303,8 @@ struct cpu_hw_events {
> __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
>
> /*
> - * Only works for Intel events, which has 'small' event codes.
> - * Need to fix the rang compare for 'big' event codes, e.g AMD64_EVENTSEL_EVENT
> + * The constraint_match() function only works for 'simple' event codes
> + * and not for extended (AMD64_EVENTSEL_EVENT) events codes.
> */
> #define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
> __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
> @@ -672,12 +672,12 @@ struct x86_pmu {
> pebs_no_xmm_regs :1;
> int pebs_record_size;
> int pebs_buffer_size;
> + int max_pebs_events;
> void (*drain_pebs)(struct pt_regs *regs);
> struct event_constraint *pebs_constraints;
> void (*pebs_aliases)(struct perf_event *event);
> - int max_pebs_events;
> unsigned long large_pebs_flags;
> - u64 force_gpr_event;
> + u64 rtm_abort_event;
>
> /*
> * Intel LBR
>
next prev parent reply other threads:[~2019-04-08 16:06 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-02 19:44 [PATCH V5 00/12] perf: Add Icelake support (kernel only, except Topdown) kan.liang
2019-04-02 19:44 ` [PATCH V5 01/12] perf/x86: Fix wrong PEBS_REGS kan.liang
2019-04-16 11:32 ` [tip:perf/core] perf/x86: Fix incorrect PEBS_REGS tip-bot for Kan Liang
2019-04-02 19:44 ` [PATCH V5 02/12] perf/x86: Support outputting XMM registers kan.liang
2019-04-16 11:34 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 03/12] perf/x86/intel: Extract memory code PEBS parser for reuse kan.liang
2019-04-16 11:35 ` [tip:perf/core] " tip-bot for Andi Kleen
2019-04-02 19:45 ` [PATCH V5 04/12] perf/x86/intel/ds: Extract code of event update in short period kan.liang
2019-04-16 11:35 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 05/12] perf/x86/intel: Support adaptive PEBSv4 kan.liang
2019-04-08 14:55 ` Peter Zijlstra
2019-04-16 11:36 ` [tip:perf/core] perf/x86/intel: Support adaptive PEBS v4 tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 06/12] perf/x86/lbr: Avoid reading the LBRs when adaptive PEBS handles them kan.liang
2019-04-16 11:37 ` [tip:perf/core] " tip-bot for Andi Kleen
2019-04-02 19:45 ` [PATCH V5 07/12] perf/x86: Support constraint ranges kan.liang
2019-04-16 11:37 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2019-04-02 19:45 ` [PATCH V5 08/12] perf/x86/intel: Add Icelake support kan.liang
2019-04-08 15:06 ` Peter Zijlstra
2019-04-08 15:45 ` Liang, Kan
2019-04-10 18:22 ` Liang, Kan
2019-04-10 19:47 ` Peter Zijlstra
2019-04-11 9:00 ` Peter Zijlstra
2019-04-11 13:29 ` Liang, Kan
2019-04-16 11:38 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 09/12] perf/x86/intel/cstate: " kan.liang
2019-04-16 11:39 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 10/12] perf/x86/intel/rapl: " kan.liang
2019-04-16 11:39 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 11/12] perf/x86/msr: " kan.liang
2019-04-16 11:40 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-02 19:45 ` [PATCH V5 12/12] perf/x86/intel/uncore: Add Intel Icelake uncore support kan.liang
2019-04-16 11:40 ` [tip:perf/core] " tip-bot for Kan Liang
2019-04-08 15:41 ` [PATCH V5 00/12] perf: Add Icelake support (kernel only, except Topdown) Peter Zijlstra
2019-04-08 16:06 ` Liang, Kan [this message]
2019-04-08 16:25 ` Liang, Kan
2019-04-08 16:28 ` Peter Zijlstra
2019-04-08 22:49 ` Liang, Kan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ec9e3a07-4b29-b698-83d7-5b8bbcd48dc4@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=eranian@google.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.