From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	broonie@kernel.org, Ravi Bangoria <ravi.bangoria@amd.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Zide Chen <zide.chen@intel.com>,
	Falcon Thomas <thomas.falcon@intel.com>,
	Dapeng Mi <dapeng1.mi@intel.com>,
	Xudong Hao <xudong.hao@intel.com>
Subject: Re: [Patch v5 16/19] perf/x86: Activate back-to-back NMI detection for arch-PEBS induced NMIs
Date: Mon, 8 Dec 2025 14:46:44 +0800	[thread overview]
Message-ID: <5e808ff0-0107-4085-a1ab-ea1cfed4e0ca@linux.intel.com> (raw)
In-Reply-To: <20251205123940.GY2528459@noisy.programming.kicks-ass.net>


On 12/5/2025 8:39 PM, Peter Zijlstra wrote:
> On Wed, Dec 03, 2025 at 02:54:57PM +0800, Dapeng Mi wrote:
>> When two or more identical PEBS events with the same sampling period are
>> programmed on a mix of PDIST and non-PDIST counters, multiple
>> back-to-back NMIs can be triggered.
> This is a hardware defect -- albeit a fairly common one.
>
>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index da48bcde8fce..a130d3f14844 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3351,8 +3351,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
>>  	 */
>>  	if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
>>  				 (unsigned long *)&status)) {
>> -		handled++;
>> -		static_call(x86_pmu_drain_pebs)(regs, &data);
>> +		handled += static_call(x86_pmu_drain_pebs)(regs, &data);
>>  
>>  		if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
>>  		    is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
> Note that the old code would return handled++, while the new code:
>
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index a01c72c03bd6..c7cdcd585574 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -2759,7 +2759,7 @@ __intel_pmu_pebs_events(struct perf_event *event,
>>  	__intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
>>  }
>>  
>> -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
>> +static int intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
>>  {
>>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>>  	struct debug_store *ds = cpuc->ds;
>> @@ -2768,7 +2768,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
>>  	int n;
>>  
>>  	if (!x86_pmu.pebs_active)
>> -		return;
>> +		return 0;
>>  
>>  	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
>>  	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
>> @@ -2779,22 +2779,24 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
>>  	ds->pebs_index = ds->pebs_buffer_base;
>>  
>>  	if (!test_bit(0, cpuc->active_mask))
>> -		return;
>> +		return 0;
>>  
>>  	WARN_ON_ONCE(!event);
>>  
>>  	if (!event->attr.precise_ip)
>> -		return;
>> +		return 0;
>>  
>>  	n = top - at;
>>  	if (n <= 0) {
>>  		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
>>  			intel_pmu_save_and_restart_reload(event, 0);
>> -		return;
>> +		return 0;
>>  	}
>>  
>>  	__intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
>>  				setup_pebs_fixed_sample_data);
>> +
>> +	return 0;
>>  }
>>  
>>  static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
>> @@ -2817,7 +2819,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64
>>  	}
>>  }
>>  
>> -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
>> +static int intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
>>  {
>>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>>  	struct debug_store *ds = cpuc->ds;
>> @@ -2830,7 +2832,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  	u64 mask;
>>  
>>  	if (!x86_pmu.pebs_active)
>> -		return;
>> +		return 0;
>>  
>>  	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
>>  	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
>> @@ -2846,7 +2848,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  
>>  	if (unlikely(base >= top)) {
>>  		intel_pmu_pebs_event_update_no_drain(cpuc, mask);
>> -		return;
>> +		return 0;
>>  	}
>>  
>>  	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
>> @@ -2931,6 +2933,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  						setup_pebs_fixed_sample_data);
>>  		}
>>  	}
>> +
>> +	return 0;
>>  }
>>  
>>  static __always_inline void
>> @@ -2984,7 +2988,7 @@ __intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
>>  
>>  }
>>  
>> -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
>> +static int intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
>>  {
>>  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>>  	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
>> @@ -2997,7 +3001,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
>>  	u64 mask;
>>  
>>  	if (!x86_pmu.pebs_active)
>> -		return;
>> +		return 0;
>>  
>>  	base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
>>  	top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
>> @@ -3010,7 +3014,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
>>  
>>  	if (unlikely(base >= top)) {
>>  		intel_pmu_pebs_event_update_no_drain(cpuc, mask);
>> -		return;
>> +		return 0;
>>  	}
>>  
>>  	if (!iregs)
>> @@ -3032,9 +3036,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
>>  
>>  	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
>>  					    setup_pebs_adaptive_sample_data);
>> +
>> +	return 0;
>>  }
> will now return handled+=0 for all these. Which is a change in
> behaviour. Also:

This change only takes effect for arch-PEBS. For legacy PEBS, "handled" is
still incremented by 1 unconditionally even though the *_drain_pebs()
helpers always return 0:

    /*
     * PEBS overflow sets bit 62 in the global status register
     */
    if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
                             (unsigned long *)&status)) {
        u64 pebs_enabled = cpuc->pebs_enabled;

        handled++;
        x86_pmu_handle_guest_pebs(regs, &data);
        static_call(x86_pmu_drain_pebs)(regs, &data);

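To make the contrast explicit, here is a condensed sketch of the two
branches in handle_pmi_common() after this patch (simplified, not the
literal kernel code):

    /* Legacy PEBS: buffer-overflow bit, credited unconditionally. */
    if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
                             (unsigned long *)&status)) {
        handled++;                                     /* always counted */
        static_call(x86_pmu_drain_pebs)(regs, &data);  /* result ignored */
    }

    /* Arch-PEBS: threshold bit, credited with what the drain reports. */
    if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
                             (unsigned long *)&status)) {
        handled += static_call(x86_pmu_drain_pebs)(regs, &data);
    }

So the legacy drain's return value is never used, and only the arch-PEBS
drain result feeds the handled count.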

>
>> -static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>> +static int intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>>  				      struct perf_sample_data *data)
>>  {
>>  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>> @@ -3044,13 +3050,14 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>>  	struct x86_perf_regs perf_regs;
>>  	struct pt_regs *regs = &perf_regs.regs;
>>  	void *base, *at, *top;
>> +	u64 events_bitmap = 0;
>>  	u64 mask;
>>  
>>  	rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
>>  
>>  	if (unlikely(!index.wr)) {
>>  		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
>> -		return;
>> +		return 0;

If index.wr is 0, it indicates that no PEBS record has been written into
the buffer since the last drain. In that case no PEBS PMI should be
generated; if one is generated anyway, something must be wrong. The 0
return value then leads to a "suspicious NMI" warning, which is useful
precisely because it tells us something is wrong.
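
For reference, a minimal sketch of the NMI core's accounting (simplified
from arch/x86/kernel/nmi.c, details omitted) showing why the return value
matters in both directions:

    handled = nmi_handle(NMI_LOCAL, regs);
    if (handled) {
        /*
         * Handling more than one event in a single NMI hints that a
         * back-to-back NMI may follow; let it be swallowed silently.
         */
        if (handled > 1)
            __this_cpu_write(swallow_nmi, true);
        return;
    }
    ...
    /* Nobody claimed the NMI: emit the unknown/"suspicious" NMI warning. */
    unknown_nmi_error(reason, regs);

A drain that returns 0 on a bogus PMI thus surfaces as the warning, while
a return value greater than 1 activates the back-to-back NMI swallowing
this patch is after.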


>>  	}
>>  
>>  	base = cpuc->pebs_vaddr;
>> @@ -3089,6 +3096,7 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>>  
>>  		basic = at + sizeof(struct arch_pebs_header);
>>  		pebs_status = mask & basic->applicable_counters;
>> +		events_bitmap |= pebs_status;
>>  		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
>>  					       pebs_status, counts, last,
>>  					       setup_arch_pebs_sample_data);
>> @@ -3108,6 +3116,8 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>>  	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
>>  					    counts, last,
>>  					    setup_arch_pebs_sample_data);
>> +
> 	/*
> 	 * Comment that explains the arch pebs defect goes here.
> 	 */
>> +	return hweight64(events_bitmap);
>>  }
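
(For completeness: hweight64() simply counts the set bits, so the drain
returns the number of distinct counters that contributed at least one PEBS
record. Illustrative values only:

    u64 events_bitmap = BIT_ULL(0) | BIT_ULL(3) | BIT_ULL(32);
    int handled = hweight64(events_bitmap);    /* == 3 */
)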

