From: Robin Murphy <robin.murphy@arm.com>
To: Thomas Richter <tmricht@linux.ibm.com>,
peterz@infradead.org, mingo@redhat.com, will@kernel.org,
mark.rutland@arm.com, acme@kernel.org, namhyung@kernel.org,
alexander.shishkin@linux.intel.com, jolsa@kernel.org,
irogers@google.com, adrian.hunter@intel.com,
kan.liang@linux.intel.com
Cc: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-alpha@vger.kernel.org, linux-snps-arc@lists.infradead.org,
linux-arm-kernel@lists.infradead.org, imx@lists.linux.dev,
linux-csky@vger.kernel.org, loongarch@lists.linux.dev,
linux-mips@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
linux-s390@vger.kernel.org, linux-sh@vger.kernel.org,
sparclinux@vger.kernel.org, linux-pm@vger.kernel.org,
linux-rockchip@lists.infradead.org, dmaengine@vger.kernel.org,
linux-fpga@vger.kernel.org, amd-gfx@lists.freedesktop.org,
dri-devel@lists.freedesktop.org, intel-gfx@lists.freedesktop.org,
intel-xe@lists.freedesktop.org, coresight@lists.linaro.org,
iommu@lists.linux.dev, linux-amlogic@lists.infradead.org,
linux-cxl@vger.kernel.org, linux-arm-msm@vger.kernel.org,
linux-riscv@lists.infradead.org
Subject: Re: [PATCH 18/19] perf: Introduce positive capability for raw events
Date: Wed, 20 Aug 2025 12:39:28 +0100 [thread overview]
Message-ID: <145e1021-d2c3-4ff3-aabb-fb7416848a97@arm.com> (raw)
In-Reply-To: <295ae4dd-4734-42a0-be63-2d322f00c799@linux.ibm.com>
Hi Thomas,
On 2025-08-20 9:09 am, Thomas Richter wrote:
> On 8/19/25 15:15, Robin Murphy wrote:
>> On 13/08/2025 6:01 pm, Robin Murphy wrote:
>>> Only a handful of CPU PMUs accept PERF_TYPE_{RAW,HARDWARE,HW_CACHE}
>>> events without registering themselves as PERF_TYPE_RAW in the first
>>> place. Add an explicit opt-in for these special cases, so that we can
>>> make life easier for every other driver (and probably also speed up the
>>> slow-path search) by having perf_try_init_event() do the basic type
>>> checking to cover the majority of cases.
>>>
>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>> ---
>>>
>>> A further possibility is to automatically add the cap to PERF_TYPE_RAW
>>> PMUs in perf_pmu_register() to have a single point-of-use condition; I'm
>>> undecided...
>>> ---
>>> arch/s390/kernel/perf_cpum_cf.c | 1 +
>>> arch/s390/kernel/perf_pai_crypto.c | 2 +-
>>> arch/s390/kernel/perf_pai_ext.c | 2 +-
>>> arch/x86/events/core.c | 2 +-
>>> drivers/perf/arm_pmu.c | 1 +
>>> include/linux/perf_event.h | 1 +
>>> kernel/events/core.c | 15 +++++++++++++++
>>> 7 files changed, 21 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
>>> index 1a94e0944bc5..782ab755ddd4 100644
>>> --- a/arch/s390/kernel/perf_cpum_cf.c
>>> +++ b/arch/s390/kernel/perf_cpum_cf.c
>>> @@ -1054,6 +1054,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
>>> /* Performance monitoring unit for s390x */
>>> static struct pmu cpumf_pmu = {
>>> .task_ctx_nr = perf_sw_context,
>>> + .capabilities = PERF_PMU_CAP_RAW_EVENTS,
>>> .pmu_enable = cpumf_pmu_enable,
>>> .pmu_disable = cpumf_pmu_disable,
>>> .event_init = cpumf_pmu_event_init,
>>> diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
>>> index a64b6b056a21..b5b6d8b5d943 100644
>>> --- a/arch/s390/kernel/perf_pai_crypto.c
>>> +++ b/arch/s390/kernel/perf_pai_crypto.c
>>> @@ -569,7 +569,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = {
>>> /* Performance monitoring unit for mapped counters */
>>> static struct pmu paicrypt = {
>>> .task_ctx_nr = perf_hw_context,
>>> - .capabilities = PERF_PMU_CAP_SAMPLING,
>>> + .capabilities = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_RAW_EVENTS,
>>> .event_init = paicrypt_event_init,
>>> .add = paicrypt_add,
>>> .del = paicrypt_del,
>>> diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
>>> index 1261f80c6d52..bcd28c38da70 100644
>>> --- a/arch/s390/kernel/perf_pai_ext.c
>>> +++ b/arch/s390/kernel/perf_pai_ext.c
>>> @@ -595,7 +595,7 @@ static const struct attribute_group *paiext_attr_groups[] = {
>>> /* Performance monitoring unit for mapped counters */
>>> static struct pmu paiext = {
>>> .task_ctx_nr = perf_hw_context,
>>> - .capabilities = PERF_PMU_CAP_SAMPLING,
>>> + .capabilities = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_RAW_EVENTS,
>>> .event_init = paiext_event_init,
>>> .add = paiext_add,
>>> .del = paiext_del,
>>> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
>>> index 789dfca2fa67..764728bb80ae 100644
>>> --- a/arch/x86/events/core.c
>>> +++ b/arch/x86/events/core.c
>>> @@ -2697,7 +2697,7 @@ static bool x86_pmu_filter(struct pmu *pmu, int cpu)
>>> }
>>> static struct pmu pmu = {
>>> - .capabilities = PERF_PMU_CAP_SAMPLING,
>>> + .capabilities = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_RAW_EVENTS,
>>> .pmu_enable = x86_pmu_enable,
>>> .pmu_disable = x86_pmu_disable,
>>> diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
>>> index 72d8f38d0aa5..bc772a3bf411 100644
>>> --- a/drivers/perf/arm_pmu.c
>>> +++ b/drivers/perf/arm_pmu.c
>>> @@ -877,6 +877,7 @@ struct arm_pmu *armpmu_alloc(void)
>>> * specific PMU.
>>> */
>>> .capabilities = PERF_PMU_CAP_SAMPLING |
>>> + PERF_PMU_CAP_RAW_EVENTS |
>>> PERF_PMU_CAP_EXTENDED_REGS |
>>> PERF_PMU_CAP_EXTENDED_HW_TYPE,
>>> };
>>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>>> index 183b7c48b329..c6ad036c0037 100644
>>> --- a/include/linux/perf_event.h
>>> +++ b/include/linux/perf_event.h
>>> @@ -305,6 +305,7 @@ struct perf_event_pmu_context;
>>> #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
>>> #define PERF_PMU_CAP_AUX_PAUSE 0x0200
>>> #define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400
>>> +#define PERF_PMU_CAP_RAW_EVENTS 0x0800
>>> /**
>>> * pmu::scope
>>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>>> index 71b2a6730705..2ecee76d2ae2 100644
>>> --- a/kernel/events/core.c
>>> +++ b/kernel/events/core.c
>>> @@ -12556,11 +12556,26 @@ static inline bool has_extended_regs(struct perf_event *event)
>>> (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
>>> }
>>> +static bool is_raw_pmu(const struct pmu *pmu)
>>> +{
>>> + return pmu->type == PERF_TYPE_RAW ||
>>> + pmu->capabilities & PERF_PMU_CAP_RAW_EVENTS;
>>> +}
>>> +
>>> static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
>>> {
>>> struct perf_event_context *ctx = NULL;
>>> int ret;
>>> + /*
>>> + * Before touching anything, we can safely skip:
>>> + * - any event for a specific PMU which is not this one
>>> + * - any common event if this PMU doesn't support them
>>> + */
>>> + if (event->attr.type != pmu->type &&
>>> + (event->attr.type >= PERF_TYPE_MAX || is_raw_pmu(pmu)))
>>
>> Ah, that should be "!is_raw_pmu(pmu)" there (although it's not entirely the cause of the LKP report on the final patch.)
>>
>> Thanks,
>> Robin.
>>
>>> + return -ENOENT;
>>> +
>>> if (!try_module_get(pmu->module))
>>> return -ENODEV;
>>>
>>
>>
>
> Hi Robin,
>
> what is the intention of that patch?
> Can you explain that a bit more.
The background here is that, in this context, we essentially have 3
distinct categories of PMU driver:
- Older/simpler CPU PMUs which register as PERF_TYPE_RAW and accept
raw/hardware events
- Newer/heterogeneous CPU PMUs which register as a dynamic type, and
accept both raw/hardware events and events of their own type
- Other (mostly uncore) PMUs which only accept events of their own type
These days that third one is by far the majority, so it seems
increasingly unreasonable and inefficient to always offer every kind of
event to every driver, and so force nearly all of them to have the same
boilerplate code to refuse events they don't want. The core code is
already in a position to be able to assume that a PERF_TYPE_RAW PMU
wants "raw" events and a typed PMU wants its own events, so the only
actual new thing we need is a way to discern the 5 drivers in the middle
category - where s390 dominates :) - from the rest in the third.
The way the check itself ends up structured is that the only time we'll
now offer an event to a driver of a different type is if it's a "raw"
event and the driver has asked to be offered them (either by registering
as PERF_TYPE_RAW or with the new cap). Otherwise we can safely assume
that this PMU won't want this event, and so skip straight to trying the
next one. We can get away with the single PERF_TYPE_MAX check for all
"raw" events, since the drivers which do handle them already have to
consider the exact type to discern between RAW/HARDWARE/HW_CACHE, and
thus must reject SOFTWARE/TRACEPOINT/BREAKPOINT events anyway, but I
could of course make that more specific if people prefer. Conversely,
since the actual software/tracepoint/breakpoint PMUs won't pass the
is_raw_pmu() check either, and thus will only be given their own events,
I could remove the type checking from their event_init routines as well,
but I thought that might be perhaps a little too subtle as-is.
BTW if the s390 drivers are intended to coexist then I'm not sure they
actually handle sharing PERF_TYPE_RAW events very well - what happens to
any particular event seems ultimately largely dependent on the order in
which the drivers happen to register - but that's a pre-existing issue
and this series shouldn't change anything in that respect. (As it
similarly shouldn't affect the trick of the first matching driver
rewriting the event type to "forward" it to another driver later in the
list.)
Thanks,
Robin.
next prev parent reply other threads:[~2025-08-20 11:39 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-13 17:00 [PATCH 00/19] perf: Rework event_init checks Robin Murphy
2025-08-13 17:00 ` [PATCH 01/19] perf/arm-cmn: Fix event validation Robin Murphy
2025-08-26 10:46 ` Mark Rutland
2025-08-13 17:00 ` [PATCH 02/19] perf/hisilicon: Fix group validation Robin Murphy
2025-08-26 11:15 ` Mark Rutland
2025-08-26 13:18 ` Mark Rutland
2025-08-26 14:35 ` Robin Murphy
2025-08-26 15:31 ` Mark Rutland
2025-08-26 15:55 ` Mark Rutland
2025-08-27 14:03 ` Mark Rutland
2025-08-13 17:00 ` [PATCH 03/19] perf/imx8_ddr: " Robin Murphy
2025-08-13 17:00 ` [PATCH 04/19] perf/starfive: " Robin Murphy
2025-08-13 17:00 ` [PATCH 05/19] iommu/vt-d: Fix perfmon " Robin Murphy
2025-08-13 17:00 ` [PATCH 06/19] ARM: l2x0: Fix " Robin Murphy
2025-08-13 17:00 ` [PATCH 07/19] ARM: imx: Fix MMDC PMU " Robin Murphy
2025-08-13 17:01 ` [PATCH 08/19] perf/arm_smmu_v3: Improve " Robin Murphy
2025-08-13 17:01 ` [PATCH 09/19] perf/qcom: " Robin Murphy
2025-08-13 17:01 ` [PATCH 10/19] perf/arm-ni: Improve event validation Robin Murphy
2025-08-13 17:01 ` [PATCH 11/19] perf/arm-cci: Tidy up " Robin Murphy
2025-08-13 17:01 ` [PATCH 12/19] perf: Ignore event state for group validation Robin Murphy
2025-08-26 13:03 ` Peter Zijlstra
2025-08-26 15:32 ` Robin Murphy
2025-08-26 18:48 ` Ian Rogers
2025-08-27 8:18 ` Mark Rutland
2025-08-27 15:15 ` Ian Rogers
2025-08-13 17:01 ` [PATCH 13/19] perf: Add helper for checking grouped events Robin Murphy
2025-08-14 5:43 ` kernel test robot
2025-08-13 17:01 ` [PATCH 14/19] perf: Clean up redundant group validation Robin Murphy
2025-08-13 17:01 ` [PATCH 15/19] perf: Simplify " Robin Murphy
2025-08-13 17:01 ` [PATCH 16/19] perf: Introduce positive capability for sampling Robin Murphy
2025-08-26 13:08 ` Peter Zijlstra
2025-08-26 13:28 ` Mark Rutland
2025-08-26 16:35 ` Robin Murphy
2025-08-26 13:11 ` Leo Yan
2025-08-26 15:53 ` Robin Murphy
2025-08-27 8:06 ` Leo Yan
2025-08-13 17:01 ` [PATCH 17/19] perf: Retire PERF_PMU_CAP_NO_INTERRUPT Robin Murphy
2025-08-26 13:08 ` Peter Zijlstra
2025-08-13 17:01 ` [PATCH 18/19] perf: Introduce positive capability for raw events Robin Murphy
2025-08-19 13:15 ` Robin Murphy
2025-08-20 8:09 ` Thomas Richter
2025-08-20 11:39 ` Robin Murphy [this message]
2025-08-21 2:53 ` kernel test robot
2025-08-26 13:43 ` Mark Rutland
2025-08-26 22:46 ` Robin Murphy
2025-08-27 8:04 ` Mark Rutland
2025-08-27 5:27 ` Thomas Richter
2025-08-13 17:01 ` [PATCH 19/19] perf: Garbage-collect event_init checks Robin Murphy
2025-08-14 8:04 ` kernel test robot
2025-08-19 2:44 ` kernel test robot
2025-08-19 17:49 ` Robin Murphy
2025-08-19 13:25 ` Robin Murphy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=145e1021-d2c3-4ff3-aabb-fb7416848a97@arm.com \
--to=robin.murphy@arm.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=coresight@lists.linaro.org \
--cc=dmaengine@vger.kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=imx@lists.linux.dev \
--cc=intel-gfx@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=iommu@lists.linux.dev \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-alpha@vger.kernel.org \
--cc=linux-amlogic@lists.infradead.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-csky@vger.kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-fpga@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mips@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=linux-pm@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=linux-rockchip@lists.infradead.org \
--cc=linux-s390@vger.kernel.org \
--cc=linux-sh@vger.kernel.org \
--cc=linux-snps-arc@lists.infradead.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=loongarch@lists.linux.dev \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=sparclinux@vger.kernel.org \
--cc=tmricht@linux.ibm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).