* [PATCH v2 0/2] perf/arm-ni: IRQ improvements
@ 2025-07-11 18:15 Robin Murphy
2025-07-11 18:15 ` [PATCH v2 1/2] perf/arm-ni: Consolidate CPU affinity handling Robin Murphy
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Robin Murphy @ 2025-07-11 18:15 UTC (permalink / raw)
To: will
Cc: mark.rutland, linux-arm-kernel, linux-perf-users, allen.wang,
peter.du, andy.xu
v1: https://lore.kernel.org/all/cover.1747149165.git.robin.murphy@arm.com
Hi all,
Here's a quick tweak to address Will's concern last time (minus
the affinity fix that's already queued).
Thanks,
Robin.
Robin Murphy (1):
perf/arm-ni: Consolidate CPU affinity handling
Shouping Wang (1):
perf/arm-ni: Support sharing IRQs within an NI instance
drivers/perf/arm-ni.c | 154 ++++++++++++++++++++++++------------------
1 file changed, 88 insertions(+), 66 deletions(-)
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2 1/2] perf/arm-ni: Consolidate CPU affinity handling
2025-07-11 18:15 [PATCH v2 0/2] perf/arm-ni: IRQ improvements Robin Murphy
@ 2025-07-11 18:15 ` Robin Murphy
2025-07-11 18:15 ` [PATCH v2 2/2] perf/arm-ni: Support sharing IRQs within an NI instance Robin Murphy
2025-07-14 15:10 ` [PATCH v2 0/2] perf/arm-ni: IRQ improvements Will Deacon
2 siblings, 0 replies; 4+ messages in thread
From: Robin Murphy @ 2025-07-11 18:15 UTC (permalink / raw)
To: will
Cc: mark.rutland, linux-arm-kernel, linux-perf-users, allen.wang,
peter.du, andy.xu
Since overflow interrupts from the individual PMUs are infrequent and
unlikely to coincide, and we make no attempt to balance them across
CPUs anyway, there's really not much point tracking a separate CPU
affinity per PMU. Move the CPU affinity and hotplug migration up to
the NI instance level.
Tested-by: Shouping Wang <allen.wang@hj-micro.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: No change
drivers/perf/arm-ni.c | 74 ++++++++++++++++++++-----------------------
1 file changed, 34 insertions(+), 40 deletions(-)
diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
index 9396d243415f..168750e78fc4 100644
--- a/drivers/perf/arm-ni.c
+++ b/drivers/perf/arm-ni.c
@@ -104,8 +104,6 @@ struct arm_ni_cd {
u16 id;
int num_units;
int irq;
- int cpu;
- struct hlist_node cpuhp_node;
struct pmu pmu;
struct arm_ni_unit *units;
struct perf_event *evcnt[NI_NUM_COUNTERS];
@@ -117,13 +115,18 @@ struct arm_ni {
void __iomem *base;
enum ni_part part;
int id;
+ int cpu;
int num_cds;
+ struct hlist_node cpuhp_node;
struct arm_ni_cd cds[] __counted_by(num_cds);
};
#define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id])
#define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu)
+#define ni_for_each_cd(n, c) \
+ for (struct arm_ni_cd *c = n->cds; c < n->cds + n->num_cds; c++) if (c->pmu_base)
+
#define cd_for_each_unit(cd, u) \
for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++)
@@ -218,9 +221,9 @@ static const struct attribute_group arm_ni_format_attrs_group = {
static ssize_t arm_ni_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev));
+ struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev)));
- return cpumap_print_to_pagebuf(true, buf, cpumask_of(cd->cpu));
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(ni->cpu));
}
static struct device_attribute arm_ni_cpumask_attr =
@@ -314,7 +317,7 @@ static int arm_ni_event_init(struct perf_event *event)
if (is_sampling_event(event))
return -EINVAL;
- event->cpu = cd->cpu;
+ event->cpu = cd_to_ni(cd)->cpu;
if (NI_EVENT_TYPE(event) == NI_PMU)
return arm_ni_validate_group(event);
@@ -543,8 +546,7 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s
if (err)
return err;
- cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
- irq_set_affinity(cd->irq, cpumask_of(cd->cpu));
+ irq_set_affinity(cd->irq, cpumask_of(ni->cpu));
cd->pmu = (struct pmu) {
.module = THIS_MODULE,
@@ -566,32 +568,19 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s
if (!name)
return -ENOMEM;
- err = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
- if (err)
- return err;
-
- err = perf_pmu_register(&cd->pmu, name, -1);
- if (err)
- cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
-
- return err;
+ return perf_pmu_register(&cd->pmu, name, -1);
}
static void arm_ni_remove(struct platform_device *pdev)
{
struct arm_ni *ni = platform_get_drvdata(pdev);
- for (int i = 0; i < ni->num_cds; i++) {
- struct arm_ni_cd *cd = ni->cds + i;
-
- if (!cd->pmu_base)
- continue;
-
+ ni_for_each_cd(ni, cd) {
writel_relaxed(0, cd->pmu_base + NI_PMCR);
writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR);
perf_pmu_unregister(&cd->pmu);
- cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node);
}
+ cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node);
}
static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
@@ -611,7 +600,7 @@ static int arm_ni_probe(struct platform_device *pdev)
struct resource *res;
void __iomem *base;
static atomic_t id;
- int num_cds;
+ int ret, num_cds;
u32 reg, part;
/*
@@ -662,8 +651,13 @@ static int arm_ni_probe(struct platform_device *pdev)
ni->num_cds = num_cds;
ni->part = part;
ni->id = atomic_fetch_inc(&id);
+ ni->cpu = cpumask_local_spread(0, dev_to_node(ni->dev));
platform_set_drvdata(pdev, ni);
+ ret = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node);
+ if (ret)
+ return ret;
+
for (int v = 0; v < cfg.num_components; v++) {
reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v));
arm_ni_probe_domain(base + reg, &vd);
@@ -671,8 +665,6 @@ static int arm_ni_probe(struct platform_device *pdev)
reg = readl_relaxed(vd.base + NI_CHILD_PTR(p));
arm_ni_probe_domain(base + reg, &pd);
for (int c = 0; c < pd.num_components; c++) {
- int ret;
-
reg = readl_relaxed(pd.base + NI_CHILD_PTR(c));
arm_ni_probe_domain(base + reg, &cd);
ret = arm_ni_init_cd(ni, &cd, res->start);
@@ -714,42 +706,44 @@ static struct platform_driver arm_ni_driver = {
.remove = arm_ni_remove,
};
-static void arm_ni_pmu_migrate(struct arm_ni_cd *cd, unsigned int cpu)
+static void arm_ni_pmu_migrate(struct arm_ni *ni, unsigned int cpu)
{
- perf_pmu_migrate_context(&cd->pmu, cd->cpu, cpu);
- irq_set_affinity(cd->irq, cpumask_of(cpu));
- cd->cpu = cpu;
+ ni_for_each_cd(ni, cd) {
+ perf_pmu_migrate_context(&cd->pmu, ni->cpu, cpu);
+ irq_set_affinity(cd->irq, cpumask_of(cpu));
+ }
+ ni->cpu = cpu;
}
static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
- struct arm_ni_cd *cd;
+ struct arm_ni *ni;
int node;
- cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node);
- node = dev_to_node(cd_to_ni(cd)->dev);
- if (cpu_to_node(cd->cpu) != node && cpu_to_node(cpu) == node)
- arm_ni_pmu_migrate(cd, cpu);
+ ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node);
+ node = dev_to_node(ni->dev);
+ if (cpu_to_node(ni->cpu) != node && cpu_to_node(cpu) == node)
+ arm_ni_pmu_migrate(ni, cpu);
return 0;
}
static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
- struct arm_ni_cd *cd;
+ struct arm_ni *ni;
unsigned int target;
int node;
- cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node);
- if (cpu != cd->cpu)
+ ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node);
+ if (cpu != ni->cpu)
return 0;
- node = dev_to_node(cd_to_ni(cd)->dev);
+ node = dev_to_node(ni->dev);
target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target < nr_cpu_ids)
- arm_ni_pmu_migrate(cd, target);
+ arm_ni_pmu_migrate(ni, target);
return 0;
}
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 2/2] perf/arm-ni: Support sharing IRQs within an NI instance
2025-07-11 18:15 [PATCH v2 0/2] perf/arm-ni: IRQ improvements Robin Murphy
2025-07-11 18:15 ` [PATCH v2 1/2] perf/arm-ni: Consolidate CPU affinity handling Robin Murphy
@ 2025-07-11 18:15 ` Robin Murphy
2025-07-14 15:10 ` [PATCH v2 0/2] perf/arm-ni: IRQ improvements Will Deacon
2 siblings, 0 replies; 4+ messages in thread
From: Robin Murphy @ 2025-07-11 18:15 UTC (permalink / raw)
To: will
Cc: mark.rutland, linux-arm-kernel, linux-perf-users, allen.wang,
peter.du, andy.xu
From: Shouping Wang <allen.wang@hj-micro.com>
NI-700 has a distinct PMU interrupt output for each Clock Domain,
however some integrations may still combine these together externally.
The initial driver didn't attempt to support this, in anticipation of a
more general solution for IRQ sharing between system PMU instances, but
that's still a way off, so let's make this intermediate step for now to
at least allow sharing IRQs within an individual NI instance.
Now that CPU affinity and migration are cleaned up, it's fairly
straightforward to adopt similar logic to arm-cmn, to identify CDs with
a common interrupt and loop over them directly in the handler.
Signed-off-by: Shouping Wang <allen.wang@hj-micro.com>
[ rm: Rework for affinity handling, cosmetics, new commit message ]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Delay enabling IRQs for the sake of reasoning
drivers/perf/arm-ni.c | 82 +++++++++++++++++++++++++++++--------------
1 file changed, 55 insertions(+), 27 deletions(-)
diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c
index 168750e78fc4..3c505352e302 100644
--- a/drivers/perf/arm-ni.c
+++ b/drivers/perf/arm-ni.c
@@ -102,6 +102,7 @@ struct arm_ni_unit {
struct arm_ni_cd {
void __iomem *pmu_base;
u16 id;
+ s8 irq_friend;
int num_units;
int irq;
struct pmu pmu;
@@ -448,33 +449,37 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id)
{
struct arm_ni_cd *cd = dev_id;
irqreturn_t ret = IRQ_NONE;
- u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
- if (reg & (1U << NI_CCNT_IDX)) {
- ret = IRQ_HANDLED;
- if (!(WARN_ON(!cd->ccnt))) {
- arm_ni_event_read(cd->ccnt);
- arm_ni_init_ccnt(cd);
+ for (;;) {
+ u32 reg = readl_relaxed(cd->pmu_base + NI_PMOVSCLR);
+
+ if (reg & (1U << NI_CCNT_IDX)) {
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->ccnt))) {
+ arm_ni_event_read(cd->ccnt);
+ arm_ni_init_ccnt(cd);
+ }
}
- }
- for (int i = 0; i < NI_NUM_COUNTERS; i++) {
- if (!(reg & (1U << i)))
- continue;
- ret = IRQ_HANDLED;
- if (!(WARN_ON(!cd->evcnt[i]))) {
- arm_ni_event_read(cd->evcnt[i]);
- arm_ni_init_evcnt(cd, i);
+ for (int i = 0; i < NI_NUM_COUNTERS; i++) {
+ if (!(reg & (1U << i)))
+ continue;
+ ret = IRQ_HANDLED;
+ if (!(WARN_ON(!cd->evcnt[i]))) {
+ arm_ni_event_read(cd->evcnt[i]);
+ arm_ni_init_evcnt(cd, i);
+ }
}
+ writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
+ if (!cd->irq_friend)
+ return ret;
+ cd += cd->irq_friend;
}
- writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR);
- return ret;
}
static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start)
{
struct arm_ni_cd *cd = ni->cds + node->id;
const char *name;
- int err;
cd->id = node->id;
cd->num_units = node->num_components;
@@ -534,20 +539,11 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s
cd->pmu_base + NI_PMCR);
writel_relaxed(U32_MAX, cd->pmu_base + NI_PMCNTENCLR);
writel_relaxed(U32_MAX, cd->pmu_base + NI_PMOVSCLR);
- writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
cd->irq = platform_get_irq(to_platform_device(ni->dev), cd->id);
if (cd->irq < 0)
return cd->irq;
- err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
- IRQF_NOBALANCING | IRQF_NO_THREAD,
- dev_name(ni->dev), cd);
- if (err)
- return err;
-
- irq_set_affinity(cd->irq, cpumask_of(ni->cpu));
-
cd->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = ni->dev,
@@ -593,6 +589,34 @@ static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node)
node->num_components = readl_relaxed(base + NI_CHILD_NODE_INFO);
}
+static int arm_ni_init_irqs(struct arm_ni *ni)
+{
+ int err;
+
+ ni_for_each_cd(ni, cd) {
+ for (struct arm_ni_cd *prev = cd; prev-- > ni->cds; ) {
+ if (prev->irq == cd->irq) {
+ prev->irq_friend = cd - prev;
+ goto set_inten;
+ }
+ }
+ err = devm_request_irq(ni->dev, cd->irq, arm_ni_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_AUTOEN,
+ dev_name(ni->dev), cd);
+ if (err)
+ return err;
+
+ irq_set_affinity(cd->irq, cpumask_of(ni->cpu));
+set_inten:
+ writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENSET);
+ }
+ /* Look Ma, no absurdly theoretical race conditions! */
+ ni_for_each_cd(ni, cd)
+ if (!cd->irq_friend)
+ enable_irq(cd->irq);
+ return 0;
+}
+
static int arm_ni_probe(struct platform_device *pdev)
{
struct arm_ni_node cfg, vd, pd, cd;
@@ -677,7 +701,11 @@ static int arm_ni_probe(struct platform_device *pdev)
}
}
- return 0;
+ ret = arm_ni_init_irqs(ni);
+ if (ret)
+ arm_ni_remove(pdev);
+
+ return ret;
}
#ifdef CONFIG_OF
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2 0/2] perf/arm-ni: IRQ improvements
2025-07-11 18:15 [PATCH v2 0/2] perf/arm-ni: IRQ improvements Robin Murphy
2025-07-11 18:15 ` [PATCH v2 1/2] perf/arm-ni: Consolidate CPU affinity handling Robin Murphy
2025-07-11 18:15 ` [PATCH v2 2/2] perf/arm-ni: Support sharing IRQs within an NI instance Robin Murphy
@ 2025-07-14 15:10 ` Will Deacon
2 siblings, 0 replies; 4+ messages in thread
From: Will Deacon @ 2025-07-14 15:10 UTC (permalink / raw)
To: Robin Murphy
Cc: catalin.marinas, kernel-team, Will Deacon, mark.rutland,
linux-arm-kernel, linux-perf-users, allen.wang, peter.du, andy.xu
On Fri, 11 Jul 2025 19:15:15 +0100, Robin Murphy wrote:
> v1: https://lore.kernel.org/all/cover.1747149165.git.robin.murphy@arm.com
>
> Hi all,
>
> Here's a quick tweak to address Will's concern last time (minus
> the affinity fix that's already queued).
>
> [...]
Applied to will (for-next/perf), thanks!
[1/2] perf/arm-ni: Consolidate CPU affinity handling
https://git.kernel.org/will/c/6a5dc6c7534e
[2/2] perf/arm-ni: Support sharing IRQs within an NI instance
https://git.kernel.org/will/c/89f0b9ccd314
Cheers,
--
Will
https://fixes.arm64.dev
https://next.arm64.dev
https://will.arm64.dev
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-07-14 15:10 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-11 18:15 [PATCH v2 0/2] perf/arm-ni: IRQ improvements Robin Murphy
2025-07-11 18:15 ` [PATCH v2 1/2] perf/arm-ni: Consolidate CPU affinity handling Robin Murphy
2025-07-11 18:15 ` [PATCH v2 2/2] perf/arm-ni: Support sharing IRQs within an NI instance Robin Murphy
2025-07-14 15:10 ` [PATCH v2 0/2] perf/arm-ni: IRQ improvements Will Deacon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).