Devicetree
 help / color / mirror / Atom feed
* [PATCH 0/3] perf: marvell: LLC-TAD PMU MPAM filtering and CN20K support
@ 2026-06-07 12:50 Geetha sowjanya
  2026-06-07 12:50 ` [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU Geetha sowjanya
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Geetha sowjanya @ 2026-06-07 12:50 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel, linux-arm-kernel, devicetree
  Cc: mark.rutland, will, krzk+dt, gakula

This series extends the Marvell LLC-TAD performance driver used on CN10K
and CN20K systems.

Patch 1 adds optional MPAM partition-id filtering for the subset of TAD
events that support it, exposes a partid field in the PMU format string,
and keeps the reduced Odyssey event surface without advertising partid
where it does not apply.

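Patch 2 introduces CN20K LLC-TAD support: non-standard PFC/PRF offsets,
additional programmable events, CN20K-specific MPAM encoding for the V3
profile, and device discovery via DeviceTree and ACPI.

Patch 3 extends the CN10K TAD PMU devicetree binding so that it also
accepts the marvell,cn20k-tad-pmu compatible.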


Tanmay Jagdale (1):
  perf: marvell: Add MPAM partid filtering to CN10K TAD PMU

Geetha sowjanya (2):
  perf: marvell: Add CN20K LLC-TAD PMU support
  dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K

Signed-off-by: Geetha sowjanya <gakula@marvell.com>

-- 
2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU
  2026-06-07 12:50 [PATCH 0/3] perf: marvell: LLC-TAD PMU MPAM filtering and CN20K support Geetha sowjanya
@ 2026-06-07 12:50 ` Geetha sowjanya
  2026-06-07 13:02   ` sashiko-bot
  2026-06-07 12:51 ` [PATCH 2/3] perf: marvell: Add CN20K LLC-TAD PMU support Geetha sowjanya
  2026-06-07 12:51 ` [PATCH 3/3] dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K Geetha sowjanya
  2 siblings, 1 reply; 7+ messages in thread
From: Geetha sowjanya @ 2026-06-07 12:50 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel, linux-arm-kernel, devicetree
  Cc: mark.rutland, will, krzk+dt, gakula

From: Tanmay Jagdale <tanmay@marvell.com>

The TAD PMU exposes counters that can be filtered by MPAM partition id
for a subset of allocation and hit events.

Add a 16-bit partid format attribute (config1) and route counter
programming through variant-specific ops so CN10K keeps MPAM-capable
programming while Odyssey keeps the reduced event set without
advertising partid in sysfs.

Example:
  perf stat -e tad/tad_alloc_any,partid=0x12/ -- <program>

Signed-off-by: Tanmay Jagdale <tanmay@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
---
 drivers/perf/marvell_cn10k_tad_pmu.c | 151 ++++++++++++++++++++-------
 1 file changed, 112 insertions(+), 39 deletions(-)

diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 51ccb0befa05..7a3b659c999a 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -7,6 +7,7 @@
 #define pr_fmt(fmt) "tad_pmu: " fmt
 
 #include <linux/io.h>
+#include <linux/bits.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/cpuhotplug.h>
@@ -14,11 +15,13 @@
 #include <linux/platform_device.h>
 #include <linux/acpi.h>
 
-#define TAD_PFC_OFFSET		0x800
-#define TAD_PFC(counter)	(TAD_PFC_OFFSET | (counter << 3))
 #define TAD_PRF_OFFSET		0x900
-#define TAD_PRF(counter)	(TAD_PRF_OFFSET | (counter << 3))
+#define TAD_PFC_OFFSET		0x800
+#define TAD_PFC(base, counter)	((base) | ((u64)(counter) << 3))
+#define TAD_PRF(base, counter)	((base) | ((u64)(counter) << 3))
 #define TAD_PRF_CNTSEL_MASK	0xFF
+#define TAD_PRF_MATCH_PARTID	BIT(8)
+#define TAD_PRF_PARTID_NS	BIT(10)
 #define TAD_MAX_COUNTERS	8
 
 #define to_tad_pmu(p) (container_of(p, struct tad_pmu, pmu))
@@ -27,30 +30,92 @@ struct tad_region {
 	void __iomem	*base;
 };
 
+enum mrvl_tad_pmu_version {
+	TAD_PMU_V1 = 1,
+	TAD_PMU_V2,
+};
+
+struct tad_pmu_data {
+	int id;
+	u64 tad_prf_offset;
+	u64 tad_pfc_offset;
+};
+
 struct tad_pmu {
 	struct pmu pmu;
 	struct tad_region *regions;
 	u32 region_cnt;
 	unsigned int cpu;
+	const struct tad_pmu_ops *ops;
+	const struct tad_pmu_data *pdata;
 	struct hlist_node node;
 	struct perf_event *events[TAD_MAX_COUNTERS];
 	DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
 };
 
-enum mrvl_tad_pmu_version {
-	TAD_PMU_V1 = 1,
-	TAD_PMU_V2,
-};
-
-struct tad_pmu_data {
-	int id;
+struct tad_pmu_ops {
+	void (*start_counter)(struct tad_pmu *pmu, struct perf_event *event);
 };
 
 static int tad_pmu_cpuhp_state;
 
+static void tad_pmu_start_counter(struct tad_pmu *pmu,
+				  struct perf_event *event)
+{
+	const struct tad_pmu_data *pdata = pmu->pdata;
+	struct hw_perf_event *hwc = &event->hw;
+	u32 event_idx = event->attr.config;
+	u32 counter_idx = hwc->idx;
+	u64 partid_filter = 0;
+	u64 reg_val;
+	u32 partid;
+	int i;
+
+	partid = (u32)(event->attr.config1 & GENMASK(15, 0));
+
+	for (i = 0; i < pmu->region_cnt; i++)
+		writeq_relaxed(0, pmu->regions[i].base +
+			       TAD_PFC(pdata->tad_pfc_offset, counter_idx));
+
+	if (partid && event_idx > 0x19 && event_idx < 0x21) {
+		partid_filter = TAD_PRF_MATCH_PARTID | TAD_PRF_PARTID_NS |
+				((u64)partid << 11);
+	}
+
+
+	for (i = 0; i < pmu->region_cnt; i++) {
+		reg_val = event_idx & 0xFF;
+		reg_val |= partid_filter;
+		writeq_relaxed(reg_val, pmu->regions[i].base +
+			       TAD_PRF(pdata->tad_prf_offset, counter_idx));
+	}
+}
+
+static void tad_pmu_v2_start_counter(struct tad_pmu *pmu,
+				     struct perf_event *event)
+{
+	const struct tad_pmu_data *pdata = pmu->pdata;
+	struct hw_perf_event *hwc = &event->hw;
+	u32 event_idx = event->attr.config;
+	u32 counter_idx = hwc->idx;
+	u64 reg_val;
+	int i;
+
+	for (i = 0; i < pmu->region_cnt; i++)
+		writeq_relaxed(0, pmu->regions[i].base +
+			       TAD_PFC(pdata->tad_pfc_offset, counter_idx));
+
+	for (i = 0; i < pmu->region_cnt; i++) {
+		reg_val = event_idx & 0xFF;
+		writeq_relaxed(reg_val, pmu->regions[i].base +
+			       TAD_PRF(pdata->tad_prf_offset, counter_idx));
+	}
+}
+
 static void tad_pmu_event_counter_read(struct perf_event *event)
 {
 	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	const struct tad_pmu_data *pdata = tad_pmu->pdata;
 	struct hw_perf_event *hwc = &event->hw;
 	u32 counter_idx = hwc->idx;
 	u64 prev, new;
@@ -60,7 +125,7 @@ static void tad_pmu_event_counter_read(struct perf_event *event)
 		prev = local64_read(&hwc->prev_count);
 		for (i = 0, new = 0; i < tad_pmu->region_cnt; i++)
 			new += readq(tad_pmu->regions[i].base +
-				     TAD_PFC(counter_idx));
+				     TAD_PFC(pdata->tad_pfc_offset, counter_idx));
 	} while (local64_cmpxchg(&hwc->prev_count, prev, new) != prev);
 
 	local64_add(new - prev, &event->count);
@@ -69,16 +134,14 @@ static void tad_pmu_event_counter_read(struct perf_event *event)
 static void tad_pmu_event_counter_stop(struct perf_event *event, int flags)
 {
 	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	const struct tad_pmu_data *pdata = tad_pmu->pdata;
 	struct hw_perf_event *hwc = &event->hw;
 	u32 counter_idx = hwc->idx;
 	int i;
 
-	/* TAD()_PFC() stop counting on the write
-	 * which sets TAD()_PRF()[CNTSEL] == 0
-	 */
 	for (i = 0; i < tad_pmu->region_cnt; i++) {
 		writeq_relaxed(0, tad_pmu->regions[i].base +
-			       TAD_PRF(counter_idx));
+			       TAD_PRF(pdata->tad_prf_offset, counter_idx));
 	}
 
 	tad_pmu_event_counter_read(event);
@@ -89,26 +152,10 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
 {
 	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	u32 event_idx = event->attr.config;
-	u32 counter_idx = hwc->idx;
-	u64 reg_val;
-	int i;
 
 	hwc->state = 0;
 
-	/* Typically TAD_PFC() are zeroed to start counting */
-	for (i = 0; i < tad_pmu->region_cnt; i++)
-		writeq_relaxed(0, tad_pmu->regions[i].base +
-			       TAD_PFC(counter_idx));
-
-	/* TAD()_PFC() start counting on the write
-	 * which sets TAD()_PRF()[CNTSEL] != 0
-	 */
-	for (i = 0; i < tad_pmu->region_cnt; i++) {
-		reg_val = event_idx & 0xFF;
-		writeq_relaxed(reg_val,	tad_pmu->regions[i].base +
-			       TAD_PRF(counter_idx));
-	}
+	tad_pmu->ops->start_counter(tad_pmu, event);
 }
 
 static void tad_pmu_event_counter_del(struct perf_event *event, int flags)
@@ -128,7 +175,6 @@ static int tad_pmu_event_counter_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
-	/* Get a free counter for this event */
 	idx = find_first_zero_bit(tad_pmu->counters_map, TAD_MAX_COUNTERS);
 	if (idx == TAD_MAX_COUNTERS)
 		return -EAGAIN;
@@ -232,7 +278,7 @@ static struct attribute *ody_tad_pmu_event_attrs[] = {
 	TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
 	TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
 	TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
-	TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
+	TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xff),
 	NULL
 };
 
@@ -242,9 +288,11 @@ static const struct attribute_group ody_tad_pmu_events_attr_group = {
 };
 
 PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(partid, "config1:0-15");
 
 static struct attribute *tad_pmu_format_attrs[] = {
 	&format_attr_event.attr,
+	&format_attr_partid.attr,
 	NULL
 };
 
@@ -253,6 +301,16 @@ static struct attribute_group tad_pmu_format_attr_group = {
 	.attrs = tad_pmu_format_attrs,
 };
 
+static struct attribute *ody_tad_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL
+};
+
+static struct attribute_group ody_tad_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = ody_tad_pmu_format_attrs,
+};
+
 static ssize_t tad_pmu_cpumask_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -281,11 +339,19 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
 
 static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
 	&ody_tad_pmu_events_attr_group,
-	&tad_pmu_format_attr_group,
+	&ody_tad_pmu_format_attr_group,
 	&tad_pmu_cpumask_attr_group,
 	NULL
 };
 
+static const struct tad_pmu_ops tad_pmu_ops = {
+	.start_counter = tad_pmu_start_counter,
+};
+
+static const struct tad_pmu_ops tad_pmu_v2_ops = {
+	.start_counter = tad_pmu_v2_start_counter,
+};
+
 static int tad_pmu_probe(struct platform_device *pdev)
 {
 	const struct tad_pmu_data *dev_data;
@@ -312,6 +378,7 @@ static int tad_pmu_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 	version = dev_data->id;
+	tad_pmu->pdata = dev_data;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
@@ -344,7 +411,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
 	if (!regions)
 		return -ENOMEM;
 
-	/* ioremap the distributed TAD pmu regions */
 	for (i = 0; i < tad_cnt && res->start < res->end; i++) {
 		regions[i].base = devm_ioremap(&pdev->dev,
 					       res->start,
@@ -374,14 +440,16 @@ static int tad_pmu_probe(struct platform_device *pdev)
 		.read		= tad_pmu_event_counter_read,
 	};
 
-	if (version == TAD_PMU_V1)
+	if (version == TAD_PMU_V1) {
 		tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
-	else
+		tad_pmu->ops		 = &tad_pmu_ops;
+	} else {
 		tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
+		tad_pmu->ops		 = &tad_pmu_v2_ops;
+	}
 
 	tad_pmu->cpu = raw_smp_processor_id();
 
-	/* Register pmu instance for cpu hotplug */
 	ret = cpuhp_state_add_instance_nocalls(tad_pmu_cpuhp_state,
 					       &tad_pmu->node);
 	if (ret) {
@@ -410,12 +478,17 @@ static void tad_pmu_remove(struct platform_device *pdev)
 #if defined(CONFIG_OF) || defined(CONFIG_ACPI)
 static const struct tad_pmu_data tad_pmu_data = {
 	.id   = TAD_PMU_V1,
+	.tad_prf_offset = TAD_PRF_OFFSET,
+	.tad_pfc_offset = TAD_PFC_OFFSET,
 };
+
 #endif
 
 #ifdef CONFIG_ACPI
 static const struct tad_pmu_data tad_pmu_v2_data = {
 	.id   = TAD_PMU_V2,
+	.tad_prf_offset = TAD_PRF_OFFSET,
+	.tad_pfc_offset = TAD_PFC_OFFSET,
 };
 #endif
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] perf: marvell: Add CN20K LLC-TAD PMU support
  2026-06-07 12:50 [PATCH 0/3] perf: marvell: LLC-TAD PMU MPAM filtering and CN20K support Geetha sowjanya
  2026-06-07 12:50 ` [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU Geetha sowjanya
@ 2026-06-07 12:51 ` Geetha sowjanya
  2026-06-07 13:04   ` sashiko-bot
  2026-06-07 12:51 ` [PATCH 3/3] dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K Geetha sowjanya
  2 siblings, 1 reply; 7+ messages in thread
From: Geetha sowjanya @ 2026-06-07 12:51 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel, linux-arm-kernel, devicetree
  Cc: mark.rutland, will, krzk+dt, gakula

CN20K LLC-TAD uses non-standard PFC/PRF page offsets relative to each
TAD region base. Add a TAD_PMU_V3 profile with per-platform register bases,
extend the CN10K event map for new counters, and match the block via OF and
ACPI.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
---
 drivers/perf/marvell_cn10k_tad_pmu.c | 33 +++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 7a3b659c999a..4daf109b89ee 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -17,6 +17,8 @@
 
 #define TAD_PRF_OFFSET		0x900
 #define TAD_PFC_OFFSET		0x800
+#define TAD_PRF_NS_OFFSET	0x30900
+#define TAD_PFC_NS_OFFSET	0x30800
 #define TAD_PFC(base, counter)	((base) | ((u64)(counter) << 3))
 #define TAD_PRF(base, counter)	((base) | ((u64)(counter) << 3))
 #define TAD_PRF_CNTSEL_MASK	0xFF
@@ -33,6 +35,7 @@ struct tad_region {
 enum mrvl_tad_pmu_version {
 	TAD_PMU_V1 = 1,
 	TAD_PMU_V2,
+	TAD_PMU_V3,
 };
 
 struct tad_pmu_data {
@@ -80,8 +83,14 @@ static void tad_pmu_start_counter(struct tad_pmu *pmu,
 	if (partid && event_idx > 0x19 && event_idx < 0x21) {
 		partid_filter = TAD_PRF_MATCH_PARTID | TAD_PRF_PARTID_NS |
 				((u64)partid << 11);
+
+		if (pdata->id == TAD_PMU_V3)
+			partid_filter = TAD_PRF_MATCH_PARTID | BIT_ULL(25) |
+				((u64)partid << 10);
 	}
 
+	if (pdata->id == TAD_PMU_V1 && event_idx > 0x24)
+		return;
 
 	for (i = 0; i < pmu->region_cnt; i++) {
 		reg_val = event_idx & 0xFF;
@@ -261,6 +270,18 @@ static struct attribute *tad_pmu_event_attrs[] = {
 	TAD_PMU_EVENT_ATTR(tad_dat_rd_byp, 0x22),
 	TAD_PMU_EVENT_ATTR(tad_ifb_occ, 0x23),
 	TAD_PMU_EVENT_ATTR(tad_req_occ, 0x24),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dtg_evict, 0x25),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_ltg_evict, 0x26),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_mpam, 0x28),
+	TAD_PMU_EVENT_ATTR(tad_replays, 0x29),
+	TAD_PMU_EVENT_ATTR(tad_req_byp0, 0x2a),
+	TAD_PMU_EVENT_ATTR(tad_req_byp1, 0x2b),
+	TAD_PMU_EVENT_ATTR(tad_txreq_byp, 0x2c),
+	TAD_PMU_EVENT_ATTR(tad_time_in_dslp, 0x2d),
+	TAD_PMU_EVENT_ATTR(tad_time_elapsed, 0x2e),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_rd_128mrg, 0x2f),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_wr_128mrg, 0x30),
+	TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xff),
 	NULL
 };
 
@@ -426,7 +447,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
 	tad_pmu->region_cnt = tad_cnt;
 
 	tad_pmu->pmu = (struct pmu) {
-
 		.module		= THIS_MODULE,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE |
 				  PERF_PMU_CAP_NO_INTERRUPT,
@@ -440,7 +460,7 @@ static int tad_pmu_probe(struct platform_device *pdev)
 		.read		= tad_pmu_event_counter_read,
 	};
 
-	if (version == TAD_PMU_V1) {
+	if (version == TAD_PMU_V1 || version == TAD_PMU_V3) {
 		tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
 		tad_pmu->ops		 = &tad_pmu_ops;
 	} else {
@@ -482,6 +502,11 @@ static const struct tad_pmu_data tad_pmu_data = {
 	.tad_pfc_offset = TAD_PFC_OFFSET,
 };
 
+static const struct tad_pmu_data tad_pmu_cn20k_data = {
+	.id   = TAD_PMU_V3,
+	.tad_prf_offset = TAD_PRF_NS_OFFSET,
+	.tad_pfc_offset = TAD_PFC_NS_OFFSET,
+};
 #endif
 
 #ifdef CONFIG_ACPI
@@ -495,6 +520,7 @@ static const struct tad_pmu_data tad_pmu_v2_data = {
 #ifdef CONFIG_OF
 static const struct of_device_id tad_pmu_of_match[] = {
 	{ .compatible = "marvell,cn10k-tad-pmu", .data = &tad_pmu_data },
+	{ .compatible = "marvell,cn20k-tad-pmu", .data = &tad_pmu_cn20k_data },
 	{},
 };
 #endif
@@ -503,6 +529,7 @@ static const struct of_device_id tad_pmu_of_match[] = {
 static const struct acpi_device_id tad_pmu_acpi_match[] = {
 	{"MRVL000B", (kernel_ulong_t)&tad_pmu_data},
 	{"MRVL000D", (kernel_ulong_t)&tad_pmu_v2_data},
+	{"MRVL000F", (kernel_ulong_t)&tad_pmu_cn20k_data},
 	{},
 };
 MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);
@@ -564,6 +591,6 @@ static void __exit tad_pmu_exit(void)
 module_init(tad_pmu_init);
 module_exit(tad_pmu_exit);
 
-MODULE_DESCRIPTION("Marvell CN10K LLC-TAD Perf driver");
+MODULE_DESCRIPTION("Marvell CN10K/CN20K LLC-TAD Perf driver");
 MODULE_AUTHOR("Bhaskara Budiredla <bbudiredla@marvell.com>");
 MODULE_LICENSE("GPL v2");
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K
  2026-06-07 12:50 [PATCH 0/3] perf: marvell: LLC-TAD PMU MPAM filtering and CN20K support Geetha sowjanya
  2026-06-07 12:50 ` [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU Geetha sowjanya
  2026-06-07 12:51 ` [PATCH 2/3] perf: marvell: Add CN20K LLC-TAD PMU support Geetha sowjanya
@ 2026-06-07 12:51 ` Geetha sowjanya
  2026-06-07 12:57   ` sashiko-bot
  2 siblings, 1 reply; 7+ messages in thread
From: Geetha sowjanya @ 2026-06-07 12:51 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel, linux-arm-kernel, devicetree
  Cc: mark.rutland, will, krzk+dt, gakula

Allow marvell,cn20k-tad-pmu alongside marvell,cn10k-tad-pmu, document
CN20K in the title and description, add a maintainer, and include a
CN20K example node with the same required properties as CN10K.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
---
 .../bindings/perf/marvell-cn10k-tad.yaml      | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/perf/marvell-cn10k-tad.yaml b/Documentation/devicetree/bindings/perf/marvell-cn10k-tad.yaml
index 362142252667..1612052b59ae 100644
--- a/Documentation/devicetree/bindings/perf/marvell-cn10k-tad.yaml
+++ b/Documentation/devicetree/bindings/perf/marvell-cn10k-tad.yaml
@@ -4,23 +4,27 @@
 $id: http://devicetree.org/schemas/perf/marvell-cn10k-tad.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell CN10K LLC-TAD performance monitor
+title: Marvell CN10K / CN20K LLC-TAD performance monitor
 
 maintainers:
   - Bhaskara Budiredla <bbudiredla@marvell.com>
+  - Geetha sowjanya <gakula@marvell.com>
 
 description: |
-  The Tag-and-Data units (TADs) maintain coherence and contain CN10K
-  shared on-chip last level cache (LLC). The tad pmu measures the
-  performance of last-level cache. Each tad pmu supports up to eight
-  counters.
+  The Tag-and-Data units (TADs) maintain coherence and contain the
+  shared on-chip last level cache (LLC) on Marvell CN10K and CN20K SoCs.
+  The TAD PMU measures last-level cache performance. Each TAD PMU
+  supports up to eight counters.
 
-  The DT setup comprises of number of tad blocks, the sizes of pmu
-  regions, tad blocks and overall base address of the HW.
+  The DT setup describes the number of TAD blocks, the sizes of PMU
+  regions and TAD pages, and the overall MMIO base of the hardware.
 
 properties:
   compatible:
-    const: marvell,cn10k-tad-pmu
+    items:
+      - enum:
+          - marvell,cn10k-tad-pmu
+          - marvell,cn20k-tad-pmu
 
   reg:
     maxItems: 1
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K
  2026-06-07 12:51 ` [PATCH 3/3] dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K Geetha sowjanya
@ 2026-06-07 12:57   ` sashiko-bot
  0 siblings, 0 replies; 7+ messages in thread
From: sashiko-bot @ 2026-06-07 12:57 UTC (permalink / raw)
  To: Geetha sowjanya; +Cc: conor+dt, linux-perf-users, devicetree, robh

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Medium] The commit message claims to add a CN20K example node, but the patch does not include it.
--

commit d21c87a34e4ef8a32cf63c6310a789363873d765
Author: Geetha sowjanya <gakula@marvell.com>

dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K

This commit extends the Marvell CN10K TAD PMU device tree binding to support
the CN20K SoC. It updates the title, description, maintainers, and compatible
strings.

> dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K
> 
> Allow marvell,cn20k-tad-pmu alongside marvell,cn10k-tad-pmu, document
> CN20K in the title and description, add a maintainer, and include a
> CN20K example node with the same required properties as CN10K.

[Severity: Medium]
The commit message mentions including a CN20K example node, but the patch
does not contain any changes to the examples section in the YAML schema.
Was the CN20K example node accidentally omitted from the patch?

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260607125101.17778-1-gakula@marvell.com?part=3

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU
  2026-06-07 12:50 ` [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU Geetha sowjanya
@ 2026-06-07 13:02   ` sashiko-bot
  0 siblings, 0 replies; 7+ messages in thread
From: sashiko-bot @ 2026-06-07 13:02 UTC (permalink / raw)
  To: Geetha sowjanya; +Cc: conor+dt, devicetree, robh, linux-perf-users

Thank you for your contribution! Sashiko AI review found 5 potential issue(s) to consider:

New issues:
- [Medium] The MPAM partition ID filter incorrectly conflates `partid == 0` with 'filter disabled', making it impossible to specifically monitor traffic for the default partition (Partition 0).
- [Medium] The driver fails to validate `config1` during event initialization, silently ignoring unsupported filter requests instead of rejecting them.

Pre-existing issues:
- [High] The probe function permanently mutates the platform device's memory resource, corrupting it and causing subsequent PMU probes to fail.
- [High] If the memory resource size is smaller than expected, an early loop termination leaves region pointers unmapped but sets `region_cnt` incorrectly, causing a kernel panic.
- [High] Registering the CPU hotplug instance before the PMU creates a race window where a concurrent CPU offline event triggers context migration on an uninitialized PMU context.
--

commit f8a0b0ecde1dfeec7798c8aca48869ad592602c8
Author: Tanmay Jagdale <tanmay@marvell.com>

perf: marvell: Add MPAM partid filtering to CN10K TAD PMU

The TAD PMU exposes counters that can be filtered by MPAM partition id
for a subset of allocation and hit events.

Add a 16-bit partid format attribute (config1) and route counter
programming through variant-specific ops so CN10K keeps MPAM-capable
programming while Odyssey keeps the reduced event set without
advertising partid in sysfs.

> diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
> index 51ccb0befa05d..7a3b659c999a2 100644
> --- a/drivers/perf/marvell_cn10k_tad_pmu.c
> +++ b/drivers/perf/marvell_cn10k_tad_pmu.c
[ ... ]
> @@ -27,30 +30,92 @@ struct tad_region {
>  	void __iomem	*base;
>  };
>  
[ ... ]
> +static void tad_pmu_start_counter(struct tad_pmu *pmu,
> +				  struct perf_event *event)
> +{
> +	const struct tad_pmu_data *pdata = pmu->pdata;
> +	struct hw_perf_event *hwc = &event->hw;
> +	u32 event_idx = event->attr.config;
> +	u32 counter_idx = hwc->idx;
> +	u64 partid_filter = 0;
> +	u64 reg_val;
> +	u32 partid;
> +	int i;
> +
> +	partid = (u32)(event->attr.config1 & GENMASK(15, 0));
> +
> +	for (i = 0; i < pmu->region_cnt; i++)
> +		writeq_relaxed(0, pmu->regions[i].base +
> +			       TAD_PFC(pdata->tad_pfc_offset, counter_idx));
> +
> +	if (partid && event_idx > 0x19 && event_idx < 0x21) {

[Severity: Medium]
Since an unconfigured perf_event_attr defaults to 0, using 0 as a sentinel
for "no filter" means explicit requests for Partition 0 will bypass the filter.

Can this be changed so that users can specifically monitor traffic for
Partition 0?

> +		partid_filter = TAD_PRF_MATCH_PARTID | TAD_PRF_PARTID_NS |
> +				((u64)partid << 11);
> +	}
[ ... ]
> @@ -242,9 +288,11 @@ static const struct attribute_group ody_tad_pmu_events_attr_group = {
>  };
>  
>  PMU_FORMAT_ATTR(event, "config:0-7");
> +PMU_FORMAT_ATTR(partid, "config1:0-15");

[Severity: Medium]
Is there missing validation for config1 in tad_pmu_event_init()?

Without it, the driver silently ignores unsupported filter requests instead of
rejecting them with -EINVAL, returning globally aggregated counts rather than
the requested isolated counts.

>  
>  static struct attribute *tad_pmu_format_attrs[] = {
>  	&format_attr_event.attr,
> +	&format_attr_partid.attr,
>  	NULL
>  };
[ ... ]
> @@ -344,7 +411,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
>  	if (!regions)
>  		return -ENOMEM;
>  
> -	/* ioremap the distributed TAD pmu regions */
>  	for (i = 0; i < tad_cnt && res->start < res->end; i++) {
>  		regions[i].base = devm_ioremap(&pdev->dev,
>  					       res->start,

[Severity: High]
This is a pre-existing issue, but the probe function mutates the platform
device's memory resource (res->start += tad_page_size inside the loop).

If the driver is unbound and rebound, does this corrupted res->start cause
the loop condition to fail, leaving regions[i].base unpopulated and causing
a NULL pointer dereference later?

[Severity: High]
This is also a pre-existing issue, but if this loop terminates early because
the memory resource size is smaller than expected, tad_pmu->region_cnt is
later set to tad_cnt instead of the number of mapped regions (i).

Could this result in out-of-bounds NULL pointer dereferences during PMU
operations?

> @@ -374,14 +440,16 @@ static int tad_pmu_probe(struct platform_device *pdev)
>  		.read		= tad_pmu_event_counter_read,
>  	};
>  
> -	if (version == TAD_PMU_V1)
> +	if (version == TAD_PMU_V1) {
>  		tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
> -	else
> +		tad_pmu->ops		 = &tad_pmu_ops;
> +	} else {
>  		tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
> +		tad_pmu->ops		 = &tad_pmu_v2_ops;
> +	}
>  
>  	tad_pmu->cpu = raw_smp_processor_id();
>  
> -	/* Register pmu instance for cpu hotplug */
>  	ret = cpuhp_state_add_instance_nocalls(tad_pmu_cpuhp_state,
>  					       &tad_pmu->node);

[Severity: High]
This is a pre-existing issue, but does registering the CPU hotplug instance
before the PMU is fully initialized create a race window?

If a CPU offline event happens before perf_pmu_register() completes,
could perf_pmu_migrate_context() dereference an unallocated
pmu->cpu_pmu_context, leading to a kernel panic?

>  	if (ret) {

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260607125101.17778-1-gakula@marvell.com?part=1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] perf: marvell: Add CN20K LLC-TAD PMU support
  2026-06-07 12:51 ` [PATCH 2/3] perf: marvell: Add CN20K LLC-TAD PMU support Geetha sowjanya
@ 2026-06-07 13:04   ` sashiko-bot
  0 siblings, 0 replies; 7+ messages in thread
From: sashiko-bot @ 2026-06-07 13:04 UTC (permalink / raw)
  To: Geetha sowjanya; +Cc: linux-perf-users, devicetree, conor+dt, robh

Thank you for your contribution! Sashiko AI review found 6 potential issue(s) to consider:

New issues:
- [High] The shifted `partid` value overlaps with the hardcoded Non-Secure (`NS`) flag on CN20K (V3), corrupting the filter configuration.
- [Medium] The driver shares the PMU events attribute array between CN10K (V1) and CN20K (V3) without visibility checks, exposing unsupported V3 events on V1 systems and breaking the pre-existing `tad_tot_cycle` (0xFF) event.
- [Medium] The PMU driver prevents users from profiling MPAM PARTID 0, as it overloads `partid == 0` to mean 'filtering disabled'.

Pre-existing issues:
- [High] Malformed ACPI/DT memory resources cause NULL pointer dereferences if they are smaller than `tad_cnt`, as the driver unconditionally assigns the expected count instead of the mapped count.
- [High] The driver modifies the global platform_device resource bounds during probe, corrupting subsequent ioremap attempts and causing driver rebinds to fail.
- [High] Missing `hwc->prev_count` reset when starting counters leads to bogus perf event counts upon read.
--

commit db735a497cd5d9c282e3c5a1fa6ad9ee8d6ea442
Author: Geetha sowjanya <gakula@marvell.com>
Subject: perf: marvell: Add CN20K LLC-TAD PMU support

Adds support for the CN20K LLC-TAD PMU by defining a TAD_PMU_V3 profile
with per-platform register bases. It extends the CN10K event map for new
counters and matches the block via OF and ACPI.

> diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
> index 7a3b659c999a2..4daf109b89eea 100644
> --- a/drivers/perf/marvell_cn10k_tad_pmu.c
> +++ b/drivers/perf/marvell_cn10k_tad_pmu.c
[ ... ]
> @@ -80,8 +83,14 @@ static void tad_pmu_start_counter(struct tad_pmu *pmu,
>  	if (partid && event_idx > 0x19 && event_idx < 0x21) {

[Severity: High]
This is a pre-existing issue, but does tad_pmu_event_counter_start() fail to
reset the software prev_count when starting the counters?

When the hardware counters are zeroed out earlier in tad_pmu_start_counter(),
local64_set(&hwc->prev_count, 0) is not called to match. When the counter is
later stopped or read, computing new minus prev can underflow because prev
still holds the value from the previous time the event was stopped.

[Severity: Medium]
Does checking if partid is non-zero prevent users from profiling MPAM PARTID 0?

If a user explicitly specifies partid 0, which is a valid MPAM partition, it
evaluates to false. This bypasses the hardware filter configuration entirely,
causing the hardware to count events for all partitions instead of exclusively
restricting to PARTID 0.

>  		partid_filter = TAD_PRF_MATCH_PARTID | TAD_PRF_PARTID_NS |
>  				((u64)partid << 11);
> +
> +		if (pdata->id == TAD_PMU_V3)
> +			partid_filter = TAD_PRF_MATCH_PARTID | BIT_ULL(25) |
> +				((u64)partid << 10);

[Severity: High]
Does shifting a 16-bit partid by 10 cause a bit collision with the hardcoded
Non-Secure flag at bit 25?

Because BIT_ULL(25) occupies the exact same bit position as the most
significant bit (bit 15) of the shifted 16-bit partid, any user-provided
partid with bit 15 set might corrupt the hardware filter value.

>  	}
>  
> +	if (pdata->id == TAD_PMU_V1 && event_idx > 0x24)
> +		return;

[Severity: Medium]
Does this early return prevent tad_tot_cycle (0xFF) from working on V1
hardware?

Because 0xFF is greater than 0x24, attempting to measure the total cycle
event on V1 will be silently ignored, leaving the counter unprogrammed.

>  
>  	for (i = 0; i < pmu->region_cnt; i++) {
>  		reg_val = event_idx & 0xFF;
[ ... ]
> @@ -261,6 +270,18 @@ static struct attribute *tad_pmu_event_attrs[] = {
>  	TAD_PMU_EVENT_ATTR(tad_dat_rd_byp, 0x22),
>  	TAD_PMU_EVENT_ATTR(tad_ifb_occ, 0x23),
>  	TAD_PMU_EVENT_ATTR(tad_req_occ, 0x24),
> +	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dtg_evict, 0x25),
> +	TAD_PMU_EVENT_ATTR(tad_req_msh_out_ltg_evict, 0x26),
> +	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_mpam, 0x28),
> +	TAD_PMU_EVENT_ATTR(tad_replays, 0x29),
> +	TAD_PMU_EVENT_ATTR(tad_req_byp0, 0x2a),
> +	TAD_PMU_EVENT_ATTR(tad_req_byp1, 0x2b),
> +	TAD_PMU_EVENT_ATTR(tad_txreq_byp, 0x2c),
> +	TAD_PMU_EVENT_ATTR(tad_time_in_dslp, 0x2d),
> +	TAD_PMU_EVENT_ATTR(tad_time_elapsed, 0x2e),
> +	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_rd_128mrg, 0x2f),
> +	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_wr_128mrg, 0x30),
> +	TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xff),
>  	NULL
>  };

[Severity: Medium]
Since these V3-specific events are added to the shared attribute array
without an is_visible callback, are they incorrectly advertised to
userspace on V1 hardware?

[ ... ]
> @@ -440,7 +460,7 @@ static int tad_pmu_probe(struct platform_device *pdev)
>  		.read		= tad_pmu_event_counter_read,
>  	};
>  
> -	if (version == TAD_PMU_V1) {
> +	if (version == TAD_PMU_V1 || version == TAD_PMU_V3) {
>  		tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
>  		tad_pmu->ops		 = &tad_pmu_ops;
>  	} else {

[Severity: High]
This is a pre-existing issue, but in the region initialization loop earlier
in tad_pmu_probe():

for (i = 0; i < tad_cnt && res->start < res->end; i++) {
    ...
}
tad_pmu->region_cnt = tad_cnt;

If the loop terminates early due to the resource size being smaller than
expected, region_cnt is unconditionally assigned tad_cnt instead of the
successfully mapped count (i). Does this lead to NULL pointer dereferences
later when PMU operations iterate up to region_cnt and access the
uninitialized base pointers?

[Severity: High]
This is another pre-existing issue, but in that same tad_pmu_probe()
initialization loop:

for (i = 0; i < tad_cnt && res->start < res->end; i++) {
    ...
    res->start += tad_page_size;
}

Does modifying res->start directly corrupt the global platform_device
resource bounds?

Since platform_get_resource() returns a direct pointer to the device's
resource descriptor, permanently altering it means subsequent ioremap
attempts will start from the corrupted offset if the driver module is
reloaded or if probe defers.

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260607125101.17778-1-gakula@marvell.com?part=2

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-06-07 13:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-07 12:50 [PATCH 0/3] perf: marvell: LLC-TAD PMU MPAM filtering and CN20K support Geetha sowjanya
2026-06-07 12:50 ` [PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU Geetha sowjanya
2026-06-07 13:02   ` sashiko-bot
2026-06-07 12:51 ` [PATCH 2/3] perf: marvell: Add CN20K LLC-TAD PMU support Geetha sowjanya
2026-06-07 13:04   ` sashiko-bot
2026-06-07 12:51 ` [PATCH 3/3] dt-bindings: perf: marvell: Extend CN10K TAD PMU binding for CN20K Geetha sowjanya
2026-06-07 12:57   ` sashiko-bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox