Linux Perf Users
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] perf/x86/amd/uncore: Add common helpers and Hygon support
@ 2026-05-19  3:32 Qi Liu
  2026-05-19  3:32 ` [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions Qi Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Qi Liu @ 2026-05-19  3:32 UTC (permalink / raw)
  To: peterz, mingo, namhyung, alexander.shishkin, jolsa, x86
  Cc: linux-perf-users, linux-kernel, lijing, liuqi

This series adds initial Hygon uncore PMU support.

Hygon uncore PMUs use a programming model close to the AMD uncore PMU,
so this series factors out a small set of common helpers for perf event
handling, counter assignment and per-CPU context management.

The Hygon-specific support is kept in the Hygon driver. This RFC
currently includes support for the Hygon DF and DF IOD PMUs. No
Hygon-specific logic is added to the AMD driver.

There is also an Hygon L3 PMU patch currently under review:
https://lore.kernel.org/lkml/9a4731a46cc0a7645838bd848a40a6574a97bad6.1775548724.git.fuhao@open-hieco.net/
If the overall approach in this RFC looks acceptable, that support can
be rebased onto this common helper structure in follow-up patches.

This is an RFC. Comments on the helper boundaries and file placement
are welcome.

Qi Liu (3):
  perf/x86/amd/uncore: Add common PMU helper functions
  perf/x86/amd/uncore: Convert AMD driver to common PMU helpers
  perf/x86/amd/uncore: Add Hygon uncore PMU support

 arch/x86/events/Kconfig             |  11 +
 arch/x86/events/amd/Makefile        |   4 +-
 arch/x86/events/amd/hygon_uncore.c  | 567 +++++++++++++++++++++++++++
 arch/x86/events/amd/uncore.c        | 585 ++++------------------------
 arch/x86/events/amd/uncore_common.c | 390 +++++++++++++++++++
 arch/x86/events/amd/uncore_common.h | 113 ++++++
 arch/x86/include/asm/msr-index.h    |   2 +
 arch/x86/include/asm/perf_event.h   |  20 +
 include/linux/cpuhotplug.h          |   3 +
 9 files changed, 1184 insertions(+), 511 deletions(-)
 create mode 100644 arch/x86/events/amd/hygon_uncore.c
 create mode 100644 arch/x86/events/amd/uncore_common.c
 create mode 100644 arch/x86/events/amd/uncore_common.h

-- 
2.34.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions
  2026-05-19  3:32 [RFC PATCH 0/3] perf/x86/amd/uncore: Add common helpers and Hygon support Qi Liu
@ 2026-05-19  3:32 ` Qi Liu
  2026-05-19  4:07   ` sashiko-bot
  2026-05-19  3:32 ` [RFC PATCH 2/3] perf/x86/amd/uncore: Convert AMD driver to common PMU helpers Qi Liu
  2026-05-19  3:32 ` [RFC PATCH 3/3] perf/x86/amd/uncore: Add Hygon uncore PMU support Qi Liu
  2 siblings, 1 reply; 7+ messages in thread
From: Qi Liu @ 2026-05-19  3:32 UTC (permalink / raw)
  To: peterz, mingo, namhyung, alexander.shishkin, jolsa, x86
  Cc: linux-perf-users, linux-kernel, lijing, liuqi, Zhenglang Hu

Add common helper functions for AMD-family uncore PMU handling.

The helpers cover event initialization, counter allocation, counter
read/update, event start/stop and per-CPU context management.  These
paths are not tied to a specific uncore unit and can be reused by
drivers with a similar uncore PMU programming model.

Signed-off-by: Qi Liu <liuqi@hygon.cn>
Tested-by: Zhenglang Hu <huzhenglang@hygon.cn>
---
 arch/x86/events/amd/uncore_common.c | 390 ++++++++++++++++++++++++++++
 arch/x86/events/amd/uncore_common.h | 113 ++++++++
 2 files changed, 503 insertions(+)
 create mode 100644 arch/x86/events/amd/uncore_common.c
 create mode 100644 arch/x86/events/amd/uncore_common.h

diff --git a/arch/x86/events/amd/uncore_common.c b/arch/x86/events/amd/uncore_common.c
new file mode 100644
index 000000000000..a6d50fe803df
--- /dev/null
+++ b/arch/x86/events/amd/uncore_common.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Common uncore PMU helpers for AMD-family x86 processors.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/hrtimer.h>
+#include <linux/percpu.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+
+#include <asm/msr.h>
+#include <asm/perf_event.h>
+
+#include "uncore_common.h"
+
+#define COUNTER_SHIFT		16
+#define NUM_COUNTERS_MAX	64
+
+/* Interval for hrtimer, defaults to 60000 milliseconds */
+static unsigned int uncore_update_interval = 60 * MSEC_PER_SEC;
+
+void uncore_common_set_update_interval(unsigned int interval)
+{
+	uncore_update_interval = interval;
+}
+
+struct uncore_common_pmu *event_to_uncore_common_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct uncore_common_pmu, pmu);
+}
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	struct pmu *ptr = dev_get_drvdata(dev);
+	struct uncore_common_pmu *pmu;
+
+	pmu = container_of(ptr, struct uncore_common_pmu, pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *uncore_common_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+struct attribute_group uncore_common_attr_group = {
+	.attrs = uncore_common_attrs,
+};
+
+static enum hrtimer_restart uncore_common_hrtimer(struct hrtimer *hrtimer)
+{
+	struct uncore_common_ctx *ctx;
+	struct perf_event *event;
+	int bit;
+
+	ctx = container_of(hrtimer, struct uncore_common_ctx, hrtimer);
+
+	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
+		return HRTIMER_NORESTART;
+
+	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
+		event = ctx->events[bit];
+		event->pmu->read(event);
+	}
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
+
+	return HRTIMER_RESTART;
+}
+
+void uncore_common_start_hrtimer(struct uncore_common_ctx *ctx)
+{
+	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
+		      HRTIMER_MODE_REL_PINNED_HARD);
+}
+
+static void uncore_common_cancel_hrtimer(struct uncore_common_ctx *ctx)
+{
+	hrtimer_cancel(&ctx->hrtimer);
+}
+
+static void uncore_common_init_hrtimer(struct uncore_common_ctx *ctx)
+{
+	hrtimer_setup(&ctx->hrtimer, uncore_common_hrtimer, CLOCK_MONOTONIC,
+		      HRTIMER_MODE_REL_HARD);
+}
+
+void uncore_common_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev, new;
+	s64 delta;
+
+	/*
+	 * since we do not enable counter overflow interrupts,
+	 * we do not have to worry about prev_count changing on us
+	 */
+	prev = local64_read(&hwc->prev_count);
+
+	/*
+	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
+	 * read counts directly from the corresponding PERF_CTR.
+	 */
+	if (hwc->event_base_rdpmc < 0)
+		rdmsrq(hwc->event_base, new);
+	else
+		new = rdpmc(hwc->event_base_rdpmc);
+
+	local64_set(&hwc->prev_count, new);
+
+	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+	delta >>= COUNTER_SHIFT;
+
+	local64_add(delta, &event->count);
+}
+
+void uncore_common_start(struct perf_event *event, int flags)
+{
+	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
+	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!ctx->nr_active++)
+		uncore_common_start_hrtimer(ctx);
+
+	if (flags & PERF_EF_RELOAD)
+		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+	hwc->state = 0;
+
+	__set_bit(hwc->idx, ctx->active_mask);
+	wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+
+	perf_event_update_userpage(event);
+}
+
+void uncore_common_stop(struct perf_event *event, int flags)
+{
+	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
+	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrq(hwc->config_base, hwc->config);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		event->pmu->read(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+
+	if (!--ctx->nr_active)
+		uncore_common_cancel_hrtimer(ctx);
+
+	__clear_bit(hwc->idx, ctx->active_mask);
+}
+
+int uncore_common_event_init(struct perf_event *event)
+{
+	struct uncore_common_pmu *pmu;
+	struct uncore_common_ctx *ctx;
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	pmu = event_to_uncore_common_pmu(event);
+	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	if (!ctx)
+		return -ENODEV;
+
+	hwc->config = event->attr.config;
+	hwc->idx = -1;
+
+	event->cpu = ctx->cpu;
+
+	return 0;
+}
+
+int uncore_common_add(struct perf_event *event, int flags)
+{
+	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
+	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	struct hw_perf_event *hwc = &event->hw;
+	int i;
+
+	/* are we already assigned? */
+	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
+		goto out;
+
+	for (i = 0; i < pmu->num_counters; i++) {
+		if (ctx->events[i] == event) {
+			hwc->idx = i;
+			goto out;
+		}
+	}
+
+	/* if not, take the first available counter */
+	hwc->idx = -1;
+
+	for (i = 0; i < pmu->num_counters; i++) {
+		struct perf_event *tmp = NULL;
+
+		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
+			hwc->idx = i;
+			break;
+		}
+	}
+
+out:
+	if (hwc->idx == -1)
+		return -EBUSY;
+
+	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
+	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
+	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (pmu->rdpmc_base < 0)
+		hwc->event_base_rdpmc = -1;
+
+	if (flags & PERF_EF_START)
+		event->pmu->start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+void uncore_common_del(struct perf_event *event, int flags)
+{
+	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
+	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	struct hw_perf_event *hwc = &event->hw;
+	int i;
+
+	event->pmu->stop(event, PERF_EF_UPDATE);
+
+	for (i = 0; i < pmu->num_counters; i++) {
+		struct perf_event *tmp = event;
+
+		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
+			break;
+	}
+
+	hwc->idx = -1;
+}
+
+int uncore_common_ctx_init(struct uncore_common *uncore, unsigned int cpu)
+{
+	struct uncore_common_ctx *curr, *prev;
+	struct uncore_common_pmu *pmu;
+	int node, cid, gid;
+	int i, j;
+
+	if (!uncore->init_done || !uncore->num_pmus)
+		return 0;
+
+	cid = uncore_common_ctx_cid(uncore, cpu);
+	gid = uncore_common_ctx_gid(uncore, cpu);
+
+	for (i = 0; i < uncore->num_pmus; i++) {
+		pmu = &uncore->pmus[i];
+		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
+		curr = NULL;
+
+		if (gid != pmu->group)
+			continue;
+
+		for_each_online_cpu(j) {
+			if (cpu == j)
+				continue;
+
+			prev = *per_cpu_ptr(pmu->ctx, j);
+			if (!prev)
+				continue;
+
+			if (cid == uncore_common_ctx_cid(uncore, j)) {
+				curr = prev;
+				break;
+			}
+		}
+
+		if (!curr) {
+			node = cpu_to_node(cpu);
+
+			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
+			if (!curr)
+				goto fail;
+
+			curr->cpu = cpu;
+			curr->events = kzalloc_node(sizeof(*curr->events) *
+						    pmu->num_counters,
+						    GFP_KERNEL, node);
+			if (!curr->events) {
+				kfree(curr);
+				goto fail;
+			}
+
+			uncore_common_init_hrtimer(curr);
+			curr->hrtimer_duration = (u64)uncore_update_interval * NSEC_PER_MSEC;
+
+			cpumask_set_cpu(cpu, &pmu->active_mask);
+		}
+
+		curr->refcnt++;
+		*per_cpu_ptr(pmu->ctx, cpu) = curr;
+	}
+
+	return 0;
+
+fail:
+	uncore_common_ctx_free(uncore, cpu);
+
+	return -ENOMEM;
+}
+
+void uncore_common_ctx_free(struct uncore_common *uncore, unsigned int cpu)
+{
+	struct uncore_common_pmu *pmu;
+	struct uncore_common_ctx *ctx;
+	int i;
+
+	if (!uncore->init_done)
+		return;
+
+	for (i = 0; i < uncore->num_pmus; i++) {
+		pmu = &uncore->pmus[i];
+
+		if (!pmu->ctx)
+			continue;
+
+		ctx = *per_cpu_ptr(pmu->ctx, cpu);
+		if (!ctx)
+			continue;
+
+		if (cpu == ctx->cpu)
+			cpumask_clear_cpu(cpu, &pmu->active_mask);
+
+		if (!--ctx->refcnt) {
+			kfree(ctx->events);
+			kfree(ctx);
+		}
+
+		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
+	}
+}
+
+void uncore_common_ctx_move(struct uncore_common *uncore, unsigned int cpu)
+{
+	struct uncore_common_ctx *curr, *next;
+	struct uncore_common_pmu *pmu;
+	int i, j;
+
+	if (!uncore->init_done)
+		return;
+
+	for (i = 0; i < uncore->num_pmus; i++) {
+		pmu = &uncore->pmus[i];
+		if (!pmu->ctx)
+			continue;
+
+		curr = *per_cpu_ptr(pmu->ctx, cpu);
+		if (!curr)
+			continue;
+
+		for_each_online_cpu(j) {
+			if (cpu == j)
+				continue;
+
+			next = *per_cpu_ptr(pmu->ctx, j);
+			if (!next)
+				continue;
+
+			if (curr == next) {
+				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
+				cpumask_clear_cpu(cpu, &pmu->active_mask);
+				cpumask_set_cpu(j, &pmu->active_mask);
+				next->cpu = j;
+				break;
+			}
+		}
+	}
+}
diff --git a/arch/x86/events/amd/uncore_common.h b/arch/x86/events/amd/uncore_common.h
new file mode 100644
index 000000000000..3657b22268c0
--- /dev/null
+++ b/arch/x86/events/amd/uncore_common.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _X86_EVENTS_UNCORE_COMMON_H
+#define _X86_EVENTS_UNCORE_COMMON_H
+
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/hrtimer.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define UNCORE_NAME_LEN		16
+#define UNCORE_GROUP_MAX	256
+#define NUM_COUNTERS_MAX	64
+
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
+static ssize_t __uncore_##_var##_show(struct device *dev,		\
+				struct device_attribute *attr,		\
+				char *page)				\
+{									\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
+	return sprintf(page, _format "\n");				\
+}									\
+static struct device_attribute format_attr_##_var =			\
+	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+union uncore_common_info {
+	struct {
+		u64 aux_data:32;
+		u64 num_pmcs:8;
+		u64 gid:8;
+		u64 cid:8;
+		u64 private:8;
+	} split;
+	u64 full;
+};
+
+struct uncore_common_ctx {
+	int refcnt;
+	int cpu;
+	struct perf_event **events;
+	unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
+	int nr_active;
+	struct hrtimer hrtimer;
+	u64 hrtimer_duration;
+};
+
+struct uncore_common_pmu {
+	char name[UNCORE_NAME_LEN];
+	int num_counters;
+	int rdpmc_base;
+	u32 msr_base;
+	int group;
+	cpumask_t active_mask;
+	struct pmu pmu;
+	struct uncore_common_ctx * __percpu *ctx;
+	void *private;
+};
+
+struct uncore_common {
+	union uncore_common_info __percpu *info;
+	struct uncore_common_pmu *pmus;
+	unsigned int num_pmus;
+	bool init_done;
+	void (*scan)(struct uncore_common *uncore, unsigned int cpu);
+	int (*init)(struct uncore_common *uncore, unsigned int cpu);
+	void (*move)(struct uncore_common *uncore, unsigned int cpu);
+	void (*free)(struct uncore_common *uncore, unsigned int cpu);
+};
+
+extern struct attribute_group uncore_common_attr_group;
+
+static inline int uncore_common_ctx_cid(struct uncore_common *uncore,
+					unsigned int cpu)
+{
+	union uncore_common_info *info = per_cpu_ptr(uncore->info, cpu);
+
+	return info->split.cid;
+}
+
+static inline int uncore_common_ctx_gid(struct uncore_common *uncore,
+					unsigned int cpu)
+{
+	union uncore_common_info *info = per_cpu_ptr(uncore->info, cpu);
+
+	return info->split.gid;
+}
+
+static inline int uncore_common_ctx_num_pmcs(struct uncore_common *uncore,
+					     unsigned int cpu)
+{
+	union uncore_common_info *info = per_cpu_ptr(uncore->info, cpu);
+
+	return info->split.num_pmcs;
+}
+
+struct uncore_common_pmu *event_to_uncore_common_pmu(struct perf_event *event);
+
+void uncore_common_set_update_interval(unsigned int interval);
+int uncore_common_event_init(struct perf_event *event);
+int uncore_common_add(struct perf_event *event, int flags);
+void uncore_common_del(struct perf_event *event, int flags);
+void uncore_common_start(struct perf_event *event, int flags);
+void uncore_common_stop(struct perf_event *event, int flags);
+void uncore_common_read(struct perf_event *event);
+
+int uncore_common_ctx_init(struct uncore_common *uncore, unsigned int cpu);
+void uncore_common_ctx_free(struct uncore_common *uncore, unsigned int cpu);
+void uncore_common_ctx_move(struct uncore_common *uncore, unsigned int cpu);
+void uncore_common_start_hrtimer(struct uncore_common_ctx *ctx);
+
+#endif /* _X86_EVENTS_UNCORE_COMMON_H */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC PATCH 2/3] perf/x86/amd/uncore: Convert AMD driver to common PMU helpers
  2026-05-19  3:32 [RFC PATCH 0/3] perf/x86/amd/uncore: Add common helpers and Hygon support Qi Liu
  2026-05-19  3:32 ` [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions Qi Liu
@ 2026-05-19  3:32 ` Qi Liu
  2026-05-19  4:37   ` sashiko-bot
  2026-05-19  3:32 ` [RFC PATCH 3/3] perf/x86/amd/uncore: Add Hygon uncore PMU support Qi Liu
  2 siblings, 1 reply; 7+ messages in thread
From: Qi Liu @ 2026-05-19  3:32 UTC (permalink / raw)
  To: peterz, mingo, namhyung, alexander.shishkin, jolsa, x86
  Cc: linux-perf-users, linux-kernel, lijing, liuqi, Zhenglang Hu

Use the common uncore PMU helpers for AMD uncore event handling and
per-CPU context management.

The AMD-specific DF, L3 and UMC discovery, event encoding and PMU setup
logic remains in the AMD driver.  This only replaces duplicated common
operations with calls to the helper functions.

Signed-off-by: Qi Liu <liuqi@hygon.cn>
Tested-by: Zhenglang Hu <huzhenglang@hygon.cn>
---
 arch/x86/events/amd/Makefile |   2 +-
 arch/x86/events/amd/uncore.c | 585 +++++------------------------------
 2 files changed, 76 insertions(+), 511 deletions(-)

diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index 527d947eb76b..f951ae64ee36 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_PERF_EVENTS_AMD_BRS)	+= brs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
 obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE)	+= amd-uncore.o
-amd-uncore-objs				:= uncore.o
+amd-uncore-objs				:= uncore_common.o uncore.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)		+= iommu.o
 endif
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index dd956cfcadef..065824b70497 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -18,16 +18,16 @@
 #include <asm/perf_event.h>
 #include <asm/msr.h>
 
+#include "uncore_common.h"
+
 #define NUM_COUNTERS_NB		4
 #define NUM_COUNTERS_L2		4
 #define NUM_COUNTERS_L3		6
-#define NUM_COUNTERS_MAX	64
 
 #define RDPMC_BASE_NB		6
 #define RDPMC_BASE_LLC		10
 
 #define COUNTER_SHIFT		16
-#define UNCORE_NAME_LEN		16
 #define UNCORE_GROUP_MAX	256
 
 #undef pr_fmt
@@ -35,27 +35,6 @@
 
 static int pmu_version;
 
-struct amd_uncore_ctx {
-	int refcnt;
-	int cpu;
-	struct perf_event **events;
-	unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
-	int nr_active;
-	struct hrtimer hrtimer;
-	u64 hrtimer_duration;
-};
-
-struct amd_uncore_pmu {
-	char name[UNCORE_NAME_LEN];
-	int num_counters;
-	int rdpmc_base;
-	u32 msr_base;
-	int group;
-	cpumask_t active_mask;
-	struct pmu pmu;
-	struct amd_uncore_ctx * __percpu *ctx;
-};
-
 enum {
 	UNCORE_TYPE_DF,
 	UNCORE_TYPE_L3,
@@ -63,243 +42,11 @@ enum {
 
 	UNCORE_TYPE_MAX
 };
-
-union amd_uncore_info {
-	struct {
-		u64	aux_data:32;	/* auxiliary data */
-		u64	num_pmcs:8;	/* number of counters */
-		u64	gid:8;		/* group id */
-		u64	cid:8;		/* context id */
-	} split;
-	u64		full;
-};
-
-struct amd_uncore {
-	union amd_uncore_info  __percpu *info;
-	struct amd_uncore_pmu *pmus;
-	unsigned int num_pmus;
-	bool init_done;
-	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
-	int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
-	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
-	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
-};
-
-static struct amd_uncore uncores[UNCORE_TYPE_MAX];
-
 /* Interval for hrtimer, defaults to 60000 milliseconds */
 static unsigned int update_interval = 60 * MSEC_PER_SEC;
 module_param(update_interval, uint, 0444);
 
-static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
-{
-	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
-}
-
-static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
-{
-	struct amd_uncore_ctx *ctx;
-	struct perf_event *event;
-	int bit;
-
-	ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
-
-	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
-		return HRTIMER_NORESTART;
-
-	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
-		event = ctx->events[bit];
-		event->pmu->read(event);
-	}
-
-	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
-	return HRTIMER_RESTART;
-}
-
-static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
-{
-	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
-		      HRTIMER_MODE_REL_PINNED_HARD);
-}
-
-static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
-{
-	hrtimer_cancel(&ctx->hrtimer);
-}
-
-static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
-{
-	hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
-}
-
-static void amd_uncore_read(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	u64 prev, new;
-	s64 delta;
-
-	/*
-	 * since we do not enable counter overflow interrupts,
-	 * we do not have to worry about prev_count changing on us
-	 */
-
-	prev = local64_read(&hwc->prev_count);
-
-	/*
-	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
-	 * read counts directly from the corresponding PERF_CTR.
-	 */
-	if (hwc->event_base_rdpmc < 0)
-		rdmsrq(hwc->event_base, new);
-	else
-		new = rdpmc(hwc->event_base_rdpmc);
-
-	local64_set(&hwc->prev_count, new);
-	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
-	delta >>= COUNTER_SHIFT;
-	local64_add(delta, &event->count);
-}
-
-static void amd_uncore_start(struct perf_event *event, int flags)
-{
-	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
-	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (!ctx->nr_active++)
-		amd_uncore_start_hrtimer(ctx);
-
-	if (flags & PERF_EF_RELOAD)
-		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
-
-	hwc->state = 0;
-	__set_bit(hwc->idx, ctx->active_mask);
-	wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
-	perf_event_update_userpage(event);
-}
-
-static void amd_uncore_stop(struct perf_event *event, int flags)
-{
-	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
-	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	wrmsrq(hwc->config_base, hwc->config);
-	hwc->state |= PERF_HES_STOPPED;
-
-	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-		event->pmu->read(event);
-		hwc->state |= PERF_HES_UPTODATE;
-	}
-
-	if (!--ctx->nr_active)
-		amd_uncore_cancel_hrtimer(ctx);
-
-	__clear_bit(hwc->idx, ctx->active_mask);
-}
-
-static int amd_uncore_add(struct perf_event *event, int flags)
-{
-	int i;
-	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
-	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/* are we already assigned? */
-	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
-		goto out;
-
-	for (i = 0; i < pmu->num_counters; i++) {
-		if (ctx->events[i] == event) {
-			hwc->idx = i;
-			goto out;
-		}
-	}
-
-	/* if not, take the first available counter */
-	hwc->idx = -1;
-	for (i = 0; i < pmu->num_counters; i++) {
-		struct perf_event *tmp = NULL;
-
-		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
-			hwc->idx = i;
-			break;
-		}
-	}
-
-out:
-	if (hwc->idx == -1)
-		return -EBUSY;
-
-	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
-	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
-	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
-	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-	if (pmu->rdpmc_base < 0)
-		hwc->event_base_rdpmc = -1;
-
-	if (flags & PERF_EF_START)
-		event->pmu->start(event, PERF_EF_RELOAD);
-
-	return 0;
-}
-
-static void amd_uncore_del(struct perf_event *event, int flags)
-{
-	int i;
-	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
-	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	event->pmu->stop(event, PERF_EF_UPDATE);
-
-	for (i = 0; i < pmu->num_counters; i++) {
-		struct perf_event *tmp = event;
-
-		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
-			break;
-	}
-
-	hwc->idx = -1;
-}
-
-static int amd_uncore_event_init(struct perf_event *event)
-{
-	struct amd_uncore_pmu *pmu;
-	struct amd_uncore_ctx *ctx;
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (event->attr.type != event->pmu->type)
-		return -ENOENT;
-
-	if (event->cpu < 0)
-		return -EINVAL;
-
-	pmu = event_to_amd_uncore_pmu(event);
-	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
-	if (!ctx)
-		return -ENODEV;
-
-	/*
-	 * NB and Last level cache counters (MSRs) are shared across all cores
-	 * that share the same NB / Last level cache.  On family 16h and below,
-	 * Interrupts can be directed to a single target core, however, event
-	 * counts generated by processes running on other cores cannot be masked
-	 * out. So we do not support sampling and per-thread events via
-	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
-	 */
-	hwc->config = event->attr.config;
-	hwc->idx = -1;
-
-	/*
-	 * since request can come in to any of the shared cores, we will remap
-	 * to a single common cpu.
-	 */
-	event->cpu = ctx->cpu;
-
-	return 0;
-}
+static struct uncore_common uncores[UNCORE_TYPE_MAX];
 
 static umode_t
 amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
@@ -314,37 +61,6 @@ amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
 }
 
-static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
-					    struct device_attribute *attr,
-					    char *buf)
-{
-	struct pmu *ptr = dev_get_drvdata(dev);
-	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);
-
-	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
-
-static struct attribute *amd_uncore_attrs[] = {
-	&dev_attr_cpumask.attr,
-	NULL,
-};
-
-static struct attribute_group amd_uncore_attr_group = {
-	.attrs = amd_uncore_attrs,
-};
-
-#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
-static ssize_t __uncore_##_var##_show(struct device *dev,		\
-				struct device_attribute *attr,		\
-				char *page)				\
-{									\
-	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
-	return sprintf(page, _format "\n");				\
-}									\
-static struct device_attribute format_attr_##_var =			\
-	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
-
 DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
 DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
 DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
@@ -425,13 +141,13 @@ static struct attribute_group amd_uncore_umc_format_group = {
 };
 
 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
-	&amd_uncore_attr_group,
+	&uncore_common_attr_group,
 	&amd_uncore_df_format_group,
 	NULL,
 };
 
 static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
-	&amd_uncore_attr_group,
+	&uncore_common_attr_group,
 	&amd_uncore_l3_format_group,
 	NULL,
 };
@@ -443,164 +159,14 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = {
 };
 
 static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
-	&amd_uncore_attr_group,
+	&uncore_common_attr_group,
 	&amd_uncore_umc_format_group,
 	NULL,
 };
 
-static __always_inline
-int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
-{
-	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
-	return info->split.cid;
-}
-
-static __always_inline
-int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
-{
-	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
-	return info->split.gid;
-}
-
-static __always_inline
-int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
-{
-	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
-	return info->split.num_pmcs;
-}
-
-static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
-{
-	struct amd_uncore_pmu *pmu;
-	struct amd_uncore_ctx *ctx;
-	int i;
-
-	if (!uncore->init_done)
-		return;
-
-	for (i = 0; i < uncore->num_pmus; i++) {
-		pmu = &uncore->pmus[i];
-		ctx = *per_cpu_ptr(pmu->ctx, cpu);
-		if (!ctx)
-			continue;
-
-		if (cpu == ctx->cpu)
-			cpumask_clear_cpu(cpu, &pmu->active_mask);
-
-		if (!--ctx->refcnt) {
-			kfree(ctx->events);
-			kfree(ctx);
-		}
-
-		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
-	}
-}
-
-static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
-{
-	struct amd_uncore_ctx *curr, *prev;
-	struct amd_uncore_pmu *pmu;
-	int node, cid, gid, i, j;
-
-	if (!uncore->init_done || !uncore->num_pmus)
-		return 0;
-
-	cid = amd_uncore_ctx_cid(uncore, cpu);
-	gid = amd_uncore_ctx_gid(uncore, cpu);
-
-	for (i = 0; i < uncore->num_pmus; i++) {
-		pmu = &uncore->pmus[i];
-		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
-		curr = NULL;
-
-		/* Check for group exclusivity */
-		if (gid != pmu->group)
-			continue;
-
-		/* Find a sibling context */
-		for_each_online_cpu(j) {
-			if (cpu == j)
-				continue;
-
-			prev = *per_cpu_ptr(pmu->ctx, j);
-			if (!prev)
-				continue;
-
-			if (cid == amd_uncore_ctx_cid(uncore, j)) {
-				curr = prev;
-				break;
-			}
-		}
-
-		/* Allocate context if sibling does not exist */
-		if (!curr) {
-			node = cpu_to_node(cpu);
-			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
-			if (!curr)
-				goto fail;
-
-			curr->cpu = cpu;
-			curr->events = kzalloc_node(sizeof(*curr->events) *
-						    pmu->num_counters,
-						    GFP_KERNEL, node);
-			if (!curr->events) {
-				kfree(curr);
-				goto fail;
-			}
-
-			amd_uncore_init_hrtimer(curr);
-			curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;
-
-			cpumask_set_cpu(cpu, &pmu->active_mask);
-		}
-
-		curr->refcnt++;
-		*per_cpu_ptr(pmu->ctx, cpu) = curr;
-	}
-
-	return 0;
-
-fail:
-	amd_uncore_ctx_free(uncore, cpu);
-
-	return -ENOMEM;
-}
-
-static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
-{
-	struct amd_uncore_ctx *curr, *next;
-	struct amd_uncore_pmu *pmu;
-	int i, j;
-
-	if (!uncore->init_done)
-		return;
-
-	for (i = 0; i < uncore->num_pmus; i++) {
-		pmu = &uncore->pmus[i];
-		curr = *per_cpu_ptr(pmu->ctx, cpu);
-		if (!curr)
-			continue;
-
-		/* Migrate to a shared sibling if possible */
-		for_each_online_cpu(j) {
-			next = *per_cpu_ptr(pmu->ctx, j);
-			if (!next || cpu == j)
-				continue;
-
-			if (curr == next) {
-				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
-				cpumask_clear_cpu(cpu, &pmu->active_mask);
-				cpumask_set_cpu(j, &pmu->active_mask);
-				next->cpu = j;
-				break;
-			}
-		}
-	}
-}
-
 static int amd_uncore_cpu_starting(unsigned int cpu)
 {
-	struct amd_uncore *uncore;
+	struct uncore_common *uncore;
 	int i;
 
 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
@@ -613,7 +179,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
 
 static int amd_uncore_cpu_online(unsigned int cpu)
 {
-	struct amd_uncore *uncore;
+	struct uncore_common *uncore;
 	int i;
 
 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
@@ -627,7 +193,7 @@ static int amd_uncore_cpu_online(unsigned int cpu)
 
 static int amd_uncore_cpu_down_prepare(unsigned int cpu)
 {
-	struct amd_uncore *uncore;
+	struct uncore_common *uncore;
 	int i;
 
 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
@@ -640,7 +206,7 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu)
 
 static int amd_uncore_cpu_dead(unsigned int cpu)
 {
-	struct amd_uncore *uncore;
+	struct uncore_common *uncore;
 	int i;
 
 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
@@ -654,7 +220,7 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
 static int amd_uncore_df_event_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int ret = amd_uncore_event_init(event);
+	int ret = uncore_common_event_init(event);
 
 	hwc->config = event->attr.config &
 		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
@@ -665,7 +231,7 @@ static int amd_uncore_df_event_init(struct perf_event *event)
 
 static int amd_uncore_df_add(struct perf_event *event, int flags)
 {
-	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
+	int ret = uncore_common_add(event, flags & ~PERF_EF_START);
 	struct hw_perf_event *hwc = &event->hw;
 
 	if (ret)
@@ -683,16 +249,16 @@ static int amd_uncore_df_add(struct perf_event *event, int flags)
 
 	/* Delayed start after rdpmc base update */
 	if (flags & PERF_EF_START)
-		amd_uncore_start(event, PERF_EF_RELOAD);
+		uncore_common_start(event, PERF_EF_RELOAD);
 
 	return 0;
 }
 
 static
-void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
+void amd_uncore_df_ctx_scan(struct uncore_common *uncore, unsigned int cpu)
 {
 	union cpuid_0x80000022_ebx ebx;
-	union amd_uncore_info info;
+	union uncore_common_info info;
 
 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
 		return;
@@ -711,17 +277,17 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
 }
 
 static
-int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
+int amd_uncore_df_ctx_init(struct uncore_common *uncore, unsigned int cpu)
 {
 	struct attribute **df_attr = amd_uncore_df_format_attr;
-	struct amd_uncore_pmu *pmu;
+	struct uncore_common_pmu *pmu;
 	int num_counters;
 
 	/* Run just once */
 	if (uncore->init_done)
-		return amd_uncore_ctx_init(uncore, cpu);
+		return uncore_common_ctx_init(uncore, cpu);
 
-	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+	num_counters = uncore_common_ctx_num_pmcs(uncore, cpu);
 	if (!num_counters)
 		goto done;
 
@@ -741,7 +307,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 	pmu->num_counters = num_counters;
 	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
 	pmu->rdpmc_base = RDPMC_BASE_NB;
-	pmu->group = amd_uncore_ctx_gid(uncore, cpu);
+	pmu->group = uncore_common_ctx_gid(uncore, cpu);
 
 	if (pmu_version >= 2) {
 		*df_attr++ = &format_attr_event14v2.attr;
@@ -750,7 +316,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 		*df_attr = &format_attr_event14.attr;
 	}
 
-	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+	pmu->ctx = alloc_percpu(struct uncore_common_ctx *);
 	if (!pmu->ctx)
 		goto done;
 
@@ -760,10 +326,10 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 		.name		= pmu->name,
 		.event_init	= amd_uncore_df_event_init,
 		.add		= amd_uncore_df_add,
-		.del		= amd_uncore_del,
-		.start		= amd_uncore_start,
-		.stop		= amd_uncore_stop,
-		.read		= amd_uncore_read,
+		.del		= uncore_common_del,
+		.start		= uncore_common_start,
+		.stop		= uncore_common_stop,
+		.read		= uncore_common_read,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
 		.module		= THIS_MODULE,
 	};
@@ -774,8 +340,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 		goto done;
 	}
 
-	pr_info("%d %s%s counters detected\n", pmu->num_counters,
-		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
+	pr_info("%d %s counters detected\n", pmu->num_counters,
 		pmu->pmu.name);
 
 	uncore->num_pmus = 1;
@@ -783,12 +348,12 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 done:
 	uncore->init_done = true;
 
-	return amd_uncore_ctx_init(uncore, cpu);
+	return uncore_common_ctx_init(uncore, cpu);
 }
 
 static int amd_uncore_l3_event_init(struct perf_event *event)
 {
-	int ret = amd_uncore_event_init(event);
+	int ret = uncore_common_event_init(event);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 config = event->attr.config;
 	u64 mask;
@@ -826,9 +391,9 @@ static int amd_uncore_l3_event_init(struct perf_event *event)
 }
 
 static
-void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
+void amd_uncore_l3_ctx_scan(struct uncore_common *uncore, unsigned int cpu)
 {
-	union amd_uncore_info info;
+	union uncore_common_info info;
 
 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
 		return;
@@ -845,17 +410,17 @@ void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
 }
 
 static
-int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
+int amd_uncore_l3_ctx_init(struct uncore_common *uncore, unsigned int cpu)
 {
 	struct attribute **l3_attr = amd_uncore_l3_format_attr;
-	struct amd_uncore_pmu *pmu;
+	struct uncore_common_pmu *pmu;
 	int num_counters;
 
 	/* Run just once */
 	if (uncore->init_done)
-		return amd_uncore_ctx_init(uncore, cpu);
+		return uncore_common_ctx_init(uncore, cpu);
 
-	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+	num_counters = uncore_common_ctx_num_pmcs(uncore, cpu);
 	if (!num_counters)
 		goto done;
 
@@ -875,7 +440,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 	pmu->num_counters = num_counters;
 	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
 	pmu->rdpmc_base = RDPMC_BASE_LLC;
-	pmu->group = amd_uncore_ctx_gid(uncore, cpu);
+	pmu->group = uncore_common_ctx_gid(uncore, cpu);
 
 	if (boot_cpu_data.x86 >= 0x17) {
 		*l3_attr++ = &format_attr_event8.attr;
@@ -885,7 +450,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 			     &format_attr_threadmask8.attr;
 	}
 
-	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+	pmu->ctx = alloc_percpu(struct uncore_common_ctx *);
 	if (!pmu->ctx)
 		goto done;
 
@@ -895,11 +460,11 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 		.attr_update	= amd_uncore_l3_attr_update,
 		.name		= pmu->name,
 		.event_init	= amd_uncore_l3_event_init,
-		.add		= amd_uncore_add,
-		.del		= amd_uncore_del,
-		.start		= amd_uncore_start,
-		.stop		= amd_uncore_stop,
-		.read		= amd_uncore_read,
+		.add		= uncore_common_add,
+		.del		= uncore_common_del,
+		.start		= uncore_common_start,
+		.stop		= uncore_common_stop,
+		.read		= uncore_common_read,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
 		.module		= THIS_MODULE,
 	};
@@ -910,8 +475,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 		goto done;
 	}
 
-	pr_info("%d %s%s counters detected\n", pmu->num_counters,
-		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
+	pr_info("%d %s counters detected\n", pmu->num_counters,
 		pmu->pmu.name);
 
 	uncore->num_pmus = 1;
@@ -919,14 +483,14 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 done:
 	uncore->init_done = true;
 
-	return amd_uncore_ctx_init(uncore, cpu);
+	return uncore_common_ctx_init(uncore, cpu);
 }
 
 static int amd_uncore_umc_event_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int ret = amd_uncore_event_init(event);
 
+	int ret = uncore_common_event_init(event);
 	if (ret)
 		return ret;
 
@@ -937,12 +501,12 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
 
 static void amd_uncore_umc_start(struct perf_event *event, int flags)
 {
-	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
-	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
+	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
 	if (!ctx->nr_active++)
-		amd_uncore_start_hrtimer(ctx);
+		uncore_common_start_hrtimer(ctx);
 
 	if (flags & PERF_EF_RELOAD)
 		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));
@@ -987,31 +551,31 @@ static void amd_uncore_umc_read(struct perf_event *event)
 }
 
 static
-void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
+void amd_uncore_umc_ctx_scan(struct uncore_common *uncore, unsigned int cpu)
 {
 	union cpuid_0x80000022_ebx ebx;
-	union amd_uncore_info info;
+	union uncore_common_info info = {};
 	unsigned int eax, ecx, edx;
 
 	if (pmu_version < 2)
 		return;
 
 	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
-	info.split.aux_data = ecx;	/* stash active mask */
+	info.split.aux_data = ecx;
 	info.split.num_pmcs = ebx.split.num_umc_pmc;
 	info.split.gid = topology_logical_package_id(cpu);
 	info.split.cid = topology_logical_package_id(cpu);
+
 	*per_cpu_ptr(uncore->info, cpu) = info;
 }
 
-static
-int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
+static int amd_uncore_umc_ctx_init(struct uncore_common *uncore, unsigned int cpu)
 {
 	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
 	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
 	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
-	union amd_uncore_info info;
-	struct amd_uncore_pmu *pmu;
+	union uncore_common_info info;
+	struct uncore_common_pmu *pmu;
 	int gid, i;
 	u16 index = 0;
 
@@ -1020,12 +584,13 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 
 	/* Run just once */
 	if (uncore->init_done)
-		return amd_uncore_ctx_init(uncore, cpu);
+		return uncore_common_ctx_init(uncore, cpu);
 
 	/* Find unique groups */
 	for_each_online_cpu(i) {
 		info = *per_cpu_ptr(uncore->info, i);
 		gid = info.split.gid;
+
 		if (test_bit(gid, gmask))
 			continue;
 
@@ -1050,8 +615,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
 			pmu->rdpmc_base = -1;
 			pmu->group = gid;
-
-			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+			pmu->ctx = alloc_percpu(struct uncore_common_ctx *);
 			if (!pmu->ctx)
 				goto done;
 
@@ -1060,10 +624,10 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 				.attr_groups	= amd_uncore_umc_attr_groups,
 				.name		= pmu->name,
 				.event_init	= amd_uncore_umc_event_init,
-				.add		= amd_uncore_add,
-				.del		= amd_uncore_del,
+				.add		= uncore_common_add,
+				.del		= uncore_common_del,
 				.start		= amd_uncore_umc_start,
-				.stop		= amd_uncore_stop,
+				.stop		= uncore_common_stop,
 				.read		= amd_uncore_umc_read,
 				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
 				.module		= THIS_MODULE,
@@ -1086,41 +650,40 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 	uncore->num_pmus = index;
 	uncore->init_done = true;
 
-	return amd_uncore_ctx_init(uncore, cpu);
+	return uncore_common_ctx_init(uncore, cpu);
 }
 
-static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
+static struct uncore_common uncores[UNCORE_TYPE_MAX] = {
 	/* UNCORE_TYPE_DF */
 	{
 		.scan = amd_uncore_df_ctx_scan,
 		.init = amd_uncore_df_ctx_init,
-		.move = amd_uncore_ctx_move,
-		.free = amd_uncore_ctx_free,
+		.move = uncore_common_ctx_move,
+		.free = uncore_common_ctx_free,
 	},
 	/* UNCORE_TYPE_L3 */
 	{
 		.scan = amd_uncore_l3_ctx_scan,
 		.init = amd_uncore_l3_ctx_init,
-		.move = amd_uncore_ctx_move,
-		.free = amd_uncore_ctx_free,
+		.move = uncore_common_ctx_move,
+		.free = uncore_common_ctx_free,
 	},
 	/* UNCORE_TYPE_UMC */
 	{
 		.scan = amd_uncore_umc_ctx_scan,
 		.init = amd_uncore_umc_ctx_init,
-		.move = amd_uncore_ctx_move,
-		.free = amd_uncore_ctx_free,
+		.move = uncore_common_ctx_move,
+		.free = uncore_common_ctx_free,
 	},
 };
 
 static int __init amd_uncore_init(void)
 {
-	struct amd_uncore *uncore;
+	struct uncore_common *uncore;
 	int ret = -ENODEV;
 	int i;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
-	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
 		return -ENODEV;
 
 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
@@ -1129,6 +692,8 @@ static int __init amd_uncore_init(void)
 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
 		pmu_version = 2;
 
+	uncore_common_set_update_interval(update_interval);
+
 	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
 		uncore = &uncores[i];
 
@@ -1137,7 +702,7 @@ static int __init amd_uncore_init(void)
 		BUG_ON(!uncore->move);
 		BUG_ON(!uncore->free);
 
-		uncore->info = alloc_percpu(union amd_uncore_info);
+		uncore->info = alloc_percpu(union uncore_common_info);
 		if (!uncore->info) {
 			ret = -ENOMEM;
 			goto fail;
@@ -1186,8 +751,8 @@ static int __init amd_uncore_init(void)
 
 static void __exit amd_uncore_exit(void)
 {
-	struct amd_uncore *uncore;
-	struct amd_uncore_pmu *pmu;
+	struct uncore_common *uncore;
+	struct uncore_common_pmu *pmu;
 	int i, j;
 
 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC PATCH 3/3] perf/x86/amd/uncore: Add Hygon uncore PMU support
  2026-05-19  3:32 [RFC PATCH 0/3] perf/x86/amd/uncore: Add common helpers and Hygon support Qi Liu
  2026-05-19  3:32 ` [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions Qi Liu
  2026-05-19  3:32 ` [RFC PATCH 2/3] perf/x86/amd/uncore: Convert AMD driver to common PMU helpers Qi Liu
@ 2026-05-19  3:32 ` Qi Liu
  2026-05-19  5:11   ` sashiko-bot
  2 siblings, 1 reply; 7+ messages in thread
From: Qi Liu @ 2026-05-19  3:32 UTC (permalink / raw)
  To: peterz, mingo, namhyung, alexander.shishkin, jolsa, x86
  Cc: linux-perf-users, linux-kernel, lijing, liuqi, Zhenglang Hu

Add uncore PMU support for Hygon processors based on the
shared AMD-family uncore framework.

Hygon processors implement uncore PMUs for Data Fabric units
with a programming model similar to AMD, but with several
differences:

  - Different MSR base addresses
  - Vendor-specific event encoding and masks
  - DF IOD counter allocation semantics
  - Additional format attributes (e.g. iod, constid)

Reuse the common uncore infrastructure introduced in the
previous patch, this avoids duplicating the full uncore driver
while keeping vendor-specific logic isolated.

Signed-off-by: Qi Liu <liuqi@hygon.cn>
Tested-by: Zhenglang Hu <huzhenglang@hygon.cn>
---
 arch/x86/events/Kconfig            |  11 +
 arch/x86/events/amd/Makefile       |   2 +
 arch/x86/events/amd/hygon_uncore.c | 567 +++++++++++++++++++++++++++++
 arch/x86/include/asm/msr-index.h   |   2 +
 arch/x86/include/asm/perf_event.h  |  20 +
 include/linux/cpuhotplug.h         |   3 +
 6 files changed, 605 insertions(+)
 create mode 100644 arch/x86/events/amd/hygon_uncore.c

diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index dabdf3d7bf84..cc8387236b95 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -45,6 +45,17 @@ config PERF_EVENTS_AMD_UNCORE
 	  To compile this driver as a module, choose M here: the
 	  module will be called 'amd-uncore'.
 
+config PERF_EVENTS_HYGON_UNCORE
+	tristate "Hygon Uncore performance events"
+	depends on PERF_EVENTS && CPU_SUP_HYGON
+	default y
+	help
+	  Include support for Hygon uncore performance events for use with
+	  e.g., perf stat -e hygon_df/.../.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called 'hygon-uncore'.
+
 config PERF_EVENTS_AMD_BRS
 	depends on PERF_EVENTS && CPU_SUP_AMD
 	bool "AMD Zen3 Branch Sampling support"
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index f951ae64ee36..32bf7aae2368 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -5,6 +5,8 @@ obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
 obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE)	+= amd-uncore.o
 amd-uncore-objs				:= uncore_common.o uncore.o
+obj-$(CONFIG_PERF_EVENTS_HYGON_UNCORE)	+= hygon-uncore.o
+hygon-uncore-objs			:= uncore_common.o hygon_uncore.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)		+= iommu.o
 endif
diff --git a/arch/x86/events/amd/hygon_uncore.c b/arch/x86/events/amd/hygon_uncore.c
new file mode 100644
index 000000000000..133b6b1923de
--- /dev/null
+++ b/arch/x86/events/amd/hygon_uncore.c
@@ -0,0 +1,567 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Chengdu Haiguang IC Design Co., Ltd.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+
+#include <asm/amd/nb.h>
+#include <asm/msr.h>
+#include <asm/perf_event.h>
+
+#include "uncore_common.h"
+
+#define NUM_COUNTERS_DF		4
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"hygon_uncore: " fmt
+
+enum {
+	HYGON_UNCORE_TYPE_DF,
+	HYGON_UNCORE_TYPE_DF_IOD,
+	HYGON_UNCORE_TYPE_MAX,
+};
+
+/* Interval for hrtimer, defaults to 60000 milliseconds */
+static unsigned int update_interval = 60 * MSEC_PER_SEC;
+module_param(update_interval, uint, 0444);
+
+static struct uncore_common hygon_uncores[HYGON_UNCORE_TYPE_MAX];
+
+static __always_inline bool hygon_uncore_is_df_iod(struct uncore_common_pmu *pmu)
+{
+	struct uncore_common *uncore = pmu->private;
+
+	return uncore == &hygon_uncores[HYGON_UNCORE_TYPE_DF_IOD];
+}
+
+static __always_inline int hygon_uncore_num_iods(struct uncore_common *uncore, unsigned int cpu)
+{
+	union uncore_common_info *info = per_cpu_ptr(uncore->info, cpu);
+
+	return info->split.private;
+}
+
+static u64 hygon_uncore_df_event_mask(void)
+{
+	if (boot_cpu_data.x86_model == 0x4 ||
+	    boot_cpu_data.x86_model == 0x5)
+		return HYGON_F18H_M4H_RAW_EVENT_MASK_DF;
+
+	if (boot_cpu_data.x86_model >= 0x6 &&
+	    boot_cpu_data.x86_model <= 0x18)
+		return HYGON_F18H_M6H_RAW_EVENT_MASK_DF;
+
+	return HYGON_F18H_RAW_EVENT_MASK_DF;
+}
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	struct pmu *ptr = dev_get_drvdata(dev);
+	struct uncore_common_pmu *pmu;
+
+	pmu = container_of(ptr, struct uncore_common_pmu, pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *hygon_uncore_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group hygon_uncore_attr_group = {
+	.attrs = hygon_uncore_attrs,
+};
+
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
+static ssize_t __uncore_##_var##_show(struct device *dev,		\
+				struct device_attribute *attr,		\
+				char *page)				\
+{									\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
+	return sprintf(page, _format "\n");				\
+}									\
+static struct device_attribute format_attr_##_var =			\
+	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+DEFINE_UNCORE_FORMAT_ATTR(event,	event,		"config:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask10,	umask,		"config:8-17");
+DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-19");
+DEFINE_UNCORE_FORMAT_ATTR(constid,	constid,	"config:6-7,32-35,61-62");
+DEFINE_UNCORE_FORMAT_ATTR(iod,		iod,		"config1:0-1");
+
+static struct attribute *hygon_uncore_df_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask8.attr,
+	&format_attr_constid.attr,
+	NULL,
+};
+
+static struct attribute *hygon_uncore_df_iod_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask10.attr,
+	&format_attr_constid.attr,
+	&format_attr_iod.attr,
+	NULL,
+};
+
+static struct attribute_group hygon_uncore_df_format_group = {
+	.name = "format",
+	.attrs = hygon_uncore_df_format_attr,
+};
+
+static struct attribute_group hygon_uncore_df_iod_format_group = {
+	.name = "format",
+	.attrs = hygon_uncore_df_iod_format_attr,
+};
+
+static const struct attribute_group *hygon_uncore_df_attr_groups[] = {
+	&hygon_uncore_attr_group,
+	&hygon_uncore_df_format_group,
+	NULL,
+};
+
+static const struct attribute_group *hygon_uncore_df_iod_attr_groups[] = {
+	&hygon_uncore_attr_group,
+	&hygon_uncore_df_iod_format_group,
+	NULL,
+};
+
+static int hygon_uncore_df_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct uncore_common_pmu *pmu;
+	struct uncore_common *uncore;
+	u64 event_mask;
+	int ret;
+
+	ret = uncore_common_event_init(event);
+	if (ret)
+		return ret;
+
+	pmu = event_to_uncore_common_pmu(event);
+	uncore = pmu->private;
+
+	if (hygon_uncore_is_df_iod(pmu) &&
+	    event->attr.config1 >= hygon_uncore_num_iods(uncore, event->cpu))
+		return -EINVAL;
+
+	event_mask = hygon_uncore_df_event_mask();
+	hwc->config = event->attr.config & event_mask;
+
+	return 0;
+}
+
+static int hygon_uncore_df_iod_add(struct perf_event *event, int flags)
+{
+	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
+	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+	struct hw_perf_event *hwc = &event->hw;
+	int iod_idx;
+	int i;
+
+	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
+		goto out;
+
+	for (i = 0; i < pmu->num_counters; i++) {
+		if (ctx->events[i] == event) {
+			hwc->idx = i;
+			goto out;
+		}
+	}
+
+	hwc->idx = -1;
+	iod_idx = event->attr.config1;
+
+	for (i = iod_idx * NUM_COUNTERS_DF; i < (iod_idx + 1) * NUM_COUNTERS_DF; i++) {
+		struct perf_event *tmp = NULL;
+
+		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
+			hwc->idx = i;
+			break;
+		}
+	}
+
+out:
+	if (hwc->idx == -1)
+		return -EBUSY;
+
+	hwc->config_base = pmu->msr_base + 2 * hwc->idx;
+	hwc->event_base = pmu->msr_base + 1 + 2 * hwc->idx;
+	hwc->event_base_rdpmc = -1;
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		event->pmu->start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static int hygon_uncore_cpu_starting(unsigned int cpu)
+{
+	struct uncore_common *uncore;
+	int i;
+
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+		uncore->scan(uncore, cpu);
+	}
+
+	return 0;
+}
+
+static int hygon_uncore_cpu_online(unsigned int cpu)
+{
+	struct uncore_common *uncore;
+	int ret;
+	int i;
+
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+
+		ret = uncore->init(uncore, cpu);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hygon_uncore_cpu_down_prepare(unsigned int cpu)
+{
+	struct uncore_common *uncore;
+	int i;
+
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+		uncore->move(uncore, cpu);
+	}
+
+	return 0;
+}
+
+static int hygon_uncore_cpu_dead(unsigned int cpu)
+{
+	struct uncore_common *uncore;
+	int i;
+
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+		uncore->free(uncore, cpu);
+	}
+
+	return 0;
+}
+
+static int hygon_uncore_df_ctx_init(struct uncore_common *uncore,
+				    unsigned int cpu)
+{
+	struct attribute *df_attr;
+	struct uncore_common_pmu *pmu;
+	int num_counters;
+
+	if (uncore->init_done)
+		return uncore_common_ctx_init(uncore, cpu);
+
+	num_counters = uncore_common_ctx_num_pmcs(uncore, cpu);
+	if (!num_counters)
+		goto done;
+
+	uncore->pmus = kzalloc_obj(*uncore->pmus);
+	if (!uncore->pmus)
+		goto done;
+
+	pmu = &uncore->pmus[0];
+	strscpy(pmu->name, "hygon_df", sizeof(pmu->name));
+	pmu->num_counters = num_counters;
+	pmu->msr_base = MSR_HYGON_F18H_DF_CTL;
+	pmu->rdpmc_base = -1;
+	pmu->group = uncore_common_ctx_gid(uncore, cpu);
+	pmu->private = uncore;
+
+	df_attr = &format_attr_umask8.attr;
+	if (boot_cpu_data.x86_model == 0x4 ||
+	    boot_cpu_data.x86_model == 0x5)
+		df_attr = &format_attr_umask10.attr;
+	else if (boot_cpu_data.x86_model >= 0x6 &&
+		 boot_cpu_data.x86_model <= 0x18)
+		df_attr = &format_attr_umask12.attr;
+	hygon_uncore_df_format_attr[1] = df_attr;
+
+	pmu->ctx = alloc_percpu(struct uncore_common_ctx *);
+	if (!pmu->ctx)
+		goto done;
+
+	pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+		.attr_groups	= hygon_uncore_df_attr_groups,
+		.name		= pmu->name,
+		.event_init	= hygon_uncore_df_event_init,
+		.add		= uncore_common_add,
+		.del		= uncore_common_del,
+		.start		= uncore_common_start,
+		.stop		= uncore_common_stop,
+		.read		= uncore_common_read,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+		.module		= THIS_MODULE,
+	};
+
+	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
+		free_percpu(pmu->ctx);
+		pmu->ctx = NULL;
+		goto done;
+	}
+
+	pr_info("%d %s counters detected\n", pmu->num_counters, pmu->pmu.name);
+	uncore->num_pmus = 1;
+
+done:
+	uncore->init_done = true;
+	return uncore_common_ctx_init(uncore, cpu);
+}
+
+static void hygon_uncore_df_ctx_scan(struct uncore_common *uncore,
+				     unsigned int cpu)
+{
+	unsigned int eax, ebx, ecx, edx;
+	union uncore_common_info info;
+
+	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
+		return;
+
+	info.split.gid = 0;
+	info.split.aux_data = 0;
+	info.split.num_pmcs = NUM_COUNTERS_DF;
+
+	cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+	info.split.cid = ecx & 0xff;
+
+	*per_cpu_ptr(uncore->info, cpu) = info;
+}
+
+static void hygon_uncore_df_iod_ctx_scan(struct uncore_common *uncore,
+					 unsigned int cpu)
+{
+	int num_packages, iods_per_package;
+	union uncore_common_info info;
+
+	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
+		return;
+
+	if (boot_cpu_data.x86_model < 0x4 || boot_cpu_data.x86_model == 0x6)
+		return;
+
+	num_packages = topology_max_packages();
+	iods_per_package = amd_nb_num() / num_packages - topology_max_dies_per_package();
+	if (iods_per_package <= 0)
+		return;
+
+	info.split.cid = topology_physical_package_id(cpu);
+	info.split.gid = 0;
+	info.split.private = iods_per_package;
+	info.split.num_pmcs = NUM_COUNTERS_DF * iods_per_package;
+
+	*per_cpu_ptr(uncore->info, cpu) = info;
+}
+
+static int hygon_uncore_df_iod_ctx_init(struct uncore_common *uncore,
+					unsigned int cpu)
+{
+	struct uncore_common_pmu *pmu;
+	int num_counters;
+
+	if (uncore->init_done)
+		return uncore_common_ctx_init(uncore, cpu);
+
+	num_counters = uncore_common_ctx_num_pmcs(uncore, cpu);
+	if (!num_counters)
+		goto done;
+
+	uncore->pmus = kzalloc_obj(*uncore->pmus);
+	if (!uncore->pmus)
+		goto done;
+
+	pmu = &uncore->pmus[0];
+	strscpy(pmu->name, "hygon_df_iod", sizeof(pmu->name));
+	pmu->num_counters = num_counters;
+	pmu->msr_base = MSR_HYGON_F18H_DF_IOD_CTL;
+	pmu->rdpmc_base = -1;
+	pmu->group = uncore_common_ctx_gid(uncore, cpu);
+	pmu->private = uncore;
+
+	if (boot_cpu_data.x86_model >= 0x6 &&
+	    boot_cpu_data.x86_model <= 0x18)
+		hygon_uncore_df_iod_format_attr[1] = &format_attr_umask12.attr;
+
+	pmu->ctx = alloc_percpu(struct uncore_common_ctx *);
+	if (!pmu->ctx)
+		goto done;
+
+	pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+		.attr_groups	= hygon_uncore_df_iod_attr_groups,
+		.name		= pmu->name,
+		.event_init	= hygon_uncore_df_event_init,
+		.add		= hygon_uncore_df_iod_add,
+		.del		= uncore_common_del,
+		.start		= uncore_common_start,
+		.stop		= uncore_common_stop,
+		.read		= uncore_common_read,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+		.module		= THIS_MODULE,
+	};
+
+	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
+		free_percpu(pmu->ctx);
+		pmu->ctx = NULL;
+		goto done;
+	}
+
+	pr_info("%d %s counters detected\n", pmu->num_counters, pmu->pmu.name);
+	uncore->num_pmus = 1;
+
+done:
+	uncore->init_done = true;
+	return uncore_common_ctx_init(uncore, cpu);
+}
+
+static struct uncore_common hygon_uncores[HYGON_UNCORE_TYPE_MAX] = {
+	/* HYGON_UNCORE_TYPE_DF */
+	{
+		.scan = hygon_uncore_df_ctx_scan,
+		.init = hygon_uncore_df_ctx_init,
+		.move = uncore_common_ctx_move,
+		.free = uncore_common_ctx_free,
+	},
+	/* HYGON_UNCORE_TYPE_DF IOD */
+	{
+		.scan = hygon_uncore_df_iod_ctx_scan,
+		.init = hygon_uncore_df_iod_ctx_init,
+		.move = uncore_common_ctx_move,
+		.free = uncore_common_ctx_free,
+	},
+};
+
+static int __init hygon_uncore_init(void)
+{
+	struct uncore_common *uncore;
+	int ret = -ENODEV;
+	int i;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+		return -ENODEV;
+
+	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
+		return -ENODEV;
+
+	uncore_common_set_update_interval(update_interval);
+
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+
+		if (WARN_ON_ONCE(!uncore->scan ||
+				 !uncore->init ||
+				 !uncore->move ||
+				 !uncore->free)) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		uncore->info = alloc_percpu(union uncore_common_info);
+		if (!uncore->info) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+	}
+
+	ret = cpuhp_setup_state(CPUHP_PERF_X86_HYGON_UNCORE_PREP,
+				"perf/x86/hygon/uncore:prepare",
+				NULL, hygon_uncore_cpu_dead);
+	if (ret)
+		goto fail;
+
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_STARTING,
+				"perf/x86/hygon/uncore:starting",
+				hygon_uncore_cpu_starting, NULL);
+	if (ret)
+		goto fail_prep;
+
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_ONLINE,
+				"perf/x86/hygon/uncore:online",
+				hygon_uncore_cpu_online,
+				hygon_uncore_cpu_down_prepare);
+	if (ret)
+		goto fail_start;
+
+	return 0;
+
+fail_start:
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_STARTING);
+fail_prep:
+	cpuhp_remove_state(CPUHP_PERF_X86_HYGON_UNCORE_PREP);
+fail:
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+
+		if (uncore->info) {
+			free_percpu(uncore->info);
+			uncore->info = NULL;
+		}
+	}
+
+	return ret;
+}
+
+static void __exit hygon_uncore_exit(void)
+{
+	struct uncore_common *uncore;
+	struct uncore_common_pmu *pmu;
+	int i, j;
+
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_ONLINE);
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_STARTING);
+	cpuhp_remove_state(CPUHP_PERF_X86_HYGON_UNCORE_PREP);
+
+	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
+		uncore = &hygon_uncores[i];
+
+		if (!uncore->info)
+			continue;
+
+		free_percpu(uncore->info);
+		uncore->info = NULL;
+
+		for (j = 0; j < uncore->num_pmus; j++) {
+			pmu = &uncore->pmus[j];
+
+			if (!pmu->ctx)
+				continue;
+
+			perf_pmu_unregister(&pmu->pmu);
+			free_percpu(pmu->ctx);
+			pmu->ctx = NULL;
+		}
+
+		kfree(uncore->pmus);
+		uncore->pmus = NULL;
+	}
+}
+
+module_init(hygon_uncore_init);
+module_exit(hygon_uncore_exit);
+
+MODULE_DESCRIPTION("Hygon Uncore Driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 86554de9a3f5..b81d0003e295 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -851,6 +851,8 @@
 #define MSR_F15H_PTSC			0xc0010280
 #define MSR_F15H_IC_CFG			0xc0011021
 #define MSR_F15H_EX_CFG			0xc001102c
+#define MSR_HYGON_F18H_DF_CTL		0xc0010240
+#define MSR_HYGON_F18H_DF_IOD_CTL	0xc0010250
 
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 752cb319d5ea..9e56b5c94d4b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -122,6 +122,26 @@
 	(AMD64_PERFMON_V2_EVENTSEL_EVENT_NB	|	\
 	 AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
 
+#define HYGON_F18H_EVENTSEL_EVENT_DF		0x0000003FULL
+#define HYGON_F18H_EVENTSEL_UMASK_DF		0x0000FF00ULL
+#define HYGON_F18H_M4H_EVENTSEL_UMASK_DF	0x0003FF00ULL
+#define HYGON_F18H_M6H_EVENTSEL_UMASK_DF	0x000FFF00ULL
+#define HYGON_F18H_EVENTSEL_CONSTID_DF			\
+	(0x000000C0ULL | GENMASK_ULL(35, 32) | GENMASK_ULL(62, 61))
+
+#define HYGON_F18H_RAW_EVENT_MASK_DF			\
+	(HYGON_F18H_EVENTSEL_EVENT_DF		|	\
+	 HYGON_F18H_EVENTSEL_UMASK_DF		|	\
+	 HYGON_F18H_EVENTSEL_CONSTID_DF)
+#define HYGON_F18H_M4H_RAW_EVENT_MASK_DF		\
+	(HYGON_F18H_EVENTSEL_EVENT_DF		|	\
+	 HYGON_F18H_M4H_EVENTSEL_UMASK_DF	|	\
+	 HYGON_F18H_EVENTSEL_CONSTID_DF)
+#define HYGON_F18H_M6H_RAW_EVENT_MASK_DF		\
+	(HYGON_F18H_EVENTSEL_EVENT_DF		|	\
+	 HYGON_F18H_M6H_EVENTSEL_UMASK_DF	|	\
+	 HYGON_F18H_EVENTSEL_CONSTID_DF)
+
 #define AMD64_PERFMON_V2_ENABLE_UMC			BIT_ULL(31)
 #define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC		GENMASK_ULL(7, 0)
 #define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC		GENMASK_ULL(9, 8)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 22ba327ec227..25f64bdfb5dc 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -62,6 +62,7 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_X86_PREPARE,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
+	CPUHP_PERF_X86_HYGON_UNCORE_PREP,
 	CPUHP_PERF_POWER,
 	CPUHP_PERF_SUPERH,
 	CPUHP_X86_HPET_DEAD,
@@ -148,6 +149,7 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
+	CPUHP_AP_PERF_X86_HYGON_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_STARTING,
 	CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
 	CPUHP_AP_PERF_XTENSA_STARTING,
@@ -205,6 +207,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_X86_ONLINE,
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
+	CPUHP_AP_PERF_X86_HYGON_UNCORE_ONLINE,
 	CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
 	CPUHP_AP_PERF_S390_CF_ONLINE,
 	CPUHP_AP_PERF_S390_SF_ONLINE,
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions
  2026-05-19  3:32 ` [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions Qi Liu
@ 2026-05-19  4:07   ` sashiko-bot
  0 siblings, 0 replies; 7+ messages in thread
From: sashiko-bot @ 2026-05-19  4:07 UTC (permalink / raw)
  To: Qi Liu; +Cc: linux-perf-users

Thank you for your contribution! Sashiko AI review found 5 potential issue(s) to consider:
- [High] uncore_common_ctx_free() frees the PMU context without cancelling its hrtimer or verifying all events are removed, leading to UAF and NULL-pointer dereferences.
- [High] Unconditional double-stop of PMU events underflows ctx->nr_active, permanently breaking the hrtimer.
- [High] Unsanitized event->attr.config is written directly to hardware MSRs, enabling kernel panics and failing to stop counters.
- [Medium] Race condition during CPU context migration permanently disables the hrtimer on the destination CPU.
- [Low] Missing bounds check on pmu->num_counters can lead to an out-of-bounds write.
--

commit f16450b9a54bbeb570f328c8a8626473a456f6b8
Author: Qi Liu <liuqi@hygon.cn>

perf/x86/amd/uncore: Add common PMU helper functions
    
Add common helper functions for AMD-family uncore PMU handling.
The helpers cover event initialization, counter allocation, counter
read/update, event start/stop and per-CPU context management.

> diff --git a/arch/x86/events/amd/uncore_common.c b/arch/x86/events/amd/uncore_common.c
> new file mode 100644
> index 0000000000000..a6d50fe803df3
> --- /dev/null
> +++ b/arch/x86/events/amd/uncore_common.c
[ ... ]
> +void uncore_common_stop(struct perf_event *event, int flags)
> +{
> +	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
> +	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	wrmsrq(hwc->config_base, hwc->config);
> +	hwc->state |= PERF_HES_STOPPED;
> +
> +	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
> +		event->pmu->read(event);
> +		hwc->state |= PERF_HES_UPTODATE;
> +	}
> +
> +	if (!--ctx->nr_active)
> +		uncore_common_cancel_hrtimer(ctx);

When a perf event is closed, free_event() calls pmu->del(), which maps to
uncore_common_del(). Since uncore_common_del() unconditionally calls
event->pmu->stop(), and the event was likely already stopped by
event_sched_out(), could this cause ctx->nr_active to underflow (e.g., from
0 to -1) because there is no check if the event is already stopped?

If ctx->nr_active underflows, a subsequent event start will evaluate
!ctx->nr_active++ as false (!-1 is 0), which would fail to call
uncore_common_start_hrtimer(), causing the hardware counters to silently
overflow.

> +
> +	__clear_bit(hwc->idx, ctx->active_mask);
> +}
> +
> +int uncore_common_event_init(struct perf_event *event)
> +{
> +	struct uncore_common_pmu *pmu;
> +	struct uncore_common_ctx *ctx;
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	if (event->attr.type != event->pmu->type)
> +		return -ENOENT;
> +
> +	if (event->cpu < 0)
> +		return -EINVAL;
> +
> +	pmu = event_to_uncore_common_pmu(event);
> +	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
> +	if (!ctx)
> +		return -ENODEV;
> +
> +	hwc->config = event->attr.config;

Is it safe to copy the user-supplied event->attr.config directly to
hwc->config without any sanitization or bit masking?

If a user configures an event with reserved hardware bits set, could the
later call to wrmsrq() trigger a General Protection Fault and crash the
kernel?

Additionally, if the user explicitly sets the hardware enable bit (e.g.,
ARCH_PERFMON_EVENTSEL_ENABLE), wouldn't uncore_common_stop() fail to
actually stop the counter when it writes this configuration back to the MSR?

> +	hwc->idx = -1;
> +
> +	event->cpu = ctx->cpu;
> +
> +	return 0;
> +}
> +
> +int uncore_common_add(struct perf_event *event, int flags)
> +{
> +	struct uncore_common_pmu *pmu = event_to_uncore_common_pmu(event);
> +	struct uncore_common_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	int i;
> +
> +	/* are we already assigned? */
> +	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
> +		goto out;
> +
> +	for (i = 0; i < pmu->num_counters; i++) {

Is there a bounds check to ensure pmu->num_counters does not exceed
NUM_COUNTERS_MAX?

ctx->active_mask is sized based on NUM_COUNTERS_MAX. If a PMU is initialized
with more than 64 counters, couldn't a later __set_bit(hwc->idx,
ctx->active_mask) write out of bounds and corrupt adjacent memory?

> +		if (ctx->events[i] == event) {
> +			hwc->idx = i;
> +			goto out;
> +		}
> +	}
[ ... ]
> +void uncore_common_ctx_free(struct uncore_common *uncore, unsigned int cpu)
> +{
> +	struct uncore_common_pmu *pmu;
> +	struct uncore_common_ctx *ctx;
> +	int i;
> +
> +	if (!uncore->init_done)
> +		return;
> +
> +	for (i = 0; i < uncore->num_pmus; i++) {
> +		pmu = &uncore->pmus[i];
> +
> +		if (!pmu->ctx)
> +			continue;
> +
> +		ctx = *per_cpu_ptr(pmu->ctx, cpu);
> +		if (!ctx)
> +			continue;
> +
> +		if (cpu == ctx->cpu)
> +			cpumask_clear_cpu(cpu, &pmu->active_mask);
> +
> +		if (!--ctx->refcnt) {
> +			kfree(ctx->events);
> +			kfree(ctx);
> +		}

When the last CPU in the PMU's domain goes offline, ctx->refcnt will drop
to 0 and ctx is freed. However, is there anything ensuring hrtimer_cancel()
is called if there are still active events attached?

If the hrtimer is still running, it will execute on the freed ctx memory.

> +
> +		*per_cpu_ptr(pmu->ctx, cpu) = NULL;

Additionally, since the user's perf_event file descriptors remain valid,
when the user later closes them, uncore_common_del() will attempt to access
the context.

Since *per_cpu_ptr(pmu->ctx, cpu) is set to NULL here, won't that result
in a NULL-pointer dereference?

> +	}
> +}
> +
> +void uncore_common_ctx_move(struct uncore_common *uncore, unsigned int cpu)
> +{
> +	struct uncore_common_ctx *curr, *next;
> +	struct uncore_common_pmu *pmu;
> +	int i, j;
[ ... ]
> +		for_each_online_cpu(j) {
> +			if (cpu == j)
> +				continue;
> +
> +			next = *per_cpu_ptr(pmu->ctx, j);
> +			if (!next)
> +				continue;
> +
> +			if (curr == next) {
> +				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
> +				cpumask_clear_cpu(cpu, &pmu->active_mask);
> +				cpumask_set_cpu(j, &pmu->active_mask);
> +				next->cpu = j;

Should next->cpu = j be set before calling perf_pmu_migrate_context()?

perf_pmu_migrate_context() synchronously installs and starts events on the
destination CPU, which arms the hrtimer. If the hrtimer expires before the
migration thread reaches next->cpu = j, uncore_common_hrtimer() will
observe that ctx->cpu != smp_processor_id() and return HRTIMER_NORESTART.

This would permanently disable the PMU timer on the new CPU.

> +				break;
> +			}
> +		}
> +	}
> +}

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260519033225.1479907-1-liuqi@hygon.cn?part=1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 2/3] perf/x86/amd/uncore: Convert AMD driver to common PMU helpers
  2026-05-19  3:32 ` [RFC PATCH 2/3] perf/x86/amd/uncore: Convert AMD driver to common PMU helpers Qi Liu
@ 2026-05-19  4:37   ` sashiko-bot
  0 siblings, 0 replies; 7+ messages in thread
From: sashiko-bot @ 2026-05-19  4:37 UTC (permalink / raw)
  To: Qi Liu; +Cc: linux-perf-users

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [High] Failure in module initialization leaves registered PMUs active with dangling pointers to freed module memory.
--

commit 1da22dd6a76f69c0499311126ce80bcfeb22b09d
Author: Qi Liu <liuqi@hygon.cn>

perf/x86/amd/uncore: Convert AMD driver to common PMU helpers

Use the common uncore PMU helpers for AMD uncore event handling and
per-CPU context management. The AMD-specific DF, L3 and UMC discovery,
event encoding and PMU setup logic remains in the AMD driver.

> diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
> --- a/arch/x86/events/amd/uncore.c
> +++ b/arch/x86/events/amd/uncore.c

[ ... ]

> @@ -1137,7 +702,7 @@ static int __init amd_uncore_init(void)
>  		BUG_ON(!uncore->move);
>  		BUG_ON(!uncore->free);
>  
> -		uncore->info = alloc_percpu(union amd_uncore_info);
> +		uncore->info = alloc_percpu(union uncore_common_info);
>  		if (!uncore->info) {
>  			ret = -ENOMEM;
>  			goto fail;

This isn't a bug introduced by this patch, but could this error path leave
registered PMUs active with dangling pointers to freed module memory?

If initialization fails later during the setup, such as if
cpuhp_setup_state() fails, the error handling path rolls back the hotplug
states and amd_uncore_cpu_dead() frees the per-CPU context memory.

However, the fail cleanup block doesn't appear to unregister the PMUs
via perf_pmu_unregister() or free the uncore->pmus allocation:

amd_uncore_init() {
    ...
fail:
    for (i = 0; i < UNCORE_TYPE_MAX; i++) {
        uncore = &uncores[i];
        if (uncore->info) {
            free_percpu(uncore->info);
            uncore->info = NULL;
        }
    }
    return ret;
}

Since the module initialization fails and the module memory is unloaded,
would any subsequent access to these PMUs from user-space (e.g. via
perf_event_open()) dereference pointers to the unloaded module's functions
like .event_init, .start, and .read, resulting in a use-after-free?

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260519033225.1479907-1-liuqi@hygon.cn?part=2

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 3/3] perf/x86/amd/uncore: Add Hygon uncore PMU support
  2026-05-19  3:32 ` [RFC PATCH 3/3] perf/x86/amd/uncore: Add Hygon uncore PMU support Qi Liu
@ 2026-05-19  5:11   ` sashiko-bot
  0 siblings, 0 replies; 7+ messages in thread
From: sashiko-bot @ 2026-05-19  5:11 UTC (permalink / raw)
  To: Qi Liu; +Cc: linux-perf-users

Thank you for your contribution! Sashiko AI review found 5 potential issue(s) to consider:
- [High] Missing PMU unregistration in the module initialization error path leads to a fatal Use-After-Free.
- [High] Integer truncation of the `num_pmcs` bitfield allows an out-of-bounds array write.
- [Medium] Uninitialized stack memory is copied to long-lived per-CPU data structures.
- [Low] Use of non-existent macro `kzalloc_obj` causes a build failure.
- [Low] Linking `uncore_common.o` into both `amd-uncore` and `hygon-uncore` causes multiple definition linker errors.
--

commit a3e675d32729bead13a45d2e64f57244f9040366
Author: Qi Liu <liuqi@hygon.cn>
Subject: perf/x86/amd/uncore: Add Hygon uncore PMU support

Add uncore PMU support for Hygon processors based on the shared AMD-family
uncore framework.

> diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
> index f951ae64ee361..32bf7aae23681 100644
> --- a/arch/x86/events/amd/Makefile
> +++ b/arch/x86/events/amd/Makefile
> @@ -5,6 +5,8 @@ obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
>  obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o
>  obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE)	+= amd-uncore.o
>  amd-uncore-objs				:= uncore_common.o uncore.o
> +obj-$(CONFIG_PERF_EVENTS_HYGON_UNCORE)	+= hygon-uncore.o
> +hygon-uncore-objs			:= uncore_common.o hygon_uncore.o

If a kernel is configured with both CONFIG_PERF_EVENTS_AMD_UNCORE=y and
CONFIG_PERF_EVENTS_HYGON_UNCORE=y, won't Kbuild link uncore_common.o twice
into the built-in.a archive? Will this result in fatal 'multiple definition'
linker errors for all globally visible symbols defined in uncore_common.c
(e.g., uncore_common_event_init)?

> diff --git a/arch/x86/events/amd/hygon_uncore.c b/arch/x86/events/amd/hygon_uncore.c
> new file mode 100644
> index 0000000000000..133b6b1923de9
> --- /dev/null
> +++ b/arch/x86/events/amd/hygon_uncore.c
[ ... ]
> +static int hygon_uncore_df_ctx_init(struct uncore_common *uncore,
> +				    unsigned int cpu)
> +{
> +	struct attribute *df_attr;
> +	struct uncore_common_pmu *pmu;
> +	int num_counters;
> +
> +	if (uncore->init_done)
> +		return uncore_common_ctx_init(uncore, cpu);
> +
> +	num_counters = uncore_common_ctx_num_pmcs(uncore, cpu);
> +	if (!num_counters)
> +		goto done;
> +
> +	uncore->pmus = kzalloc_obj(*uncore->pmus);

Will this cause a build failure? The macro kzalloc_obj does not exist anywhere
in the kernel codebase. Should this be using kzalloc() instead?

(This issue also appears in hygon_uncore_df_iod_ctx_init()).

[ ... ]
> +static void hygon_uncore_df_ctx_scan(struct uncore_common *uncore,
> +				     unsigned int cpu)
> +{
> +	unsigned int eax, ebx, ecx, edx;
> +	union uncore_common_info info;

Does this code copy uninitialized stack memory to long-lived per-CPU data
structures?

Because info is not zero-initialized, and the code manually assigns several
bitfields but omits others (like private here and aux_data in the IOD scan
below), the omitted bitfields will retain uninitialized stack garbage.

> +
> +	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
> +		return;
> +
> +	info.split.gid = 0;
> +	info.split.aux_data = 0;
> +	info.split.num_pmcs = NUM_COUNTERS_DF;
> +
> +	cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
> +	info.split.cid = ecx & 0xff;
> +
> +	*per_cpu_ptr(uncore->info, cpu) = info;

Since the entire 64-bit union is stored into per-CPU memory here, should info
be explicitly initialized?

> +}
> +
> +static void hygon_uncore_df_iod_ctx_scan(struct uncore_common *uncore,
> +					 unsigned int cpu)
> +{
> +	int num_packages, iods_per_package;
> +	union uncore_common_info info;
> +
> +	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
> +		return;
> +
> +	if (boot_cpu_data.x86_model < 0x4 || boot_cpu_data.x86_model == 0x6)
> +		return;
> +
> +	num_packages = topology_max_packages();
> +	iods_per_package = amd_nb_num() / num_packages - topology_max_dies_per_package();
> +	if (iods_per_package <= 0)
> +		return;
> +
> +	info.split.cid = topology_physical_package_id(cpu);
> +	info.split.gid = 0;
> +	info.split.private = iods_per_package;
> +	info.split.num_pmcs = NUM_COUNTERS_DF * iods_per_package;

Could this truncation allow an out-of-bounds array write later during event
addition?

num_pmcs is an 8-bit bitfield. If the hardware topology presents a large
number of nodes (e.g., 65 IODs), NUM_COUNTERS_DF * iods_per_package yields
260, which silently truncates to 4.

Consequently, pmu->num_counters would be set to 4, and uncore_common_ctx_init()
would allocate a ctx->events array of only 4 elements.

During hygon_uncore_df_iod_add():
    iod_idx = event->attr.config1;
    for (i = iod_idx * NUM_COUNTERS_DF; ...
        if (try_cmpxchg(&ctx->events[i], &tmp, event)) {

If event->attr.config1 is 64 (which passes validation because
info.split.private is correctly 65), the array index here would be 256. Could
try_cmpxchg() write past the end of the 4-element ctx->events array?

[ ... ]
> +static int __init hygon_uncore_init(void)
> +{
> +	struct uncore_common *uncore;
> +	int ret = -ENODEV;
> +	int i;
> +
[ ... ]
> +	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_ONLINE,
> +				"perf/x86/hygon/uncore:online",
> +				hygon_uncore_cpu_online,
> +				hygon_uncore_cpu_down_prepare);
> +	if (ret)
> +		goto fail_start;
> +
> +	return 0;
> +
> +fail_start:
> +	cpuhp_remove_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_STARTING);
> +fail_prep:
> +	cpuhp_remove_state(CPUHP_PERF_X86_HYGON_UNCORE_PREP);
> +fail:
> +	for (i = 0; i < HYGON_UNCORE_TYPE_MAX; i++) {
> +		uncore = &hygon_uncores[i];
> +
> +		if (uncore->info) {
> +			free_percpu(uncore->info);
> +			uncore->info = NULL;
> +		}
> +	}
> +
> +	return ret;
> +}

Does the module initialization error path have a regression where it fails to
unregister any PMUs that were successfully registered via perf_pmu_register()?

If cpuhp_setup_state(CPUHP_AP_PERF_X86_HYGON_UNCORE_ONLINE) fails (for example,
because uncore_common_ctx_init() returns -ENOMEM during DF_IOD setup), the
initialization aborts and jumps to fail_start. This error path removes CPU
hotplug states and frees uncore->info, but fails to unregister the PMU (if
the DF PMU succeeded earlier).

Since module initialization returns an error, the module is unloaded and its
memory is freed. The dangling PMU remains active in the core perf subsystem.
Would subsequent userspace operations on this PMU dereference function pointers
(like .event_init) pointing to the unloaded module's memory and cause a
use-after-free panic?

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260519033225.1479907-1-liuqi@hygon.cn?part=3

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-05-19  5:11 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-19  3:32 [RFC PATCH 0/3] perf/x86/amd/uncore: Add common helpers and Hygon support Qi Liu
2026-05-19  3:32 ` [RFC PATCH 1/3] perf/x86/amd/uncore: Add common PMU helper functions Qi Liu
2026-05-19  4:07   ` sashiko-bot
2026-05-19  3:32 ` [RFC PATCH 2/3] perf/x86/amd/uncore: Convert AMD driver to common PMU helpers Qi Liu
2026-05-19  4:37   ` sashiko-bot
2026-05-19  3:32 ` [RFC PATCH 3/3] perf/x86/amd/uncore: Add Hygon uncore PMU support Qi Liu
2026-05-19  5:11   ` sashiko-bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox