All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zide Chen <zide.chen@intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Dapeng Mi <dapeng1.mi@linux.intel.com>,
	Zide Chen <zide.chen@intel.com>
Subject: [PATCH 6/7] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering on CPU hotplug
Date: Tue, 12 May 2026 16:30:47 -0700	[thread overview]
Message-ID: <20260512233048.9577-7-zide.chen@intel.com> (raw)
In-Reply-To: <20260512233048.9577-1-zide.chen@intel.com>

In uncore_event_cpu_online(), uncore_box_ref() was called before
uncore_change_context().  uncore_box_ref() gates on box->cpu >= 0,
but box->cpu is still -1 at that point because uncore_change_context()
has not run yet.  As a result, the box is never initialized on the
first CPU to come online in a die, leaving it permanently
uninitialized in the single-CPU-per-die case.

Thus, cpu_refcnt is one count below the true value, and in the CPU
offline path, the box will be torn down on the second-to-last CPU.

In uncore_event_cpu_offline(), uncore_box_unref() was called after
uncore_change_context(), so box->cpu is already -1 when the collector
CPU goes offline, which prevents it from tearing down the box.

Fix by swapping the call order in both paths so that
uncore_box_{ref,unref}() runs at the point where box->cpu reflects
the correct context.

Fixes: c74443d92f68 ("perf/x86/uncore: Support per PMU cpumask")
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
 arch/x86/events/intel/uncore.c | 56 ++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 922ba299533e..399f434e1a7d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1574,9 +1574,15 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 {
 	int die, target;
 
+	/* Clear the references */
+	die = topology_logical_die_id(cpu);
+	uncore_box_unref(uncore_msr_uncores, die);
+	uncore_box_unref(uncore_mmio_uncores, die);
+
 	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-		goto unref;
+		return 0;
+
 	/* Find a new cpu to collect uncore events */
 	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
 
@@ -1589,20 +1595,14 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_mmio_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
-
-unref:
-	/* Clear the references */
-	die = topology_logical_die_id(cpu);
-	uncore_box_unref(uncore_msr_uncores, die);
-	uncore_box_unref(uncore_mmio_uncores, die);
 	return 0;
 }
 
-static int allocate_boxes(struct intel_uncore_type **types,
+static void allocate_boxes(struct intel_uncore_type **types,
 			 unsigned int die, unsigned int cpu)
 {
 	struct intel_uncore_box *box, *tmp;
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **start = types;
 	struct intel_uncore_pmu *pmu;
 	LIST_HEAD(allocated);
 	int i;
@@ -1627,14 +1627,21 @@ static int allocate_boxes(struct intel_uncore_type **types,
 		list_del_init(&box->active_list);
 		box->pmu->boxes[die] = box;
 	}
-	return 0;
+	return;
 
 cleanup:
 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
 		list_del_init(&box->active_list);
 		kfree(box);
 	}
-	return -ENOMEM;
+
+	/* Mark the PMU broken to prevent future usage. */
+	for (; *start; start++) {
+		type = *start;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++)
+			uncore_pmu_set_broken(pmu);
+	}
 }
 
 static int uncore_box_ref(struct intel_uncore_type **types,
@@ -1643,11 +1650,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
 	struct intel_uncore_type *type;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, ret;
-
-	ret = allocate_boxes(types, die, cpu);
-	if (ret)
-		return ret;
+	int i;
 
 	for (; *types; types++) {
 		type = *types;
@@ -1664,27 +1667,26 @@ static int uncore_box_ref(struct intel_uncore_type **types,
 
 static int uncore_event_cpu_online(unsigned int cpu)
 {
-	int die, target, msr_ret, mmio_ret;
+	int die, target;
 
 	die = topology_logical_die_id(cpu);
-	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
-	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
+	allocate_boxes(uncore_msr_uncores, die, cpu);
+	allocate_boxes(uncore_mmio_uncores, die, cpu);
 
 	/*
 	 * Check if there is an online cpu in the package
 	 * which collects uncore events already.
 	 */
 	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
-	if (target < nr_cpu_ids)
-		return 0;
-
-	cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
-	if (!msr_ret)
+	if (target >= nr_cpu_ids) {
+		cpumask_set_cpu(cpu, &uncore_cpu_mask);
 		uncore_change_context(uncore_msr_uncores, -1, cpu);
-	if (!mmio_ret)
 		uncore_change_context(uncore_mmio_uncores, -1, cpu);
-	uncore_change_context(uncore_pci_uncores, -1, cpu);
+		uncore_change_context(uncore_pci_uncores, -1, cpu);
+	}
+
+	uncore_box_ref(uncore_msr_uncores, die, cpu);
+	uncore_box_ref(uncore_mmio_uncores, die, cpu);
 	return 0;
 }
 
-- 
2.54.0


  parent reply	other threads:[~2026-05-12 23:39 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-12 23:30 [PATCH 0/7] perf/x86/intel/uncore: PMU setup robustness fixes Zide Chen
2026-05-12 23:30 ` [PATCH 1/7] perf/x86/intel/uncore: Rename refcount fields and other cleanups Zide Chen
2026-05-13  0:26   ` Ian Rogers
2026-05-14  0:58   ` sashiko-bot
2026-05-14 19:06     ` Chen, Zide
2026-05-12 23:30 ` [PATCH 2/7] perf/x86/intel/uncore: Let init_box() callback report failures Zide Chen
2026-05-13  0:23   ` Ian Rogers
2026-05-14  2:14   ` sashiko-bot
2026-05-12 23:30 ` [PATCH 3/7] perf/x86/intel/uncore: Keep PCI PMUs working when MMIO/MSR setup fails Zide Chen
2026-05-13  0:30   ` Ian Rogers
2026-05-12 23:30 ` [PATCH 4/7] perf/x86/intel/uncore: Factor out box setup code Zide Chen
2026-05-13  0:27   ` Ian Rogers
2026-05-14  3:34   ` sashiko-bot
2026-05-12 23:30 ` [PATCH 5/7] perf/x86/intel/uncore: Introduce PMU flags and broken state Zide Chen
2026-05-13  0:28   ` Ian Rogers
2026-05-14  4:27   ` sashiko-bot
2026-05-12 23:30 ` Zide Chen [this message]
2026-05-13  0:32   ` [PATCH 6/7] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering on CPU hotplug Ian Rogers
2026-05-13  8:59   ` Mi, Dapeng
2026-05-13 18:43     ` Chen, Zide
2026-05-14  5:12   ` sashiko-bot
2026-05-12 23:30 ` [PATCH 7/7] perf/x86/intel/uncore: Implement lazy setup for MSR/MMIO PMU Zide Chen
2026-05-13  0:34   ` Ian Rogers
2026-05-13  9:03   ` Mi, Dapeng
2026-05-13 16:47     ` Chen, Zide
2026-05-14  5:38   ` sashiko-bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260512233048.9577-7-zide.chen@intel.com \
    --to=zide.chen@intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@linux.intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.