From: Zide Chen <zide.chen@intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Ian Rogers <irogers@google.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Andi Kleen <ak@linux.intel.com>,
Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Dapeng Mi <dapeng1.mi@linux.intel.com>,
Zide Chen <zide.chen@intel.com>
Subject: [PATCH 6/7] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering on CPU hotplug
Date: Tue, 12 May 2026 16:30:47 -0700 [thread overview]
Message-ID: <20260512233048.9577-7-zide.chen@intel.com> (raw)
In-Reply-To: <20260512233048.9577-1-zide.chen@intel.com>
In uncore_event_cpu_online(), uncore_box_ref() was called before
uncore_change_context(). uncore_box_ref() gates on box->cpu >= 0,
but box->cpu is still -1 at that point because uncore_change_context()
has not run yet. As a result, the box is never initialized on the
first CPU to come online in a die, leaving it permanently
uninitialized in the single-CPU-per-die case.
Thus, cpu_refcnt ends up one below the true value, so in the CPU
offline path the box is torn down when the second-to-last CPU goes
offline.
In uncore_event_cpu_offline(), uncore_box_unref() was called after
uncore_change_context(), so box->cpu is already -1 when the collector
CPU goes offline, which prevents it from tearing down the box.
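Fix by swapping the call order in both paths so that
uncore_box_{ref,unref}() runs at the point where box->cpu reflects
the correct context.
Also call allocate_boxes() directly from uncore_event_cpu_online()
rather than from uncore_box_ref(), and make it return void: on
allocation failure it marks every PMU of the given types broken
instead of returning -ENOMEM.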
Fixes: c74443d92f68 ("perf/x86/uncore: Support per PMU cpumask")
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
arch/x86/events/intel/uncore.c | 56 ++++++++++++++++++----------------
1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 922ba299533e..399f434e1a7d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1574,9 +1574,15 @@ static int uncore_event_cpu_offline(unsigned int cpu)
{
int die, target;
+ /* Clear the references */
+ die = topology_logical_die_id(cpu);
+ uncore_box_unref(uncore_msr_uncores, die);
+ uncore_box_unref(uncore_mmio_uncores, die);
+
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- goto unref;
+ return 0;
+
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
@@ -1589,20 +1595,14 @@ static int uncore_event_cpu_offline(unsigned int cpu)
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_mmio_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
-
-unref:
- /* Clear the references */
- die = topology_logical_die_id(cpu);
- uncore_box_unref(uncore_msr_uncores, die);
- uncore_box_unref(uncore_mmio_uncores, die);
return 0;
}
-static int allocate_boxes(struct intel_uncore_type **types,
+static void allocate_boxes(struct intel_uncore_type **types,
unsigned int die, unsigned int cpu)
{
struct intel_uncore_box *box, *tmp;
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **start = types;
struct intel_uncore_pmu *pmu;
LIST_HEAD(allocated);
int i;
@@ -1627,14 +1627,21 @@ static int allocate_boxes(struct intel_uncore_type **types,
list_del_init(&box->active_list);
box->pmu->boxes[die] = box;
}
- return 0;
+ return;
cleanup:
list_for_each_entry_safe(box, tmp, &allocated, active_list) {
list_del_init(&box->active_list);
kfree(box);
}
- return -ENOMEM;
+
+ /* mark the PMU broken to prevent future usage. */
+ for (; *start; start++) {
+ type = *start;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++)
+ uncore_pmu_set_broken(pmu);
+ }
}
static int uncore_box_ref(struct intel_uncore_type **types,
@@ -1643,11 +1650,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, ret;
-
- ret = allocate_boxes(types, die, cpu);
- if (ret)
- return ret;
+ int i;
for (; *types; types++) {
type = *types;
@@ -1664,27 +1667,26 @@ static int uncore_box_ref(struct intel_uncore_type **types,
static int uncore_event_cpu_online(unsigned int cpu)
{
- int die, target, msr_ret, mmio_ret;
+ int die, target;
die = topology_logical_die_id(cpu);
- msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
- mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
+ allocate_boxes(uncore_msr_uncores, die, cpu);
+ allocate_boxes(uncore_mmio_uncores, die, cpu);
/*
* Check if there is an online cpu in the package
* which collects uncore events already.
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
- if (target < nr_cpu_ids)
- return 0;
-
- cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
- if (!msr_ret)
+ if (target >= nr_cpu_ids) {
+ cpumask_set_cpu(cpu, &uncore_cpu_mask);
uncore_change_context(uncore_msr_uncores, -1, cpu);
- if (!mmio_ret)
uncore_change_context(uncore_mmio_uncores, -1, cpu);
- uncore_change_context(uncore_pci_uncores, -1, cpu);
+ uncore_change_context(uncore_pci_uncores, -1, cpu);
+ }
+
+ uncore_box_ref(uncore_msr_uncores, die, cpu);
+ uncore_box_ref(uncore_mmio_uncores, die, cpu);
return 0;
}
--
2.54.0
next prev parent reply other threads:[~2026-05-12 23:39 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-12 23:30 [PATCH 0/7] perf/x86/intel/uncore: PMU setup robustness fixes Zide Chen
2026-05-12 23:30 ` [PATCH 1/7] perf/x86/intel/uncore: Rename refcount fields and other cleanups Zide Chen
2026-05-13 0:26 ` Ian Rogers
2026-05-12 23:30 ` [PATCH 2/7] perf/x86/intel/uncore: Let init_box() callback report failures Zide Chen
2026-05-13 0:23 ` Ian Rogers
2026-05-12 23:30 ` [PATCH 3/7] perf/x86/intel/uncore: Keep PCI PMUs working when MMIO/MSR setup fails Zide Chen
2026-05-13 0:30 ` Ian Rogers
2026-05-12 23:30 ` [PATCH 4/7] perf/x86/intel/uncore: Factor out box setup code Zide Chen
2026-05-13 0:27 ` Ian Rogers
2026-05-12 23:30 ` [PATCH 5/7] perf/x86/intel/uncore: Introduce PMU flags and broken state Zide Chen
2026-05-13 0:28 ` Ian Rogers
2026-05-12 23:30 ` Zide Chen [this message]
2026-05-13 0:32 ` [PATCH 6/7] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering on CPU hotplug Ian Rogers
2026-05-13 8:59 ` Mi, Dapeng
2026-05-12 23:30 ` [PATCH 7/7] perf/x86/intel/uncore: Implement lazy setup for MSR/MMIO PMU Zide Chen
2026-05-13 0:34 ` Ian Rogers
2026-05-13 9:03 ` Mi, Dapeng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260512233048.9577-7-zide.chen@intel.com \
--to=zide.chen@intel.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=dapeng1.mi@linux.intel.com \
--cc=eranian@google.com \
--cc=irogers@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox