Linux Perf Users
 help / color / mirror / Atom feed
From: Zide Chen <zide.chen@intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Dapeng Mi <dapeng1.mi@linux.intel.com>,
	Zide Chen <zide.chen@intel.com>
Subject: [PATCH V3 7/8] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering
Date: Thu, 11 Jun 2026 09:00:32 -0700	[thread overview]
Message-ID: <20260611160033.66760-8-zide.chen@intel.com> (raw)
In-Reply-To: <20260611160033.66760-1-zide.chen@intel.com>

In uncore_event_cpu_online(), uncore_box_ref() was called before
uncore_change_context().  uncore_box_ref() gates on box->cpu >= 0,
but box->cpu is still -1 at that point because uncore_change_context()
has not run yet.  As a result, the box is never initialized on the
first CPU to come online in a die, leaving it permanently
uninitialized in the single-CPU-per-die case.

With multiple CPUs per die, box->refcnt therefore ends up one below
the true count, so in the CPU offline path the box is torn down when
the second-to-last CPU goes offline.

In uncore_event_cpu_offline(), uncore_box_unref() was called after
uncore_change_context(), so box->cpu is already -1 when the collector
CPU goes offline, which prevents the box from being torn down.

Fix by swapping the call order in both paths so that
uncore_box_{ref,unref}() runs at the point where box->cpu reflects
the correct context.

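Move allocate_boxes() out of uncore_box_ref() and call it directly
from uncore_event_cpu_online() to enable this reordering.  Since its
return value is no longer consumed, make allocate_boxes() return void
and mark the PMU as broken when a box allocation fails.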

Fixes: c74443d92f68 ("perf/x86/uncore: Support per PMU cpumask")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
v3:
- Update changelog to mention moving allocate_boxes(). (Dapeng)
- Update title; the bug is not limited to CPU hotplug.
---
 arch/x86/events/intel/uncore.c | 50 ++++++++++++++++------------------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index feb8c3b0076b..b9ac2f7d31ca 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1580,9 +1580,15 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 {
 	int die, target;
 
+	/* Clear the references */
+	die = topology_logical_die_id(cpu);
+	uncore_box_unref(uncore_msr_uncores, die);
+	uncore_box_unref(uncore_mmio_uncores, die);
+
 	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-		goto unref;
+		return 0;
+
 	/* Find a new cpu to collect uncore events */
 	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
 
@@ -1595,16 +1601,10 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_mmio_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
-
-unref:
-	/* Clear the references */
-	die = topology_logical_die_id(cpu);
-	uncore_box_unref(uncore_msr_uncores, die);
-	uncore_box_unref(uncore_mmio_uncores, die);
 	return 0;
 }
 
-static int allocate_boxes(struct intel_uncore_type **types,
+static void allocate_boxes(struct intel_uncore_type **types,
 			 unsigned int die, unsigned int cpu)
 {
 	struct intel_uncore_box *box, *tmp;
@@ -1621,8 +1621,10 @@ static int allocate_boxes(struct intel_uncore_type **types,
 			if (pmu->boxes[die] || uncore_pmu_broken(pmu))
 				continue;
 			box = uncore_alloc_box(type, cpu_to_node(cpu));
-			if (!box)
+			if (!box) {
+				uncore_pmu_set_broken(pmu);
 				goto cleanup;
+			}
 			box->pmu = pmu;
 			box->dieid = die;
 			list_add(&box->active_list, &allocated);
@@ -1633,14 +1635,13 @@ static int allocate_boxes(struct intel_uncore_type **types,
 		list_del_init(&box->active_list);
 		box->pmu->boxes[die] = box;
 	}
-	return 0;
+	return;
 
 cleanup:
 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
 		list_del_init(&box->active_list);
 		kfree(box);
 	}
-	return -ENOMEM;
 }
 
 static int uncore_box_ref(struct intel_uncore_type **types,
@@ -1649,11 +1650,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
 	struct intel_uncore_type *type;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, ret;
-
-	ret = allocate_boxes(types, die, cpu);
-	if (ret)
-		return ret;
+	int i;
 
 	for (; *types; types++) {
 		type = *types;
@@ -1669,27 +1666,26 @@ static int uncore_box_ref(struct intel_uncore_type **types,
 
 static int uncore_event_cpu_online(unsigned int cpu)
 {
-	int die, target, msr_ret, mmio_ret;
+	int die, target;
 
 	die = topology_logical_die_id(cpu);
-	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
-	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
+	allocate_boxes(uncore_msr_uncores, die, cpu);
+	allocate_boxes(uncore_mmio_uncores, die, cpu);
 
 	/*
 	 * Check if there is an online cpu in the package
 	 * which collects uncore events already.
 	 */
 	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
-	if (target < nr_cpu_ids)
-		return 0;
-
-	cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
-	if (!msr_ret)
+	if (target >= nr_cpu_ids) {
+		cpumask_set_cpu(cpu, &uncore_cpu_mask);
 		uncore_change_context(uncore_msr_uncores, -1, cpu);
-	if (!mmio_ret)
 		uncore_change_context(uncore_mmio_uncores, -1, cpu);
-	uncore_change_context(uncore_pci_uncores, -1, cpu);
+		uncore_change_context(uncore_pci_uncores, -1, cpu);
+	}
+
+	uncore_box_ref(uncore_msr_uncores, die, cpu);
+	uncore_box_ref(uncore_mmio_uncores, die, cpu);
 	return 0;
 }
 
-- 
2.54.0


  parent reply	other threads:[~2026-06-11 16:09 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-11 16:00 [PATCH v3 0/8] perf/x86/intel/uncore: PMU setup robustness fixes Zide Chen
2026-06-11 16:00 ` [PATCH V3 1/8] perf/x86/intel/uncore: Fix PCI PMU cleanup on setup failure Zide Chen
2026-06-11 16:26   ` sashiko-bot
2026-06-11 16:00 ` [PATCH V3 2/8] perf/x86/intel/uncore: Fix refcnt and other cleanups Zide Chen
2026-06-11 16:29   ` sashiko-bot
2026-06-11 16:00 ` [PATCH V3 3/8] perf/x86/intel/uncore: Let init_box() callback report failures Zide Chen
2026-06-11 16:38   ` sashiko-bot
2026-06-11 16:00 ` [PATCH V3 4/8] perf/x86/intel/uncore: Keep PCI PMUs working when MMIO/MSR setup fails Zide Chen
2026-06-11 16:00 ` [PATCH V3 5/8] perf/x86/intel/uncore: Factor out box setup code Zide Chen
2026-06-11 16:00 ` [PATCH V3 6/8] perf/x86/intel/uncore: Introduce PMU flags and broken state Zide Chen
2026-06-11 16:30   ` sashiko-bot
2026-06-11 16:00 ` Zide Chen [this message]
2026-06-11 16:29   ` [PATCH V3 7/8] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering sashiko-bot
2026-06-11 16:00 ` [PATCH V3 8/8] perf/x86/intel/uncore: Implement lazy setup for MSR/MMIO PMUs Zide Chen
2026-06-11 16:33   ` sashiko-bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260611160033.66760-8-zide.chen@intel.com \
    --to=zide.chen@intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@linux.intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox