From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Ming Lei <ming.lei@redhat.com>,
Reinette Chatre <reinette.chatre@intel.com>,
Fenghua Yu <fenghua.yu@intel.com>,
Peter Newman <peternewman@google.com>,
Babu Moger <babu.moger@amd.com>, Luck Tony <tony.luck@intel.com>,
Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>,
linux-block@vger.kernel.org
Subject: [PATCH AUTOSEL 6.12 36/36] blk-mq: move cpuhp callback registering out of q->sysfs_lock
Date: Wed, 11 Dec 2024 13:49:52 -0500 [thread overview]
Message-ID: <20241211185028.3841047-36-sashal@kernel.org> (raw)
In-Reply-To: <20241211185028.3841047-1-sashal@kernel.org>
From: Ming Lei <ming.lei@redhat.com>
[ Upstream commit 22465bbac53c821319089016f268a2437de9b00a ]
Registering and unregistering cpuhp callback requires global cpu hotplug lock,
which is used everywhere. Meantime q->sysfs_lock is used in block layer
almost everywhere.
It is easy to trigger lockdep warning[1] by connecting the two locks.
Fix the warning by moving blk-mq's cpuhp callback registering out of
q->sysfs_lock. Add one dedicated global lock for covering registering &
unregistering hctx's cpuhp, and it is safe to do so because hctx is
guaranteed to be live if our request_queue is live.
[1] https://lore.kernel.org/lkml/Z04pz3AlvI4o0Mr8@agluck-desk3/
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Newman <peternewman@google.com>
Cc: Babu Moger <babu.moger@amd.com>
Reported-by: Luck Tony <tony.luck@intel.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20241206111611.978870-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
block/blk-mq.c | 103 +++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 92 insertions(+), 11 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c4012cb3adbf1..6dd849d607c03 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -43,6 +43,7 @@
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
+static DEFINE_MUTEX(blk_mq_cpuhp_lock);
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
static void blk_mq_request_bypass_insert(struct request *rq,
@@ -3740,13 +3741,91 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
return 0;
}
-static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+static void __blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
{
- if (!(hctx->flags & BLK_MQ_F_STACKING))
+ lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+ if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+ !hlist_unhashed(&hctx->cpuhp_online)) {
cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
&hctx->cpuhp_online);
- cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
- &hctx->cpuhp_dead);
+ INIT_HLIST_NODE(&hctx->cpuhp_online);
+ }
+
+ if (!hlist_unhashed(&hctx->cpuhp_dead)) {
+ cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+ &hctx->cpuhp_dead);
+ INIT_HLIST_NODE(&hctx->cpuhp_dead);
+ }
+}
+
+static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+ mutex_lock(&blk_mq_cpuhp_lock);
+ __blk_mq_remove_cpuhp(hctx);
+ mutex_unlock(&blk_mq_cpuhp_lock);
+}
+
+static void __blk_mq_add_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+ lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+ if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+ hlist_unhashed(&hctx->cpuhp_online))
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
+ &hctx->cpuhp_online);
+
+ if (hlist_unhashed(&hctx->cpuhp_dead))
+ cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+ &hctx->cpuhp_dead);
+}
+
+static void __blk_mq_remove_cpuhp_list(struct list_head *head)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+ list_for_each_entry(hctx, head, hctx_list)
+ __blk_mq_remove_cpuhp(hctx);
+}
+
+/*
+ * Unregister cpuhp callbacks from exited hw queues
+ *
+ * Safe to call if this `request_queue` is live
+ */
+static void blk_mq_remove_hw_queues_cpuhp(struct request_queue *q)
+{
+ LIST_HEAD(hctx_list);
+
+ spin_lock(&q->unused_hctx_lock);
+ list_splice_init(&q->unused_hctx_list, &hctx_list);
+ spin_unlock(&q->unused_hctx_lock);
+
+ mutex_lock(&blk_mq_cpuhp_lock);
+ __blk_mq_remove_cpuhp_list(&hctx_list);
+ mutex_unlock(&blk_mq_cpuhp_lock);
+
+ spin_lock(&q->unused_hctx_lock);
+ list_splice(&hctx_list, &q->unused_hctx_list);
+ spin_unlock(&q->unused_hctx_lock);
+}
+
+/*
+ * Register cpuhp callbacks from all hw queues
+ *
+ * Safe to call if this `request_queue` is live
+ */
+static void blk_mq_add_hw_queues_cpuhp(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned long i;
+
+ mutex_lock(&blk_mq_cpuhp_lock);
+ queue_for_each_hw_ctx(q, hctx, i)
+ __blk_mq_add_cpuhp(hctx);
+ mutex_unlock(&blk_mq_cpuhp_lock);
}
/*
@@ -3797,8 +3876,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
- blk_mq_remove_cpuhp(hctx);
-
xa_erase(&q->hctx_table, hctx_idx);
spin_lock(&q->unused_hctx_lock);
@@ -3815,6 +3892,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
queue_for_each_hw_ctx(q, hctx, i) {
if (i == nr_queue)
break;
+ blk_mq_remove_cpuhp(hctx);
blk_mq_exit_hctx(q, set, hctx, i);
}
}
@@ -3838,11 +3916,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
goto exit_flush_rq;
- if (!(hctx->flags & BLK_MQ_F_STACKING))
- cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
- &hctx->cpuhp_online);
- cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
-
return 0;
exit_flush_rq:
@@ -3877,6 +3950,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch);
+ INIT_HLIST_NODE(&hctx->cpuhp_dead);
+ INIT_HLIST_NODE(&hctx->cpuhp_online);
hctx->queue = q;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
@@ -4415,6 +4490,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
mutex_unlock(&q->sysfs_lock);
+
+ /* unregister cpuhp callbacks for exited hctxs */
+ blk_mq_remove_hw_queues_cpuhp(q);
+
+ /* register cpuhp for new initialized hctxs */
+ blk_mq_add_hw_queues_cpuhp(q);
}
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
--
2.43.0
prev parent reply other threads:[~2024-12-11 18:51 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-11 18:49 [PATCH AUTOSEL 6.12 01/36] watchdog: it87_wdt: add PWRGD enable quirk for Qotom QCML04 Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 02/36] watchdog: rzg2l_wdt: Power on the watchdog domain in the restart handler Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 03/36] Revert "watchdog: s3c2410_wdt: use exynos_get_pmu_regmap_by_phandle() for PMU regs" Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 04/36] watchdog: mediatek: Add support for MT6735 TOPRGU/WDT Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 05/36] watchdog: s3c2410_wdt: add support for exynosautov920 SoC Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 06/36] scsi: qla1280: Fix hw revision numbering for ISP1020/1040 Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 07/36] scsi: megaraid_sas: Fix for a potential deadlock Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 08/36] udf: Skip parent dir link count update if corrupted Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 09/36] udf: Verify inode link counts before performing rename Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 10/36] ALSA: ump: Don't open legacy substream for an inactive group Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 11/36] ALSA: ump: Indicate the inactive group in legacy substream names Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 12/36] ALSA: ump: Update legacy substream names upon FB info update Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 13/36] ALSA: hda/conexant: fix Z60MR100 startup pop issue Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 14/36] ALSA: sh: Use standard helper for buffer accesses Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 15/36] smb: server: Fix building with GCC 15 Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 16/36] regmap: Use correct format specifier for logging range errors Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 17/36] LoongArch: Fix reserving screen info memory for above-4G firmware Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 18/36] LoongArch/irq: Use seq_put_decimal_ull_width() for decimal values Sasha Levin
2024-12-11 19:14 ` Thomas Gleixner
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 19/36] LoongArch: BPF: Adjust the parameter of emit_jirl() Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 20/36] platform/x86: asus-nb-wmi: Ignore unknown event 0xCF Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 21/36] bpf: Zero index arg error string for dynptr and iter Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 22/36] net: sched: fix ordering of qlen adjustment Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 23/36] spi: intel: Add Panther Lake SPI controller support Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 24/36] scsi: mpt3sas: Diag-Reset when Doorbell-In-Use bit is set during driver load time Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 25/36] scsi: mpi3mr: Synchronize access to ioctl data buffer Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 26/36] scsi: mpi3mr: Fix corrupt config pages PHY state is switched in sysfs Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 27/36] scsi: mpi3mr: Start controller indexing from 0 Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 28/36] scsi: mpi3mr: Handling of fault code for insufficient power Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 29/36] scsi: storvsc: Do not flag MAINTENANCE_IN return of SRB_STATUS_DATA_OVERRUN as an error Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 30/36] ACPI/IORT: Add PMCG platform information for HiSilicon HIP09A Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 31/36] spi: omap2-mcspi: Fix the IS_ERR() bug for devm_clk_get_optional_enabled() Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 32/36] drm/dp_mst: Ensure mst_primary pointer is valid in drm_dp_mst_handle_up_req() Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 33/36] drm/dp_mst: Reset message rx state after OOM " Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 34/36] virtio-blk: don't keep queue frozen during system suspend Sasha Levin
2024-12-11 18:49 ` [PATCH AUTOSEL 6.12 35/36] blk-mq: register cpuhp callback after hctx is added to xarray table Sasha Levin
2024-12-11 18:49 ` Sasha Levin [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241211185028.3841047-36-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=axboe@kernel.dk \
--cc=babu.moger@amd.com \
--cc=fenghua.yu@intel.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ming.lei@redhat.com \
--cc=peternewman@google.com \
--cc=reinette.chatre@intel.com \
--cc=stable@vger.kernel.org \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox