* [PATCH V2] nvmet: move async event work off nvmet-wq
@ 2026-02-26 4:30 Chaitanya Kulkarni
2026-02-26 15:33 ` Christoph Hellwig
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Chaitanya Kulkarni @ 2026-02-26 4:30 UTC (permalink / raw)
To: kbusch, hch, sagi
Cc: wagi, shinichiro.kawasaki, linux-nvme, Chaitanya Kulkarni
For the target, nvmet_ctrl_free() flushes ctrl->async_event_work.
If nvmet_ctrl_free() runs on nvmet-wq, the flush re-enters workqueue
completion for the same worker:-
A. Async event work queued on nvmet-wq (prior to disconnect):
nvmet_execute_async_event()
queue_work(nvmet_wq, &ctrl->async_event_work)
nvmet_add_async_event()
queue_work(nvmet_wq, &ctrl->async_event_work)
B. Full pre-work chain (RDMA CM path):
nvmet_rdma_cm_handler()
nvmet_rdma_queue_disconnect()
__nvmet_rdma_queue_disconnect()
queue_work(nvmet_wq, &queue->release_work)
process_one_work()
lock((wq_completion)nvmet-wq) <--------- 1st
nvmet_rdma_release_queue_work()
C. Recursive path (same worker):
nvmet_rdma_release_queue_work()
nvmet_rdma_free_queue()
nvmet_sq_destroy()
nvmet_ctrl_put()
nvmet_ctrl_free()
flush_work(&ctrl->async_event_work)
__flush_work()
touch_wq_lockdep_map()
lock((wq_completion)nvmet-wq) <--------- 2nd
Lockdep splat:
============================================
WARNING: possible recursive locking detected
6.19.0-rc3nvme+ #14 Tainted: G N
--------------------------------------------
kworker/u192:42/44933 is trying to acquire lock:
ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90
but task is already holding lock:
ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x53e/0x660
3 locks held by kworker/u192:42/44933:
#0: ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x53e/0x660
#1: ffffc9000e6cbe28 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x1c5/0x660
#2: ffffffff82d4db60 (rcu_read_lock){....}-{1:3}, at: __flush_work+0x62/0x530
Workqueue: nvmet-wq nvmet_rdma_release_queue_work [nvmet_rdma]
Call Trace:
__flush_work+0x268/0x530
nvmet_ctrl_free+0x140/0x310 [nvmet]
nvmet_cq_put+0x74/0x90 [nvmet]
nvmet_rdma_free_queue+0x23/0xe0 [nvmet_rdma]
nvmet_rdma_release_queue_work+0x19/0x50 [nvmet_rdma]
process_one_work+0x206/0x660
worker_thread+0x184/0x320
kthread+0x10c/0x240
ret_from_fork+0x319/0x390
Move async event work to a dedicated nvmet-aen-wq to avoid reentrant
flush on nvmet-wq.
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
---
V2:-
* Export nvmet_aen_wq.
---
drivers/nvme/target/admin-cmd.c | 2 +-
drivers/nvme/target/core.c | 14 ++++++++++++--
drivers/nvme/target/nvmet.h | 1 +
drivers/nvme/target/rdma.c | 1 +
4 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 3da31bb1183e..100d1466ff84 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -1586,7 +1586,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
mutex_unlock(&ctrl->lock);
- queue_work(nvmet_wq, &ctrl->async_event_work);
+ queue_work(nvmet_aen_wq, &ctrl->async_event_work);
}
void nvmet_execute_keep_alive(struct nvmet_req *req)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index cc88e5a28c8a..5075f7123358 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -26,6 +26,8 @@ static DEFINE_IDA(cntlid_ida);
struct workqueue_struct *nvmet_wq;
EXPORT_SYMBOL_GPL(nvmet_wq);
+struct workqueue_struct *nvmet_aen_wq;
+EXPORT_SYMBOL_GPL(nvmet_aen_wq);
/*
* This read/write semaphore is used to synchronize access to configuration
@@ -205,7 +207,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
list_add_tail(&aen->entry, &ctrl->async_events);
mutex_unlock(&ctrl->lock);
- queue_work(nvmet_wq, &ctrl->async_event_work);
+ queue_work(nvmet_aen_wq, &ctrl->async_event_work);
}
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
@@ -1958,9 +1960,14 @@ static int __init nvmet_init(void)
if (!nvmet_wq)
goto out_free_buffered_work_queue;
+ nvmet_aen_wq = alloc_workqueue("nvmet-aen-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!nvmet_aen_wq)
+ goto out_free_nvmet_work_queue;
+
error = nvmet_init_debugfs();
if (error)
- goto out_free_nvmet_work_queue;
+ goto out_free_nvmet_aen_work_queue;
error = nvmet_init_discovery();
if (error)
@@ -1976,6 +1983,8 @@ static int __init nvmet_init(void)
nvmet_exit_discovery();
out_exit_debugfs:
nvmet_exit_debugfs();
+out_free_nvmet_aen_work_queue:
+ destroy_workqueue(nvmet_aen_wq);
out_free_nvmet_work_queue:
destroy_workqueue(nvmet_wq);
out_free_buffered_work_queue:
@@ -1993,6 +2002,7 @@ static void __exit nvmet_exit(void)
nvmet_exit_discovery();
nvmet_exit_debugfs();
ida_destroy(&cntlid_ida);
+ destroy_workqueue(nvmet_aen_wq);
destroy_workqueue(nvmet_wq);
destroy_workqueue(buffered_io_wq);
destroy_workqueue(zbd_wq);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index b664b584fdc8..319d6a5e9cf0 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -501,6 +501,7 @@ extern struct kmem_cache *nvmet_bvec_cache;
extern struct workqueue_struct *buffered_io_wq;
extern struct workqueue_struct *zbd_wq;
extern struct workqueue_struct *nvmet_wq;
+extern struct workqueue_struct *nvmet_aen_wq;
static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
{
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9c12b2361a6d..038432364967 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -2088,6 +2088,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
mutex_unlock(&nvmet_rdma_queue_mutex);
flush_workqueue(nvmet_wq);
+ flush_workqueue(nvmet_aen_wq);
}
static struct ib_client nvmet_rdma_ib_client = {
--
2.39.5
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH V2] nvmet: move async event work off nvmet-wq
2026-02-26 4:30 [PATCH V2] nvmet: move async event work off nvmet-wq Chaitanya Kulkarni
@ 2026-02-26 15:33 ` Christoph Hellwig
2026-03-10 5:44 ` Chaitanya Kulkarni
2026-03-10 14:23 ` Keith Busch
2 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2026-02-26 15:33 UTC (permalink / raw)
To: Chaitanya Kulkarni
Cc: kbusch, hch, sagi, wagi, shinichiro.kawasaki, linux-nvme
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH V2] nvmet: move async event work off nvmet-wq
2026-02-26 4:30 [PATCH V2] nvmet: move async event work off nvmet-wq Chaitanya Kulkarni
2026-02-26 15:33 ` Christoph Hellwig
@ 2026-03-10 5:44 ` Chaitanya Kulkarni
2026-03-10 14:23 ` Keith Busch
2 siblings, 0 replies; 4+ messages in thread
From: Chaitanya Kulkarni @ 2026-03-10 5:44 UTC (permalink / raw)
To: Chaitanya Kulkarni, kbusch@kernel.org, hch@lst.de,
sagi@grimberg.me
Cc: wagi@monom.org, shinichiro.kawasaki@wdc.com,
linux-nvme@lists.infradead.org
On 2/25/26 20:30, Chaitanya Kulkarni wrote:
> For target nvmet_ctrl_free() flushes ctrl->async_event_work.
> If nvmet_ctrl_free() runs on nvmet-wq, the flush re-enters workqueue
> completion for the same worker:-
>
> A. Async event work queued on nvmet-wq (prior to disconnect):
> nvmet_execute_async_event()
> queue_work(nvmet_wq, &ctrl->async_event_work)
>
> nvmet_add_async_event()
> queue_work(nvmet_wq, &ctrl->async_event_work)
>
> B. Full pre-work chain (RDMA CM path):
> nvmet_rdma_cm_handler()
> nvmet_rdma_queue_disconnect()
> __nvmet_rdma_queue_disconnect()
> queue_work(nvmet_wq, &queue->release_work)
> process_one_work()
> lock((wq_completion)nvmet-wq) <--------- 1st
> nvmet_rdma_release_queue_work()
>
> C. Recursive path (same worker):
> nvmet_rdma_release_queue_work()
> nvmet_rdma_free_queue()
> nvmet_sq_destroy()
> nvmet_ctrl_put()
> nvmet_ctrl_free()
> flush_work(&ctrl->async_event_work)
> __flush_work()
> touch_wq_lockdep_map()
> lock((wq_completion)nvmet-wq) <--------- 2nd
>
> Lockdep splat:
>
> ============================================
> WARNING: possible recursive locking detected
> 6.19.0-rc3nvme+ #14 Tainted: G N
> --------------------------------------------
> kworker/u192:42/44933 is trying to acquire lock:
> ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90
>
> but task is already holding lock:
> ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x53e/0x660
>
> 3 locks held by kworker/u192:42/44933:
> #0: ffff888118a00948 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x53e/0x660
> #1: ffffc9000e6cbe28 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x1c5/0x660
> #2: ffffffff82d4db60 (rcu_read_lock){....}-{1:3}, at: __flush_work+0x62/0x530
>
> Workqueue: nvmet-wq nvmet_rdma_release_queue_work [nvmet_rdma]
> Call Trace:
> __flush_work+0x268/0x530
> nvmet_ctrl_free+0x140/0x310 [nvmet]
> nvmet_cq_put+0x74/0x90 [nvmet]
> nvmet_rdma_free_queue+0x23/0xe0 [nvmet_rdma]
> nvmet_rdma_release_queue_work+0x19/0x50 [nvmet_rdma]
> process_one_work+0x206/0x660
> worker_thread+0x184/0x320
> kthread+0x10c/0x240
> ret_from_fork+0x319/0x390
>
> Move async event work to a dedicated nvmet-aen-wq to avoid reentrant
> flush on nvmet-wq.
>
> Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
> ---
can we please merge this ?
-ck
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [PATCH V2] nvmet: move async event work off nvmet-wq
2026-02-26 4:30 [PATCH V2] nvmet: move async event work off nvmet-wq Chaitanya Kulkarni
2026-02-26 15:33 ` Christoph Hellwig
2026-03-10 5:44 ` Chaitanya Kulkarni
@ 2026-03-10 14:23 ` Keith Busch
2 siblings, 0 replies; 4+ messages in thread
From: Keith Busch @ 2026-03-10 14:23 UTC (permalink / raw)
To: Chaitanya Kulkarni; +Cc: hch, sagi, wagi, shinichiro.kawasaki, linux-nvme
On Wed, Feb 25, 2026 at 08:30:03PM -0800, Chaitanya Kulkarni wrote:
> For target nvmet_ctrl_free() flushes ctrl->async_event_work.
> If nvmet_ctrl_free() runs on nvmet-wq, the flush re-enters workqueue
> completion for the same worker:-
Thanks, applied to nvme-7.0.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-03-10 14:23 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-26 4:30 [PATCH V2] nvmet: move async event work off nvmet-wq Chaitanya Kulkarni
2026-02-26 15:33 ` Christoph Hellwig
2026-03-10 5:44 ` Chaitanya Kulkarni
2026-03-10 14:23 ` Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox