From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Ruozhu Li <liruozhu@huawei.com>, Sagi Grimberg <sagi@grimberg.me>,
Christoph Hellwig <hch@lst.de>, Sasha Levin <sashal@kernel.org>,
kbusch@kernel.org, axboe@fb.com, linux-nvme@lists.infradead.org
Subject: [PATCH AUTOSEL 5.15 17/18] nvme: fix regression when disconnect a recovering ctrl
Date: Wed, 6 Jul 2022 11:31:52 -0400 [thread overview]
Message-ID: <20220706153153.1598076-17-sashal@kernel.org> (raw)
In-Reply-To: <20220706153153.1598076-1-sashal@kernel.org>
From: Ruozhu Li <liruozhu@huawei.com>
[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]
We encountered a problem where the disconnect command hangs.
After analyzing the log and stack, we found that the sequence of
events that triggers it is as follows:
CPU0                              CPU1
                                  nvme_rdma_error_recovery_work
                                    nvme_rdma_teardown_io_queues
nvme_do_delete_ctrl                   nvme_stop_queues
  nvme_remove_namespaces
  --clear ctrl->namespaces
                                      nvme_start_queues
                                      --no ns in ctrl->namespaces
    nvme_ns_remove                  return(because ctrl is deleting)
      blk_freeze_queue
        blk_mq_freeze_queue_wait
        --wait for ns to unquiesce to clean inflight IO, hang forever
This problem was not found in older kernels because we would flush
err work in nvme_stop_ctrl before nvme_remove_namespaces. That behavior
does not seem to have been changed for functional reasons, so the patch
can be reverted to solve the problem.
Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/nvme/host/core.c | 2 ++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/rdma.c | 12 +++++++++---
drivers/nvme/host/tcp.c | 10 +++++++---
4 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 19054b791c67..29b56ea01132 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4385,6 +4385,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
nvme_stop_failfast_work(ctrl);
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fw_act_work);
+ if (ctrl->ops->stop_ctrl)
+ ctrl->ops->stop_ctrl(ctrl);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 72bcd7e5716e..75a7e7baa1fc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -495,6 +495,7 @@ struct nvme_ctrl_ops {
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
+ void (*stop_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d51f52e296f5..2db9c166a1b7 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1049,6 +1049,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
}
}
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+ cancel_work_sync(&ctrl->err_work);
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -2230,9 +2238,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&ctrl->err_work);
- cancel_delayed_work_sync(&ctrl->reconnect_work);
-
nvme_rdma_teardown_io_queues(ctrl, shutdown);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
if (shutdown)
@@ -2282,6 +2287,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_rdma_stop_ctrl,
};
/*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 1821d38e620e..20138e132558 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2163,9 +2163,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
- cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
-
nvme_tcp_teardown_io_queues(ctrl, shutdown);
blk_mq_quiesce_queue(ctrl->admin_q);
if (shutdown)
@@ -2205,6 +2202,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
nvme_tcp_reconnect_or_remove(ctrl);
}
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+ cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+ cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
@@ -2528,6 +2531,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_tcp_stop_ctrl,
};
static bool
--
2.35.1
next prev parent reply other threads:[~2022-07-06 15:33 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-06 15:31 [PATCH AUTOSEL 5.15 01/18] ksmbd: use SOCK_NONBLOCK type for kernel_accept() Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 02/18] powerpc/xive/spapr: correct bitmap allocation size Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 03/18] vdpa/mlx5: Initialize CVQ vringh only once Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 04/18] vduse: Tie vduse mgmtdev and its device Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 05/18] virtio_mmio: Add missing PM calls to freeze/restore Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 06/18] virtio_mmio: Restore guest page size on resume Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [Bridge] [PATCH AUTOSEL 5.15 07/18] netfilter: br_netfilter: do not skip all hooks with 0 priority Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 08/18] scsi: hisi_sas: Limit max hw sectors for v3 HW Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 09/18] cpufreq: pmac32-cpufreq: Fix refcount leak bug Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 10/18] platform/x86: hp-wmi: Ignore Sanitization Mode event Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 11/18] firmware: sysfb: Make sysfb_create_simplefb() return a pdev pointer Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 12/18] firmware: sysfb: Add sysfb_disable() helper function Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 13/18] fbdev: Disable sysfb device registration when removing conflicting FBs Sasha Levin
2022-07-06 15:31 ` Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 14/18] net: tipc: fix possible refcount leak in tipc_sk_create() Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 15/18] NFC: nxp-nci: don't print header length mismatch on i2c error Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 16/18] nvme-tcp: always fail a request when sending it failed Sasha Levin
2022-07-06 15:31 ` Sasha Levin [this message]
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 18/18] net: sfp: fix memory leak in sfp_probe() Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220706153153.1598076-17-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=axboe@fb.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=liruozhu@huawei.com \
--cc=sagi@grimberg.me \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.