From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Ruozhu Li <liruozhu@huawei.com>, Sagi Grimberg <sagi@grimberg.me>,
Christoph Hellwig <hch@lst.de>, Sasha Levin <sashal@kernel.org>,
kbusch@kernel.org, axboe@fb.com, linux-nvme@lists.infradead.org
Subject: [PATCH AUTOSEL 5.4 8/9] nvme: fix regression when disconnect a recovering ctrl
Date: Wed, 6 Jul 2022 11:33:14 -0400 [thread overview]
Message-ID: <20220706153316.1598554-8-sashal@kernel.org> (raw)
In-Reply-To: <20220706153316.1598554-1-sashal@kernel.org>
From: Ruozhu Li <liruozhu@huawei.com>
[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]
We encountered a problem that the disconnect command hangs.
After analyzing the log and stack, we found that the triggering
process is as follows:
CPU0 CPU1
nvme_rdma_error_recovery_work
nvme_rdma_teardown_io_queues
nvme_do_delete_ctrl nvme_stop_queues
nvme_remove_namespaces
--clear ctrl->namespaces
nvme_start_queues
--no ns in ctrl->namespaces
nvme_ns_remove return(because ctrl is deleting)
blk_freeze_queue
blk_mq_freeze_queue_wait
--wait for ns to unquiesce to clean infligt IO, hang forever
This problem was not found in older kernels because we will flush
err work in nvme_stop_ctrl before nvme_remove_namespaces.It does not
seem to be modified for functional reasons, the patch can be revert
to solve the problem.
Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/nvme/host/core.c | 2 ++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/rdma.c | 12 +++++++++---
drivers/nvme/host/tcp.c | 10 +++++++---
4 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 79e22618817d..d2ea6ca37c41 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4034,6 +4034,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fw_act_work);
+ if (ctrl->ops->stop_ctrl)
+ ctrl->ops->stop_ctrl(ctrl);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1d1431dd4f9e..81a5b968253f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -402,6 +402,7 @@ struct nvme_ctrl_ops {
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
+ void (*stop_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4213c71b02a4..d5d7b2f98edc 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -973,6 +973,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
}
}
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+ cancel_work_sync(&ctrl->err_work);
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -1947,9 +1955,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&ctrl->err_work);
- cancel_delayed_work_sync(&ctrl->reconnect_work);
-
nvme_rdma_teardown_io_queues(ctrl, shutdown);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
if (shutdown)
@@ -1999,6 +2004,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_rdma_stop_ctrl,
};
/*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4378344f0e7a..2a27ac9aedba 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1973,9 +1973,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
- cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
-
nvme_tcp_teardown_io_queues(ctrl, shutdown);
blk_mq_quiesce_queue(ctrl->admin_q);
if (shutdown)
@@ -2014,6 +2011,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
nvme_tcp_reconnect_or_remove(ctrl);
}
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+ cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+ cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
@@ -2322,6 +2325,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_tcp_stop_ctrl,
};
static bool
--
2.35.1
next prev parent reply other threads:[~2022-07-06 15:36 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-06 15:33 [PATCH AUTOSEL 5.4 1/9] virtio_mmio: Add missing PM calls to freeze/restore Sasha Levin
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 2/9] virtio_mmio: Restore guest page size on resume Sasha Levin
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 3/9] netfilter: br_netfilter: do not skip all hooks with 0 priority Sasha Levin
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 4/9] cpufreq: pmac32-cpufreq: Fix refcount leak bug Sasha Levin
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 5/9] platform/x86: hp-wmi: Ignore Sanitization Mode event Sasha Levin
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 6/9] net: tipc: fix possible refcount leak in tipc_sk_create() Sasha Levin
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 7/9] NFC: nxp-nci: don't print header length mismatch on i2c error Sasha Levin
2022-07-06 15:33 ` Sasha Levin [this message]
2022-07-06 15:33 ` [PATCH AUTOSEL 5.4 9/9] net: sfp: fix memory leak in sfp_probe() Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220706153316.1598554-8-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=axboe@fb.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=liruozhu@huawei.com \
--cc=sagi@grimberg.me \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox