public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Ruozhu Li <liruozhu@huawei.com>, Sagi Grimberg <sagi@grimberg.me>,
	Christoph Hellwig <hch@lst.de>, Sasha Levin <sashal@kernel.org>,
	kbusch@kernel.org, axboe@fb.com, linux-nvme@lists.infradead.org
Subject: [PATCH AUTOSEL 5.15 17/18] nvme: fix regression when disconnect a recovering ctrl
Date: Wed,  6 Jul 2022 11:31:52 -0400	[thread overview]
Message-ID: <20220706153153.1598076-17-sashal@kernel.org> (raw)
In-Reply-To: <20220706153153.1598076-1-sashal@kernel.org>

From: Ruozhu Li <liruozhu@huawei.com>

[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]

We encountered a problem that the disconnect command hangs.
After analyzing the log and stack, we found that the triggering
process is as follows:
CPU0                          CPU1
                                nvme_rdma_error_recovery_work
                                  nvme_rdma_teardown_io_queues
nvme_do_delete_ctrl                 nvme_stop_queues
  nvme_remove_namespaces
  --clear ctrl->namespaces
                                    nvme_start_queues
                                    --no ns in ctrl->namespaces
    nvme_ns_remove                  return(because ctrl is deleting)
      blk_freeze_queue
        blk_mq_freeze_queue_wait
        --wait for ns to unquiesce to clean infligt IO, hang forever

This problem was not found in older kernels because we will flush
err work in nvme_stop_ctrl before nvme_remove_namespaces.It does not
seem to be modified for functional reasons, the patch can be revert
to solve the problem.

Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")

Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/core.c |  2 ++
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/rdma.c | 12 +++++++++---
 drivers/nvme/host/tcp.c  | 10 +++++++---
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 19054b791c67..29b56ea01132 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4385,6 +4385,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
+	if (ctrl->ops->stop_ctrl)
+		ctrl->ops->stop_ctrl(ctrl);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 72bcd7e5716e..75a7e7baa1fc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -495,6 +495,7 @@ struct nvme_ctrl_ops {
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl);
 	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
+	void (*stop_ctrl)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d51f52e296f5..2db9c166a1b7 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1049,6 +1049,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 	}
 }
 
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+	cancel_work_sync(&ctrl->err_work);
+	cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -2230,9 +2238,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&ctrl->err_work);
-	cancel_delayed_work_sync(&ctrl->reconnect_work);
-
 	nvme_rdma_teardown_io_queues(ctrl, shutdown);
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	if (shutdown)
@@ -2282,6 +2287,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_rdma_stop_ctrl,
 };
 
 /*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 1821d38e620e..20138e132558 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2163,9 +2163,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
-	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
-
 	nvme_tcp_teardown_io_queues(ctrl, shutdown);
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	if (shutdown)
@@ -2205,6 +2202,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 	nvme_tcp_reconnect_or_remove(ctrl);
 }
 
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
 static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
@@ -2528,6 +2531,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
 	.submit_async_event	= nvme_tcp_submit_async_event,
 	.delete_ctrl		= nvme_tcp_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_tcp_stop_ctrl,
 };
 
 static bool
-- 
2.35.1


  parent reply	other threads:[~2022-07-06 15:34 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-06 15:31 [PATCH AUTOSEL 5.15 01/18] ksmbd: use SOCK_NONBLOCK type for kernel_accept() Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 02/18] powerpc/xive/spapr: correct bitmap allocation size Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 03/18] vdpa/mlx5: Initialize CVQ vringh only once Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 04/18] vduse: Tie vduse mgmtdev and its device Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 05/18] virtio_mmio: Add missing PM calls to freeze/restore Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 06/18] virtio_mmio: Restore guest page size on resume Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 07/18] netfilter: br_netfilter: do not skip all hooks with 0 priority Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 08/18] scsi: hisi_sas: Limit max hw sectors for v3 HW Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 09/18] cpufreq: pmac32-cpufreq: Fix refcount leak bug Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 10/18] platform/x86: hp-wmi: Ignore Sanitization Mode event Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 11/18] firmware: sysfb: Make sysfb_create_simplefb() return a pdev pointer Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 12/18] firmware: sysfb: Add sysfb_disable() helper function Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 13/18] fbdev: Disable sysfb device registration when removing conflicting FBs Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 14/18] net: tipc: fix possible refcount leak in tipc_sk_create() Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 15/18] NFC: nxp-nci: don't print header length mismatch on i2c error Sasha Levin
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 16/18] nvme-tcp: always fail a request when sending it failed Sasha Levin
2022-07-06 15:31 ` Sasha Levin [this message]
2022-07-06 15:31 ` [PATCH AUTOSEL 5.15 18/18] net: sfp: fix memory leak in sfp_probe() Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220706153153.1598076-17-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=axboe@fb.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=liruozhu@huawei.com \
    --cc=sagi@grimberg.me \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox