public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Ruozhu Li <liruozhu@huawei.com>,
	Sagi Grimberg <sagi@grimberg.me>, Christoph Hellwig <hch@lst.de>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.4 52/71] nvme: fix regression when disconnect a recovering ctrl
Date: Tue, 19 Jul 2022 13:54:15 +0200	[thread overview]
Message-ID: <20220719114557.374203935@linuxfoundation.org> (raw)
In-Reply-To: <20220719114552.477018590@linuxfoundation.org>

From: Ruozhu Li <liruozhu@huawei.com>

[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]

We encountered a problem that the disconnect command hangs.
After analyzing the log and stack, we found that the triggering
process is as follows:
CPU0                          CPU1
                                nvme_rdma_error_recovery_work
                                  nvme_rdma_teardown_io_queues
nvme_do_delete_ctrl                 nvme_stop_queues
  nvme_remove_namespaces
  --clear ctrl->namespaces
                                    nvme_start_queues
                                    --no ns in ctrl->namespaces
    nvme_ns_remove                  return(because ctrl is deleting)
      blk_freeze_queue
        blk_mq_freeze_queue_wait
        --wait for ns to unquiesce to clean inflight IO, hang forever

This problem was not found in older kernels because we would flush
err work in nvme_stop_ctrl before nvme_remove_namespaces. It does not
seem to have been modified for functional reasons, so the patch can be
reverted to solve the problem.

Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")

Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/core.c |  2 ++
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/rdma.c | 12 +++++++++---
 drivers/nvme/host/tcp.c  | 10 +++++++---
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 79e22618817d..d2ea6ca37c41 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4034,6 +4034,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
+	if (ctrl->ops->stop_ctrl)
+		ctrl->ops->stop_ctrl(ctrl);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1d1431dd4f9e..81a5b968253f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -402,6 +402,7 @@ struct nvme_ctrl_ops {
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl);
 	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
+	void (*stop_ctrl)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4213c71b02a4..d5d7b2f98edc 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -973,6 +973,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 	}
 }
 
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+	cancel_work_sync(&ctrl->err_work);
+	cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -1947,9 +1955,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&ctrl->err_work);
-	cancel_delayed_work_sync(&ctrl->reconnect_work);
-
 	nvme_rdma_teardown_io_queues(ctrl, shutdown);
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	if (shutdown)
@@ -1999,6 +2004,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_rdma_stop_ctrl,
 };
 
 /*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4378344f0e7a..2a27ac9aedba 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1973,9 +1973,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
-	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
-
 	nvme_tcp_teardown_io_queues(ctrl, shutdown);
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	if (shutdown)
@@ -2014,6 +2011,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 	nvme_tcp_reconnect_or_remove(ctrl);
 }
 
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
 static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
@@ -2322,6 +2325,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
 	.submit_async_event	= nvme_tcp_submit_async_event,
 	.delete_ctrl		= nvme_tcp_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_tcp_stop_ctrl,
 };
 
 static bool
-- 
2.35.1




  parent reply	other threads:[~2022-07-19 12:12 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-19 11:53 [PATCH 5.4 00/71] 5.4.207-rc1 review Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 01/71] ALSA: hda - Add fixup for Dell Latitidue E5430 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 02/71] ALSA: hda/conexant: Apply quirk for another HP ProDesk 600 G3 model Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 03/71] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 04/71] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc221 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 05/71] ALSA: hda/realtek - Enable the headset-mic on a Xiaomis laptop Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 06/71] xen/netback: avoid entering xenvif_rx_next_skb() with an empty rx queue Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 07/71] tracing/histograms: Fix memory leak problem Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 08/71] net: sock: tracing: Fix sock_exceed_buf_limit not to dereference stale pointer Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 09/71] ip: fix dflt addr selection for connected nexthop Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 10/71] ARM: 9213/1: Print message about disabled Spectre workarounds only once Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 11/71] ARM: 9214/1: alignment: advance IT state after emulating Thumb instruction Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 12/71] wifi: mac80211: fix queue selection for mesh/OCB interfaces Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 13/71] cgroup: Use separate src/dst nodes when preloading css_sets for migration Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 14/71] drm/panfrost: Fix shrinker list corruption by madvise IOCTL Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 15/71] nilfs2: fix incorrect masking of permission flags for symlinks Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 16/71] Revert "evm: Fix memleak in init_desc" Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 17/71] sched/rt: Disable RT_RUNTIME_SHARE by default Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 18/71] ext4: fix race condition between ext4_write and ext4_convert_inline_data Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 19/71] ARM: dts: imx6qdl-ts7970: Fix ngpio typo and count Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 20/71] ARM: 9209/1: Spectre-BHB: avoid pr_info() every time a CPU comes out of idle Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 21/71] ARM: 9210/1: Mark the FDT_FIXED sections as shareable Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 22/71] drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector() Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 23/71] ima: Fix a potential integer overflow in ima_appraise_measurement Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 24/71] ASoC: sgtl5000: Fix noise on shutdown/remove Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 25/71] net: stmmac: dwc-qos: Disable split header for Tegra194 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 26/71] inetpeer: Fix data-races around sysctl Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 27/71] net: Fix data-races around sysctl_mem Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 28/71] cipso: Fix data-races around sysctl Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 29/71] icmp: " Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 30/71] ipv4: Fix a data-race around sysctl_fib_sync_mem Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 31/71] ARM: dts: at91: sama5d2: Fix typo in i2s1 node Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 32/71] ARM: dts: sunxi: Fix SPI NOR campatible on Orange Pi Zero Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 33/71] drm/i915/gt: Serialize TLB invalidates with GT resets Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 34/71] icmp: Fix a data-race around sysctl_icmp_ratelimit Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 35/71] icmp: Fix a data-race around sysctl_icmp_ratemask Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 36/71] raw: Fix a data-race around sysctl_raw_l3mdev_accept Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 37/71] ipv4: Fix data-races around sysctl_ip_dynaddr Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 38/71] net: ftgmac100: Hold reference returned by of_get_child_by_name() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 39/71] sfc: fix use after free when disabling sriov Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 40/71] seg6: fix skb checksum evaluation in SRH encapsulation/insertion Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 41/71] seg6: fix skb checksum in SRv6 End.B6 and End.B6.Encaps behaviors Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 42/71] seg6: bpf: fix skb checksum in bpf_push_seg6_encap() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 43/71] sfc: fix kernel panic when creating VF Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 44/71] mm: sysctl: fix missing numa_stat when !CONFIG_HUGETLB_PAGE Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 45/71] virtio_mmio: Add missing PM calls to freeze/restore Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 46/71] virtio_mmio: Restore guest page size on resume Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 47/71] netfilter: br_netfilter: do not skip all hooks with 0 priority Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 48/71] cpufreq: pmac32-cpufreq: Fix refcount leak bug Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 49/71] platform/x86: hp-wmi: Ignore Sanitization Mode event Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 50/71] net: tipc: fix possible refcount leak in tipc_sk_create() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 51/71] NFC: nxp-nci: dont print header length mismatch on i2c error Greg Kroah-Hartman
2022-07-19 11:54 ` Greg Kroah-Hartman [this message]
2022-07-19 11:54 ` [PATCH 5.4 53/71] net: sfp: fix memory leak in sfp_probe() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 54/71] ASoC: ops: Fix off by one in range control validation Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 55/71] ASoC: wm5110: Fix DRE control Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 56/71] ASoC: cs47l15: Fix event generation for low power mux control Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 57/71] ASoC: madera: Fix event generation for OUT1 demux Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 58/71] ASoC: madera: Fix event generation for rate controls Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 59/71] irqchip: or1k-pic: Undefine mask_ack for level triggered hardware Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 60/71] x86: Clear .brk area at early boot Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 61/71] soc: ixp4xx/npe: Fix unused match warning Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 62/71] ARM: dts: stm32: use the correct clock source for CEC on stm32mp151 Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 63/71] signal handling: dont use BUG_ON() for debugging Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 64/71] USB: serial: ftdi_sio: add Belimo device ids Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 65/71] usb: typec: add missing uevent when partner support PD Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 66/71] usb: dwc3: gadget: Fix event pending check Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 67/71] tty: serial: samsung_tty: set dma burst_size to 1 Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 68/71] serial: 8250: fix return error code in serial8250_request_std_resource() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 69/71] serial: stm32: Clear prev values before setting RTS delays Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 70/71] serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 71/71] can: m_can: m_can_tx_handler(): fix use after free of skb Greg Kroah-Hartman
2022-07-19 18:11 ` [PATCH 5.4 00/71] 5.4.207-rc1 review Florian Fainelli
2022-07-20  0:59 ` Samuel Zou
2022-07-20  6:18 ` Guenter Roeck
2022-07-20  9:42 ` Naresh Kamboju
2022-07-20 14:50 ` Sudip Mukherjee (Codethink)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220719114557.374203935@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liruozhu@huawei.com \
    --cc=sagi@grimberg.me \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox