All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Ruozhu Li <liruozhu@huawei.com>,
	Sagi Grimberg <sagi@grimberg.me>, Christoph Hellwig <hch@lst.de>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.4 52/71] nvme: fix regression when disconnect a recovering ctrl
Date: Tue, 19 Jul 2022 13:54:15 +0200	[thread overview]
Message-ID: <20220719114557.374203935@linuxfoundation.org> (raw)
In-Reply-To: <20220719114552.477018590@linuxfoundation.org>

From: Ruozhu Li <liruozhu@huawei.com>

[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]

We encountered a problem that the disconnect command hangs.
After analyzing the log and stack, we found that the triggering
process is as follows:
CPU0                          CPU1
                                nvme_rdma_error_recovery_work
                                  nvme_rdma_teardown_io_queues
nvme_do_delete_ctrl                 nvme_stop_queues
  nvme_remove_namespaces
  --clear ctrl->namespaces
                                    nvme_start_queues
                                    --no ns in ctrl->namespaces
    nvme_ns_remove                  return(because ctrl is deleting)
      blk_freeze_queue
        blk_mq_freeze_queue_wait
        --wait for ns to unquiesce to clean infligt IO, hang forever

This problem was not found in older kernels because we will flush
err work in nvme_stop_ctrl before nvme_remove_namespaces.It does not
seem to be modified for functional reasons, the patch can be revert
to solve the problem.

Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")

Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/core.c |  2 ++
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/rdma.c | 12 +++++++++---
 drivers/nvme/host/tcp.c  | 10 +++++++---
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 79e22618817d..d2ea6ca37c41 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4034,6 +4034,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
+	if (ctrl->ops->stop_ctrl)
+		ctrl->ops->stop_ctrl(ctrl);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1d1431dd4f9e..81a5b968253f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -402,6 +402,7 @@ struct nvme_ctrl_ops {
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl);
 	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
+	void (*stop_ctrl)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4213c71b02a4..d5d7b2f98edc 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -973,6 +973,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 	}
 }
 
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+	cancel_work_sync(&ctrl->err_work);
+	cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -1947,9 +1955,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&ctrl->err_work);
-	cancel_delayed_work_sync(&ctrl->reconnect_work);
-
 	nvme_rdma_teardown_io_queues(ctrl, shutdown);
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	if (shutdown)
@@ -1999,6 +2004,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_rdma_stop_ctrl,
 };
 
 /*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4378344f0e7a..2a27ac9aedba 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1973,9 +1973,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
-	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
-	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
-
 	nvme_tcp_teardown_io_queues(ctrl, shutdown);
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	if (shutdown)
@@ -2014,6 +2011,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 	nvme_tcp_reconnect_or_remove(ctrl);
 }
 
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+	cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
 static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
@@ -2322,6 +2325,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
 	.submit_async_event	= nvme_tcp_submit_async_event,
 	.delete_ctrl		= nvme_tcp_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.stop_ctrl		= nvme_tcp_stop_ctrl,
 };
 
 static bool
-- 
2.35.1




  parent reply	other threads:[~2022-07-19 12:12 UTC|newest]

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-19 11:53 [PATCH 5.4 00/71] 5.4.207-rc1 review Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 01/71] ALSA: hda - Add fixup for Dell Latitidue E5430 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 02/71] ALSA: hda/conexant: Apply quirk for another HP ProDesk 600 G3 model Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 03/71] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 04/71] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc221 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 05/71] ALSA: hda/realtek - Enable the headset-mic on a Xiaomis laptop Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 06/71] xen/netback: avoid entering xenvif_rx_next_skb() with an empty rx queue Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 07/71] tracing/histograms: Fix memory leak problem Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 08/71] net: sock: tracing: Fix sock_exceed_buf_limit not to dereference stale pointer Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 09/71] ip: fix dflt addr selection for connected nexthop Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 10/71] ARM: 9213/1: Print message about disabled Spectre workarounds only once Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 11/71] ARM: 9214/1: alignment: advance IT state after emulating Thumb instruction Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 12/71] wifi: mac80211: fix queue selection for mesh/OCB interfaces Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 13/71] cgroup: Use separate src/dst nodes when preloading css_sets for migration Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 14/71] drm/panfrost: Fix shrinker list corruption by madvise IOCTL Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 15/71] nilfs2: fix incorrect masking of permission flags for symlinks Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 16/71] Revert "evm: Fix memleak in init_desc" Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 17/71] sched/rt: Disable RT_RUNTIME_SHARE by default Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 18/71] ext4: fix race condition between ext4_write and ext4_convert_inline_data Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 19/71] ARM: dts: imx6qdl-ts7970: Fix ngpio typo and count Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 20/71] ARM: 9209/1: Spectre-BHB: avoid pr_info() every time a CPU comes out of idle Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 21/71] ARM: 9210/1: Mark the FDT_FIXED sections as shareable Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 22/71] drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector() Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 23/71] ima: Fix a potential integer overflow in ima_appraise_measurement Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 24/71] ASoC: sgtl5000: Fix noise on shutdown/remove Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 25/71] net: stmmac: dwc-qos: Disable split header for Tegra194 Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 26/71] inetpeer: Fix data-races around sysctl Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 27/71] net: Fix data-races around sysctl_mem Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 28/71] cipso: Fix data-races around sysctl Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 29/71] icmp: " Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 30/71] ipv4: Fix a data-race around sysctl_fib_sync_mem Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 31/71] ARM: dts: at91: sama5d2: Fix typo in i2s1 node Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 32/71] ARM: dts: sunxi: Fix SPI NOR campatible on Orange Pi Zero Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 33/71] drm/i915/gt: Serialize TLB invalidates with GT resets Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 34/71] icmp: Fix a data-race around sysctl_icmp_ratelimit Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 35/71] icmp: Fix a data-race around sysctl_icmp_ratemask Greg Kroah-Hartman
2022-07-19 11:53 ` [PATCH 5.4 36/71] raw: Fix a data-race around sysctl_raw_l3mdev_accept Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 37/71] ipv4: Fix data-races around sysctl_ip_dynaddr Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 38/71] net: ftgmac100: Hold reference returned by of_get_child_by_name() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 39/71] sfc: fix use after free when disabling sriov Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 40/71] seg6: fix skb checksum evaluation in SRH encapsulation/insertion Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 41/71] seg6: fix skb checksum in SRv6 End.B6 and End.B6.Encaps behaviors Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 42/71] seg6: bpf: fix skb checksum in bpf_push_seg6_encap() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 43/71] sfc: fix kernel panic when creating VF Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 44/71] mm: sysctl: fix missing numa_stat when !CONFIG_HUGETLB_PAGE Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 45/71] virtio_mmio: Add missing PM calls to freeze/restore Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 46/71] virtio_mmio: Restore guest page size on resume Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 47/71] netfilter: br_netfilter: do not skip all hooks with 0 priority Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 48/71] cpufreq: pmac32-cpufreq: Fix refcount leak bug Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 49/71] platform/x86: hp-wmi: Ignore Sanitization Mode event Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 50/71] net: tipc: fix possible refcount leak in tipc_sk_create() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 51/71] NFC: nxp-nci: dont print header length mismatch on i2c error Greg Kroah-Hartman
2022-07-19 11:54 ` Greg Kroah-Hartman [this message]
2022-07-19 11:54 ` [PATCH 5.4 53/71] net: sfp: fix memory leak in sfp_probe() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 54/71] ASoC: ops: Fix off by one in range control validation Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 55/71] ASoC: wm5110: Fix DRE control Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 56/71] ASoC: cs47l15: Fix event generation for low power mux control Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 57/71] ASoC: madera: Fix event generation for OUT1 demux Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 58/71] ASoC: madera: Fix event generation for rate controls Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 59/71] irqchip: or1k-pic: Undefine mask_ack for level triggered hardware Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 60/71] x86: Clear .brk area at early boot Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 61/71] soc: ixp4xx/npe: Fix unused match warning Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 62/71] ARM: dts: stm32: use the correct clock source for CEC on stm32mp151 Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 63/71] signal handling: dont use BUG_ON() for debugging Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 64/71] USB: serial: ftdi_sio: add Belimo device ids Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 65/71] usb: typec: add missing uevent when partner support PD Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 66/71] usb: dwc3: gadget: Fix event pending check Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 67/71] tty: serial: samsung_tty: set dma burst_size to 1 Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 68/71] serial: 8250: fix return error code in serial8250_request_std_resource() Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 69/71] serial: stm32: Clear prev values before setting RTS delays Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 70/71] serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle Greg Kroah-Hartman
2022-07-19 11:54 ` [PATCH 5.4 71/71] can: m_can: m_can_tx_handler(): fix use after free of skb Greg Kroah-Hartman
2022-07-19 18:11 ` [PATCH 5.4 00/71] 5.4.207-rc1 review Florian Fainelli
2022-07-20  0:59 ` Samuel Zou
2022-07-20  6:18 ` Guenter Roeck
2022-07-20  8:40 ` Jon Hunter
2022-07-20  9:42 ` Naresh Kamboju
2022-07-20 14:50 ` Sudip Mukherjee (Codethink)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220719114557.374203935@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liruozhu@huawei.com \
    --cc=sagi@grimberg.me \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.