From: Taehee Yoo <ap420073@gmail.com>
To: linux-nvme@lists.infradead.org, kbusch@kernel.org, axboe@fb.com,
hch@lst.de, sagi@grimberg.me, kch@nvidia.com
Cc: james.p.freyensee@intel.com, ming.l@ssi.samsung.com,
larrystevenwise@gmail.com, anthony.j.knapp@intel.com,
pizhenwei@bytedance.com, ap420073@gmail.com
Subject: [PATCH 2/4] nvme: fix reset uninitialized controller
Date: Tue, 3 Jan 2023 10:03:55 +0000 [thread overview]
Message-ID: <20230103100357.875854-3-ap420073@gmail.com> (raw)
In-Reply-To: <20230103100357.875854-1-ap420073@gmail.com>
nvme-fabric controllers can be reset by
/sys/class/nvme/nvme#/reset_controller
echo 1 > /sys/class/nvme/nvme#/reset_controller
The above command will call nvme_sysfs_reset().
This function internally calls ctrl->reset_work synchronously or
asynchronously.
At this point, it doesn't sure if the controller will be reset after
initialization.
So kernel panic would occur because ctrl->reset_work dereferences
uninitialized values.
In order to avoid this, nvme_sysfs_reset checks
the NVME_CTRL_STARTED_ONCE flag. This flag indicates the controller is
initialized fully. So, reset logic can be executed safely.
WARNING: CPU: 1 PID: 462 at kernel/workqueue.c:3066 __flush_work+0x74f/0x960
Modules linked in: nvme_tcp xt_conntrack xt_MASQUERADE nf_conntrack_netlink xfrm_user xt_addrtype iptable_filter iptable_nat br_netfilter bridge stp llc crct10dif_pclmul crc32_generic crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 overlay openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 mlx5_ib ib_uverbs ib_core xts cts ecb mlx5_core aesni_intel crypto_simd cryptd mlxfw ptp sch_fq_codel nf_tables nfnetlink ip_tables x_tables unix
CPU: 1 PID: 462 Comm: kworker/u16:5 Not tainted 6.1.0+ #52 1d16bdc3867491ba5cf2147d49bd76d7eacb8fd9
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
Workqueue: nvme-reset-wq nvme_reset_ctrl_work [nvme_tcp]
RIP: 0010:__flush_work+0x74f/0x960
Code: c0 74 6c e8 53 97 17 00 48 c7 c6 c8 4a 1b 84 48 c7 c7 80 70 b9 8e 45 31 f6 e8 cd 53 0f 00 e9 5d fd ff ff 0f 0b e9 56 fd ff ff <0f> 0b 45 31 f6 e9 4c fd ff ff 4c 89 ef e8 4f 81 2a 02 e8 ea 75 16
RSP: 0018:ffff888116507a50 EFLAGS: 00010246
RAX: ffff88800646b490 RBX: 0000000000000011 RCX: 1ffffffff1e59f99
RDX: dffffc0000000000 RSI: 0000000000000001 RDI: ffff88800646b490
RBP: ffff888116507be8 R08: 0000000000000000 R09: 0000000000000000
R10: ffffed1000c8d692 R11: 0000000000000001 R12: 1ffff11022ca0f50
R13: 1ffff11022ca0f80 R14: 0000000000000001 R15: ffff88800646b4a8
FS: 0000000000000000(0000) GS:ffff888117a00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055953af11fd0 CR3: 0000000106f62001 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
? _raw_spin_unlock_irqrestore+0x59/0x70
? queue_delayed_work_on+0xa0/0xa0
? lock_release+0x631/0xe80
? __up_read+0x192/0x730
? up_write+0x520/0x520
? rcu_read_lock_sched_held+0x12/0x80
? lock_release+0x631/0xe80
? rcu_read_lock_sched_held+0x12/0x80
? try_to_grab_pending.part.0+0x23/0x540
__cancel_work_timer+0x2cb/0x3f0
? cancel_delayed_work+0x10/0x10
? rcu_read_lock_sched_held+0x12/0x80
? lock_acquire+0x4f4/0x630
? lockdep_hardirqs_on_prepare+0x410/0x410
? lock_downgrade+0x700/0x700
? finish_task_switch.isra.0+0x23b/0x870
? trace_hardirqs_on+0x3c/0x190
nvme_stop_ctrl+0x17/0x150
nvme_reset_ctrl_work+0x19/0x120 [nvme_tcp aa1d0deebfd175637ed368a54a16dfbb09be290f]
[ ... ]
Fixes: f3ca80fc11c3 ("nvme: move chardev and sysfs interface to common code")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
drivers/nvme/host/core.c | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cd4c80ca66d4..418bd865c838 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -134,6 +134,14 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
queue_work(nvme_wq, &ctrl->scan_work);
}
+void nvme_queue_scan_sync(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset) {
+ queue_work(nvme_wq, &ctrl->scan_work);
+ flush_work(&ctrl->scan_work);
+ }
+}
+
/*
* Use this function to proceed with scheduling reset_work for a controller
* that had previously been set to the resetting state. This is intended for
@@ -1150,10 +1158,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
dev_info(ctrl->device,
"controller capabilities changed, reset may be required to take effect.\n");
}
- if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
- nvme_queue_scan(ctrl);
- flush_work(&ctrl->scan_work);
- }
+ if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
+ nvme_queue_scan_sync(ctrl);
switch (cmd->common.opcode) {
case nvme_admin_set_features:
@@ -3350,9 +3356,11 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
int ret;
- ret = nvme_reset_ctrl_sync(ctrl);
- if (ret < 0)
- return ret;
+ if (test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags)) {
+ ret = nvme_reset_ctrl_sync(ctrl);
+ if (ret < 0)
+ return ret;
+ }
return count;
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
@@ -4994,16 +5002,18 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
* that were missed. We identify persistent discovery controllers by
* checking that they started once before, hence are reconnecting back.
*/
- if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
+ if (test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
nvme_discovery_ctrl(ctrl))
nvme_change_uevent(ctrl, "NVME_EVENT=rediscover");
if (ctrl->queue_count > 1) {
- nvme_queue_scan(ctrl);
+ nvme_queue_scan_sync(ctrl);
nvme_unquiesce_io_queues(ctrl);
nvme_mpath_update(ctrl);
}
+ set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags);
+
nvme_change_uevent(ctrl, "NVME_EVENT=connected");
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
--
2.34.1
next prev parent reply other threads:[~2023-01-03 10:15 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-03 10:03 [PATCH 0/4] nvme: fix several bugs in nvme-fabric Taehee Yoo
2023-01-03 10:03 ` [PATCH 1/4] nvme: fix delete uninitialized controller Taehee Yoo
2023-01-03 10:30 ` Sagi Grimberg
2023-01-04 0:24 ` Chaitanya Kulkarni
2023-01-04 2:42 ` Taehee Yoo
2023-01-03 10:03 ` Taehee Yoo [this message]
2023-01-03 10:32 ` [PATCH 2/4] nvme: fix reset " Sagi Grimberg
2023-01-03 10:03 ` [PATCH 3/4] nvmet: fix hang in nvmet_ns_disable() Taehee Yoo
2023-01-03 10:58 ` Sagi Grimberg
2023-01-04 0:32 ` Chaitanya Kulkarni
2023-01-04 8:56 ` Taehee Yoo
2023-01-03 10:03 ` [PATCH 4/4] nvmet-tcp: fix memory leak in nvmet_tcp_free_cmd_data_in_buffers() Taehee Yoo
2023-01-03 10:54 ` Sagi Grimberg
2023-01-04 8:44 ` Taehee Yoo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230103100357.875854-3-ap420073@gmail.com \
--to=ap420073@gmail.com \
--cc=anthony.j.knapp@intel.com \
--cc=axboe@fb.com \
--cc=hch@lst.de \
--cc=james.p.freyensee@intel.com \
--cc=kbusch@kernel.org \
--cc=kch@nvidia.com \
--cc=larrystevenwise@gmail.com \
--cc=linux-nvme@lists.infradead.org \
--cc=ming.l@ssi.samsung.com \
--cc=pizhenwei@bytedance.com \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.