From: Mohamed Khalfella <mkhalfella@purestorage.com>
To: Justin Tee <justin.tee@broadcom.com>,
Naresh Gottumukkala <nareshgottumukkala83@gmail.com>,
Paul Ely <paul.ely@broadcom.com>,
Chaitanya Kulkarni <kch@nvidia.com>, Jens Axboe <axboe@kernel.dk>,
Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>,
James Smart <jsmart833426@gmail.com>,
Hannes Reinecke <hare@suse.de>
Cc: Aaron Dailey <adailey@purestorage.com>,
Randy Jennings <randyj@purestorage.com>,
Dhaval Giani <dgiani@purestorage.com>,
linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org,
Mohamed Khalfella <mkhalfella@purestorage.com>
Subject: [PATCH v4 14/15] nvme-fc: Hold inflight requests while in FENCING state
Date: Fri, 27 Mar 2026 17:43:45 -0700 [thread overview]
Message-ID: <20260328004518.1729186-15-mkhalfella@purestorage.com> (raw)
In-Reply-To: <20260328004518.1729186-1-mkhalfella@purestorage.com>
While in FENCING state, aborted inflight IOs should be held until fencing
is done. Update nvme_fc_fcpio_done() to not complete aborted requests or
requests with transport errors. These held requests will be canceled in
nvme_fc_delete_association() after fencing is done. nvme_fc_fcpio_done()
avoids racing with canceling aborted requests by making sure we complete
successful requests before waking up the waiting thread.
Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
Signed-off-by: James Smart <jsmart833426@gmail.com>
---
drivers/nvme/host/fc.c | 61 +++++++++++++++++++++++++++++++++++-------
1 file changed, 51 insertions(+), 10 deletions(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9f6b95415f25..eea5a90d936b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -172,7 +172,7 @@ struct nvme_fc_ctrl {
struct kref ref;
unsigned long flags;
- u32 iocnt;
+ atomic_t iocnt;
wait_queue_head_t ioabort_wait;
struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS];
@@ -1823,7 +1823,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
atomic_set(&op->state, opstate);
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
op->flags |= FCOP_FLAGS_TERMIO;
- ctrl->iocnt++;
+ atomic_inc(&ctrl->iocnt);
}
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -1853,20 +1853,29 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
}
static inline void
+__nvme_fc_fcpop_count_one_down(struct nvme_fc_ctrl *ctrl)
+{
+ if (atomic_dec_return(&ctrl->iocnt) == 0)
+ wake_up(&ctrl->ioabort_wait);
+}
+
+static inline bool
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_fcp_op *op, int opstate)
{
unsigned long flags;
+ bool ret = false;
if (opstate == FCPOP_STATE_ABORTED) {
spin_lock_irqsave(&ctrl->lock, flags);
if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
op->flags & FCOP_FLAGS_TERMIO) {
- if (!--ctrl->iocnt)
- wake_up(&ctrl->ioabort_wait);
+ ret = true;
}
spin_unlock_irqrestore(&ctrl->lock, flags);
}
+
+ return ret;
}
static void nvme_fc_fencing_work(struct work_struct *work)
@@ -1966,7 +1975,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
struct nvme_command *sqe = &op->cmd_iu.sqe;
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
union nvme_result result;
- bool terminate_assoc = true;
+ bool op_term, terminate_assoc = true;
+ enum nvme_ctrl_state state;
int opstate;
/*
@@ -2099,16 +2109,38 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
done:
if (op->flags & FCOP_FLAGS_AEN) {
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
- __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+ if (__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate))
+ __nvme_fc_fcpop_count_one_down(ctrl);
atomic_set(&op->state, FCPOP_STATE_IDLE);
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
nvme_fc_ctrl_put(ctrl);
goto check_error;
}
- __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+ /*
+ * We cannot access op after the request is completed because it can
+ * be reused immediately. At the same time we want to wakeup the thread
+ * waiting for ongoing IOs _after_ requests are completed. This is
+ * necessary because that thread will start canceling inflight IOs
+ * and we want to avoid request completion racing with cancellation.
+ */
+ op_term = __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+
+ /*
+ * If we are going to terminate associations and the controller is
+ * LIVE or FENCING, then do not complete this request now. Let error
+ * recovery cancel this request when it is safe to do so.
+ */
+ state = nvme_ctrl_state(&ctrl->ctrl);
+ if (terminate_assoc &&
+ (state == NVME_CTRL_LIVE || state == NVME_CTRL_FENCING))
+ goto check_op_term;
+
if (!nvme_try_complete_req(rq, status, result))
nvme_fc_complete_rq(rq);
+check_op_term:
+ if (op_term)
+ __nvme_fc_fcpop_count_one_down(ctrl);
check_error:
if (terminate_assoc)
@@ -2747,7 +2779,8 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
* cmd with the csn was supposed to arrive.
*/
opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
- __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+ if (__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate))
+ __nvme_fc_fcpop_count_one_down(ctrl);
if (!(op->flags & FCOP_FLAGS_AEN)) {
nvme_fc_unmap_data(ctrl, op->rq, op);
@@ -3216,7 +3249,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
spin_lock_irqsave(&ctrl->lock, flags);
set_bit(FCCTRL_TERMIO, &ctrl->flags);
- ctrl->iocnt = 0;
+ atomic_set(&ctrl->iocnt, 0);
spin_unlock_irqrestore(&ctrl->lock, flags);
__nvme_fc_abort_outstanding_ios(ctrl, false);
@@ -3225,11 +3258,19 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
nvme_fc_abort_aen_ops(ctrl);
/* wait for all io that had to be aborted */
+ wait_event(ctrl->ioabort_wait, atomic_read(&ctrl->iocnt) == 0);
spin_lock_irq(&ctrl->lock);
- wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
clear_bit(FCCTRL_TERMIO, &ctrl->flags);
spin_unlock_irq(&ctrl->lock);
+ /*
+ * At this point all inflight requests have been successfully
+ * aborted. Now it is safe to cancel all requests we decided
+ * not to complete in nvme_fc_fcpio_done().
+ */
+ nvme_cancel_tagset(&ctrl->ctrl);
+ nvme_cancel_admin_tagset(&ctrl->ctrl);
+
nvme_fc_term_aen_ops(ctrl);
/*
--
2.52.0
next prev parent reply other threads:[~2026-03-28 0:46 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-28 0:43 [PATCH v4 00/15] TP8028 Rapid Path Failure Recovery Mohamed Khalfella
2026-03-28 0:43 ` [PATCH v4 01/15] nvmet: Rapid Path Failure Recovery set controller identify fields Mohamed Khalfella
2026-03-30 10:37 ` Hannes Reinecke
2026-05-15 2:08 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 02/15] nvmet/debugfs: Export controller CIU and CIRN via debugfs Mohamed Khalfella
2026-05-14 23:42 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 03/15] nvmet: Implement CCR nvme command Mohamed Khalfella
2026-03-30 10:45 ` Hannes Reinecke
2026-03-31 16:38 ` Mohamed Khalfella
2026-04-07 5:40 ` Hannes Reinecke
2026-05-15 0:18 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 04/15] nvmet: Implement CCR logpage Mohamed Khalfella
2026-05-15 0:38 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 05/15] nvmet: Send an AEN on CCR completion Mohamed Khalfella
2026-05-15 0:50 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 06/15] nvme: Rapid Path Failure Recovery read controller identify fields Mohamed Khalfella
2026-05-15 2:03 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 07/15] nvme: Introduce FENCING and FENCED controller states Mohamed Khalfella
2026-03-30 10:46 ` Hannes Reinecke
2026-05-15 2:06 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 08/15] nvme: Implement cross-controller reset recovery Mohamed Khalfella
2026-03-30 10:50 ` Hannes Reinecke
2026-03-31 16:47 ` Mohamed Khalfella
2026-04-07 5:39 ` Hannes Reinecke
2026-04-07 20:46 ` Mohamed Khalfella
2026-04-13 15:25 ` Randy Jennings
2026-04-13 16:33 ` Mohamed Khalfella
2026-04-24 23:07 ` Randy Jennings
2026-03-28 0:43 ` [PATCH v4 09/15] nvme: Implement cross-controller reset completion Mohamed Khalfella
2026-03-30 10:53 ` Hannes Reinecke
2026-03-31 16:55 ` Mohamed Khalfella
2026-04-07 5:48 ` Hannes Reinecke
2026-04-07 19:09 ` Mohamed Khalfella
2026-03-28 0:43 ` [PATCH v4 10/15] nvme-tcp: Use CCR to recover controller that hits an error Mohamed Khalfella
2026-03-30 11:00 ` Hannes Reinecke
2026-03-28 0:43 ` [PATCH v4 11/15] nvme-rdma: " Mohamed Khalfella
2026-03-28 0:43 ` [PATCH v4 12/15] nvme-fc: Refactor IO error recovery Mohamed Khalfella
2026-03-28 0:43 ` [PATCH v4 13/15] nvme-fc: Use CCR to recover controller that hits an error Mohamed Khalfella
2026-03-28 0:43 ` Mohamed Khalfella [this message]
2026-03-28  0:43 ` [PATCH v4 15/15] nvme-fc: Do not cancel requests in io target before it is initialized Mohamed Khalfella
2026-05-12 21:40 ` [PATCH v4 00/15] TP8028 Rapid Path Failure Recovery Mohamed Khalfella
2026-05-12 22:02 ` Sagi Grimberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260328004518.1729186-15-mkhalfella@purestorage.com \
--to=mkhalfella@purestorage.com \
--cc=adailey@purestorage.com \
--cc=axboe@kernel.dk \
--cc=dgiani@purestorage.com \
--cc=hare@suse.de \
--cc=jsmart833426@gmail.com \
--cc=justin.tee@broadcom.com \
--cc=kbusch@kernel.org \
--cc=kch@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=nareshgottumukkala83@gmail.com \
--cc=paul.ely@broadcom.com \
--cc=randyj@purestorage.com \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.