[PATCH 04/10] nvme-fabrics: introduce nvmf_error_recovery API

public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed

From: Max Gurtovoy <mgurtovoy@nvidia.com>
To: <linux-nvme@lists.infradead.org>, <hch@lst.de>,
	<kbusch@kernel.org>, <sagi@grimberg.me>
Cc: <chaitanyak@nvidia.com>, <israelr@nvidia.com>, <oren@nvidia.com>,
	<hare@suse.de>, <jsmart2021@gmail.com>,
	Max Gurtovoy <mgurtovoy@nvidia.com>
Subject: [PATCH 04/10] nvme-fabrics: introduce nvmf_error_recovery API
Date: Wed, 20 Oct 2021 13:38:38 +0300	[thread overview]
Message-ID: <20211020103844.7533-5-mgurtovoy@nvidia.com> (raw)
In-Reply-To: <20211020103844.7533-1-mgurtovoy@nvidia.com>

The error recovery mechanism is duplicated in the RDMA and TCP transports.
Move this logic to common code.

Also update the RDMA/TCP transport drivers to use this API and remove
the duplicated code.

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Israel Rukshin <israelr@nvidia.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
---
 drivers/nvme/host/fabrics.c | 10 ++++++++++
 drivers/nvme/host/fabrics.h |  1 +
 drivers/nvme/host/rdma.c    | 25 ++++++++-----------------
 drivers/nvme/host/tcp.c     | 19 +++++--------------
 4 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 4a1ef67c6fb3..2edd086fa922 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -493,6 +493,16 @@ void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
 
+void nvmf_error_recovery(struct nvme_ctrl *ctrl)
+{
+	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+		return;
+
+	dev_warn(ctrl->device, "starting error recovery\n");
+	queue_work(nvme_reset_wq, &ctrl->err_work);
+}
+EXPORT_SYMBOL_GPL(nvmf_error_recovery);
+
 /**
  * nvmf_register_transport() - NVMe Fabrics Library registration function.
  * @ops:	Transport ops instance to be registered to the
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index de213ab26977..3d8ec7133fc8 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -189,6 +189,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl);
+void nvmf_error_recovery(struct nvme_ctrl *ctrl);
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 		struct nvmf_ctrl_options *opts);
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index da7f61a5fac4..1c57e371af61 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1185,15 +1185,6 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	nvmf_reconnect_or_remove(&ctrl->ctrl);
 }
 
-static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
-{
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return;
-
-	dev_warn(ctrl->ctrl.device, "starting error recovery\n");
-	queue_work(nvme_reset_wq, &ctrl->ctrl.err_work);
-}
-
 static void nvme_rdma_end_request(struct nvme_rdma_request *req)
 {
 	struct request *rq = blk_mq_rq_from_pdu(req);
@@ -1215,7 +1206,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 			     "%s for CQE 0x%p failed with status %s (%d)\n",
 			     op, wc->wr_cqe,
 			     ib_wc_status_msg(wc->status), wc->status);
-	nvme_rdma_error_recovery(ctrl);
+	nvmf_error_recovery(&ctrl->ctrl);
 }
 
 static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1715,7 +1706,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 		dev_err(queue->ctrl->ctrl.device,
 			"got bad command_id %#x on QP %#x\n",
 			cqe->command_id, queue->qp->qp_num);
-		nvme_rdma_error_recovery(queue->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		return;
 	}
 	req = blk_mq_rq_to_pdu(rq);
@@ -1729,7 +1720,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 			dev_err(queue->ctrl->ctrl.device,
 				"Bogus remote invalidation for rkey %#x\n",
 				req->mr ? req->mr->rkey : 0);
-			nvme_rdma_error_recovery(queue->ctrl);
+			nvmf_error_recovery(&queue->ctrl->ctrl);
 		}
 	} else if (req->mr) {
 		int ret;
@@ -1739,7 +1730,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 			dev_err(queue->ctrl->ctrl.device,
 				"Queueing INV WR for rkey %#x failed (%d)\n",
 				req->mr->rkey, ret);
-			nvme_rdma_error_recovery(queue->ctrl);
+			nvmf_error_recovery(&queue->ctrl->ctrl);
 		}
 		/* the local invalidation completion will end the request */
 		return;
@@ -1766,7 +1757,7 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 	if (unlikely(wc->byte_len < len)) {
 		dev_err(queue->ctrl->ctrl.device,
 			"Unexpected nvme completion length(%d)\n", wc->byte_len);
-		nvme_rdma_error_recovery(queue->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		return;
 	}
 
@@ -1936,7 +1927,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
 		dev_dbg(queue->ctrl->ctrl.device,
 			"disconnect received - connection closed\n");
-		nvme_rdma_error_recovery(queue->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		/* device removal is handled via the ib_client API */
@@ -1944,7 +1935,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 	default:
 		dev_err(queue->ctrl->ctrl.device,
 			"Unexpected RDMA CM event (%d)\n", ev->event);
-		nvme_rdma_error_recovery(queue->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		break;
 	}
 
@@ -2000,7 +1991,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 	 * LIVE state should trigger the normal error recovery which will
 	 * handle completing this request.
 	 */
-	nvme_rdma_error_recovery(ctrl);
+	nvmf_error_recovery(&ctrl->ctrl);
 	return BLK_EH_RESET_TIMER;
 }
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 07a9cc4f2274..fe1f2fec457b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -479,15 +479,6 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
 	queue->ddgst_remaining = 0;
 }
 
-static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
-{
-	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
-		return;
-
-	dev_warn(ctrl->device, "starting error recovery\n");
-	queue_work(nvme_reset_wq, &ctrl->err_work);
-}
-
 static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		struct nvme_completion *cqe)
 {
@@ -499,7 +490,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		dev_err(queue->ctrl->ctrl.device,
 			"got bad cqe.command_id %#x on queue %d\n",
 			cqe->command_id, nvme_tcp_queue_id(queue));
-		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		return -EINVAL;
 	}
 
@@ -541,7 +532,7 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
 		dev_err(queue->ctrl->ctrl.device,
 			"queue %d tag %#x SUCCESS set but not last PDU\n",
 			nvme_tcp_queue_id(queue), rq->tag);
-		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		return -EPROTO;
 	}
 
@@ -850,7 +841,7 @@ static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
 			dev_err(queue->ctrl->ctrl.device,
 				"receive failed:  %d\n", result);
 			queue->rd_enabled = false;
-			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+			nvmf_error_recovery(&queue->ctrl->ctrl);
 			return result;
 		}
 	}
@@ -898,7 +889,7 @@ static void nvme_tcp_state_change(struct sock *sk)
 	case TCP_LAST_ACK:
 	case TCP_FIN_WAIT1:
 	case TCP_FIN_WAIT2:
-		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+		nvmf_error_recovery(&queue->ctrl->ctrl);
 		break;
 	default:
 		dev_info(queue->ctrl->ctrl.device,
@@ -2252,7 +2243,7 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
 	 * LIVE state should trigger the normal error recovery which will
 	 * handle completing this request.
 	 */
-	nvme_tcp_error_recovery(ctrl);
+	nvmf_error_recovery(ctrl);
 	return BLK_EH_RESET_TIMER;
 }
 
-- 
2.18.1

next prev parent reply	other threads:[~2021-10-20 10:40 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-20 10:38 [PATCH v2 0/10] Centrelize common fabrics code to core drivers Max Gurtovoy
2021-10-20 10:38 ` [PATCH 01/10] nvme: add connect_work attribute to nvme ctrl Max Gurtovoy
2021-11-02 22:59   ` James Smart
2021-10-20 10:38 ` [PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API Max Gurtovoy
2021-11-02 23:38   ` James Smart
2021-10-20 10:38 ` [PATCH 03/10] nvme: add err_work attribute to nvme ctrl Max Gurtovoy
2021-10-20 11:05   ` Hannes Reinecke
2021-11-02 23:53   ` James Smart
2021-10-20 10:38 ` Max Gurtovoy [this message]
2021-11-02 23:59   ` [PATCH 04/10] nvme-fabrics: introduce nvmf_error_recovery API James Smart
2021-10-20 10:38 ` [PATCH 05/10] nvme/nvme-fabrics: introduce nvmf_error_recovery_work API Max Gurtovoy
2021-11-03  0:04   ` James Smart
2021-10-20 10:38 ` [PATCH 06/10] nvme/nvme-fabrics: introduce nvmf_reconnect_ctrl_work API Max Gurtovoy
2021-11-03  0:15   ` James Smart
2021-10-20 10:38 ` [PATCH 07/10] nvme-fabrics: add nvmf_init_ctrl/nvmf_uninit_ctrl API Max Gurtovoy
2021-11-03  0:19   ` James Smart
2021-10-20 10:38 ` [PATCH 08/10] nvme-rdma: update WARN_ON condition during reset Max Gurtovoy
2021-10-20 10:38 ` [PATCH 09/10] nvme/nvme-fabrics: move reset ctrl flow to common code Max Gurtovoy
2021-11-03  0:27   ` James Smart
2021-10-20 10:38 ` [PATCH 10/10] nvme-fabrics: set common attributes during nvmf_init_ctrl Max Gurtovoy
2021-11-03  0:30   ` James Smart

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4a1ef67c6fb dfblob:2edd086fa92 dfblob:de213ab2697
dfblob:3d8ec7133fc dfblob:da7f61a5fac dfblob:1c57e371af6
dfblob:07a9cc4f227 dfblob:fe1f2fec457 )
 OR (
bs:"[PATCH 04/10] nvme-fabrics: introduce nvmf_error_recovery API" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211020103844.7533-5-mgurtovoy@nvidia.com \
    --to=mgurtovoy@nvidia.com \
    --cc=chaitanyak@nvidia.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=israelr@nvidia.com \
    --cc=jsmart2021@gmail.com \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=oren@nvidia.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox