* [RFC v1 1/3] nvme-rdma: stream line queue functions arguments
2023-03-01 8:27 [RFC v1 0/3] Unifying fabrics drivers Daniel Wagner
@ 2023-03-01 8:27 ` Daniel Wagner
2023-03-01 8:27 ` [RFC v1 2/3] nvme-rdma: factor rdma specific queue init code out Daniel Wagner
` (4 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Daniel Wagner @ 2023-03-01 8:27 UTC (permalink / raw)
To: linux-nvme; +Cc: Daniel Wagner
In preparation for moving common code from the fabrics drivers to fabrics.c,
streamline the low-level queue functions. This allows the common code to pass
in the generic 'struct nvme_ctrl' instead of the driver-specific
'struct nvme_rdma_ctrl'.
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
drivers/nvme/host/rdma.c | 62 ++++++++++++++++++++++++++--------------
1 file changed, 40 insertions(+), 22 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index bbad26b82b56..c26c04c67684 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -133,6 +133,11 @@ static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
}
+static inline int nvme_rdma_queue_id(struct nvme_rdma_queue *queue)
+{
+ return queue - queue->ctrl->queues;
+}
+
static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_list_mutex);
@@ -571,13 +576,19 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
return ret;
}
-static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
- int idx, size_t queue_size)
+static int nvme_rdma_alloc_queue(struct nvme_ctrl *nctrl, int idx)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct nvme_rdma_queue *queue;
struct sockaddr *src_addr = NULL;
+ size_t queue_size;
int ret;
+ if (idx == 0)
+ queue_size = NVME_AQ_DEPTH;
+ else
+ queue_size = ctrl->ctrl.sqsize + 1;
+
queue = &ctrl->queues[idx];
mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
@@ -641,16 +652,22 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
ib_drain_qp(queue->qp);
}
-static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
+static void nvme_rdma_stop_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[qid];
+
mutex_lock(&queue->queue_lock);
if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
__nvme_rdma_stop_queue(queue);
mutex_unlock(&queue->queue_lock);
}
-static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
+static void nvme_rdma_free_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[qid];
+
if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
return;
@@ -664,7 +681,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
int i;
for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_free_queue(&ctrl->queues[i]);
+ nvme_rdma_free_queue(&ctrl->ctrl, i);
}
static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
@@ -672,18 +689,19 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
int i;
for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_stop_queue(&ctrl->queues[i]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, i);
}
-static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
+static int nvme_rdma_start_queue(struct nvme_ctrl *nctrl, int idx)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct nvme_rdma_queue *queue = &ctrl->queues[idx];
int ret;
if (idx)
- ret = nvmf_connect_io_queue(&ctrl->ctrl, idx);
+ ret = nvmf_connect_io_queue(nctrl, idx);
else
- ret = nvmf_connect_admin_queue(&ctrl->ctrl);
+ ret = nvmf_connect_admin_queue(nctrl);
if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
@@ -702,7 +720,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
int i, ret = 0;
for (i = first; i < last; i++) {
- ret = nvme_rdma_start_queue(ctrl, i);
+ ret = nvme_rdma_start_queue(&ctrl->ctrl, i);
if (ret)
goto out_stop_queues;
}
@@ -711,7 +729,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
out_stop_queues:
for (i--; i >= first; i--)
- nvme_rdma_stop_queue(&ctrl->queues[i]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, i);
return ret;
}
@@ -773,8 +791,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
}
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvme_rdma_alloc_queue(ctrl, i,
- ctrl->ctrl.sqsize + 1);
+ ret = nvme_rdma_alloc_queue(&ctrl->ctrl, i);
if (ret)
goto out_free_queues;
}
@@ -783,7 +800,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
out_free_queues:
for (i--; i >= 1; i--)
- nvme_rdma_free_queue(&ctrl->queues[i]);
+ nvme_rdma_free_queue(&ctrl->ctrl, i);
return ret;
}
@@ -811,7 +828,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
sizeof(struct nvme_command), DMA_TO_DEVICE);
ctrl->async_event_sqe.data = NULL;
}
- nvme_rdma_free_queue(&ctrl->queues[0]);
+ nvme_rdma_free_queue(&ctrl->ctrl, 0);
}
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -820,7 +837,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool pi_capable = false;
int error;
- error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
+ error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
if (error)
return error;
@@ -855,7 +872,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
}
- error = nvme_rdma_start_queue(ctrl, 0);
+ error = nvme_rdma_start_queue(&ctrl->ctrl, 0);
if (error)
goto out_remove_admin_tag_set;
@@ -882,7 +899,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
out_stop_queue:
- nvme_rdma_stop_queue(&ctrl->queues[0]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, 0);
nvme_cancel_admin_tagset(&ctrl->ctrl);
out_remove_admin_tag_set:
if (new)
@@ -894,7 +911,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->async_event_sqe.data = NULL;
}
out_free_queue:
- nvme_rdma_free_queue(&ctrl->queues[0]);
+ nvme_rdma_free_queue(&ctrl->ctrl, 0);
return error;
}
@@ -967,7 +984,7 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
{
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->queues[0]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, 0);
nvme_cancel_admin_tagset(&ctrl->ctrl);
if (remove) {
nvme_unquiesce_admin_queue(&ctrl->ctrl);
@@ -1118,7 +1135,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
destroy_admin:
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->queues[0]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, 0);
nvme_cancel_admin_tagset(&ctrl->ctrl);
if (new)
nvme_remove_admin_tag_set(&ctrl->ctrl);
@@ -1965,9 +1982,10 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
static void nvme_rdma_complete_timed_out(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_rdma_queue *queue = req->queue;
- nvme_rdma_stop_queue(queue);
+ nvme_rdma_stop_queue(ctrl, nvme_rdma_queue_id(queue));
nvmf_complete_timed_out_request(rq);
}
--
2.39.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC v1 2/3] nvme-rdma: factor rdma specific queue init code out
2023-03-01 8:27 [RFC v1 0/3] Unifying fabrics drivers Daniel Wagner
2023-03-01 8:27 ` [RFC v1 1/3] nvme-rdma: stream line queue functions arguments Daniel Wagner
@ 2023-03-01 8:27 ` Daniel Wagner
2023-03-01 8:27 ` [RFC v1 3/3] nvme-fabrics: move configure admin queue code to fabrics.c Daniel Wagner
` (3 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Daniel Wagner @ 2023-03-01 8:27 UTC (permalink / raw)
To: linux-nvme; +Cc: Daniel Wagner
In preparation for moving common code from the fabrics drivers to fabrics.c,
move the rdma specific queue initialization code into a separate
function.
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
drivers/nvme/host/rdma.c | 65 ++++++++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 19 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c26c04c67684..d070d84b204e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -831,15 +831,16 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_queue(&ctrl->ctrl, 0);
}
-static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
- bool new)
+static int nvme_rdma_init_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
bool pi_capable = false;
int error;
- error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
- if (error)
- return error;
+ if (qid != 0)
+ /* only admin queue needs additional work. */
+ return 0;
+
ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
@@ -859,6 +860,43 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
*/
error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE);
+ if (error)
+ return error;
+
+ ctrl->ctrl.max_segments = ctrl->max_fr_pages;
+ ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+ if (pi_capable)
+ ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
+ else
+ ctrl->ctrl.max_integrity_segments = 0;
+
+ return 0;
+}
+
+static void nvme_rdma_deinit_queue(struct nvme_ctrl *nctrl, int qid)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+ if (qid != 0)
+ return;
+
+ if (ctrl->async_event_sqe.data) {
+ nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ ctrl->async_event_sqe.data = NULL;
+ }
+}
+
+static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
+ bool new)
+{
+ int error;
+
+ error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
+ if (error)
+ return error;
+
+ error = nvme_rdma_init_queue(&ctrl->ctrl, 0);
if (error)
goto out_free_queue;
@@ -868,7 +906,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
sizeof(struct nvme_rdma_request) +
NVME_RDMA_DATA_SGL_SIZE);
if (error)
- goto out_free_async_qe;
+ goto out_deinit_admin_queue;
}
@@ -880,13 +918,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (error)
goto out_stop_queue;
- ctrl->ctrl.max_segments = ctrl->max_fr_pages;
- ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
- if (pi_capable)
- ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
- else
- ctrl->ctrl.max_integrity_segments = 0;
-
nvme_unquiesce_admin_queue(&ctrl->ctrl);
error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
@@ -904,12 +935,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
out_remove_admin_tag_set:
if (new)
nvme_remove_admin_tag_set(&ctrl->ctrl);
-out_free_async_qe:
- if (ctrl->async_event_sqe.data) {
- nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- ctrl->async_event_sqe.data = NULL;
- }
+out_deinit_admin_queue:
+ nvme_rdma_deinit_queue(&ctrl->ctrl, 0);
out_free_queue:
nvme_rdma_free_queue(&ctrl->ctrl, 0);
return error;
--
2.39.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC v1 3/3] nvme-fabrics: move configure admin queue code to fabrics.c
2023-03-01 8:27 [RFC v1 0/3] Unifying fabrics drivers Daniel Wagner
2023-03-01 8:27 ` [RFC v1 1/3] nvme-rdma: stream line queue functions arguments Daniel Wagner
2023-03-01 8:27 ` [RFC v1 2/3] nvme-rdma: factor rdma specific queue init code out Daniel Wagner
@ 2023-03-01 8:27 ` Daniel Wagner
2023-03-02 3:02 ` [RFC v1 0/3] Unifying fabrics drivers Chaitanya Kulkarni
` (2 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Daniel Wagner @ 2023-03-01 8:27 UTC (permalink / raw)
To: linux-nvme; +Cc: Daniel Wagner
The two transports for tcp and rdma share a lot of common code, which
handles the controller states and is not transport specific.
The fabrics.c file contains helper functions to be used by the transports.
This limits what the common code in fabrics.c can do. Any transport
specific allocation/initialization (e.g. command size for the admin_tag_set)
has to be either parameterized via the function call or handled through
callbacks.
The third option is to introduce an intermediate type, e.g. struct
nvme_fabrics_ctrl, but that would mean a lot of code refactoring and
restructuring just to avoid passing in arguments to functions.
The callback approach has another benefit: it allows adding hooks to
the generic code so the driver can allocate additional transport
specific resources.
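For illustration, the intermediate type option would look roughly like the
sketch below; none of these definitions exist, they only show the shape of
the rejected alternative:

/* Sketch only: neither type exists anywhere in this series. */
struct nvme_fabrics_ctrl {
	struct nvme_ctrl	ctrl;	/* generic controller */
	/* state shared by all fabrics transports would move here */
};

struct nvme_rdma_ctrl {
	struct nvme_fabrics_ctrl fctrl;	/* instead of embedding nvme_ctrl */
	/*
	 * ... rdma specific members; every to_rdma_ctrl()/container_of()
	 * user and many function signatures would have to change ...
	 */
};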
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
drivers/nvme/host/fabrics.c | 52 ++++++++++++++++++++
drivers/nvme/host/fabrics.h | 12 +++++
drivers/nvme/host/nvme.h | 3 ++
drivers/nvme/host/rdma.c | 77 +++++++++---------------------
drivers/nvme/host/tcp.c | 94 +++++++++++--------------------------
5 files changed, 115 insertions(+), 123 deletions(-)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index bbaa04a0c502..b09195c4a366 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1134,6 +1134,58 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
return ERR_PTR(ret);
}
+int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
+{
+ int error;
+
+ error = ctrl->fabrics_ops->alloc_queue(ctrl, 0);
+ if (error)
+ return error;
+
+ error = ctrl->fabrics_ops->init_queue(ctrl, 0);
+ if (error)
+ goto out_free_queue;
+
+ if (new) {
+ error = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
+ if (error)
+ goto out_deinit_admin_queue;
+
+ }
+
+ error = ctrl->fabrics_ops->start_queue(ctrl, 0);
+ if (error)
+ goto out_remove_admin_tag_set;
+
+ error = nvme_enable_ctrl(ctrl);
+ if (error)
+ goto out_stop_queue;
+
+ nvme_unquiesce_admin_queue(ctrl);
+
+ error = nvme_init_ctrl_finish(ctrl, false);
+ if (error)
+ goto out_quiesce_queue;
+
+ return 0;
+
+out_quiesce_queue:
+ nvme_quiesce_admin_queue(ctrl);
+ blk_sync_queue(ctrl->admin_q);
+out_stop_queue:
+ ctrl->fabrics_ops->stop_queue(ctrl, 0);
+ nvme_cancel_admin_tagset(ctrl);
+out_remove_admin_tag_set:
+ if (new)
+ nvme_remove_admin_tag_set(ctrl);
+out_deinit_admin_queue:
+ ctrl->fabrics_ops->deinit_queue(ctrl, 0);
+out_free_queue:
+ ctrl->fabrics_ops->free_queue(ctrl, 0);
+ return error;
+}
+EXPORT_SYMBOL_GPL(nvmf_configure_admin_queue);
+
static struct class *nvmf_class;
static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a6e22116e139..30b7e388c8e8 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -172,6 +172,16 @@ struct nvmf_transport_ops {
struct nvmf_ctrl_options *opts);
};
+struct nvme_fabrics_ops {
+ int (*alloc_queue)(struct nvme_ctrl *ctrl, int qid);
+ void (*free_queue)(struct nvme_ctrl *ctrl, int qid);
+ int (*init_queue)(struct nvme_ctrl *ctrl, int qid);
+ void (*deinit_queue)(struct nvme_ctrl *ctrl, int qid);
+ int (*start_queue)(struct nvme_ctrl *ctrl, int qid);
+ void (*stop_queue)(struct nvme_ctrl *ctrl, int qid);
+ int (*alloc_admin_tag_set)(struct nvme_ctrl *ctrl);
+};
+
static inline bool
nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
@@ -214,5 +224,7 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts);
+int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new);
+
#endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bf46f122e9e1..e58211802031 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -244,6 +244,8 @@ enum nvme_ctrl_flags {
NVME_CTRL_STOPPED = 3,
};
+struct nvme_fabrics_ops;
+
struct nvme_ctrl {
bool comp_seen;
enum nvme_ctrl_state state;
@@ -251,6 +253,7 @@ struct nvme_ctrl {
spinlock_t lock;
struct mutex scan_lock;
const struct nvme_ctrl_ops *ops;
+ const struct nvme_fabrics_ops *fabrics_ops;
struct request_queue *admin_q;
struct request_queue *connect_q;
struct request_queue *fabrics_q;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d070d84b204e..0347d7909b1d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -887,61 +887,6 @@ static void nvme_rdma_deinit_queue(struct nvme_ctrl *nctrl, int qid)
}
}
-static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
- bool new)
-{
- int error;
-
- error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
- if (error)
- return error;
-
- error = nvme_rdma_init_queue(&ctrl->ctrl, 0);
- if (error)
- goto out_free_queue;
-
- if (new) {
- error = nvme_alloc_admin_tag_set(&ctrl->ctrl,
- &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops,
- sizeof(struct nvme_rdma_request) +
- NVME_RDMA_DATA_SGL_SIZE);
- if (error)
- goto out_deinit_admin_queue;
-
- }
-
- error = nvme_rdma_start_queue(&ctrl->ctrl, 0);
- if (error)
- goto out_remove_admin_tag_set;
-
- error = nvme_enable_ctrl(&ctrl->ctrl);
- if (error)
- goto out_stop_queue;
-
- nvme_unquiesce_admin_queue(&ctrl->ctrl);
-
- error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
- if (error)
- goto out_quiesce_queue;
-
- return 0;
-
-out_quiesce_queue:
- nvme_quiesce_admin_queue(&ctrl->ctrl);
- blk_sync_queue(ctrl->ctrl.admin_q);
-out_stop_queue:
- nvme_rdma_stop_queue(&ctrl->ctrl, 0);
- nvme_cancel_admin_tagset(&ctrl->ctrl);
-out_remove_admin_tag_set:
- if (new)
- nvme_remove_admin_tag_set(&ctrl->ctrl);
-out_deinit_admin_queue:
- nvme_rdma_deinit_queue(&ctrl->ctrl, 0);
-out_free_queue:
- nvme_rdma_free_queue(&ctrl->ctrl, 0);
- return error;
-}
-
static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
{
int ret, nr_queues;
@@ -1081,12 +1026,21 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
}
}
+static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
+{
+
+ return nvme_alloc_admin_tag_set(ctrl, &to_rdma_ctrl(ctrl)->admin_tag_set,
+ &nvme_rdma_admin_mq_ops,
+ sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE);
+}
+
static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
{
int ret;
bool changed;
- ret = nvme_rdma_configure_admin_queue(ctrl, new);
+ ret = nvmf_configure_admin_queue(&ctrl->ctrl, new);
if (ret)
return ret;
@@ -2330,6 +2284,16 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
+static struct nvme_fabrics_ops nvme_rdma_fabrics_ops = {
+ .alloc_queue = nvme_rdma_alloc_queue,
+ .free_queue = nvme_rdma_free_queue,
+ .init_queue = nvme_rdma_init_queue,
+ .deinit_queue = nvme_rdma_deinit_queue,
+ .start_queue = nvme_rdma_start_queue,
+ .stop_queue = nvme_rdma_stop_queue,
+ .alloc_admin_tag_set = nvme_rdma_alloc_admin_tag_set,
+};
+
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -2341,6 +2305,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl)
return ERR_PTR(-ENOMEM);
ctrl->ctrl.opts = opts;
+ ctrl->ctrl.fabrics_ops = &nvme_rdma_fabrics_ops;
INIT_LIST_HEAD(&ctrl->list);
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index d6100a787d39..fdb8b6d0c3e1 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1740,25 +1740,6 @@ static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
return ret;
}
-static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
-{
- int ret;
-
- ret = nvme_tcp_alloc_queue(ctrl, 0);
- if (ret)
- return ret;
-
- ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
- if (ret)
- goto out_free_queue;
-
- return 0;
-
-out_free_queue:
- nvme_tcp_free_queue(ctrl, 0);
- return ret;
-}
-
static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
int i, ret;
@@ -1932,53 +1913,6 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
nvme_tcp_free_admin_queue(ctrl);
}
-static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
-{
- int error;
-
- error = nvme_tcp_alloc_admin_queue(ctrl);
- if (error)
- return error;
-
- if (new) {
- error = nvme_alloc_admin_tag_set(ctrl,
- &to_tcp_ctrl(ctrl)->admin_tag_set,
- &nvme_tcp_admin_mq_ops,
- sizeof(struct nvme_tcp_request));
- if (error)
- goto out_free_queue;
- }
-
- error = nvme_tcp_start_queue(ctrl, 0);
- if (error)
- goto out_cleanup_tagset;
-
- error = nvme_enable_ctrl(ctrl);
- if (error)
- goto out_stop_queue;
-
- nvme_unquiesce_admin_queue(ctrl);
-
- error = nvme_init_ctrl_finish(ctrl, false);
- if (error)
- goto out_quiesce_queue;
-
- return 0;
-
-out_quiesce_queue:
- nvme_quiesce_admin_queue(ctrl);
- blk_sync_queue(ctrl->admin_q);
-out_stop_queue:
- nvme_tcp_stop_queue(ctrl, 0);
- nvme_cancel_admin_tagset(ctrl);
-out_cleanup_tagset:
- if (new)
- nvme_remove_admin_tag_set(ctrl);
-out_free_queue:
- nvme_tcp_free_admin_queue(ctrl);
- return error;
-}
-
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove)
{
@@ -2027,12 +1961,28 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
}
}
+static int nvme_tcp_init_queue(struct nvme_ctrl *ctrl, int qid)
+{
+ if (qid == 0) {
+ return nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
+ }
+
+ return 0;
+}
+
+static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
+{
+ return nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set,
+ &nvme_tcp_admin_mq_ops,
+ sizeof(struct nvme_tcp_request));
+}
+
static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
{
struct nvmf_ctrl_options *opts = ctrl->opts;
int ret;
- ret = nvme_tcp_configure_admin_queue(ctrl, new);
+ ret = nvmf_configure_admin_queue(ctrl, new);
if (ret)
return ret;
@@ -2552,6 +2502,15 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
+static struct nvme_fabrics_ops nvme_tcp_fabrics_ops = {
+ .alloc_queue = nvme_tcp_alloc_queue,
+ .free_queue = nvme_tcp_free_queue,
+ .start_queue = nvme_tcp_start_queue,
+ .stop_queue = nvme_tcp_stop_queue,
+ .init_queue = nvme_tcp_init_queue,
+ .alloc_admin_tag_set = nvme_tcp_alloc_admin_tag_set,
+};
+
static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -2564,6 +2523,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
INIT_LIST_HEAD(&ctrl->list);
ctrl->ctrl.opts = opts;
+ ctrl->ctrl.fabrics_ops = &nvme_tcp_fabrics_ops;
ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
--
2.39.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-01 8:27 [RFC v1 0/3] Unifying fabrics drivers Daniel Wagner
` (2 preceding siblings ...)
2023-03-01 8:27 ` [RFC v1 3/3] nvme-fabrics: move configure admin queue code to fabrics.c Daniel Wagner
@ 2023-03-02 3:02 ` Chaitanya Kulkarni
2023-03-02 8:15 ` Daniel Wagner
2023-03-03 23:13 ` James Smart
2023-03-07 9:26 ` Sagi Grimberg
5 siblings, 1 reply; 17+ messages in thread
From: Chaitanya Kulkarni @ 2023-03-02 3:02 UTC (permalink / raw)
To: Daniel Wagner, linux-nvme@lists.infradead.org
On 3/1/2023 12:27 AM, Daniel Wagner wrote:
> The two fabrics rdma and tcp share a lot of common code. This here is my attempt
> to consolidate the common code.
>
> I've picked just one function (setup admin queue) for this RFC to get a feeling
> and feedback on whether this is a valid approach or if people hate it. I've left
> out fc for the time being because it differs too much from the other two drivers.
> I've quickly tested tcp; rdma is only compile tested.
> I've tested quickly tcp, rdma is only compile tested.
>
It is really hard to give any feedback without seeing the entire code.
What kind of impact will this have on the current TCP offload series
once it is merged?
-ck
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-02 3:02 ` [RFC v1 0/3] Unifying fabrics drivers Chaitanya Kulkarni
@ 2023-03-02 8:15 ` Daniel Wagner
0 siblings, 0 replies; 17+ messages in thread
From: Daniel Wagner @ 2023-03-02 8:15 UTC (permalink / raw)
To: Chaitanya Kulkarni; +Cc: linux-nvme@lists.infradead.org
On Thu, Mar 02, 2023 at 03:02:38AM +0000, Chaitanya Kulkarni wrote:
> On 3/1/2023 12:27 AM, Daniel Wagner wrote:
> > The two fabrics rdma and tcp share a lot of common code. This here is my attempt
> > to consolidate the common code.
> >
> > I've picked just one function (setup admin queue) for this RFC to get a feeling
> > and feedback on whether this is a valid approach or if people hate it. I've left
> > out fc for the time being because it differs too much from the other two drivers.
> >
> > I've quickly tested tcp; rdma is only compile tested.
> >
>
> It is really hard to give any feedback without seeing entire code.
Surely you can give some feedback, for example does this approach make sense, or
do you think we should reorganize the data structures?
> What kind of impact this will have on the current TCP offload series
> once it is merged ?
I don't expect a lot of impact at all. The common code between tcp and rdma is
the state machine code and not the transport bits. The offload code doesn't
touch any of the code I would touch.
See it's not so hard to give some feedback?
Daniel
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-01 8:27 [RFC v1 0/3] Unifying fabrics drivers Daniel Wagner
` (3 preceding siblings ...)
2023-03-02 3:02 ` [RFC v1 0/3] Unifying fabrics drivers Chaitanya Kulkarni
@ 2023-03-03 23:13 ` James Smart
2023-03-07 12:41 ` Daniel Wagner
2023-03-07 9:26 ` Sagi Grimberg
5 siblings, 1 reply; 17+ messages in thread
From: James Smart @ 2023-03-03 23:13 UTC (permalink / raw)
To: Daniel Wagner, linux-nvme
On 3/1/2023 12:27 AM, Daniel Wagner wrote:
> The two fabrics rdma and tcp share a lot of common code. This here is my attempt
> to consolidate the common code.
>
> I've picked just one function (setup admin queue) for this RFC to get a feeling
> and feedback on whether this is a valid approach or if people hate it. I've left
> out fc for the time being because it differs too much from the other two drivers.
>
> I've quickly tested tcp; rdma is only compile tested.
>
> Daniel Wagner (3):
> nvme-rdma: stream line queue functions arguments
> nvme-rdma: factor rdma specific queue init code out
> nvme-fabrics: move configure admin queue code to fabrics.c
>
> drivers/nvme/host/fabrics.c | 52 ++++++++++++++
> drivers/nvme/host/fabrics.h | 12 ++++
> drivers/nvme/host/nvme.h | 3 +
> drivers/nvme/host/rdma.c | 132 +++++++++++++++++++-----------------
> drivers/nvme/host/tcp.c | 94 ++++++++-----------------
> 5 files changed, 165 insertions(+), 128 deletions(-)
>
Initial reaction:
The resulting nvmf_configure_admin_queue() routine does look nice.
However, I'm not sure the callback chain is all that easier to follow.
I guess I'm most disappointed that more of the logic isn't actually
being commonized.
Examples:
alloc_queue():
At least pass queue_size as an arg. I assume we should, to match up
with the fabric connect routine.
init_queue():
I'd like to see setting of ctrl.max_segments/hw_sectors in the common
code. Unfortunately, likely needs to be a fops call. At least it
makes sure it's done at the same point in controller creation.
start_queue():
can't common code do the nvmf_connect_admin_queue() call?
If it's too tied to the LIVE bit - then perhaps the queue LIVE
bits should actually be getting set by the common code after
the successful start_queue - or have routines the transport can call
from start/stop to set/clear it.
If the LIVE bit can be dealt with - this whole block in rdma/tcp moves to
common code.
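Something along these lines, as a rough sketch only - here ->start_queue()
is assumed to do only the transport level start, and the nvmf_start_queue()
wrapper, the queue_flags array and the NVME_FABRICS_Q_LIVE bit are all made
up:

static int nvmf_start_queue(struct nvme_ctrl *ctrl, int qid)
{
	int ret;

	/* transport specific part of starting the queue */
	ret = ctrl->fabrics_ops->start_queue(ctrl, qid);
	if (ret)
		return ret;

	/* fabrics connect done by common code, not per transport */
	if (qid)
		ret = nvmf_connect_io_queue(ctrl, qid);
	else
		ret = nvmf_connect_admin_queue(ctrl);
	if (ret) {
		ctrl->fabrics_ops->stop_queue(ctrl, qid);
		return ret;
	}

	/* LIVE tracking moves out of the transports */
	set_bit(NVME_FABRICS_Q_LIVE, &ctrl->queue_flags[qid]);
	return 0;
}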
Also I'd really like to get rid of the "new" argument everywhere. It
should just be a state flag set on the common ctrl.
I guess not bad, but not helping much (yet).
-- james
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-03 23:13 ` James Smart
@ 2023-03-07 12:41 ` Daniel Wagner
2023-03-07 23:55 ` Chaitanya Kulkarni
0 siblings, 1 reply; 17+ messages in thread
From: Daniel Wagner @ 2023-03-07 12:41 UTC (permalink / raw)
To: James Smart; +Cc: linux-nvme
On Fri, Mar 03, 2023 at 03:13:53PM -0800, James Smart wrote:
> Initial reaction:
> The resulting nvmf_configure_admin_queue() routine does look nice. However,
>> I'm not sure the callback chain is all that easier to follow.
v1 is a bit too simple as I only tried to get nvmf_configure_admin_queue()
sorted out.
> I guess I'm most disappointed that more of the logic isn't actually being
> commonized.
v2 is moving the whole state machine which makes more sense.
> Examples:
> alloc_queue():
> At least pass queue_size as an arg. I assume we should to match up
> with the fabric connect routine.
I'll look into that.
> init_queue():
> I'd like to see setting of ctrl.max_segments/hw_sectors in the common
> code. Unfortunately, likely needs to be a fops call. At least it
> makes sure its done at the same point in controller creation.
Okay, I suspect it needs a few iterations until we have sorted all these details out.
> start_queue():
> can't common code do the nvmf_connect_admin_queue() call ?
> If its too tied to the LIVE bit - then perhaps the queue LIVE
> bits should actually be getting set by the common code after
> the successful start_queue - or has routines transport can call
> from start/stop to set/clear it.
> If LIVE bit can be dealt with - this whole block in rdma/tcp moves to
> common code.
Good point, the LIVE bit is currently transport specific, which it doesn't need
to be.
What would be the best strategy for reviewing this kind of change? Should I
add stuff on top of v2, or should I try to add the common code first and then
migrate the transports over? The current approach of moving and updating at the
same time is obviously a bit hard to review (and has a good chance of
introducing regressions).
> Also I'd really like to get rid of the "new" argument everywhere. It should
> just be a state flag set on the common ctrl.
Okay.
> I guess not bad, but not helping much (yet).
I posted a very early version to discuss the approach and get all parties to
agree on it, so it has a chance of getting accepted.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-07 12:41 ` Daniel Wagner
@ 2023-03-07 23:55 ` Chaitanya Kulkarni
2023-03-08 8:33 ` Daniel Wagner
0 siblings, 1 reply; 17+ messages in thread
From: Chaitanya Kulkarni @ 2023-03-07 23:55 UTC (permalink / raw)
To: Daniel Wagner, James Smart; +Cc: linux-nvme@lists.infradead.org
>> start_queue():
>> can't common code do the nvmf_connect_admin_queue() call ?
>> If its too tied to the LIVE bit - then perhaps the queue LIVE
>> bits should actually be getting set by the common code after
>> the successful start_queue - or has routines transport can call
>> from start/stop to set/clear it.
>> If LIVE bit can be dealt with - this whole block in rdma/tcp moves to
>> common code.
> Good point, the LIVE bit is currently transport specific which it dosen't need
> to be.
>
> What would be the best strategy with this kind of change for review? Should I
> add stuff ontop v2 or should I try to add the common code first and then migrate
> the transports over? The current approach with moving and updating in the same
> time is obviously a bit hard to review (and has a good chance to introduce
> regressions).
please avoid moving and updating at the same time..
I'd first do prep patches to move the bits/code around
and establish the stability of the code, and only then make the
actual changes for unification...
-ck
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-01 8:27 [RFC v1 0/3] Unifying fabrics drivers Daniel Wagner
` (4 preceding siblings ...)
2023-03-03 23:13 ` James Smart
@ 2023-03-07 9:26 ` Sagi Grimberg
2023-03-07 12:28 ` Daniel Wagner
5 siblings, 1 reply; 17+ messages in thread
From: Sagi Grimberg @ 2023-03-07 9:26 UTC (permalink / raw)
To: Daniel Wagner, linux-nvme
> The two fabrics rdma and tcp share a lot of common code. This here is my attempt
> to consolidate the common code.
>
> I've picked just one function (setup admin queue) for this RFC to get a feeling
> and feedback on whether this is a valid approach or if people hate it. I've left
> out fc for the time being because it differs too much from the other two drivers.
>
> I've quickly tested tcp; rdma is only compile tested.
I think we should make all transports unify setup/teardown sequences
(i.e. including pcie and fc). Otherwise we are not gaining much.
It will help if we make the ops higher level like
ops.setup_transport(ctrl)
ops.alloc_admin_queue(ctrl)
ops.start_admin_queue(ctrl)
ops.stop_admin_queue(ctrl)
ops.free_admin_queue(ctrl)
ops.alloc_io_queues(ctrl)
ops.start_io_queues(ctrl)
ops.stop_io_queues(ctrl)
ops.free_io_queues(ctrl)
The init/deinit can be folded to alloc/free I think.
This is indeed a much larger effort, but I don't know what
this unification of rdma/tcp buys us really...
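Just to make the shape concrete, roughly something like this (sketch only,
reusing the struct name from the RFC but with higher level entry points):

struct nvme_fabrics_ops {
	int  (*setup_transport)(struct nvme_ctrl *ctrl);
	int  (*alloc_admin_queue)(struct nvme_ctrl *ctrl);
	int  (*start_admin_queue)(struct nvme_ctrl *ctrl);
	void (*stop_admin_queue)(struct nvme_ctrl *ctrl);
	void (*free_admin_queue)(struct nvme_ctrl *ctrl);
	int  (*alloc_io_queues)(struct nvme_ctrl *ctrl);
	int  (*start_io_queues)(struct nvme_ctrl *ctrl);
	void (*stop_io_queues)(struct nvme_ctrl *ctrl);
	void (*free_io_queues)(struct nvme_ctrl *ctrl);
};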
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-07 9:26 ` Sagi Grimberg
@ 2023-03-07 12:28 ` Daniel Wagner
2023-03-07 12:34 ` Sagi Grimberg
2023-03-07 22:09 ` James Smart
0 siblings, 2 replies; 17+ messages in thread
From: Daniel Wagner @ 2023-03-07 12:28 UTC (permalink / raw)
To: Sagi Grimberg; +Cc: linux-nvme
On Tue, Mar 07, 2023 at 11:26:12AM +0200, Sagi Grimberg wrote:
> I think we should make all transports to unify setup/teardown sequences
> (i.e. including pcie and fc). Otherwise we are not gaining much.
Not sure about pcie but I haven't really looked at it. The state machine
really handles all the fabrics specific queue handling.
Ideally, fc would use the same state machine. Though the current code base
differs quite significantly, I can try to convert it too.
> It will help if we make the ops higher level like
>
> ops.setup_transport(ctrl)
> ops.alloc_admin_queue(ctrl)
> ops.start_admin_queue(ctrl)
> ops.stop_admin_queue(ctrl)
> ops.free_admin_queue(ctrl)
> ops.alloc_io_queues(ctrl)
> ops.start_io_queues(ctrl)
> ops.stop_io_queues(ctrl)
> ops.free_io_queues(ctrl)
This is more or less what v2 is.
> The init/deinit can be folded to alloc/free I think.
Yes, that would be a good thing. In my first attempt I tried to keep things more
or less identical to the existing code, just using the callbacks for the
transport specific bits.
> This is indeed a much larger effort, but I don't know what
> this unification of rdma/tcp buys us really...
My motivation is that we have been fixing the same bugs in the rdma and tcp
transports for a while now. The state machine code is almost identical, so why
not try to reduce the code duplication?
(*) There are a few small things which are not the same.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-07 12:28 ` Daniel Wagner
@ 2023-03-07 12:34 ` Sagi Grimberg
2023-03-07 22:09 ` James Smart
2023-03-07 22:09 ` James Smart
1 sibling, 1 reply; 17+ messages in thread
From: Sagi Grimberg @ 2023-03-07 12:34 UTC (permalink / raw)
To: Daniel Wagner; +Cc: linux-nvme
On 3/7/23 14:28, Daniel Wagner wrote:
> On Tue, Mar 07, 2023 at 11:26:12AM +0200, Sagi Grimberg wrote:
>> I think we should make all transports to unify setup/teardown sequences
>> (i.e. including pcie and fc). Otherwise we are not gaining much.
>
> Not sure about pcie but I haven't really looked at it. The state machine is
> really handling all the fabrics specific queue handling.
>
> Ideally, fc would use the same state machine. Though the current code base is
> differs quiet significantly but I can try to convert it too.
>
>> It will help if we make the ops higher level like
>>
>> ops.setup_transport(ctrl)
>> ops.alloc_admin_queue(ctrl)
>> ops.start_admin_queue(ctrl)
>> ops.stop_admin_queue(ctrl)
>> ops.free_admin_queue(ctrl)
>> ops.alloc_io_queues(ctrl)
>> ops.start_io_queues(ctrl)
>> ops.stop_io_queues(ctrl)
>> ops.free_io_queues(ctrl)
>
> This is more ore less what v2 is.
>
>> The init/deinit can be folded to alloc/free I think.
>
> Yes, that would a good thing. In my first attempt I tried to keep things more a
> less identically with the existing code, just using the callbacks for the
> transport specific bits.
>
>> This is indeed a much larger effort, but I don't know what
>> this unification of rdma/tcp buys us really...
>
> My motiviation is that we are keep fixing the same bugs in rdma and tcp
> transport since a while. The state machine code is almost identically so why not
> trying to reduce the code duplication?
I'm not saying that we shouldn't, I'm saying that we should consolidate
with all transports (rdma/fc/tcp/loop and ideally also pcie which is
closer to fabrics than what it used to be).
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-07 12:28 ` Daniel Wagner
2023-03-07 12:34 ` Sagi Grimberg
@ 2023-03-07 22:09 ` James Smart
2023-03-08 11:38 ` Sagi Grimberg
1 sibling, 1 reply; 17+ messages in thread
From: James Smart @ 2023-03-07 22:09 UTC (permalink / raw)
To: Daniel Wagner, Sagi Grimberg; +Cc: linux-nvme
On 3/7/2023 4:28 AM, Daniel Wagner wrote:
> On Tue, Mar 07, 2023 at 11:26:12AM +0200, Sagi Grimberg wrote:
>> I think we should make all transports to unify setup/teardown sequences
>> (i.e. including pcie and fc). Otherwise we are not gaining much.
>
> Not sure about pcie but I haven't really looked at it. The state machine is
> really handling all the fabrics specific queue handling.
>
> Ideally, fc would use the same state machine. Though the current code base is
> differs quiet significantly but I can try to convert it too.
I'll certainly help for FC.
The 2 main hurdles are our differences around:
a) the new flag - which I replaced in fc with the calling setup and a state
flag. Shouldn't be much of an issue. We just need to look at when/where
the tagsets are allocated (and embedding into admin queue setup isn't
necessarily best).
b) the init_ctrl - I don't have the initial connect inline to the
init_ctrl call - I push it out to the normal "reconnect" path so that
everything, initial connect and all reconnects, use the same
routine/work element. This also means the transport will retry the initial
connect if there's a failure on the initial attempt. To keep the app
happy, init_ctrl will wait for the 1st connect attempt to finish before
returning. Don't know how rdma/tcp feel about it, but I found it a
much better solution and the cleanup became cleaner.
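In rough pseudo-form it is something like the sketch below; connect_work
living on the common ctrl and the return policy are made up for
illustration, the real fc code is more involved:

static int nvme_fabrics_init_ctrl_sketch(struct nvme_ctrl *ctrl)
{
	/* initial connect is queued on the same work item as reconnect */
	if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0))
		return -EBUSY;

	/* keep the app happy: wait for the 1st connect attempt to finish */
	flush_delayed_work(&ctrl->connect_work);

	/* one possible policy; further retries stay on the work item */
	return ctrl->state == NVME_CTRL_LIVE ? 0 : -EIO;
}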
>
>> It will help if we make the ops higher level like
>>
>> ops.setup_transport(ctrl)
>> ops.alloc_admin_queue(ctrl)
>> ops.start_admin_queue(ctrl)
>> ops.stop_admin_queue(ctrl)
>> ops.free_admin_queue(ctrl)
>> ops.alloc_io_queues(ctrl)
>> ops.start_io_queues(ctrl)
>> ops.stop_io_queues(ctrl)
>> ops.free_io_queues(ctrl)
>
> This is more ore less what v2 is.
>
>> The init/deinit can be folded to alloc/free I think.
>
> Yes, that would a good thing. In my first attempt I tried to keep things more a
> less identically with the existing code, just using the callbacks for the
> transport specific bits.
>
>> This is indeed a much larger effort, but I don't know what
>> this unification of rdma/tcp buys us really...
>
> My motiviation is that we are keep fixing the same bugs in rdma and tcp
> transport since a while. The state machine code is almost identically so why not
> trying to reduce the code duplication?
>
> (*) There are a few small things which are not the same.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-07 22:09 ` James Smart
@ 2023-03-08 11:38 ` Sagi Grimberg
2023-03-08 15:13 ` James Smart
0 siblings, 1 reply; 17+ messages in thread
From: Sagi Grimberg @ 2023-03-08 11:38 UTC (permalink / raw)
To: James Smart, Daniel Wagner; +Cc: linux-nvme
On 3/8/23 00:09, James Smart wrote:
> On 3/7/2023 4:28 AM, Daniel Wagner wrote:
>> On Tue, Mar 07, 2023 at 11:26:12AM +0200, Sagi Grimberg wrote:
>>> I think we should make all transports to unify setup/teardown sequences
>>> (i.e. including pcie and fc). Otherwise we are not gaining much.
>>
>> Not sure about pcie but I haven't really looked at it. The state
>> machine is
>> really handling all the fabrics specific queue handling.
>>
>> Ideally, fc would use the same state machine. Though the current code
>> base is
>> differs quiet significantly but I can try to convert it too.
>
> I'll certainly help for FC.
>
> The 2 main hurdles are our differences around:
> a) the new flag - which I replaced in the fc calling setup and a state
> flag. Shouldn't be much of an issue. We just need to look at when/where
> the tagsets are allocated (and embedding into admin queue setup isn't
> necessarily best).
Does fc create an I/O tagset on every reconnect? Doesn't appear
that it is removed on every disconnect... I'm a bit confused.
> b) the init_ctrl - I don't have the initial connect inline to the
> init_ctrl call - I push it out to the normal "reconnect" path so that
> everything, initial connect and all reconnects, use the same
> routine/work element. Also says the transport will retry the initial
> connect if there's a failure on the initial attempt. To keep the app
> happy, init_ctrl will wait for the 1st connect attempt to finish before
> returning. Don't know how rdma/tcp feels about it, but I found it a
> much better solution and cleanup became cleaner.
Don't see a major difference, I personally find the initial setup
different than a reconnect so it's simpler to do them separately.
Can these two differences change for unification of the transports?
I would also like to get rid of the new flag, and just break these
sequences into different routines.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC v1 0/3] Unifying fabrics drivers
2023-03-08 11:38 ` Sagi Grimberg
@ 2023-03-08 15:13 ` James Smart
0 siblings, 0 replies; 17+ messages in thread
From: James Smart @ 2023-03-08 15:13 UTC (permalink / raw)
To: Sagi Grimberg, Daniel Wagner; +Cc: linux-nvme
On 3/8/2023 3:38 AM, Sagi Grimberg wrote:
>
>
> On 3/8/23 00:09, James Smart wrote:
>> On 3/7/2023 4:28 AM, Daniel Wagner wrote:
>>> On Tue, Mar 07, 2023 at 11:26:12AM +0200, Sagi Grimberg wrote:
>>>> I think we should make all transports to unify setup/teardown sequences
>>>> (i.e. including pcie and fc). Otherwise we are not gaining much.
>>>
>>> Not sure about pcie but I haven't really looked at it. The state
>>> machine is
>>> really handling all the fabrics specific queue handling.
>>>
>>> Ideally, fc would use the same state machine. Though the current code
>>> base is
>>> differs quiet significantly but I can try to convert it too.
>>
>> I'll certainly help for FC.
>>
>> The 2 main hurdles are our differences around:
>> a) the new flag - which I replaced in the fc calling setup and a state
>> flag. Shouldn't be much of an issue. We just need to look at
>> when/where the tagsets are allocated (and embedding into admin queue
>> setup isn't necessarily best).
>
> Does fc create an I/O tagset on every reconnect? Doesn't appear
> that it is removed on every disconnect... I'm a bit confused.
No. It maintains a flag (ioq_live) that is set after the first time it
creates the io queues. This flag replaces the "new" argument. In
controller connect, after admin/init/aen_init, it uses the flag to
select either a routine that initially sizes and creates the io
tag_set or a routine that resizes and blk_mq_update's the io tag set.
fc should have the same functionality as rdma/tcp. The calling sequence
is just a little different.
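Roughly like the simplified sketch below (the wrapper itself is made up;
ioq_live and the create/recreate helpers are how I remember fc.c being
laid out):

static int nvme_fc_setup_io_queues_sketch(struct nvme_fc_ctrl *ctrl)
{
	if (ctrl->ctrl.queue_count <= 1)
		return 0;		/* admin queue only */

	if (!ctrl->ioq_live)
		/* first association: size and create the io tag_set */
		return nvme_fc_create_io_queues(ctrl);

	/* reconnect: resize and blk_mq_update the existing io tag_set */
	return nvme_fc_recreate_io_queues(ctrl);
}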
>
>> b) the init_ctrl - I don't have the initial connect inline to the
>> init_ctrl call - I push it out to the normal "reconnect" path so that
>> everything, initial connect and all reconnects, use the same
>> routine/work element. Also says the transport will retry the initial
>> connect if there's a failure on the initial attempt. To keep the app
>> happy, init_ctrl will wait for the 1st connect attempt to finish
>> before returning. Don't know how rdma/tcp feels about it, but I
>> found it a much better solution and cleanup became cleaner.
>
> Don't see a major difference, I personally find the initial setup
> different than a reconnect so its simpler to do them separately.
>
> Can these two differences change for unification of the transports?
Will I change them to match rdma/tcp?
The "new" flag isn't a big deal, but I prefer not passing a flag around.
It was a straightforward change.
The initial connect - no, I won't change fc. It originally was the same
as rdma/tcp. But the separate init/create path was causing headaches
with immediate/parallel errors and reconnect. The code ended up being
duplicated and competing, and it was affecting the way ref-counting was
being done. It became much cleaner when it was reorganized to reuse the
existing reconnect path. Took a lot of time and testing to reach where
it is.
I don't know how much time/testing has gone into rdma/tcp for injected
errors, or immediate connectivity loss. FC also can't just stop if the
initial connect fails (e.g. one of the first fabric commands fails due to
an injected error). As everything is automated via sysadm and driven by
events from host adapters, if we stopped after 1 connect attempt the
storage wouldn't show up. Having the reconnect path exist even for
initial connect was beneficial.
However, there's a lot of commonality we can address outside of this.
>
> I would also like to get rid of the new flag, and just break these
> sequences to different routines.
-- james
^ permalink raw reply [flat|nested] 17+ messages in thread