* [PATCH for-next v2 0/2] RDMA/erdma: Introduce custom implementation of drain_sq and drain_rq
From: Cheng Xu @ 2022-08-25  2:39 UTC
  To: jgg, leon; +Cc: linux-rdma, KaiShen

Hi,

This series introduces erdma's implementation of drain_sq and drain_rq.
Our hardware stops processing new WRs once the QP state is error, so
the default __ib_drain_sq and __ib_drain_rq in the core code cannot
work for erdma. For this reason, we implement the drain_sq and
drain_rq interfaces ourselves.
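
For context, ib_drain_sq()/ib_drain_rq() in the core dispatch to a
driver-provided hook before falling back to the generic helpers,
roughly like this (paraphrased from drivers/infiniband/core/verbs.c;
exact code may differ across kernel versions):

	void ib_drain_sq(struct ib_qp *qp)
	{
		if (qp->device->ops.drain_sq)
			qp->device->ops.drain_sq(qp);	/* erdma_drain_sq() */
		else
			__ib_drain_sq(qp);		/* generic fallback */
	}

So assigning our drain_sq/drain_rq in struct ib_device_ops is enough
for existing callers to pick up the custom behavior transparently.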

When draining the SQ or RQ, we post both a drain send WR and a drain
recv WR, and then modify the QP state to error. Finally, we wait for
the corresponding completion in the respective drain interface.
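
Consumers are unaffected; a typical (hypothetical) ULP teardown path
still looks like:

	ib_drain_qp(qp);	/* invokes ib_drain_sq() and ib_drain_rq() */
	ib_destroy_qp(qp);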

The first patch introduces internal post_send/post_recv for qp drain, and
the second patch implements the drain_sq and drain_rq of erdma.

Changes since V1:
* Add drain_rq/drain_sq assignments in struct ib_device_ops of erdma.

Thanks,
Cheng Xu

Cheng Xu (2):
  RDMA/erdma: Introduce internal post_send/post_recv for qp drain
  RDMA/erdma: Add drain_sq and drain_rq support

 drivers/infiniband/hw/erdma/erdma_main.c  |   6 +-
 drivers/infiniband/hw/erdma/erdma_qp.c    | 116 +++++++++++++++++++++-
 drivers/infiniband/hw/erdma/erdma_verbs.h |  27 ++++-
 3 files changed, 138 insertions(+), 11 deletions(-)

-- 
2.27.0



* [PATCH for-next v2 1/2] RDMA/erdma: Introduce internal post_send/post_recv for qp drain
From: Cheng Xu @ 2022-08-25  2:39 UTC
  To: jgg, leon; +Cc: linux-rdma, KaiShen

For erdma, the hardware won't process newly posted send WRs or recv
WRs after the QP state changes to error, and no flush CQEs will be
generated for them. So, internal post_send and post_recv functions are
introduced to block new send WRs and recv WRs once a drain has been
issued.
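
After a drain WR has been posted, posting against the stopped queue
fails up front. Illustratively, a kernel caller would see:

	const struct ib_send_wr *bad_wr;
	int ret;

	ret = ib_post_send(ibqp, &wr, &bad_wr);
	/* ret == -EINVAL, bad_wr == &wr once TX is stopped by a drain */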

Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
 drivers/infiniband/hw/erdma/erdma_main.c  |  4 +-
 drivers/infiniband/hw/erdma/erdma_qp.c    | 45 ++++++++++++++++++++---
 drivers/infiniband/hw/erdma/erdma_verbs.h | 17 +++++++--
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 07e743d24847..4921ebc1286d 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -460,8 +460,8 @@ static const struct ib_device_ops erdma_device_ops = {
 	.mmap = erdma_mmap,
 	.mmap_free = erdma_mmap_free,
 	.modify_qp = erdma_modify_qp,
-	.post_recv = erdma_post_recv,
-	.post_send = erdma_post_send,
+	.post_recv = erdma_post_recv_nodrain,
+	.post_send = erdma_post_send_nodrain,
 	.poll_cq = erdma_poll_cq,
 	.query_device = erdma_query_device,
 	.query_gid = erdma_query_gid,
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index bc3ec22a62c5..abf8b134d076 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -475,8 +475,8 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi)
 	writeq(db_data, qp->kern_qp.hw_sq_db);
 }
 
-int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
-		    const struct ib_send_wr **bad_send_wr)
+static int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+			   const struct ib_send_wr **bad_send_wr, bool drain)
 {
 	struct erdma_qp *qp = to_eqp(ibqp);
 	int ret = 0;
@@ -488,6 +488,16 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
 		return -EINVAL;
 
 	spin_lock_irqsave(&qp->lock, flags);
+
+	if (unlikely(qp->flags & ERDMA_QP_FLAGS_TX_STOPPED)) {
+		*bad_send_wr = send_wr;
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (unlikely(drain))
+		qp->flags |= ERDMA_QP_FLAGS_TX_STOPPED;
+
 	sq_pi = qp->kern_qp.sq_pi;
 
 	while (wr) {
@@ -507,11 +517,19 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
 
 		wr = wr->next;
 	}
-	spin_unlock_irqrestore(&qp->lock, flags);
 
+out:
+	spin_unlock_irqrestore(&qp->lock, flags);
 	return ret;
 }
 
+int erdma_post_send_nodrain(struct ib_qp *ibqp,
+			    const struct ib_send_wr *send_wr,
+			    const struct ib_send_wr **bad_send_wr)
+{
+	return erdma_post_send(ibqp, send_wr, bad_send_wr, false);
+}
+
 static int erdma_post_recv_one(struct erdma_qp *qp,
 			       const struct ib_recv_wr *recv_wr)
 {
@@ -542,8 +560,8 @@ static int erdma_post_recv_one(struct erdma_qp *qp,
 	return 0;
 }
 
-int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
-		    const struct ib_recv_wr **bad_recv_wr)
+static int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+			   const struct ib_recv_wr **bad_recv_wr, bool drain)
 {
 	const struct ib_recv_wr *wr = recv_wr;
 	struct erdma_qp *qp = to_eqp(ibqp);
@@ -552,6 +570,15 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
 
 	spin_lock_irqsave(&qp->lock, flags);
 
+	if (unlikely(qp->flags & ERDMA_QP_FLAGS_RX_STOPPED)) {
+		ret = -EINVAL;
+		*bad_recv_wr = recv_wr;
+		goto out;
+	}
+
+	if (unlikely(drain))
+		qp->flags |= ERDMA_QP_FLAGS_RX_STOPPED;
+
 	while (wr) {
 		ret = erdma_post_recv_one(qp, wr);
 		if (ret) {
@@ -561,6 +588,14 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
 		wr = wr->next;
 	}
 
+out:
 	spin_unlock_irqrestore(&qp->lock, flags);
 	return ret;
 }
+
+int erdma_post_recv_nodrain(struct ib_qp *ibqp,
+			    const struct ib_recv_wr *recv_wr,
+			    const struct ib_recv_wr **bad_recv_wr)
+{
+	return erdma_post_recv(ibqp, recv_wr, bad_recv_wr, false);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index c7baddb1f292..f4148fbac878 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -195,6 +195,12 @@ struct erdma_qp_attrs {
 	u8 pd_len;
 };
 
+enum erdma_qp_flags {
+	ERDMA_QP_FLAGS_DRAIN_ISSUED = (1 << 0),
+	ERDMA_QP_FLAGS_TX_STOPPED = (1 << 1),
+	ERDMA_QP_FLAGS_RX_STOPPED = (1 << 2),
+};
+
 struct erdma_qp {
 	struct ib_qp ibqp;
 	struct kref ref;
@@ -202,6 +208,7 @@ struct erdma_qp {
 	struct erdma_dev *dev;
 	struct erdma_cep *cep;
 	struct rw_semaphore state_lock;
+	unsigned long flags;
 
 	union {
 		struct erdma_kqp kern_qp;
@@ -328,10 +335,12 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 void erdma_qp_get_ref(struct ib_qp *ibqp);
 void erdma_qp_put_ref(struct ib_qp *ibqp);
 struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id);
-int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
-		    const struct ib_send_wr **bad_send_wr);
-int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
-		    const struct ib_recv_wr **bad_recv_wr);
+int erdma_post_send_nodrain(struct ib_qp *ibqp,
+			    const struct ib_send_wr *send_wr,
+			    const struct ib_send_wr **bad_send_wr);
+int erdma_post_recv_nodrain(struct ib_qp *ibqp,
+			    const struct ib_recv_wr *recv_wr,
+			    const struct ib_recv_wr **bad_recv_wr);
 int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 				u32 max_num_sg);
-- 
2.27.0



* [PATCH for-next v2 2/2] RDMA/erdma: Add drain_sq and drain_rq support
From: Cheng Xu @ 2022-08-25  2:39 UTC
  To: jgg, leon; +Cc: linux-rdma, KaiShen

For erdma, the hardware won't process any WRs after the QP state is
modified to error, so the default __ib_drain_sq and __ib_drain_rq
cannot work for the erdma device. Here, we introduce a custom
implementation of the drain_sq and drain_rq interfaces to fit the
erdma hardware.
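
Note the ordering difference from the generic helpers: __ib_drain_sq()
moves the QP to the error state first and only then posts the drain
WR, which erdma hardware would never process. Our drain path reverses
the order, roughly (simplified from erdma_drain_qp_common() below):

	erdma_post_recv(ibqp, &rwr, &bad_rwr, true);	/* drain WRs first */
	erdma_post_send(ibqp, &swr.wr, &bad_swr, true);
	ib_modify_qp(ibqp, &attr, IB_QP_STATE);		/* then QP -> ERR */
	wait_for_completion(comp);			/* flush completion */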

Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
v1 -> v2:
 - Add drain_rq/drain_sq assignments in struct ib_device_ops of erdma.
---
 drivers/infiniband/hw/erdma/erdma_main.c  |  2 +
 drivers/infiniband/hw/erdma/erdma_qp.c    | 71 +++++++++++++++++++++++
 drivers/infiniband/hw/erdma/erdma_verbs.h | 10 ++++
 3 files changed, 83 insertions(+)

diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 4921ebc1286d..e4ce77607f10 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -446,6 +446,8 @@ static const struct ib_device_ops erdma_device_ops = {
 	.dereg_mr = erdma_dereg_mr,
 	.destroy_cq = erdma_destroy_cq,
 	.destroy_qp = erdma_destroy_qp,
+	.drain_rq = erdma_drain_rq,
+	.drain_sq = erdma_drain_sq,
 	.get_dma_mr = erdma_get_dma_mr,
 	.get_port_immutable = erdma_get_port_immutable,
 	.iw_accept = erdma_accept,
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index abf8b134d076..57fdb946fbfd 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -599,3 +599,74 @@ int erdma_post_recv_nodrain(struct ib_qp *ibqp,
 {
 	return erdma_post_recv(ibqp, recv_wr, bad_recv_wr, false);
 }
+
+static void erdma_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct erdma_drain_cqe *cqe =
+		container_of(wc->wr_cqe, struct erdma_drain_cqe, cqe);
+
+	complete(&cqe->done);
+}
+
+static void erdma_drain_qp_common(struct ib_qp *ibqp, struct completion *comp,
+				  struct ib_cq *ibcq)
+{
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct erdma_qp *qp = to_eqp(ibqp);
+	const struct ib_send_wr *bad_swr;
+	const struct ib_recv_wr *bad_rwr;
+	struct ib_rdma_wr swr = {
+		.wr = {
+			.next = NULL,
+			{ .wr_cqe   = &qp->kern_qp.sdrain.cqe, },
+			.opcode = IB_WR_RDMA_WRITE,
+			.send_flags = IB_SEND_SIGNALED,
+		},
+	};
+	struct ib_recv_wr rwr = {
+		.next = NULL,
+		.wr_cqe = &qp->kern_qp.rdrain.cqe,
+		.num_sge = 0,
+	};
+
+	if (qp->flags & ERDMA_QP_FLAGS_DRAIN_ISSUED)
+		goto wait_for_completion;
+
+	qp->flags |= ERDMA_QP_FLAGS_DRAIN_ISSUED;
+
+	qp->kern_qp.rdrain.cqe.done = erdma_drain_qp_done;
+	init_completion(&qp->kern_qp.rdrain.done);
+
+	qp->kern_qp.sdrain.cqe.done = erdma_drain_qp_done;
+	init_completion(&qp->kern_qp.sdrain.done);
+
+	if (erdma_post_recv(ibqp, &rwr, &bad_rwr, true))
+		return;
+
+	if (erdma_post_send(ibqp, &swr.wr, &bad_swr, true))
+		return;
+
+	if (ib_modify_qp(ibqp, &attr, IB_QP_STATE))
+		return;
+
+wait_for_completion:
+	if (ibcq->poll_ctx == IB_POLL_DIRECT)
+		while (wait_for_completion_timeout(comp, HZ / 10) <= 0)
+			ib_process_cq_direct(ibcq, -1);
+	else
+		wait_for_completion(comp);
+}
+
+void erdma_drain_sq(struct ib_qp *ibqp)
+{
+	struct erdma_qp *qp = to_eqp(ibqp);
+
+	erdma_drain_qp_common(ibqp, &qp->kern_qp.sdrain.done, ibqp->send_cq);
+}
+
+void erdma_drain_rq(struct ib_qp *ibqp)
+{
+	struct erdma_qp *qp = to_eqp(ibqp);
+
+	erdma_drain_qp_common(ibqp, &qp->kern_qp.rdrain.done, ibqp->recv_cq);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index f4148fbac878..4cec92c8a737 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -133,6 +133,11 @@ struct erdma_uqp {
 	u32 rq_offset;
 };
 
+struct erdma_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
 struct erdma_kqp {
 	u16 sq_pi;
 	u16 sq_ci;
@@ -155,6 +160,9 @@ struct erdma_kqp {
 	void *sq_db_info;
 	void *rq_db_info;
 
+	struct erdma_drain_cqe sdrain;
+	struct erdma_drain_cqe rdrain;
+
 	u8 sig_all;
 };
 
@@ -341,6 +349,8 @@ int erdma_post_send_nodrain(struct ib_qp *ibqp,
 int erdma_post_recv_nodrain(struct ib_qp *ibqp,
 			    const struct ib_recv_wr *recv_wr,
 			    const struct ib_recv_wr **bad_recv_wr);
+void erdma_drain_sq(struct ib_qp *ibqp);
+void erdma_drain_rq(struct ib_qp *ibqp);
 int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 				u32 max_num_sg);
-- 
2.27.0


