* [PATCH for-next v2 0/2] RDMA/erdma: Introduce custom implementation of drain_sq and drain_rq
From: Cheng Xu @ 2022-08-25 2:39 UTC
To: jgg, leon; +Cc: linux-rdma, KaiShen
Hi,
This series introduces erdma's implementation of drain_sq and drain_rq.
Our hardware stops processing new WRs once the QP state is error, so the
default __ib_drain_sq and __ib_drain_rq in the core code cannot work for
erdma. For this reason, we implement the drain_sq and drain_rq
interfaces ourselves.
When draining either the SQ or the RQ, we post both a drain send WR and
a drain recv WR, and then modify the QP state to error. Finally, we wait
for the corresponding completion in the separate drain_sq/drain_rq
interface.
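In sketch form, the common drain path looks like this (condensed from
erdma_drain_qp_common() in patch 2; WR setup, error handling and the
IB_POLL_DIRECT wait loop are omitted):

    /* Post one drain WR on each queue, move the QP to error, then
     * wait for the drain completion in drain_sq()/drain_rq().
     */
    erdma_post_recv(ibqp, &rwr, &bad_rwr, true);    /* drain recv WR */
    erdma_post_send(ibqp, &swr.wr, &bad_swr, true); /* drain send WR */
    ib_modify_qp(ibqp, &attr, IB_QP_STATE);         /* to IB_QPS_ERR */
    wait_for_completion(&qp->kern_qp.sdrain.done);  /* or rdrain.done */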
The first patch introduces internal post_send/post_recv variants for QP
drain, and the second patch implements erdma's drain_sq and drain_rq.
Changes since V1:
* Add drain_rq/drain_sq assignments in struct ib_device_ops of erdma.
Thanks,
Cheng Xu
Cheng Xu (2):
RDMA/erdma: Introduce internal post_send/post_recv for qp drain
RDMA/erdma: Add drain_sq and drain_rq support
drivers/infiniband/hw/erdma/erdma_main.c | 6 +-
drivers/infiniband/hw/erdma/erdma_qp.c | 116 +++++++++++++++++++++-
drivers/infiniband/hw/erdma/erdma_verbs.h | 27 ++++-
3 files changed, 138 insertions(+), 11 deletions(-)
--
2.27.0
* [PATCH for-next v2 1/2] RDMA/erdma: Introduce internal post_send/post_recv for qp drain
From: Cheng Xu @ 2022-08-25 2:39 UTC
To: jgg, leon; +Cc: linux-rdma, KaiShen
For erdma, the hardware won't process newly posted send or recv WRs
after the QP state changes to error, and no flush CQEs will be generated
for them. So internal post_send and post_recv functions are introduced
to reject new send and recv WRs once a drain has been issued.
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
drivers/infiniband/hw/erdma/erdma_main.c | 4 +-
drivers/infiniband/hw/erdma/erdma_qp.c | 45 ++++++++++++++++++++---
drivers/infiniband/hw/erdma/erdma_verbs.h | 17 +++++++--
3 files changed, 55 insertions(+), 11 deletions(-)
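Condensed from the diff below: the internal variants take a new "drain"
flag, and once a drain WR has been accepted the *_STOPPED flag makes
every later post fail under the QP lock (the SQ side is shown; the RQ
side is symmetric):

    spin_lock_irqsave(&qp->lock, flags);
    if (unlikely(qp->flags & ERDMA_QP_FLAGS_TX_STOPPED)) {
        *bad_send_wr = send_wr;
        ret = -EINVAL;  /* QP already drained, reject the new WR */
        goto out;
    }
    if (unlikely(drain))
        qp->flags |= ERDMA_QP_FLAGS_TX_STOPPED; /* last accepted WR */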
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 07e743d24847..4921ebc1286d 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -460,8 +460,8 @@ static const struct ib_device_ops erdma_device_ops = {
.mmap = erdma_mmap,
.mmap_free = erdma_mmap_free,
.modify_qp = erdma_modify_qp,
- .post_recv = erdma_post_recv,
- .post_send = erdma_post_send,
+ .post_recv = erdma_post_recv_nodrain,
+ .post_send = erdma_post_send_nodrain,
.poll_cq = erdma_poll_cq,
.query_device = erdma_query_device,
.query_gid = erdma_query_gid,
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index bc3ec22a62c5..abf8b134d076 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -475,8 +475,8 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi)
writeq(db_data, qp->kern_qp.hw_sq_db);
}
-int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
- const struct ib_send_wr **bad_send_wr)
+static int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr, bool drain)
{
struct erdma_qp *qp = to_eqp(ibqp);
int ret = 0;
@@ -488,6 +488,16 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
return -EINVAL;
spin_lock_irqsave(&qp->lock, flags);
+
+ if (unlikely(qp->flags & ERDMA_QP_FLAGS_TX_STOPPED)) {
+ *bad_send_wr = send_wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely(drain))
+ qp->flags |= ERDMA_QP_FLAGS_TX_STOPPED;
+
sq_pi = qp->kern_qp.sq_pi;
while (wr) {
@@ -507,11 +517,19 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
wr = wr->next;
}
- spin_unlock_irqrestore(&qp->lock, flags);
+out:
+ spin_unlock_irqrestore(&qp->lock, flags);
return ret;
}
+int erdma_post_send_nodrain(struct ib_qp *ibqp,
+ const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr)
+{
+ return erdma_post_send(ibqp, send_wr, bad_send_wr, false);
+}
+
static int erdma_post_recv_one(struct erdma_qp *qp,
const struct ib_recv_wr *recv_wr)
{
@@ -542,8 +560,8 @@ static int erdma_post_recv_one(struct erdma_qp *qp,
return 0;
}
-int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr)
+static int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr, bool drain)
{
const struct ib_recv_wr *wr = recv_wr;
struct erdma_qp *qp = to_eqp(ibqp);
@@ -552,6 +570,15 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
spin_lock_irqsave(&qp->lock, flags);
+ if (unlikely(qp->flags & ERDMA_QP_FLAGS_RX_STOPPED)) {
+ ret = -EINVAL;
+ *bad_recv_wr = recv_wr;
+ goto out;
+ }
+
+ if (unlikely(drain))
+ qp->flags |= ERDMA_QP_FLAGS_RX_STOPPED;
+
while (wr) {
ret = erdma_post_recv_one(qp, wr);
if (ret) {
@@ -561,6 +588,14 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
wr = wr->next;
}
+out:
spin_unlock_irqrestore(&qp->lock, flags);
return ret;
}
+
+int erdma_post_recv_nodrain(struct ib_qp *ibqp,
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr)
+{
+ return erdma_post_recv(ibqp, recv_wr, bad_recv_wr, false);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index c7baddb1f292..f4148fbac878 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -195,6 +195,12 @@ struct erdma_qp_attrs {
u8 pd_len;
};
+enum erdma_qp_flags {
+ ERDMA_QP_FLAGS_DRAIN_ISSUED = (1 << 0),
+ ERDMA_QP_FLAGS_TX_STOPPED = (1 << 1),
+ ERDMA_QP_FLAGS_RX_STOPPED = (1 << 2),
+};
+
struct erdma_qp {
struct ib_qp ibqp;
struct kref ref;
@@ -202,6 +208,7 @@ struct erdma_qp {
struct erdma_dev *dev;
struct erdma_cep *cep;
struct rw_semaphore state_lock;
+ unsigned long flags;
union {
struct erdma_kqp kern_qp;
@@ -328,10 +335,12 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
void erdma_qp_get_ref(struct ib_qp *ibqp);
void erdma_qp_put_ref(struct ib_qp *ibqp);
struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id);
-int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
- const struct ib_send_wr **bad_send_wr);
-int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
+int erdma_post_send_nodrain(struct ib_qp *ibqp,
+ const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+int erdma_post_recv_nodrain(struct ib_qp *ibqp,
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg);
--
2.27.0
* [PATCH for-next v2 2/2] RDMA/erdma: Add drain_sq and drain_rq support
From: Cheng Xu @ 2022-08-25 2:39 UTC
To: jgg, leon; +Cc: linux-rdma, KaiShen
For erdma, the hardware won't process any WRs after the QP state is
modified to error, so the default __ib_drain_sq and __ib_drain_rq cannot
work for erdma devices. Here, we introduce a custom implementation of
the drain_sq and drain_rq interfaces to fit the erdma hardware.
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
v1 -> v2:
- Add drain_rq/drain_sq assignments in struct ib_device_ops of erdma.
---
drivers/infiniband/hw/erdma/erdma_main.c | 2 +
drivers/infiniband/hw/erdma/erdma_qp.c | 71 +++++++++++++++++++++++
drivers/infiniband/hw/erdma/erdma_verbs.h | 10 ++++
3 files changed, 83 insertions(+)
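For context, kernel ULPs reach these ops through the core helpers rather
than calling them directly: ib_drain_qp() invokes the driver's
drain_sq/drain_rq ops when they are set, and falls back to the generic
__ib_drain_sq/__ib_drain_rq otherwise. A minimal, hypothetical teardown
sequence (struct ulp_conn and ulp_destroy_conn are illustrative only,
not part of this series):

    #include <rdma/ib_verbs.h>

    struct ulp_conn {            /* hypothetical ULP connection state */
        struct ib_qp *qp;
        struct ib_cq *cq;
    };

    static void ulp_destroy_conn(struct ulp_conn *conn)
    {
        ib_drain_qp(conn->qp);   /* flushes SQ and RQ via erdma's ops */
        ib_destroy_qp(conn->qp);
        ib_free_cq(conn->cq);
    }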
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 4921ebc1286d..e4ce77607f10 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -446,6 +446,8 @@ static const struct ib_device_ops erdma_device_ops = {
.dereg_mr = erdma_dereg_mr,
.destroy_cq = erdma_destroy_cq,
.destroy_qp = erdma_destroy_qp,
+ .drain_rq = erdma_drain_rq,
+ .drain_sq = erdma_drain_sq,
.get_dma_mr = erdma_get_dma_mr,
.get_port_immutable = erdma_get_port_immutable,
.iw_accept = erdma_accept,
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index abf8b134d076..57fdb946fbfd 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -599,3 +599,74 @@ int erdma_post_recv_nodrain(struct ib_qp *ibqp,
{
return erdma_post_recv(ibqp, recv_wr, bad_recv_wr, false);
}
+
+static void erdma_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct erdma_drain_cqe *cqe =
+ container_of(wc->wr_cqe, struct erdma_drain_cqe, cqe);
+
+ complete(&cqe->done);
+}
+
+static void erdma_drain_qp_common(struct ib_qp *ibqp, struct completion *comp,
+ struct ib_cq *ibcq)
+{
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct erdma_qp *qp = to_eqp(ibqp);
+ const struct ib_send_wr *bad_swr;
+ const struct ib_recv_wr *bad_rwr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &qp->kern_qp.sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ .send_flags = IB_SEND_SIGNALED,
+ },
+ };
+ struct ib_recv_wr rwr = {
+ .next = NULL,
+ .wr_cqe = &qp->kern_qp.rdrain.cqe,
+ .num_sge = 0,
+ };
+
+ if (qp->flags & ERDMA_QP_FLAGS_DRAIN_ISSUED)
+ goto wait_for_completion;
+
+ qp->flags |= ERDMA_QP_FLAGS_DRAIN_ISSUED;
+
+ qp->kern_qp.rdrain.cqe.done = erdma_drain_qp_done;
+ init_completion(&qp->kern_qp.rdrain.done);
+
+ qp->kern_qp.sdrain.cqe.done = erdma_drain_qp_done;
+ init_completion(&qp->kern_qp.sdrain.done);
+
+ if (erdma_post_recv(ibqp, &rwr, &bad_rwr, true))
+ return;
+
+ if (erdma_post_send(ibqp, &swr.wr, &bad_swr, true))
+ return;
+
+ if (ib_modify_qp(ibqp, &attr, IB_QP_STATE))
+ return;
+
+wait_for_completion:
+ if (ibcq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(comp, HZ / 10) <= 0)
+ ib_process_cq_direct(ibcq, -1);
+ else
+ wait_for_completion(comp);
+}
+
+void erdma_drain_sq(struct ib_qp *ibqp)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+
+ erdma_drain_qp_common(ibqp, &qp->kern_qp.sdrain.done, ibqp->send_cq);
+}
+
+void erdma_drain_rq(struct ib_qp *ibqp)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+
+ erdma_drain_qp_common(ibqp, &qp->kern_qp.rdrain.done, ibqp->recv_cq);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index f4148fbac878..4cec92c8a737 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -133,6 +133,11 @@ struct erdma_uqp {
u32 rq_offset;
};
+struct erdma_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
struct erdma_kqp {
u16 sq_pi;
u16 sq_ci;
@@ -155,6 +160,9 @@ struct erdma_kqp {
void *sq_db_info;
void *rq_db_info;
+ struct erdma_drain_cqe sdrain;
+ struct erdma_drain_cqe rdrain;
+
u8 sig_all;
};
@@ -341,6 +349,8 @@ int erdma_post_send_nodrain(struct ib_qp *ibqp,
int erdma_post_recv_nodrain(struct ib_qp *ibqp,
const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
+void erdma_drain_sq(struct ib_qp *ibqp);
+void erdma_drain_rq(struct ib_qp *ibqp);
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg);
--
2.27.0