* [PATCH libmlx5 V1 0/2] Add cross-channel support
From: Leon Romanovsky @ 2016-01-16 15:55 UTC
To: yishaih-VPRAkNaXOzVWk0Htik3J/w
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky
From: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
This patchset adds the libmlx5 part of cross-channel support [1].
The patches apply on top of the "Completion timestamping" series [2].
[1] http://comments.gmane.org/gmane.linux.drivers.rdma/31715
[2] http://www.spinics.net/lists/linux-rdma/msg30940.html
Changes from v0:
* Enriched the cover letter and commit messages.
* Added the mlx5_post_send() implementation for the cross-channel
  work request opcodes.
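
For orientation, a minimal, hedged sketch of the feature from the
application's point of view follows. It assumes the companion libibverbs
series [1] routes the new QP creation flags through the extended QP
creation attributes (written here as an ibv_qp_init_attr_ex create_flags
member); the exact entry point and member names belong to that series,
not to these patches.

#include <infiniband/verbs.h>

/*
 * Hedged sketch: one managed (controlled) QP and one managing QP.
 * The flag names are the ones this series consumes in libmlx5; the
 * attribute structure and creation call are assumptions based on the
 * companion libibverbs patches and may differ there.
 */
struct ibv_qp_init_attr_ex managed_attr = {
	/* pd, send_cq/recv_cq and cap setup as for any other QP */
	.create_flags	= IBV_QP_CREATE_MANAGED_SEND |		/* send WQEs held until SEND_EN */
			  IBV_QP_CREATE_IGNORE_SQ_OVERFLOW,	/* its CQ is drained by the managing QP */
};
struct ibv_qp_init_attr_ex mgmt_attr = {
	/* pd, send_cq/recv_cq and cap setup as for any other QP */
	.create_flags	= IBV_QP_CREATE_CROSS_CHANNEL,		/* may post CQE_WAIT/SEND_EN/RECV_EN */
};

The managing QP then drives the managed QP with the new work request
opcodes added in patch 2/2.
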
Leon Romanovsky (2):
Add CQ ignore overrun creation flag
Add cross-channel work request opcodes
src/mlx5-abi.h | 14 ++++++++
src/mlx5.h | 9 ++++++
src/qp.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
src/verbs.c | 71 +++++++++++++++++++++++++++++++---------
src/wqe.h | 5 +++
5 files changed, 178 insertions(+), 21 deletions(-)
--
1.7.12.4
* [PATCH libmlx5 V1 1/2] Add CQ ignore overrun creation flag
From: Leon Romanovsky @ 2016-01-16 15:55 UTC
To: yishaih-VPRAkNaXOzVWk0Htik3J/w
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky
From: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
In cross-channel mode, the send/receive queues forward their
completions to the managing QP, which can cause overrun errors on the
managed send/receive queues.
This patch adds the ability to provide CQ creation flags to
ibv_create_cq_ex() calls, along with a new flag that disables CQ
overrun checks.
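
For illustration only, here is a minimal sketch of requesting such a CQ
through the extended creation path. The attribute member that carries
the new flag is defined by the companion libibverbs series, so the
.flags name used below is an assumption:

#include <infiniband/verbs.h>

/*
 * Hedged sketch: create a CQ with overrun checks disabled, for use with
 * managed queues. The exact ibv_create_cq_attr_ex member that holds the
 * flag (written here as .flags) comes from the companion libibverbs
 * patches and may be named differently there.
 */
static struct ibv_cq *create_overrun_tolerant_cq(struct ibv_context *ctx)
{
	struct ibv_create_cq_attr_ex cq_attr = {
		.cqe		= 256,			/* capacity hint, as in the regular path */
		.comp_vector	= 0,
		.wc_flags	= IBV_WC_STANDARD_FLAGS,
		.flags		= IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN, /* assumed member name */
	};

	/* Returns NULL on failure, with errno set by the verbs library. */
	return ibv_create_cq_ex(ctx, &cq_attr);
}
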
Signed-off-by: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
src/mlx5-abi.h | 14 ++++++++++++++
src/verbs.c | 57 ++++++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 56 insertions(+), 15 deletions(-)
diff --git a/src/mlx5-abi.h b/src/mlx5-abi.h
index 769ea811d26b..85f6ee3f691e 100644
--- a/src/mlx5-abi.h
+++ b/src/mlx5-abi.h
@@ -91,6 +91,20 @@ struct mlx5_create_cq_resp {
__u32 cqn;
};
+struct mlx5_create_cq_ex {
+ struct ibv_create_cq_ex ibv_cmd;
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u32 cqe_size;
+ __u32 comp_mask;
+};
+
+struct mlx5_create_cq_resp_ex {
+ struct ibv_create_cq_resp_ex ibv_resp;
+ __u32 cqn;
+ __u32 comp_mask;
+};
+
struct mlx5_create_srq {
struct ibv_create_srq ibv_cmd;
__u64 buf_addr;
diff --git a/src/verbs.c b/src/verbs.c
index 94b4d8f2424f..064a500b0a06 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -250,17 +250,26 @@ enum {
};
enum {
- CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_COMPLETION_TIMESTAMP
+ CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_COMPLETION_TIMESTAMP |
+ IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN
+};
+
+enum mlx5_cmd_type {
+ MLX5_LEGACY_CMD,
+ MLX5_EXTENDED_CMD
};
static struct ibv_cq *create_cq(struct ibv_context *context,
- const struct ibv_create_cq_attr_ex *cq_attr)
+ struct ibv_create_cq_attr_ex *cq_attr,
+ enum mlx5_cmd_type ctype)
{
struct mlx5_create_cq cmd;
+ struct mlx5_create_cq_ex cmd_ex;
struct mlx5_create_cq_resp resp;
+ struct mlx5_create_cq_resp_ex resp_ex;
struct mlx5_cq *cq;
int cqe_sz;
- int ret;
+ int ret = -1;
int ncqe;
#ifdef MLX5_DEBUG
FILE *fp = to_mctx(context)->dbg_fp;
@@ -299,7 +308,6 @@ static struct ibv_cq *create_cq(struct ibv_context *context,
return NULL;
}
- memset(&cmd, 0, sizeof cmd);
cq->cons_index = 0;
if (mlx5_spinlock_init(&cq->lock))
@@ -342,22 +350,41 @@ static struct ibv_cq *create_cq(struct ibv_context *context,
cq->arm_sn = 0;
cq->cqe_sz = cqe_sz;
- cmd.buf_addr = (uintptr_t) cq->buf_a.buf;
- cmd.db_addr = (uintptr_t) cq->dbrec;
- cmd.cqe_size = cqe_sz;
+ if (ctype == MLX5_LEGACY_CMD) {
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.buf_addr = (uintptr_t) cq->buf_a.buf;
+ cmd.db_addr = (uintptr_t) cq->dbrec;
+ cmd.cqe_size = cqe_sz;
+
+ ret = ibv_cmd_create_cq(context, ncqe - 1, cq_attr->channel,
+ cq_attr->comp_vector,
+ &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
+ &resp.ibv_resp, sizeof resp);
+ cq->cqn = resp.cqn;
+
+ }
+ else if (ctype == MLX5_EXTENDED_CMD) {
+ memset(&cmd_ex, 0, sizeof(cmd_ex));
+ cmd_ex.buf_addr = (uintptr_t) cq->buf_a.buf;
+ cmd_ex.db_addr = (uintptr_t) cq->dbrec;
+ cmd_ex.cqe_size = cqe_sz;
+
+ ret = ibv_cmd_create_cq_ex(context, cq_attr,
+ &cq->ibv_cq, &cmd_ex.ibv_cmd,
+ sizeof(cmd_ex.ibv_cmd), sizeof(cmd_ex),
+ &resp_ex.ibv_resp,
+ sizeof(resp_ex.ibv_resp), sizeof(resp_ex));
+ cq->cqn = resp_ex.cqn;
+ }
- ret = ibv_cmd_create_cq(context, ncqe - 1, cq_attr->channel,
- cq_attr->comp_vector,
- &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
- &resp.ibv_resp, sizeof resp);
if (ret) {
- mlx5_dbg(fp, MLX5_DBG_CQ, "ret %d\n", ret);
+ mlx5_dbg(fp, MLX5_DBG_CQ, "ret %d, ctype = %d\n", ret, ctype);
goto err_db;
}
cq->active_buf = &cq->buf_a;
cq->resize_buf = NULL;
- cq->cqn = resp.cqn;
+
cq->stall_enable = to_mctx(context)->stall_enable;
cq->stall_adaptive_enable = to_mctx(context)->stall_adaptive_enable;
cq->stall_cycles = to_mctx(context)->stall_cycles;
@@ -390,13 +417,13 @@ struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe,
.comp_vector = comp_vector,
.wc_flags = IBV_WC_STANDARD_FLAGS};
- return create_cq(context, &cq_attr);
+ return create_cq(context, &cq_attr, MLX5_LEGACY_CMD);
}
struct ibv_cq *mlx5_create_cq_ex(struct ibv_context *context,
struct ibv_create_cq_attr_ex *cq_attr)
{
- return create_cq(context, cq_attr);
+ return create_cq(context, cq_attr, MLX5_EXTENDED_CMD);
}
int mlx5_resize_cq(struct ibv_cq *ibcq, int cqe)
--
1.7.12.4
* [PATCH libmlx5 V1 2/2] Add cross-channel work request opcodes
From: Leon Romanovsky @ 2016-01-16 15:55 UTC
To: yishaih-VPRAkNaXOzVWk0Htik3J/w
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky
From: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
The cross-channel feature relies on special work request primitives for
the send and receive queues (a hedged usage sketch follows this list):
* WAIT on CQ WR - holds execution of subsequent work requests on the
  queue until the given number of completions on a CQ has been reached.
* SEND_EN WR - specifies a producer index value on the controlled send
  queue. It enables the execution of all WQEs up to the work request
  marked with the IBV_SEND_WAIT_EN_LAST flag.
* RECEIVE_EN WR - same as SEND_EN, but for a receive queue.
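
The sketch below shows how a managing QP might chain these opcodes.
The wr.cqe_wait / wr.wqe_enable members and the IBV_SEND_WAIT_EN_LAST
flag are defined by the companion libibverbs series; their exact layout
here is an assumption based on how mlx5_post_send() consumes them in
this patch.

#include <infiniband/verbs.h>

/*
 * Hedged sketch: on a QP created with IBV_QP_CREATE_CROSS_CHANNEL, wait
 * for one completion on the managed QP's CQ and then release every WQE
 * already posted (but held back) on a QP created with
 * IBV_QP_CREATE_MANAGED_SEND.
 */
static int kick_managed_qp(struct ibv_qp *mgmt_qp, struct ibv_cq *managed_cq,
			   struct ibv_qp *managed_qp)
{
	struct ibv_send_wr wait_wr = {0}, enable_wr = {0}, *bad_wr;

	wait_wr.opcode			= IBV_WR_CQE_WAIT;
	wait_wr.send_flags		= IBV_SEND_WAIT_EN_LAST;
	wait_wr.wr.cqe_wait.cq		= managed_cq;	/* CQ to wait on */
	wait_wr.wr.cqe_wait.cq_count	= 1;		/* completions to wait for */
	wait_wr.next			= &enable_wr;

	enable_wr.opcode		  = IBV_WR_SEND_ENABLE;
	enable_wr.send_flags		  = IBV_SEND_WAIT_EN_LAST;
	enable_wr.wr.wqe_enable.qp	  = managed_qp;
	enable_wr.wr.wqe_enable.wqe_count = 0;		/* 0 releases all held WQEs */

	return ibv_post_send(mgmt_qp, &wait_wr, &bad_wr);
}

Design note: mlx5_post_send() deliberately skips ringing the doorbell of
a queue created with IBV_QP_CREATE_MANAGED_SEND (see the post_send_no_db
path below), so its WQEs only execute once the managing QP's SEND_EN
request enables them.
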
Signed-off-by: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
src/mlx5.h | 9 ++++++
src/qp.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
src/verbs.c | 14 +++++++++
src/wqe.h | 5 +++
4 files changed, 122 insertions(+), 6 deletions(-)
diff --git a/src/mlx5.h b/src/mlx5.h
index 38f5f518a94b..a8e1ad6dda74 100644
--- a/src/mlx5.h
+++ b/src/mlx5.h
@@ -208,6 +208,10 @@ enum {
MLX5_OPCODE_LOCAL_INVAL = 0x1b,
MLX5_OPCODE_CONFIG_CMD = 0x1f,
+ MLX5_OPCODE_SEND_ENABLE = 0x17,
+ MLX5_OPCODE_RECV_ENABLE = 0x16,
+ MLX5_OPCODE_CQE_WAIT = 0x0f,
+
MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
MLX5_RECV_OPCODE_SEND = 0x01,
MLX5_RECV_OPCODE_SEND_IMM = 0x02,
@@ -368,6 +372,8 @@ struct mlx5_cq {
uint64_t stall_last_count;
int stall_adaptive_enable;
int stall_cycles;
+ uint32_t wait_index;
+ uint32_t wait_count;
};
struct mlx5_srq {
@@ -405,6 +411,8 @@ struct mlx5_wq {
int wqe_shift;
int offset;
void *qend;
+ uint32_t head_en_index;
+ uint32_t head_en_count;
};
struct mlx5_bf {
@@ -437,6 +445,7 @@ struct mlx5_qp {
uint32_t *db;
struct mlx5_wq rq;
int wq_sig;
+ uint32_t create_flags;
};
struct mlx5_av {
diff --git a/src/qp.c b/src/qp.c
index 67ded0d197d3..f84684e69d86 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -54,8 +54,20 @@ static const uint32_t mlx5_ib_opcode[] = {
[IBV_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
[IBV_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
[IBV_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
+ [IBV_WR_SEND_ENABLE] = MLX5_OPCODE_SEND_ENABLE,
+ [IBV_WR_RECV_ENABLE] = MLX5_OPCODE_RECV_ENABLE,
+ [IBV_WR_CQE_WAIT] = MLX5_OPCODE_CQE_WAIT
};
+static inline void set_wait_en_seg(void *wqe_seg, uint32_t obj_num, uint32_t count)
+{
+ struct mlx5_wqe_wait_en_seg *seg = (struct mlx5_wqe_wait_en_seg *)wqe_seg;
+
+ seg->pi = htonl(count);
+ seg->obj_num = htonl(obj_num);
+ return;
+}
+
static void *get_recv_wqe(struct mlx5_qp *qp, int n)
{
return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
@@ -155,6 +167,10 @@ void mlx5_init_qp_indices(struct mlx5_qp *qp)
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.cur_post = 0;
+ qp->sq.head_en_index = 0;
+ qp->sq.head_en_count = 0;
+ qp->rq.head_en_index = 0;
+ qp->rq.head_en_count = 0;
}
static int mlx5_wq_overflow(struct mlx5_wq *wq, int nreq, struct mlx5_cq *cq)
@@ -336,6 +352,11 @@ int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
void *qend = qp->sq.qend;
uint32_t mlx5_opcode;
struct mlx5_wqe_xrc_seg *xrc;
+ struct mlx5_cq *wait_cq;
+ uint32_t wait_index = 0;
+ unsigned head_en_index;
+ struct mlx5_wq *wq;
+
#ifdef MLX5_DEBUG
FILE *fp = to_mctx(ibqp->context)->dbg_fp;
#endif
@@ -352,11 +373,10 @@ int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
goto out;
}
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq,
+ if (unlikely(!(qp->create_flags & IBV_QP_CREATE_IGNORE_SQ_OVERFLOW) && mlx5_wq_overflow(&qp->sq, nreq,
to_mcq(qp->ibv_qp->send_cq)))) {
mlx5_dbg(fp, MLX5_DBG_QP_SEND, "work queue overflow\n");
- errno = ENOMEM;
- err = -1;
+ err = ENOMEM;
*bad_wr = wr;
goto out;
}
@@ -409,7 +429,69 @@ int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
err = ENOSYS;
*bad_wr = wr;
goto out;
+ case IBV_WR_CQE_WAIT:
+ if (!(qp->create_flags & IBV_QP_CREATE_CROSS_CHANNEL)) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wait_cq = to_mcq(wr->wr.cqe_wait.cq);
+ wait_index = wait_cq->wait_index + wr->wr.cqe_wait.cq_count;
+ wait_cq->wait_count = max(wait_cq->wait_count, wr->wr.cqe_wait.cq_count);
+ if (wr->send_flags & IBV_SEND_WAIT_EN_LAST) {
+ wait_cq->wait_index += wait_cq->wait_count;
+ wait_cq->wait_count = 0;
+ }
+ set_wait_en_seg(seg, wait_cq->cqn, wait_index);
+ seg += sizeof(struct mlx5_wqe_wait_en_seg);
+ size += sizeof(struct mlx5_wqe_wait_en_seg) / 16;
+ break;
+ case IBV_WR_SEND_ENABLE:
+ case IBV_WR_RECV_ENABLE:
+ if (((wr->opcode == IBV_WR_SEND_ENABLE) &&
+ !(to_mqp(wr->wr.wqe_enable.qp)->create_flags &
+ IBV_QP_CREATE_MANAGED_SEND)) ||
+ ((wr->opcode == IBV_WR_RECV_ENABLE) &&
+ !(to_mqp(wr->wr.wqe_enable.qp)->create_flags &
+ IBV_QP_CREATE_MANAGED_RECV))) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wq = (wr->opcode == IBV_WR_SEND_ENABLE) ?
+ &to_mqp(wr->wr.wqe_enable.qp)->sq :
+ &to_mqp(wr->wr.wqe_enable.qp)->rq;
+
+ /* If wqe_count is 0 release all WRs from queue */
+ if (wr->wr.wqe_enable.wqe_count) {
+ head_en_index = wq->head_en_index +
+ wr->wr.wqe_enable.wqe_count;
+ wq->head_en_count = max(wq->head_en_count,
+ wr->wr.wqe_enable.wqe_count);
+
+ if ((int)(wq->head - head_en_index) < 0) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ head_en_index = wq->head;
+ wq->head_en_count = wq->head - wq->head_en_index;
+ }
+
+ if (wr->send_flags & IBV_SEND_WAIT_EN_LAST) {
+ wq->head_en_index += wq->head_en_count;
+ wq->head_en_count = 0;
+ }
+
+ set_wait_en_seg(seg, wr->wr.wqe_enable.qp->qp_num, head_en_index);
+
+ seg += sizeof(struct mlx5_wqe_wait_en_seg);
+ size += sizeof(struct mlx5_wqe_wait_en_seg) / 16;
+ break;
default:
break;
}
@@ -492,6 +574,11 @@ out:
if (likely(nreq)) {
qp->sq.head += nreq;
+ if (qp->create_flags & IBV_QP_CREATE_MANAGED_SEND) {
+ wmb();
+ goto post_send_no_db;
+ }
+
/*
* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
@@ -528,6 +615,7 @@ out:
mlx5_spin_unlock(&bf->lock);
}
+post_send_no_db:
mlx5_spin_unlock(&qp->sq.lock);
return err;
@@ -561,11 +649,11 @@ int mlx5_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(mlx5_wq_overflow(&qp->rq, nreq,
+ if (unlikely(!(qp->create_flags & IBV_QP_CREATE_IGNORE_RQ_OVERFLOW) &&
+ mlx5_wq_overflow(&qp->rq, nreq,
to_mcq(qp->ibv_qp->recv_cq)))) {
- errno = ENOMEM;
+ err = ENOMEM;
*bad_wr = wr;
- err = -1;
goto out;
}
diff --git a/src/verbs.c b/src/verbs.c
index 064a500b0a06..15e34488883f 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -309,6 +309,9 @@ static struct ibv_cq *create_cq(struct ibv_context *context,
}
cq->cons_index = 0;
+ /* Cross-channel wait index should start from value below 0 */
+ cq->wait_index = (uint32_t)(-1);
+ cq->wait_count = 0;
if (mlx5_spinlock_init(&cq->lock))
goto err;
@@ -975,6 +978,17 @@ static int init_attr_v2(struct ibv_context *context, struct mlx5_qp *qp,
struct mlx5_create_qp_resp_ex resp;
int err;
+ qp->create_flags = (attr->create_flags & (IBV_QP_CREATE_IGNORE_SQ_OVERFLOW |
+ IBV_QP_CREATE_IGNORE_RQ_OVERFLOW |
+ IBV_QP_CREATE_CROSS_CHANNEL |
+ IBV_QP_CREATE_MANAGED_SEND |
+ IBV_QP_CREATE_MANAGED_RECV ));
+ /*
+ * These QP flags are virtual and don't need to
+ * be forwarded to the bottom layer.
+ */
+ attr->create_flags &= ~(IBV_QP_CREATE_IGNORE_SQ_OVERFLOW | IBV_QP_CREATE_IGNORE_RQ_OVERFLOW);
+
memset(&cmd, 0, sizeof(cmd));
memset(&resp, 0, sizeof(resp));
if (qp->wq_sig)
diff --git a/src/wqe.h b/src/wqe.h
index bd50d9a116e1..73aeb6aedfd9 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -187,5 +187,10 @@ struct mlx5_wqe_inline_seg {
uint32_t byte_count;
};
+struct mlx5_wqe_wait_en_seg {
+ uint8_t rsvd0[8];
+ uint32_t pi;
+ uint32_t obj_num;
+};
#endif /* WQE_H */
--
1.7.12.4