From: Leon Romanovsky <leon-2ukJVAZIZ/Y@public.gmane.org>
To: yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: [PATCH libmlx5 V1 2/2] Add cross-channel work request opcodes
Date: Sat, 16 Jan 2016 17:55:58 +0200
Message-ID: <1452959758-29611-3-git-send-email-leon@leon.nu>
In-Reply-To: <1452959758-29611-1-git-send-email-leon-2ukJVAZIZ/Y@public.gmane.org>
From: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
The cross-channel feature relies on the following special work
request primitives:

* WAIT on CQ WR - This work request holds the execution of
  subsequent work requests on its queue until the requested number
  of completions on the given CQ has been reached.
* SEND_EN WR - This work request sets the value of the producer
  index on the controlled send queue. It enables the execution of
  all WQEs up to the work request marked with the
  IBV_SEND_WAIT_EN_LAST flag.
* RECEIVE_EN WR - Same as SEND_EN, but for a receive queue.
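
A minimal usage sketch (illustrative only; the creation flags,
opcodes and ibv_send_wr fields are those introduced by the
companion libibverbs cross-channel series, and ctx, init_attr,
mcq, mqp and ctrl_qp are placeholder names):

    struct ibv_send_wr wait_wr, en_wr, *bad_wr;
    struct ibv_qp *mqp;
    int err;

    /* Create a managed QP; its posted send WQEs are not executed
     * until explicitly enabled by a SEND_EN WR. */
    init_attr.comp_mask |= IBV_QP_INIT_ATTR_CREATE_FLAGS;
    init_attr.create_flags = IBV_QP_CREATE_CROSS_CHANNEL |
                             IBV_QP_CREATE_MANAGED_SEND;
    mqp = ibv_create_qp_ex(ctx, &init_attr);

    /* On a control QP: wait for one completion on mcq, then
     * enable one pending WQE on the managed QP. */
    memset(&wait_wr, 0, sizeof(wait_wr));
    wait_wr.opcode = IBV_WR_CQE_WAIT;
    wait_wr.wr.cqe_wait.cq = mcq;       /* CQ whose completions are counted */
    wait_wr.wr.cqe_wait.cq_count = 1;   /* completions to wait for */
    wait_wr.send_flags = IBV_SEND_WAIT_EN_LAST; /* commit the wait index */
    wait_wr.next = &en_wr;

    memset(&en_wr, 0, sizeof(en_wr));
    en_wr.opcode = IBV_WR_SEND_ENABLE;
    en_wr.wr.wqe_enable.qp = mqp;       /* controlled send queue */
    en_wr.wr.wqe_enable.wqe_count = 1;  /* 0 releases all posted WRs */
    en_wr.send_flags = IBV_SEND_WAIT_EN_LAST; /* commit the enable index */

    err = ibv_post_send(ctrl_qp, &wait_wr, &bad_wr);
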
Signed-off-by: Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
src/mlx5.h | 9 ++++++
src/qp.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
src/verbs.c | 14 +++++++++
src/wqe.h | 5 +++
4 files changed, 122 insertions(+), 6 deletions(-)
diff --git a/src/mlx5.h b/src/mlx5.h
index 38f5f518a94b..a8e1ad6dda74 100644
--- a/src/mlx5.h
+++ b/src/mlx5.h
@@ -208,6 +208,10 @@ enum {
MLX5_OPCODE_LOCAL_INVAL = 0x1b,
MLX5_OPCODE_CONFIG_CMD = 0x1f,
+ MLX5_OPCODE_SEND_ENABLE = 0x17,
+ MLX5_OPCODE_RECV_ENABLE = 0x16,
+ MLX5_OPCODE_CQE_WAIT = 0x0f,
+
MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
MLX5_RECV_OPCODE_SEND = 0x01,
MLX5_RECV_OPCODE_SEND_IMM = 0x02,
@@ -368,6 +372,8 @@ struct mlx5_cq {
uint64_t stall_last_count;
int stall_adaptive_enable;
int stall_cycles;
+ uint32_t wait_index;
+ uint32_t wait_count;
};
struct mlx5_srq {
@@ -405,6 +411,8 @@ struct mlx5_wq {
int wqe_shift;
int offset;
void *qend;
+ uint32_t head_en_index;
+ uint32_t head_en_count;
};
struct mlx5_bf {
@@ -437,6 +445,7 @@ struct mlx5_qp {
uint32_t *db;
struct mlx5_wq rq;
int wq_sig;
+ uint32_t create_flags;
};
struct mlx5_av {
diff --git a/src/qp.c b/src/qp.c
index 67ded0d197d3..f84684e69d86 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -54,8 +54,20 @@ static const uint32_t mlx5_ib_opcode[] = {
[IBV_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
[IBV_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
[IBV_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
+ [IBV_WR_SEND_ENABLE] = MLX5_OPCODE_SEND_ENABLE,
+ [IBV_WR_RECV_ENABLE] = MLX5_OPCODE_RECV_ENABLE,
+ [IBV_WR_CQE_WAIT] = MLX5_OPCODE_CQE_WAIT
};
+static inline void set_wait_en_seg(void *wqe_seg, uint32_t obj_num, uint32_t count)
+{
+ struct mlx5_wqe_wait_en_seg *seg = (struct mlx5_wqe_wait_en_seg *)wqe_seg;
+
+ seg->pi = htonl(count);
+ seg->obj_num = htonl(obj_num);
+ return;
+}
+
static void *get_recv_wqe(struct mlx5_qp *qp, int n)
{
return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
@@ -155,6 +167,10 @@ void mlx5_init_qp_indices(struct mlx5_qp *qp)
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.cur_post = 0;
+ qp->sq.head_en_index = 0;
+ qp->sq.head_en_count = 0;
+ qp->rq.head_en_index = 0;
+ qp->rq.head_en_count = 0;
}
static int mlx5_wq_overflow(struct mlx5_wq *wq, int nreq, struct mlx5_cq *cq)
@@ -336,6 +352,11 @@ int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
void *qend = qp->sq.qend;
uint32_t mlx5_opcode;
struct mlx5_wqe_xrc_seg *xrc;
+ struct mlx5_cq *wait_cq;
+ uint32_t wait_index = 0;
+ unsigned head_en_index;
+ struct mlx5_wq *wq;
+
#ifdef MLX5_DEBUG
FILE *fp = to_mctx(ibqp->context)->dbg_fp;
#endif
@@ -352,11 +373,10 @@ int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
goto out;
}
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq,
+ if (unlikely(!(qp->create_flags & IBV_QP_CREATE_IGNORE_SQ_OVERFLOW) && mlx5_wq_overflow(&qp->sq, nreq,
to_mcq(qp->ibv_qp->send_cq)))) {
mlx5_dbg(fp, MLX5_DBG_QP_SEND, "work queue overflow\n");
- errno = ENOMEM;
- err = -1;
+ err = ENOMEM;
*bad_wr = wr;
goto out;
}
@@ -409,7 +429,69 @@ int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
err = ENOSYS;
*bad_wr = wr;
goto out;
+ case IBV_WR_CQE_WAIT:
+ if (!(qp->create_flags & IBV_QP_CREATE_CROSS_CHANNEL)) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wait_cq = to_mcq(wr->wr.cqe_wait.cq);
+ wait_index = wait_cq->wait_index + wr->wr.cqe_wait.cq_count;
+ wait_cq->wait_count = max(wait_cq->wait_count, wr->wr.cqe_wait.cq_count);
+ if (wr->send_flags & IBV_SEND_WAIT_EN_LAST) {
+ wait_cq->wait_index += wait_cq->wait_count;
+ wait_cq->wait_count = 0;
+ }
+ set_wait_en_seg(seg, wait_cq->cqn, wait_index);
+ seg += sizeof(struct mlx5_wqe_wait_en_seg);
+ size += sizeof(struct mlx5_wqe_wait_en_seg) / 16;
+ break;
+ case IBV_WR_SEND_ENABLE:
+ case IBV_WR_RECV_ENABLE:
+ if (((wr->opcode == IBV_WR_SEND_ENABLE) &&
+ !(to_mqp(wr->wr.wqe_enable.qp)->create_flags &
+ IBV_QP_CREATE_MANAGED_SEND)) ||
+ ((wr->opcode == IBV_WR_RECV_ENABLE) &&
+ !(to_mqp(wr->wr.wqe_enable.qp)->create_flags &
+ IBV_QP_CREATE_MANAGED_RECV))) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wq = (wr->opcode == IBV_WR_SEND_ENABLE) ?
+ &to_mqp(wr->wr.wqe_enable.qp)->sq :
+ &to_mqp(wr->wr.wqe_enable.qp)->rq;
+
+ /* If wqe_count is 0, release all WRs from the queue */
+ if (wr->wr.wqe_enable.wqe_count) {
+ head_en_index = wq->head_en_index +
+ wr->wr.wqe_enable.wqe_count;
+ wq->head_en_count = max(wq->head_en_count,
+ wr->wr.wqe_enable.wqe_count);
+
+ if ((int)(wq->head - head_en_index) < 0) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ head_en_index = wq->head;
+ wq->head_en_count = wq->head - wq->head_en_index;
+ }
+
+ if (wr->send_flags & IBV_SEND_WAIT_EN_LAST) {
+ wq->head_en_index += wq->head_en_count;
+ wq->head_en_count = 0;
+ }
+
+ set_wait_en_seg(seg, wr->wr.wqe_enable.qp->qp_num, head_en_index);
+
+ seg += sizeof(struct mlx5_wqe_wait_en_seg);
+ size += sizeof(struct mlx5_wqe_wait_en_seg) / 16;
+ break;
default:
break;
}
@@ -492,6 +574,11 @@ out:
if (likely(nreq)) {
qp->sq.head += nreq;
+ if (qp->create_flags & IBV_QP_CREATE_MANAGED_SEND) {
+ wmb();
+ goto post_send_no_db;
+ }
+
/*
* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
@@ -528,6 +615,7 @@ out:
mlx5_spin_unlock(&bf->lock);
}
+post_send_no_db:
mlx5_spin_unlock(&qp->sq.lock);
return err;
@@ -561,11 +649,11 @@ int mlx5_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(mlx5_wq_overflow(&qp->rq, nreq,
+ if (unlikely(!(qp->create_flags & IBV_QP_CREATE_IGNORE_RQ_OVERFLOW) &&
+ mlx5_wq_overflow(&qp->rq, nreq,
to_mcq(qp->ibv_qp->recv_cq)))) {
- errno = ENOMEM;
+ err = ENOMEM;
*bad_wr = wr;
- err = -1;
goto out;
}
diff --git a/src/verbs.c b/src/verbs.c
index 064a500b0a06..15e34488883f 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -309,6 +309,9 @@ static struct ibv_cq *create_cq(struct ibv_context *context,
}
cq->cons_index = 0;
+ /* The cross-channel wait index should start from a value below 0, i.e. (uint32_t)(-1) */
+ cq->wait_index = (uint32_t)(-1);
+ cq->wait_count = 0;
if (mlx5_spinlock_init(&cq->lock))
goto err;
@@ -975,6 +978,17 @@ static int init_attr_v2(struct ibv_context *context, struct mlx5_qp *qp,
struct mlx5_create_qp_resp_ex resp;
int err;
+ qp->create_flags = (attr->create_flags & (IBV_QP_CREATE_IGNORE_SQ_OVERFLOW |
+ IBV_QP_CREATE_IGNORE_RQ_OVERFLOW |
+ IBV_QP_CREATE_CROSS_CHANNEL |
+ IBV_QP_CREATE_MANAGED_SEND |
+ IBV_QP_CREATE_MANAGED_RECV));
+ /*
+ * These QP flags are virtual and don't need to
+ * be forwarded to the bottom layer.
+ */
+ attr->create_flags &= ~(IBV_QP_CREATE_IGNORE_SQ_OVERFLOW | IBV_QP_CREATE_IGNORE_RQ_OVERFLOW);
+
memset(&cmd, 0, sizeof(cmd));
memset(&resp, 0, sizeof(resp));
if (qp->wq_sig)
diff --git a/src/wqe.h b/src/wqe.h
index bd50d9a116e1..73aeb6aedfd9 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -187,5 +187,10 @@ struct mlx5_wqe_inline_seg {
uint32_t byte_count;
};
+struct mlx5_wqe_wait_en_seg {
+ uint8_t rsvd0[8];
+ uint32_t pi;
+ uint32_t obj_num;
+};
#endif /* WQE_H */
--
1.7.12.4