public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH libmlx4 v6 1/2] libmlx4: Infrastructure changes to support verbs extensions
@ 2013-03-26 21:16 sean.hefty-ral2JQCrhuEAvxtiuMwx3w
       [not found] ` <1364332591-22866-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: sean.hefty-ral2JQCrhuEAvxtiuMwx3w @ 2013-03-26 21:16 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Yishai Hadas, Tzahi Oved

From: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Signed-off-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Tzahi Oved <tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
Change from v5:
Reverted support to include provider ABI 2

 src/mlx4.c |   82 ++++++++++++++++++++++++++++++++---------------------------
 src/mlx4.h |    8 ++++-
 2 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/src/mlx4.c b/src/mlx4.c
index 8cf249a..dcea026 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -120,22 +120,26 @@ static struct ibv_context_ops mlx4_ctx_ops = {
 	.detach_mcast  = ibv_cmd_detach_mcast
 };
 
-static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+static int mlx4_init_context(struct verbs_device *v_device,
+			struct ibv_context *ibv_ctx, int cmd_fd)
 {
-	struct mlx4_context	       *context;
+	struct mlx4_context		*context;
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
+	/* verbs_context should be used for new verbs
+	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
+	 */
 
-	context = calloc(1, sizeof *context);
-	if (!context)
-		return NULL;
-
-	context->ibv_ctx.cmd_fd = cmd_fd;
+	/* memory footprint of mlx4_context and verbs_context share
+	  * struct ibv_context.
+	*/
+	context = to_mctx(ibv_ctx);
+	ibv_ctx->cmd_fd = cmd_fd;
 
-	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+	if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof resp))
-		goto err_free;
+		return errno;
 
 	context->num_qps	= resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
@@ -150,15 +154,16 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
-	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
-			    MAP_SHARED, cmd_fd, 0);
+	context->uar = mmap(NULL, to_mdev(&v_device->device)->page_size,
+			    PROT_WRITE, MAP_SHARED, cmd_fd, 0);
 	if (context->uar == MAP_FAILED)
-		goto err_free;
+		return errno;
 
 	if (resp.bf_reg_size) {
-		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+		context->bf_page = mmap(NULL,
+					to_mdev(&v_device->device)->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
-					to_mdev(ibdev)->page_size);
+					to_mdev(&v_device->device)->page_size);
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -176,35 +181,29 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
 
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
-	context->ibv_ctx.ops = mlx4_ctx_ops;
-
-	return &context->ibv_ctx;
+	ibv_ctx->ops = mlx4_ctx_ops;
+	/* New verbs should be added as below
+	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
+	  */
+	return 0;
 
-err_free:
-	free(context);
-	return NULL;
 }
 
-static void mlx4_free_context(struct ibv_context *ibctx)
+static void mlx4_uninit_context(struct verbs_device *v_device,
+					struct ibv_context *ibv_ctx)
 {
-	struct mlx4_context *context = to_mctx(ibctx);
+	struct mlx4_context *context = to_mctx(ibv_ctx);
 
-	munmap(context->uar, to_mdev(ibctx->device)->page_size);
+	munmap(context->uar, to_mdev(&v_device->device)->page_size);
 	if (context->bf_page)
-		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
-	free(context);
+		munmap(context->bf_page, to_mdev(&v_device->device)->page_size);
 }
 
-static struct ibv_device_ops mlx4_dev_ops = {
-	.alloc_context = mlx4_alloc_context,
-	.free_context  = mlx4_free_context
-};
-
-static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
-					    int abi_version)
+static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
+					     int abi_version)
 {
 	char			value[8];
-	struct mlx4_device    *dev;
+	struct mlx4_device	*dev;
 	unsigned		vendor, device;
 	int			i;
 
@@ -236,23 +235,30 @@ found:
 		return NULL;
 	}
 
-	dev = malloc(sizeof *dev);
+	dev = calloc(1, sizeof(*dev));
 	if (!dev) {
 		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
 			uverbs_sys_path);
 		return NULL;
 	}
 
-	dev->ibv_dev.ops = mlx4_dev_ops;
 	dev->page_size   = sysconf(_SC_PAGESIZE);
-
-	return &dev->ibv_dev;
+	dev->verbs_dev.sz = sizeof(*dev);
+	dev->verbs_dev.size_of_context =
+		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
+	 /* mlx4_init_context will initialize provider calls */
+	dev->verbs_dev.init_context = mlx4_init_context;
+	dev->verbs_dev.uninit_context = mlx4_uninit_context;
+
+	return &dev->verbs_dev;
 }
 
+
 #ifdef HAVE_IBV_REGISTER_DRIVER
 static __attribute__((constructor)) void mlx4_register_driver(void)
 {
-	ibv_register_driver("mlx4", mlx4_driver_init);
+	verbs_register_driver("mlx4", mlx4_driver_init);
+
 }
 #else
 /*
diff --git a/src/mlx4.h b/src/mlx4.h
index 13c13d8..5028fea 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -131,7 +131,7 @@ enum {
 };
 
 struct mlx4_device {
-	struct ibv_device		ibv_dev;
+	struct verbs_device		verbs_dev;
 	int				page_size;
 };
 
@@ -258,7 +258,11 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 
 static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
 {
-	return to_mxxx(dev, device);
+	/* ibv_device is first field of verbs_device
+	 * see try_driver in libibverbs
+	 */
+	return ((struct mlx4_device *)
+		((void *) ibdev - offsetof(struct mlx4_device, verbs_dev)));
 }
 
 static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH libmlx4 v6 2/2] libmlx4: Add support for XRC QPs
       [not found] ` <1364332591-22866-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2013-03-26 21:16   ` sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  2013-06-04 19:39   ` [PATCH libmlx4 v6 1/2] libmlx4: Infrastructure changes to support verbs extensions Steve Wise
  1 sibling, 0 replies; 8+ messages in thread
From: sean.hefty-ral2JQCrhuEAvxtiuMwx3w @ 2013-03-26 21:16 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg
  Cc: Sean Hefty

From: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Signed-off-by: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 src/buf.c      |    6 +-
 src/cq.c       |   40 +++++++---
 src/mlx4-abi.h |    6 ++
 src/mlx4.c     |   27 +++++---
 src/mlx4.h     |   64 +++++++++++++++--
 src/qp.c       |   39 +++++++----
 src/srq.c      |  151 ++++++++++++++++++++++++++++++++++++++
 src/verbs.c    |  220 +++++++++++++++++++++++++++++++++++++++++--------------
 8 files changed, 454 insertions(+), 99 deletions(-)

diff --git a/src/buf.c b/src/buf.c
index a80bcb1..50957bb 100644
--- a/src/buf.c
+++ b/src/buf.c
@@ -78,6 +78,8 @@ int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size)
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
-	ibv_dofork_range(buf->buf, buf->length);
-	munmap(buf->buf, buf->length);
+	if (buf->length) {
+		ibv_dofork_range(buf->buf, buf->length);
+		munmap(buf->buf, buf->length);
+	}
 }
diff --git a/src/cq.c b/src/cq.c
index 8f7a8cc..20ce1f1 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -220,33 +220,43 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 	rmb();
 
 	qpn = ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
+	wc->qp_num = qpn;
 
 	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	if (!*cur_qp ||
-	    (qpn != (*cur_qp)->ibv_qp.qp_num)) {
+	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
 		/*
-		 * We do not have to take the QP table lock here,
-		 * because CQs will be locked while QPs are removed
+		 * We do not have to take the XSRQ table lock here,
+		 * because CQs will be locked while SRQs are removed
 		 * from the table.
 		 */
-		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
-		if (!*cur_qp)
+		srq = mlx4_find_xsrq(&to_mctx(cq->ibv_cq.context)->xsrq_table,
+				     ntohl(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
+		if (!srq)
 			return CQ_POLL_ERR;
+	} else {
+		if (!*cur_qp || (qpn != (*cur_qp)->verbs_qp.qp.qp_num)) {
+			/*
+		 	 * We do not have to take the QP table lock here,
+			 * because CQs will be locked while QPs are removed
+		 	 * from the table.
+			 */
+			*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
+			if (!*cur_qp)
+				return CQ_POLL_ERR;
+		}
+		srq = ((*cur_qp)->verbs_qp.qp.srq) ? to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
 	}
 
-	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
-
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
 		wqe_index = ntohs(cqe->wqe_index);
 		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
-	} else if ((*cur_qp)->ibv_qp.srq) {
-		srq = to_msrq((*cur_qp)->ibv_qp.srq);
+	} else if (srq) {
 		wqe_index = htons(cqe->wqe_index);
 		wc->wr_id = srq->wrid[wqe_index];
 		mlx4_free_srq_wqe(srq, wqe_index);
@@ -322,7 +332,8 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
 		wc->pkey_index     = ntohl(cqe->immed_rss_invalid) & 0x7f;
-		if ((*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
+		/* HACK */
+		if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
 			wc->sl	   = ntohs(cqe->sl_vid) >> 13;
 		else
 			wc->sl	   = ntohs(cqe->sl_vid) >> 12;
@@ -411,7 +422,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
-		if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
+		if (srq && srq->ext_srq &&
+		    ntohl(cqe->g_mlpath_rqpn & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+			++nfreed;
+		} else if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
 			++nfreed;
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index 20a40c9..40d0d9a 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -74,6 +74,12 @@ struct mlx4_create_srq {
 	__u64				db_addr;
 };
 
+struct mlx4_create_xsrq {
+	struct ibv_create_xsrq		ibv_cmd;
+	__u64				buf_addr;
+	__u64				db_addr;
+};
+
 struct mlx4_create_srq_resp {
 	struct ibv_create_srq_resp	ibv_resp;
 	__u32				srqn;
diff --git a/src/mlx4.c b/src/mlx4.c
index dcea026..801f4f0 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -127,13 +127,14 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	struct ibv_get_context		cmd;
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
-	/* verbs_context should be used for new verbs
-	  *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
-	 */
+	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
 
 	/* memory footprint of mlx4_context and verbs_context share
-	  * struct ibv_context.
-	*/
+	 * struct ibv_context.
+	 */
+	if (sizeof(*verbs_ctx) > *(((size_t *) ibv_ctx) - 1))
+		return ENOSYS;
+
 	context = to_mctx(ibv_ctx);
 	ibv_ctx->cmd_fd = cmd_fd;
 
@@ -152,6 +153,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
 
+	mlx4_init_xsrq_table(&context->xsrq_table, resp.qp_tab_size);
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
 	context->uar = mmap(NULL, to_mdev(&v_device->device)->page_size,
@@ -182,15 +184,20 @@ static int mlx4_init_context(struct verbs_device *v_device,
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
 	ibv_ctx->ops = mlx4_ctx_ops;
-	/* New verbs should be added as below
-	  * verbs_ctx->drv_new_func1 = mlx4_new_func1;
-	  */
-	return 0;
+	verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
+				   VERBS_CONTEXT_QP;
+	verbs_ctx->close_xrcd = mlx4_close_xrcd;
+	verbs_ctx->open_xrcd = mlx4_open_xrcd;
+	verbs_ctx->create_srq_ex = mlx4_create_srq_ex;
+	verbs_ctx->get_srq_num = verbs_get_srq_num;
+	verbs_ctx->create_qp_ex = mlx4_create_qp_ex;
+	verbs_ctx->open_qp = mlx4_open_qp;
 
+	return 0;
 }
 
 static void mlx4_uninit_context(struct verbs_device *v_device,
-					struct ibv_context *ibv_ctx)
+				struct ibv_context *ibv_ctx)
 {
 	struct mlx4_context *context = to_mctx(ibv_ctx);
 
diff --git a/src/mlx4.h b/src/mlx4.h
index 5028fea..6c627e7 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -38,6 +38,7 @@
 
 #include <infiniband/driver.h>
 #include <infiniband/arch.h>
+#include <infiniband/verbs.h>
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 
@@ -97,6 +98,37 @@ enum {
 	MLX4_QP_TABLE_MASK		= MLX4_QP_TABLE_SIZE - 1
 };
 
+#define MLX4_REMOTE_SRQN_FLAGS(wr) htonl((wr)->wr.xrc.remote_srqn << 8)
+#define MLX4_GET_SRQN(srq) (srq)->ibv_srq.srq_num
+
+enum {
+	MLX4_XSRQ_TABLE_BITS = 8,
+	MLX4_XSRQ_TABLE_SIZE = 1 << MLX4_XSRQ_TABLE_BITS,
+	MLX4_XSRQ_TABLE_MASK = MLX4_XSRQ_TABLE_SIZE - 1
+};
+
+struct mlx4_xsrq_table {
+	struct {
+		struct mlx4_srq **table;
+		int		  refcnt;
+	} xsrq_table[MLX4_XSRQ_TABLE_SIZE];
+
+	pthread_mutex_t		  mutex;
+	int			  num_xsrq;
+	int			  shift;
+	int			  mask;
+};
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+
+enum {
+	MLX4_XRC_QPN_BIT     = (1 << 23)
+};
+
 enum mlx4_db_type {
 	MLX4_DB_TYPE_CQ,
 	MLX4_DB_TYPE_RQ,
@@ -157,6 +189,8 @@ struct mlx4_context {
 	int				qp_table_shift;
 	int				qp_table_mask;
 
+	struct mlx4_xsrq_table		xsrq_table;
+
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
 	pthread_mutex_t			db_list_mutex;
 };
@@ -184,7 +218,7 @@ struct mlx4_cq {
 };
 
 struct mlx4_srq {
-	struct ibv_srq			ibv_srq;
+	struct verbs_srq		verbs_srq;
 	struct mlx4_buf			buf;
 	pthread_spinlock_t		lock;
 	uint64_t		       *wrid;
@@ -196,6 +230,7 @@ struct mlx4_srq {
 	int				tail;
 	uint32_t		       *db;
 	uint16_t			counter;
+	uint8_t				ext_srq;
 };
 
 struct mlx4_wq {
@@ -211,7 +246,7 @@ struct mlx4_wq {
 };
 
 struct mlx4_qp {
-	struct ibv_qp			ibv_qp;
+	struct verbs_qp			verbs_qp;
 	struct mlx4_buf			buf;
 	int				max_inline_data;
 	int				buf_size;
@@ -251,6 +286,7 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 {
 	return (val + align - 1) & ~(align - 1);
 }
+int align_queue_size(int req);
 
 #define to_mxxx(xxx, type)						\
 	((struct mlx4_##type *)					\
@@ -282,12 +318,14 @@ static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
 
 static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
 {
-	return to_mxxx(srq, srq);
+	return container_of(container_of(ibsrq, struct verbs_srq, srq),
+			    struct mlx4_srq, verbs_srq);
 }
 
 static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
 {
-	return to_mxxx(qp, qp);
+	return container_of(container_of(ibqp, struct verbs_qp, qp),
+			    struct mlx4_qp, verbs_qp);
 }
 
 static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
@@ -308,6 +346,9 @@ int mlx4_query_port(struct ibv_context *context, uint8_t port,
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
 int mlx4_free_pd(struct ibv_pd *pd);
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr);
+int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
 			    size_t length, int access);
@@ -329,20 +370,33 @@ void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 				 struct ibv_srq_init_attr *attr);
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex);
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex);
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int mask);
 int mlx4_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq);
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 			struct mlx4_srq *srq);
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
 		       struct ibv_recv_wr **bad_wr);
 
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr);
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr);
 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr);
@@ -357,7 +411,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 			  struct ibv_recv_wr **bad_wr);
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 			   struct mlx4_qp *qp);
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp);
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type);
diff --git a/src/qp.c b/src/qp.c
index 40a6689..132660f 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -208,7 +208,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 	ind = qp->sq.head;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -246,6 +246,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 		size = sizeof *ctrl / 16;
 
 		switch (ibqp->qp_type) {
+		case IBV_QPT_XRC_SEND:
+			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
+			/* fall through */
 		case IBV_QPT_RC:
 		case IBV_QPT_UC:
 			switch (wr->opcode) {
@@ -452,7 +455,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
 			ret = ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -546,6 +549,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_RC:
 		size += sizeof (struct mlx4_wqe_raddr_seg);
 		/*
@@ -575,14 +579,16 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 		; /* nothing */
 }
 
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
 	qp->rq.max_gs	 = cap->max_recv_sge;
 
-	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
-	if (!qp->sq.wrid)
-		return -1;
+	if (qp->sq.wqe_cnt) {
+		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+		if (!qp->sq.wrid)
+			return -1;
+	}
 
 	if (qp->rq.wqe_cnt) {
 		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
@@ -607,15 +613,19 @@ int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		qp->sq.offset = 0;
 	}
 
-	if (mlx4_alloc_buf(&qp->buf,
-			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
-			    to_mdev(pd->context->device)->page_size)) {
-		free(qp->sq.wrid);
-		free(qp->rq.wrid);
-		return -1;
-	}
+	if (qp->buf_size) {
+		if (mlx4_alloc_buf(&qp->buf,
+				   align(qp->buf_size, to_mdev(context->device)->page_size),
+				   to_mdev(context->device)->page_size)) {
+			free(qp->sq.wrid);
+			free(qp->rq.wrid);
+			return -1;
+		}
 
-	memset(qp->buf.buf, 0, qp->buf_size);
+		memset(qp->buf.buf, 0, qp->buf_size);
+	} else {
+		qp->buf.buf = NULL;
+	}
 
 	return 0;
 }
@@ -631,6 +641,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
 		break;
 
+	case IBV_QPT_XRC_SEND:
 	case IBV_QPT_UC:
 	case IBV_QPT_RC:
 		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
diff --git a/src/srq.c b/src/srq.c
index f1d1240..bc19c51 100644
--- a/src/srq.c
+++ b/src/srq.c
@@ -42,6 +42,7 @@
 #include "mlx4.h"
 #include "doorbell.h"
 #include "wqe.h"
+#include "mlx4-abi.h"
 
 static void *get_wqe(struct mlx4_srq *srq, int n)
 {
@@ -173,3 +174,153 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 
 	return 0;
 }
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size)
+{
+	memset(xsrq_table, 0, sizeof *xsrq_table);
+	xsrq_table->num_xsrq = size;
+	xsrq_table->shift = ffs(size) - 1 - MLX4_XSRQ_TABLE_BITS;
+	xsrq_table->mask = (1 << xsrq_table->shift) - 1;
+
+	pthread_mutex_init(&xsrq_table->mutex, NULL);
+}
+
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	if (xsrq_table->xsrq_table[index].refcnt)
+		return xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask];
+
+	return NULL;
+}
+
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+		    struct mlx4_srq *srq)
+{
+	int index, ret = 0;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+	if (!xsrq_table->xsrq_table[index].refcnt) {
+		xsrq_table->xsrq_table[index].table = calloc(xsrq_table->mask + 1,
+							     sizeof(struct mlx4_srq *));
+		if (!xsrq_table->xsrq_table[index].table) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	xsrq_table->xsrq_table[index].refcnt++;
+	xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = srq;
+
+out:
+	pthread_mutex_unlock(&xsrq_table->mutex);
+	return ret;
+}
+
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+	int index;
+
+	index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+	pthread_mutex_lock(&xsrq_table->mutex);
+
+	if (--xsrq_table->xsrq_table[index].refcnt)
+		xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = NULL;
+	else
+		free(xsrq_table->xsrq_table[index].table);
+
+	pthread_mutex_unlock(&xsrq_table->mutex);
+}
+
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+				    struct ibv_srq_init_attr_ex *attr_ex)
+{
+	struct mlx4_create_xsrq cmd;
+	struct mlx4_create_srq_resp resp;
+	struct mlx4_srq *srq;
+	int ret;
+
+	/* Sanity check SRQ size before proceeding */
+	if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
+		return NULL;
+
+	srq = calloc(1, sizeof *srq);
+	if (!srq)
+		return NULL;
+
+	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	srq->max     = align_queue_size(attr_ex->attr.max_wr + 1);
+	srq->max_gs  = attr_ex->attr.max_sge;
+	srq->counter = 0;
+	srq->ext_srq = 1;
+
+	if (mlx4_alloc_srq_buf(attr_ex->pd, &attr_ex->attr, srq))
+		goto err;
+
+	srq->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
+	if (!srq->db)
+		goto err_free;
+
+	*srq->db = 0;
+
+	cmd.buf_addr = (uintptr_t) srq->buf.buf;
+	cmd.db_addr  = (uintptr_t) srq->db;
+
+	ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, attr_ex,
+				    &cmd.ibv_cmd, sizeof cmd,
+				    &resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_db;
+
+	ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
+			      srq->verbs_srq.srq_num, srq);
+	if (ret)
+		goto err_destroy;
+
+	return &srq->verbs_srq.srq;
+
+err_destroy:
+	ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+err_db:
+	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
+err_free:
+	free(srq->wrid);
+	mlx4_free_buf(&srq->buf);
+err:
+	free(srq);
+	return NULL;
+}
+
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
+{
+	struct mlx4_context *mctx = to_mctx(srq->context);
+	struct mlx4_srq *msrq = to_msrq(srq);
+	struct mlx4_cq *mcq;
+	int ret;
+
+	mcq = to_mcq(msrq->verbs_srq.cq);
+	mlx4_cq_clean(mcq, 0, msrq);
+	pthread_spin_lock(&mcq->lock);
+	mlx4_clear_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num);
+	pthread_spin_unlock(&mcq->lock);
+
+	ret = ibv_cmd_destroy_srq(srq);
+	if (ret) {
+		pthread_spin_lock(&mcq->lock);
+		mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
+		pthread_spin_unlock(&mcq->lock);
+		return ret;
+	}
+
+	mlx4_free_db(mctx, MLX4_DB_TYPE_RQ, msrq->db);
+	mlx4_free_buf(&msrq->buf);
+	free(msrq->wrid);
+	free(msrq);
+
+	return 0;
+}
diff --git a/src/verbs.c b/src/verbs.c
index 408fc6d..1ebf766 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -107,6 +107,42 @@ int mlx4_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+				struct ibv_xrcd_init_attr *attr)
+{
+	struct ibv_open_xrcd cmd;
+	struct ibv_open_xrcd_resp resp;
+	struct verbs_xrcd *xrcd;
+	int ret;
+
+	xrcd = calloc(1, sizeof *xrcd);
+	if (!xrcd)
+		return NULL;
+
+	ret = ibv_cmd_open_xrcd(context, xrcd, attr,
+				&cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &xrcd->xrcd;
+
+err:
+	free(xrcd);
+	return NULL;
+}
+
+int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
+{
+	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
+	int ret;
+
+	ret = ibv_cmd_close_xrcd(xrcd);
+	if (!ret)
+		free(xrcd);
+
+	return ret;
+}
+
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 			   int access)
 {
@@ -150,7 +186,7 @@ int mlx4_dereg_mr(struct ibv_mr *mr)
 	return 0;
 }
 
-static int align_queue_size(int req)
+int align_queue_size(int req)
 {
 	int nent;
 
@@ -294,7 +330,7 @@ int mlx4_destroy_cq(struct ibv_cq *cq)
 }
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
-				 struct ibv_srq_init_attr *attr)
+				struct ibv_srq_init_attr *attr)
 {
 	struct mlx4_create_srq      cmd;
 	struct mlx4_create_srq_resp resp;
@@ -315,6 +351,7 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
+	srq->ext_srq = 0;
 
 	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
 		goto err;
@@ -328,15 +365,13 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
 	cmd.db_addr  = (uintptr_t) srq->db;
 
-	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
+	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
 				 &cmd.ibv_cmd, sizeof cmd,
 				 &resp.ibv_resp, sizeof resp);
 	if (ret)
 		goto err_db;
 
-	srq->srqn = resp.srqn;
-
-	return &srq->ibv_srq;
+	return &srq->verbs_srq.srq;
 
 err_db:
 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
@@ -351,6 +386,18 @@ err:
 	return NULL;
 }
 
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+				   struct ibv_srq_init_attr_ex *attr_ex)
+{
+	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
+	    (attr_ex->srq_type == IBV_SRQT_BASIC))
+		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
+	else if (attr_ex->srq_type == IBV_SRQT_XRC)
+		return mlx4_create_xrc_srq(context, attr_ex);
+
+	return NULL;
+}
+
 int mlx4_modify_srq(struct ibv_srq *srq,
 		     struct ibv_srq_attr *attr,
 		     int attr_mask)
@@ -372,6 +419,9 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 {
 	int ret;
 
+	if (to_msrq(srq)->ext_srq)
+		return mlx4_destroy_xrc_srq(srq);
+
 	ret = ibv_cmd_destroy_srq(srq);
 	if (ret)
 		return ret;
@@ -384,7 +434,8 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 	return 0;
 }
 
-struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+				 struct ibv_qp_init_attr_ex *attr)
 {
 	struct mlx4_create_qp     cmd;
 	struct ibv_create_qp_resp resp;
@@ -399,30 +450,34 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    attr->cap.max_inline_data > 1024)
 		return NULL;
 
-	qp = malloc(sizeof *qp);
+	qp = calloc(1, sizeof *qp);
 	if (!qp)
 		return NULL;
 
-	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
-
-	/*
-	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-	 * allow HW to prefetch.
-	 */
-	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
-	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+	if (attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
+	} else {
+		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
+		/*
+		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
+		 * allow HW to prefetch.
+		 */
+		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
+	}
 
-	if (attr->srq)
-		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
-	else {
+	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
+	    attr->qp_type == IBV_QPT_XRC_RECV) {
+		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
+	} else {
+		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
 		if (attr->cap.max_recv_sge < 1)
 			attr->cap.max_recv_sge = 1;
 		if (attr->cap.max_recv_wr < 1)
 			attr->cap.max_recv_wr = 1;
 	}
 
-	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
+	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
 		goto err;
 
 	mlx4_init_qp_indices(qp);
@@ -431,19 +486,18 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
 		goto err_free;
 
-	if (!attr->srq) {
-		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+	if (attr->cap.max_recv_sge) {
+		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
 		if (!qp->db)
 			goto err_free;
 
 		*qp->db = 0;
+		cmd.db_addr = (uintptr_t) qp->db;
+	} else {
+		cmd.db_addr = 0;
 	}
 
 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
-	if (attr->srq)
-		cmd.db_addr = 0;
-	else
-		cmd.db_addr = (uintptr_t) qp->db;
 	cmd.log_sq_stride   = qp->sq.wqe_shift;
 	for (cmd.log_sq_bb_count = 0;
 	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
@@ -452,37 +506,39 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
 	memset(cmd.reserved, 0, sizeof cmd.reserved);
 
-	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
+	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
 
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
-				&resp, sizeof resp);
+	ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr,
+				   &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp);
 	if (ret)
 		goto err_rq_db;
 
-	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
-	if (ret)
-		goto err_destroy;
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
+		if (ret)
+			goto err_destroy;
+	}
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
 
 	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
 	qp->rq.max_gs  = attr->cap.max_recv_sge;
 	mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
 
-	qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
+	qp->doorbell_qpn    = htonl(qp->verbs_qp.qp.qp_num << 8);
 	if (attr->sq_sig_all)
 		qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
 	else
 		qp->sq_signal_bits = 0;
 
-	return &qp->ibv_qp;
+	return &qp->verbs_qp.qp;
 
 err_destroy:
-	ibv_cmd_destroy_qp(&qp->ibv_qp);
+	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
 
 err_rq_db:
-	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
-	if (!attr->srq)
-		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
+	if (attr->cap.max_recv_sge)
+		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
 
 err_free:
 	free(qp->sq.wrid);
@@ -496,6 +552,43 @@ err:
 	return NULL;
 }
 
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+	struct ibv_qp_init_attr_ex attr_ex;
+	struct ibv_qp *qp;
+
+	memcpy(&attr_ex, attr, sizeof *attr);
+	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+	attr_ex.pd = pd;
+	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
+	if (qp)
+		memcpy(attr, &attr_ex, sizeof *attr);
+	return qp;
+}
+
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
+{
+	struct ibv_open_qp cmd;
+	struct ibv_create_qp_resp resp;
+	struct mlx4_qp *qp;
+	int ret;
+
+	qp = calloc(1, sizeof *qp);
+	if (!qp)
+		return NULL;
+
+	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, attr,
+			      &cmd, sizeof cmd, &resp, sizeof resp);
+	if (ret)
+		goto err;
+
+	return &qp->verbs_qp.qp;
+
+err:
+	free(qp);
+	return NULL;
+}
+
 int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 		   int attr_mask,
 		   struct ibv_qp_init_attr *init_attr)
@@ -526,7 +619,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	int ret;
 
 	if (attr_mask & IBV_QP_PORT) {
-		if (ibv_query_port(qp->pd->context, attr->port_num, &port_attr))
+		if (ibv_query_port(qp->context, attr->port_num, &port_attr))
 			return -1;
 		mqp->link_layer = port_attr.link_layer;
 	}
@@ -542,13 +635,14 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	if (!ret		       &&
 	    (attr_mask & IBV_QP_STATE) &&
 	    attr->qp_state == IBV_QPS_RESET) {
-		mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
-			       qp->srq ? to_msrq(qp->srq) : NULL);
-		if (qp->send_cq != qp->recv_cq)
+		if (qp->recv_cq)
+			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
+				      qp->srq ? to_msrq(qp->srq) : NULL);
+		if (qp->send_cq && qp->send_cq != qp->recv_cq)
 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
 
 		mlx4_init_qp_indices(to_mqp(qp));
-		if (!qp->srq)
+		if (to_mqp(qp)->rq.wqe_cnt)
 			*to_mqp(qp)->db = 0;
 	}
 
@@ -560,9 +654,14 @@ static void mlx4_lock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_lock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_lock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_lock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_lock(&send_cq->lock);
 		pthread_spin_lock(&recv_cq->lock);
 	} else {
@@ -576,9 +675,15 @@ static void mlx4_unlock_cqs(struct ibv_qp *qp)
 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-	if (send_cq == recv_cq)
+
+	if (!qp->send_cq || !qp->recv_cq) {
+		if (qp->send_cq)
+			pthread_spin_unlock(&send_cq->lock);
+		else if (qp->recv_cq)
+			pthread_spin_unlock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		pthread_spin_unlock(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		pthread_spin_unlock(&recv_cq->lock);
 		pthread_spin_unlock(&send_cq->lock);
 	} else {
@@ -601,21 +706,24 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 
 	mlx4_lock_cqs(ibqp);
 
-	__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
-			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
-	if (ibqp->send_cq != ibqp->recv_cq)
+	if (ibqp->recv_cq)
+		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
 		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
 
-	mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
 
 	mlx4_unlock_cqs(ibqp);
 	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
 
-	if (!ibqp->srq)
+	if (qp->rq.wqe_cnt) {
 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
-	free(qp->sq.wrid);
-	if (qp->rq.wqe_cnt)
 		free(qp->rq.wrid);
+	}
+	if (qp->sq.wqe_cnt)
+		free(qp->sq.wrid);
 	mlx4_free_buf(&qp->buf);
 	free(qp);
 
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found] ` <1364332591-22866-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  2013-03-26 21:16   ` [PATCH libmlx4 v6 2/2] libmlx4: Add support for XRC QPs sean.hefty-ral2JQCrhuEAvxtiuMwx3w
@ 2013-06-04 19:39   ` Steve Wise
       [not found]     ` <51AE4288.4010505-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  1 sibling, 1 reply; 8+ messages in thread
From: Steve Wise @ 2013-06-04 19:39 UTC (permalink / raw)
  To: sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, roland-BHEL68pLQRGGvPXPguhicg,
	Yishai Hadas, Tzahi Oved

On 3/26/2013 4:16 PM, sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org wrote:

<snip>

> @@ -236,23 +235,30 @@ found:
>   		return NULL;
>   	}
>   
> -	dev = malloc(sizeof *dev);
> +	dev = calloc(1, sizeof(*dev));
>   	if (!dev) {
>   		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
>   			uverbs_sys_path);
>   		return NULL;
>   	}
>   
> -	dev->ibv_dev.ops = mlx4_dev_ops;
>   	dev->page_size   = sysconf(_SC_PAGESIZE);
> -
> -	return &dev->ibv_dev;
> +	dev->verbs_dev.sz = sizeof(*dev);
> +	dev->verbs_dev.size_of_context =
> +		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
> +	 /* mlx4_init_context will initialize provider calls */
> +	dev->verbs_dev.init_context = mlx4_init_context;
> +	dev->verbs_dev.uninit_context = mlx4_uninit_context;
> +
> +	return &dev->verbs_dev;
>   }
>   
> +
>   #ifdef HAVE_IBV_REGISTER_DRIVER
>   static __attribute__((constructor)) void mlx4_register_driver(void)
>   {
> -	ibv_register_driver("mlx4", mlx4_driver_init);
> +	verbs_register_driver("mlx4", mlx4_driver_init);
> +
>   }
>   #else

Shouldn't ibv_register_driver() need to be called in the lib constructor 
function if HAVE_IBV_REGISTER_DRIVER is not defined?


Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found]     ` <51AE4288.4010505-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2013-06-04 19:46       ` Hefty, Sean
       [not found]         ` <1828884A29C6694DAF28B7E6B8A823736FD2CF18-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Hefty, Sean @ 2013-06-04 19:46 UTC (permalink / raw)
  To: Steve Wise
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org, Yishai Hadas,
	Tzahi Oved

> > +
> >   #ifdef HAVE_IBV_REGISTER_DRIVER
> >   static __attribute__((constructor)) void mlx4_register_driver(void)
> >   {
> > -	ibv_register_driver("mlx4", mlx4_driver_init);
> > +	verbs_register_driver("mlx4", mlx4_driver_init);
> > +
> >   }
> >   #else
> 
> Shouldn't ibv_register_driver() need to be called in the lib constructor
> function if HAVE_IBV_REGISTER_DRIVER is not defined?

?  If HAVE_IBV_REGISTER_DRIVER is not defined, then we can't call ibv_register_driver...

We should just remove the HAVE_IBV_... check completely, since with this change, libmlx4 requires an updated version of libibverbs.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found]         ` <1828884A29C6694DAF28B7E6B8A823736FD2CF18-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2013-06-04 20:01           ` Steve Wise
       [not found]             ` <51AE47A3.5060802-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  2013-06-04 20:55           ` Jason Gunthorpe
  1 sibling, 1 reply; 8+ messages in thread
From: Steve Wise @ 2013-06-04 20:01 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org, Yishai Hadas,
	Tzahi Oved

On 6/4/2013 2:46 PM, Hefty, Sean wrote:
>>> +
>>>    #ifdef HAVE_IBV_REGISTER_DRIVER
>>>    static __attribute__((constructor)) void mlx4_register_driver(void)
>>>    {
>>> -	ibv_register_driver("mlx4", mlx4_driver_init);
>>> +	verbs_register_driver("mlx4", mlx4_driver_init);
>>> +
>>>    }
>>>    #else
>> Shouldn't ibv_register_driver() need to be called in the lib constructor
>> function if HAVE_IBV_REGISTER_DRIVER is not defined?
> ?  If HAVE_IBV_REGISTER_DRIVER is not defined, then we can't call ibv_register_driver...

I thought HAVE_IBV_REGISTER_DRIVER was something new for deciding if the 
lib should call verbs_register_driver().

> We should just remove the HAVE_IBV_... check completely, since with this change, libmlx4 requires an updated version of libibverbs.

Ah.  I was thinking it would use the old interface if it was compiled 
against a libibverbs that didn't support the extensions.



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found]         ` <1828884A29C6694DAF28B7E6B8A823736FD2CF18-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  2013-06-04 20:01           ` Steve Wise
@ 2013-06-04 20:55           ` Jason Gunthorpe
  1 sibling, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2013-06-04 20:55 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: Steve Wise, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org, Yishai Hadas,
	Tzahi Oved

On Tue, Jun 04, 2013 at 07:46:41PM +0000, Hefty, Sean wrote:

> We should just remove the HAVE_IBV_... check completely, since with
> this change, libmlx4 requires an updated version of libibverbs.

Agree. There is no need to support such old libibverbs at compile time
anymore.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found]             ` <51AE47A3.5060802-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2013-06-05 14:54               ` Steve Wise
       [not found]                 ` <51AF5129.7020205-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Steve Wise @ 2013-06-05 14:54 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org, Yishai Hadas,
	Tzahi Oved

On 6/4/2013 3:01 PM, Steve Wise wrote:
> On 6/4/2013 2:46 PM, Hefty, Sean wrote:
>>>> +
>>>>    #ifdef HAVE_IBV_REGISTER_DRIVER
>>>>    static __attribute__((constructor)) void mlx4_register_driver(void)
>>>>    {
>>>> -    ibv_register_driver("mlx4", mlx4_driver_init);
>>>> +    verbs_register_driver("mlx4", mlx4_driver_init);
>>>> +
>>>>    }
>>>>    #else
>>> Shouldn't ibv_register_driver() need to be called in the lib 
>>> constructor
>>> function if HAVE_IBV_REGISTER_DRIVER is not defined?
>> ?  If HAVE_IBV_REGISTER_DRIVER is not defined, then we can't call 
>> ibv_register_driver...
>
> I thought HAVE_IBV_REGISTER_DRIVER was something new for deciding if 
> the lib should call verbs_register_driver().
>
>> We should just remove the HAVE_IBV_... check completely, since with 
>> this change, libmlx4 requires an updated version of libibverbs.
>
> Ah.  I was thinking it would use the old interface if it was compiled 
> against a libibverbs that didn't support the extensions.
>
>
>


So old provider libs will work with the new libibverbs but new provider 
libs will not work with the old libibverbs?   Is there no way around 
this?  That dependency can be painful.



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions
       [not found]                 ` <51AF5129.7020205-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2013-06-05 16:55                   ` Jason Gunthorpe
  0 siblings, 0 replies; 8+ messages in thread
From: Jason Gunthorpe @ 2013-06-05 16:55 UTC (permalink / raw)
  To: Steve Wise
  Cc: Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org, Yishai Hadas,
	Tzahi Oved

On Wed, Jun 05, 2013 at 09:54:33AM -0500, Steve Wise wrote:
> >Ah.  I was thinking it would use the old interface if it was
> >compiled against a libibverbs that didn't support the extensions.

> So old provider libs will work with the new libibverbs but new
> provider libs will not work with the old libibverbs?   Is there no
> way around this?  That dependency can be painful.

providers can use dlopen/dlsym tricks, or perhaps weak symbols to
discover the new libibverbs symbols. Nobody has had an interest in
working on that problem though.

My original thought when putting all this together was that the one
time synchronized update to the extendable interface was manageable.

.. but seeing now that the providers are linking to other new symbols
beyond the init (eg the cmd family) it seems this will be beyond just
a one time thing. :(

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-06-05 16:55 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-26 21:16 [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions sean.hefty-ral2JQCrhuEAvxtiuMwx3w
     [not found] ` <1364332591-22866-1-git-send-email-sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2013-03-26 21:16   ` [PATCH libmlx4 v6 2/2] libmlx4: Add support for XRC QPs sean.hefty-ral2JQCrhuEAvxtiuMwx3w
2013-06-04 19:39   ` [PATCH libmlx4 v6 1/2] libmlx4: Infra-structure changes to support verbs extensions Steve Wise
     [not found]     ` <51AE4288.4010505-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2013-06-04 19:46       ` Hefty, Sean
     [not found]         ` <1828884A29C6694DAF28B7E6B8A823736FD2CF18-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2013-06-04 20:01           ` Steve Wise
     [not found]             ` <51AE47A3.5060802-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2013-06-05 14:54               ` Steve Wise
     [not found]                 ` <51AF5129.7020205-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2013-06-05 16:55                   ` Jason Gunthorpe
2013-06-04 20:55           ` Jason Gunthorpe

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.