public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/4] mlx4: implement XRC RCV qp's
@ 2010-02-28  9:02 Jack Morgenstein
       [not found] ` <201002281102.55944.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Jack Morgenstein @ 2010-02-28  9:02 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Support for XRC RCV-only QP (requested by userspace,
but resides in kernel space).

Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
---
 drivers/infiniband/hw/mlx4/cq.c      |    2 +-
 drivers/infiniband/hw/mlx4/main.c    |   64 ++++++-
 drivers/infiniband/hw/mlx4/mlx4_ib.h |   16 ++
 drivers/infiniband/hw/mlx4/qp.c      |  304 +++++++++++++++++++++++++++++++++-
 4 files changed, 374 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7d17d2e..db75580 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -175,7 +175,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 	if (entries < 1 || entries > dev->dev->caps.max_cqes)
 		return ERR_PTR(-EINVAL);
 
-	cq = kmalloc(sizeof *cq, GFP_KERNEL);
+	cq = kzalloc(sizeof *cq, GFP_KERNEL);
 	if (!cq)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c7c7a80..b9b70da 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -419,7 +419,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
 	struct mlx4_ib_pd *pd;
 	int err;
 
-	pd = kmalloc(sizeof *pd, GFP_KERNEL);
+	pd = kzalloc(sizeof *pd, GFP_KERNEL);
 	if (!pd)
 		return ERR_PTR(-ENOMEM);
 
@@ -461,12 +461,18 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 				     &to_mqp(ibqp)->mqp, gid->raw);
 }
 
+static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+}
+
 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
 					  struct ib_ucontext *context,
 					  struct ib_udata *udata)
 {
 	struct mlx4_ib_xrcd *xrcd;
 	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+	struct ib_pd *pd;
+	struct ib_cq *cq;
 	int err;
 
 	if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
@@ -477,23 +483,51 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
 		return ERR_PTR(-ENOMEM);
 
 	err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn);
-	if (err) {
-		kfree(xrcd);
-		return ERR_PTR(err);
+	if (err)
+		goto err_xrcd;
+
+	pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
+	if (IS_ERR(pd)) {
+		err = PTR_ERR(pd);
+		goto err_pd;
 	}
+	pd->device  = ibdev;
+
+	cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
+	if (IS_ERR(cq)) {
+		err = PTR_ERR(cq);
+		goto err_cq;
+	}
+	cq->device        = ibdev;
+	cq->comp_handler  = mlx4_dummy_comp_handler;
 
 	if (context)
 		if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof(__u32))) {
-			mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
-			kfree(xrcd);
-			return ERR_PTR(-EFAULT);
+			err = -EFAULT;
+			goto err_copy;
 		}
 
+	xrcd->cq = cq;
+	xrcd->pd = pd;
 	return &xrcd->ibxrcd;
+
+err_copy:
+	mlx4_ib_destroy_cq(cq);
+err_cq:
+	mlx4_ib_dealloc_pd(pd);
+err_pd:
+	mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
+err_xrcd:
+	kfree(xrcd);
+	return ERR_PTR(err);
 }
 
 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
 {
+	struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+	mlx4_ib_destroy_cq(mxrcd->cq);
+	mlx4_ib_dealloc_pd(mxrcd->pd);
 	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
 	kfree(xrcd);
 
@@ -699,18 +733,30 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq;
 		ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
 		ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+		ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp;
+		ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp;
+		ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp;
+		ibdev->ib_dev.reg_xrc_rcv_qp = mlx4_ib_reg_xrc_rcv_qp;
+		ibdev->ib_dev.unreg_xrc_rcv_qp = mlx4_ib_unreg_xrc_rcv_qp;
+		ibdev->ib_dev.destroy_xrc_rcv_qp = mlx4_ib_destroy_xrc_rcv_qp;
 		ibdev->ib_dev.uverbs_cmd_mask |=
 			(1ull << IB_USER_VERBS_CMD_CREATE_XRC_SRQ)	|
 			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD)	|
-			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD)	|
+			(1ull << IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP)	|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP)	|
+			(1ull << IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP)	|
+			(1ull << IB_USER_VERBS_CMD_REG_XRC_RCV_QP)	|
+			(1ull << IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP)    |
+			(1ull << IB_USER_VERBS_CMD_DESTROY_XRC_RCV_QP);
 	}
 
-
 	if (init_node_data(ibdev))
 		goto err_map;
 
 	spin_lock_init(&ibdev->sm_lock);
 	mutex_init(&ibdev->cap_mask_mutex);
+	mutex_init(&ibdev->xrc_reg_mutex);
 
 	if (ib_register_device(&ibdev->ib_dev))
 		goto err_map;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 653f7cc..3ceec5f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -59,6 +59,8 @@ struct mlx4_ib_pd {
 struct mlx4_ib_xrcd {
 	struct ib_xrcd	ibxrcd;
 	u32		xrcdn;
+	struct ib_pd	*pd;
+	struct ib_cq	*cq;
 };
 
 struct mlx4_ib_cq_buf {
@@ -115,6 +117,7 @@ struct mlx4_ib_wq {
 enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO				= 1 << 0,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
+	MLX4_IB_XRC_RCV				= 1 << 2,
 };
 
 struct mlx4_ib_qp {
@@ -137,6 +140,7 @@ struct mlx4_ib_qp {
 	int			buf_size;
 	struct mutex		mutex;
 	u32			flags;
+	struct list_head	xrc_reg_list;
 	u16			xrcdn;
 	u8			port;
 	u8			alt_port;
@@ -181,6 +185,7 @@ struct mlx4_ib_dev {
 	spinlock_t		sm_lock;
 
 	struct mutex		cap_mask_mutex;
+	struct mutex		xrc_reg_mutex;
 	bool			ib_active;
 };
 
@@ -329,6 +334,17 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
 			 u64 iova);
 int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
 int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
+			      u32 *qp_num);
+int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
+			      struct ib_qp_attr *attr, int attr_mask);
+int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
+			     struct ib_qp_attr *attr, int attr_mask,
+			     struct ib_qp_init_attr *init_attr);
+int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+int mlx4_ib_destroy_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+
 
 static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2fff540..3f482cb 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -58,6 +58,12 @@ enum {
 	MLX4_IB_LSO_HEADER_SPARE	= 128,
 };
 
+
+struct mlx4_ib_xrc_reg_entry {
+	struct list_head list;
+	void *context;
+};
+
 struct mlx4_ib_sqp {
 	struct mlx4_ib_qp	qp;
 	int			pkey_index;
@@ -208,14 +214,15 @@ static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
 static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
 {
 	struct ib_event event;
-	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+	struct mlx4_ib_qp *mqp = to_mibqp(qp);
+	struct ib_qp *ibqp = &mqp->ibqp;
+	struct mlx4_ib_xrc_reg_entry *ctx_entry;
 
 	if (type == MLX4_EVENT_TYPE_PATH_MIG)
 		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
 
 	if (ibqp->event_handler) {
 		event.device     = ibqp->device;
-		event.element.qp = ibqp;
 		switch (type) {
 		case MLX4_EVENT_TYPE_PATH_MIG:
 			event.event = IB_EVENT_PATH_MIG;
@@ -247,6 +254,15 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
 			return;
 		}
 
+		if (unlikely(ibqp->qp_type == IB_QPT_XRC &&
+			     mqp->flags & MLX4_IB_XRC_RCV)) {
+			event.event |= IB_XRC_QP_EVENT_FLAG;
+			event.element.xrc_qp_num = ibqp->qp_num;
+			list_for_each_entry(ctx_entry, &mqp->xrc_reg_list, list)
+				ibqp->event_handler(&event, ctx_entry->context);
+			return;
+		}
+		event.element.qp = ibqp;
 		ibqp->event_handler(&event, ibqp->qp_context);
 	}
 }
@@ -1982,3 +1998,287 @@ out:
 	return err;
 }
 
+int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
+			      u32 *qp_num)
+{
+	struct mlx4_ib_dev *dev = to_mdev(init_attr->xrcd->device);
+	struct mlx4_ib_xrcd *xrcd = to_mxrcd(init_attr->xrcd);
+	struct mlx4_ib_qp *qp;
+	struct ib_qp *ibqp;
+	struct mlx4_ib_xrc_reg_entry *ctx_entry;
+	int err;
+
+	if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+		return -ENOSYS;
+
+	if (init_attr->qp_type != IB_QPT_XRC)
+		return -EINVAL;
+
+	ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
+	if (!ctx_entry)
+		return -ENOMEM;
+
+	qp = kzalloc(sizeof *qp, GFP_KERNEL);
+	if (!qp) {
+		kfree(ctx_entry);
+		return -ENOMEM;
+	}
+	qp->flags = MLX4_IB_XRC_RCV;
+	qp->xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+	INIT_LIST_HEAD(&qp->xrc_reg_list);
+	err = create_qp_common(dev, xrcd->pd, init_attr, NULL, 0, qp);
+	if (err) {
+		kfree(ctx_entry);
+		kfree(qp);
+		return err;
+	}
+
+	ibqp = &qp->ibqp;
+	/* set the ibpq attributes which will be used by the mlx4 module */
+	ibqp->qp_num = qp->mqp.qpn;
+	ibqp->device = init_attr->xrcd->device;
+	ibqp->pd = xrcd->pd;
+	ibqp->send_cq = ibqp->recv_cq = xrcd->cq;
+	ibqp->event_handler = init_attr->event_handler;
+	ibqp->qp_context = init_attr->qp_context;
+	ibqp->qp_type = init_attr->qp_type;
+	ibqp->xrcd = init_attr->xrcd;
+
+	mutex_lock(&qp->mutex);
+	ctx_entry->context = init_attr->qp_context;
+	list_add_tail(&ctx_entry->list, &qp->xrc_reg_list);
+	mutex_unlock(&qp->mutex);
+	*qp_num = qp->mqp.qpn;
+	return 0;
+}
+
+int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
+			      struct ib_qp_attr *attr, int attr_mask)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+	struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+	struct mlx4_qp *mqp;
+	int err = -EINVAL;
+
+	if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+		return -ENOSYS;
+
+	mutex_lock(&dev->xrc_reg_mutex);
+	mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+	if (unlikely(!mqp)) {
+		printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
+		       "unknown QPN %06x\n", qp_num);
+		goto err_out;
+	}
+
+	if (xrcd->xrcdn != to_mxrcd(to_mibqp(mqp)->ibqp.xrcd)->xrcdn)
+		goto err_out;
+
+	err = mlx4_ib_modify_qp(&(to_mibqp(mqp)->ibqp), attr, attr_mask, NULL);
+	mutex_unlock(&dev->xrc_reg_mutex);
+	return err;
+
+err_out:
+	mutex_unlock(&dev->xrc_reg_mutex);
+	return err;
+}
+
+int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
+			     struct ib_qp_attr *qp_attr, int qp_attr_mask,
+			     struct ib_qp_init_attr *qp_init_attr)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+	struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+	struct mlx4_ib_qp *qp;
+	struct mlx4_qp *mqp;
+	struct mlx4_qp_context context;
+	int mlx4_state;
+	int err = -EINVAL;
+
+	if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+		return -ENOSYS;
+
+	mutex_lock(&dev->xrc_reg_mutex);
+	mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+	if (unlikely(!mqp)) {
+		printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
+		       "unknown QPN %06x\n", qp_num);
+		goto err_out;
+	}
+
+	qp = to_mibqp(mqp);
+	if (xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
+		goto err_out;
+
+	if (qp->state == IB_QPS_RESET) {
+		qp_attr->qp_state = IB_QPS_RESET;
+		goto done;
+	}
+
+	err = mlx4_qp_query(dev->dev, mqp, &context);
+	if (err)
+		goto err_out;
+
+	mlx4_state = be32_to_cpu(context.flags) >> 28;
+
+	qp_attr->qp_state = to_ib_qp_state(mlx4_state);
+	qp_attr->path_mtu = context.mtu_msgmax >> 5;
+	qp_attr->path_mig_state =
+		to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
+	qp_attr->qkey = be32_to_cpu(context.qkey);
+	qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
+	qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
+	qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
+	qp_attr->qp_access_flags =
+		to_ib_qp_access_flags(be32_to_cpu(context.params2));
+
+	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+	    qp->ibqp.qp_type == IB_QPT_XRC) {
+		to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
+		to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr,
+			      &context.alt_path);
+		qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
+	}
+
+	qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+	if (qp_attr->qp_state == IB_QPS_INIT)
+		qp_attr->port_num = qp->port;
+	else
+		qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+
+	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+	qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
+
+	qp_attr->max_rd_atomic =
+		1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
+
+	qp_attr->max_dest_rd_atomic =
+		1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
+	qp_attr->min_rnr_timer =
+		(be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
+	qp_attr->timeout = context.pri_path.ackto >> 3;
+	qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
+	qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
+	qp_attr->alt_timeout = context.alt_path.ackto >> 3;
+
+done:
+	qp_attr->cur_qp_state	     = qp_attr->qp_state;
+	qp_attr->cap.max_recv_wr     = 0;
+	qp_attr->cap.max_recv_sge    = 0;
+	qp_attr->cap.max_send_wr     = 0;
+	qp_attr->cap.max_send_sge    = 0;
+	qp_attr->cap.max_inline_data = 0;
+	qp_init_attr->cap	     = qp_attr->cap;
+
+	mutex_unlock(&dev->xrc_reg_mutex);
+	return 0;
+
+err_out:
+	mutex_unlock(&dev->xrc_reg_mutex);
+	return err;
+}
+
+int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+
+	struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+	struct mlx4_qp *mqp;
+	struct mlx4_ib_qp *mibqp;
+	struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
+	int err = -EINVAL;
+
+	mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+	mqp = __mlx4_qp_lookup(to_mdev(xrcd->device)->dev, qp_num);
+	if (unlikely(!mqp)) {
+		printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
+		       "unknown QPN %06x\n", qp_num);
+		goto err_out;
+	}
+
+	mibqp = to_mibqp(mqp);
+
+	if (mxrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn)
+		goto err_out;
+
+	ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
+	if (!ctx_entry) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	mutex_lock(&mibqp->mutex);
+	list_for_each_entry(tmp, &mibqp->xrc_reg_list, list)
+		if (tmp->context == context) {
+			mutex_unlock(&mibqp->mutex);
+			kfree(ctx_entry);
+			mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+			return 0;
+		}
+
+	ctx_entry->context = context;
+	list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list);
+	mutex_unlock(&mibqp->mutex);
+	mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+	return 0;
+
+err_out:
+	mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+	return err;
+}
+
+int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+	return mlx4_ib_destroy_xrc_rcv_qp(xrcd, context, qp_num);
+}
+
+int mlx4_ib_destroy_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+
+	struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+	struct mlx4_qp *mqp;
+	struct mlx4_ib_qp *mibqp;
+	struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
+	int found = 0;
+	int err = -EINVAL;
+
+	mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+	mqp = __mlx4_qp_lookup(to_mdev(xrcd->device)->dev, qp_num);
+	if (unlikely(!mqp)) {
+		printk(KERN_WARNING "mlx4_ib_destroy_xrc_rcv_qp: "
+		       "unknown QPN %06x\n", qp_num);
+		goto err_out;
+	}
+
+	mibqp = to_mibqp(mqp);
+
+	if (mxrcd->xrcdn != (mibqp->xrcdn & 0xffff))
+		goto err_out;
+
+	mutex_lock(&mibqp->mutex);
+	list_for_each_entry_safe(ctx_entry, tmp, &mibqp->xrc_reg_list, list)
+		if (ctx_entry->context == context) {
+			found = 1;
+			list_del(&ctx_entry->list);
+			kfree(ctx_entry);
+			break;
+		}
+
+	mutex_unlock(&mibqp->mutex);
+	if (!found)
+		goto err_out;
+
+	/* destroy the QP if the registration list is empty */
+	if (list_empty(&mibqp->xrc_reg_list))
+		mlx4_ib_destroy_qp(&mibqp->ibqp);
+
+	mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+	return 0;
+
+err_out:
+	mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+	return err;
+}
+
-- 
1.6.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 4/4] mlx4: implement XRC RCV qp's
       [not found] ` <201002281102.55944.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2010-04-29 20:03   ` Roland Dreier
       [not found]     ` <adar5lyxc01.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Roland Dreier @ 2010-04-29 20:03 UTC (permalink / raw)
  To: Jack Morgenstein
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

 > @@ -175,7 +175,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 >  	if (entries < 1 || entries > dev->dev->caps.max_cqes)
 >  		return ERR_PTR(-EINVAL);
 >  
 > -	cq = kmalloc(sizeof *cq, GFP_KERNEL);
 > +	cq = kzalloc(sizeof *cq, GFP_KERNEL);

What's the reason for this change?

 > @@ -477,23 +483,51 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
 > +	pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
 > +	cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);

Why does every xrcd get a PD and a CQ now?  Just in case someone wants
to create a rcv QP?  (The spec is unclear on this -- for "create XRC
target QP" it says "A set of initial QP attributes must be specified by
the Consumer," but then doesn't mention anything in the input modifiers,
so it's not clear what PD/CQ is supposed to be used)

 > +			(1ull << IB_USER_VERBS_CMD_DESTROY_XRC_RCV_QP);
 >  	}
 >  
 > -

This seems to be repairing whitespace damage from the previous patch.

 > +int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)

 > +	mutex_lock(&mibqp->mutex);
 > +	list_for_each_entry(tmp, &mibqp->xrc_reg_list, list)
 > +		if (tmp->context == context) {
 > +			mutex_unlock(&mibqp->mutex);
 > +			kfree(ctx_entry);
 > +			mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
 > +			return 0;
 > +		}
 > +
 > +	ctx_entry->context = context;
 > +	list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list);
 > +	mutex_unlock(&mibqp->mutex);

This list handling looks completely generic and is what I was saying
should probably be in the core uverbs module.

 > +int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
 > +			     struct ib_qp_attr *qp_attr, int qp_attr_mask,
 > +			     struct ib_qp_init_attr *qp_init_attr)

Virtually all of this function seems identical to the existing query QP
operation.  We should avoid the mass duplication of code.

Also I'm not clear why this function takes a qp_num instead of a QP
handle.  Why does the consumer have to pass in the XRCD?  The IB spec
XRC annex just shows the QP handle as input to this verb.  Is it because
the reg_xrc_rcv_qp doesn't give a QP handle back?

Finally, in this implementation, what happens if the consumer passes in
a QP that isn't an XRC rcv QP?

 > +	mqp = __mlx4_qp_lookup(dev->dev, qp_num);
 > +	if (unlikely(!mqp)) {
 > +		printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
 > +		       "unknown QPN %06x\n", qp_num);
 > +		goto err_out;
 > +	}
 > +
 > +	qp = to_mibqp(mqp);
 > +	if (xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
 > +		goto err_out;

In other words is that dereference of ->xrcdn safe if ibqp.xrcd is not set?
(And the error message talks about reg_xrc_rcv_qp instead of query)
-- 
Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 4/4] mlx4: implement XRC RCV qp's
       [not found]     ` <adar5lyxc01.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-05-05 11:44       ` Jack Morgenstein
  0 siblings, 0 replies; 3+ messages in thread
From: Jack Morgenstein @ 2010-05-05 11:44 UTC (permalink / raw)
  To: Roland Dreier
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Tziporet Koren

On Thursday 29 April 2010 23:03, Roland Dreier wrote:
>  > @@ -175,7 +175,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
>  >  	if (entries < 1 || entries > dev->dev->caps.max_cqes)
>  >  		return ERR_PTR(-EINVAL);
>  >  
>  > -	cq = kmalloc(sizeof *cq, GFP_KERNEL);
>  > +	cq = kzalloc(sizeof *cq, GFP_KERNEL);
> 
> What's the reason for this change?

Because mlx4_ib_create_cq is used in mlx4_ib_alloc_xrcd (to allocate the dummy cq), and I did not
want to worry about unitialized data being present in the struct ib_cq portion of struct mlx4_ib_cq.

>  > @@ -477,23 +483,51 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
>  > +	pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
>  > +	cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
> 
> Why does every xrcd get a PD and a CQ now?  Just in case someone wants
> to create a rcv QP?
 
Yes.  This way, we have a dummy PD and CQ available to satisfy the ConnectX requirement
that every QP have a PD and CQ.
  
> (The spec is unclear on this -- for "create XRC 
> target QP" it says "A set of initial QP attributes must be specified by
> the Consumer," but then doesn't mention anything in the input modifiers,
> so it's not clear what PD/CQ is supposed to be used)

>From the XRC Annex to the IB Spec:

A12.5.2.3 XRC TARGET QP
A12.5.2.3.1 CREATE XRC TARGET QP
Description:
Creates a XRC Target QP for the specified HCA.
A set of initial QP attributes must be specified by the Consumer.
On success, a handle to the newly created XRC QP and the XRC QP 
number are returned.
Input Modifiers:
 Same as RC QP except for:
==>   no SQ or RQ or SRQ (WQEs, num of s/g elements, CQs, signaling type, etc.)
      no need for initiator resources (initiator depth)
==>   no PD
      add XRC domain to be associated with this QP. 

>  > +			(1ull << IB_USER_VERBS_CMD_DESTROY_XRC_RCV_QP);
>  >  	}
>  >  
>  > -
> 
> This seems to be repairing whitespace damage from the previous patch.

I will fix both patches.
 
>  > +int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
> 
>  > +	mutex_lock(&mibqp->mutex);
>  > +	list_for_each_entry(tmp, &mibqp->xrc_reg_list, list)
>  > +		if (tmp->context == context) {
>  > +			mutex_unlock(&mibqp->mutex);
>  > +			kfree(ctx_entry);
>  > +			mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
>  > +			return 0;
>  > +		}
>  > +
>  > +	ctx_entry->context = context;
>  > +	list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list);
>  > +	mutex_unlock(&mibqp->mutex);
> 
> This list handling looks completely generic and is what I was saying
> should probably be in the core uverbs module.
This list is used to send async events for this xrc rcv QP to all processes using
the QP (either registered for the qp, or the creating process).

Moving this to the core layer would require significant modifications.  I responded to
this in my response to your first mail.

>  > +int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
>  > +			     struct ib_qp_attr *qp_attr, int qp_attr_mask,
>  > +			     struct ib_qp_init_attr *qp_init_attr)
> 
> Virtually all of this function seems identical to the existing query QP
> operation.  We should avoid the mass duplication of code.
> 
> Also I'm not clear why this function takes a qp_num instead of a QP
> handle.  Why does the consumer have to pass in the XRCD?  The IB spec
> XRC annex just shows the QP handle as input to this verb.  Is it because
> the reg_xrc_rcv_qp doesn't give a QP handle back?

Yes, I considered the handle to be the pair (xrc domain, qp_number).

> 
> Finally, in this implementation, what happens if the consumer passes in
> a QP that isn't an XRC rcv QP?
> 
>  > +	mqp = __mlx4_qp_lookup(dev->dev, qp_num);
>  > +	if (unlikely(!mqp)) {
>  > +		printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
>  > +		       "unknown QPN %06x\n", qp_num);
>  > +		goto err_out;
>  > +	}
>  > +
>  > +	qp = to_mibqp(mqp);
>  > +	if (xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
>  > +		goto err_out;
> 
> In other words is that dereference of ->xrcdn safe if ibqp.xrcd is not set?
> (And the error message talks about reg_xrc_rcv_qp instead of query)
You are correct -- I will fix both of these (error msg and unprotected dereference).

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-05-05 11:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-02-28  9:02 [PATCH 4/4] mlx4: implement XRC RCV qp's Jack Morgenstein
     [not found] ` <201002281102.55944.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2010-04-29 20:03   ` Roland Dreier
     [not found]     ` <adar5lyxc01.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-05-05 11:44       ` Jack Morgenstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox