Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* [PATCH V2 for-next 05/11] IB/hns: Modify the condition of notifying hardware loopback
From: Salil Mehta @ 2016-11-15 18:10 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: salil.mehta-hv44wF8Li93QT0dZR+AlfA,
	xavier.huwei-hv44wF8Li93QT0dZR+AlfA,
	oulijun-hv44wF8Li93QT0dZR+AlfA, xushaobo2-hv44wF8Li93QT0dZR+AlfA,
	mehta.salil.lnk-Re5JQEeQqe8AvxtiuMwx3w, lijun_nudt-9Onoh4P/yGk,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <20161115181053.399568-1-salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

From: Lijun Ou <oulijun-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

This patch modified the condition of notifying hardware loopback.

In hip06, RoCE Engine has several ports, one QP is related
to one port. hardware only support loopback in the same port,
not in the different ports.

So, If QP related to port N, the dmac in the QP context equals
the smac of the local port N or the loop_idc is 1, we should
set loopback bit in QP context to notify hardware.

Signed-off-by: Wei Hu (Xavier) <xavier.huwei-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Lijun Ou <oulijun-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Salil Mehta  <salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index e080dd6..643a2ff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2244,24 +2244,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			     QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
 			     hr_qp->sq_signal_bits);
 
-		for (port = 0; port < hr_dev->caps.num_ports; port++) {
-			smac = (u8 *)hr_dev->dev_addr[port];
-			dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
-				smac[0], smac[1], smac[2], smac[3], smac[4],
-				smac[5]);
-			if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
-			    (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
-			    (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
-				roce_set_bit(context->qpc_bytes_32,
-				    QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
-				    1);
-				break;
-			}
-		}
-
-		if (hr_dev->loop_idc == 0x1)
+		port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+			hr_qp->port;
+		smac = (u8 *)hr_dev->dev_addr[port];
+		/* when dmac equals smac or loop_idc is 1, it should loopback */
+		if (ether_addr_equal_unaligned(dmac, smac) ||
+		    hr_dev->loop_idc == 0x1)
 			roce_set_bit(context->qpc_bytes_32,
-				QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+			      QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
 
 		roce_set_bit(context->qpc_bytes_32,
 			     QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH V2 for-next 04/11] IB/hns: add self loopback for CM
From: Salil Mehta @ 2016-11-15 18:10 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm,
	Peter Chen
In-Reply-To: <20161115181053.399568-1-salil.mehta@huawei.com>

From: Lijun Ou <oulijun@huawei.com>

This patch mainly adds self loopback support for CM.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Peter Chen <luck.chen@huawei.com>
Reviewed-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   11 +++++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |    2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 959d5ca..e080dd6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/acpi.h>
+#include <linux/etherdevice.h>
 #include <rdma/ib_umem.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
@@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	int nreq = 0;
 	u32 ind = 0;
 	int ret = 0;
+	u8 *smac;
+	int loopback;
 
 	if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
 		ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				       UD_SEND_WQE_U32_8_DMAC_5_M,
 				       UD_SEND_WQE_U32_8_DMAC_5_S,
 				       ah->av.mac[5]);
+
+			smac = (u8 *)hr_dev->dev_addr[qp->port];
+			loopback = ether_addr_equal_unaligned(ah->av.mac,
+							      smac) ? 1 : 0;
+			roce_set_bit(ud_sq_wqe->u32_8,
+				     UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+				     loopback);
+
 			roce_set_field(ud_sq_wqe->u32_8,
 				       UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
 				       UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 6004c7f..cf28f1b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -440,6 +440,8 @@ struct hns_roce_ud_send_wqe {
 #define UD_SEND_WQE_U32_8_DMAC_5_M   \
 	(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
 
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M   \
 	(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V2 for-next 03/11] IB/hns: Optimize the logic of allocating memory using APIs
From: Salil Mehta @ 2016-11-15 18:10 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm,
	Ping Zhang
In-Reply-To: <20161115181053.399568-1-salil.mehta@huawei.com>

From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>

This patch modified the logic of allocating memory using APIs in
hns RoCE driver. We used kcalloc instead of kmalloc_array and
bitmap_zero. And When kcalloc failed, call vzalloc to alloc
memory.

Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Ping Zhang <zhangping5@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_mr.c |   15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883..d3dfb5f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -137,11 +137,12 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
 
 	for (i = 0; i <= buddy->max_order; ++i) {
 		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
-		buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
-		if (!buddy->bits[i])
-			goto err_out_free;
-
-		bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL);
+		if (!buddy->bits[i]) {
+			buddy->bits[i] = vzalloc(s * sizeof(long));
+			if (!buddy->bits[i])
+				goto err_out_free;
+		}
 	}
 
 	set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +152,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
 
 err_out_free:
 	for (i = 0; i <= buddy->max_order; ++i)
-		kfree(buddy->bits[i]);
+		kvfree(buddy->bits[i]);
 
 err_out:
 	kfree(buddy->bits);
@@ -164,7 +165,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
 	int i;
 
 	for (i = 0; i <= buddy->max_order; ++i)
-		kfree(buddy->bits[i]);
+		kvfree(buddy->bits[i]);
 
 	kfree(buddy->bits);
 	kfree(buddy->num_free);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V2 for-next 02/11] IB/hns: Add code for refreshing CQ CI using TPTR
From: Salil Mehta @ 2016-11-15 18:10 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: salil.mehta-hv44wF8Li93QT0dZR+AlfA,
	xavier.huwei-hv44wF8Li93QT0dZR+AlfA,
	oulijun-hv44wF8Li93QT0dZR+AlfA, xushaobo2-hv44wF8Li93QT0dZR+AlfA,
	mehta.salil.lnk-Re5JQEeQqe8AvxtiuMwx3w, lijun_nudt-9Onoh4P/yGk,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linuxarm-hv44wF8Li93QT0dZR+AlfA, Dongdong Huang
In-Reply-To: <20161115181053.399568-1-salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

From: "Wei Hu (Xavier)" <xavier.huwei-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

This patch added the code for refreshing CQ CI using TPTR in hip06
SoC.

We will send a doorbell to hardware for refreshing CQ CI when user
succeed to poll a cqe. But it will be failed if the doorbell has
been blocked. So hardware will read a special buffer called TPTR
to get the lastest CI value when the cq is almost full.

This patch support the special CI buffer as follows:
a) Alloc the memory for TPTR in the hns_roce_tptr_init function and
   free it in hns_roce_tptr_free function, these two functions will
   be called in probe function and in the remove function.
b) Add the code for computing offset(every cq need 2 bytes) and
   write the dma addr to every cq context to notice hardware in the
   function named hns_roce_v1_write_cqc.
c) Add code for mapping TPTR buffer to user space in function named
   hns_roce_mmap. The mapping distinguish TPTR and UAR of user mode
   by vm_pgoff(0: UAR, 1: TPTR, others:invaild) in hip06.
d) Alloc the code for refreshing CQ CI using TPTR in the function
   named hns_roce_v1_poll_cq.
e) Add some variable definitions to the related structure.

Signed-off-by: Wei Hu (Xavier) <xavier.huwei-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Dongdong Huang(Donald) <hdd.huang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Lijun Ou <oulijun-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Salil Mehta  <salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 drivers/infiniband/hw/hns/hns_roce_common.h |    2 -
 drivers/infiniband/hw/hns/hns_roce_cq.c     |    9 +++
 drivers/infiniband/hw/hns/hns_roce_device.h |    6 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   79 ++++++++++++++++++++++++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |    9 +++
 drivers/infiniband/hw/hns/hns_roce_main.c   |   13 ++++-
 6 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 2970161..0dcb620 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -253,8 +253,6 @@
 #define ROCEE_VENDOR_ID_REG			0x0
 #define ROCEE_VENDOR_PART_ID_REG		0x4
 
-#define ROCEE_HW_VERSION_REG			0x8
-
 #define ROCEE_SYS_IMAGE_GUID_L_REG		0xC
 #define ROCEE_SYS_IMAGE_GUID_H_REG		0x10
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 0973659..5dc8d92 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -349,6 +349,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 		goto err_mtt;
 	}
 
+	/*
+	 * For the QP created by kernel space, tptr value should be initialized
+	 * to zero; For the QP created by user space, it will cause synchronous
+	 * problems if tptr is set to zero here, so we initialze it in user
+	 * space.
+	 */
+	if (!context)
+		*hr_cq->tptr_addr = 0;
+
 	/* Get created cq handler and carry out event */
 	hr_cq->comp = hns_roce_ib_cq_comp;
 	hr_cq->event = hns_roce_ib_cq_event;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3417315..7242b14 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,6 +37,8 @@
 
 #define DRV_NAME "hns_roce"
 
+#define HNS_ROCE_HW_VER1	('h' << 24 | 'i' << 16 | '0' << 8 | '6')
+
 #define MAC_ADDR_OCTET_NUM			6
 #define HNS_ROCE_MAX_MSG_LEN			0x80000000
 
@@ -296,7 +298,7 @@ struct hns_roce_cq {
 	u32				cq_depth;
 	u32				cons_index;
 	void __iomem			*cq_db_l;
-	void __iomem			*tptr_addr;
+	u16				*tptr_addr;
 	unsigned long			cqn;
 	u32				vector;
 	atomic_t			refcount;
@@ -553,6 +555,8 @@ struct hns_roce_dev {
 
 	int			cmd_mod;
 	int			loop_idc;
+	dma_addr_t		tptr_dma_addr; /*only for hw v1*/
+	u32			tptr_size; /*only for hw v1*/
 	struct hns_roce_hw	*hw;
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 7485514..959d5ca 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -849,6 +849,45 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
 		priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
 }
 
+static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
+
+	/*
+	 * This buffer will be used for CQ's tptr(tail pointer), also
+	 * named ci(customer index). Every CQ will use 2 bytes to save
+	 * cqe ci in hip06. Hardware will read this area to get new ci
+	 * when the queue is almost full.
+	 */
+	tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+					   &tptr_buf->map, GFP_KERNEL);
+	if (!tptr_buf->buf)
+		return -ENOMEM;
+
+	hr_dev->tptr_dma_addr = tptr_buf->map;
+	hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
+
+	return 0;
+}
+
+static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
+
+	dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+			  tptr_buf->buf, tptr_buf->map);
+}
+
 /**
  * hns_roce_v1_reset - reset RoCE
  * @hr_dev: RoCE device struct pointer
@@ -906,12 +945,11 @@ void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 	hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
 	hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
 					     ROCEE_VENDOR_PART_ID_REG));
-	hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG));
-
 	hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
 					     ROCEE_SYS_IMAGE_GUID_L_REG)) |
 				((u64)le32_to_cpu(roce_read(hr_dev,
 					    ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+	hr_dev->hw_rev		= HNS_ROCE_HW_VER1;
 
 	caps->num_qps		= HNS_ROCE_V1_MAX_QP_NUM;
 	caps->max_wqes		= HNS_ROCE_V1_MAX_WQE_NUM;
@@ -1009,8 +1047,17 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
 		goto error_failed_bt_init;
 	}
 
+	ret = hns_roce_tptr_init(hr_dev);
+	if (ret) {
+		dev_err(dev, "tptr init failed!\n");
+		goto error_failed_tptr_init;
+	}
+
 	return 0;
 
+error_failed_tptr_init:
+	hns_roce_bt_free(hr_dev);
+
 error_failed_bt_init:
 	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
 	hns_roce_raq_free(hr_dev);
@@ -1022,6 +1069,7 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
 
 void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
 {
+	hns_roce_tptr_free(hr_dev);
 	hns_roce_bt_free(hr_dev);
 	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
 	hns_roce_raq_free(hr_dev);
@@ -1339,14 +1387,21 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 			   dma_addr_t dma_handle, int nent, u32 vector)
 {
 	struct hns_roce_cq_context *cq_context = NULL;
-	void __iomem *tptr_addr;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+	dma_addr_t tptr_dma_addr;
+	int offset;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
 
 	cq_context = mb_buf;
 	memset(cq_context, 0, sizeof(*cq_context));
 
-	tptr_addr = 0;
-	hr_dev->priv_addr = tptr_addr;
-	hr_cq->tptr_addr = tptr_addr;
+	/* Get the tptr for this CQ. */
+	offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
+	tptr_dma_addr = tptr_buf->map + offset;
+	hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
 
 	/* Register cq_context members */
 	roce_set_field(cq_context->cqc_byte_4,
@@ -1390,10 +1445,10 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->cqc_byte_20,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
-		       (u64)tptr_addr >> 44);
+		       tptr_dma_addr >> 44);
 	cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
 
-	cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12);
+	cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
 
 	roce_set_field(cq_context->cqc_byte_32,
 		       CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -1659,8 +1714,14 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 			break;
 	}
 
-	if (npolled)
+	if (npolled) {
+		*hr_cq->tptr_addr = hr_cq->cons_index &
+			((hr_cq->cq_depth << 1) - 1);
+
+		/* Memroy barrier */
+		wmb();
 		hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
+	}
 
 	spin_unlock_irqrestore(&hr_cq->lock, flags);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 2e1878b..6004c7f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -104,6 +104,10 @@
 
 #define HNS_ROCE_BT_RSV_BUF_SIZE			(1 << 17)
 
+#define HNS_ROCE_V1_TPTR_ENTRY_SIZE			2
+#define HNS_ROCE_V1_TPTR_BUF_SIZE	\
+	(HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
+
 #define HNS_ROCE_ODB_POLL_MODE				0
 
 #define HNS_ROCE_SDB_NORMAL_MODE			0
@@ -983,10 +987,15 @@ struct hns_roce_bt_table {
 	struct hns_roce_buf_list cqc_buf;
 };
 
+struct hns_roce_tptr_table {
+	struct hns_roce_buf_list tptr_buf;
+};
+
 struct hns_roce_v1_priv {
 	struct hns_roce_db_table  db_table;
 	struct hns_roce_raq_table raq_table;
 	struct hns_roce_bt_table  bt_table;
+	struct hns_roce_tptr_table tptr_table;
 };
 
 int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 764e35a..6770171 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -549,6 +549,8 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 static int hns_roce_mmap(struct ib_ucontext *context,
 			 struct vm_area_struct *vma)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+
 	if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
 		return -EINVAL;
 
@@ -558,10 +560,15 @@ static int hns_roce_mmap(struct ib_ucontext *context,
 				       to_hr_ucontext(context)->uar.pfn,
 				       PAGE_SIZE, vma->vm_page_prot))
 			return -EAGAIN;
-
-	} else {
+	} else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+		/* vm_pgoff: 1 -- TPTR */
+		if (io_remap_pfn_range(vma, vma->vm_start,
+				       hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+				       hr_dev->tptr_size,
+				       vma->vm_page_prot))
+			return -EAGAIN;
+	} else
 		return -EINVAL;
-	}
 
 	return 0;
 }
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH V2 for-next 01/11] IB/hns: Add the interface for querying QP1
From: Salil Mehta @ 2016-11-15 18:10 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm
In-Reply-To: <20161115181053.399568-1-salil.mehta@huawei.com>

From: Lijun Ou <oulijun@huawei.com>

In old code, It only added the interface for querying non-specific
QP. This patch mainly adds an interface for querying QP1.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Reviewed-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
Change Log

Patch V2: Addressed the comment provided by Anurup M
	Link: https://patchwork.kernel.org/patch/9412855/
Patch V1: Initial Submit
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   83 +++++++++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |    6 +-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5..7485514 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2630,8 +2630,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
 	return ret;
 }
 
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
-			 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			     int qp_attr_mask,
+			     struct ib_qp_init_attr *qp_init_attr)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+	struct hns_roce_sqp_context context;
+	u32 addr;
+
+	mutex_lock(&hr_qp->mutex);
+
+	if (hr_qp->state == IB_QPS_RESET) {
+		qp_attr->qp_state = IB_QPS_RESET;
+		goto done;
+	}
+
+	addr = ROCEE_QP1C_CFG0_0_REG + 
+		hr_qp->port * sizeof(struct hns_roce_sqp_context);
+	context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+	context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+	context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+	context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+	context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+	context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+	context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+	context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+	context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+	context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+
+	hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+				      QP1C_BYTES_4_QP_STATE_M,
+				      QP1C_BYTES_4_QP_STATE_S);
+	qp_attr->qp_state	= hr_qp->state;
+	qp_attr->path_mtu	= IB_MTU_256;
+	qp_attr->path_mig_state	= IB_MIG_ARMED;
+	qp_attr->qkey		= QKEY_VAL;
+	qp_attr->rq_psn		= 0;
+	qp_attr->sq_psn		= 0;
+	qp_attr->dest_qp_num	= 1;
+	qp_attr->qp_access_flags = 6;
+
+	qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+					     QP1C_BYTES_20_PKEY_IDX_M,
+					     QP1C_BYTES_20_PKEY_IDX_S);
+	qp_attr->port_num = hr_qp->port + 1;
+	qp_attr->sq_draining = 0;
+	qp_attr->max_rd_atomic = 0;
+	qp_attr->max_dest_rd_atomic = 0;
+	qp_attr->min_rnr_timer = 0;
+	qp_attr->timeout = 0;
+	qp_attr->retry_cnt = 0;
+	qp_attr->rnr_retry = 0;
+	qp_attr->alt_timeout = 0;
+
+done:
+	qp_attr->cur_qp_state = qp_attr->qp_state;
+	qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+	qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+	qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+	qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+	qp_attr->cap.max_inline_data = 0;
+	qp_init_attr->cap = qp_attr->cap;
+	qp_init_attr->create_flags = 0;
+
+	mutex_unlock(&hr_qp->mutex);
+
+	return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			    int qp_attr_mask,
+			    struct ib_qp_init_attr *qp_init_attr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2767,6 +2837,15 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	return ret;
 }
 
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+	return hr_qp->doorbell_qpn <= 1 ?
+		hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+		hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
 static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
 					  struct hns_roce_qp *hr_qp,
 					  int is_user)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 539b0a3b..2e1878b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -480,13 +480,17 @@ struct hns_roce_sqp_context {
 	u32 qp1c_bytes_12;
 	u32 qp1c_bytes_16;
 	u32 qp1c_bytes_20;
-	u32 qp1c_bytes_28;
 	u32 cur_rq_wqe_ba_l;
+	u32 qp1c_bytes_28;
 	u32 qp1c_bytes_32;
 	u32 cur_sq_wqe_ba_l;
 	u32 qp1c_bytes_40;
 };
 
+#define QP1C_BYTES_4_QP_STATE_S 0
+#define QP1C_BYTES_4_QP_STATE_M   \
+	(((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
+
 #define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
 #define QP1C_BYTES_4_SQ_WQE_SHIFT_M   \
 	(((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V2 for-next 00/11] Code improvements & fixes for HNS RoCE driver
From: Salil Mehta @ 2016-11-15 18:10 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm

This patchset introduces some code improvements and fixes
for the identified problems in the HNS RoCE driver.

Lijun Ou (4):
  IB/hns: Add the interface for querying QP1
  IB/hns: add self loopback for CM
  IB/hns: Modify the condition of notifying hardware loopback
  IB/hns: Fix the bug for qp state in hns_roce_v1_m_qp()

Salil Mehta (1):
  IB/hns: Fix for Checkpatch.pl comment style errors

Shaobo Xu (1):
  IB/hns: Implement the add_gid/del_gid and optimize the GIDs
    management

Wei Hu (Xavier) (5):
  IB/hns: Add code for refreshing CQ CI using TPTR
  IB/hns: Optimize the logic of allocating memory using APIs
  IB/hns: Modify the macro for the timeout when cmd process
  IB/hns: Modify query info named port_num when querying RC QP
  IB/hns: Change qpn allocation to round-robin mode.

 drivers/infiniband/hw/hns/hns_roce_alloc.c  |   11 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.c    |    8 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.h    |    7 +-
 drivers/infiniband/hw/hns/hns_roce_common.h |    2 -
 drivers/infiniband/hw/hns/hns_roce_cq.c     |   17 +-
 drivers/infiniband/hw/hns/hns_roce_device.h |   45 ++--
 drivers/infiniband/hw/hns/hns_roce_eq.c     |    6 +-
 drivers/infiniband/hw/hns/hns_roce_hem.c    |    6 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  267 +++++++++++++++++------
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |   17 +-
 drivers/infiniband/hw/hns/hns_roce_main.c   |  311 +++++++--------------------
 drivers/infiniband/hw/hns/hns_roce_mr.c     |   21 +-
 drivers/infiniband/hw/hns/hns_roce_pd.c     |    5 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |    2 +-
 14 files changed, 363 insertions(+), 362 deletions(-)

-- 
1.7.9.5

^ permalink raw reply

* RE: [PATCH for-next 03/11] IB/hns: Optimize the logic of allocating memory using APIs
From: Salil Mehta @ 2016-11-15 15:52 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: dledford@redhat.com, Huwei (Xavier), oulijun,
	mehta.salil.lnk@gmail.com, linux-rdma@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Linuxarm,
	Zhangping (ZP)
In-Reply-To: <20161109072130.GH27883@leon.nu>

> -----Original Message-----
> From: Leon Romanovsky [mailto:leon@kernel.org]
> Sent: Wednesday, November 09, 2016 7:22 AM
> To: Salil Mehta
> Cc: dledford@redhat.com; Huwei (Xavier); oulijun;
> mehta.salil.lnk@gmail.com; linux-rdma@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Linuxarm;
> Zhangping (ZP)
> Subject: Re: [PATCH for-next 03/11] IB/hns: Optimize the logic of
> allocating memory using APIs
> 
> On Fri, Nov 04, 2016 at 04:36:25PM +0000, Salil Mehta wrote:
> > From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>
> >
> > This patch modified the logic of allocating memory using APIs in
> > hns RoCE driver. We used kcalloc instead of kmalloc_array and
> > bitmap_zero. And When kcalloc failed, call vzalloc to alloc
> > memory.
> >
> > Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
> > Signed-off-by: Ping Zhang <zhangping5@huawei.com>
> > Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
> > ---
> >  drivers/infiniband/hw/hns/hns_roce_mr.c |   15 ++++++++-------
> >  1 file changed, 8 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c
> b/drivers/infiniband/hw/hns/hns_roce_mr.c
> > index fb87883..d3dfb5f 100644
> > --- a/drivers/infiniband/hw/hns/hns_roce_mr.c
> > +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
> > @@ -137,11 +137,12 @@ static int hns_roce_buddy_init(struct
> hns_roce_buddy *buddy, int max_order)
> >
> >  	for (i = 0; i <= buddy->max_order; ++i) {
> >  		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
> > -		buddy->bits[i] = kmalloc_array(s, sizeof(long),
> GFP_KERNEL);
> > -		if (!buddy->bits[i])
> > -			goto err_out_free;
> > -
> > -		bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
> > +		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL);
> > +		if (!buddy->bits[i]) {
> > +			buddy->bits[i] = vzalloc(s * sizeof(long));
> 
> I wonder, why don't you use directly vzalloc instead of kcalloc
> fallback?
As we know we will have physical contiguous pages if the kcalloc
call succeeds. This will give us a chance to have better performance
over the allocations which are just virtually contiguous through the
function vzalloc(). Therefore, later has only been used as a fallback
when our memory request cannot be entertained through kcalloc.

Are you suggesting that there will not be much performance penalty
if we use just vzalloc ?

> 
> > +			if (!buddy->bits[i])
> > +				goto err_out_free;
> > +		}
> >  	}

^ permalink raw reply

* [PATCH rdma-next 0/4] Add packet pacing support for IB verbs
From: Rony Efraim @ 2016-11-15 14:43 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky
  Cc: Hefty, Sean, Bodong Wang,
	dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <DB5PR0501MB19281A8064B283BE35EDE821B0BF0-1FH/Iesddo5/SeJcUcAJq8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 1237 bytes --]


On Thu, Nov 10, 2016 at 6:47 PM +0200, Jason Gunthorpe wrote:
>On Thu, Nov 10, 2016 at 09:22:42AM +0200, Leon Romanovsky wrote:
>> I'll let to Bodong to elaborate on it more, but as far as I see, the 
>> AH attribute is relevant to UD QP only, while the packet pacing is 
>> intended for all types of QPs.

>In IB static rate is supposed to work for all QP types.
Indeed we already have a static_rate field, which applies to all IB traffic (including UD).
The problem is:
- This field is only u8, and uses IB standard rate enumerations. For pacing, we need arbitrary rates.
- The field doesn't apply to Raw Ethernet QPs, AH or AV which isn't applicable to Raw Ethernet. It is incorrect to support AV/AH for Raw Ethernet QPs.
 
The rate_limit configuration came form the application like required BW for the streaming and not from the fabric (SM).
The rate_limit resolution is much higher and it is 1Kb/s.

Both of the fields are limit the rate but coming from a different entities and required separate fields.
The actual limit should be the minimum of both of them.

Rony

>Jason
N‹§²æìr¸›yúèšØb²X¬¶Ç§vØ^–)Þº{.nÇ+‰·¥Š{±­ÙšŠ{ayº\x1dʇڙë,j\a­¢f£¢·hš‹»öì\x17/oSc¾™Ú³9˜uÀ¦æå‰È&jw¨®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿïêäz¹Þ–Šàþf£¢·hšˆ§~ˆmš

^ permalink raw reply

* suspicious rcu_dereference_protected() usage in mlx4 when tearing down an interface
From: Sagi Grimberg @ 2016-11-15 12:30 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Yishai Hadas,
	Matan Barak, Leon Romanovsky

Hi Guys,

I just hit this with 4.9-rc4 on my test vm.
Didn't find any patches on the list so maybe this
is new?


[   12.392197] ===============================
[   12.392927] [ INFO: suspicious RCU usage. ]
[   12.393625] 4.9.0-rc4-e386eaffebc4-lb #25 Tainted: G           OE
[   12.394633] -------------------------------
[   12.395317] drivers/net/ethernet/mellanox/mlx4/en_rx.c:543 suspicious 
rcu_dereference_protected() usage!
[   12.396873]
                other info that might help us debug this:

[   12.398134]
                rcu_scheduler_active = 1, debug_locks = 0
[   12.399107] 1 lock held by rmmod/707:
[   12.399665]  #0:
[   12.399939]  (
[   12.400230] intf_mutex
[   12.400378] ){+.+.+.}
[   12.400805] , at:
[   12.401139] [<ffffffffc015e93d>] mlx4_unregister_interface+0x1d/0x70 
[mlx4_core]
[   12.402275]
                stack backtrace:
[   12.402926] CPU: 3 PID: 707 Comm: rmmod Tainted: G           OE 
4.9.0-rc4-e386eaffebc4-lb #25
[   12.404221] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[   12.405619]  ffffb5fdc0fcfda0 ffffffffb6450643 ffff951f787b22c0 
0000000000000001
[   12.406793]  ffffb5fdc0fcfdd0 ffffffffb60e78f7 ffff951f73966000 
0000000000000400
[   12.407966]  ffff951f70430ad8 ffff951f73584800 ffffb5fdc0fcfe08 
ffffffffc020f77a
[   12.409069] Call Trace:
[   12.409437]  [<ffffffffb6450643>] dump_stack+0x85/0xc2
[   12.410151]  [<ffffffffb60e78f7>] lockdep_rcu_suspicious+0xd7/0x110
[   12.411024]  [<ffffffffc020f77a>] mlx4_en_destroy_rx_ring+0xba/0xc0 
[mlx4_en]
[   12.412032]  [<ffffffffc0215ec6>] mlx4_en_free_resources+0xb6/0xf0 
[mlx4_en]
[   12.413006]  [<ffffffffc021abe2>] mlx4_en_destroy_netdev+0xe2/0x1b0 
[mlx4_en]
[   12.413994]  [<ffffffffc020b105>] mlx4_en_remove+0xe5/0xf0 [mlx4_en]
[   12.414883]  [<ffffffffc015e903>] mlx4_remove_device+0x73/0x90 
[mlx4_core]
[   12.415844]  [<ffffffffc015e95e>] mlx4_unregister_interface+0x3e/0x70 
[mlx4_core]
[   12.416880]  [<ffffffffc021dd6b>] mlx4_en_cleanup+0x10/0x2a5 [mlx4_en]
[   12.417773]  [<ffffffffb613322a>] SyS_delete_module+0x1ba/0x220
[   12.418545]  [<ffffffffb68e3085>] entry_SYSCALL_64_fastpath+0x23/0xc6
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [bug report] qedr: Add support for QP verbs
From: Amrani, Ram @ 2016-11-15 10:36 UTC (permalink / raw)
  To: Dan Carpenter; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20161114130310.GA14500@mwanda>

> Hello Ram Amrani,
> 
> The patch cecbcddf6461: "qedr: Add support for QP verbs" from Oct 10, 2016,
> leads to the following static checker warning:
> 
> 	drivers/infiniband/hw/qedr/verbs.c:2067 qedr_destroy_qp()
> 	'0x5 | 0x1' has '0x1' set on both sides
> 
> drivers/infiniband/hw/qedr/verbs.c
>   2056  int qedr_destroy_qp(struct ib_qp *ibqp)
>   2057  {
>   2058          struct qedr_qp *qp = get_qedr_qp(ibqp);
>   2059          struct qedr_dev *dev = qp->dev;
>   2060          struct ib_qp_attr attr;
>   2061          int attr_mask = 0;
>   2062          int rc = 0;
>   2063
>   2064          DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp
> type=%d\n",
>   2065                   qp, qp->qp_type);
>   2066
>   2067          if (qp->state != (QED_ROCE_QP_STATE_RESET |
> QED_ROCE_QP_STATE_ERR |
>   2068                            QED_ROCE_QP_STATE_INIT)) {
> 
> These aren't bitfields, they're just numbers.  This code is pretty suspect.  Not
> sure what was intended.
> 
>   2069                  attr.qp_state = IB_QPS_ERR;
>   2070                  attr_mask |= IB_QP_STATE;
>   2071
>   2072                  /* Change the QP state to ERROR */
>   2073                  qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
>   2074          }
> 
> regards,
> dan carpenter

That's a real bug. Thanks!
Will fix.

Ram

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [patch] qedr: Unlock on error in qedr_post_send()
From: Amrani, Ram @ 2016-11-15 10:28 UTC (permalink / raw)
  To: Dan Carpenter, Doug Ledford
  Cc: Sean Hefty, Hal Rosenstock, Borundia, Rajesh,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	kernel-janitors-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Wei Yongjun
In-Reply-To: <20161115095059.GB15424@mwanda>

> If "wr" is NULL then we return without unlocking.  I added an unlock
> label and changed the direct returns to gotos.
> 
> Fixes: afa0e13be754 ("qedr: Add support for data path")
> Signed-off-by: Dan Carpenter <dan.carpenter-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> 
> diff --git a/drivers/infiniband/hw/qedr/verbs.c
> b/drivers/infiniband/hw/qedr/verbs.c
> index a615142..01d05b0 100644
> --- a/drivers/infiniband/hw/qedr/verbs.c
> +++ b/drivers/infiniband/hw/qedr/verbs.c
> @@ -2973,17 +2973,18 @@ int qedr_post_send(struct ib_qp *ibqp, struct
> ib_send_wr *wr,
> 
>  	if ((qp->state == QED_ROCE_QP_STATE_RESET) ||
>  	    (qp->state == QED_ROCE_QP_STATE_ERR)) {
> -		spin_unlock_irqrestore(&qp->q_lock, flags);
>  		*bad_wr = wr;
>  		DP_DEBUG(dev, QEDR_MSG_CQ,
>  			 "QP in wrong state! QP icid=0x%x state %d\n",
>  			 qp->icid, qp->state);
> -		return -EINVAL;
> +		rc = -EINVAL;
> +		goto unlock;
>  	}
> 
>  	if (!wr) {
>  		DP_ERR(dev, "Got an empty post send.\n");
> -		return -EINVAL;
> +		rc = -EINVAL;
> +		goto unlock;
>  	}
> 
>  	while (wr) {
> @@ -3012,6 +3013,7 @@ int qedr_post_send(struct ib_qp *ibqp, struct
> ib_send_wr *wr,
>  	/* Make sure write sticks */
>  	mmiowb();
> 
> +unlock:
>  	spin_unlock_irqrestore(&qp->q_lock, flags);
> 
>  	return rc;

Thanks for catching this. However this was already taken care of by Wei Yongjun in a different manner.
See: https://patchwork.kernel.org/patch/9409053/ 

Thanks,
Ram

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [patch] qedr: Unlock on error in qedr_post_send()
From: Dan Carpenter @ 2016-11-15  9:51 UTC (permalink / raw)
  To: Doug Ledford, Ram Amrani
  Cc: Sean Hefty, Hal Rosenstock, Rajesh Borundia, linux-rdma,
	kernel-janitors

If "wr" is NULL then we return without unlocking.  I added an unlock
label and changed the direct returns to gotos.

Fixes: afa0e13be754 ("qedr: Add support for data path")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a615142..01d05b0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2973,17 +2973,18 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	if ((qp->state == QED_ROCE_QP_STATE_RESET) ||
 	    (qp->state == QED_ROCE_QP_STATE_ERR)) {
-		spin_unlock_irqrestore(&qp->q_lock, flags);
 		*bad_wr = wr;
 		DP_DEBUG(dev, QEDR_MSG_CQ,
 			 "QP in wrong state! QP icid=0x%x state %d\n",
 			 qp->icid, qp->state);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto unlock;
 	}
 
 	if (!wr) {
 		DP_ERR(dev, "Got an empty post send.\n");
-		return -EINVAL;
+		rc = -EINVAL;
+		goto unlock;
 	}
 
 	while (wr) {
@@ -3012,6 +3013,7 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	/* Make sure write sticks */
 	mmiowb();
 
+unlock:
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 
 	return rc;

^ permalink raw reply related

* Re: [PATCH] IB/mlx4: Rework special QP creation error path
From: Yishai Hadas @ 2016-11-15  8:48 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Doug Ledford, Yishai Hadas,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <3a41daa7-cfb8-7374-5dab-dfa3b1bf83ff-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>

On 11/14/2016 6:44 PM, Bart Van Assche wrote:
> The special QP creation error path relies on offset_of(struct mlx4_ib_sqp,
> qp) == 0. Remove this assumption because that makes the QP creation
> code easier to understand.
>
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Cc: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/infiniband/hw/mlx4/qp.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
> index 570bc86..b63d6be 100644
> --- a/drivers/infiniband/hw/mlx4/qp.c
> +++ b/drivers/infiniband/hw/mlx4/qp.c
> @@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>  	int qpn;
>  	int err;
>  	struct ib_qp_cap backup_cap;
> -	struct mlx4_ib_sqp *sqp;
> +	struct mlx4_ib_sqp *sqp = NULL;
>  	struct mlx4_ib_qp *qp;
>  	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
>  	struct mlx4_ib_cq *mcq;
> @@ -933,7 +933,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>  		mlx4_db_free(dev->dev, &qp->db);
>
>  err:
> -	if (!*caller_qp)
> +	if (sqp)
> +		kfree(sqp);
> +	else if (!*caller_qp)
>  		kfree(qp);
>  	return err;
>  }
>

Looks fine, thanks.
Reviewed-by: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH libibmad] configure.ac: Update AM_INIT_AUTOMAKE to use subdir-objects
From: Hal Rosenstock @ 2016-11-14 21:34 UTC (permalink / raw)
  To: Weiny, Ira; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org


With this option, the objects are placed into the subdirectory of
the build directory corresponding to the subdirectory of the source file.

Fixes the following warning:
Makefile.am:16: warning: source file 'src/dump.c' is in a subdirectory,
Makefile.am:16: but option 'subdir-objects' is disabled
automake: warning: possible forward-incompatibility.
automake: At least a source file is in a subdirectory, but the 'subdir-objects'
automake: automake option hasn't been enabled.  For now, the corresponding output
automake: object file(s) will be placed in the top-level directory.  However,
automake: this behaviour will change in future Automake versions: they will
automake: unconditionally cause object files to be placed in the same subdirectory
automake: of the corresponding sources.
automake: You are advised to start using 'subdir-objects' option throughout your
automake: project, to avoid future incompatibilities.

Signed-off-by: Hal Rosenstock <hal-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
diff --git a/configure.ac b/configure.ac
index 843dbfd..6453e11 100644
--- a/configure.ac
+++ b/configure.ac
@@ -6,7 +6,7 @@ AC_CONFIG_SRCDIR([src/sa.c])
 AC_CONFIG_AUX_DIR(config)
 AC_CONFIG_MACRO_DIR(config)
 AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE
+AM_INIT_AUTOMAKE([subdir-objects])
 
 AC_SUBST(RELEASE, ${RELEASE:-unknown})
 AC_SUBST(TARBALL, ${TARBALL:-${PACKAGE}-${VERSION}.tar.gz})
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH] IB/mlx4: Rework special QP creation error path
From: Laurence Oberman @ 2016-11-14 19:33 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Doug Ledford, Yishai Hadas, linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <3a41daa7-cfb8-7374-5dab-dfa3b1bf83ff-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>



----- Original Message -----
> From: "Bart Van Assche" <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> To: "Doug Ledford" <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> Cc: "Yishai Hadas" <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Sent: Monday, November 14, 2016 11:44:11 AM
> Subject: [PATCH] IB/mlx4: Rework special QP creation error path
> 
> The special QP creation error path relies on offset_of(struct mlx4_ib_sqp,
> qp) == 0. Remove this assumption because that makes the QP creation
> code easier to understand.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
> Cc: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/infiniband/hw/mlx4/qp.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx4/qp.c
> b/drivers/infiniband/hw/mlx4/qp.c
> index 570bc86..b63d6be 100644
> --- a/drivers/infiniband/hw/mlx4/qp.c
> +++ b/drivers/infiniband/hw/mlx4/qp.c
> @@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev,
> struct ib_pd *pd,
>  	int qpn;
>  	int err;
>  	struct ib_qp_cap backup_cap;
> -	struct mlx4_ib_sqp *sqp;
> +	struct mlx4_ib_sqp *sqp = NULL;
>  	struct mlx4_ib_qp *qp;
>  	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
>  	struct mlx4_ib_cq *mcq;
> @@ -933,7 +933,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev,
> struct ib_pd *pd,
>  		mlx4_db_free(dev->dev, &qp->db);
>  
>  err:
> -	if (!*caller_qp)
> +	if (sqp)
> +		kfree(sqp);
> +	else if (!*caller_qp)
>  		kfree(qp);
>  	return err;
>  }
> --
> 2.10.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
This looks fine to me.

Reviewed-by: Laurence Oberman <loberman-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH rdma-next 6/6] IB/mlx5: Add support to match inner packet fields
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb,
	Moses Reuben
In-Reply-To: <1479143092-11723-1-git-send-email-leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Add support to match packet fields which are tunneled,
i.e. support matching the header of the inner packet which is the result of
or bit operation of the original header and the IB_FLOW_SPEC_INNER type.

The combination of IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL is not
needed to be checked, because the IB core has this check already.

Signed-off-by: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/hw/mlx5/main.c | 131 +++++++++++++++++++++++---------------
 1 file changed, 78 insertions(+), 53 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index a833f45..65eb5e0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1502,6 +1502,22 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }

+static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+			   bool inner)
+{
+	if (inner) {
+		MLX5_SET(fte_match_set_misc,
+			 misc_c, inner_ipv6_flow_label, mask);
+		MLX5_SET(fte_match_set_misc,
+			 misc_v, inner_ipv6_flow_label, val);
+	} else {
+		MLX5_SET(fte_match_set_misc,
+			 misc_c, outer_ipv6_flow_label, mask);
+		MLX5_SET(fte_match_set_misc,
+			 misc_v, outer_ipv6_flow_label, val);
+	}
+}
+
 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 {
 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
@@ -1528,155 +1544,164 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 static int parse_flow_attr(u32 *match_c, u32 *match_v,
 			   const union ib_flow_spec *ib_spec)
 {
-	void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
-					     outer_headers);
-	void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
-					     outer_headers);
 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   misc_parameters);
 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 					   misc_parameters);
+	void *headers_c;
+	void *headers_v;
+
+	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					 inner_headers);
+	} else {
+		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					 outer_headers);
+	}

-	switch (ib_spec->type) {
+	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 	case IB_FLOW_SPEC_ETH:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 			return -ENOTSUPP;

-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     dmac_47_16),
 				ib_spec->eth.mask.dst_mac);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     dmac_47_16),
 				ib_spec->eth.val.dst_mac);

-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     smac_47_16),
 				ib_spec->eth.mask.src_mac);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     smac_47_16),
 				ib_spec->eth.val.src_mac);

 		if (ib_spec->eth.mask.vlan_tag) {
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 vlan_tag, 1);
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 vlan_tag, 1);

-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));

-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 first_cfi,
 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 first_cfi,
 				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);

-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 first_prio,
 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 first_prio,
 				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
 		}
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 ethertype, ntohs(ib_spec->eth.val.ether_type));
 		break;
 	case IB_FLOW_SPEC_IPV4:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 			return -ENOTSUPP;

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, 0xffff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 ethertype, ETH_P_IP);

-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.mask.src_ip,
 		       sizeof(ib_spec->ipv4.mask.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.val.src_ip,
 		       sizeof(ib_spec->ipv4.val.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.mask.dst_ip,
 		       sizeof(ib_spec->ipv4.mask.dst_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.val.dst_ip,
 		       sizeof(ib_spec->ipv4.val.dst_ip));

-		set_tos(outer_headers_c, outer_headers_v,
+		set_tos(headers_c, headers_v,
 			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);

-		set_proto(outer_headers_c, outer_headers_v,
+		set_proto(headers_c, headers_v,
 			  ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
 		break;
 	case IB_FLOW_SPEC_IPV6:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
 			return -ENOTSUPP;

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, 0xffff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 ethertype, ETH_P_IPV6);

-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.mask.src_ip,
 		       sizeof(ib_spec->ipv6.mask.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.val.src_ip,
 		       sizeof(ib_spec->ipv6.val.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.mask.dst_ip,
 		       sizeof(ib_spec->ipv6.mask.dst_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.val.dst_ip,
 		       sizeof(ib_spec->ipv6.val.dst_ip));

-		set_tos(outer_headers_c, outer_headers_v,
+		set_tos(headers_c, headers_v,
 			ib_spec->ipv6.mask.traffic_class,
 			ib_spec->ipv6.val.traffic_class);

-		set_proto(outer_headers_c, outer_headers_v,
+		set_proto(headers_c, headers_v,
 			  ib_spec->ipv6.mask.next_hdr,
 			  ib_spec->ipv6.val.next_hdr);

-		MLX5_SET(fte_match_set_misc, misc_params_c,
-			 outer_ipv6_flow_label,
-			 ntohl(ib_spec->ipv6.mask.flow_label));
-		MLX5_SET(fte_match_set_misc, misc_params_v,
-			 outer_ipv6_flow_label,
-			 ntohl(ib_spec->ipv6.val.flow_label));
+		set_flow_label(misc_params_c, misc_params_v,
+			       ntohl(ib_spec->ipv6.mask.flow_label),
+			       ntohl(ib_spec->ipv6.val.flow_label),
+			       ib_spec->type & IB_FLOW_SPEC_INNER);
+
 		break;
 	case IB_FLOW_SPEC_TCP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 					 LAST_TCP_UDP_FIELD))
 			return -ENOTSUPP;

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 0xff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 			 IPPROTO_TCP);

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
 			 ntohs(ib_spec->tcp_udp.mask.src_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
 			 ntohs(ib_spec->tcp_udp.val.src_port));

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
 	case IB_FLOW_SPEC_UDP:
@@ -1684,19 +1709,19 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 					 LAST_TCP_UDP_FIELD))
 			return -ENOTSUPP;

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 0xff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 			 IPPROTO_UDP);

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
 			 ntohs(ib_spec->tcp_udp.mask.src_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
 			 ntohs(ib_spec->tcp_udp.val.src_port));

-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-next 5/6] IB/core: Introduce inner flow steering
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb,
	Moses Reuben
In-Reply-To: <1479143092-11723-1-git-send-email-leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

For a tunneled packet which contains external and internal headers,
we refer to the external headers as "outer fields" and the internal
headers as "inner fields".

Example of a tunneled packet:

{ L2 | L3 | L4 | tunnel header | L2 | L3 | l4 | data }
  |     |    |         |         |    |    |
{       outer fields           }{ inner fields }

This patch introduces a new flag for flow steering rules
- IB_FLOW_SPEC_INNER - which specifies that the rule applies
to the inner fields, rather than to the outer fields of the packet.

Signed-off-by: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/core/uverbs_cmd.c |  4 +++-
 include/rdma/ib_verbs.h              | 17 +++++++++--------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 561010a..10fb325 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3124,8 +3124,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 	kern_spec_val = (void *)kern_spec +
 		sizeof(struct ib_uverbs_flow_spec_hdr);
 	kern_spec_mask = kern_spec_val + kern_filter_sz;
+	if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+		return -EINVAL;

-	switch (ib_spec->type) {
+	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 	case IB_FLOW_SPEC_ETH:
 		ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
 		actual_filter_sz = spec_filter_size(kern_spec_mask,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 81c5c80..195a03e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1601,9 +1601,10 @@ enum ib_flow_spec_type {
 	IB_FLOW_SPEC_TCP		= 0x40,
 	IB_FLOW_SPEC_UDP		= 0x41,
 	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
+	IB_FLOW_SPEC_INNER		= 0x100,
 };
 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
-#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 8

 /* Flow steering rule priority is set according to it's domain.
  * Lower domain value means higher priority.
@@ -1631,7 +1632,7 @@ struct ib_flow_eth_filter {
 };

 struct ib_flow_spec_eth {
-	enum ib_flow_spec_type	  type;
+	u32			  type;
 	u16			  size;
 	struct ib_flow_eth_filter val;
 	struct ib_flow_eth_filter mask;
@@ -1645,7 +1646,7 @@ struct ib_flow_ib_filter {
 };

 struct ib_flow_spec_ib {
-	enum ib_flow_spec_type	 type;
+	u32			 type;
 	u16			 size;
 	struct ib_flow_ib_filter val;
 	struct ib_flow_ib_filter mask;
@@ -1670,7 +1671,7 @@ struct ib_flow_ipv4_filter {
 };

 struct ib_flow_spec_ipv4 {
-	enum ib_flow_spec_type	   type;
+	u32			   type;
 	u16			   size;
 	struct ib_flow_ipv4_filter val;
 	struct ib_flow_ipv4_filter mask;
@@ -1688,7 +1689,7 @@ struct ib_flow_ipv6_filter {
 };

 struct ib_flow_spec_ipv6 {
-	enum ib_flow_spec_type	   type;
+	u32			   type;
 	u16			   size;
 	struct ib_flow_ipv6_filter val;
 	struct ib_flow_ipv6_filter mask;
@@ -1702,7 +1703,7 @@ struct ib_flow_tcp_udp_filter {
 };

 struct ib_flow_spec_tcp_udp {
-	enum ib_flow_spec_type	      type;
+	u32			      type;
 	u16			      size;
 	struct ib_flow_tcp_udp_filter val;
 	struct ib_flow_tcp_udp_filter mask;
@@ -1717,7 +1718,7 @@ struct ib_flow_tunnel_filter {
  * the tunnel_id from val has the vni value
  */
 struct ib_flow_spec_tunnel {
-	enum ib_flow_spec_type	      type;
+	u32			      type;
 	u16			      size;
 	struct ib_flow_tunnel_filter  val;
 	struct ib_flow_tunnel_filter  mask;
@@ -1725,7 +1726,7 @@ struct ib_flow_spec_tunnel {

 union ib_flow_spec {
 	struct {
-		enum ib_flow_spec_type	type;
+		u32			type;
 		u16			size;
 	};
 	struct ib_flow_spec_eth		eth;
--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-next 4/6] IB/mlx5: Support Vxlan tunneling specification
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb,
	Moses Reuben
In-Reply-To: <1479143092-11723-1-git-send-email-leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Add support to receive specific Vxlan packet in ConnectX-4.

Signed-off-by: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/hw/mlx5/main.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2217477..a833f45 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1515,6 +1515,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 #define LAST_IPV4_FIELD tos
 #define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id

 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1698,6 +1699,16 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
+	case IB_FLOW_SPEC_VXLAN_TUNNEL:
+		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+					 LAST_TUNNEL_FIELD))
+			return -ENOTSUPP;
+
+		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+			 ntohl(ib_spec->tunnel.mask.tunnel_id));
+		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+			 ntohl(ib_spec->tunnel.val.tunnel_id));
+		break;
 	default:
 		return -EINVAL;
 	}
--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-next 3/6] IB/uverbs: Add support for Vxlan protocol
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb,
	Moses Reuben
In-Reply-To: <1479143092-11723-1-git-send-email-leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Add ib_uverbs_flow_spec_tunnel to define the rule to match Vxlan,
the type, size, reserved fields are identical to rest of the protocols,
and are used to identify the spec.
The tunnel id is the vni value of the Vxlan protocol, and it is used
as part of the steering rule, it is limited by the mask.
The steering rule configured on the hardware does a match
according to vni and other protocols.
In the same way as rest of the protocols that we match,
the uniq field's of each protocol are represented on
the val and the mask.

Signed-off-by: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 include/uapi/rdma/ib_user_verbs.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25225eb..90ba5e8 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -908,6 +908,23 @@ struct ib_uverbs_flow_spec_ipv6 {
 	struct ib_uverbs_flow_ipv6_filter mask;
 };

+struct ib_uverbs_flow_tunnel_filter {
+	__be32 tunnel_id;
+};
+
+struct ib_uverbs_flow_spec_tunnel {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_tunnel_filter val;
+	struct ib_uverbs_flow_tunnel_filter mask;
+};
+
 struct ib_uverbs_flow_attr {
 	__u32 type;
 	__u16 size;
--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-next 2/6] IB/core: Align structure ib_flow_spec_type
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb,
	Moses Reuben
In-Reply-To: <1479143092-11723-1-git-send-email-leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Aligned the structure ib_flow_spec_type indentation,
after adding a new definition.

Signed-off-by: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 include/rdma/ib_verbs.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9dd8309..81c5c80 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1592,14 +1592,14 @@ enum ib_flow_attr_type {
 /* Supported steering header types */
 enum ib_flow_spec_type {
 	/* L2 headers*/
-	IB_FLOW_SPEC_ETH	= 0x20,
-	IB_FLOW_SPEC_IB		= 0x22,
+	IB_FLOW_SPEC_ETH		= 0x20,
+	IB_FLOW_SPEC_IB			= 0x22,
 	/* L3 header*/
-	IB_FLOW_SPEC_IPV4	= 0x30,
-	IB_FLOW_SPEC_IPV6	= 0x31,
+	IB_FLOW_SPEC_IPV4		= 0x30,
+	IB_FLOW_SPEC_IPV6		= 0x31,
 	/* L4 headers*/
-	IB_FLOW_SPEC_TCP	= 0x40,
-	IB_FLOW_SPEC_UDP	= 0x41,
+	IB_FLOW_SPEC_TCP		= 0x40,
+	IB_FLOW_SPEC_UDP		= 0x41,
 	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
 };
 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-next 1/6] IB/core: Add flow spec tunneling support
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb,
	Moses Reuben
In-Reply-To: <1479143092-11723-1-git-send-email-leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

In order to support tunneling, that can be used by the QP,
both struct ib_flow_spec_tunnel and struct ib_flow_tunnel_filter can be
used to more IP or UDP based tunneling protocols (e.g NVGRE, GRE, etc).

IB_FLOW_SPEC_VXLAN_TUNNEL type flow specification is added to use this
functionality and match specific Vxlan packets.

In similar to IPv6, we check overflow of the vni value by
comparing with the maximum size.

Signed-off-by: Moses Reuben <mosesr-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/core/uverbs_cmd.c | 15 +++++++++++++++
 include/rdma/ib_verbs.h              | 19 ++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cb3f515a..561010a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3175,6 +3175,21 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 		memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
 		memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
 		break;
+	case IB_FLOW_SPEC_VXLAN_TUNNEL:
+		ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+		actual_filter_sz = spec_filter_size(kern_spec_mask,
+						    kern_filter_sz,
+						    ib_filter_sz);
+		if (actual_filter_sz <= 0)
+			return -EINVAL;
+		ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+		memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+		memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+		if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+		    (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a4..9dd8309 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1599,7 +1599,8 @@ enum ib_flow_spec_type {
 	IB_FLOW_SPEC_IPV6	= 0x31,
 	/* L4 headers*/
 	IB_FLOW_SPEC_TCP	= 0x40,
-	IB_FLOW_SPEC_UDP	= 0x41
+	IB_FLOW_SPEC_UDP	= 0x41,
+	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
 };
 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4
@@ -1707,6 +1708,21 @@ struct ib_flow_spec_tcp_udp {
 	struct ib_flow_tcp_udp_filter mask;
 };

+struct ib_flow_tunnel_filter {
+	__be32	tunnel_id;
+	u8	real_sz[0];
+};
+
+/* ib_flow_spec_tunnel describes the Vxlan tunnel
+ * the tunnel_id from val has the vni value
+ */
+struct ib_flow_spec_tunnel {
+	enum ib_flow_spec_type	      type;
+	u16			      size;
+	struct ib_flow_tunnel_filter  val;
+	struct ib_flow_tunnel_filter  mask;
+};
+
 union ib_flow_spec {
 	struct {
 		enum ib_flow_spec_type	type;
@@ -1717,6 +1733,7 @@ union ib_flow_spec {
 	struct ib_flow_spec_ipv4        ipv4;
 	struct ib_flow_spec_tcp_udp	tcp_udp;
 	struct ib_flow_spec_ipv6        ipv6;
+	struct ib_flow_spec_tunnel      tunnel;
 };

 struct ib_flow_attr {
--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-next 0/6] Add support for Vxlan protocol for the steering rules
From: Leon Romanovsky @ 2016-11-14 17:04 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky, Maor Gottlieb

From: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Virtual Extensible LAN - Vxlan is a proposed encapsulation protocol for
running an overlay network on existing Layer 3 infrastructure. An
overlay network is a virtual network that is built on top of existing
network Layer 2 and Layer 3 technologies to support elastic COMPUTE
architectures. Vxlan will make it easier for network engineers to scale
out a cloud computing environment while logically isolating cloud apps
and tenants.

This patchset introduces support Vxlan steering rules to IB/core and
mlx5 devices implementation. The application will be able to send
packet with Vxlan tunneling, and configure a steering rule to receive
the stream from the rule to the QP.

The rule will contain inner and outer parts for the inner and outer
protocol header respectively. The matching is based on the rule
specification and it includes the Vxlan header itself, so application
will be able to send several Vxlan streams with a new vni for each stream.

In order to steer traffic according to a Vxlan tunnel ID, there is a need to
use the ibv_cmd_create_flow call. At the end of the struct, the application
should append several specs for each feature.

The number of specifications to add is 4. Use the first as the Vxlan
spec (tunnel), which is described below. The following specifications to
add are the L2, L3, and L4 specs.

There is an option to mask a range of tunnel IDs to be steered by using
the filler mask, This following example provides the ibv_flow_tunnel_filter
struct definition:

struct ibv_flow_tunnel_filter {
        uint32_t tunnel_id;
};

This following example specifies the Vxlan tunnel definition:

struct ibv_flow_spec_tunnel {
        enum ibv_flow_spec_type  type;
        uint16_t  size;
        struct ibv_flow_tunnel_filter val;
        struct ibv_flow_tunnel_filter mask;
};

Where:

 * type - Specifies that there is a need to use IBV_FLOW_SPEC_VXLAN_TUNNEL
	to define the spec as an Vxlan spec.
 * size - Identifies the size of the ibv_kern_spec_tunnel.
 * tunnel_id: Indicates that the tunnel ID as in the Vxlan header.


Thanks,
	Mosses and Leon.

Available in the "topic/vxlan" topic branch of this git repo:
git://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma.git
Or for browsing:
https://git.kernel.org/cgit/linux/kernel/git/leon/linux-rdma.git/log/?h=topic/vxlan

CC: Maor Gottlieb <maorg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Moses Reuben (6):
  IB/core: Add flow spec tunneling support
  IB/core: Align structure ib_flow_spec_type
  IB/uverbs: Add support for Vxlan protocol
  IB/mlx5: Support Vxlan tunneling specification
  IB/core: Introduce inner flow steering
  IB/mlx5: Add support to match inner packet fields

 drivers/infiniband/core/uverbs_cmd.c |  19 ++++-
 drivers/infiniband/hw/mlx5/main.c    | 142 ++++++++++++++++++++++-------------
 include/rdma/ib_verbs.h              |  44 +++++++----
 include/uapi/rdma/ib_user_verbs.h    |  17 +++++
 4 files changed, 155 insertions(+), 67 deletions(-)

--
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] IB/mlx4: Rework special QP creation error path
From: Bart Van Assche @ 2016-11-14 16:44 UTC (permalink / raw)
  To: Doug Ledford
  Cc: Yishai Hadas, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

The special QP creation error path relies on offset_of(struct mlx4_ib_sqp,
qp) == 0. Remove this assumption because that makes the QP creation
code easier to understand.

Signed-off-by: Bart Van Assche <bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
Cc: Yishai Hadas <yishaih-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/hw/mlx4/qp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 570bc86..b63d6be 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	int qpn;
 	int err;
 	struct ib_qp_cap backup_cap;
-	struct mlx4_ib_sqp *sqp;
+	struct mlx4_ib_sqp *sqp = NULL;
 	struct mlx4_ib_qp *qp;
 	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
 	struct mlx4_ib_cq *mcq;
@@ -933,7 +933,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		mlx4_db_free(dev->dev, &qp->db);
 
 err:
-	if (!*caller_qp)
+	if (sqp)
+		kfree(sqp);
+	else if (!*caller_qp)
 		kfree(qp);
 	return err;
 }
-- 
2.10.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [bug report] qedr: Add support for QP verbs
From: Dan Carpenter @ 2016-11-14 13:03 UTC (permalink / raw)
  To: Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Hello Ram Amrani,

The patch cecbcddf6461: "qedr: Add support for QP verbs" from Oct 10,
2016, leads to the following static checker warning:

	drivers/infiniband/hw/qedr/verbs.c:2067 qedr_destroy_qp()
	'0x5 | 0x1' has '0x1' set on both sides

drivers/infiniband/hw/qedr/verbs.c
  2056  int qedr_destroy_qp(struct ib_qp *ibqp)
  2057  {
  2058          struct qedr_qp *qp = get_qedr_qp(ibqp);
  2059          struct qedr_dev *dev = qp->dev;
  2060          struct ib_qp_attr attr;
  2061          int attr_mask = 0;
  2062          int rc = 0;
  2063  
  2064          DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
  2065                   qp, qp->qp_type);
  2066  
  2067          if (qp->state != (QED_ROCE_QP_STATE_RESET | QED_ROCE_QP_STATE_ERR |
  2068                            QED_ROCE_QP_STATE_INIT)) {

These aren't bitfields, they're just numbers.  This code is pretty
suspect.  Not sure what was intended.

  2069                  attr.qp_state = IB_QPS_ERR;
  2070                  attr_mask |= IB_QP_STATE;
  2071  
  2072                  /* Change the QP state to ERROR */
  2073                  qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
  2074          }

regards,
dan carpenter
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: Configuration of cq->cqe is lower than entries by 1
From: Amrani, Ram @ 2016-11-14 12:05 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20161114120153.GC4240-2ukJVAZIZ/Y@public.gmane.org>

> There is addition of 1 in mlx4_ib_create_cq():
> 192         entries      = roundup_pow_of_two(entries + 1);
> 193         cq->ibcq.cqe = entries - 1;

I thought something else might hide there.
Thanks,
Ram

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox