Netdev List
 help / color / mirror / Atom feed
* [PATCH V3 for-next 07/11] IB/hns: Modify the macro for the timeout when cmd process
From: Salil Mehta @ 2016-11-23 19:41 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm
In-Reply-To: <20161123194109.420760-1-salil.mehta@huawei.com>

From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>

This patch replaces the per-class command timeout enum with a
single timeout macro, as follows:
Before modification:
 enum {
	HNS_ROCE_CMD_TIME_CLASS_A       = 10000,
	HNS_ROCE_CMD_TIME_CLASS_B       = 10000,
	HNS_ROCE_CMD_TIME_CLASS_C       = 10000,
 };
After modification:
 #define HNS_ROCE_CMD_TIMEOUT_MSECS	10000

Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_cmd.h   |    7 +------
 drivers/infiniband/hw/hns/hns_roce_cq.c    |    4 ++--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |    8 ++++----
 drivers/infiniband/hw/hns/hns_roce_mr.c    |    4 ++--
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index e3997d3..ed14ad3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -34,6 +34,7 @@
 #define _HNS_ROCE_CMD_H
 
 #define HNS_ROCE_MAILBOX_SIZE		4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS	10000
 
 enum {
 	/* TPT commands */
@@ -57,12 +58,6 @@ enum {
 	HNS_ROCE_CMD_QUERY_QP		= 0x22,
 };
 
-enum {
-	HNS_ROCE_CMD_TIME_CLASS_A	= 10000,
-	HNS_ROCE_CMD_TIME_CLASS_B	= 10000,
-	HNS_ROCE_CMD_TIME_CLASS_C	= 10000,
-};
-
 struct hns_roce_cmd_mailbox {
 	void		       *buf;
 	dma_addr_t		dma;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 5dc8d92..461a273 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -77,7 +77,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
 			     unsigned long cq_num)
 {
 	return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
-			    HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+			    HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
@@ -176,7 +176,7 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
 {
 	return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
 				 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
-				 HNS_ROCE_CMD_TIME_CLASS_A);
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b835a55..509ea75 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1871,12 +1871,12 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
 	if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
 		return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 					 HNS_ROCE_CMD_2RST_QP,
-					 HNS_ROCE_CMD_TIME_CLASS_A);
+					 HNS_ROCE_CMD_TIMEOUT_MSECS);
 
 	if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
 		return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 					 HNS_ROCE_CMD_2ERR_QP,
-					 HNS_ROCE_CMD_TIME_CLASS_A);
+					 HNS_ROCE_CMD_TIMEOUT_MSECS);
 
 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
 	if (IS_ERR(mailbox))
@@ -1886,7 +1886,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
 
 	ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
 				op[cur_state][new_state],
-				HNS_ROCE_CMD_TIME_CLASS_C);
+				HNS_ROCE_CMD_TIMEOUT_MSECS);
 
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 	return ret;
@@ -2681,7 +2681,7 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
 
 	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
 				HNS_ROCE_CMD_QUERY_QP,
-				HNS_ROCE_CMD_TIME_CLASS_A);
+				HNS_ROCE_CMD_TIMEOUT_MSECS);
 	if (!ret)
 		memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
 	else
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index d87d189..a5bd645 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -53,7 +53,7 @@ static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
 {
 	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
 				 HNS_ROCE_CMD_SW2HW_MPT,
-				 HNS_ROCE_CMD_TIME_CLASS_B);
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
@@ -62,7 +62,7 @@ static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
 {
 	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
 				 mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
-				 HNS_ROCE_CMD_TIME_CLASS_B);
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V3 for-next 06/11] IB/hns: Fix the bug for qp state in hns_roce_v1_m_qp()
From: Salil Mehta @ 2016-11-23 19:41 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm
In-Reply-To: <20161123194109.420760-1-salil.mehta@huawei.com>

From: Lijun Ou <oulijun@huawei.com>

In the old code, the QP state written to the QP context was taken
from attr->qp_state. That value may be invalid when
(attr_mask & IB_QP_STATE) is zero, so new_state is used instead.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Reviewed-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 643a2ff..b835a55 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2571,7 +2571,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 	/* Every status migrate must change state */
 	roce_set_field(context->qpc_bytes_144,
 		       QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
-		       QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state);
+		       QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
 
 	/* SW pass context to HW */
 	ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V3 for-next 05/11] IB/hns: Modify the condition of notifying hardware loopback
From: Salil Mehta @ 2016-11-23 19:41 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm
In-Reply-To: <20161123194109.420760-1-salil.mehta@huawei.com>

From: Lijun Ou <oulijun@huawei.com>

This patch modified the condition of notifying hardware loopback.

In hip06, the RoCE Engine has several ports, and each QP is related
to one port. The hardware only supports loopback within the same port,
not between different ports.

So, if a QP is related to port N and the dmac in the QP context equals
the smac of the local port N, or if loop_idc is 1, we should
set the loopback bit in the QP context to notify the hardware.

Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index e080dd6..643a2ff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2244,24 +2244,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			     QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
 			     hr_qp->sq_signal_bits);
 
-		for (port = 0; port < hr_dev->caps.num_ports; port++) {
-			smac = (u8 *)hr_dev->dev_addr[port];
-			dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
-				smac[0], smac[1], smac[2], smac[3], smac[4],
-				smac[5]);
-			if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
-			    (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
-			    (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
-				roce_set_bit(context->qpc_bytes_32,
-				    QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
-				    1);
-				break;
-			}
-		}
-
-		if (hr_dev->loop_idc == 0x1)
+		port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+			hr_qp->port;
+		smac = (u8 *)hr_dev->dev_addr[port];
+		/* when dmac equals smac or loop_idc is 1, it should loopback */
+		if (ether_addr_equal_unaligned(dmac, smac) ||
+		    hr_dev->loop_idc == 0x1)
 			roce_set_bit(context->qpc_bytes_32,
-				QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+			      QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
 
 		roce_set_bit(context->qpc_bytes_32,
 			     QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V3 for-next 04/11] IB/hns: add self loopback for CM
From: Salil Mehta @ 2016-11-23 19:41 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm,
	Peter Chen
In-Reply-To: <20161123194109.420760-1-salil.mehta@huawei.com>

From: Lijun Ou <oulijun@huawei.com>

This patch mainly adds self loopback support for CM.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Peter Chen <luck.chen@huawei.com>
Reviewed-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   11 +++++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |    2 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 959d5ca..e080dd6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/acpi.h>
+#include <linux/etherdevice.h>
 #include <rdma/ib_umem.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
@@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	int nreq = 0;
 	u32 ind = 0;
 	int ret = 0;
+	u8 *smac;
+	int loopback;
 
 	if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
 		ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				       UD_SEND_WQE_U32_8_DMAC_5_M,
 				       UD_SEND_WQE_U32_8_DMAC_5_S,
 				       ah->av.mac[5]);
+
+			smac = (u8 *)hr_dev->dev_addr[qp->port];
+			loopback = ether_addr_equal_unaligned(ah->av.mac,
+							      smac) ? 1 : 0;
+			roce_set_bit(ud_sq_wqe->u32_8,
+				     UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+				     loopback);
+
 			roce_set_field(ud_sq_wqe->u32_8,
 				       UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
 				       UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 6004c7f..cf28f1b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -440,6 +440,8 @@ struct hns_roce_ud_send_wqe {
 #define UD_SEND_WQE_U32_8_DMAC_5_M   \
 	(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
 
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M   \
 	(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V3 for-next 03/11] IB/hns: Optimize the logic of allocating memory using APIs
From: Salil Mehta @ 2016-11-23 19:41 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm,
	Ping Zhang
In-Reply-To: <20161123194109.420760-1-salil.mehta@huawei.com>

From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>

This patch modifies the logic of allocating memory using APIs in
the hns RoCE driver. It uses kcalloc instead of kmalloc_array plus
bitmap_zero, and when kcalloc fails, it calls vzalloc to allocate
the memory.

Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Ping Zhang <zhangping5@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
Change log:

PATCH V2: Addressed comment given by Leon
         Link: https://patchwork.kernel.org/patch/9412859/
PATCH V1: Initial Submit
---
 drivers/infiniband/hw/hns/hns_roce_mr.c |   16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883..d87d189 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -137,11 +137,13 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
 
 	for (i = 0; i <= buddy->max_order; ++i) {
 		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
-		buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
-		if (!buddy->bits[i])
-			goto err_out_free;
-
-		bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
+					 __GFP_NOWARN);
+		if (!buddy->bits[i]) {
+			buddy->bits[i] = vzalloc(s * sizeof(long));
+			if (!buddy->bits[i])
+				goto err_out_free;
+		}
 	}
 
 	set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +153,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
 
 err_out_free:
 	for (i = 0; i <= buddy->max_order; ++i)
-		kfree(buddy->bits[i]);
+		kvfree(buddy->bits[i]);
 
 err_out:
 	kfree(buddy->bits);
@@ -164,7 +166,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
 	int i;
 
 	for (i = 0; i <= buddy->max_order; ++i)
-		kfree(buddy->bits[i]);
+		kvfree(buddy->bits[i]);
 
 	kfree(buddy->bits);
 	kfree(buddy->num_free);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V3 for-next 02/11] IB/hns: Add code for refreshing CQ CI using TPTR
From: Salil Mehta @ 2016-11-23 19:41 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm,
	Dongdong Huang
In-Reply-To: <20161123194109.420760-1-salil.mehta@huawei.com>

From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>

This patch added the code for refreshing CQ CI using TPTR in hip06
SoC.

We send a doorbell to the hardware to refresh the CQ CI when the user
successfully polls a cqe. But this will fail if the doorbell has
been blocked. So the hardware will read a special buffer called TPTR
to get the latest CI value when the cq is almost full.

This patch supports the special CI buffer as follows:
a) Alloc the memory for TPTR in the hns_roce_tptr_init function and
   free it in hns_roce_tptr_free function, these two functions will
   be called in probe function and in the remove function.
b) Add the code for computing offset(every cq need 2 bytes) and
   write the dma addr to every cq context to notice hardware in the
   function named hns_roce_v1_write_cqc.
c) Add code for mapping TPTR buffer to user space in function named
   hns_roce_mmap. The mapping distinguishes TPTR and UAR of user mode
   by vm_pgoff (0: UAR, 1: TPTR, others: invalid) in hip06.
d) Add the code for refreshing CQ CI using TPTR in the function
   named hns_roce_v1_poll_cq.
e) Add some variable definitions to the related structure.

Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Dongdong Huang(Donald) <hdd.huang@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Salil Mehta  <salil.mehta@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_common.h |    2 -
 drivers/infiniband/hw/hns/hns_roce_cq.c     |    9 +++
 drivers/infiniband/hw/hns/hns_roce_device.h |    6 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   79 ++++++++++++++++++++++++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |    9 +++
 drivers/infiniband/hw/hns/hns_roce_main.c   |   13 ++++-
 6 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 2970161..0dcb620 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -253,8 +253,6 @@
 #define ROCEE_VENDOR_ID_REG			0x0
 #define ROCEE_VENDOR_PART_ID_REG		0x4
 
-#define ROCEE_HW_VERSION_REG			0x8
-
 #define ROCEE_SYS_IMAGE_GUID_L_REG		0xC
 #define ROCEE_SYS_IMAGE_GUID_H_REG		0x10
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 0973659..5dc8d92 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -349,6 +349,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 		goto err_mtt;
 	}
 
+	/*
+	 * For the QP created by kernel space, tptr value should be initialized
+	 * to zero; For the QP created by user space, it will cause synchronous
+	 * problems if tptr is set to zero here, so we initialze it in user
+	 * space.
+	 */
+	if (!context)
+		*hr_cq->tptr_addr = 0;
+
 	/* Get created cq handler and carry out event */
 	hr_cq->comp = hns_roce_ib_cq_comp;
 	hr_cq->event = hns_roce_ib_cq_event;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3417315..7242b14 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,6 +37,8 @@
 
 #define DRV_NAME "hns_roce"
 
+#define HNS_ROCE_HW_VER1	('h' << 24 | 'i' << 16 | '0' << 8 | '6')
+
 #define MAC_ADDR_OCTET_NUM			6
 #define HNS_ROCE_MAX_MSG_LEN			0x80000000
 
@@ -296,7 +298,7 @@ struct hns_roce_cq {
 	u32				cq_depth;
 	u32				cons_index;
 	void __iomem			*cq_db_l;
-	void __iomem			*tptr_addr;
+	u16				*tptr_addr;
 	unsigned long			cqn;
 	u32				vector;
 	atomic_t			refcount;
@@ -553,6 +555,8 @@ struct hns_roce_dev {
 
 	int			cmd_mod;
 	int			loop_idc;
+	dma_addr_t		tptr_dma_addr; /*only for hw v1*/
+	u32			tptr_size; /*only for hw v1*/
 	struct hns_roce_hw	*hw;
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 7485514..959d5ca 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -849,6 +849,45 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
 		priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
 }
 
+static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
+
+	/*
+	 * This buffer will be used for CQ's tptr(tail pointer), also
+	 * named ci(customer index). Every CQ will use 2 bytes to save
+	 * cqe ci in hip06. Hardware will read this area to get new ci
+	 * when the queue is almost full.
+	 */
+	tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+					   &tptr_buf->map, GFP_KERNEL);
+	if (!tptr_buf->buf)
+		return -ENOMEM;
+
+	hr_dev->tptr_dma_addr = tptr_buf->map;
+	hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
+
+	return 0;
+}
+
+static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
+
+	dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+			  tptr_buf->buf, tptr_buf->map);
+}
+
 /**
  * hns_roce_v1_reset - reset RoCE
  * @hr_dev: RoCE device struct pointer
@@ -906,12 +945,11 @@ void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 	hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
 	hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
 					     ROCEE_VENDOR_PART_ID_REG));
-	hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG));
-
 	hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
 					     ROCEE_SYS_IMAGE_GUID_L_REG)) |
 				((u64)le32_to_cpu(roce_read(hr_dev,
 					    ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+	hr_dev->hw_rev		= HNS_ROCE_HW_VER1;
 
 	caps->num_qps		= HNS_ROCE_V1_MAX_QP_NUM;
 	caps->max_wqes		= HNS_ROCE_V1_MAX_WQE_NUM;
@@ -1009,8 +1047,17 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
 		goto error_failed_bt_init;
 	}
 
+	ret = hns_roce_tptr_init(hr_dev);
+	if (ret) {
+		dev_err(dev, "tptr init failed!\n");
+		goto error_failed_tptr_init;
+	}
+
 	return 0;
 
+error_failed_tptr_init:
+	hns_roce_bt_free(hr_dev);
+
 error_failed_bt_init:
 	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
 	hns_roce_raq_free(hr_dev);
@@ -1022,6 +1069,7 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
 
 void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
 {
+	hns_roce_tptr_free(hr_dev);
 	hns_roce_bt_free(hr_dev);
 	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
 	hns_roce_raq_free(hr_dev);
@@ -1339,14 +1387,21 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 			   dma_addr_t dma_handle, int nent, u32 vector)
 {
 	struct hns_roce_cq_context *cq_context = NULL;
-	void __iomem *tptr_addr;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+	dma_addr_t tptr_dma_addr;
+	int offset;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
 
 	cq_context = mb_buf;
 	memset(cq_context, 0, sizeof(*cq_context));
 
-	tptr_addr = 0;
-	hr_dev->priv_addr = tptr_addr;
-	hr_cq->tptr_addr = tptr_addr;
+	/* Get the tptr for this CQ. */
+	offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
+	tptr_dma_addr = tptr_buf->map + offset;
+	hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
 
 	/* Register cq_context members */
 	roce_set_field(cq_context->cqc_byte_4,
@@ -1390,10 +1445,10 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->cqc_byte_20,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
-		       (u64)tptr_addr >> 44);
+		       tptr_dma_addr >> 44);
 	cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
 
-	cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12);
+	cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
 
 	roce_set_field(cq_context->cqc_byte_32,
 		       CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -1659,8 +1714,14 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 			break;
 	}
 
-	if (npolled)
+	if (npolled) {
+		*hr_cq->tptr_addr = hr_cq->cons_index &
+			((hr_cq->cq_depth << 1) - 1);
+
+		/* Memroy barrier */
+		wmb();
 		hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
+	}
 
 	spin_unlock_irqrestore(&hr_cq->lock, flags);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 2e1878b..6004c7f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -104,6 +104,10 @@
 
 #define HNS_ROCE_BT_RSV_BUF_SIZE			(1 << 17)
 
+#define HNS_ROCE_V1_TPTR_ENTRY_SIZE			2
+#define HNS_ROCE_V1_TPTR_BUF_SIZE	\
+	(HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
+
 #define HNS_ROCE_ODB_POLL_MODE				0
 
 #define HNS_ROCE_SDB_NORMAL_MODE			0
@@ -983,10 +987,15 @@ struct hns_roce_bt_table {
 	struct hns_roce_buf_list cqc_buf;
 };
 
+struct hns_roce_tptr_table {
+	struct hns_roce_buf_list tptr_buf;
+};
+
 struct hns_roce_v1_priv {
 	struct hns_roce_db_table  db_table;
 	struct hns_roce_raq_table raq_table;
 	struct hns_roce_bt_table  bt_table;
+	struct hns_roce_tptr_table tptr_table;
 };
 
 int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 764e35a..6770171 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -549,6 +549,8 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 static int hns_roce_mmap(struct ib_ucontext *context,
 			 struct vm_area_struct *vma)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+
 	if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
 		return -EINVAL;
 
@@ -558,10 +560,15 @@ static int hns_roce_mmap(struct ib_ucontext *context,
 				       to_hr_ucontext(context)->uar.pfn,
 				       PAGE_SIZE, vma->vm_page_prot))
 			return -EAGAIN;
-
-	} else {
+	} else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+		/* vm_pgoff: 1 -- TPTR */
+		if (io_remap_pfn_range(vma, vma->vm_start,
+				       hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+				       hr_dev->tptr_size,
+				       vma->vm_page_prot))
+			return -EAGAIN;
+	} else
 		return -EINVAL;
-	}
 
 	return 0;
 }
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V3 for-next 01/11] IB/hns: Add the interface for querying QP1
From: Salil Mehta @ 2016-11-23 19:40 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: salil.mehta-hv44wF8Li93QT0dZR+AlfA,
	xavier.huwei-hv44wF8Li93QT0dZR+AlfA,
	oulijun-hv44wF8Li93QT0dZR+AlfA, xushaobo2-hv44wF8Li93QT0dZR+AlfA,
	mehta.salil.lnk-Re5JQEeQqe8AvxtiuMwx3w, lijun_nudt-9Onoh4P/yGk,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <20161123194109.420760-1-salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

From: Lijun Ou <oulijun-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

The old code only provided an interface for querying non-special
QPs. This patch mainly adds an interface for querying QP1.

Signed-off-by: Lijun Ou <oulijun-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Reviewed-by: Wei Hu (Xavier) <xavier.huwei-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Salil Mehta  <salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
Change Log

Patch V2: Addressed the comment provided by Anurup M
	Link: https://patchwork.kernel.org/patch/9412855/
Patch V1: Initial Submit
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |   83 +++++++++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h |    6 +-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5..7485514 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2630,8 +2630,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
 	return ret;
 }
 
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
-			 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			     int qp_attr_mask,
+			     struct ib_qp_init_attr *qp_init_attr)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+	struct hns_roce_sqp_context context;
+	u32 addr;
+
+	mutex_lock(&hr_qp->mutex);
+
+	if (hr_qp->state == IB_QPS_RESET) {
+		qp_attr->qp_state = IB_QPS_RESET;
+		goto done;
+	}
+
+	addr = ROCEE_QP1C_CFG0_0_REG + 
+		hr_qp->port * sizeof(struct hns_roce_sqp_context);
+	context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+	context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+	context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+	context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+	context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+	context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+	context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+	context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+	context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+	context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+
+	hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+				      QP1C_BYTES_4_QP_STATE_M,
+				      QP1C_BYTES_4_QP_STATE_S);
+	qp_attr->qp_state	= hr_qp->state;
+	qp_attr->path_mtu	= IB_MTU_256;
+	qp_attr->path_mig_state	= IB_MIG_ARMED;
+	qp_attr->qkey		= QKEY_VAL;
+	qp_attr->rq_psn		= 0;
+	qp_attr->sq_psn		= 0;
+	qp_attr->dest_qp_num	= 1;
+	qp_attr->qp_access_flags = 6;
+
+	qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+					     QP1C_BYTES_20_PKEY_IDX_M,
+					     QP1C_BYTES_20_PKEY_IDX_S);
+	qp_attr->port_num = hr_qp->port + 1;
+	qp_attr->sq_draining = 0;
+	qp_attr->max_rd_atomic = 0;
+	qp_attr->max_dest_rd_atomic = 0;
+	qp_attr->min_rnr_timer = 0;
+	qp_attr->timeout = 0;
+	qp_attr->retry_cnt = 0;
+	qp_attr->rnr_retry = 0;
+	qp_attr->alt_timeout = 0;
+
+done:
+	qp_attr->cur_qp_state = qp_attr->qp_state;
+	qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+	qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+	qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+	qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+	qp_attr->cap.max_inline_data = 0;
+	qp_init_attr->cap = qp_attr->cap;
+	qp_init_attr->create_flags = 0;
+
+	mutex_unlock(&hr_qp->mutex);
+
+	return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			    int qp_attr_mask,
+			    struct ib_qp_init_attr *qp_init_attr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2767,6 +2837,15 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	return ret;
 }
 
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+	return hr_qp->doorbell_qpn <= 1 ?
+		hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+		hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
 static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
 					  struct hns_roce_qp *hr_qp,
 					  int is_user)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 539b0a3b..2e1878b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -480,13 +480,17 @@ struct hns_roce_sqp_context {
 	u32 qp1c_bytes_12;
 	u32 qp1c_bytes_16;
 	u32 qp1c_bytes_20;
-	u32 qp1c_bytes_28;
 	u32 cur_rq_wqe_ba_l;
+	u32 qp1c_bytes_28;
 	u32 qp1c_bytes_32;
 	u32 cur_sq_wqe_ba_l;
 	u32 qp1c_bytes_40;
 };
 
+#define QP1C_BYTES_4_QP_STATE_S 0
+#define QP1C_BYTES_4_QP_STATE_M   \
+	(((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
+
 #define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
 #define QP1C_BYTES_4_SQ_WQE_SHIFT_M   \
 	(((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH V3 for-next 00/11] Code improvements & fixes for HNS RoCE driver
From: Salil Mehta @ 2016-11-23 19:40 UTC (permalink / raw)
  To: dledford
  Cc: salil.mehta, xavier.huwei, oulijun, xushaobo2, mehta.salil.lnk,
	lijun_nudt, linux-rdma, netdev, linux-kernel, linuxarm

This patchset introduces some code improvements and fixes
for the identified problems in the HNS RoCE driver.

Lijun Ou (4):
  IB/hns: Add the interface for querying QP1
  IB/hns: add self loopback for CM
  IB/hns: Modify the condition of notifying hardware loopback
  IB/hns: Fix the bug for qp state in hns_roce_v1_m_qp()

Salil Mehta (1):
  IB/hns: Fix for Checkpatch.pl comment style errors

Shaobo Xu (1):
  IB/hns: Implement the add_gid/del_gid and optimize the GIDs
    management

Wei Hu (Xavier) (5):
  IB/hns: Add code for refreshing CQ CI using TPTR
  IB/hns: Optimize the logic of allocating memory using APIs
  IB/hns: Modify the macro for the timeout when cmd process
  IB/hns: Modify query info named port_num when querying RC QP
  IB/hns: Change qpn allocation to round-robin mode.

 drivers/infiniband/hw/hns/hns_roce_alloc.c  |   11 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.c    |    8 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.h    |    7 +-
 drivers/infiniband/hw/hns/hns_roce_common.h |    2 -
 drivers/infiniband/hw/hns/hns_roce_cq.c     |   17 +-
 drivers/infiniband/hw/hns/hns_roce_device.h |   45 ++--
 drivers/infiniband/hw/hns/hns_roce_eq.c     |    6 +-
 drivers/infiniband/hw/hns/hns_roce_hem.c    |    6 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  267 +++++++++++++++++------
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |   17 +-
 drivers/infiniband/hw/hns/hns_roce_main.c   |  311 +++++++--------------------
 drivers/infiniband/hw/hns/hns_roce_mr.c     |   22 +-
 drivers/infiniband/hw/hns/hns_roce_pd.c     |    5 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |    2 +-
 14 files changed, 364 insertions(+), 362 deletions(-)

-- 
1.7.9.5

^ permalink raw reply

* Re: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable
From: Mark Lord @ 2016-11-23 19:29 UTC (permalink / raw)
  To: Hayes Wang, netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
  Cc: nic_swsd, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-usb-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <0835B3720019904CB8F7AA43166CEEB201051D51-JIZ+AM9kKNzuvTFwvkocLypo8c9IxeqyAjHCUHv49ws@public.gmane.org>

On 16-11-23 10:12 AM, Hayes Wang wrote:
> Mark Lord [mlord-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org]
> [...]
>> What does this code do:
>
>>> static void r8153_set_rx_early_size(struct r8152 *tp)
>>> {
>>>        u32 mtu = tp->netdev->mtu;
>>>        u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4;
>>>
>>>        ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
>>> }
>
> This only works for RTL8153. However, what you use is RTL8152.
> It is like delay completion. It is used to reduce the loading of CPU
> by letting a transfer contain more data to reduce the number of
> transfers.
>
>> How is ocp_data used by the hardware?
>> Shouldn't the calculation also include sizeof(rx_desc) in there somewhere?
>
> The algorithm is from our hw engineers, and it should be
>
>    (agg_buf_sz - packet size) / 8
>
> You could refer to commit a59e6d815226 ("r8152: correct the rx early size").

Thanks.

Right now I am working quite hard trying to narrow things down exactly.
You are correct that the driver does appear to be careful about accesses
beyond the filled portion of a URB buffer -- for some reason I thought
the original driver had issues there, but looking again it does not seem to.

One idea that is now looking more likely:
Things could be suffering from speculative CPU accesses to RAM
(the system here has non-coherent d-cache/RAM).
This could incorrectly pre-load data from adjacent URB buffers
into the d-cache, creating coherency issues.  I am testing now
with cacheline-sized guard zones between the buffers to see if
that is the issue or not.

Worth repeating: other dongles we have tried, eg. those using the asix driver,
do not cause us any troubles here.  Only the r8152 dongles do.

The other drivers do not use hardware checksums, so even if they did
incur similar bad packets, whatever the reason, those bad packets
would be detected/rejected by the Linux network stack (software checksums).
So everything appears to behave fine with them, as it does with
the r8152 driver when hardware checksums are disabled.

Still trying to understand exactly how these errors are happening.
It takes a very long time to do a conclusive test of anything here,
and I only have the hardware for a day or two a week.
So my apologies if I am slow in getting back to you on stuff.

Cheers



--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next] net/sched: cls_flower: verify root pointer before dereferncing it
From: Cong Wang @ 2016-11-23 19:29 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: John Fastabend, Jiri Pirko, Roi Dayan, David S. Miller,
	Linux Kernel Network Developers, Jiri Pirko, Or Gerlitz,
	Cong Wang
In-Reply-To: <58357D7E.3010100@iogearbox.net>

On Wed, Nov 23, 2016 at 3:29 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Can't we drop the 'force' parameter from tcf_destroy() and related cls
> destroy() callbacks, and change the logic roughly like this:
>
> [...]
>         case RTM_DELTFILTER:
>                 err = tp->ops->delete(tp, fh, &drop_tp);
>                 if (err == 0) {
>                         struct tcf_proto *next = rtnl_dereference(tp->next);
>
>                         tfilter_notify(net, skb, n, tp,
>                                        t->tcm_handle,
>                                        RTM_DELTFILTER, false);
>                         if (drop_tp) {
>                                 RCU_INIT_POINTER(*back, next);
>                                 tcf_destroy(tp);
>                         }
>                 }
>                 goto errout;
> [...]
>
> This one was the only tcf_destroy() instance with force=false. Why can't
> the prior delete() callback make the decision whether the tp now has no
> further internal filters and thus can be dropped. Afaik, delete() and
> destroy() are protected by RTNL anyway. Thus, we could unlink the tp from
> the list before tcf_destroy(), which should then work with grace period
> as well. Given we remove the setting of tp->root to NULL, any outstanding
> readers for that grace period should either still execute the 'scheduled
> for removal' filter we just dropped, or find an empty list of filters.

This is exactly why I said "the semantic of ->destroy() needs to revise too",
this is a reasonable revision of course, but the change is still large because
we need to move that logic from ->destroy() to ->delete(). I was trying to find
a relatively small fix for -net and -stable; for -net-next we could make a more
aggressive change as long as it's necessary. This is why I am still thinking about it,
perhaps there is no quick fix for this bug.


>
>> Hmm, perhaps we really have to switch to a doubly-linked list, that is
>> list_head. I need to double check. And also the semantic of ->destroy()
>> needs to revise too.
>
>
> Can you elaborate why double-linked list? Isn't the tp list always protected
> from modifications via RTNL in control path, and walked via
> rcu_dereference_bh()
> in data path?

At least two benefits we can get from using doubly-linked list:

1) No need to pass a 'prev' pointer if we want to remove tp in a RCU callback,
list_del_rcu(&tp->head) is just enough.

2) No need to worry about RCU pointers because list_head has RCU API's
already, much more readable to me.

Of course, the size of struct tcf_proto will grow a bit, but it doesn't seem to
be a problem.

^ permalink raw reply

* Re: [PATCH net-next 1/2] openvswitch: Add a missing break statement.
From: Pravin Shelar @ 2016-11-23 19:23 UTC (permalink / raw)
  To: Jarno Rajahalme; +Cc: Linux Kernel Network Developers
In-Reply-To: <1479874174-75329-1-git-send-email-jarno@ovn.org>

On Tue, Nov 22, 2016 at 8:09 PM, Jarno Rajahalme <jarno@ovn.org> wrote:
> Add a break statement to prevent fall-through from
> OVS_KEY_ATTR_ETHERNET to OVS_KEY_ATTR_TUNNEL.  Without the break
> actions setting ethernet addresses fail to validate with log messages
> complaining about invalid tunnel attributes.
>
> Fixes: 0a6410fbde ("openvswitch: netlink: support L3 packets")
> Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
> ---
>  net/openvswitch/flow_netlink.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index d19044f..c87d359 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -2195,6 +2195,7 @@ static int validate_set(const struct nlattr *a,
>         case OVS_KEY_ATTR_ETHERNET:
>                 if (mac_proto != MAC_PROTO_ETHERNET)
>                         return -EINVAL;
> +               break;
>
>         case OVS_KEY_ATTR_TUNNEL:
>                 if (masked)

Thanks for tracking it down.

Acked-by: Pravin B Shelar <pshelar@ovn.org>

^ permalink raw reply

* Re: [patch net-next v2 10/11] mlxsw: spectrum_router: Request a dump of FIB tables during init
From: Ido Schimmel @ 2016-11-23 19:22 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Jiri Pirko, netdev, davem, idosch, eladr, yotamg, nogahf, arkadis,
	ogerlitz, roopa, dsa, nikolay, andy, vivien.didelot, andrew,
	f.fainelli, alexander.h.duyck, kaber
In-Reply-To: <1479920903.4037682.797206209.599A8C7A@webmail.messagingengine.com>

On Wed, Nov 23, 2016 at 06:08:23PM +0100, Hannes Frederic Sowa wrote:
> On Wed, Nov 23, 2016, at 18:04, Jiri Pirko wrote:
> > >Sure, but an abort function can be provided to the kernel anyway and the
> > >driver can care about that.
> > 
> > Ok, how?
> 
> I think just a sysctl ontop of this series is enough plus a pr_warn.
> Rocker and mlxsw are responsible to loop for a maximum amount of time.

Maybe, when the module requests a dump it can also provide a callback
that is invoked following each failed dump?

^ permalink raw reply

* Re: [PATCH 2/2] net: dsa: mv88e6xxx: enable EDSA on mv88e6097
From: Vivien Didelot @ 2016-11-23 18:21 UTC (permalink / raw)
  To: Stefan Eichenberger, andrew, davem; +Cc: netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-3-stefan.eichenberger@netmodule.com>

Hi Stefan,

Stefan Eichenberger <eichest@gmail.com> writes:

> EDSA is currently disabled on mv88e6097 devices, this commit enables it.
>
> Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>

Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>

(you can include our Reviewed-by tags directly in the commit message of
this patch for v3, right under your Signed-off-by tag.)

Thanks,

        Vivien

^ permalink raw reply

* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Vivien Didelot @ 2016-11-23 18:18 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <20161123180125.GF8760@lunn.ch>

Hi Andrew,

Andrew Lunn <andrew@lunn.ch> writes:

> On Wed, Nov 23, 2016 at 12:52:52PM -0500, Vivien Didelot wrote:
>> Hi Andrew,
>> 
>> Andrew Lunn <andrew@lunn.ch> writes:
>> 
>> > And if you have a recent version of tcpdump, it will decode
>> > the header.
>> 
>> Since d729eb4, thanks to you Andrew ;-)
>> 
>> I move up the cleanup of ports setup in my priority list.
>
> Hi Vivien
>
> Please take a look at my mv88e6390 branch. I already refactored this
> code, because the mv88e6390 does something slightly different...
>
> I hope to post another batch of mv88e6390 patches soon, and they will
> include this cleanup. Since they will clash with these patches, i will
> post them first as RFC.

Perfect. Please split out an RFC that includes only this cleanup, if
possible. Fewer patches will be easier to review, since the first port
registers differ a lot.

Thanks,

        Vivien

^ permalink raw reply

* [PATCH 2/2] net: dsa: mv88e6xxx: enable EDSA on mv88e6097
From: Stefan Eichenberger @ 2016-11-23 17:55 UTC (permalink / raw)
  To: andrew, vivien.didelot, davem; +Cc: netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-1-stefan.eichenberger@netmodule.com>

EDSA is currently disabled on mv88e6097 devices, this commit enables it.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>
---
 drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index ab52c37..a2ff1fc 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -543,7 +543,8 @@ enum mv88e6xxx_cap {
 	 MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6097	\
-	(MV88E6XXX_FLAG_G1_ATU_FID |	\
+	(MV88E6XXX_FLAG_EDSA |		\
+	 MV88E6XXX_FLAG_G1_ATU_FID |	\
 	 MV88E6XXX_FLAG_G1_VTU_FID |	\
 	 MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCH net-next 4/5] net: phy: bcm7xxx: Add support for downshift/Wirespeed
From: Florian Fainelli @ 2016-11-23 18:16 UTC (permalink / raw)
  To: Andrew Lunn, Allan W. Nielsen
  Cc: netdev, davem, bcm-kernel-feedback-list, raju.lakkaraju,
	vivien.didelot
In-Reply-To: <20161123144636.GK14947@lunn.ch>

On 11/23/2016 06:46 AM, Andrew Lunn wrote:
>>>> Maybe we should think about this locking a bit. It is normal for the
>>>> lock to be held when using ops in the phy driver structure. The
>>>> exception is suspend/resume. Maybe we should also take the lock before
>>>> calling the phydev->drv->get_tunable() and phydev->drv->set_tunable()?
>>>
>>> Yes, that certainly seems like a good approach to me, let me cook a
>>> patch doing that.
>>
>> Just for my understanding (such that I will not make the same mistake again)...
>>
>> Why is it that phy functions such as get_wol needs to take the phy_lock and
>> others like get_tunable does not.
>>
>> I do understand the arguments on why the lock should be held by the caller of
>> get_tunable, but I do not understand why the same argument does not apply for
>> get_wol.
> 
> Hi Allan
> 
> phy_ethtool_get_wol and friends probably should take the
> phy_lock. This inconsistency is probably leading to locking
> bugs. e.g. at803x_set_wol() does a read-modify-write, and does not
> take the lock.
> 
> There is no comment in the patch adding phy_ethtool_set_wol() to say
> why the lock is not taken, and a quick look at the code does not
> suggest a reason why it could not be taken/released by
> phy_ethtool_set_wol().

Yes, this should happen. I don't see what prevents two user-space
processes from racing with each other here; see, for instance,
mv643xx_eth_get_wol and cpsw_get_wol.

> 
> I think it would be a good idea to change this.
> 
> phy_suspend()/phy_resume() might have good reasons to avoid the lock,
> i've no idea how it is supposed to work. Is there a danger something
> else is holding the lock and has already been suspended? I guess not,
> otherwise there is little hope suspend would work at all.

phy_suspend() and phy_resume() usually get called after phy_disconnect()
or phy_stop() have been invoked, and even then this is during the
Ethernet driver's suspend/resume path, so there is no room for
concurrency to occur (user space is quiesced, and the PHY state machine
is stopped/halted), but still, if we were to change the calling context
it would be a good idea to acquire phydev->lock.
-- 
Florian

^ permalink raw reply

* Re: [PATCH net-next] net: properly flush delay-freed skbs
From: Jesper Dangaard Brouer @ 2016-11-23 18:11 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Eric Dumazet, David Miller, netdev, Alexander Duyck, brouer
In-Reply-To: <CAKgT0UerbiZ8cWk3hkVO2SckxBgiG3zsLn_P05jM=-Cr6B3qAg@mail.gmail.com>

On Wed, 23 Nov 2016 09:12:50 -0800
Alexander Duyck <alexander.duyck@gmail.com> wrote:

> On Wed, Nov 23, 2016 at 8:44 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > Typical NAPI drivers use napi_consume_skb(skb) at TX completion time.
> > This put skb in a percpu special queue, napi_alloc_cache, to get bulk
> > frees.
> >
> > It turns out the queue is not flushed and hits the NAPI_SKB_CACHE_SIZE
> > limit quite often, with skbs that were queued hundreds of usec earlier.
> > I measured this can take ~6000 nsec to perform one flush.
> >
> > __kfree_skb_flush() can be called from two points right now :
> >
> > 1) From net_tx_action(), but only for skbs that were queued to
> > sd->completion_queue.
> >  
> >  -> Irrelevant for NAPI drivers in normal operation.  
> >
> > 2) From net_rx_action(), but only under high stress or if RPS/RFS has a
> > pending action.
> >
> > This patch changes net_rx_action() to perform the flush in all cases and
> > after more urgent operations happened (like kicking remote CPUS for
> > RPS/RFS).
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> > Cc: Alexander Duyck <alexander.h.duyck@intel.com>
> > ---  
> 
> Yeah, we didn't intent the data to be sitting around that long.  The
> change looks good to me.
> 
> Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>

Also looks good to me! Thanks for catching this.

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH 2/2] net: dsa: mv88e6xxx: enable EDSA on mv88e6097
From: Andrew Lunn @ 2016-11-23 18:10 UTC (permalink / raw)
  To: Stefan Eichenberger; +Cc: vivien.didelot, davem, netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-3-stefan.eichenberger@netmodule.com>

On Wed, Nov 23, 2016 at 06:55:46PM +0100, Stefan Eichenberger wrote:
> EDSA is currently disabled on mv88e6097 devices, this commit enables it.
> 
> Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH 1/2] net: dsa: mv88e6xxx: add MV88E6097 switch
From: Andrew Lunn @ 2016-11-23 18:10 UTC (permalink / raw)
  To: Stefan Eichenberger; +Cc: vivien.didelot, davem, netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-2-stefan.eichenberger@netmodule.com>

> +	[MV88E6097] = {
> +		.prod_num = PORT_SWITCH_ID_PROD_NUM_6097,
> +		.family = MV88E6XXX_FAMILY_6097,
> +		.name = "Marvell 88E6097/88E6097F",
> +		.num_databases = 4096,
> +		.num_ports = 11,
> +		.port_base_addr = 0x10,
> +		.global1_addr = 0x1b,
> +		.age_time_coeff = 15000,
> +		.flags = MV88E6XXX_FLAGS_FAMILY_6097,
> +		.ops = &mv88e6097_ops,

Oops. Sorry, I missed something when you rebased onto net-next. You
are missing the .g1_irqs initializer. It is probably 9. You can check the
datasheet, global 1, register 0. If bit 8 is AVBInt, you need 9. If
bit 8 is reserved, then 8.

    Andrew

^ permalink raw reply

* Re: [PATCH net-next 0/2] Add support for the MV88e6097
From: Vivien Didelot @ 2016-11-23 18:09 UTC (permalink / raw)
  To: Stefan Eichenberger, andrew, davem; +Cc: netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-1-stefan.eichenberger@netmodule.com>

Hi Stefan,

Stefan Eichenberger <eichest@gmail.com> writes:

> This patchset will add support for the MV88E6097 DSA switch and enable
> EDSA on MV88E6097 family devices.
>
> Stefan Eichenberger (2):
>   net: dsa: mv88e6xxx: add MV88E6097 switch
>   net: dsa: mv88e6xxx: enable EDSA on mv88e6097
>
>  drivers/net/dsa/mv88e6xxx/chip.c      | 26 ++++++++++++++++++++++++++
>  drivers/net/dsa/mv88e6xxx/mv88e6xxx.h |  5 ++++-
>  2 files changed, 30 insertions(+), 1 deletion(-)

Ideally I'd put 2/2 first, because right after 1/2 your switch won't
work as expected.

Thanks,

        Vivien

^ permalink raw reply

* Re: [PATCH 12/20] net/iucv: Convert to hotplug state machine
From: Ursula Braun @ 2016-11-23 18:04 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, linux-kernel
  Cc: rt, David S. Miller, linux-s390, netdev
In-Reply-To: <20161117183541.8588-13-bigeasy@linutronix.de>

Sebastian,

your patch looks good to me. I successfully ran some small tests with it.
I want to suggest a small change in iucv_init() to keep the uniform technique
of undo labels below. Do you agree?

Kind regards, Ursula

On 11/17/2016 07:35 PM, Sebastian Andrzej Siewior wrote:
> Install the callbacks via the state machine and let the core invoke the
> callbacks on the already online CPUs. The smp function calls in the
> online/downprep callbacks are not required as the callback is guaranteed to
> be invoked on the upcoming/outgoing cpu.
> 
> Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: linux-s390@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  include/linux/cpuhotplug.h |   1 +
>  net/iucv/iucv.c            | 118 +++++++++++++++++----------------------------
>  2 files changed, 45 insertions(+), 74 deletions(-)
> 
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index fd5598b8353a..69abf2c09f6c 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -63,6 +63,7 @@ enum cpuhp_state {
>  	CPUHP_X86_THERM_PREPARE,
>  	CPUHP_X86_CPUID_PREPARE,
>  	CPUHP_X86_MSR_PREPARE,
> +	CPUHP_NET_IUCV_PREPARE,
>  	CPUHP_TIMERS_DEAD,
>  	CPUHP_NOTF_ERR_INJ_PREPARE,
>  	CPUHP_MIPS_SOC_PREPARE,
> diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
> index 88a2a3ba4212..f0d6afc5d4a9 100644
> --- a/net/iucv/iucv.c
> +++ b/net/iucv/iucv.c
> @@ -639,7 +639,7 @@ static void iucv_disable(void)
>  	put_online_cpus();
>  }
>  
> -static void free_iucv_data(int cpu)
> +static int iucv_cpu_dead(unsigned int cpu)
>  {
>  	kfree(iucv_param_irq[cpu]);
>  	iucv_param_irq[cpu] = NULL;
> @@ -647,9 +647,10 @@ static void free_iucv_data(int cpu)
>  	iucv_param[cpu] = NULL;
>  	kfree(iucv_irq_data[cpu]);
>  	iucv_irq_data[cpu] = NULL;
> +	return 0;
>  }
>  
> -static int alloc_iucv_data(int cpu)
> +static int iucv_cpu_prepare(unsigned int cpu)
>  {
>  	/* Note: GFP_DMA used to get memory below 2G */
>  	iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
> @@ -671,58 +672,38 @@ static int alloc_iucv_data(int cpu)
>  	return 0;
>  
>  out_free:
> -	free_iucv_data(cpu);
> +	iucv_cpu_dead(cpu);
>  	return -ENOMEM;
>  }
>  
> -static int iucv_cpu_notify(struct notifier_block *self,
> -				     unsigned long action, void *hcpu)
> +static int iucv_cpu_online(unsigned int cpu)
>  {
> -	cpumask_t cpumask;
> -	long cpu = (long) hcpu;
> -
> -	switch (action) {
> -	case CPU_UP_PREPARE:
> -	case CPU_UP_PREPARE_FROZEN:
> -		if (alloc_iucv_data(cpu))
> -			return notifier_from_errno(-ENOMEM);
> -		break;
> -	case CPU_UP_CANCELED:
> -	case CPU_UP_CANCELED_FROZEN:
> -	case CPU_DEAD:
> -	case CPU_DEAD_FROZEN:
> -		free_iucv_data(cpu);
> -		break;
> -	case CPU_ONLINE:
> -	case CPU_ONLINE_FROZEN:
> -	case CPU_DOWN_FAILED:
> -	case CPU_DOWN_FAILED_FROZEN:
> -		if (!iucv_path_table)
> -			break;
> -		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
> -		break;
> -	case CPU_DOWN_PREPARE:
> -	case CPU_DOWN_PREPARE_FROZEN:
> -		if (!iucv_path_table)
> -			break;
> -		cpumask_copy(&cpumask, &iucv_buffer_cpumask);
> -		cpumask_clear_cpu(cpu, &cpumask);
> -		if (cpumask_empty(&cpumask))
> -			/* Can't offline last IUCV enabled cpu. */
> -			return notifier_from_errno(-EINVAL);
> -		smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
> -		if (cpumask_empty(&iucv_irq_cpumask))
> -			smp_call_function_single(
> -				cpumask_first(&iucv_buffer_cpumask),
> -				iucv_allow_cpu, NULL, 1);
> -		break;
> -	}
> -	return NOTIFY_OK;
> +	if (!iucv_path_table)
> +		return 0;
> +	iucv_declare_cpu(NULL);
> +	return 0;
>  }
>  
> -static struct notifier_block __refdata iucv_cpu_notifier = {
> -	.notifier_call = iucv_cpu_notify,
> -};
> +static int iucv_cpu_down_prep(unsigned int cpu)
> +{
> +	cpumask_t cpumask;
> +
> +	if (!iucv_path_table)
> +		return 0;
> +
> +	cpumask_copy(&cpumask, &iucv_buffer_cpumask);
> +	cpumask_clear_cpu(cpu, &cpumask);
> +	if (cpumask_empty(&cpumask))
> +		/* Can't offline last IUCV enabled cpu. */
> +		return -EINVAL;
> +
> +	iucv_retrieve_cpu(NULL);
> +	if (!cpumask_empty(&iucv_irq_cpumask))
> +		return 0;
> +	smp_call_function_single(cpumask_first(&iucv_buffer_cpumask),
> +				 iucv_allow_cpu, NULL, 1);
> +	return 0;
> +}
>  
>  /**
>   * iucv_sever_pathid
> @@ -2027,6 +2008,7 @@ struct iucv_interface iucv_if = {
>  };
>  EXPORT_SYMBOL(iucv_if);
>  
> +static enum cpuhp_state iucv_online;
>  /**
>   * iucv_init
>   *
> @@ -2035,7 +2017,6 @@ EXPORT_SYMBOL(iucv_if);
>  static int __init iucv_init(void)
>  {
>  	int rc;
> -	int cpu;
>  
>  	if (!MACHINE_IS_VM) {
>  		rc = -EPROTONOSUPPORT;
> @@ -2054,23 +2035,19 @@ static int __init iucv_init(void)
>  		goto out_int;
>  	}
>  
> -	cpu_notifier_register_begin();
> -
> -	for_each_online_cpu(cpu) {
> -		if (alloc_iucv_data(cpu)) {
> -			rc = -ENOMEM;
> -			goto out_free;
> -		}
> -	}
> -	rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
> +	rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
> +			       iucv_cpu_prepare, iucv_cpu_dead);
>  	if (rc)
>  		goto out_free;
> -
> -	cpu_notifier_register_done();
> +	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
> +			       iucv_cpu_online, iucv_cpu_down_prep);
> +	if (rc < 0)
> +		goto out_free;
> +	iucv_online = rc;
>  
>  	rc = register_reboot_notifier(&iucv_reboot_notifier);
>  	if (rc)
> -		goto out_cpu;
> +		goto out_free;
>  	ASCEBC(iucv_error_no_listener, 16);
>  	ASCEBC(iucv_error_no_memory, 16);
>  	ASCEBC(iucv_error_pathid, 16);
> @@ -2084,14 +2061,10 @@ static int __init iucv_init(void)
>  
>  out_reboot:
>  	unregister_reboot_notifier(&iucv_reboot_notifier);
> -out_cpu:
> -	cpu_notifier_register_begin();
> -	__unregister_hotcpu_notifier(&iucv_cpu_notifier);
>  out_free:
> -	for_each_possible_cpu(cpu)
> -		free_iucv_data(cpu);
> -
> -	cpu_notifier_register_done();
> +	if (iucv_online)
> +		cpuhp_remove_state(iucv_online);
> +	cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
>  
>  	root_device_unregister(iucv_root);
>  out_int:
I prefer to keep the technique of cascaded undo labels here, like this:
@@ -2054,23 +2035,19 @@ static int __init iucv_init(void)
                goto out_int;
        }
 
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               if (alloc_iucv_data(cpu)) {
-                       rc = -ENOMEM;
-                       goto out_free;
-               }
-       }
-       rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
+       rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
+                              iucv_cpu_prepare, iucv_cpu_dead);
        if (rc)
-               goto out_free;
-
-       cpu_notifier_register_done();
+               goto out_dev;
+       rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
+                              iucv_cpu_online, iucv_cpu_down_prep);
+       if (rc < 0)
+               goto out_prep;
+       iucv_online = rc;
 
        rc = register_reboot_notifier(&iucv_reboot_notifier);
        if (rc)
-               goto out_cpu;
+               goto out_remove;
        ASCEBC(iucv_error_no_listener, 16);
        ASCEBC(iucv_error_no_memory, 16);
        ASCEBC(iucv_error_pathid, 16);
@@ -2084,15 +2061,12 @@ static int __init iucv_init(void)
 
 out_reboot:
        unregister_reboot_notifier(&iucv_reboot_notifier);
-out_cpu:
-       cpu_notifier_register_begin();
-       __unregister_hotcpu_notifier(&iucv_cpu_notifier);
-out_free:
-       for_each_possible_cpu(cpu)
-               free_iucv_data(cpu);
-
-       cpu_notifier_register_done();
-
+out_remove:
+       if (iucv_online)
+               cpuhp_remove_state(iucv_online);
+out_prep:
+       cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
+out_dev:
        root_device_unregister(iucv_root);
 out_int:
        unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);

> @@ -2110,7 +2083,6 @@ static int __init iucv_init(void)
>  static void __exit iucv_exit(void)
>  {
>  	struct iucv_irq_list *p, *n;
> -	int cpu;
>  
>  	spin_lock_irq(&iucv_queue_lock);
>  	list_for_each_entry_safe(p, n, &iucv_task_queue, list)
> @@ -2119,11 +2091,9 @@ static void __exit iucv_exit(void)
>  		kfree(p);
>  	spin_unlock_irq(&iucv_queue_lock);
>  	unregister_reboot_notifier(&iucv_reboot_notifier);
> -	cpu_notifier_register_begin();
> -	__unregister_hotcpu_notifier(&iucv_cpu_notifier);
> -	for_each_possible_cpu(cpu)
> -		free_iucv_data(cpu);
> -	cpu_notifier_register_done();
> +
> +	cpuhp_remove_state_nocalls(iucv_online);
> +	cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
>  	root_device_unregister(iucv_root);
>  	bus_unregister(&iucv_bus);
>  	unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
> 

^ permalink raw reply

* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Andrew Lunn @ 2016-11-23 18:01 UTC (permalink / raw)
  To: Vivien Didelot; +Cc: Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <877f7uxevf.fsf@ketchup.i-did-not-set--mail-host-address--so-tickle-me>

On Wed, Nov 23, 2016 at 12:52:52PM -0500, Vivien Didelot wrote:
> Hi Andrew,
> 
> Andrew Lunn <andrew@lunn.ch> writes:
> 
> > And if you have a recent version of tcpdump, it will decode
> > the header.
> 
> Since d729eb4, thanks to you Andrew ;-)
> 
> I move up the cleanup of ports setup in my priority list.

Hi Vivien

Please take a look at my mv88e6390 branch. I already refactored this
code, because the mv88e6390 does something slightly different...

I hope to post another batch of mv88e6390 patches soon, and they will
include this cleanup. Since they will clash with these patches, i will
post them first as RFC.

      Andrew

^ permalink raw reply

* [PATCH 1/2] net: dsa: mv88e6xxx: add MV88E6097 switch
From: Stefan Eichenberger @ 2016-11-23 17:55 UTC (permalink / raw)
  To: andrew, vivien.didelot, davem; +Cc: netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-1-stefan.eichenberger@netmodule.com>

Add support for the MV88E6097 switch. The change was tested on an Armada
based platform with a MV88E6097 switch.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c      | 26 ++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/mv88e6xxx.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index bada646..b14b3d5 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3209,6 +3209,19 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
 	.stats_get_stats = mv88e6095_stats_get_stats,
 };
 
+static const struct mv88e6xxx_ops mv88e6097_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+	.port_set_link = mv88e6xxx_port_set_link,
+	.port_set_duplex = mv88e6xxx_port_set_duplex,
+	.port_set_speed = mv88e6185_port_set_speed,
+	.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+	.stats_get_sset_count = mv88e6095_stats_get_sset_count,
+	.stats_get_strings = mv88e6095_stats_get_strings,
+	.stats_get_stats = mv88e6095_stats_get_stats,
+};
+
 static const struct mv88e6xxx_ops mv88e6123_ops = {
 	/* MV88E6XXX_FAMILY_6165 */
 	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
@@ -3580,6 +3593,19 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.ops = &mv88e6095_ops,
 	},
 
+	[MV88E6097] = {
+		.prod_num = PORT_SWITCH_ID_PROD_NUM_6097,
+		.family = MV88E6XXX_FAMILY_6097,
+		.name = "Marvell 88E6097/88E6097F",
+		.num_databases = 4096,
+		.num_ports = 11,
+		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
+		.age_time_coeff = 15000,
+		.flags = MV88E6XXX_FLAGS_FAMILY_6097,
+		.ops = &mv88e6097_ops,
+	},
+
 	[MV88E6123] = {
 		.prod_num = PORT_SWITCH_ID_PROD_NUM_6123,
 		.family = MV88E6XXX_FAMILY_6165,
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 9298faa..ab52c37 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -81,6 +81,7 @@
 #define PORT_SWITCH_ID		0x03
 #define PORT_SWITCH_ID_PROD_NUM_6085	0x04a
 #define PORT_SWITCH_ID_PROD_NUM_6095	0x095
+#define PORT_SWITCH_ID_PROD_NUM_6097	0x099
 #define PORT_SWITCH_ID_PROD_NUM_6131	0x106
 #define PORT_SWITCH_ID_PROD_NUM_6320	0x115
 #define PORT_SWITCH_ID_PROD_NUM_6123	0x121
@@ -378,6 +379,7 @@
 enum mv88e6xxx_model {
 	MV88E6085,
 	MV88E6095,
+	MV88E6097,
 	MV88E6123,
 	MV88E6131,
 	MV88E6161,
-- 
2.9.3

^ permalink raw reply related

* [PATCH net-next 0/2] Add support for the MV88e6097
From: Stefan Eichenberger @ 2016-11-23 17:55 UTC (permalink / raw)
  To: andrew, vivien.didelot, davem; +Cc: netdev, Stefan Eichenberger

This patchset will add support for the MV88E6097 DSA switch and enable
EDSA on MV88E6097 family devices.

Stefan Eichenberger (2):
  net: dsa: mv88e6xxx: add MV88E6097 switch
  net: dsa: mv88e6xxx: enable EDSA on mv88e6097

 drivers/net/dsa/mv88e6xxx/chip.c      | 26 ++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/mv88e6xxx.h |  5 ++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

-- 
2.9.3

^ permalink raw reply

* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Vivien Didelot @ 2016-11-23 17:52 UTC (permalink / raw)
  To: Andrew Lunn, Stefan Eichenberger; +Cc: Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <20161123174040.GE8760@lunn.ch>

Hi Andrew,

Andrew Lunn <andrew@lunn.ch> writes:

> And if you have a recent version of tcpdump, it will decode
> the header.

Since d729eb4, thanks to you Andrew ;-)

I move up the cleanup of ports setup in my priority list. The code is
quite cluttered at the moment and it's hard to read through it. We need
proper helpers for egress floods, (E)DSA setup, etc. like what is being
done for the other devices.

Thanks,

        Vivien

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox