Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 02/13] net: stmmac: xgmac: Add RBU handling in DMA interrupt
From: Jose Abreu @ 2019-09-02  8:01 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1567410970.git.joabreu@synopsys.com>

Add the handling of Receive Buffer Unavailable interrupt in the DMA
handler of XGMAC cores.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 64956465c030..e77eb0ddf9b5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -322,6 +322,10 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
 
 	/* ABNORMAL interrupts */
 	if (unlikely(intr_status & XGMAC_AIS)) {
+		if (unlikely(intr_status & XGMAC_RBU)) {
+			x->rx_buf_unav_irq++;
+			ret |= handle_rx;
+		}
 		if (unlikely(intr_status & XGMAC_TPS)) {
 			x->tx_process_stopped_irq++;
 			ret |= tx_hard_error;
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 04/13] net: stmmac: Implement L3/L4 Filters using TC Flower
From: Jose Abreu @ 2019-09-02  8:01 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1567410970.git.joabreu@synopsys.com>

Implement filters for Layer 3 and Layer 4 using TC Flower API. Add the
corresponding callbacks in XGMAC core.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |   1 +
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h     |  30 +++
 .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c    | 177 +++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c |   1 +
 drivers/net/ethernet/stmicro/stmmac/hwif.h         |  16 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |  12 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   9 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c    | 244 +++++++++++++++++++++
 8 files changed, 488 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 49aa56ca09cc..19538057c24e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -360,6 +360,7 @@ struct dma_features {
 	unsigned int sphen;
 	unsigned int vlins;
 	unsigned int dvlan;
+	unsigned int l3l4fnum;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 7357b8bdc128..f942ac975c29 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -47,6 +47,7 @@
 #define XGMAC_CORE_INIT_RX		0
 #define XGMAC_PACKET_FILTER		0x00000008
 #define XGMAC_FILTER_RA			BIT(31)
+#define XGMAC_FILTER_IPFE		BIT(20)
 #define XGMAC_FILTER_VTFE		BIT(16)
 #define XGMAC_FILTER_HPF		BIT(10)
 #define XGMAC_FILTER_PCF		BIT(7)
@@ -119,6 +120,7 @@
 #define XGMAC_HWFEAT_VLHASH		BIT(4)
 #define XGMAC_HWFEAT_GMIISEL		BIT(1)
 #define XGMAC_HW_FEATURE1		0x00000120
+#define XGMAC_HWFEAT_L3L4FNUM		GENMASK(30, 27)
 #define XGMAC_HWFEAT_RSSEN		BIT(20)
 #define XGMAC_HWFEAT_TSOEN		BIT(18)
 #define XGMAC_HWFEAT_SPHEN		BIT(17)
@@ -150,6 +152,34 @@
 #define XGMAC_DCS			GENMASK(19, 16)
 #define XGMAC_DCS_SHIFT			16
 #define XGMAC_ADDRx_LOW(x)		(0x00000304 + (x) * 0x8)
+#define XGMAC_L3L4_ADDR_CTRL		0x00000c00
+#define XGMAC_IDDR			GENMASK(15, 8)
+#define XGMAC_IDDR_SHIFT		8
+#define XGMAC_IDDR_FNUM			4
+#define XGMAC_TT			BIT(1)
+#define XGMAC_XB			BIT(0)
+#define XGMAC_L3L4_DATA			0x00000c04
+#define XGMAC_L3L4_CTRL			0x0
+#define XGMAC_L4DPIM0			BIT(21)
+#define XGMAC_L4DPM0			BIT(20)
+#define XGMAC_L4SPIM0			BIT(19)
+#define XGMAC_L4SPM0			BIT(18)
+#define XGMAC_L4PEN0			BIT(16)
+#define XGMAC_L3HDBM0			GENMASK(15, 11)
+#define XGMAC_L3HSBM0			GENMASK(10, 6)
+#define XGMAC_L3DAIM0			BIT(5)
+#define XGMAC_L3DAM0			BIT(4)
+#define XGMAC_L3SAIM0			BIT(3)
+#define XGMAC_L3SAM0			BIT(2)
+#define XGMAC_L3PEN0			BIT(0)
+#define XGMAC_L4_ADDR			0x1
+#define XGMAC_L4DP0			GENMASK(31, 16)
+#define XGMAC_L4DP0_SHIFT		16
+#define XGMAC_L4SP0			GENMASK(15, 0)
+#define XGMAC_L3_ADDR0			0x4
+#define XGMAC_L3_ADDR1			0x5
+#define XGMAC_L3_ADDR2			0x6
+#define XGMAC_L3_ADDR3			0x7
 #define XGMAC_ARP_ADDR			0x00000c10
 #define XGMAC_RSS_CTRL			0x00000c80
 #define XGMAC_UDP4TE			BIT(3)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index e534a3aaf4a3..9f568b54b339 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1163,6 +1163,181 @@ static void dwxgmac2_enable_vlan(struct mac_device_info *hw, u32 type)
 	writel(value, ioaddr + XGMAC_VLAN_INCL);
 }
 
+static int dwxgmac2_filter_wait(struct mac_device_info *hw)
+{	/* Poll L3L4_ADDR_CTRL until the XB bit reads 0; -EBUSY if the poll fails. */
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	if (readl_poll_timeout(ioaddr + XGMAC_L3L4_ADDR_CTRL, value,
+			       !(value & XGMAC_XB), 100, 10000))
+		return -EBUSY;
+	return 0;
+}
+
+static int dwxgmac2_filter_read(struct mac_device_info *hw, u32 filter_no,
+				u8 reg, u32 *data)
+{	/* Indirect read of register 'reg' of filter 'filter_no' into *data. */
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	ret = dwxgmac2_filter_wait(hw);	/* XB must be clear before a new command */
+	if (ret)
+		return ret;
+
+	value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT;
+	value |= XGMAC_TT | XGMAC_XB;	/* TT is set here but not in filter_write */
+	writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL);
+
+	ret = dwxgmac2_filter_wait(hw);	/* data is read only after XB cleared */
+	if (ret)
+		return ret;
+
+	*data = readl(ioaddr + XGMAC_L3L4_DATA);
+	return 0;
+}
+
+static int dwxgmac2_filter_write(struct mac_device_info *hw, u32 filter_no,
+				 u8 reg, u32 data)
+{	/* Indirect write of 'data' to register 'reg' of filter 'filter_no'. */
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	ret = dwxgmac2_filter_wait(hw);	/* XB must be clear before a new command */
+	if (ret)
+		return ret;
+
+	writel(data, ioaddr + XGMAC_L3L4_DATA);	/* data is staged before the command */
+
+	value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT;
+	value |= XGMAC_XB;	/* no TT here, unlike dwxgmac2_filter_read() */
+	writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL);
+
+	return dwxgmac2_filter_wait(hw);	/* 0, or -EBUSY from the wait */
+}
+
+static int dwxgmac2_config_l3_filter(struct mac_device_info *hw, u32 filter_no,
+				     bool en, bool ipv6, bool sa, bool inv,
+				     u32 match)
+{	/* Program the L3 SA (sa) or DA (!sa) match of filter 'filter_no'. */
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	value = readl(ioaddr + XGMAC_PACKET_FILTER);
+	value |= XGMAC_FILTER_IPFE;	/* set on every call, also when !en */
+	writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+	ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value);
+	if (ret)
+		return ret;
+
+	/* For IPv6, the SA and DA filters cannot both be active */
+	if (ipv6) {
+		value |= XGMAC_L3PEN0;
+		value &= ~(XGMAC_L3SAM0 | XGMAC_L3SAIM0);
+		value &= ~(XGMAC_L3DAM0 | XGMAC_L3DAIM0);
+		if (sa) {
+			value |= XGMAC_L3SAM0;
+			if (inv)
+				value |= XGMAC_L3SAIM0;
+		} else {
+			value |= XGMAC_L3DAM0;
+			if (inv)
+				value |= XGMAC_L3DAIM0;
+		}
+	} else {	/* IPv4: match bits already present in CTRL are not cleared */
+		value &= ~XGMAC_L3PEN0;
+		if (sa) {
+			value |= XGMAC_L3SAM0;
+			if (inv)
+				value |= XGMAC_L3SAIM0;
+		} else {
+			value |= XGMAC_L3DAM0;
+			if (inv)
+				value |= XGMAC_L3DAIM0;
+		}
+	}
+
+	ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value);
+	if (ret)
+		return ret;
+
+	if (sa) {	/* SA match value goes to L3_ADDR0, DA match value to L3_ADDR1 */
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR0, match);
+		if (ret)
+			return ret;
+	} else {
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR1, match);
+		if (ret)
+			return ret;
+	}
+
+	if (!en)	/* disabling zeroes the whole CTRL word, L4 bits included */
+		return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0);
+
+	return 0;
+}
+
+static int dwxgmac2_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
+				     bool en, bool udp, bool sa, bool inv,
+				     u32 match)
+{	/* Program the L4 source (sa) or destination (!sa) port match of a filter. */
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	value = readl(ioaddr + XGMAC_PACKET_FILTER);
+	value |= XGMAC_FILTER_IPFE;	/* set on every call, also when !en */
+	writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+	ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value);
+	if (ret)
+		return ret;
+
+	if (udp) {
+		value |= XGMAC_L4PEN0;
+	} else {
+		value &= ~XGMAC_L4PEN0;
+	}
+	/* NOTE(review): both port-match bits are cleared, so a 2nd call undoes the 1st */
+	value &= ~(XGMAC_L4SPM0 | XGMAC_L4SPIM0);
+	value &= ~(XGMAC_L4DPM0 | XGMAC_L4DPIM0);
+	if (sa) {
+		value |= XGMAC_L4SPM0;
+		if (inv)
+			value |= XGMAC_L4SPIM0;
+	} else {
+		value |= XGMAC_L4DPM0;
+		if (inv)
+			value |= XGMAC_L4DPIM0;
+	}
+
+	ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value);
+	if (ret)
+		return ret;
+
+	if (sa) {	/* source port uses bits 15:0 of L4_ADDR, destination bits 31:16 */
+		value = match & XGMAC_L4SP0;
+
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value);
+		if (ret)
+			return ret;
+	} else {
+		value = (match << XGMAC_L4DP0_SHIFT) & XGMAC_L4DP0;
+
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value);
+		if (ret)
+			return ret;
+	}
+
+	if (!en)	/* disabling zeroes the whole CTRL word, L3 bits included */
+		return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0);
+
+	return 0;
+}
+
 const struct stmmac_ops dwxgmac210_ops = {
 	.core_init = dwxgmac2_core_init,
 	.set_mac = dwxgmac2_set_mac,
@@ -1203,6 +1378,8 @@ const struct stmmac_ops dwxgmac210_ops = {
 	.flex_pps_config = dwxgmac2_flex_pps_config,
 	.sarc_configure = dwxgmac2_sarc_configure,
 	.enable_vlan = dwxgmac2_enable_vlan,
+	.config_l3_filter = dwxgmac2_config_l3_filter,
+	.config_l4_filter = dwxgmac2_config_l4_filter,
 };
 
 int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index e77eb0ddf9b5..fb0283b15c77 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -378,6 +378,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
 
 	/* MAC HW feature 1 */
 	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
+	dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27;
 	dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
 	dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
 	dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 9435b312495d..47c8ad9ec671 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -363,6 +363,13 @@ struct stmmac_ops {
 	int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
 	/* Source Address Insertion / Replacement */
 	void (*sarc_configure)(void __iomem *ioaddr, int val);
+	/* Filtering */
+	int (*config_l3_filter)(struct mac_device_info *hw, u32 filter_no,
+				bool en, bool ipv6, bool sa, bool inv,
+				u32 match);
+	int (*config_l4_filter)(struct mac_device_info *hw, u32 filter_no,
+				bool en, bool udp, bool sa, bool inv,
+				u32 match);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -443,6 +450,10 @@ struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args)
 #define stmmac_sarc_configure(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mac, sarc_configure, __args)
+#define stmmac_config_l3_filter(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, config_l3_filter, __args)
+#define stmmac_config_l4_filter(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, config_l4_filter, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -499,6 +510,7 @@ struct stmmac_mode_ops {
 struct stmmac_priv;
 struct tc_cls_u32_offload;
 struct tc_cbs_qopt_offload;
+struct flow_cls_offload;
 
 struct stmmac_tc_ops {
 	int (*init)(struct stmmac_priv *priv);
@@ -506,6 +518,8 @@ struct stmmac_tc_ops {
 			     struct tc_cls_u32_offload *cls);
 	int (*setup_cbs)(struct stmmac_priv *priv,
 			 struct tc_cbs_qopt_offload *qopt);
+	int (*setup_cls)(struct stmmac_priv *priv,
+			 struct flow_cls_offload *cls);
 };
 
 #define stmmac_tc_init(__priv, __args...) \
@@ -514,6 +528,8 @@ struct stmmac_tc_ops {
 	stmmac_do_callback(__priv, tc, setup_cls_u32, __args)
 #define stmmac_tc_setup_cbs(__priv, __args...) \
 	stmmac_do_callback(__priv, tc, setup_cbs, __args)
+#define stmmac_tc_setup_cls(__priv, __args...) \
+	stmmac_do_callback(__priv, tc, setup_cls, __args)
 
 struct stmmac_counters;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index dcb2e29a5717..d993fc7e82c3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -128,6 +128,16 @@ struct stmmac_rss {
 	u32 table[STMMAC_RSS_MAX_TABLE_SIZE];
 };
 
+#define STMMAC_FLOW_ACTION_DROP		BIT(0)
+struct stmmac_flow_entry {	/* one slot of priv->flow_entries */
+	unsigned long cookie;	/* cls->cookie of the rule stored in this slot */
+	unsigned long action;	/* STMMAC_FLOW_ACTION_* bits */
+	u8 ip_proto;		/* taken from the rule's basic match key */
+	int in_use;		/* non-zero while the slot holds a rule */
+	int idx;		/* slot index, passed as the HW filter number */
+	int is_l4;		/* set once the ports parser has succeeded */
+};
+
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
 	u32 tx_coal_frames;
@@ -216,6 +226,8 @@ struct stmmac_priv {
 	unsigned int tc_entries_max;
 	unsigned int tc_off_max;
 	struct stmmac_tc_entry *tc_entries;
+	unsigned int flow_entries_max;
+	struct stmmac_flow_entry *flow_entries;
 
 	/* Pulse Per Second output */
 	struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 06ccd216ae90..c59c232aca64 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3931,12 +3931,17 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 	struct stmmac_priv *priv = cb_priv;
 	int ret = -EOPNOTSUPP;
 
+	if (!tc_cls_can_offload_and_chain0(priv->dev, type_data))
+		return ret;
+
 	stmmac_disable_all_queues(priv);
 
 	switch (type) {
 	case TC_SETUP_CLSU32:
-		if (tc_cls_can_offload_and_chain0(priv->dev, type_data))
-			ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+		ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+		break;
+	case TC_SETUP_CLSFLOWER:
+		ret = stmmac_tc_setup_cls(priv, priv, type_data);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 8dbbbf181ada..e231098061b6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -242,6 +242,23 @@ static int tc_init(struct stmmac_priv *priv)
 {
 	struct dma_features *dma_cap = &priv->dma_cap;
 	unsigned int count;
+	int i;
+
+	if (dma_cap->l3l4fnum) {
+		priv->flow_entries_max = dma_cap->l3l4fnum;
+		priv->flow_entries = devm_kcalloc(priv->device,
+						  dma_cap->l3l4fnum,
+						  sizeof(*priv->flow_entries),
+						  GFP_KERNEL);
+		if (!priv->flow_entries)
+			return -ENOMEM;
+
+		for (i = 0; i < priv->flow_entries_max; i++)
+			priv->flow_entries[i].idx = i;
+
+		dev_info(priv->device, "Enabled Flow TC (entries=%d)\n",
+			 priv->flow_entries_max);
+	}
 
 	/* Fail silently as we can still use remaining features, e.g. CBS */
 	if (!dma_cap->frpsel)
@@ -350,8 +367,235 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
 	return 0;
 }
 
+static int tc_parse_flow_actions(struct stmmac_priv *priv,
+				 struct flow_action *action,
+				 struct stmmac_flow_entry *entry)
+{	/* Record the rule's actions in entry->action; only DROP is recognised. */
+	struct flow_action_entry *act;
+	int i;
+
+	if (!flow_action_has_entries(action))	/* a rule without actions is rejected */
+		return -EINVAL;
+
+	flow_action_for_each(i, act, action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
+			entry->action |= STMMAC_FLOW_ACTION_DROP;
+			return 0;
+		default:	/* any other action is skipped, not rejected */
+			break;
+		}
+	}
+
+	/* Nothing to do, maybe inverse filter ? */
+	return 0;
+}
+
+static int tc_add_basic_flow(struct stmmac_priv *priv,
+			     struct flow_cls_offload *cls,
+			     struct stmmac_flow_entry *entry)
+{	/* Store the rule's IP protocol in entry; -EINVAL if it has no BASIC key. */
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_match_basic match;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC))
+		return -EINVAL;
+
+	flow_rule_match_basic(rule, &match);
+	entry->ip_proto = match.key->ip_proto;	/* read later by tc_add_ports_flow() */
+	return 0;
+}
+
+static int tc_add_ip4_flow(struct stmmac_priv *priv,
+			   struct flow_cls_offload *cls,
+			   struct stmmac_flow_entry *entry)
+{	/* Program the rule's IPv4 src/dst address matches into filter entry->idx. */
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	bool inv = entry->action & STMMAC_FLOW_ACTION_DROP;	/* DROP => inverted match */
+	struct flow_match_ipv4_addrs match;
+	u32 hw_match;
+	int ret;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS))
+		return -EINVAL;
+
+	flow_rule_match_ipv4_addrs(rule, &match);
+	hw_match = ntohl(match.key->src) & ntohl(match.mask->src);
+	if (hw_match) {	/* a zero masked source address is not programmed */
+		ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true,
+					      false, true, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	hw_match = ntohl(match.key->dst) & ntohl(match.mask->dst);
+	if (hw_match) {	/* a zero masked destination address is not programmed */
+		ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true,
+					      false, false, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int tc_add_ports_flow(struct stmmac_priv *priv,
+			     struct flow_cls_offload *cls,
+			     struct stmmac_flow_entry *entry)
+{	/* Program the rule's TCP/UDP src/dst port matches into filter entry->idx. */
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	bool inv = entry->action & STMMAC_FLOW_ACTION_DROP;	/* DROP => inverted match */
+	struct flow_match_ports match;
+	u32 hw_match;
+	bool is_udp;
+	int ret;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS))
+		return -EINVAL;
+
+	switch (entry->ip_proto) {	/* ip_proto is filled in by tc_add_basic_flow() */
+	case IPPROTO_TCP:
+		is_udp = false;
+		break;
+	case IPPROTO_UDP:
+		is_udp = true;
+		break;
+	default:	/* only TCP and UDP are accepted */
+		return -EINVAL;
+	}
+
+	flow_rule_match_ports(rule, &match);
+
+	hw_match = ntohs(match.key->src) & ntohs(match.mask->src);
+	if (hw_match) {	/* a zero masked source port is not programmed */
+		ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true,
+					      is_udp, true, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	hw_match = ntohs(match.key->dst) & ntohs(match.mask->dst);
+	if (hw_match) {	/* a zero masked destination port is not programmed */
+		ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true,
+					      is_udp, false, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	entry->is_l4 = true;	/* makes tc_del_flow() use the L4 disable path */
+	return 0;
+}
+
+static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv,
+					      struct flow_cls_offload *cls,
+					      bool get_free)
+{	/* Look up the flow entry for cls; NULL when no slot qualifies. */
+	int i;
+
+	for (i = 0; i < priv->flow_entries_max; i++) {
+		struct stmmac_flow_entry *entry = &priv->flow_entries[i];
+
+		if (entry->cookie == cls->cookie)	/* rule already known */
+			return entry;
+		if (get_free && (entry->in_use == false))	/* first unused slot */
+			return entry;
+	}
+
+	return NULL;
+}
+
+static const struct {	/* parsers run in this order by tc_add_flow() */
+	int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls,
+		  struct stmmac_flow_entry *entry);
+} tc_flow_parsers[] = {
+	{ .fn = tc_add_basic_flow },	/* first: sets entry->ip_proto */
+	{ .fn = tc_add_ip4_flow },
+	{ .fn = tc_add_ports_flow },	/* reads entry->ip_proto */
+};
+
+static int tc_add_flow(struct stmmac_priv *priv,
+		       struct flow_cls_offload *cls)
+{	/* Install rule cls into the entry with its cookie, or into a free entry. */
+	struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	int i, ret;
+
+	if (!entry) {	/* cookie not known yet: take the first unused slot */
+		entry = tc_find_flow(priv, cls, true);
+		if (!entry)
+			return -ENOENT;
+	}
+
+	ret = tc_parse_flow_actions(priv, &rule->action, entry);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) {
+		ret = tc_flow_parsers[i].fn(priv, cls, entry);
+		if (!ret) {	/* a parser's error is not propagated to the caller */
+			entry->in_use = true;
+			continue;
+		}
+	}
+
+	if (!entry->in_use)	/* no parser succeeded and the slot was not in use */
+		return -EINVAL;
+
+	entry->cookie = cls->cookie;
+	return 0;
+}
+
+static int tc_del_flow(struct stmmac_priv *priv,
+		       struct flow_cls_offload *cls)
+{	/* Disable the HW filter of the entry matching cls->cookie and free the slot. */
+	struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false);
+	int ret;
+
+	if (!entry || !entry->in_use)
+		return -ENOENT;
+
+	if (entry->is_l4) {	/* en=false makes the config helper zero the filter's CTRL */
+		ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, false,
+					      false, false, false, 0);
+	} else {
+		ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, false,
+					      false, false, false, 0);
+	}
+
+	entry->in_use = false;	/* the slot is released even when ret is an error */
+	entry->cookie = 0;
+	entry->is_l4 = false;
+	return ret;
+}
+
+static int tc_setup_cls(struct stmmac_priv *priv,
+			struct flow_cls_offload *cls)
+{	/* Dispatch a flow classifier command to the add/delete handler. */
+	int ret = 0;
+
+	switch (cls->command) {
+	case FLOW_CLS_REPLACE:
+		ret = tc_add_flow(priv, cls);
+		break;
+	case FLOW_CLS_DESTROY:
+		ret = tc_del_flow(priv, cls);
+		break;
+	default:	/* every other command is reported as unsupported */
+		return -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
 const struct stmmac_tc_ops dwmac510_tc_ops = {
 	.init = tc_init,
 	.setup_cls_u32 = tc_setup_cls_u32,
 	.setup_cbs = tc_setup_cbs,
+	.setup_cls = tc_setup_cls,
 };
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 06/13] net: stmmac: xgmac: Implement ARP Offload
From: Jose Abreu @ 2019-09-02  8:01 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1567410970.git.joabreu@synopsys.com>

Implement the ARP Offload feature in XGMAC cores.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/net/ethernet/stmicro/stmmac/common.h        |  1 +
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 17 +++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c  |  1 +
 drivers/net/ethernet/stmicro/stmmac/hwif.h          |  3 +++
 4 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 19538057c24e..912bbb6515b2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -361,6 +361,7 @@ struct dma_features {
 	unsigned int vlins;
 	unsigned int dvlan;
 	unsigned int l3l4fnum;
+	unsigned int arpoffsel;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 9f568b54b339..36262ef8b70a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1338,6 +1338,22 @@ static int dwxgmac2_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
 	return 0;
 }
 
+static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en,
+				     u32 addr)
+{	/* Write addr to ARP_ADDR, then set (en) or clear (!en) ARPEN in RX_CONFIG. */
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	writel(addr, ioaddr + XGMAC_ARP_ADDR);	/* written regardless of 'en' */
+
+	value = readl(ioaddr + XGMAC_RX_CONFIG);
+	if (en)
+		value |= XGMAC_CONFIG_ARPEN;
+	else
+		value &= ~XGMAC_CONFIG_ARPEN;
+	writel(value, ioaddr + XGMAC_RX_CONFIG);
+}
+
 const struct stmmac_ops dwxgmac210_ops = {
 	.core_init = dwxgmac2_core_init,
 	.set_mac = dwxgmac2_set_mac,
@@ -1380,6 +1396,7 @@ const struct stmmac_ops dwxgmac210_ops = {
 	.enable_vlan = dwxgmac2_enable_vlan,
 	.config_l3_filter = dwxgmac2_config_l3_filter,
 	.config_l4_filter = dwxgmac2_config_l4_filter,
+	.set_arp_offload = dwxgmac2_set_arp_offload,
 };
 
 int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index fb0283b15c77..fd60bf5e0a72 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -370,6 +370,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
 	dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12;
 	dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11;
 	dma_cap->av &= (hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10;
+	dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9;
 	dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8;
 	dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7;
 	dma_cap->pmt_remote_wake_up = (hw_cap & XGMAC_HWFEAT_RWKSEL) >> 6;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 47c8ad9ec671..ddb851d99618 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -370,6 +370,7 @@ struct stmmac_ops {
 	int (*config_l4_filter)(struct mac_device_info *hw, u32 filter_no,
 				bool en, bool udp, bool sa, bool inv,
 				u32 match);
+	void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -454,6 +455,8 @@ struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, config_l3_filter, __args)
 #define stmmac_config_l4_filter(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, config_l4_filter, __args)
+#define stmmac_set_arp_offload(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_arp_offload, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 07/13] net: stmmac: selftests: Implement the ARP Offload test
From: Jose Abreu @ 2019-09-02  8:01 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1567410970.git.joabreu@synopsys.com>

Implement a test for ARP Offload feature.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 110 +++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index 8e9d0aeda817..f531dbe038df 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -196,6 +196,24 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
 	return skb;
 }
 
+static struct sk_buff *stmmac_test_get_arp_skb(struct stmmac_priv *priv,
+					       struct stmmac_packet_attrs *attr)
+{	/* Build an ARP request skb from attr with arp_create(); NULL on failure. */
+	__be32 ip_src = htonl(attr->ip_src);
+	__be32 ip_dst = htonl(attr->ip_dst);
+	struct sk_buff *skb = NULL;
+
+	skb = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip_dst, priv->dev, ip_src,
+			 NULL, attr->src, attr->dst);
+	if (!skb)
+		return NULL;
+
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = priv->dev;
+
+	return skb;
+}
+
 struct stmmac_test_priv {
 	struct stmmac_packet_attrs *packet;
 	struct packet_type pt;
@@ -1396,6 +1414,94 @@ static int stmmac_test_l4filt_sa_udp(struct stmmac_priv *priv)
 	return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, true);
 }
 
+static int stmmac_test_arp_validate(struct sk_buff *skb,
+				    struct net_device *ndev,
+				    struct packet_type *pt,
+				    struct net_device *orig_ndev)
+{	/* packet_type handler: completes the test on an ARP reply sent to the test MAC. */
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct ethhdr *ehdr;
+	struct arphdr *ahdr;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->src))	/* not for us */
+		goto out;
+
+	ahdr = arp_hdr(skb);
+	if (ahdr->ar_op != htons(ARPOP_REPLY))	/* only replies count */
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);	/* the skb is freed on every path */
+	return 0;
+}
+
+static int stmmac_test_arpoffload(struct stmmac_priv *priv)
+{	/* Send an ARP request with ARP offload on; pass if an ARP reply comes back. */
+	unsigned char src[ETH_ALEN] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
+	unsigned char dst[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	u32 ip_addr = 0xdeadcafe;
+	u32 ip_src = 0xdeadbeef;
+	int ret;
+
+	if (!priv->dma_cap.arpoffsel)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	attr.src = src;
+	attr.ip_src = ip_src;
+	attr.dst = dst;
+	attr.ip_dst = ip_addr;
+
+	tpriv->pt.type = htons(ETH_P_ARP);
+	tpriv->pt.func = stmmac_test_arp_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;	/* already filled in: the handler reads attr.src */
+	dev_add_pack(&tpriv->pt);
+
+	ret = stmmac_set_arp_offload(priv, priv->hw, true, ip_addr);
+	if (ret)
+		goto cleanup;
+
+	ret = dev_set_promiscuity(priv->dev, 1);
+	if (ret)
+		goto cleanup;
+	/* Built only now, so no earlier error path has an skb to free */
+	skb = stmmac_test_get_arp_skb(priv, &attr);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto cleanup_promisc;
+	}
+
+	skb_set_queue_mapping(skb, 0);
+	ret = dev_queue_xmit(skb);
+	if (ret)
+		goto cleanup_promisc;
+
+	wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+	ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+cleanup_promisc:
+	dev_set_promiscuity(priv->dev, -1);
+cleanup:
+	stmmac_set_arp_offload(priv, priv->hw, false, 0x0);
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
 #define STMMAC_LOOPBACK_NONE	0
 #define STMMAC_LOOPBACK_MAC	1
 #define STMMAC_LOOPBACK_PHY	2
@@ -1505,6 +1611,10 @@ static const struct stmmac_test {
 		.name = "L4 SA UDP Filtering ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l4filt_sa_udp,
+	}, {
+		.name = "ARP Offload         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_arpoffload,
 	},
 };
 
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 12/13] net: stmmac: xgmac: Enable RX Jumbo frame support
From: Jose Abreu @ 2019-09-02  8:01 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1567410970.git.joabreu@synopsys.com>

We are already doing it by default in the TX path so we can also enable
Jumbo Frame support in the RX path independently of MTU value.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h      |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 11 -----------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index f942ac975c29..5923ca62d793 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -44,7 +44,8 @@
 #define XGMAC_CONFIG_CST		BIT(2)
 #define XGMAC_CONFIG_ACS		BIT(1)
 #define XGMAC_CONFIG_RE			BIT(0)
-#define XGMAC_CORE_INIT_RX		0
+#define XGMAC_CORE_INIT_RX		(XGMAC_CONFIG_GPSLCE | XGMAC_CONFIG_WD | \
+					 (XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT))
 #define XGMAC_PACKET_FILTER		0x00000008
 #define XGMAC_FILTER_RA			BIT(31)
 #define XGMAC_FILTER_IPFE		BIT(20)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 36262ef8b70a..78ac659da279 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -15,7 +15,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
 			       struct net_device *dev)
 {
 	void __iomem *ioaddr = hw->pcsr;
-	int mtu = dev->mtu;
 	u32 tx, rx;
 
 	tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -24,16 +23,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
 	tx |= XGMAC_CORE_INIT_TX;
 	rx |= XGMAC_CORE_INIT_RX;
 
-	if (mtu >= 9000) {
-		rx |= XGMAC_CONFIG_GPSLCE;
-		rx |= XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT;
-		rx |= XGMAC_CONFIG_WD;
-	} else if (mtu > 2000) {
-		rx |= XGMAC_CONFIG_JE;
-	} else if (mtu > 1500) {
-		rx |= XGMAC_CONFIG_S2KP;
-	}
-
 	if (hw->ps) {
 		tx |= XGMAC_CONFIG_TE;
 		tx &= ~hw->link.speed_mask;
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 05/13] net: stmmac: selftests: Add selftest for L3/L4 Filters
From: Jose Abreu @ 2019-09-02  8:01 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jose Abreu, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	linux-kernel
In-Reply-To: <cover.1567410970.git.joabreu@synopsys.com>

Adds the selftests for L3 and L4 filters with DA/SA/DP/SP support.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 254 ++++++++++++++++++++-
 1 file changed, 253 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index d3234338a0ca..8e9d0aeda817 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -164,7 +164,7 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
 		iplen += sizeof(*uhdr);
 	ihdr->tot_len = htons(iplen);
 	ihdr->frag_off = 0;
-	ihdr->saddr = 0;
+	ihdr->saddr = htonl(attr->ip_src);
 	ihdr->daddr = htonl(attr->ip_dst);
 	ihdr->tos = 0;
 	ihdr->id = 0;
@@ -1168,6 +1168,234 @@ static int stmmac_test_svlanoff(struct stmmac_priv *priv)
 	return stmmac_test_vlanoff_common(priv, true);
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask)
+{
+	struct flow_dissector_key_ipv4_addrs key, mask;
+	unsigned long dummy_cookie = 0xdeadbeef;
+	struct flow_dissector dissector = { };
+	struct stmmac_packet_attrs attr = { };
+	struct flow_cls_offload cls = { };
+	struct flow_rule *rule;
+	int ret;
+
+	if (!tc_can_offload(priv->dev))
+		return -EOPNOTSUPP;
+	if (!priv->dma_cap.l3l4fnum)
+		return -EOPNOTSUPP;
+	if (priv->rss.enable) {
+		struct stmmac_rss rss = { .enable = false, };
+
+		stmmac_rss_configure(priv, priv->hw, &rss,
+				     priv->plat->rx_queues_to_use);
+	}
+
+	dissector.used_keys |= (1 << FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+	dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = 0;
+
+	cls.common.chain_index = 0;
+	cls.command = FLOW_CLS_REPLACE;
+	cls.cookie = dummy_cookie;
+
+	rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL);
+	if (!rule) {
+		ret = -ENOMEM;
+		goto cleanup_rss;
+	}
+
+	rule->match.dissector = &dissector;
+	rule->match.key = (void *)&key;
+	rule->match.mask = (void *)&mask;
+
+	key.src = htonl(src);
+	key.dst = htonl(dst);
+	mask.src = src_mask;
+	mask.dst = dst_mask;
+
+	cls.rule = rule;
+
+	rule->action.entries[0].id = FLOW_ACTION_DROP;
+	rule->action.num_entries = 1;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.ip_dst = dst;
+	attr.ip_src = src;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup_rule;
+
+	ret = stmmac_tc_setup_cls(priv, priv, &cls);
+	if (ret)
+		goto cleanup_rule;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+	cls.command = FLOW_CLS_DESTROY;
+	stmmac_tc_setup_cls(priv, priv, &cls);
+cleanup_rule:
+	kfree(rule);
+cleanup_rss:
+	if (priv->rss.enable) {
+		stmmac_rss_configure(priv, priv->hw, &priv->rss,
+				     priv->plat->rx_queues_to_use);
+	}
+
+	return ret;
+}
+#else
+static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static int stmmac_test_l3filt_da(struct stmmac_priv *priv)
+{
+	u32 addr = 0x10203040;
+
+	return __stmmac_test_l3filt(priv, addr, 0, ~0, 0);
+}
+
+static int stmmac_test_l3filt_sa(struct stmmac_priv *priv)
+{
+	u32 addr = 0x10203040;
+
+	return __stmmac_test_l3filt(priv, 0, addr, 0, ~0);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask, bool udp)
+{
+	struct {
+		struct flow_dissector_key_basic bkey;
+		struct flow_dissector_key_ports key;
+	} __aligned(BITS_PER_LONG / 8) keys;
+	struct {
+		struct flow_dissector_key_basic bmask;
+		struct flow_dissector_key_ports mask;
+	} __aligned(BITS_PER_LONG / 8) masks;
+	unsigned long dummy_cookie = 0xdeadbeef;
+	struct flow_dissector dissector = { };
+	struct stmmac_packet_attrs attr = { };
+	struct flow_cls_offload cls = { };
+	struct flow_rule *rule;
+	int ret;
+
+	if (!tc_can_offload(priv->dev))
+		return -EOPNOTSUPP;
+	if (!priv->dma_cap.l3l4fnum)
+		return -EOPNOTSUPP;
+	if (priv->rss.enable) {
+		struct stmmac_rss rss = { .enable = false, };
+
+		stmmac_rss_configure(priv, priv->hw, &rss,
+				     priv->plat->rx_queues_to_use);
+	}
+
+	dissector.used_keys |= (1 << FLOW_DISSECTOR_KEY_BASIC);
+	dissector.used_keys |= (1 << FLOW_DISSECTOR_KEY_PORTS);
+	dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = 0;
+	dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = offsetof(typeof(keys), key);
+
+	cls.common.chain_index = 0;
+	cls.command = FLOW_CLS_REPLACE;
+	cls.cookie = dummy_cookie;
+
+	rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL);
+	if (!rule) {
+		ret = -ENOMEM;
+		goto cleanup_rss;
+	}
+
+	rule->match.dissector = &dissector;
+	rule->match.key = (void *)&keys;
+	rule->match.mask = (void *)&masks;
+
+	keys.bkey.ip_proto = udp ? IPPROTO_UDP : IPPROTO_TCP;
+	keys.key.src = htons(src);
+	keys.key.dst = htons(dst);
+	masks.mask.src = src_mask;
+	masks.mask.dst = dst_mask;
+
+	cls.rule = rule;
+
+	rule->action.entries[0].id = FLOW_ACTION_DROP;
+	rule->action.num_entries = 1;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.tcp = !udp;
+	attr.sport = src;
+	attr.dport = dst;
+	attr.ip_dst = 0;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup_rule;
+
+	ret = stmmac_tc_setup_cls(priv, priv, &cls);
+	if (ret)
+		goto cleanup_rule;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+	cls.command = FLOW_CLS_DESTROY;
+	stmmac_tc_setup_cls(priv, priv, &cls);
+cleanup_rule:
+	kfree(rule);
+cleanup_rss:
+	if (priv->rss.enable) {
+		stmmac_rss_configure(priv, priv->hw, &priv->rss,
+				     priv->plat->rx_queues_to_use);
+	}
+
+	return ret;
+}
+#else
+static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask, bool udp)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static int stmmac_test_l4filt_da_tcp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, false);
+}
+
+static int stmmac_test_l4filt_sa_tcp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, false);
+}
+
+static int stmmac_test_l4filt_da_udp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, true);
+}
+
+static int stmmac_test_l4filt_sa_udp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, true);
+}
+
 #define STMMAC_LOOPBACK_NONE	0
 #define STMMAC_LOOPBACK_MAC	1
 #define STMMAC_LOOPBACK_PHY	2
@@ -1253,6 +1481,30 @@ static const struct stmmac_test {
 		.name = "SVLAN TX Insertion  ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_svlanoff,
+	}, {
+		.name = "L3 DA Filtering     ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l3filt_da,
+	}, {
+		.name = "L3 SA Filtering     ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l3filt_sa,
+	}, {
+		.name = "L4 DA TCP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_da_tcp,
+	}, {
+		.name = "L4 SA TCP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_sa_tcp,
+	}, {
+		.name = "L4 DA UDP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_da_udp,
+	}, {
+		.name = "L4 SA UDP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_sa_udp,
 	},
 };
 
-- 
2.7.4


^ permalink raw reply related

* [net-next 3/3] ravb: TROCR register is only present on R-Car Gen3
From: Simon Horman @ 2019-09-02  8:06 UTC (permalink / raw)
  To: David Miller, Sergei Shtylyov
  Cc: Magnus Damm, netdev, linux-renesas-soc, Simon Horman
In-Reply-To: <20190902080603.5636-1-horms+renesas@verge.net.au>

Only use the TROCR register on R-Car Gen3.
It is not present on other SoCs.

Offsets used for the undocumented registers are also considered reserved
and should not be written to.

After some internal investigation with Renesas it remains unclear why this
driver accesses these fields on R-Car Gen2 but regardless of what the
historical reasons are the current code is considered incorrect.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/net/ethernet/renesas/ravb.h      | 2 +-
 drivers/net/ethernet/renesas/ravb_main.c | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index bdb051f04b0c..a9c89d5d8898 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -193,7 +193,7 @@ enum ravb_reg {
 	GECMR	= 0x05b0,
 	MAHR	= 0x05c0,
 	MALR	= 0x05c8,
-	TROCR	= 0x0700,	/* Undocumented? */
+	TROCR	= 0x0700,	/* R-Car Gen3 only */
 	CEFCR	= 0x0740,
 	FRECR	= 0x0748,
 	TSFRCR	= 0x0750,
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index b538cc6fdbb7..de9aa8c47f1c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1621,8 +1621,10 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
 	stats0 = &priv->stats[RAVB_BE];
 	stats1 = &priv->stats[RAVB_NC];
 
-	nstats->tx_dropped += ravb_read(ndev, TROCR);
-	ravb_write(ndev, 0, TROCR);	/* (write clear) */
+	if (priv->chip_id == RCAR_GEN3) {
+		nstats->tx_dropped += ravb_read(ndev, TROCR);
+		ravb_write(ndev, 0, TROCR);	/* (write clear) */
+	}
 
 	nstats->rx_packets = stats0->rx_packets + stats1->rx_packets;
 	nstats->tx_packets = stats0->tx_packets + stats1->tx_packets;
-- 
2.11.0


^ permalink raw reply related

* [net-next 0/3] ravb: Remove use of undocumented registers
From: Simon Horman @ 2019-09-02  8:06 UTC (permalink / raw)
  To: David Miller, Sergei Shtylyov
  Cc: Magnus Damm, netdev, linux-renesas-soc, Simon Horman

Hi,

this short series cleans up the RAVB driver a little.

The first patch corrects the spelling of the FBP field of SFO register.
This register field is unused and should have no run-time effect.

The remaining two patches remove the use of undocumented registers
after some consultation with the internal Renesas BSP team.

All patches have been lightly tested on:
* E3 Ebisu
* H3 Salvator-XS (ES2.0)
* M3-W Salvator-XS
* M3-N Salvator-XS

Kazuya Mizuguchi (2):
  ravb: correct typo in FBP field of SFO register
  ravb: Remove undocumented processing

Simon Horman (1):
  ravb: TROCR register is only present on R-Car Gen3

 drivers/net/ethernet/renesas/ravb.h      |  9 ++-------
 drivers/net/ethernet/renesas/ravb_main.c | 21 ++++-----------------
 2 files changed, 6 insertions(+), 24 deletions(-)

-- 
2.11.0


^ permalink raw reply

* [net-next 1/3] ravb: correct typo in FBP field of SFO register
From: Simon Horman @ 2019-09-02  8:06 UTC (permalink / raw)
  To: David Miller, Sergei Shtylyov
  Cc: Magnus Damm, netdev, linux-renesas-soc, Kazuya Mizuguchi,
	Simon Horman
In-Reply-To: <20190902080603.5636-1-horms+renesas@verge.net.au>

From: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>

The field name is FBP rather than FPB.

This field is unused and could equally be removed from the driver entirely.
But there seems no harm in leaving as documentation of the presence of the
field.

Signed-off-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
v0 - Kazuya Mizuguchi

v1 - Simon Horman
* Extracted from larger patch
* Wrote changelog

diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index ac9195add811..bdb051f04b0c 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -317,7 +312,7 @@ enum UFCD_BIT {

 /* SFO */
 enum SFO_BIT {
-	SFO_FPB		= 0x0000003F,
+	SFO_FBP		= 0x0000003F,
 };

 /* RTC */
---
 drivers/net/ethernet/renesas/ravb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index ac9195add811..2596a95a4300 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -317,7 +317,7 @@ enum UFCD_BIT {
 
 /* SFO */
 enum SFO_BIT {
-	SFO_FPB		= 0x0000003F,
+	SFO_FBP		= 0x0000003F,
 };
 
 /* RTC */
-- 
2.11.0


^ permalink raw reply related

* [net-next 2/3] ravb: Remove undocumented processing
From: Simon Horman @ 2019-09-02  8:06 UTC (permalink / raw)
  To: David Miller, Sergei Shtylyov
  Cc: Magnus Damm, netdev, linux-renesas-soc, Kazuya Mizuguchi,
	Simon Horman
In-Reply-To: <20190902080603.5636-1-horms+renesas@verge.net.au>

From: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>

This patch removes the use of the undocumented registers
CDCR, LCCR, CERCR, CEECR and the undocumented BOC bit of the CCC register.

Current documentation for EtherAVB (ravb) describes the offset of
what the driver uses as the BOC bit as reserved and that only a value of
0 should be written. Furthermore, the offsets used for the undocumented
registers are also considered reserved and should not be written to.

After some internal investigation with Renesas it remains unclear
why this driver accesses these fields but regardless of what the historical
reasons are the current code is considered incorrect.

Signed-off-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/net/ethernet/renesas/ravb.h      |  5 -----
 drivers/net/ethernet/renesas/ravb_main.c | 15 ---------------
 2 files changed, 20 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 2596a95a4300..bdb051f04b0c 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -194,15 +194,11 @@ enum ravb_reg {
 	MAHR	= 0x05c0,
 	MALR	= 0x05c8,
 	TROCR	= 0x0700,	/* Undocumented? */
-	CDCR	= 0x0708,	/* Undocumented? */
-	LCCR	= 0x0710,	/* Undocumented? */
 	CEFCR	= 0x0740,
 	FRECR	= 0x0748,
 	TSFRCR	= 0x0750,
 	TLFRCR	= 0x0758,
 	RFCR	= 0x0760,
-	CERCR	= 0x0768,	/* Undocumented? */
-	CEECR	= 0x0770,	/* Undocumented? */
 	MAFCR	= 0x0778,
 };
 
@@ -220,7 +216,6 @@ enum CCC_BIT {
 	CCC_CSEL_HPB	= 0x00010000,
 	CCC_CSEL_ETH_TX	= 0x00020000,
 	CCC_CSEL_GMII_REF = 0x00030000,
-	CCC_BOC		= 0x00100000,	/* Undocumented? */
 	CCC_LBME	= 0x01000000,
 };
 
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 6cacd5e893ac..b538cc6fdbb7 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -447,12 +447,6 @@ static int ravb_dmac_init(struct net_device *ndev)
 	ravb_ring_format(ndev, RAVB_BE);
 	ravb_ring_format(ndev, RAVB_NC);
 
-#if defined(__LITTLE_ENDIAN)
-	ravb_modify(ndev, CCC, CCC_BOC, 0);
-#else
-	ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC);
-#endif
-
 	/* Set AVB RX */
 	ravb_write(ndev,
 		   RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR);
@@ -1629,15 +1623,6 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
 
 	nstats->tx_dropped += ravb_read(ndev, TROCR);
 	ravb_write(ndev, 0, TROCR);	/* (write clear) */
-	nstats->collisions += ravb_read(ndev, CDCR);
-	ravb_write(ndev, 0, CDCR);	/* (write clear) */
-	nstats->tx_carrier_errors += ravb_read(ndev, LCCR);
-	ravb_write(ndev, 0, LCCR);	/* (write clear) */
-
-	nstats->tx_carrier_errors += ravb_read(ndev, CERCR);
-	ravb_write(ndev, 0, CERCR);	/* (write clear) */
-	nstats->tx_carrier_errors += ravb_read(ndev, CEECR);
-	ravb_write(ndev, 0, CEECR);	/* (write clear) */
 
 	nstats->rx_packets = stats0->rx_packets + stats1->rx_packets;
 	nstats->tx_packets = stats0->tx_packets + stats1->tx_packets;
-- 
2.11.0


^ permalink raw reply related

* Re: [net-next 0/3] ravb: Remove use of undocumented registers
From: Geert Uytterhoeven @ 2019-09-02  8:16 UTC (permalink / raw)
  To: Simon Horman, Biju Das, Fabrizio Castro
  Cc: David Miller, Sergei Shtylyov, Magnus Damm, netdev, Linux-Renesas
In-Reply-To: <20190902080603.5636-1-horms+renesas@verge.net.au>

Hi Simon, Biju, Fabrizio,

On Mon, Sep 2, 2019 at 10:06 AM Simon Horman <horms+renesas@verge.net.au> wrote:
> this short series cleans up the RAVB driver a little.
>
> The first patch corrects the spelling of the FBP field of SFO register.
> This register field is unused and should have no run-time effect.
>
> The remaining two patches remove the use of undocumented registers
> after some consultation with the internal Renesas BSP team.
>
> All patches have been lightly tested on:
> * E3 Ebisu
> * H3 Salvator-XS (ES2.0)
> * M3-W Salvator-XS
> * M3-N Salvator-XS

It would be good if someone could test this on an R-Car Gen2 board
that uses ravb (iwg22d or iwg23s).

Thanks!

> Kazuya Mizuguchi (2):
>   ravb: correct typo in FBP field of SFO register
>   ravb: Remove undocumented processing
>
> Simon Horman (1):
>   ravb: TROCR register is only present on R-Car Gen3

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH v4 1/5] vsock/virtio: limit the memory used per-socket
From: Stefan Hajnoczi @ 2019-09-02  8:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stefano Garzarella, kvm, netdev, linux-kernel, virtualization,
	Stefan Hajnoczi, David S. Miller
In-Reply-To: <20190901024525-mutt-send-email-mst@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 5393 bytes --]

On Sun, Sep 01, 2019 at 02:56:44AM -0400, Michael S. Tsirkin wrote:
> On Fri, Aug 30, 2019 at 11:40:59AM +0200, Stefano Garzarella wrote:
> > On Mon, Jul 29, 2019 at 10:04:29AM -0400, Michael S. Tsirkin wrote:
> > > On Wed, Jul 17, 2019 at 01:30:26PM +0200, Stefano Garzarella wrote:
> > > > Since virtio-vsock was introduced, the buffers filled by the host
> > > > and pushed to the guest using the vring, are directly queued in
> > > > a per-socket list. These buffers are preallocated by the guest
> > > > with a fixed size (4 KB).
> > > > 
> > > > The maximum amount of memory used by each socket should be
> > > > controlled by the credit mechanism.
> > > > The default credit available per-socket is 256 KB, but if we use
> > > > only 1 byte per packet, the guest can queue up to 262144 of 4 KB
> > > > buffers, using up to 1 GB of memory per-socket. In addition, the
> > > > guest will continue to fill the vring with new 4 KB free buffers
> > > > to avoid starvation of other sockets.
> > > > 
> > > > This patch mitigates this issue copying the payload of small
> > > > packets (< 128 bytes) into the buffer of last packet queued, in
> > > > order to avoid wasting memory.
> > > > 
> > > > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > > 
> > > This is good enough for net-next, but for net I think we
> > > should figure out how to address the issue completely.
> > > Can we make the accounting precise? What happens to
> > > performance if we do?
> > > 
> > 
> > Since I'm back from holidays, I'm restarting this thread to figure out
> > how to address the issue completely.
> > 
> > I did a better analysis of the credit mechanism that we implemented in
> > virtio-vsock to get a clearer view and I'd share it with you:
> > 
> >     This issue affects only the "host->guest" path. In this case, when the
> >     host wants to send a packet to the guest, it uses a "free" buffer
> >     allocated by the guest (4KB).
> >     The "free" buffers available for the host are shared between all
> >     sockets, instead, the credit mechanism is per-socket, I think to
> >     avoid the starvation of other sockets.
> >     The guests re-fill the "free" queue when the available buffers are
> >     less than half.
> > 
> >     Each peer has these variables in the per-socket state:
> >        /* local vars */
> >        buf_alloc        /* max bytes usable by this socket
> >                            [exposed to the other peer] */
> >        fwd_cnt          /* increased when RX packet is consumed by the
> >                            user space [exposed to the other peer] */
> >        tx_cnt 	        /* increased when TX packet is sent to the other peer */
> > 
> >        /* remote vars  */
> >        peer_buf_alloc   /* peer's buf_alloc */
> >        peer_fwd_cnt     /* peer's fwd_cnt */
> > 
> >     When a peer sends a packet, it increases the 'tx_cnt'; when the
> >     receiver consumes the packet (copy it to the user-space buffer), it
> >     increases the 'fwd_cnt'.
> >     Note: increments are made considering the payload length and not the
> >     buffer length.
> > 
> >     The value of 'buf_alloc' and 'fwd_cnt' are sent to the other peer in
> >     all packet headers or with an explicit CREDIT_UPDATE packet.
> > 
> >     The local 'buf_alloc' value can be modified by the user space using
> >     setsockopt() with optname=SO_VM_SOCKETS_BUFFER_SIZE.
> > 
> >     Before sending a packet, the peer checks the space available:
> >     	credit_available = peer_buf_alloc - (tx_cnt - peer_fwd_cnt)
> >     and it will send up to credit_available bytes to the other peer.
> > 
> > Possible solutions considering Michael's advice:
> > 1. Use the buffer length instead of the payload length when we increment
> >    the counters:
> >   - This approach will account precisely the memory used per socket.
> >   - This requires changes in both guest and host.
> >   - It is not compatible with old drivers, so a feature should be negotiated.
> > 2. Decrease the advertised 'buf_alloc' taking count of bytes queued in
> >    the socket queue but not used. (e.g. 256 byte used on 4K available in
> >    the buffer)
> >   - pkt->hdr.buf_alloc = buf_alloc - bytes_not_used.
> >   - This should be compatible also with old drivers.
> > 
> > Maybe the second is less invasive, but will it be too tricky?
> > Any other advice or suggestions?
> > 
> > Thanks in advance,
> > Stefano
> 
> OK let me try to clarify.  The idea is this:
> 
> Let's say we queue a buffer of 4K, and we copy if len < 128 bytes.  This
> means that in the worst case (128 byte packets), each byte of credit in
> the socket uses up 4K/128 = 16 bytes of kernel memory. In fact we need
> to also account for the virtio_vsock_pkt since I think it's kept around
> until userspace consumes it.
> 
> Thus given X buf alloc allowed in the socket, we should publish X/16
> credits to the other side. This will ensure the other side does not send
> more than X/16 bytes for a given socket and thus we won't need to
> allocate more than X bytes to hold the data.
> 
> We can play with the copy break value to tweak this.

This seems like a reasonable solution.  Hopefully the benchmark results
will come out okay too.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [RFC PATCH v2 net-next 10/15] net: dsa: Pass ndo_setup_tc slave callback to drivers
From: Vladimir Oltean @ 2019-09-02  8:49 UTC (permalink / raw)
  To: Kurt Kanzenbach
  Cc: Florian Fainelli, Vivien Didelot, Andrew Lunn, David S. Miller,
	Vinicius Costa Gomes, vedang.patel, Richard Cochran, weifeng.voon,
	jiri, m-karicheri2, Jose.Abreu, Ilias Apalodimas,
	Jamal Hadi Salim, xiyou.wangcong, netdev
In-Reply-To: <20190902075209.GC3343@linutronix.de>

Hi Kurt,

On Mon, 2 Sep 2019 at 10:52, Kurt Kanzenbach
<kurt.kanzenbach@linutronix.de> wrote:
>
> Hi,
>
> On Fri, Aug 30, 2019 at 03:46:30AM +0300, Vladimir Oltean wrote:
> > DSA currently handles shared block filters (for the classifier-action
> > qdisc) in the core due to what I believe are simply pragmatic reasons -
> > hiding the complexity from drivers and offering a simple API for port
> > mirroring.
> >
> > Extend the dsa_slave_setup_tc function by passing all other qdisc
> > offloads to the driver layer, where the driver may choose what it
> > implements and how. DSA is simply a pass-through in this case.
>
> I'm having the same problem on how to pass the taprio schedule down to
> the DSA driver. I didn't perform a pass-through to keep it in sync with
> the already implemented offload. See my approach below.
>
> >
> > There is an open question related to the drivers potentially needing to
> > do work in process context, but .ndo_setup_tc is called in atomic
> > context. At the moment the drivers are left to handle this on their own.
> > The risk is that once accepting the offload callback right away in the
> > DSA core, then the driver would have no way to signal an error back. So
> > right now the driver has to do as much error checking as possible in the
> > atomic context and only defer (probably) the actual configuring of the
> > offload.
> >
> > Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
> > ---
> >  include/net/dsa.h |  3 +++
> >  net/dsa/slave.c   | 12 ++++++++----
> >  2 files changed, 11 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/net/dsa.h b/include/net/dsa.h
> > index 96acb14ec1a8..232b5d36815d 100644
> > --- a/include/net/dsa.h
> > +++ b/include/net/dsa.h
> > @@ -154,6 +154,7 @@ struct dsa_mall_tc_entry {
> >       };
> >  };
> >
> > +struct tc_taprio_qopt_offload;
>
> Is this needed? The rest looks good to me.
>

No, this isn't needed. It is a remnant from v1.

> My approach:
>
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index ba6dfff98196..a60bd55f27f2 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -20,6 +20,7 @@
>  #include <linux/platform_data/dsa.h>
>  #include <net/devlink.h>
>  #include <net/switchdev.h>
> +#include <net/pkt_sched.h>
>
>  struct tc_action;
>  struct phy_device;
> @@ -539,6 +540,13 @@ struct dsa_switch_ops {
>          */
>         netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port,
>                                           struct sk_buff *skb);
> +
> +       /*
> +        * Scheduled traffic functionality
> +        */
> +       int (*port_set_schedule)(struct dsa_switch *ds, int port,
> +                                const struct tc_taprio_qopt_offload *taprio);
> +       int (*port_del_schedule)(struct dsa_switch *ds, int port);
>  };
>
>  struct dsa_switch_driver {
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index 8157be7e162d..6290d55e6011 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -15,6 +15,7 @@
>  #include <linux/mdio.h>
>  #include <net/rtnetlink.h>
>  #include <net/pkt_cls.h>
> +#include <net/pkt_sched.h>
>  #include <net/tc_act/tc_mirred.h>
>  #include <linux/if_bridge.h>
>  #include <linux/netpoll.h>
> @@ -953,12 +954,33 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
>         }
>  }
>
> +static int dsa_slave_setup_tc_taprio(struct net_device *dev,
> +                                    const struct tc_taprio_qopt_offload *taprio)
> +{
> +       struct dsa_port *dp = dsa_slave_to_port(dev);
> +       struct dsa_switch *ds = dp->ds;
> +
> +       if (taprio->enable) {
> +               if (!ds->ops->port_set_schedule)
> +                       return -EOPNOTSUPP;
> +
> +               return ds->ops->port_set_schedule(ds, dp->index, taprio);
> +       }
> +
> +       if (!ds->ops->port_del_schedule)
> +               return -EOPNOTSUPP;
> +
> +       return ds->ops->port_del_schedule(ds, dp->index);
> +}
> +
>  static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
>                               void *type_data)
>  {
>         switch (type) {
>         case TC_SETUP_BLOCK:
>                 return dsa_slave_setup_tc_block(dev, type_data);
> +       case TC_SETUP_QDISC_TAPRIO:
> +               return dsa_slave_setup_tc_taprio(dev, type_data);
>         default:
>                 return -EOPNOTSUPP;
>         }
>

I did something similar in v1 with a .port_setup_taprio in "[RFC PATCH
net-next 3/6] net: dsa: Pass tc-taprio offload to drivers".
Would this address Ilias's comment about DSA not really needing to
have this level of awareness into the qdisc offload type? Rightfully I
can agree that the added-value of making a .port_set_schedule and
.port_del_schedule in DSA compared to simply passing the ndo_setup_tc
is not that great.

By the way, thanks for the iproute2 patch for parsing 64-bit base time
on ARM 32, saved me a bit of debugging time :)

> Thanks,
> Kurt

Regards,
-Vladimir

^ permalink raw reply

* Re: [PATCH v4 1/5] vsock/virtio: limit the memory used per-socket
From: Stefano Garzarella @ 2019-09-02  8:55 UTC (permalink / raw)
  To: Michael S. Tsirkin, Stefan Hajnoczi
  Cc: kvm, netdev, linux-kernel, virtualization, Stefan Hajnoczi,
	David S. Miller
In-Reply-To: <20190902083912.GA9069@stefanha-x1.localdomain>

On Mon, Sep 02, 2019 at 09:39:12AM +0100, Stefan Hajnoczi wrote:
> On Sun, Sep 01, 2019 at 02:56:44AM -0400, Michael S. Tsirkin wrote:
> > 
> > OK let me try to clarify.  The idea is this:
> > 
> > Let's say we queue a buffer of 4K, and we copy if len < 128 bytes.  This
> > means that in the worst case (128 byte packets), each byte of credit in
> > the socket uses up 4K/128 = 16 bytes of kernel memory. In fact we need
> > to also account for the virtio_vsock_pkt since I think it's kept around
> > until userspace consumes it.
> > 
> > Thus given X buf alloc allowed in the socket, we should publish X/16
> > credits to the other side. This will ensure the other side does not send
> > more than X/16 bytes for a given socket and thus we won't need to
> > allocate more than X bytes to hold the data.
> > 
> > We can play with the copy break value to tweak this.

Thanks Michael, now it is perfectly clear. It seems an excellent solution and
easy to implement. I'll work on that.

> 
> This seems like a reasonable solution.  Hopefully the benchmark results
> will come out okay too.

Yes, as Michael suggested, I'll play with the copy break value to see how
the benchmark is affected.

Thank you very much,
Stefano

^ permalink raw reply

* Re: net/mlx5e: bind() always returns EINVAL with XDP_ZEROCOPY
From: Jesper Dangaard Brouer @ 2019-09-02  9:08 UTC (permalink / raw)
  To: Kal Cutter Conley
  Cc: brouer, Karlsson, Magnus, Björn Töpel,
	Maxim Mikityanskiy, Jakub Kicinski,
	Toke Høiland-Jørgensen, Andy Gospodarek,
	xdp-newbies@vger.kernel.org, Saeed Mahameed, Tariq Toukan,
	netdev@vger.kernel.org
In-Reply-To: <CAHApi-=YSo=sOTkRxmY=fct3TePFFdG9oPTRHWYd1AXjk0ACfw@mail.gmail.com>

On Sun, 1 Sep 2019 18:47:15 +0200
Kal Cutter Conley <kal.conley@dectris.com> wrote:

> Hi,
> I figured out the problem. Let me document the issue here for others
> and hopefully start a discussion.
> 
> The mlx5 driver uses special queue ids for ZC. If N is the number of
> configured queues, then for XDP_ZEROCOPY the queue ids start at N. So
> queue ids [0..N) can only be used with XDP_COPY and queue ids [N..2N)
> can only be used with XDP_ZEROCOPY.

Thanks for the followup and explanation on how mlx5 AF_XDP queue
implementation is different from other vendors.


> sudo ethtool -L eth0 combined 16
> sudo samples/bpf/xdpsock -r -i eth0 -c -q 0   # OK
> sudo samples/bpf/xdpsock -r -i eth0 -z -q 0   # ERROR
> sudo samples/bpf/xdpsock -r -i eth0 -c -q 16  # ERROR
> sudo samples/bpf/xdpsock -r -i eth0 -z -q 16  # OK
> 
> Why was this done? To use zerocopy if available and fallback on copy
> mode normally you would set sxdp_flags=0. However, here this is no
> longer possible. To support this driver, you have to first try binding
> with XDP_ZEROCOPY and the special queue id, then if that fails, you
> have to try binding again with a normal queue id. Peculiarities like
> this complicate the XDP user api. Maybe someone can explain the
> benefits?

Thanks for complaining, it is actually valuable. It really illustrates
that the kernel needs to improve in this area, which is what our talk[1] at
LPC2019 (Sep 10) is about.

Title: "Making Networking Queues a First Class Citizen in the Kernel"
 [1] https://linuxplumbersconf.org/event/4/contributions/462/

As you can see, several vendors are actually involved. Kudos to Magnus
for taking initiative here!  It's unfortunately not solved "tomorrow",
as first we have to agree this is needed (facility to register queues),
then agree on API and get commitment from vendors, as this requires
drivers changes.  There is a long road ahead, but I think it will be
worthwhile in the end, as effective use of dedicated hardware queues
(both RX and TX) is key to performance.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer



> On Wed, Aug 7, 2019 at 2:49 PM Kal Cutter Conley <kal.conley@dectris.com> wrote:
> >
> > Hello,
> > I am testing the mlx5e driver with AF_XDP. When I specify
> > XDP_ZEROCOPY, bind() always returns EINVAL. I observe the same problem
> > with the xdpsock sample:
> >
> > sudo samples/bpf/xdpsock -r -i dcb1-port1 -z
> > samples/bpf/xdpsock_user.c:xsk_configure_socket:322: errno:
> > 22/"Invalid argument"
> >
> > Without XDP_ZEROCOPY, everything works as expected. Is this a known
> > issue/limitation? I expected this to be supported looking at the
> > code/commit history.
> >
> > Thanks,
> > Kal  




^ permalink raw reply

* Re: [PATCH v4 1/5] vsock/virtio: limit the memory used per-socket
From: Stefano Garzarella @ 2019-09-02  9:57 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, Stefan Hajnoczi, David S. Miller,
	virtualization, Jason Wang, kvm
In-Reply-To: <20190901061707-mutt-send-email-mst@kernel.org>

On Sun, Sep 01, 2019 at 06:17:58AM -0400, Michael S. Tsirkin wrote:
> On Sun, Sep 01, 2019 at 04:26:19AM -0400, Michael S. Tsirkin wrote:
> > On Thu, Aug 01, 2019 at 03:36:16PM +0200, Stefano Garzarella wrote:
> > > On Thu, Aug 01, 2019 at 09:21:15AM -0400, Michael S. Tsirkin wrote:
> > > > On Thu, Aug 01, 2019 at 12:47:54PM +0200, Stefano Garzarella wrote:
> > > > > On Tue, Jul 30, 2019 at 04:42:25PM -0400, Michael S. Tsirkin wrote:
> > > > > > On Tue, Jul 30, 2019 at 11:35:39AM +0200, Stefano Garzarella wrote:
> > > > > 
> > > > > (...)
> > > > > 
> > > > > > > 
> > > > > > > The problem here is the compatibility. Before this series virtio-vsock
> > > > > > > and vhost-vsock modules had the RX buffer size hard-coded
> > > > > > > (VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE = 4K). So, if we send a buffer smaller
> > > > > > > of 4K, there might be issues.
> > > > > > 
> > > > > > Shouldn't be if they are following the spec. If not let's fix
> > > > > > the broken parts.
> > > > > > 
> > > > > > > 
> > > > > > > Maybe it is the time to add add 'features' to virtio-vsock device.
> > > > > > > 
> > > > > > > Thanks,
> > > > > > > Stefano
> > > > > > 
> > > > > > Why would a remote care about buffer sizes?
> > > > > > 
> > > > > > Let's first see what the issues are. If they exist
> > > > > > we can either fix the bugs, or code the bug as a feature in spec.
> > > > > > 
> > > > > 
> > > > > The vhost_transport '.stream_enqueue' callback
> > > > > [virtio_transport_stream_enqueue()] calls the virtio_transport_send_pkt_info(),
> > > > > passing the user message. This function allocates a new packet, copying
> > > > > the user message, but (before this series) it limits the packet size to
> > > > > the VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (4K):
> > > > > 
> > > > > static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > > 					  struct virtio_vsock_pkt_info *info)
> > > > > {
> > > > >  ...
> > > > > 	/* we can send less than pkt_len bytes */
> > > > > 	if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
> > > > > 		pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
> > > > > 
> > > > > 	/* virtio_transport_get_credit might return less than pkt_len credit */
> > > > > 	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > 
> > > > > 	/* Do not send zero length OP_RW pkt */
> > > > > 	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > 		return pkt_len;
> > > > >  ...
> > > > > }
> > > > > 
> > > > > then it queues the packet for the TX worker calling .send_pkt()
> > > > > [vhost_transport_send_pkt() in the vhost_transport case]
> > > > > 
> > > > > The main function executed by the TX worker is
> > > > > vhost_transport_do_send_pkt() that picks up a buffer from the virtqueue
> > > > > and it tries to copy the packet (up to 4K) on it.  If the buffer
> > > > > allocated from the guest will be smaller then 4K, I think here it will
> > > > > be discarded with an error:
> > > > > 
> > > 
> > > I'm adding more lines to explain better.
> > > 
> > > > > static void
> > > > > vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
> > > > > 				struct vhost_virtqueue *vq)
> > > > > {
> > > 		...
> > > 
> > > 		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> > > 					 &out, &in, NULL, NULL);
> > > 
> > > 		...
> > > 
> > > 		len = iov_length(&vq->iov[out], in);
> > > 		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> > > 
> > > 		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
> > > 		if (nbytes != sizeof(pkt->hdr)) {
> > > 			virtio_transport_free_pkt(pkt);
> > > 			vq_err(vq, "Faulted on copying pkt hdr\n");
> > > 			break;
> > > 		}
> > > 
> > > > >  ...
> > > > > 		nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
> > > > 
> > > > isn't pck len the actual length though?
> > > > 
> > > 
> > > It is the length of the packet that we are copying in the guest RX
> > > buffers pointed by the iov_iter. The guest allocates an iovec with 2
> > > buffers, one for the header and one for the payload (4KB).
> > 
> > BTW at the moment that forces another kmalloc within virtio core. Maybe
> > vsock needs a flag to skip allocation in this case.  Worth benchmarking.
> > See virtqueue_use_indirect which just does total_sg > 1.

Okay, I'll take a look at virtqueue_use_indirect and I'll do some
benchmarking.

> > 
> > > 
> > > > > 		if (nbytes != pkt->len) {
> > > > > 			virtio_transport_free_pkt(pkt);
> > > > > 			vq_err(vq, "Faulted on copying pkt buf\n");
> > > > > 			break;
> > > > > 		}
> > > > >  ...
> > > > > }
> > > > > 
> > > > > 
> > > > > This series changes this behavior since now we will split the packet in
> > > > > vhost_transport_do_send_pkt() depending on the buffer found in the
> > > > > virtqueue.
> > > > > 
> > > > > We didn't change the buffer size in this series, so we still backward
> > > > > compatible, but if we will use buffers smaller than 4K, we should
> > > > > encounter the error described above.
> > 
> > So that's an implementation bug then? It made an assumption
> > of a 4K sized buffer? Or even PAGE_SIZE sized buffer?

Yes, I think it made an assumption and it used this macro as a limit:

include/linux/virtio_vsock.h:13:
    #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE        (1024 * 4)

> 
> Assuming we miss nothing and buffers < 4K are broken,
> I think we need to add this to the spec, possibly with
> a feature bit to relax the requirement that all buffers
> are at least 4k in size.
> 

Okay, should I send a proposal to virtio-dev@lists.oasis-open.org?

Thanks,
Stefano

^ permalink raw reply

* Re: [PATCH net 0/5] net: aquantia: fixes on vlan filters and other conditions
From: Igor Russkikh @ 2019-09-02 10:07 UTC (permalink / raw)
  To: David Miller, jakub.kicinski@netronome.com; +Cc: netdev@vger.kernel.org
In-Reply-To: <20190831.133618.60802477215444924.davem@davemloft.net>


>>
>> LGTM, Fixes tag should had been first there on patch 4.
> 
> Series applied with fixes tag ordering fixed in patch 4.

Thanks Jakub, David,

> You should also perhaps check the return value from
> napi_complete_done() as an optimization for net-next?

Right, thanks, will put that with next net-next patchset.

Regards,
  Igor

^ permalink raw reply

* [PATCH net-next 0/2] mvpp2: per-cpu buffers
From: Matteo Croce @ 2019-09-02 10:21 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, Maxime Chevallier, Marcin Wojtas, Antoine Tenart,
	Stefan Chulski, Nadav Haklai, Lorenzo Bianconi, David S. Miller

This patchset works around a PP2 HW limitation which prevents the use of
per-cpu rx buffers.
The first patch is just a refactor to prepare for the second one.
The second one allocates percpu buffers if the following conditions are met:
- the number of CPUs is less than or equal to 4
- no port is using jumbo frames

If these conditions are not met at load time, or if jumbo frames are enabled
later on, the driver reverts to the shared allocation.

Matteo Croce (2):
  mvpp2: refactor BM pool functions
  mvpp2: percpu buffers

 drivers/net/ethernet/marvell/mvpp2/mvpp2.h    |   4 +
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 272 +++++++++++++++---
 2 files changed, 235 insertions(+), 41 deletions(-)

-- 
2.21.0


^ permalink raw reply

* [PATCH net-next 1/2] mvpp2: refactor BM pool functions
From: Matteo Croce @ 2019-09-02 10:21 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, Maxime Chevallier, Marcin Wojtas, Antoine Tenart,
	Stefan Chulski, Nadav Haklai, Lorenzo Bianconi, David S. Miller
In-Reply-To: <20190902102137.841-1-mcroce@redhat.com>

Refactor mvpp2_bm_pool_create(), mvpp2_bm_pool_destroy() and
mvpp2_bm_pools_init() so that they accept a struct device instead
of a struct platform_device, as they just need platform_device->dev.

Removing this dependency makes the BM code more reusable in contexts
where we don't have a pointer to the platform_device.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 35 +++++++++----------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index ccdd47f3b8fb..871f14cc7284 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -323,8 +323,7 @@ static void mvpp2_frag_free(const struct mvpp2_bm_pool *pool, void *data)
 /* Buffer Manager configuration routines */
 
 /* Create pool */
-static int mvpp2_bm_pool_create(struct platform_device *pdev,
-				struct mvpp2 *priv,
+static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv,
 				struct mvpp2_bm_pool *bm_pool, int size)
 {
 	u32 val;
@@ -343,7 +342,7 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
 	else
 		bm_pool->size_bytes = 2 * sizeof(u64) * size;
 
-	bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes,
+	bm_pool->virt_addr = dma_alloc_coherent(dev, bm_pool->size_bytes,
 						&bm_pool->dma_addr,
 						GFP_KERNEL);
 	if (!bm_pool->virt_addr)
@@ -351,9 +350,9 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
 
 	if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr,
 			MVPP2_BM_POOL_PTR_ALIGN)) {
-		dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+		dma_free_coherent(dev, bm_pool->size_bytes,
 				  bm_pool->virt_addr, bm_pool->dma_addr);
-		dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
+		dev_err(dev, "BM pool %d is not %d bytes aligned\n",
 			bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
 		return -ENOMEM;
 	}
@@ -468,15 +467,14 @@ static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_p
 }
 
 /* Cleanup pool */
-static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
-				 struct mvpp2 *priv,
+static int mvpp2_bm_pool_destroy(struct device *dev, struct mvpp2 *priv,
 				 struct mvpp2_bm_pool *bm_pool)
 {
 	int buf_num;
 	u32 val;
 
 	buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
-	mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+	mvpp2_bm_bufs_free(dev, priv, bm_pool, buf_num);
 
 	/* Check buffer counters after free */
 	buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
@@ -490,14 +488,13 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
 	val |= MVPP2_BM_STOP_MASK;
 	mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
 
-	dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+	dma_free_coherent(dev, bm_pool->size_bytes,
 			  bm_pool->virt_addr,
 			  bm_pool->dma_addr);
 	return 0;
 }
 
-static int mvpp2_bm_pools_init(struct platform_device *pdev,
-			       struct mvpp2 *priv)
+static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv)
 {
 	int i, err, size;
 	struct mvpp2_bm_pool *bm_pool;
@@ -507,7 +504,7 @@ static int mvpp2_bm_pools_init(struct platform_device *pdev,
 	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
 		bm_pool = &priv->bm_pools[i];
 		bm_pool->id = i;
-		err = mvpp2_bm_pool_create(pdev, priv, bm_pool, size);
+		err = mvpp2_bm_pool_create(dev, priv, bm_pool, size);
 		if (err)
 			goto err_unroll_pools;
 		mvpp2_bm_pool_bufsize_set(priv, bm_pool, 0);
@@ -515,13 +512,13 @@ static int mvpp2_bm_pools_init(struct platform_device *pdev,
 	return 0;
 
 err_unroll_pools:
-	dev_err(&pdev->dev, "failed to create BM pool %d, size %d\n", i, size);
+	dev_err(dev, "failed to create BM pool %d, size %d\n", i, size);
 	for (i = i - 1; i >= 0; i--)
-		mvpp2_bm_pool_destroy(pdev, priv, &priv->bm_pools[i]);
+		mvpp2_bm_pool_destroy(dev, priv, &priv->bm_pools[i]);
 	return err;
 }
 
-static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
+static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
 {
 	int i, err;
 
@@ -533,12 +530,12 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
 	}
 
 	/* Allocate and initialize BM pools */
-	priv->bm_pools = devm_kcalloc(&pdev->dev, MVPP2_BM_POOLS_NUM,
+	priv->bm_pools = devm_kcalloc(dev, MVPP2_BM_POOLS_NUM,
 				      sizeof(*priv->bm_pools), GFP_KERNEL);
 	if (!priv->bm_pools)
 		return -ENOMEM;
 
-	err = mvpp2_bm_pools_init(pdev, priv);
+	err = mvpp2_bm_pools_init(dev, priv);
 	if (err < 0)
 		return err;
 	return 0;
@@ -5497,7 +5494,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
 	mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1);
 
 	/* Buffer Manager initialization */
-	err = mvpp2_bm_init(pdev, priv);
+	err = mvpp2_bm_init(&pdev->dev, priv);
 	if (err < 0)
 		return err;
 
@@ -5766,7 +5763,7 @@ static int mvpp2_remove(struct platform_device *pdev)
 	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
 		struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i];
 
-		mvpp2_bm_pool_destroy(pdev, priv, bm_pool);
+		mvpp2_bm_pool_destroy(&pdev->dev, priv, bm_pool);
 	}
 
 	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 2/2] mvpp2: percpu buffers
From: Matteo Croce @ 2019-09-02 10:21 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, Maxime Chevallier, Marcin Wojtas, Antoine Tenart,
	Stefan Chulski, Nadav Haklai, Lorenzo Bianconi, David S. Miller
In-Reply-To: <20190902102137.841-1-mcroce@redhat.com>

Every mvpp2 unit can use up to 8 buffers mapped by the BM (the HW buffer
manager). The HW will place the frames in the buffer pool depending on the
frame size: short (< 128 bytes), long (< 1664) or jumbo (up to 9856).

As any unit can have up to 4 ports, the driver allocates only 2 pools,
one for short and one for long frames, and shares them between ports.
When the first port MTU is set higher than 1664 bytes, a third pool is
allocated for jumbo frames.

This shared allocation makes it impossible to use percpu allocators,
and creates contention between HW queues.

If possible, i.e. if the number of possible CPUs is less than 8 and jumbo
frames are not used, switch to a new scheme: allocate 8 per-cpu pools for
short and long frames and bind every pool to an RXQ.

When the first port MTU is set higher than 1664 bytes, the allocation
scheme is reverted to the old behaviour (3 shared pools), and when all
ports MTU are lowered, the per-cpu buffers are allocated again.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2.h    |   4 +
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 241 ++++++++++++++++--
 2 files changed, 222 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 4d9564ba68f6..c89dd7169e3c 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -683,6 +683,7 @@ enum mvpp2_prs_l3_cast {
 #define MVPP2_BM_SHORT_BUF_NUM		2048
 #define MVPP2_BM_POOL_SIZE_MAX		(16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
 #define MVPP2_BM_POOL_PTR_ALIGN		128
+#define MVPP2_BM_MAX_POOLS		8
 
 /* BM cookie (32 bits) definition */
 #define MVPP2_BM_COOKIE_POOL_OFFS	8
@@ -787,6 +788,9 @@ struct mvpp2 {
 	/* Aggregated TXQs */
 	struct mvpp2_tx_queue *aggr_txqs;
 
+	/* Are we using page_pool with per-cpu pools? */
+	int percpu_pools;
+
 	/* BM pools */
 	struct mvpp2_bm_pool *bm_pools;
 
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 871f14cc7284..637d9269d4d3 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -292,6 +292,26 @@ static void mvpp2_txq_inc_put(struct mvpp2_port *port,
 		txq_pcpu->txq_put_index = 0;
 }
 
+/* Get number of maximum RXQ */
+static int mvpp2_get_nrxqs(struct mvpp2 *priv)
+{
+	unsigned int nrxqs;
+
+	if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE)
+		return 1;
+
+	/* According to the PPv2.2 datasheet and our experiments on
+	 * PPv2.1, RX queues have an allocation granularity of 4 (when
+	 * more than a single one on PPv2.2).
+	 * Round up to nearest multiple of 4.
+	 */
+	nrxqs = (num_possible_cpus() + 3) & ~0x3;
+	if (nrxqs > MVPP2_PORT_MAX_RXQ)
+		nrxqs = MVPP2_PORT_MAX_RXQ;
+
+	return nrxqs;
+}
+
 /* Get number of physical egress port */
 static inline int mvpp2_egress_port(struct mvpp2_port *port)
 {
@@ -496,12 +516,15 @@ static int mvpp2_bm_pool_destroy(struct device *dev, struct mvpp2 *priv,
 
 static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv)
 {
-	int i, err, size;
+	int i, err, size, poolnum = MVPP2_BM_POOLS_NUM;
 	struct mvpp2_bm_pool *bm_pool;
 
+	if (priv->percpu_pools)
+		poolnum = mvpp2_get_nrxqs(priv) * 2;
+
 	/* Create all pools with maximum size */
 	size = MVPP2_BM_POOL_SIZE_MAX;
-	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+	for (i = 0; i < poolnum; i++) {
 		bm_pool = &priv->bm_pools[i];
 		bm_pool->id = i;
 		err = mvpp2_bm_pool_create(dev, priv, bm_pool, size);
@@ -520,9 +543,15 @@ static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv)
 
 static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
 {
-	int i, err;
+	int i, err, poolnum = MVPP2_BM_POOLS_NUM;
 
-	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+	if (priv->percpu_pools)
+		poolnum = mvpp2_get_nrxqs(priv) * 2;
+
+	dev_info(dev, "using %d %s buffers\n", poolnum,
+		 priv->percpu_pools ? "per-cpu" : "shared");
+
+	for (i = 0; i < poolnum; i++) {
 		/* Mask BM all interrupts */
 		mvpp2_write(priv, MVPP2_BM_INTR_MASK_REG(i), 0);
 		/* Clear BM cause register */
@@ -530,7 +559,7 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
 	}
 
 	/* Allocate and initialize BM pools */
-	priv->bm_pools = devm_kcalloc(dev, MVPP2_BM_POOLS_NUM,
+	priv->bm_pools = devm_kcalloc(dev, poolnum,
 				      sizeof(*priv->bm_pools), GFP_KERNEL);
 	if (!priv->bm_pools)
 		return -ENOMEM;
@@ -676,6 +705,13 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
 	phys_addr_t phys_addr;
 	void *buf;
 
+	if (port->priv->percpu_pools &&
+	    bm_pool->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+		netdev_err(port->dev,
+			   "attempted to use jumbo frames with per-cpu pools");
+		return 0;
+	}
+
 	buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
 	total_size = MVPP2_RX_TOTAL_SIZE(buf_size);
 
@@ -719,7 +755,64 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 	struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
 	int num;
 
-	if (pool >= MVPP2_BM_POOLS_NUM) {
+	if ((port->priv->percpu_pools && pool > mvpp2_get_nrxqs(port->priv) * 2) ||
+	    (!port->priv->percpu_pools && pool >= MVPP2_BM_POOLS_NUM)) {
+		netdev_err(port->dev, "Invalid pool %d\n", pool);
+		return NULL;
+	}
+
+	/* Allocate buffers in case BM pool is used as long pool, but packet
+	 * size doesn't match MTU or BM pool hasn't being used yet
+	 */
+	if (new_pool->pkt_size == 0) {
+		int pkts_num;
+
+		/* Set default buffer number or free all the buffers in case
+		 * the pool is not empty
+		 */
+		pkts_num = new_pool->buf_num;
+		if (pkts_num == 0) {
+			if (port->priv->percpu_pools) {
+				if (pool < port->nrxqs)
+					pkts_num = mvpp2_pools[MVPP2_BM_SHORT].buf_num;
+				else
+					pkts_num = mvpp2_pools[MVPP2_BM_LONG].buf_num;
+			} else {
+				pkts_num = mvpp2_pools[pool].buf_num;
+			}
+		} else {
+			mvpp2_bm_bufs_free(port->dev->dev.parent,
+					   port->priv, new_pool, pkts_num);
+		}
+
+		new_pool->pkt_size = pkt_size;
+		new_pool->frag_size =
+			SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
+			MVPP2_SKB_SHINFO_SIZE;
+
+		/* Allocate buffers for this pool */
+		num = mvpp2_bm_bufs_add(port, new_pool, pkts_num);
+		if (num != pkts_num) {
+			WARN(1, "pool %d: %d of %d allocated\n",
+			     new_pool->id, num, pkts_num);
+			return NULL;
+		}
+	}
+
+	mvpp2_bm_pool_bufsize_set(port->priv, new_pool,
+				  MVPP2_RX_BUF_SIZE(new_pool->pkt_size));
+
+	return new_pool;
+}
+
+static struct mvpp2_bm_pool *
+mvpp2_bm_pool_use_percpu(struct mvpp2_port *port, int type,
+			 unsigned int pool, int pkt_size)
+{
+	struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
+	int num;
+
+	if (pool > port->nrxqs * 2) {
 		netdev_err(port->dev, "Invalid pool %d\n", pool);
 		return NULL;
 	}
@@ -735,7 +828,7 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 		 */
 		pkts_num = new_pool->buf_num;
 		if (pkts_num == 0)
-			pkts_num = mvpp2_pools[pool].buf_num;
+			pkts_num = mvpp2_pools[type].buf_num;
 		else
 			mvpp2_bm_bufs_free(port->dev->dev.parent,
 					   port->priv, new_pool, pkts_num);
@@ -760,11 +853,11 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 	return new_pool;
 }
 
-/* Initialize pools for swf */
-static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
+/* Initialize pools for swf, shared buffers variant */
+static int mvpp2_swf_bm_pool_init_shared(struct mvpp2_port *port)
 {
-	int rxq;
 	enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+	int rxq;
 
 	/* If port pkt_size is higher than 1518B:
 	 * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
@@ -808,6 +901,47 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 	return 0;
 }
 
+/* Initialize pools for swf, percpu buffers variant */
+static int mvpp2_swf_bm_pool_init_percpu(struct mvpp2_port *port)
+{
+	struct mvpp2_bm_pool *p;
+	int i;
+
+	for (i = 0; i < port->nrxqs; i++) {
+		p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_SHORT, i,
+					     mvpp2_pools[MVPP2_BM_SHORT].pkt_size);
+		if (!p)
+			return -ENOMEM;
+
+		port->priv->bm_pools[i].port_map |= BIT(port->id);
+		mvpp2_rxq_short_pool_set(port, i, port->priv->bm_pools[i].id);
+	}
+
+	for (i = 0; i < port->nrxqs; i++) {
+		p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_LONG, i + port->nrxqs,
+					     mvpp2_pools[MVPP2_BM_LONG].pkt_size);
+		if (!p)
+			return -ENOMEM;
+
+		port->priv->bm_pools[i + port->nrxqs].port_map |= BIT(port->id);
+		mvpp2_rxq_long_pool_set(port, i,
+					port->priv->bm_pools[i + port->nrxqs].id);
+	}
+
+	port->pool_long = NULL;
+	port->pool_short = NULL;
+
+	return 0;
+}
+
+static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
+{
+	if (port->priv->percpu_pools)
+		return mvpp2_swf_bm_pool_init_percpu(port);
+	else
+		return mvpp2_swf_bm_pool_init_shared(port);
+}
+
 static void mvpp2_set_hw_csum(struct mvpp2_port *port,
 			      enum mvpp2_bm_pool_log_num new_long_pool)
 {
@@ -834,6 +968,9 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 	enum mvpp2_bm_pool_log_num new_long_pool;
 	int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
 
+	if (port->priv->percpu_pools)
+		goto out_set;
+
 	/* If port MTU is higher than 1518B:
 	 * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
 	 * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
@@ -863,6 +1000,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 		mvpp2_set_hw_csum(port, new_long_pool);
 	}
 
+out_set:
 	dev->mtu = mtu;
 	dev->wanted_features = dev->features;
 
@@ -3706,10 +3844,48 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
 	return err;
 }
 
+/* Shut down all the ports, reconfigure the pools as percpu or shared,
+ * then bring up again all ports.
+ */
+static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu)
+{
+	int numbufs = MVPP2_BM_POOLS_NUM, i;
+	struct mvpp2_port *port = NULL;
+	bool status[MVPP2_MAX_PORTS];
+
+	for (i = 0; i < priv->port_count; i++) {
+		port = priv->port_list[i];
+		status[i] = netif_running(port->dev);
+		if (status[i])
+			mvpp2_stop(port->dev);
+	}
+
+	/* nrxqs is the same for all ports */
+	if (priv->percpu_pools)
+		numbufs = port->nrxqs * 2;
+
+	for (i = 0; i < numbufs; i++)
+		mvpp2_bm_pool_destroy(port->dev->dev.parent, priv, &priv->bm_pools[i]);
+
+	devm_kfree(port->dev->dev.parent, priv->bm_pools);
+	priv->percpu_pools = percpu;
+	mvpp2_bm_init(port->dev->dev.parent, priv);
+
+	for (i = 0; i < priv->port_count; i++) {
+		port = priv->port_list[i];
+		mvpp2_swf_bm_pool_init(port);
+		if (status[i])
+			mvpp2_open(port->dev);
+	}
+
+	return 0;
+}
+
 static int mvpp2_change_mtu(struct net_device *dev, int mtu)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	bool running = netif_running(dev);
+	struct mvpp2 *priv = port->priv;
 	int err;
 
 	if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) {
@@ -3718,6 +3894,31 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
 		mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
 	}
 
+	if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
+		if (priv->percpu_pools) {
+			netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
+			mvpp2_bm_switch_buffers(priv, false);
+		}
+	} else {
+		bool jumbo = false;
+		int i;
+
+		for (i = 0; i < priv->port_count; i++)
+			if (priv->port_list[i] != port &&
+			    MVPP2_RX_PKT_SIZE(priv->port_list[i]->dev->mtu) >
+			    MVPP2_BM_LONG_PKT_SIZE) {
+				jumbo = true;
+				break;
+			}
+
+		/* No port is using jumbo frames */
+		if (!jumbo) {
+			dev_info(port->dev->dev.parent,
+				 "all ports have a low MTU, switching to per-cpu buffers");
+			mvpp2_bm_switch_buffers(priv, true);
+		}
+	}
+
 	if (running)
 		mvpp2_stop_dev(port);
 
@@ -5025,18 +5226,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	}
 
 	ntxqs = MVPP2_MAX_TXQ;
-	if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE) {
-		nrxqs = 1;
-	} else {
-		/* According to the PPv2.2 datasheet and our experiments on
-		 * PPv2.1, RX queues have an allocation granularity of 4 (when
-		 * more than a single one on PPv2.2).
-		 * Round up to nearest multiple of 4.
-		 */
-		nrxqs = (num_possible_cpus() + 3) & ~0x3;
-		if (nrxqs > MVPP2_PORT_MAX_RXQ)
-			nrxqs = MVPP2_PORT_MAX_RXQ;
-	}
+	nrxqs = mvpp2_get_nrxqs(priv);
 
 	dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
 	if (!dev)
@@ -5202,7 +5392,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		dev->features |= NETIF_F_NTUPLE;
 	}
 
-	mvpp2_set_hw_csum(port, port->pool_long->id);
+	if (!port->priv->percpu_pools)
+		mvpp2_set_hw_csum(port, port->pool_long->id);
 
 	dev->vlan_features |= features;
 	dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
@@ -5582,6 +5773,10 @@ static int mvpp2_probe(struct platform_device *pdev)
 			priv->sysctrl_base = NULL;
 	}
 
+	if (priv->hw_version == MVPP22 &&
+	    mvpp2_get_nrxqs(priv) * 2 <= MVPP2_BM_MAX_POOLS)
+		priv->percpu_pools = 1;
+
 	mvpp2_setup_bm_pool();
 
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 0/3]  dpaa2-eth: Add new statistics counters
From: Ioana Radulescu @ 2019-09-02 10:23 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei

Recent firmware versions offer access to more DPNI statistics
counters. Add the relevant ones to ethtool interface stats.

Also we can now make use of a new counter for in flight egress frames
to avoid sleeping an arbitrary amount of time in the ndo_stop routine.

v2: in patch 2/3, treat separately the error case for unsupported
statistics pages

Ioana Radulescu (3):
  dpaa2-eth: Minor refactoring in ethtool stats
  dpaa2-eth: Add new DPNI statistics counters
  dpaa2-eth: Poll Tx pending frames counter on if down

 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   | 31 +++++++++++++++--
 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 37 +++++++++++++-------
 drivers/net/ethernet/freescale/dpaa2/dpni.c        |  2 +-
 drivers/net/ethernet/freescale/dpaa2/dpni.h        | 40 ++++++++++++++++++++++
 4 files changed, 93 insertions(+), 17 deletions(-)

-- 
2.7.4


^ permalink raw reply

* [PATCH net-next v2 2/3] dpaa2-eth: Add new DPNI statistics counters
From: Ioana Radulescu @ 2019-09-02 10:23 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei
In-Reply-To: <1567419799-28179-1-git-send-email-ruxandra.radulescu@nxp.com>

Recent firmware versions expose more DPNI counters.
Export relevant ones via ethtool -S.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
v2: treat separately error case for unsupported statistics pages

 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 19 ++++++++--
 drivers/net/ethernet/freescale/dpaa2/dpni.c        |  2 +-
 drivers/net/ethernet/freescale/dpaa2/dpni.h        | 40 ++++++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 1c5b54b..0aa1c34 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -28,6 +28,11 @@ static char dpaa2_ethtool_stats[][ETH_GSTRING_LEN] = {
 	"[hw] rx nobuffer discards",
 	"[hw] tx discarded frames",
 	"[hw] tx confirmed frames",
+	"[hw] tx dequeued bytes",
+	"[hw] tx dequeued frames",
+	"[hw] tx rejected bytes",
+	"[hw] tx rejected frames",
+	"[hw] tx pending frames",
 };
 
 #define DPAA2_ETH_NUM_STATS	ARRAY_SIZE(dpaa2_ethtool_stats)
@@ -192,16 +197,26 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 		sizeof(dpni_stats.page_0),
 		sizeof(dpni_stats.page_1),
 		sizeof(dpni_stats.page_2),
+		sizeof(dpni_stats.page_3),
+		sizeof(dpni_stats.page_4),
+		sizeof(dpni_stats.page_5),
+		sizeof(dpni_stats.page_6),
 	};
 
 	memset(data, 0,
 	       sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
 
 	/* Print standard counters, from DPNI statistics */
-	for (j = 0; j <= 2; j++) {
+	for (j = 0; j <= 6; j++) {
+		/* We're not interested in pages 4 & 5 for now */
+		if (j == 4 || j == 5)
+			continue;
 		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token,
 					  j, &dpni_stats);
-		if (err != 0)
+		if (err == -EINVAL)
+			/* Older firmware versions don't support all pages */
+			memset(&dpni_stats, 0, sizeof(dpni_stats));
+		else
 			netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
 
 		num_cnt = dpni_stats_page_size[j] / sizeof(u64);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index 05e3089..dd54e69 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1470,7 +1470,7 @@ int dpni_get_queue(struct fsl_mc_io *mc_io,
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPNI object
  * @page:	Selects the statistics page to retrieve, see
- *		DPNI_GET_STATISTICS output. Pages are numbered 0 to 2.
+ *		DPNI_GET_STATISTICS output. Pages are numbered 0 to 6.
  * @stat:	Structure containing the statistics
  *
  * Return:	'0' on Success; Error code otherwise.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index 3e8fc6c..fd583911 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -416,6 +416,26 @@ int dpni_get_tx_data_offset(struct fsl_mc_io	*mc_io,
  *	lack of buffers
  * @page_2.egress_discarded_frames: Egress discarded frame count
  * @page_2.egress_confirmed_frames: Egress confirmed frame count
+ * @page_3: Page_3 statistics structure
+ * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes
+ *	dequeued from egress FQs
+ * @page_3.egress_dequeue_frames: Cumulative count of the number of frames
+ *	dequeued from egress FQs
+ * @page_3.egress_reject_bytes: Cumulative count of the number of bytes in
+ *	egress frames whose enqueue was rejected
+ * @page_3.egress_reject_frames: Cumulative count of the number of egress
+ *	frames whose enqueue was rejected
+ * @page_4: Page_4 statistics structure: congestion points
+ * @page_4.cgr_reject_frames: number of rejected frames due to congestion point
+ * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point
+ * @page_5: Page_5 statistics structure: policer
+ * @page_5.policer_cnt_red: number of red colored frames
+ * @page_5.policer_cnt_yellow: number of yellow colored frames
+ * @page_5.policer_cnt_green: number of green colored frames
+ * @page_5.policer_cnt_re_red: number of recolored red frames
+ * @page_5.policer_cnt_re_yellow: number of recolored yellow frames
+ * @page_6: Page_6 statistics structure
+ * @page_6.tx_pending_frames: total number of frames pending in egress FQs
  * @raw: raw statistics structure, used to index counters
  */
 union dpni_statistics {
@@ -443,6 +463,26 @@ union dpni_statistics {
 		u64 egress_confirmed_frames;
 	} page_2;
 	struct {
+		u64 egress_dequeue_bytes;
+		u64 egress_dequeue_frames;
+		u64 egress_reject_bytes;
+		u64 egress_reject_frames;
+	} page_3;
+	struct {
+		u64 cgr_reject_frames;
+		u64 cgr_reject_bytes;
+	} page_4;
+	struct {
+		u64 policer_cnt_red;
+		u64 policer_cnt_yellow;
+		u64 policer_cnt_green;
+		u64 policer_cnt_re_red;
+		u64 policer_cnt_re_yellow;
+	} page_5;
+	struct {
+		u64 tx_pending_frames;
+	} page_6;
+	struct {
 		u64 counter[DPNI_STATISTICS_CNT];
 	} raw;
 };
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next v2 3/3] dpaa2-eth: Poll Tx pending frames counter on if down
From: Ioana Radulescu @ 2019-09-02 10:23 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei
In-Reply-To: <1567419799-28179-1-git-send-email-ruxandra.radulescu@nxp.com>

Starting with firmware version MC10.18.0, a new counter for in flight
Tx frames is offered. Use it when bringing down the interface to
determine when all pending Tx frames have been processed by hardware
instead of sleeping a fixed amount of time.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
v2: no changes

 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 31 +++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 5402867..162d7d8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1348,7 +1348,7 @@ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
 	return total;
 }
 
-static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
+static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
 {
 	int retries = 10;
 	u32 pending;
@@ -1360,6 +1360,31 @@ static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
 	} while (pending && --retries);
 }
 
+#define DPNI_TX_PENDING_VER_MAJOR	7
+#define DPNI_TX_PENDING_VER_MINOR	13
+static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
+{
+	union dpni_statistics stats;
+	int retries = 10;
+	int err;
+
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_TX_PENDING_VER_MAJOR,
+				   DPNI_TX_PENDING_VER_MINOR) < 0)
+		goto out;
+
+	do {
+		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, 6,
+					  &stats);
+		if (err)
+			goto out;
+		if (stats.page_6.tx_pending_frames == 0)
+			return;
+	} while (--retries);
+
+out:
+	msleep(500);
+}
+
 static int dpaa2_eth_stop(struct net_device *net_dev)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
@@ -1379,7 +1404,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
 	 * on WRIOP. After it finishes, wait until all remaining frames on Rx
 	 * and Tx conf queues are consumed on NAPI poll.
 	 */
-	msleep(500);
+	wait_for_egress_fq_empty(priv);
 
 	do {
 		dpni_disable(priv->mc_io, 0, priv->mc_token);
@@ -1395,7 +1420,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
 		 */
 	}
 
-	wait_for_fq_empty(priv);
+	wait_for_ingress_fq_empty(priv);
 	disable_ch_napi(priv);
 
 	/* Empty the buffer pool */
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next v2 1/3] dpaa2-eth: Minor refactoring in ethtool stats
From: Ioana Radulescu @ 2019-09-02 10:23 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei
In-Reply-To: <1567419799-28179-1-git-send-email-ruxandra.radulescu@nxp.com>

As we prepare to read more pages from the DPNI stat counters,
reorganize the code a bit to make it easier to extend.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
v2: no changes

 drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 93076fe..1c5b54b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -188,6 +188,11 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	struct dpaa2_eth_drv_stats *extras;
 	struct dpaa2_eth_ch_stats *ch_stats;
+	int dpni_stats_page_size[DPNI_STATISTICS_CNT] = {
+		sizeof(dpni_stats.page_0),
+		sizeof(dpni_stats.page_1),
+		sizeof(dpni_stats.page_2),
+	};
 
 	memset(data, 0,
 	       sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
@@ -198,17 +203,8 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 					  j, &dpni_stats);
 		if (err != 0)
 			netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
-		switch (j) {
-		case 0:
-			num_cnt = sizeof(dpni_stats.page_0) / sizeof(u64);
-			break;
-		case 1:
-			num_cnt = sizeof(dpni_stats.page_1) / sizeof(u64);
-			break;
-		case 2:
-			num_cnt = sizeof(dpni_stats.page_2) / sizeof(u64);
-			break;
-		}
+
+		num_cnt = dpni_stats_page_size[j] / sizeof(u64);
 		for (k = 0; k < num_cnt; k++)
 			*(data + i++) = dpni_stats.raw.counter[k];
 	}
-- 
2.7.4


^ permalink raw reply related

* [PATCH net] rxrpc: Fix misplaced traceline
From: David Howells @ 2019-09-02 10:34 UTC (permalink / raw)
  To: netdev; +Cc: Hillf Danton, dhowells, linux-afs, linux-kernel

There's a misplaced traceline in rxrpc_input_packet() which is looking at a
packet that just got released rather than the replacement packet.

Fix this by moving the traceline after the assignment that moves the new
packet pointer to the actual packet pointer.

Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()")
Reported-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---

 net/rxrpc/input.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d122c53c8697..157be1ff8697 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1262,8 +1262,8 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 
 			if (nskb != skb) {
 				rxrpc_eaten_skb(skb, rxrpc_skb_received);
-				rxrpc_new_skb(skb, rxrpc_skb_unshared);
 				skb = nskb;
+				rxrpc_new_skb(skb, rxrpc_skb_unshared);
 				sp = rxrpc_skb(skb);
 			}
 		}


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox