Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 08/13] net: mvpp2: 1000baseX support
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: ymarkman, Antoine Tenart, netdev, linux-kernel, maxime.chevallier,
	nadavh, thomas.petazzoni, miquel.raynal, stefanc, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

This patch adds the 1000Base-X PHY mode support in the Marvell PPv2
driver. 1000Base-X is quite close the SGMII and uses nearly the same
code path.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/marvell/mvpp2.c | 73 ++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index da00e11dc178..4775ab56075e 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -4869,6 +4869,7 @@ static int mvpp22_gop_init(struct mvpp2_port *port)
 		mvpp22_gop_init_rgmii(port);
 		break;
 	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_1000BASEX:
 		mvpp22_gop_init_sgmii(port);
 		break;
 	case PHY_INTERFACE_MODE_10GKR:
@@ -4906,7 +4907,8 @@ static void mvpp22_gop_unmask_irq(struct mvpp2_port *port)
 	u32 val;
 
 	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
 		/* Enable the GMAC link status irq for this port */
 		val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
 		val |= MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
@@ -4936,7 +4938,8 @@ static void mvpp22_gop_mask_irq(struct mvpp2_port *port)
 	}
 
 	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
 		val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
 		val &= ~MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
 		writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
@@ -4948,7 +4951,8 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
 	u32 val;
 
 	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
 		val = readl(port->base + MVPP22_GMAC_INT_MASK);
 		val |= MVPP22_GMAC_INT_MASK_LINK_STAT;
 		writel(val, port->base + MVPP22_GMAC_INT_MASK);
@@ -4973,6 +4977,7 @@ static int mvpp22_comphy_init(struct mvpp2_port *port)
 
 	switch (port->phy_interface) {
 	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_1000BASEX:
 		mode = PHY_MODE_SGMII;
 		break;
 	case PHY_INTERFACE_MODE_10GKR:
@@ -5052,7 +5057,8 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port,
 	else
 		val &= ~MVPP2_GMAC_GMII_LB_EN_MASK;
 
-	if (port->phy_interface == PHY_INTERFACE_MODE_SGMII)
+	if (port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX)
 		val |= MVPP2_GMAC_PCS_LB_EN_MASK;
 	else
 		val &= ~MVPP2_GMAC_PCS_LB_EN_MASK;
@@ -6262,7 +6268,8 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
 				link = true;
 		}
 	} else if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-		   port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+		   port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
+		   port->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
 		val = readl(port->base + MVPP22_GMAC_INT_STAT);
 		if (val & MVPP22_GMAC_INT_STAT_LINK) {
 			event = true;
@@ -8029,20 +8036,25 @@ static void mvpp2_phylink_validate(struct net_device *dev,
 	phylink_set(mask, Pause);
 	phylink_set(mask, Asym_Pause);
 
-	phylink_set(mask, 10baseT_Half);
-	phylink_set(mask, 10baseT_Full);
-	phylink_set(mask, 100baseT_Half);
-	phylink_set(mask, 100baseT_Full);
-	phylink_set(mask, 1000baseT_Full);
-	phylink_set(mask, 10000baseT_Full);
-
-	if (state->interface == PHY_INTERFACE_MODE_10GKR) {
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_10GKR:
 		phylink_set(mask, 10000baseCR_Full);
 		phylink_set(mask, 10000baseSR_Full);
 		phylink_set(mask, 10000baseLR_Full);
 		phylink_set(mask, 10000baseLRM_Full);
 		phylink_set(mask, 10000baseER_Full);
 		phylink_set(mask, 10000baseKR_Full);
+		/* Fall-through */
+	default:
+		phylink_set(mask, 10baseT_Half);
+		phylink_set(mask, 10baseT_Full);
+		phylink_set(mask, 100baseT_Half);
+		phylink_set(mask, 100baseT_Full);
+		phylink_set(mask, 10000baseT_Full);
+		/* Fall-through */
+	case PHY_INTERFACE_MODE_1000BASEX:
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseX_Full);
 	}
 
 	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -8081,12 +8093,18 @@ static void mvpp2_gmac_link_state(struct mvpp2_port *port,
 	state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
 	state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
 
-	if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+	switch (port->phy_interface) {
+	case PHY_INTERFACE_MODE_1000BASEX:
 		state->speed = SPEED_1000;
-	else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
-		state->speed = SPEED_100;
-	else
-		state->speed = SPEED_10;
+		break;
+	default:
+		if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+			state->speed = SPEED_1000;
+		else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
+			state->speed = SPEED_100;
+		else
+			state->speed = SPEED_10;
+	}
 
 	state->pause = 0;
 	if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
@@ -8177,7 +8195,18 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
 	ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK;
 	ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK);
 
-	an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+	if (state->interface == PHY_INTERFACE_MODE_1000BASEX) {
+		/* 1000BaseX port cannot negotiate speed nor can it negotiate
+		 * duplex: they are always operating with a fixed speed of
+		 * 1000Mbps in full duplex, so force 1000 speed and full duplex
+		 * here.
+		 */
+		ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
+		an |= MVPP2_GMAC_CONFIG_GMII_SPEED |
+		      MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+	} else {
+		an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+	}
 
 	if (state->duplex)
 		an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
@@ -8186,7 +8215,8 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
 	if (phylink_test(state->advertising, Asym_Pause))
 		an |= MVPP2_GMAC_FC_ADV_ASM_EN;
 
-	if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+	if (state->interface == PHY_INTERFACE_MODE_SGMII ||
+	    state->interface == PHY_INTERFACE_MODE_1000BASEX) {
 		an |= MVPP2_GMAC_IN_BAND_AUTONEG;
 		ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
 
@@ -8248,7 +8278,8 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 	if (state->interface == PHY_INTERFACE_MODE_10GKR)
 		mvpp2_xlg_config(port, mode, state);
 	else if (phy_interface_mode_is_rgmii(state->interface) ||
-		 state->interface == PHY_INTERFACE_MODE_SGMII)
+		 state->interface == PHY_INTERFACE_MODE_SGMII ||
+		 state->interface == PHY_INTERFACE_MODE_1000BASEX)
 		mvpp2_gmac_config(port, mode, state);
 
 	if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 07/13] phy: cp110-comphy: 2.5G SGMII mode
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: ymarkman, Antoine Tenart, netdev, linux-kernel, maxime.chevallier,
	nadavh, thomas.petazzoni, miquel.raynal, stefanc, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

This patch allow the CP110 comphy to configure some lanes in the
2.5G SGMII mode. This mode is quite close to SGMII and uses nearly the
same code path.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
index a0d522154cdf..4ef429250d7b 100644
--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -135,19 +135,25 @@ struct mvebu_comhy_conf {
 static const struct mvebu_comhy_conf mvebu_comphy_cp110_modes[] = {
 	/* lane 0 */
 	MVEBU_COMPHY_CONF(0, 1, PHY_MODE_SGMII, 0x1),
+	MVEBU_COMPHY_CONF(0, 1, PHY_MODE_2500SGMII, 0x1),
 	/* lane 1 */
 	MVEBU_COMPHY_CONF(1, 2, PHY_MODE_SGMII, 0x1),
+	MVEBU_COMPHY_CONF(1, 2, PHY_MODE_2500SGMII, 0x1),
 	/* lane 2 */
 	MVEBU_COMPHY_CONF(2, 0, PHY_MODE_SGMII, 0x1),
+	MVEBU_COMPHY_CONF(2, 0, PHY_MODE_2500SGMII, 0x1),
 	MVEBU_COMPHY_CONF(2, 0, PHY_MODE_10GKR, 0x1),
 	/* lane 3 */
 	MVEBU_COMPHY_CONF(3, 1, PHY_MODE_SGMII, 0x2),
+	MVEBU_COMPHY_CONF(3, 1, PHY_MODE_2500SGMII, 0x2),
 	/* lane 4 */
 	MVEBU_COMPHY_CONF(4, 0, PHY_MODE_SGMII, 0x2),
+	MVEBU_COMPHY_CONF(4, 0, PHY_MODE_2500SGMII, 0x2),
 	MVEBU_COMPHY_CONF(4, 0, PHY_MODE_10GKR, 0x2),
 	MVEBU_COMPHY_CONF(4, 1, PHY_MODE_SGMII, 0x1),
 	/* lane 5 */
 	MVEBU_COMPHY_CONF(5, 2, PHY_MODE_SGMII, 0x1),
+	MVEBU_COMPHY_CONF(5, 2, PHY_MODE_2500SGMII, 0x1),
 };
 
 struct mvebu_comphy_priv {
@@ -206,6 +212,10 @@ static void mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane,
 	if (mode == PHY_MODE_10GKR)
 		val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xe) |
 		       MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xe);
+	else if (mode == PHY_MODE_2500SGMII)
+		val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x8) |
+		       MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x8) |
+		       MVEBU_COMPHY_SERDES_CFG0_HALF_BUS;
 	else if (mode == PHY_MODE_SGMII)
 		val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x6) |
 		       MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x6) |
@@ -296,13 +306,13 @@ static int mvebu_comphy_init_plls(struct mvebu_comphy_lane *lane,
 	return 0;
 }
 
-static int mvebu_comphy_set_mode_sgmii(struct phy *phy)
+static int mvebu_comphy_set_mode_sgmii(struct phy *phy, enum phy_mode mode)
 {
 	struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
 	struct mvebu_comphy_priv *priv = lane->priv;
 	u32 val;
 
-	mvebu_comphy_ethernet_init_reset(lane, PHY_MODE_SGMII);
+	mvebu_comphy_ethernet_init_reset(lane, mode);
 
 	val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
 	val &= ~MVEBU_COMPHY_RX_CTRL1_CLK8T_EN;
@@ -487,7 +497,8 @@ static int mvebu_comphy_power_on(struct phy *phy)
 
 	switch (lane->mode) {
 	case PHY_MODE_SGMII:
-		ret = mvebu_comphy_set_mode_sgmii(phy);
+	case PHY_MODE_2500SGMII:
+		ret = mvebu_comphy_set_mode_sgmii(phy, lane->mode);
 		break;
 	case PHY_MODE_10GKR:
 		ret = mvebu_comphy_set_mode_10gkr(phy);
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 06/13] phy: add 2.5G SGMII mode to the phy_mode enum
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: Antoine Tenart, netdev, linux-kernel, thomas.petazzoni,
	maxime.chevallier, miquel.raynal, nadavh, stefanc, ymarkman, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

This patch adds one more generic PHY mode to the phy_mode enum, to allow
configuring generic PHYs to the 2.5G SGMII mode by using the set_mode
callback.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 include/linux/phy/phy.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index c9d14eeee7f5..9713aebdd348 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -36,6 +36,7 @@ enum phy_mode {
 	PHY_MODE_USB_DEVICE_SS,
 	PHY_MODE_USB_OTG,
 	PHY_MODE_SGMII,
+	PHY_MODE_2500SGMII,
 	PHY_MODE_10GKR,
 	PHY_MODE_UFS_HS_A,
 	PHY_MODE_UFS_HS_B,
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 05/13] net: mvpp2: phylink support
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: Antoine Tenart, netdev, linux-kernel, thomas.petazzoni,
	maxime.chevallier, miquel.raynal, nadavh, stefanc, ymarkman, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

Convert the PPv2 driver to implement phylink helpers, and use phylink in
DT mode. The other mode supported is ACPI, which will need further work
in order to be entirely compatible with phylink.

The MAC and GoP configuration functions were completely moved to fit
into the phylink helpers. When a PHY is always present between the MAC
and the physical port, phylink only is used, but when this is not the
case (the MAC directly is connected to the physical port) the link IRQ
is used to detect changes in the link state and call phylink_mac_change.

The ACPI mode do not uses phylink as of now, and the changes shouldn't
impact its use.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/marvell/Kconfig |   1 +
 drivers/net/ethernet/marvell/mvpp2.c | 832 ++++++++++++++++-----------
 2 files changed, 497 insertions(+), 336 deletions(-)

diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index ebe5c9148935..cc2f7701e71e 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -86,6 +86,7 @@ config MVPP2
 	depends on ARCH_MVEBU || COMPILE_TEST
 	depends on HAS_DMA
 	select MVMDIO
+	select PHYLINK
 	---help---
 	  This driver supports the network interface units in the
 	  Marvell ARMADA 375, 7K and 8K SoCs.
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 77dd91e3d962..da00e11dc178 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -29,6 +29,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/phy/phy.h>
 #include <linux/clk.h>
 #include <linux/hrtimer.h>
@@ -359,15 +360,23 @@
 #define     MVPP2_GMAC_FORCE_LINK_PASS		BIT(1)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG		BIT(2)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS	BIT(3)
+#define     MVPP2_GMAC_IN_BAND_RESTART_AN	BIT(4)
 #define     MVPP2_GMAC_CONFIG_MII_SPEED	BIT(5)
 #define     MVPP2_GMAC_CONFIG_GMII_SPEED	BIT(6)
 #define     MVPP2_GMAC_AN_SPEED_EN		BIT(7)
 #define     MVPP2_GMAC_FC_ADV_EN		BIT(9)
+#define     MVPP2_GMAC_FC_ADV_ASM_EN		BIT(10)
 #define     MVPP2_GMAC_FLOW_CTRL_AUTONEG	BIT(11)
 #define     MVPP2_GMAC_CONFIG_FULL_DUPLEX	BIT(12)
 #define     MVPP2_GMAC_AN_DUPLEX_EN		BIT(13)
 #define MVPP2_GMAC_STATUS0			0x10
 #define     MVPP2_GMAC_STATUS0_LINK_UP		BIT(0)
+#define     MVPP2_GMAC_STATUS0_GMII_SPEED	BIT(1)
+#define     MVPP2_GMAC_STATUS0_MII_SPEED	BIT(2)
+#define     MVPP2_GMAC_STATUS0_FULL_DUPLEX	BIT(3)
+#define     MVPP2_GMAC_STATUS0_RX_PAUSE		BIT(6)
+#define     MVPP2_GMAC_STATUS0_TX_PAUSE		BIT(7)
+#define     MVPP2_GMAC_STATUS0_AN_COMPLETE	BIT(11)
 #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG		0x1c
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS	6
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK	0x1fc0
@@ -379,6 +388,8 @@
 #define     MVPP22_GMAC_INT_MASK_LINK_STAT	BIT(1)
 #define MVPP22_GMAC_CTRL_4_REG			0x90
 #define     MVPP22_CTRL4_EXT_PIN_GMII_SEL	BIT(0)
+#define     MVPP22_CTRL4_RX_FC_EN		BIT(3)
+#define     MVPP22_CTRL4_TX_FC_EN		BIT(4)
 #define     MVPP22_CTRL4_DP_CLK_SEL		BIT(5)
 #define     MVPP22_CTRL4_SYNC_BYPASS_DIS	BIT(6)
 #define     MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE	BIT(7)
@@ -392,6 +403,7 @@
 #define     MVPP22_XLG_CTRL0_PORT_EN		BIT(0)
 #define     MVPP22_XLG_CTRL0_MAC_RESET_DIS	BIT(1)
 #define     MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN	BIT(7)
+#define     MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN	BIT(8)
 #define     MVPP22_XLG_CTRL0_MIB_CNT_DIS	BIT(14)
 #define MVPP22_XLG_CTRL1_REG			0x104
 #define     MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS	0
@@ -1017,6 +1029,9 @@ struct mvpp2_port {
 	/* Firmware node associated to the port */
 	struct fwnode_handle *fwnode;
 
+	/* Is a PHY always connected to the port */
+	bool has_phy;
+
 	/* Per-port registers' base address */
 	void __iomem *base;
 	void __iomem *stats_base;
@@ -1044,12 +1059,11 @@ struct mvpp2_port {
 	struct mutex gather_stats_lock;
 	struct delayed_work stats_work;
 
+	struct device_node *of_node;
+
 	phy_interface_t phy_interface;
-	struct device_node *phy_node;
+	struct phylink *phylink;
 	struct phy *comphy;
-	unsigned int link;
-	unsigned int duplex;
-	unsigned int speed;
 
 	struct mvpp2_bm_pool *pool_long;
 	struct mvpp2_bm_pool *pool_short;
@@ -1338,6 +1352,12 @@ struct mvpp2_bm_pool {
 	 (addr) < (txq_pcpu)->tso_headers_dma + \
 	 (txq_pcpu)->size * TSO_HEADER_SIZE)
 
+/* The prototype is added here to be used in start_dev when using ACPI. This
+ * will be removed once phylink is used for all modes (dt+ACPI).
+ */
+static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+			     const struct phylink_link_state *state);
+
 /* Queue modes */
 #define MVPP2_QDIST_SINGLE_MODE	0
 #define MVPP2_QDIST_MULTI_MODE	1
@@ -4969,133 +4989,6 @@ static int mvpp22_comphy_init(struct mvpp2_port *port)
 	return phy_power_on(port->comphy);
 }
 
-static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port)
-{
-	u32 val;
-
-	if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-		val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
-		val |= MVPP22_CTRL4_SYNC_BYPASS_DIS | MVPP22_CTRL4_DP_CLK_SEL |
-		       MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
-		val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
-		writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
-	} else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
-		val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
-		val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
-		       MVPP22_CTRL4_SYNC_BYPASS_DIS |
-		       MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
-		val &= ~MVPP22_CTRL4_DP_CLK_SEL;
-		writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
-	}
-
-	/* The port is connected to a copper PHY */
-	val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
-	val &= ~MVPP2_GMAC_PORT_TYPE_MASK;
-	writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
-
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	val |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS |
-	       MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
-	       MVPP2_GMAC_AN_DUPLEX_EN;
-	if (port->phy_interface == PHY_INTERFACE_MODE_SGMII)
-		val |= MVPP2_GMAC_IN_BAND_AUTONEG;
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-static void mvpp2_port_mii_gmac_configure(struct mvpp2_port *port)
-{
-	u32 val;
-
-	/* Force link down */
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
-	val |= MVPP2_GMAC_FORCE_LINK_DOWN;
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-
-	/* Set the GMAC in a reset state */
-	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-	val |= MVPP2_GMAC_PORT_RESET_MASK;
-	writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-	/* Configure the PCS and in-band AN */
-	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-	if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-	        val |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
-	} else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
-		val &= ~MVPP2_GMAC_PCS_ENABLE_MASK;
-	}
-	writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-	mvpp2_port_mii_gmac_configure_mode(port);
-
-	/* Unset the GMAC reset state */
-	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-	val &= ~MVPP2_GMAC_PORT_RESET_MASK;
-	writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-	/* Stop forcing link down */
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-static void mvpp2_port_mii_xlg_configure(struct mvpp2_port *port)
-{
-	u32 val;
-
-	if (port->gop_id != 0)
-		return;
-
-	val = readl(port->base + MVPP22_XLG_CTRL0_REG);
-	val |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
-	writel(val, port->base + MVPP22_XLG_CTRL0_REG);
-
-	val = readl(port->base + MVPP22_XLG_CTRL4_REG);
-	val &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
-	val |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC;
-	writel(val, port->base + MVPP22_XLG_CTRL4_REG);
-}
-
-static void mvpp22_port_mii_set(struct mvpp2_port *port)
-{
-	u32 val;
-
-	/* Only GOP port 0 has an XLG MAC */
-	if (port->gop_id == 0) {
-		val = readl(port->base + MVPP22_XLG_CTRL3_REG);
-		val &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
-
-		if (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-		    port->phy_interface == PHY_INTERFACE_MODE_10GKR)
-			val |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
-		else
-			val |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
-
-		writel(val, port->base + MVPP22_XLG_CTRL3_REG);
-	}
-}
-
-static void mvpp2_port_mii_set(struct mvpp2_port *port)
-{
-	if (port->priv->hw_version == MVPP22)
-		mvpp22_port_mii_set(port);
-
-	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII)
-		mvpp2_port_mii_gmac_configure(port);
-	else if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
-		mvpp2_port_mii_xlg_configure(port);
-}
-
-static void mvpp2_port_fc_adv_enable(struct mvpp2_port *port)
-{
-	u32 val;
-
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	val |= MVPP2_GMAC_FC_ADV_EN;
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
 static void mvpp2_port_enable(struct mvpp2_port *port)
 {
 	u32 val;
@@ -5147,13 +5040,14 @@ static void mvpp2_port_periodic_xon_disable(struct mvpp2_port *port)
 }
 
 /* Configure loopback port */
-static void mvpp2_port_loopback_set(struct mvpp2_port *port)
+static void mvpp2_port_loopback_set(struct mvpp2_port *port,
+				    const struct phylink_link_state *state)
 {
 	u32 val;
 
 	val = readl(port->base + MVPP2_GMAC_CTRL_1_REG);
 
-	if (port->speed == 1000)
+	if (state->speed == 1000)
 		val |= MVPP2_GMAC_GMII_LB_EN_MASK;
 	else
 		val &= ~MVPP2_GMAC_GMII_LB_EN_MASK;
@@ -5331,10 +5225,6 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
 	int tx_port_num, val, queue, ptxq, lrxq;
 
 	if (port->priv->hw_version == MVPP21) {
-		/* Configure port to loopback if needed */
-		if (port->flags & MVPP2_F_LOOPBACK)
-			mvpp2_port_loopback_set(port);
-
 		/* Update TX FIFO MIN Threshold */
 		val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
 		val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
@@ -6382,6 +6272,11 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
 		}
 	}
 
+	if (port->phylink) {
+		phylink_mac_change(port->phylink, link);
+		goto handled;
+	}
+
 	if (!netif_running(dev) || !event)
 		goto handled;
 
@@ -6406,111 +6301,6 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port,
-				   struct phy_device *phydev)
-{
-	u32 val;
-
-	if (port->phy_interface != PHY_INTERFACE_MODE_RGMII &&
-	    port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID &&
-	    port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID &&
-	    port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID &&
-	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
-		return;
-
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
-		 MVPP2_GMAC_CONFIG_GMII_SPEED |
-		 MVPP2_GMAC_CONFIG_FULL_DUPLEX |
-		 MVPP2_GMAC_AN_SPEED_EN |
-		 MVPP2_GMAC_AN_DUPLEX_EN);
-
-	if (phydev->duplex)
-		val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-
-	if (phydev->speed == SPEED_1000)
-		val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
-	else if (phydev->speed == SPEED_100)
-		val |= MVPP2_GMAC_CONFIG_MII_SPEED;
-
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-/* Adjust link */
-static void mvpp2_link_event(struct net_device *dev)
-{
-	struct mvpp2_port *port = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
-	bool link_reconfigured = false;
-	u32 val;
-
-	if (phydev->link) {
-		if (port->phy_interface != phydev->interface && port->comphy) {
-	                /* disable current port for reconfiguration */
-	                mvpp2_interrupts_disable(port);
-	                netif_carrier_off(port->dev);
-	                mvpp2_port_disable(port);
-			phy_power_off(port->comphy);
-
-	                /* comphy reconfiguration */
-	                port->phy_interface = phydev->interface;
-	                mvpp22_comphy_init(port);
-
-	                /* gop/mac reconfiguration */
-	                mvpp22_gop_init(port);
-	                mvpp2_port_mii_set(port);
-
-	                link_reconfigured = true;
-		}
-
-		if ((port->speed != phydev->speed) ||
-		    (port->duplex != phydev->duplex)) {
-			mvpp2_gmac_set_autoneg(port, phydev);
-
-			port->duplex = phydev->duplex;
-			port->speed  = phydev->speed;
-		}
-	}
-
-	if (phydev->link != port->link || link_reconfigured) {
-		port->link = phydev->link;
-
-		if (phydev->link) {
-			if (port->phy_interface == PHY_INTERFACE_MODE_RGMII ||
-			    port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
-			    port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
-			    port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID ||
-			    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-				val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-				val |= (MVPP2_GMAC_FORCE_LINK_PASS |
-					MVPP2_GMAC_FORCE_LINK_DOWN);
-				writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-			}
-
-			mvpp2_interrupts_enable(port);
-			mvpp2_port_enable(port);
-
-			mvpp2_egress_enable(port);
-			mvpp2_ingress_enable(port);
-			netif_carrier_on(dev);
-			netif_tx_wake_all_queues(dev);
-		} else {
-			port->duplex = -1;
-			port->speed = 0;
-
-			netif_tx_stop_all_queues(dev);
-			netif_carrier_off(dev);
-			mvpp2_ingress_disable(port);
-			mvpp2_egress_disable(port);
-
-			mvpp2_port_disable(port);
-			mvpp2_interrupts_disable(port);
-		}
-
-		phy_print_status(phydev);
-	}
-}
-
 static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu)
 {
 	ktime_t interval;
@@ -7118,11 +6908,29 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 	return rx_done;
 }
 
-/* Set hw internals when starting port */
-static void mvpp2_start_dev(struct mvpp2_port *port)
+static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
 {
-	struct net_device *ndev = port->dev;
-	int i;
+	u32 ctrl3;
+
+	/* comphy reconfiguration */
+	mvpp22_comphy_init(port);
+
+	/* gop reconfiguration */
+	mvpp22_gop_init(port);
+
+	/* Only GOP port 0 has an XLG MAC */
+	if (port->gop_id == 0) {
+		ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG);
+		ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+
+		if (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
+		    port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+			ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
+		else
+			ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
+
+		writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG);
+	}
 
 	if (port->gop_id == 0 &&
 	    (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
@@ -7130,6 +6938,12 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 		mvpp2_xlg_max_rx_size_set(port);
 	else
 		mvpp2_gmac_max_rx_size_set(port);
+}
+
+/* Set hw internals when starting port */
+static void mvpp2_start_dev(struct mvpp2_port *port)
+{
+	int i;
 
 	mvpp2_txp_max_tx_size_set(port);
 
@@ -7139,42 +6953,39 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 	/* Enable interrupts on all CPUs */
 	mvpp2_interrupts_enable(port);
 
-	if (port->priv->hw_version == MVPP22) {
-		mvpp22_comphy_init(port);
-		mvpp22_gop_init(port);
+	if (port->priv->hw_version == MVPP22)
+		mvpp22_mode_reconfigure(port);
+
+	if (port->phylink) {
+		phylink_start(port->phylink);
+	} else {
+		/* Phylink isn't used as of now for ACPI, so the MAC has to be
+		 * configured manually when the interface is started. This will
+		 * be removed as soon as the phylink ACPI support lands in.
+		 */
+		struct phylink_link_state state = {
+			.interface = port->phy_interface,
+			.link = 1,
+		};
+		mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
 	}
 
-	mvpp2_port_mii_set(port);
-	mvpp2_port_enable(port);
-	if (ndev->phydev)
-		phy_start(ndev->phydev);
 	netif_tx_start_all_queues(port->dev);
 }
 
 /* Set hw internals when stopping port */
 static void mvpp2_stop_dev(struct mvpp2_port *port)
 {
-	struct net_device *ndev = port->dev;
 	int i;
 
-	/* Stop new packets from arriving to RXQs */
-	mvpp2_ingress_disable(port);
-
-	mdelay(10);
-
 	/* Disable interrupts on all CPUs */
 	mvpp2_interrupts_disable(port);
 
 	for (i = 0; i < port->nqvecs; i++)
 		napi_disable(&port->qvecs[i].napi);
 
-	netif_carrier_off(port->dev);
-	netif_tx_stop_all_queues(port->dev);
-
-	mvpp2_egress_disable(port);
-	mvpp2_port_disable(port);
-	if (ndev->phydev)
-		phy_stop(ndev->phydev);
+	if (port->phylink)
+		phylink_stop(port->phylink);
 	phy_power_off(port->comphy);
 }
 
@@ -7233,40 +7044,6 @@ static void mvpp21_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
 	addr[5] = (mac_addr_l >> MVPP2_GMAC_SA_LOW_OFFS) & 0xFF;
 }
 
-static int mvpp2_phy_connect(struct mvpp2_port *port)
-{
-	struct phy_device *phy_dev;
-
-	/* No PHY is attached */
-	if (!port->phy_node)
-		return 0;
-
-	phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0,
-				 port->phy_interface);
-	if (!phy_dev) {
-		netdev_err(port->dev, "cannot connect to phy\n");
-		return -ENODEV;
-	}
-	phy_dev->supported &= PHY_GBIT_FEATURES;
-	phy_dev->advertising = phy_dev->supported;
-
-	port->link    = 0;
-	port->duplex  = 0;
-	port->speed   = 0;
-
-	return 0;
-}
-
-static void mvpp2_phy_disconnect(struct mvpp2_port *port)
-{
-	struct net_device *ndev = port->dev;
-
-	if (!ndev->phydev)
-		return;
-
-	phy_disconnect(ndev->phydev);
-}
-
 static int mvpp2_irqs_init(struct mvpp2_port *port)
 {
 	int err, i;
@@ -7350,6 +7127,7 @@ static int mvpp2_open(struct net_device *dev)
 	struct mvpp2 *priv = port->priv;
 	unsigned char mac_bcast[ETH_ALEN] = {
 			0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	bool valid = false;
 	int err;
 
 	err = mvpp2_prs_mac_da_accept(port, mac_bcast, true);
@@ -7392,7 +7170,20 @@ static int mvpp2_open(struct net_device *dev)
 		goto err_cleanup_txqs;
 	}
 
-	if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) {
+	/* Phylink isn't supported yet in ACPI mode */
+	if (port->of_node) {
+		err = phylink_of_phy_connect(port->phylink, port->of_node, 0);
+		if (err) {
+			netdev_err(port->dev, "could not attach PHY (%d)\n",
+				   err);
+			goto err_free_irq;
+		}
+
+		valid = true;
+	}
+
+	if (priv->hw_version == MVPP22 && port->link_irq &&
+	    (!port->phylink || !port->has_phy)) {
 		err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
 				  dev->name, port);
 		if (err) {
@@ -7402,14 +7193,20 @@ static int mvpp2_open(struct net_device *dev)
 		}
 
 		mvpp22_gop_setup_irq(port);
-	}
 
-	/* In default link is down */
-	netif_carrier_off(port->dev);
+		/* In default link is down */
+		netif_carrier_off(port->dev);
 
-	err = mvpp2_phy_connect(port);
-	if (err < 0)
-		goto err_free_link_irq;
+		valid = true;
+	} else {
+		port->link_irq = 0;
+	}
+
+	if (!valid) {
+		netdev_err(port->dev,
+			   "invalid configuration: no dt or link IRQ");
+		goto err_free_irq;
+	}
 
 	/* Unmask interrupts on all CPUs */
 	on_each_cpu(mvpp2_interrupts_unmask, port, 1);
@@ -7426,9 +7223,6 @@ static int mvpp2_open(struct net_device *dev)
 
 	return 0;
 
-err_free_link_irq:
-	if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
-		free_irq(port->link_irq, port);
 err_free_irq:
 	mvpp2_irqs_deinit(port);
 err_cleanup_txqs:
@@ -7442,17 +7236,17 @@ static int mvpp2_stop(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	struct mvpp2_port_pcpu *port_pcpu;
-	struct mvpp2 *priv = port->priv;
 	int cpu;
 
 	mvpp2_stop_dev(port);
-	mvpp2_phy_disconnect(port);
 
 	/* Mask interrupts on all CPUs */
 	on_each_cpu(mvpp2_interrupts_mask, port, 1);
 	mvpp2_shared_interrupt_mask_unmask(port, true);
 
-	if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
+	if (port->phylink)
+		phylink_disconnect_phy(port->phylink);
+	if (port->link_irq)
 		free_irq(port->link_irq, port);
 
 	mvpp2_irqs_deinit(port);
@@ -7658,16 +7452,12 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	int ret;
+	struct mvpp2_port *port = netdev_priv(dev);
 
-	if (!dev->phydev)
+	if (!port->phylink)
 		return -ENOTSUPP;
 
-	ret = phy_mii_ioctl(dev->phydev, ifr, cmd);
-	if (!ret)
-		mvpp2_link_event(dev);
-
-	return ret;
+	return phylink_mii_ioctl(port->phylink, ifr, cmd);
 }
 
 static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
@@ -7714,6 +7504,16 @@ static int mvpp2_set_features(struct net_device *dev,
 
 /* Ethtool methods */
 
+static int mvpp2_ethtool_nway_reset(struct net_device *dev)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_nway_reset(port->phylink);
+}
+
 /* Set interrupt coalescing for ethtools */
 static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
 				      struct ethtool_coalesce *c)
@@ -7842,6 +7642,50 @@ static int mvpp2_ethtool_set_ringparam(struct net_device *dev,
 	return err;
 }
 
+static void mvpp2_ethtool_get_pause_param(struct net_device *dev,
+					  struct ethtool_pauseparam *pause)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return;
+
+	phylink_ethtool_get_pauseparam(port->phylink, pause);
+}
+
+static int mvpp2_ethtool_set_pause_param(struct net_device *dev,
+					 struct ethtool_pauseparam *pause)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_set_pauseparam(port->phylink, pause);
+}
+
+static int mvpp2_ethtool_get_link_ksettings(struct net_device *dev,
+					    struct ethtool_link_ksettings *cmd)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_ksettings_get(port->phylink, cmd);
+}
+
+static int mvpp2_ethtool_set_link_ksettings(struct net_device *dev,
+					    const struct ethtool_link_ksettings *cmd)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_ksettings_set(port->phylink, cmd);
+}
+
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -7859,7 +7703,7 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
-	.nway_reset		= phy_ethtool_nway_reset,
+	.nway_reset		= mvpp2_ethtool_nway_reset,
 	.get_link		= ethtool_op_get_link,
 	.set_coalesce		= mvpp2_ethtool_set_coalesce,
 	.get_coalesce		= mvpp2_ethtool_get_coalesce,
@@ -7869,8 +7713,10 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.get_strings		= mvpp2_ethtool_get_strings,
 	.get_ethtool_stats	= mvpp2_ethtool_get_stats,
 	.get_sset_count		= mvpp2_ethtool_get_sset_count,
-	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
-	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_pauseparam		= mvpp2_ethtool_get_pause_param,
+	.set_pauseparam		= mvpp2_ethtool_set_pause_param,
+	.get_link_ksettings	= mvpp2_ethtool_get_link_ksettings,
+	.set_link_ksettings	= mvpp2_ethtool_set_link_ksettings,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -8172,18 +8018,323 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 	eth_hw_addr_random(dev);
 }
 
+static void mvpp2_phylink_validate(struct net_device *dev,
+				   unsigned long *supported,
+				   struct phylink_link_state *state)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	phylink_set(mask, 10baseT_Half);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Half);
+	phylink_set(mask, 100baseT_Full);
+	phylink_set(mask, 1000baseT_Full);
+	phylink_set(mask, 10000baseT_Full);
+
+	if (state->interface == PHY_INTERFACE_MODE_10GKR) {
+		phylink_set(mask, 10000baseCR_Full);
+		phylink_set(mask, 10000baseSR_Full);
+		phylink_set(mask, 10000baseLR_Full);
+		phylink_set(mask, 10000baseLRM_Full);
+		phylink_set(mask, 10000baseER_Full);
+		phylink_set(mask, 10000baseKR_Full);
+	}
+
+	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void mvpp22_xlg_link_state(struct mvpp2_port *port,
+				  struct phylink_link_state *state)
+{
+	u32 val;
+
+	state->speed = SPEED_10000;
+	state->duplex = 1;
+	state->an_complete = 1;
+
+	val = readl(port->base + MVPP22_XLG_STATUS);
+	state->link = !!(val & MVPP22_XLG_STATUS_LINK_UP);
+
+	state->pause = 0;
+	val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+	if (val & MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN)
+		state->pause |= MLO_PAUSE_TX;
+	if (val & MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN)
+		state->pause |= MLO_PAUSE_RX;
+}
+
+static void mvpp2_gmac_link_state(struct mvpp2_port *port,
+				  struct phylink_link_state *state)
+{
+	u32 val;
+
+	val = readl(port->base + MVPP2_GMAC_STATUS0);
+
+	state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE);
+	state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
+	state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
+
+	if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+		state->speed = SPEED_1000;
+	else if (val & MVPP2_GMAC_STATUS0_MII_SPEED)
+		state->speed = SPEED_100;
+	else
+		state->speed = SPEED_10;
+
+	state->pause = 0;
+	if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
+		state->pause |= MLO_PAUSE_RX;
+	if (val & MVPP2_GMAC_STATUS0_TX_PAUSE)
+		state->pause |= MLO_PAUSE_TX;
+}
+
+static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+					struct phylink_link_state *state)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
+		u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
+		mode &= MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+
+		if (mode == MVPP22_XLG_CTRL3_MACMODESELECT_10G) {
+			mvpp22_xlg_link_state(port, state);
+			return 1;
+		}
+	}
+
+	mvpp2_gmac_link_state(port, state);
+	return 1;
+}
+
+static void mvpp2_mac_an_restart(struct net_device *dev)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+		return;
+
+	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	/* The RESTART_AN bit is cleared by the h/w after restarting the AN
+	 * process.
+	 */
+	val |= MVPP2_GMAC_IN_BAND_RESTART_AN | MVPP2_GMAC_IN_BAND_AUTONEG;
+	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
+			     const struct phylink_link_state *state)
+{
+	u32 ctrl0, ctrl4;
+
+	ctrl0 = readl(port->base + MVPP22_XLG_CTRL0_REG);
+	ctrl4 = readl(port->base + MVPP22_XLG_CTRL4_REG);
+
+	if (state->pause & MLO_PAUSE_TX)
+		ctrl0 |= MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN;
+	if (state->pause & MLO_PAUSE_RX)
+		ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
+
+	ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
+	ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC;
+
+	writel(ctrl0, port->base + MVPP22_XLG_CTRL0_REG);
+	writel(ctrl4, port->base + MVPP22_XLG_CTRL4_REG);
+}
+
+static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
+			      const struct phylink_link_state *state)
+{
+	u32 an, ctrl0, ctrl2, ctrl4;
+
+	an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
+	ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+	ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
+
+	/* Force link down */
+	an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+	an |= MVPP2_GMAC_FORCE_LINK_DOWN;
+	writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+	/* Set the GMAC in a reset state */
+	ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
+	writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+
+	an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED |
+		MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN |
+		MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
+		MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN |
+		MVPP2_GMAC_FORCE_LINK_DOWN);
+	ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK;
+	ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK);
+
+	an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+
+	if (state->duplex)
+		an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+	if (phylink_test(state->advertising, Pause))
+		an |= MVPP2_GMAC_FC_ADV_EN;
+	if (phylink_test(state->advertising, Asym_Pause))
+		an |= MVPP2_GMAC_FC_ADV_ASM_EN;
+
+	if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+		an |= MVPP2_GMAC_IN_BAND_AUTONEG;
+		ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
+
+		ctrl4 &= ~(MVPP22_CTRL4_EXT_PIN_GMII_SEL |
+			   MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN);
+		ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
+			 MVPP22_CTRL4_DP_CLK_SEL |
+			 MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+
+		if (state->pause & MLO_PAUSE_TX)
+			ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+		if (state->pause & MLO_PAUSE_RX)
+			ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+	} else if (phy_interface_mode_is_rgmii(state->interface)) {
+		an |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS;
+
+		if (state->speed == SPEED_1000)
+			an |= MVPP2_GMAC_STATUS0_GMII_SPEED;
+		else if (state->speed == SPEED_100)
+			an |= MVPP2_GMAC_STATUS0_MII_SPEED;
+
+		ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL;
+		ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
+			 MVPP22_CTRL4_SYNC_BYPASS_DIS |
+			 MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+	}
+
+	writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG);
+	writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+	writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
+	writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+			     const struct phylink_link_state *state)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	/* Check for invalid configuration */
+	if (state->interface == PHY_INTERFACE_MODE_10GKR && port->gop_id != 0) {
+		netdev_err(dev, "Invalid mode on %s\n", dev->name);
+		return;
+	}
+
+	/* Make sure the port is disabled when reconfiguring the mode */
+	netif_carrier_off(port->dev);
+	mvpp2_port_disable(port);
+
+	if (port->priv->hw_version == MVPP22 &&
+	    port->phy_interface != state->interface) {
+		port->phy_interface = state->interface;
+
+		/* Reconfigure the serdes lanes */
+		phy_power_off(port->comphy);
+		mvpp22_mode_reconfigure(port);
+	}
+
+	/* mac (re)configuration */
+	if (state->interface == PHY_INTERFACE_MODE_10GKR)
+		mvpp2_xlg_config(port, mode, state);
+	else if (phy_interface_mode_is_rgmii(state->interface) ||
+		 state->interface == PHY_INTERFACE_MODE_SGMII)
+		mvpp2_gmac_config(port, mode, state);
+
+	if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
+		mvpp2_port_loopback_set(port, state);
+
+	/* If the port already was up, make sure it's still in the same state */
+	if (state->link || !port->has_phy) {
+		mvpp2_port_enable(port);
+
+		mvpp2_egress_enable(port);
+		mvpp2_ingress_enable(port);
+		netif_carrier_on(dev);
+		netif_tx_wake_all_queues(dev);
+	}
+}
+
+static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+			      phy_interface_t interface, struct phy_device *phy)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	if (!phylink_autoneg_inband(mode) &&
+	    interface != PHY_INTERFACE_MODE_10GKR) {
+		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
+		val |= MVPP2_GMAC_FORCE_LINK_PASS;
+		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	}
+
+	mvpp2_port_enable(port);
+
+	mvpp2_egress_enable(port);
+	mvpp2_ingress_enable(port);
+	netif_tx_wake_all_queues(dev);
+}
+
+static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
+				phy_interface_t interface)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	if (!phylink_autoneg_inband(mode) &&
+	    interface != PHY_INTERFACE_MODE_10GKR) {
+		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+		val |= MVPP2_GMAC_FORCE_LINK_DOWN;
+		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	}
+
+	netif_tx_stop_all_queues(dev);
+	mvpp2_egress_disable(port);
+	mvpp2_ingress_disable(port);
+
+	/* When using link interrupts to notify phylink of a MAC state change,
+	 * we do not want the port to be disabled (we want to receive further
+	 * interrupts, to be notified when the port will have a link later).
+	 */
+	if (!port->has_phy)
+		return;
+
+	mvpp2_port_disable(port);
+}
+
+static const struct phylink_mac_ops mvpp2_phylink_ops = {
+	.validate = mvpp2_phylink_validate,
+	.mac_link_state = mvpp2_phylink_mac_link_state,
+	.mac_an_restart = mvpp2_mac_an_restart,
+	.mac_config = mvpp2_mac_config,
+	.mac_link_up = mvpp2_mac_link_up,
+	.mac_link_down = mvpp2_mac_link_down,
+};
+
 /* Ports initialization */
 static int mvpp2_port_probe(struct platform_device *pdev,
 			    struct fwnode_handle *port_fwnode,
 			    struct mvpp2 *priv)
 {
-	struct device_node *phy_node;
 	struct phy *comphy = NULL;
 	struct mvpp2_port *port;
 	struct mvpp2_port_pcpu *port_pcpu;
 	struct device_node *port_node = to_of_node(port_fwnode);
 	struct net_device *dev;
 	struct resource *res;
+	struct phylink *phylink;
 	char *mac_from = "";
 	unsigned int ntxqs, nrxqs;
 	bool has_tx_irqs;
@@ -8212,11 +8363,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	if (!dev)
 		return -ENOMEM;
 
-	if (port_node)
-		phy_node = of_parse_phandle(port_node, "phy", 0);
-	else
-		phy_node = NULL;
-
 	phy_mode = fwnode_get_phy_mode(port_fwnode);
 	if (phy_mode < 0) {
 		dev_err(&pdev->dev, "incorrect phy mode\n");
@@ -8249,6 +8395,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	port = netdev_priv(dev);
 	port->dev = dev;
 	port->fwnode = port_fwnode;
+	port->has_phy = !!of_find_property(port_node, "phy", NULL);
 	port->ntxqs = ntxqs;
 	port->nrxqs = nrxqs;
 	port->priv = priv;
@@ -8279,7 +8426,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	else
 		port->first_rxq = port->id * priv->max_port_rxqs;
 
-	port->phy_node = phy_node;
+	port->of_node = port_node;
 	port->phy_interface = phy_mode;
 	port->comphy = comphy;
 
@@ -8340,9 +8487,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	mvpp2_port_periodic_xon_disable(port);
 
-	if (priv->hw_version == MVPP21)
-		mvpp2_port_fc_adv_enable(port);
-
 	mvpp2_port_reset(port);
 
 	port->pcpu = alloc_percpu(struct mvpp2_port_pcpu);
@@ -8386,10 +8530,23 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	/* 9704 == 9728 - 20 and rounding to 8 */
 	dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
 
+	/* Phylink isn't used w/ ACPI as of now */
+	if (port_node) {
+		phylink = phylink_create(dev, port_fwnode, phy_mode,
+					 &mvpp2_phylink_ops);
+		if (IS_ERR(phylink)) {
+			err = PTR_ERR(phylink);
+			goto err_free_port_pcpu;
+		}
+		port->phylink = phylink;
+	} else {
+		port->phylink = NULL;
+	}
+
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register netdev\n");
-		goto err_free_port_pcpu;
+		goto err_phylink;
 	}
 	netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr);
 
@@ -8397,6 +8554,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	return 0;
 
+err_phylink:
+	if (port->phylink)
+		phylink_destroy(port->phylink);
 err_free_port_pcpu:
 	free_percpu(port->pcpu);
 err_free_txq_pcpu:
@@ -8410,7 +8570,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 err_deinit_qvecs:
 	mvpp2_queue_vectors_deinit(port);
 err_free_netdev:
-	of_node_put(phy_node);
 	free_netdev(dev);
 	return err;
 }
@@ -8421,7 +8580,8 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
 	int i;
 
 	unregister_netdev(port->dev);
-	of_node_put(port->phy_node);
+	if (port->phylink)
+		phylink_destroy(port->phylink);
 	free_percpu(port->pcpu);
 	free_percpu(port->stats);
 	for (i = 0; i < port->ntxqs; i++)
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 04/13] net: mvpp2: align the ethtool ops definition
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: Antoine Tenart, netdev, linux-kernel, thomas.petazzoni,
	maxime.chevallier, miquel.raynal, nadavh, stefanc, ymarkman, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

Cosmetic patch to align the ethtool functions to ops definitions. This
patch does not change in any way the driver's behaviour.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/ethernet/marvell/mvpp2.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 6f410235987c..77dd91e3d962 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -7859,18 +7859,18 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
-	.nway_reset	= phy_ethtool_nway_reset,
-	.get_link	= ethtool_op_get_link,
-	.set_coalesce	= mvpp2_ethtool_set_coalesce,
-	.get_coalesce	= mvpp2_ethtool_get_coalesce,
-	.get_drvinfo	= mvpp2_ethtool_get_drvinfo,
-	.get_ringparam	= mvpp2_ethtool_get_ringparam,
-	.set_ringparam	= mvpp2_ethtool_set_ringparam,
-	.get_strings	= mvpp2_ethtool_get_strings,
-	.get_ethtool_stats = mvpp2_ethtool_get_stats,
-	.get_sset_count	= mvpp2_ethtool_get_sset_count,
-	.get_link_ksettings = phy_ethtool_get_link_ksettings,
-	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.nway_reset		= phy_ethtool_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.set_coalesce		= mvpp2_ethtool_set_coalesce,
+	.get_coalesce		= mvpp2_ethtool_get_coalesce,
+	.get_drvinfo		= mvpp2_ethtool_get_drvinfo,
+	.get_ringparam		= mvpp2_ethtool_get_ringparam,
+	.set_ringparam		= mvpp2_ethtool_set_ringparam,
+	.get_strings		= mvpp2_ethtool_get_strings,
+	.get_ethtool_stats	= mvpp2_ethtool_get_stats,
+	.get_sset_count		= mvpp2_ethtool_get_sset_count,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 03/13] net: phy: sfp: warn the user when no tx_disable pin is available
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: Antoine Tenart, netdev, linux-kernel, thomas.petazzoni,
	maxime.chevallier, miquel.raynal, nadavh, stefanc, ymarkman, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

In case no Tx disable pin is available the SFP modules will always be
emitting. This could be an issue when using modules using laser as their
light source as we would have no way to disable it when the fiber is
removed. This patch adds a warning when registering an SFP cage which do
not have its tx_disable pin wired or available.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/phy/sfp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 8e323a4b70da..d4f503b2e3e2 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1093,6 +1093,15 @@ static int sfp_probe(struct platform_device *pdev)
 	if (!sfp->gpio[GPIO_MODDEF0] && !sfp->gpio[GPIO_LOS])
 		sfp->sm_dev_state = SFP_DEV_UNKNOWN;
 
+	/* We could have an issue in cases no Tx disable pin is available or
+	 * wired as modules using a laser as their light source will continue to
+	 * be active when the fiber is removed. This could be a safety issue and
+	 * we should at least warn the user about that.
+	 */
+	if (!sfp->gpio[GPIO_TX_DISABLE])
+		dev_warn(sfp->dev,
+			 "No tx_disable pin: SFP modules will always be emitting.\n");
+
 	return 0;
 }
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 02/13] net: phy: sfp: handle non-wired SFP connectors
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: Antoine Tenart, netdev, linux-kernel, thomas.petazzoni,
	maxime.chevallier, miquel.raynal, nadavh, stefanc, ymarkman, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

SFP connectors can be solder on a board without having any of their pins
(LOS, i2c...) wired. In such cases the SFP link state cannot be guessed,
and the overall link status reporting is left to other layers.

In order to achieve this, a new SFP_DEV status is added, named UNKNOWN.
This mode is set when it is not possible for the SFP code to get the
link status and as a result the link status is reported to be always UP
from the SFP point of view.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/phy/sfp.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 4686c443fc22..8e323a4b70da 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -48,6 +48,7 @@ enum {
 
 	SFP_DEV_DOWN = 0,
 	SFP_DEV_UP,
+	SFP_DEV_UNKNOWN,
 
 	SFP_S_DOWN = 0,
 	SFP_S_INIT,
@@ -737,6 +738,15 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 			sfp->sm_dev_state = SFP_DEV_DOWN;
 		}
 		break;
+
+	case SFP_DEV_UNKNOWN:
+		/* We can't know the state of the SFP link. Report the
+		 * link as being up as its status has to be guessed by
+		 * other layers.
+		 */
+		if (event != SFP_E_DEV_UP)
+			sfp_link_up(sfp->sfp_bus);
+		break;
 	}
 
 	/* Some events are global */
@@ -1077,6 +1087,12 @@ static int sfp_probe(struct platform_device *pdev)
 	if (poll)
 		mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
 
+	/* We won't be able to know the state of the SFP link, report it as
+	 * unknown.
+	 */
+	if (!sfp->gpio[GPIO_MODDEF0] && !sfp->gpio[GPIO_LOS])
+		sfp->sm_dev_state = SFP_DEV_UNKNOWN;
+
 	return 0;
 }
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next v2 01/13] net: phy: sfp: make the i2c-bus property really optional
From: Antoine Tenart @ 2018-05-04 13:56 UTC (permalink / raw)
  To: davem, kishon, linux, gregory.clement, andrew, jason,
	sebastian.hesselbarth
  Cc: Antoine Tenart, netdev, linux-kernel, thomas.petazzoni,
	maxime.chevallier, miquel.raynal, nadavh, stefanc, ymarkman, mw,
	linux-arm-kernel
In-Reply-To: <20180504135643.23466-1-antoine.tenart@bootlin.com>

The SFF,SFP documentation is clear about making all the DT properties,
with the exception of the compatible, optional. In practice this is not
the case and without an i2c-bus property provided the SFP code will
throw NULL pointer exceptions.

This patch is an attempt to fix this.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
 drivers/net/phy/sfp.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 4ab6e9a50bbe..4686c443fc22 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -298,11 +298,17 @@ static void sfp_set_state(struct sfp *sfp, unsigned int state)
 
 static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
 {
+	if (!sfp->read)
+		return -EOPNOTSUPP;
+
 	return sfp->read(sfp, a2, addr, buf, len);
 }
 
 static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
 {
+	if (!sfp->write)
+		return -EOPNOTSUPP;
+
 	return sfp->write(sfp, a2, addr, buf, len);
 }
 
@@ -533,6 +539,8 @@ static int sfp_sm_mod_hpower(struct sfp *sfp)
 		return 0;
 
 	err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+	if (err == -EOPNOTSUPP)
+		goto err;
 	if (err != sizeof(val)) {
 		dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
 		err = -EAGAIN;
@@ -542,6 +550,8 @@ static int sfp_sm_mod_hpower(struct sfp *sfp)
 	val |= BIT(0);
 
 	err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+	if (err == -EOPNOTSUPP)
+		goto err;
 	if (err != sizeof(val)) {
 		dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
 		err = -EAGAIN;
@@ -565,6 +575,8 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 	int ret;
 
 	ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+	if (ret == -EOPNOTSUPP)
+		return ret;
 	if (ret < 0) {
 		dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
 		return -EAGAIN;
-- 
2.17.0

^ permalink raw reply related

* Re: i.MX6S/DL and QCA8334 switch using DSA driver - CPU port not working
From: Andrew Lunn @ 2018-05-04 13:30 UTC (permalink / raw)
  To: Michal Vokáč; +Cc: netdev, Vivien Didelot, Florian Fainelli
In-Reply-To: <573258a9-b115-ca4a-121c-5174798d5621@gmail.com>

> Some RGMII delay tunning attempts with v4.17-rc2:
> 
> phy-mode (fec)	Rx/Tx delay	result
> --------------------------------------
> rgmii		0/0		NOT OK
> rgmii		1/1		NOT OK
> rgmii		2/2		NOT OK
> rgmii		3/3		NOT OK
> rgmii-id	0/0		NOT OK
> rgmii-id	1/1		NOT OK
> rgmii-id	2/2		NOT OK
> rgmii-id	3/3		NOT OK
> 
> I am out of ideas how to further debug this.
> Any additional adivce will be much appreciated.

I would suggest looking at the statistics counters.  ethtool -S. Try
it for both the slave interfaces, and the master interface. The master
interfaces statistics should have both the host counters, and the
switches counters. Do you see packets being sent but not received? Are
there errors reported?

      Andrew

^ permalink raw reply

* Re
From: James Manuh @ 2018-05-04 13:23 UTC (permalink / raw)

In-Reply-To: <1945529796.4083982.1525440231781.ref@mail.yahoo.com>

Hello,How are you,I contacted you for the first time concerning your late relative that deposited value of funds in the bank that will not be mentioned due to security reason,but I did not hear from you.WHY?I will appreciate you get back to me for more details. Thanks James Manuh.

^ permalink raw reply

* Re: [PATCH] dt-bindings: can: rcar_can: Fix R8A7796 SoC name
From: Simon Horman @ 2018-05-04 13:14 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Wolfgang Grandegger, Marc Kleine-Budde, Rob Herring, Mark Rutland,
	Chris Paterson, linux-can, netdev, devicetree, linux-renesas-soc
In-Reply-To: <1525352553-17045-1-git-send-email-geert+renesas@glider.be>

On Thu, May 03, 2018 at 03:02:33PM +0200, Geert Uytterhoeven wrote:
> R8A7796 is R-Car M3-W.
> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>

Reviewed-by: Simon Horman <horms+renesas@verge.net.au>

^ permalink raw reply

* Re: [PATCH] netfilter: nf_queue: Replace conntrack entry
From: kbuild test robot @ 2018-05-04 13:11 UTC (permalink / raw)
  To: Kristian Evensen
  Cc: kbuild-all, netfilter-devel, Kristian Evensen, Pablo Neira Ayuso,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller, coreteam,
	netdev, linux-kernel
In-Reply-To: <20180503140745.26588-1-kristian.evensen@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2850 bytes --]

Hi Kristian,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on nf-next/master]
[also build test ERROR on v4.17-rc3 next-20180503]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Kristian-Evensen/netfilter-nf_queue-Replace-conntrack-entry/20180504-051218
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: x86_64-randconfig-s5-05041850 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   net/netfilter/nfnetlink_queue.o: In function `nfqnl_update_ct':
>> net/netfilter/nfnetlink_queue.c:1062: undefined reference to `nf_ct_l3proto_find_get'
>> net/netfilter/nfnetlink_queue.c:1069: undefined reference to `nf_ct_l4proto_find_get'
>> net/netfilter/nfnetlink_queue.c:1071: undefined reference to `nf_ct_get_tuple'
>> net/netfilter/nfnetlink_queue.c:1079: undefined reference to `nf_conntrack_find_get'

vim +1062 net/netfilter/nfnetlink_queue.c

  1046	
  1047	#if IS_ENABLED(CONFIG_NF_CONNTRACK)
  1048	static void nfqnl_update_ct(struct net *net, struct sk_buff *skb)
  1049	{
  1050		const struct nf_conntrack_l3proto *l3proto;
  1051		const struct nf_conntrack_l4proto *l4proto;
  1052		struct nf_conntrack_tuple_hash *h;
  1053		struct nf_conntrack_tuple tuple;
  1054		enum ip_conntrack_info ctinfo;
  1055		struct nf_conn *ct = NULL;
  1056		unsigned int dataoff;
  1057		u16 l3num;
  1058		u8 l4num;
  1059	
  1060		ct = nf_ct_get(skb, &ctinfo);
  1061		l3num = nf_ct_l3num(ct);
> 1062		l3proto = nf_ct_l3proto_find_get(l3num);
  1063	
  1064		if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
  1065					 &l4num) <= 0) {
  1066			return;
  1067		}
  1068	
> 1069		l4proto = nf_ct_l4proto_find_get(l3num, l4num);
  1070	
> 1071		if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
  1072				     l4num, net, &tuple, l3proto, l4proto)) {
  1073			return;
  1074		}
  1075	
  1076	#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
  1077		h = nf_conntrack_find_get(net, &ct->zone, &tuple);
  1078	#else
> 1079		h = nf_conntrack_find_get(net, NULL, &tuple);
  1080	#endif
  1081	
  1082		if (h) {
  1083			pr_debug("%s: tuple %u %pI4:%hu -> %pI4:%hu\n", __func__,
  1084				 tuple.dst.protonum, &tuple.src.u3.ip,
  1085				 ntohs(tuple.src.u.all), &tuple.dst.u3.ip,
  1086				 ntohs(tuple.dst.u.all));
  1087			nf_ct_put(ct);
  1088			ct = nf_ct_tuplehash_to_ctrack(h);
  1089			nf_ct_set(skb, ct, IP_CT_NEW);
  1090		}
  1091	}
  1092	#endif
  1093	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31647 bytes --]

^ permalink raw reply

* Re: [PATCH bpf-next v3 06/15] xsk: add Rx receive functions and poll support
From: Daniel Borkmann @ 2018-05-04 12:59 UTC (permalink / raw)
  To: Björn Töpel, magnus.karlsson, alexander.h.duyck,
	alexander.duyck, john.fastabend, ast, brouer,
	willemdebruijn.kernel, mst, netdev
  Cc: Björn Töpel, michael.lundkvist, jesse.brandeburg,
	anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-7-bjorn.topel@gmail.com>

On 05/02/2018 01:01 PM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> Here the actual receive functions of AF_XDP are implemented, that in a
> later commit, will be called from the XDP layers.
> 
> There's one set of functions for the XDP_DRV side and another for
> XDP_SKB (generic).
> 
> A new XDP API, xdp_return_buff, is also introduced.
> 
> Adding xdp_return_buff, which is analogous to xdp_return_frame, but
> acts upon an struct xdp_buff. The API will be used by AF_XDP in future
> commits.
> 
> Support for the poll syscall is also implemented.
> 
> v2: xskq_validate_id did not update cons_tail.
>     The entries variable was calculated twice in xskq_nb_avail.
>     Squashed xdp_return_buff commit.
> 
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  include/net/xdp.h      |   1 +
>  include/net/xdp_sock.h |  22 ++++++++++
>  net/core/xdp.c         |  15 +++++--
>  net/xdp/xdp_umem.h     |  18 ++++++++
>  net/xdp/xsk.c          |  73 ++++++++++++++++++++++++++++++-
>  net/xdp/xsk_queue.h    | 114 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  6 files changed, 238 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 137ad5f9f40f..0b689cf561c7 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>  }
>  
>  void xdp_return_frame(struct xdp_frame *xdpf);
> +void xdp_return_buff(struct xdp_buff *xdp);
>  
>  int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
>  		     struct net_device *dev, u32 queue_index);
[...]
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index bf2c97b87992..4e1e6c581e1d 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -41,6 +41,74 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
>  	return (struct xdp_sock *)sk;
>  }
>  
> +static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
> +{
> +	u32 *id, len = xdp->data_end - xdp->data;
> +	void *buffer;
> +	int err = 0;
> +
> +	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
> +		return -EINVAL;
> +
> +	id = xskq_peek_id(xs->umem->fq);
> +	if (!id)
> +		return -ENOSPC;
> +
> +	buffer = xdp_umem_get_data_with_headroom(xs->umem, *id);
> +	memcpy(buffer, xdp->data, len);
> +	err = xskq_produce_batch_desc(xs->rx, *id, len,
> +				      xs->umem->frame_headroom);
> +	if (!err)
> +		xskq_discard_id(xs->umem->fq);
> +
> +	return err;
> +}
> +
> +int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
> +{
> +	int err;
> +
> +	err = __xsk_rcv(xs, xdp);
> +	if (likely(!err))
> +		xdp_return_buff(xdp);
> +	else
> +		xs->rx_dropped++;

This is triggered from __bpf_tx_xdp_map() -> __xsk_map_redirect().
Should this be percpu counter instead?

> +	return err;
> +}
> +
> +void xsk_flush(struct xdp_sock *xs)
> +{
> +	xskq_produce_flush_desc(xs->rx);
> +	xs->sk.sk_data_ready(&xs->sk);
> +}
> +
> +int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
> +{
> +	int err;
> +
> +	err = __xsk_rcv(xs, xdp);
> +	if (!err)
> +		xsk_flush(xs);
> +	else
> +		xs->rx_dropped++;
> +
> +	return err;
> +}
> +

^ permalink raw reply

* Re: DSA switch
From: Andrew Lunn @ 2018-05-04 12:56 UTC (permalink / raw)
  To: Ran Shalit; +Cc: netdev
In-Reply-To: <CAJ2oMhLvs2+SxzLoDYN9Y6YjcKAN1VwtY6a3b4XYuujLFiOgkw@mail.gmail.com>

> I also see that there is no bridge function in /drivers/net/dsa files
> in our kernel (2.6.37)

Sorry, i cannot really help you with an ancient kernel like that.

       Andrew

^ permalink raw reply

* Re: [PATCH bpf-next v3 03/15] xsk: add umem fill queue support and mmap
From: Daniel Borkmann @ 2018-05-04 12:49 UTC (permalink / raw)
  To: Björn Töpel, magnus.karlsson, alexander.h.duyck,
	alexander.duyck, john.fastabend, ast, brouer,
	willemdebruijn.kernel, mst, netdev
  Cc: michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-4-bjorn.topel@gmail.com>

On 05/02/2018 01:01 PM, Björn Töpel wrote:
> From: Magnus Karlsson <magnus.karlsson@intel.com>
> 
> Here, we add another setsockopt for registered user memory (umem)
> called XDP_UMEM_FILL_QUEUE. Using this socket option, the process can
> ask the kernel to allocate a queue (ring buffer) and also mmap it
> (XDP_UMEM_PGOFF_FILL_QUEUE) into the process.
> 
> The queue is used to explicitly pass ownership of umem frames from the
> user process to the kernel. These frames will in a later patch be
> filled in with Rx packet data by the kernel.
> 
> v2: Fixed potential crash in xsk_mmap.
> 
> Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> ---
>  include/uapi/linux/if_xdp.h | 15 +++++++++++
>  net/xdp/Makefile            |  2 +-
>  net/xdp/xdp_umem.c          |  5 ++++
>  net/xdp/xdp_umem.h          |  2 ++
>  net/xdp/xsk.c               | 65 ++++++++++++++++++++++++++++++++++++++++++++-
>  net/xdp/xsk_queue.c         | 58 ++++++++++++++++++++++++++++++++++++++++
>  net/xdp/xsk_queue.h         | 38 ++++++++++++++++++++++++++
>  7 files changed, 183 insertions(+), 2 deletions(-)
>  create mode 100644 net/xdp/xsk_queue.c
>  create mode 100644 net/xdp/xsk_queue.h
> 
> diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> index 41252135a0fe..975661e1baca 100644
> --- a/include/uapi/linux/if_xdp.h
> +++ b/include/uapi/linux/if_xdp.h
> @@ -23,6 +23,7 @@
>  
>  /* XDP socket options */
>  #define XDP_UMEM_REG			3
> +#define XDP_UMEM_FILL_RING		4
>  
>  struct xdp_umem_reg {
>  	__u64 addr; /* Start of packet data area */
> @@ -31,4 +32,18 @@ struct xdp_umem_reg {
>  	__u32 frame_headroom; /* Frame head room */
>  };
>  
> +/* Pgoff for mmaping the rings */
> +#define XDP_UMEM_PGOFF_FILL_RING	0x100000000
> +
> +struct xdp_ring {
> +	__u32 producer __attribute__((aligned(64)));
> +	__u32 consumer __attribute__((aligned(64)));
> +};
> +
> +/* Used for the fill and completion queues for buffers */
> +struct xdp_umem_ring {
> +	struct xdp_ring ptrs;
> +	__u32 desc[0] __attribute__((aligned(64)));
> +};
> +
>  #endif /* _LINUX_IF_XDP_H */
> diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> index a5d736640a0f..074fb2b2d51c 100644
> --- a/net/xdp/Makefile
> +++ b/net/xdp/Makefile
> @@ -1,2 +1,2 @@
> -obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o
> +obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
>  
> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
> index ec8b3552be44..e1f627d0cc1c 100644
> --- a/net/xdp/xdp_umem.c
> +++ b/net/xdp/xdp_umem.c
> @@ -65,6 +65,11 @@ static void xdp_umem_release(struct xdp_umem *umem)
>  	struct task_struct *task;
>  	struct mm_struct *mm;
>  
> +	if (umem->fq) {
> +		xskq_destroy(umem->fq);
> +		umem->fq = NULL;
> +	}
> +
>  	if (umem->pgs) {
>  		xdp_umem_unpin_pages(umem);
>  
> diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
> index 4597ae81a221..25634b8a5c6f 100644
> --- a/net/xdp/xdp_umem.h
> +++ b/net/xdp/xdp_umem.h
> @@ -19,9 +19,11 @@
>  #include <linux/if_xdp.h>
>  #include <linux/workqueue.h>
>  
> +#include "xsk_queue.h"
>  #include "xdp_umem_props.h"
>  
>  struct xdp_umem {
> +	struct xsk_queue *fq;
>  	struct page **pgs;
>  	struct xdp_umem_props props;
>  	u32 npgs;
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 84e0e867febb..da67a3c5c1c9 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -32,6 +32,7 @@
>  #include <linux/netdevice.h>
>  #include <net/xdp_sock.h>
>  
> +#include "xsk_queue.h"
>  #include "xdp_umem.h"
>  
>  static struct xdp_sock *xdp_sk(struct sock *sk)
> @@ -39,6 +40,21 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
>  	return (struct xdp_sock *)sk;
>  }
>  
> +static int xsk_init_queue(u32 entries, struct xsk_queue **queue)
> +{
> +	struct xsk_queue *q;
> +
> +	if (entries == 0 || *queue || !is_power_of_2(entries))
> +		return -EINVAL;
> +
> +	q = xskq_create(entries);
> +	if (!q)
> +		return -ENOMEM;
> +
> +	*queue = q;
> +	return 0;
> +}
> +
>  static int xsk_release(struct socket *sock)
>  {
>  	struct sock *sk = sock->sk;
> @@ -101,6 +117,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
>  		mutex_unlock(&xs->mutex);
>  		return 0;
>  	}
> +	case XDP_UMEM_FILL_RING:
> +	{
> +		struct xsk_queue **q;
> +		int entries;
> +
> +		if (!xs->umem)
> +			return -EINVAL;

(Same here as previously mentioned.)

> +		if (copy_from_user(&entries, optval, sizeof(entries)))
> +			return -EFAULT;
> +
> +		mutex_lock(&xs->mutex);
> +		q = &xs->umem->fq;
> +		err = xsk_init_queue(entries, q);
> +		mutex_unlock(&xs->mutex);
> +		return err;
> +	}
>  	default:
>  		break;
>  	}
> @@ -108,6 +141,36 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
>  	return -ENOPROTOOPT;
>  }
>  
> +static int xsk_mmap(struct file *file, struct socket *sock,
> +		    struct vm_area_struct *vma)
> +{
> +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
> +	unsigned long size = vma->vm_end - vma->vm_start;
> +	struct xdp_sock *xs = xdp_sk(sock->sk);
> +	struct xsk_queue *q = NULL;
> +	unsigned long pfn;
> +	struct page *qpg;
> +
> +	if (!xs->umem)
> +		return -EINVAL;
> +
> +	if (offset == XDP_UMEM_PGOFF_FILL_RING)
> +		q = xs->umem->fq;
> +	else
> +		return -EINVAL;
> +
> +	if (!q)
> +		return -EINVAL;

Nit: since q is NULL above, could be simplified as:

	if (offset == XDP_UMEM_PGOFF_FILL_RING)
		q = xs->umem->fq;
	if (!q)
		return -EINVAL;

> +
> +	qpg = virt_to_head_page(q->ring);
> +	if (size > (PAGE_SIZE << compound_order(qpg)))
> +		return -EINVAL;
> +
> +	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
> +	return remap_pfn_range(vma, vma->vm_start, pfn,
> +			       size, vma->vm_page_prot);
> +}
> +
>  static struct proto xsk_proto = {
>  	.name =		"XDP",
>  	.owner =	THIS_MODULE,
> @@ -131,7 +194,7 @@ static const struct proto_ops xsk_proto_ops = {
>  	.getsockopt =	sock_no_getsockopt,
>  	.sendmsg =	sock_no_sendmsg,
>  	.recvmsg =	sock_no_recvmsg,
> -	.mmap =		sock_no_mmap,
> +	.mmap =		xsk_mmap,
>  	.sendpage =	sock_no_sendpage,
>  };
>  

^ permalink raw reply

* Re: ipw2200: fix spelling mistake: "functionalitis" -> "functionalities"
From: Kalle Valo @ 2018-05-04 12:37 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Stanislav Yakovlev, David S . Miller,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	kernel-janitors-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20180501093643.23318-1-colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Colin Ian King <colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org> wrote:

> From: Colin Ian King <colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
> 
> Trivial fix to spelling mistake in module parameter description text
> 
> Signed-off-by: Colin Ian King <colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Patch applied to wireless-drivers-next.git, thanks.

7e7939e80e3c ipw2200: fix spelling mistake: "functionalitis" -> "functionalities"

-- 
https://patchwork.kernel.org/patch/10373469/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: rtlwifi: fix spelling mistake: "dismatch" -> "mismatch"
From: Kalle Valo @ 2018-05-04 12:36 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Ping-Ke Shih, David S . Miller, Larry Finger, linux-wireless,
	netdev, kernel-janitors, linux-kernel
In-Reply-To: <20180430134102.11171-1-colin.king@canonical.com>

Colin Ian King <colin.king@canonical.com> wrote:

> From: Colin Ian King <colin.king@canonical.com>
> 
> Trivial fix to spelling mistake in RT_TRACE message text.
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Patch applied to wireless-drivers-next.git, thanks.

070fc356e21a rtlwifi: fix spelling mistake: "dismatch" -> "mismatch"

-- 
https://patchwork.kernel.org/patch/10371799/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: [PATCH bpf-next v3 02/15] xsk: add user memory registration support sockopt
From: Daniel Borkmann @ 2018-05-04 12:34 UTC (permalink / raw)
  To: Björn Töpel, magnus.karlsson, alexander.h.duyck,
	alexander.duyck, john.fastabend, ast, brouer,
	willemdebruijn.kernel, mst, netdev
  Cc: Björn Töpel, michael.lundkvist, jesse.brandeburg,
	anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-3-bjorn.topel@gmail.com>

On 05/02/2018 01:01 PM, Björn Töpel wrote:
[...]

(Few nits for follow-ups I haven't sent out yesterday night yet; they are on
 top of Alexei's remarks.)

> ---
>  include/net/xdp_sock.h      |  31 ++++++
>  include/uapi/linux/if_xdp.h |  34 ++++++
>  net/Makefile                |   1 +
>  net/xdp/Makefile            |   2 +
>  net/xdp/xdp_umem.c          | 245 ++++++++++++++++++++++++++++++++++++++++++++
>  net/xdp/xdp_umem.h          |  45 ++++++++
>  net/xdp/xdp_umem_props.h    |  23 +++++
>  net/xdp/xsk.c               | 215 ++++++++++++++++++++++++++++++++++++++
>  8 files changed, 596 insertions(+)
>  create mode 100644 include/net/xdp_sock.h
>  create mode 100644 include/uapi/linux/if_xdp.h
>  create mode 100644 net/xdp/Makefile
>  create mode 100644 net/xdp/xdp_umem.c
>  create mode 100644 net/xdp/xdp_umem.h
>  create mode 100644 net/xdp/xdp_umem_props.h
>  create mode 100644 net/xdp/xsk.c
> 
> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
> new file mode 100644
> index 000000000000..94785f5db13e
> --- /dev/null
> +++ b/include/net/xdp_sock.h
> @@ -0,0 +1,31 @@
> +/* SPDX-License-Identifier: GPL-2.0

I think this should just be a single line comment, at least that's the case
for majority of files in tree, so probably good to stick to it as well ...

$ git grep -n "SPDX-License-Identifier" | grep "\/\*" | grep -v "\*\/" | wc -l
20
$ git grep -n "SPDX-License-Identifier" | grep "\/\*" | grep "\*\/" | wc -l
7742

... and probably would also make sense to get rid of the below boiler plate
text then. (Applies to other added files as well in this series, only mention
it here once.)

> + * AF_XDP internal functions
> + * Copyright(c) 2018 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef _LINUX_XDP_SOCK_H
> +#define _LINUX_XDP_SOCK_H
> +
> +#include <linux/mutex.h>
> +#include <net/sock.h>
> +
> +struct xdp_umem;
> +
> +struct xdp_sock {
> +	/* struct sock must be the first member of struct xdp_sock */
> +	struct sock sk;
> +	struct xdp_umem *umem;
> +	/* Protects multiple processes in the control path */
> +	struct mutex mutex;
> +};
> +
> +#endif /* _LINUX_XDP_SOCK_H */
[...]
> diff --git a/net/Makefile b/net/Makefile
> index a6147c61b174..77aaddedbd29 100644
> --- a/net/Makefile
> +++ b/net/Makefile
> @@ -85,3 +85,4 @@ obj-y				+= l3mdev/
>  endif
>  obj-$(CONFIG_QRTR)		+= qrtr/
>  obj-$(CONFIG_NET_NCSI)		+= ncsi/
> +obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
> diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> new file mode 100644
> index 000000000000..a5d736640a0f
> --- /dev/null
> +++ b/net/xdp/Makefile
> @@ -0,0 +1,2 @@
> +obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o
> +

Nit: newline at end of file.

> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
> new file mode 100644
> index 000000000000..ec8b3552be44
> --- /dev/null
> +++ b/net/xdp/xdp_umem.c
[...]
> +
> +#include "xdp_umem.h"
> +
> +#define XDP_UMEM_MIN_FRAME_SIZE 2048
> +
> +int xdp_umem_create(struct xdp_umem **umem)
> +{
> +	*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
> +
> +	if (!(*umem))

Nit: extra () not needed. You also have the extra brackets in couple of
other places/conditionals.

> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void xdp_umem_unpin_pages(struct xdp_umem *umem)
> +{
> +	unsigned int i;
> +
> +	if (umem->pgs) {

All call-sites of this have umem->pgs never as NULL.

> +		for (i = 0; i < umem->npgs; i++) {
> +			struct page *page = umem->pgs[i];
> +
> +			set_page_dirty_lock(page);
> +			put_page(page);
> +		}
> +
> +		kfree(umem->pgs);
> +		umem->pgs = NULL;
> +	}
> +}
> +
> +static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
> +{
> +	if (umem->user) {
> +		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
> +		free_uid(umem->user);
> +	}
> +}
> +
> +static void xdp_umem_release(struct xdp_umem *umem)
> +{
> +	struct task_struct *task;
> +	struct mm_struct *mm;
> +
> +	if (umem->pgs) {
> +		xdp_umem_unpin_pages(umem);
> +
> +		task = get_pid_task(umem->pid, PIDTYPE_PID);
> +		put_pid(umem->pid);
> +		if (!task)
> +			goto out;
> +		mm = get_task_mm(task);
> +		put_task_struct(task);
> +		if (!mm)
> +			goto out;
> +
> +		mmput(mm);
> +		umem->pgs = NULL;
> +	}
> +
> +	xdp_umem_unaccount_pages(umem);
> +out:
> +	kfree(umem);
> +}
> +
> +static void xdp_umem_release_deferred(struct work_struct *work)
> +{
> +	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
> +
> +	xdp_umem_release(umem);
> +}
> +
> +void xdp_get_umem(struct xdp_umem *umem)
> +{
> +	atomic_inc(&umem->users);
> +}
> +
> +void xdp_put_umem(struct xdp_umem *umem)
> +{
> +	if (!umem)
> +		return;
> +
> +	if (atomic_dec_and_test(&umem->users)) {
> +		INIT_WORK(&umem->work, xdp_umem_release_deferred);
> +		schedule_work(&umem->work);
> +	}
> +}
> +
> +static int xdp_umem_pin_pages(struct xdp_umem *umem)
> +{
> +	unsigned int gup_flags = FOLL_WRITE;
> +	long npgs;
> +	int err;
> +
> +	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
> +	if (!umem->pgs)
> +		return -ENOMEM;
> +
> +	down_write(&current->mm->mmap_sem);
> +	npgs = get_user_pages(umem->address, umem->npgs,
> +			      gup_flags, &umem->pgs[0], NULL);
> +	up_write(&current->mm->mmap_sem);
> +
> +	if (npgs != umem->npgs) {
> +		if (npgs >= 0) {
> +			umem->npgs = npgs;
> +			err = -ENOMEM;
> +			goto out_pin;
> +		}
> +		err = npgs;
> +		goto out_pgs;
> +	}
> +	return 0;
> +
> +out_pin:
> +	xdp_umem_unpin_pages(umem);
> +out_pgs:
> +	kfree(umem->pgs);
> +	umem->pgs = NULL;
> +	return err;
> +}
> +
> +static int xdp_umem_account_pages(struct xdp_umem *umem)
> +{
> +	unsigned long lock_limit, new_npgs, old_npgs;
> +
> +	if (capable(CAP_IPC_LOCK))
> +		return 0;
> +
> +	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +	umem->user = get_uid(current_user());
> +
> +	do {
> +		old_npgs = atomic_long_read(&umem->user->locked_vm);
> +		new_npgs = old_npgs + umem->npgs;
> +		if (new_npgs > lock_limit) {
> +			free_uid(umem->user);
> +			umem->user = NULL;
> +			return -ENOBUFS;
> +		}
> +	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
> +				     new_npgs) != old_npgs);
> +	return 0;
> +}
> +
> +int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
> +{
> +	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
> +	u64 addr = mr->addr, size = mr->len;
> +	unsigned int nframes, nfpp;
> +	int size_chk, err;
> +
> +	if (!umem)
> +		return -EINVAL;

Wouldn't it be better to remove these sort of defensive checks (here and in
other places)? Eventually they might only end up hiding potential bugs rather
than having them noticed. Only call-site is in xsk_setsockopt() where you do:

[...]
                mutex_lock(&xs->mutex);
                err = xdp_umem_create(&umem);

                err = xdp_umem_reg(umem, &mr);
                if (err) {
                        kfree(umem);
                        mutex_unlock(&xs->mutex);
                        return err;
                }
[...]

Seems more intuitive and easier to audit when you bail out initially when
xdp_umem_create() fails rather than in xdp_umem_reg(), and then the test
for !umem can be removed.

> +	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
> +		/* Strictly speaking we could support this, if:
> +		 * - huge pages, or*
> +		 * - using an IOMMU, or
> +		 * - making sure the memory area is consecutive
> +		 * but for now, we simply say "computer says no".
> +		 */
> +		return -EINVAL;
> +	}
> +
> +	if (!is_power_of_2(frame_size))
> +		return -EINVAL;
> +
> +	if (!PAGE_ALIGNED(addr)) {
> +		/* Memory area has to be page size aligned. For
> +		 * simplicity, this might change.
> +		 */
> +		return -EINVAL;
> +	}
> +
> +	if ((addr + size) < addr)
> +		return -EINVAL;
> +
> +	nframes = size / frame_size;
> +	if (nframes == 0 || nframes > UINT_MAX)
> +		return -EINVAL;
> +
> +	nfpp = PAGE_SIZE / frame_size;
> +	if (nframes < nfpp || nframes % nfpp)
> +		return -EINVAL;
> +
> +	frame_headroom = ALIGN(frame_headroom, 64);
> +
> +	size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
> +	if (size_chk < 0)
> +		return -EINVAL;
> +
> +	umem->pid = get_task_pid(current, PIDTYPE_PID);
> +	umem->size = (size_t)size;
> +	umem->address = (unsigned long)addr;
> +	umem->props.frame_size = frame_size;
> +	umem->props.nframes = nframes;
> +	umem->frame_headroom = frame_headroom;
> +	umem->npgs = size / PAGE_SIZE;
> +	umem->pgs = NULL;
> +	umem->user = NULL;
> +
> +	umem->frame_size_log2 = ilog2(frame_size);
> +	umem->nfpp_mask = nfpp - 1;
> +	umem->nfpplog2 = ilog2(nfpp);
> +	atomic_set(&umem->users, 1);
> +
> +	err = xdp_umem_account_pages(umem);
> +	if (err)
> +		goto out;
> +
> +	err = xdp_umem_pin_pages(umem);
> +	if (err)
> +		goto out_account;
> +	return 0;
> +
> +out_account:
> +	xdp_umem_unaccount_pages(umem);
> +out:
> +	put_pid(umem->pid);
> +	return err;
> +}
[...]
> +#ifndef XDP_UMEM_H_
> +#define XDP_UMEM_H_
> +
> +#include <linux/mm.h>
> +#include <linux/if_xdp.h>
> +#include <linux/workqueue.h>
> +
> +#include "xdp_umem_props.h"
> +
> +struct xdp_umem {
> +	struct page **pgs;
> +	struct xdp_umem_props props;
> +	u32 npgs;
> +	u32 frame_headroom;
> +	u32 nfpp_mask;
> +	u32 nfpplog2;
> +	u32 frame_size_log2;
> +	struct user_struct *user;
> +	struct pid *pid;
> +	unsigned long address;
> +	size_t size;
> +	atomic_t users;

Convert to refcnt_t?

> +	struct work_struct work;
> +};
> +
> +int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
> +void xdp_get_umem(struct xdp_umem *umem);
> +void xdp_put_umem(struct xdp_umem *umem);
> +int xdp_umem_create(struct xdp_umem **umem);
> +
> +#endif /* XDP_UMEM_H_ */
[...]
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> new file mode 100644
> index 000000000000..84e0e867febb
[...]
> +static struct xdp_sock *xdp_sk(struct sock *sk)
> +{
> +	return (struct xdp_sock *)sk;
> +}
> +
> +static int xsk_release(struct socket *sock)
> +{
> +	struct sock *sk = sock->sk;
> +	struct net *net;
> +
> +	if (!sk)
> +		return 0;
> +
> +	net = sock_net(sk);
> +
> +	local_bh_disable();
> +	sock_prot_inuse_add(net, sk->sk_prot, -1);
> +	local_bh_enable();
> +
> +	sock_orphan(sk);
> +	sock->sk = NULL;
> +
> +	sk_refcnt_debug_release(sk);
> +	sock_put(sk);
> +
> +	return 0;
> +}
> +
> +static int xsk_setsockopt(struct socket *sock, int level, int optname,
> +			  char __user *optval, unsigned int optlen)
> +{
> +	struct sock *sk = sock->sk;
> +	struct xdp_sock *xs = xdp_sk(sk);
> +	int err;
> +
> +	if (level != SOL_XDP)
> +		return -ENOPROTOOPT;
> +
> +	switch (optname) {
> +	case XDP_UMEM_REG:
> +	{
> +		struct xdp_umem_reg mr;
> +		struct xdp_umem *umem;
> +
> +		if (xs->umem)

Does this need READ_ONCE() or needs to go under the lock?

> +			return -EBUSY;
> +
> +		if (copy_from_user(&mr, optval, sizeof(mr)))
> +			return -EFAULT;
> +
> +		mutex_lock(&xs->mutex);
> +		err = xdp_umem_create(&umem);
> +
> +		err = xdp_umem_reg(umem, &mr);
> +		if (err) {
> +			kfree(umem);

The kfree() here begs for having a proper destructor such that once you extend
xdp_umem_create() these spots are not missed when further cleanups have to be
performed on dismantle.

> +			mutex_unlock(&xs->mutex);
> +			return err;
> +		}
> +
> +		/* Make sure umem is ready before it can be seen by others */
> +		smp_wmb();
> +
> +		xs->umem = umem;
> +		mutex_unlock(&xs->mutex);
> +		return 0;
> +	}
> +	default:
> +		break;
> +	}
> +
> +	return -ENOPROTOOPT;
> +}
> +
> +static struct proto xsk_proto = {
> +	.name =		"XDP",
> +	.owner =	THIS_MODULE,
> +	.obj_size =	sizeof(struct xdp_sock),
> +};
> +
> +static const struct proto_ops xsk_proto_ops = {
> +	.family =	PF_XDP,
> +	.owner =	THIS_MODULE,
> +	.release =	xsk_release,
> +	.bind =		sock_no_bind,
> +	.connect =	sock_no_connect,
> +	.socketpair =	sock_no_socketpair,
> +	.accept =	sock_no_accept,
> +	.getname =	sock_no_getname,
> +	.poll =		sock_no_poll,
> +	.ioctl =	sock_no_ioctl,
> +	.listen =	sock_no_listen,
> +	.shutdown =	sock_no_shutdown,
> +	.setsockopt =	xsk_setsockopt,
> +	.getsockopt =	sock_no_getsockopt,
> +	.sendmsg =	sock_no_sendmsg,
> +	.recvmsg =	sock_no_recvmsg,
> +	.mmap =		sock_no_mmap,
> +	.sendpage =	sock_no_sendpage,

Nit: would have been nice to properly align the '='

> +};
> +
> +static void xsk_destruct(struct sock *sk)
> +{
> +	struct xdp_sock *xs = xdp_sk(sk);
> +
> +	if (!sock_flag(sk, SOCK_DEAD))
> +		return;
> +
> +	xdp_put_umem(xs->umem);
> +
> +	sk_refcnt_debug_dec(sk);
> +}
> +
> +static int xsk_create(struct net *net, struct socket *sock, int protocol,
> +		      int kern)
> +{
> +	struct sock *sk;
> +	struct xdp_sock *xs;
> +
> +	if (!ns_capable(net->user_ns, CAP_NET_RAW))
> +		return -EPERM;
> +	if (sock->type != SOCK_RAW)
> +		return -ESOCKTNOSUPPORT;
> +
> +	if (protocol)
> +		return -EPROTONOSUPPORT;
> +
> +	sock->state = SS_UNCONNECTED;
[...]

^ permalink raw reply

* Re: [RFC PATCH 5/5] net: macb: Add WOL support with ARP
From: Claudiu Beznea @ 2018-05-04 12:17 UTC (permalink / raw)
  To: harinikatakamlinux, nicolas.ferre, davem
  Cc: netdev, linux-kernel, harinik, michals, appanad
In-Reply-To: <1521726700-22634-6-git-send-email-harinikatakamlinux@gmail.com>



On 22.03.2018 15:51, harinikatakamlinux@gmail.com wrote:
> From: Harini Katakam <harinik@xilinx.com>
> 
> This patch enables ARP wake event support in GEM through the following:
> 
> -> WOL capability can be selected based on the SoC/GEM IP version rather
> than a devictree property alone. Hence add a new capability property and
> set device as "wakeup capable" in probe in this case.
> -> Wake source selection can be done via ethtool or by enabling wakeup
> in /sys/devices/platform/..../ethx/power/
> This patch adds default wake source as ARP and the existing selection of
> WOL using magic packet remains unchanged.
> -> When GEM is the wake device with ARP as the wake event, the current
> IP address to match is written to WOL register along with other
> necessary confuguration required for MAC to recognize an ARP event.
> -> While RX needs to remain enabled, there is no need to process the
> actual wake packet - hence tie off all RX queues to avoid unnecessary
> processing by DMA in the background. 

Why is this different for magic packet vs ARP packet?

This tie off is done using a
> dummy buffer descriptor with used bit set. (There is no other provision
> to disable RX DMA in the GEM IP version in ZynqMP)
> -> TX is disabled and all interrupts except WOL on Q0 are disabled.
> Clear the WOL interrupt as no other action is required from driver.
> Power management of the SoC will already have got the event and will
> take care of initiating resume.
> -> Upon resume ARP WOL config is cleared and macb is reinitialized.
> 
> Signed-off-by: Harini Katakam <harinik@xilinx.com>
> ---
>  drivers/net/ethernet/cadence/macb.h      |   6 ++
>  drivers/net/ethernet/cadence/macb_main.c | 130 +++++++++++++++++++++++++++++--
>  2 files changed, 131 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index 9e7fb14..e18ff34 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -93,6 +93,7 @@
>  #define GEM_SA3T		0x009C /* Specific3 Top */
>  #define GEM_SA4B		0x00A0 /* Specific4 Bottom */
>  #define GEM_SA4T		0x00A4 /* Specific4 Top */
> +#define GEM_WOL			0x00B8 /* Wake on LAN */
>  #define GEM_EFTSH		0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */
>  #define GEM_EFRSH		0x00ec /* PTP Event Frame Received Seconds Register 47:32 */
>  #define GEM_PEFTSH		0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */
> @@ -398,6 +399,8 @@
>  #define MACB_PDRSFT_SIZE	1
>  #define MACB_SRI_OFFSET		26 /* TSU Seconds Register Increment */
>  #define MACB_SRI_SIZE		1
> +#define GEM_WOL_OFFSET		28 /* Enable wake-on-lan interrupt in GEM */
> +#define GEM_WOL_SIZE		1
>  
>  /* Timer increment fields */
>  #define MACB_TI_CNS_OFFSET	0
> @@ -635,6 +638,7 @@
>  #define MACB_CAPS_USRIO_DISABLED		0x00000010
>  #define MACB_CAPS_JUMBO				0x00000020
>  #define MACB_CAPS_GEM_HAS_PTP			0x00000040
> +#define MACB_CAPS_WOL				0x00000080

I think would be better to have this as part of bp->wol and use it properly
in suspend/resume hooks.

>  #define MACB_CAPS_FIFO_MODE			0x10000000
>  #define MACB_CAPS_GIGABIT_MODE_AVAILABLE	0x20000000
>  #define MACB_CAPS_SG_DISABLED			0x40000000
> @@ -1147,6 +1151,8 @@ struct macb {
>  	unsigned int		num_queues;
>  	unsigned int		queue_mask;
>  	struct macb_queue	queues[MACB_MAX_QUEUES];
> +	dma_addr_t		rx_ring_tieoff_dma;
> +	struct macb_dma_desc	*rx_ring_tieoff;
>  
>  	spinlock_t		lock;
>  	struct platform_device	*pdev;
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index bca91bd..9902654 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -36,6 +36,7 @@
>  #include <linux/udp.h>
>  #include <linux/tcp.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/inetdevice.h>
>  #include "macb.h"
>  
>  #define MACB_RX_BUFFER_SIZE	128
> @@ -1400,6 +1401,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
>  	spin_lock(&bp->lock);
>  
>  	while (status) {
> +		if (status & GEM_BIT(WOL)) {
> +			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> +				queue_writel(queue, ISR, GEM_BIT(WOL));
> +			break;
> +		}
> +
>  		/* close possible race with dev_close */
>  		if (unlikely(!netif_running(dev))) {
>  			queue_writel(queue, IDR, -1);
> @@ -1900,6 +1907,12 @@ static void macb_free_consistent(struct macb *bp)
>  		queue->rx_ring = NULL;
>  	}
>  
> +	if (bp->rx_ring_tieoff) {
> +		dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
> +				  bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
> +		bp->rx_ring_tieoff = NULL;
> +	}
> +
>  	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
>  		kfree(queue->tx_skb);
>  		queue->tx_skb = NULL;
> @@ -1979,6 +1992,14 @@ static int macb_alloc_consistent(struct macb *bp)
>  			   "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
>  			   size, (unsigned long)queue->rx_ring_dma, queue->rx_ring);
>  	}
> +	/* Allocate one dummy descriptor to tie off RX queues when required */
> +	bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
> +						macb_dma_desc_get_size(bp),
> +						&bp->rx_ring_tieoff_dma,
> +						GFP_KERNEL);
> +	if (!bp->rx_ring_tieoff)
> +		goto out_err;
> +
>  	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
>  		goto out_err;
>  
> @@ -1989,6 +2010,34 @@ static int macb_alloc_consistent(struct macb *bp)
>  	return -ENOMEM;
>  }
>  
> +static void macb_init_tieoff(struct macb *bp)
> +{
> +	struct macb_dma_desc *d = bp->rx_ring_tieoff;
> +
> +	/* Setup a wrapping descriptor with no free slots
> +	 * (WRAP and USED) to tie off/disable unused RX queues.
> +	 */
> +	macb_set_addr(bp, d, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
> +	d->ctrl = 0;
> +}
> +
> +static inline void macb_rx_tieoff(struct macb *bp)
> +{
> +	struct macb_queue *queue = bp->queues;
> +	unsigned int q;
> +
> +	for (q = 0, queue = bp->queues; q < bp->num_queues;
> +	     ++q, ++queue) {
> +		queue_writel(queue, RBQP,
> +			     lower_32_bits(bp->rx_ring_tieoff_dma));
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
> +			queue_writel(queue, RBQPH,
> +				     upper_32_bits(bp->rx_ring_tieoff_dma));
> +#endif
> +	}
> +}
> +
>  static void gem_init_rings(struct macb *bp)
>  {
>  	struct macb_queue *queue;
> @@ -2011,6 +2060,7 @@ static void gem_init_rings(struct macb *bp)
>  
>  		gem_rx_refill(queue);
>  	}
> +	macb_init_tieoff(bp);
>  
>  }
>  
> @@ -2653,6 +2703,13 @@ static void macb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
>  		if (bp->wol & MACB_WOL_ENABLED)
>  			wol->wolopts |= WAKE_MAGIC;
>  	}
> +
> +	if (bp->caps & MACB_CAPS_WOL) {
> +		wol->supported = WAKE_ARP;
> +
> +		if (bp->wol & MACB_WOL_ENABLED)
> +			wol->wolopts |= WAKE_ARP;
> +	}
>  }
>  
>  static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
> @@ -2660,10 +2717,11 @@ static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
>  	struct macb *bp = netdev_priv(netdev);
>  
>  	if (!(bp->wol & MACB_WOL_HAS_MAGIC_PACKET) ||
> -	    (wol->wolopts & ~WAKE_MAGIC))
> +	    !(bp->caps & MACB_CAPS_WOL) ||
> +	    (wol->wolopts & ~WAKE_MAGIC) || (wol->wolopts & ~WAKE_ARP))
>  		return -EOPNOTSUPP;

So, both flags WAKE_MAGIC and WAKE_ARP needs to be set in order to use
Wake on Lan? Shouldn't this be exclusive? I mean if only one is set to
use that one?

Also, wouldn't be better to have all Wake on LAN capabilities in the same
place? I mean either bp->wol or bp->caps??


>  
> -	if (wol->wolopts & WAKE_MAGIC)
> +	if (wol->wolopts & (WAKE_MAGIC | WAKE_ARP))
>  		bp->wol |= MACB_WOL_ENABLED;
>  	else
>  		bp->wol &= ~MACB_WOL_ENABLED;
> @@ -3895,7 +3953,7 @@ static const struct macb_config np4_config = {
>  static const struct macb_config zynqmp_config = {
>  	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
>  			MACB_CAPS_JUMBO |
> -			MACB_CAPS_GEM_HAS_PTP,
> +			MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_WOL,
>  	.dma_burst_length = 16,
>  	.clk_init = macb_clk_init,
>  	.init = macb_init,
> @@ -4093,6 +4151,9 @@ static int macb_probe(struct platform_device *pdev)
>  
>  	phy_attached_info(phydev);
>  
> +	if (bp->caps & MACB_CAPS_WOL)
> +		device_set_wakeup_capable(&bp->dev->dev, 1);
> +

I think it is better to have this in bp->wol to keep all the wakeup related
events in the same place.

>  	netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
>  		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
>  		    dev->base_addr, dev->irq, dev->dev_addr);
> @@ -4170,16 +4231,58 @@ static int __maybe_unused macb_suspend(struct device *dev)
>  	struct macb_queue *queue = bp->queues;
>  	unsigned long flags;
>  	unsigned int q;
> +	u32 ctrl, arpipmask;
>  
>  	if (!netif_running(netdev))
>  		return 0;
>  
>  
> -	if (bp->wol & MACB_WOL_ENABLED) {
> +	if ((bp->wol & MACB_WOL_ENABLED) &&
> +	    (bp->wol & MACB_WOL_HAS_MAGIC_PACKET)) {

If you will also introduce the other 2 wakeup supported sources of GEM GXL you
will end up having also a new else and conditioning device_may_wakeup() below
if-else condition with a bp->caps & MACB_CAPS_WOL.

I thinking that having something like this will be simpler:
	if (bp->wol & MACB_WOL_ENABLED) {
		if (bp->wol & MACB_WOL_HAS_MAGIC_PACKET)
			macb_configure_magic_pkt();
		if (bp->wol & MACB_WOL_HAS_ARP_PACKET)
			macb_configure_arp_pkt();
	}

What do you think?

>  		macb_writel(bp, IER, MACB_BIT(WOL));
>  		macb_writel(bp, WOL, MACB_BIT(MAG));
>  		enable_irq_wake(bp->queues[0].irq);
>  		netif_device_detach(netdev);
> +	} else if (device_may_wakeup(&bp->dev->dev)) {
> +		/* Use ARP as default wake source */
> +		spin_lock_irqsave(&bp->lock, flags);
> +		ctrl = macb_readl(bp, NCR);
> +		/* Disable TX as is it not required;
> +		 * Disable RX to change BD pointers and enable again
> +		 */
> +		ctrl &= ~(MACB_BIT(TE) | MACB_BIT(RE));
> +		macb_writel(bp, NCR, ctrl);
> +		/* Tie all RX queues */
> +		macb_rx_tieoff(bp);
> +		ctrl = macb_readl(bp, NCR);
> +		ctrl |= MACB_BIT(RE);
> +		macb_writel(bp, NCR, ctrl);
> +		/* Broadcast should be enabled for ARP wake event */
> +		gem_writel(bp, NCFGR, gem_readl(bp, NCFGR) & ~MACB_BIT(NBC));
> +		macb_writel(bp, TSR, -1);
> +		macb_writel(bp, RSR, -1);
> +		macb_readl(bp, ISR);
> +		if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> +			macb_writel(bp, ISR, -1);
> +
> +		/* Enable WOL (Q0 only) and disable all other interrupts */
> +		queue = bp->queues;
> +		queue_writel(queue, IER, GEM_BIT(WOL));
> +		for (q = 0, queue = bp->queues; q < bp->num_queues;
> +		     ++q, ++queue) {
> +			queue_writel(queue, IDR, MACB_RX_INT_FLAGS |
> +						 MACB_TX_INT_FLAGS |
> +						 MACB_BIT(HRESP));
> +		}
> +
> +		arpipmask = cpu_to_be32p(&bp->dev->ip_ptr->ifa_list->ifa_local)
> +					 & 0xFFFF;
> +		gem_writel(bp, WOL, MACB_BIT(ARP) | arpipmask);
> +		spin_unlock_irqrestore(&bp->lock, flags);
> +		enable_irq_wake(bp->queues[0].irq);
> +		netif_device_detach(netdev);
> +		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> +			napi_disable(&queue->napi);

Is all this really necessary?
I mean, wouldn't be enough the adaption of previous approach? Looking over the initial
approach we have this:

	if (bp->wol & MACB_WOL_ENABLED) {
		macb_writel(bp, IER, MACB_BIT(WOL));
		macb_writel(bp, WOL, MACB_BIT(MAG));
		enable_irq_wake(bp->queues[0].irq);
		netif_device_detach(netdev);
	}

Wouldn't it work if you will change it in something like this:

	u32 wolmask, arpipmask = 0;

	if (bp->wol & MACB_WOL_ENABLED) {
		macb_writel(bp, IER, MACB_BIT(WOL));

		if (bp->wol & MACB_WOL_HAS_ARP_PACKET) {
			/* Enable broadcast. */
			gem_writel(bp, NCFGR, gem_readl(bp, NCFGR) & ~MACB_BIT(NBC));
			arpipmask = cpu_to_be32p(&bp->dev->ip_ptr->ifa_list->ifa_local) & 0xFFFF;
			wolmask = arpipmask | MACB_BIT(ARP);
		} else {
			wolmask = MACB_BIT(MAG);
		}

		macb_writel(bp, WOL, wolmask);
		enable_irq_wake(bp->queues[0].irq);
		netif_device_detach(netdev);
	}

I cannot find anything particular for ARP WOL events in datasheet. Also, 
I cannot find something related to DMA activity while WOL is active

Thank you,
Claudiu

>  	} else {
>  		netif_device_detach(netdev);
>  		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> @@ -4206,16 +4309,33 @@ static int __maybe_unused macb_resume(struct device *dev)
>  	struct macb *bp = netdev_priv(netdev);
>  	struct macb_queue *queue = bp->queues;
>  	unsigned int q;
> +	unsigned long flags;
>  
>  	if (!netif_running(netdev))
>  		return 0;
>  
>  	pm_runtime_force_resume(dev);
>  
> -	if (bp->wol & MACB_WOL_ENABLED) {
> +	if ((bp->wol & MACB_WOL_ENABLED) &&
> +	    (bp->wol & MACB_WOL_HAS_MAGIC_PACKET)) {
>  		macb_writel(bp, IDR, MACB_BIT(WOL));
>  		macb_writel(bp, WOL, 0);
>  		disable_irq_wake(bp->queues[0].irq);
> +	} else if (device_may_wakeup(&bp->dev->dev)) {
> +		/* Resume after ARP wake event */
> +		spin_lock_irqsave(&bp->lock, flags);
> +		queue = bp->queues;
> +		queue_writel(queue, IDR, GEM_BIT(WOL));
> +		gem_writel(bp, WOL, 0);
> +		/* Clear Q0 ISR as WOL was enabled on Q0 */
> +		if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> +			macb_writel(bp, ISR, -1);
> +		disable_irq_wake(bp->queues[0].irq);
> +		spin_unlock_irqrestore(&bp->lock, flags);
> +		macb_writel(bp, NCR, MACB_BIT(MPE));
> +		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> +			napi_enable(&queue->napi);
> +		netif_carrier_on(netdev);
>  	} else {
>  		macb_writel(bp, NCR, MACB_BIT(MPE));
>  		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> 

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for U32 TC filter using Flexible RX Parser
From: kbuild test robot @ 2018-05-04 12:04 UTC (permalink / raw)
  To: Jose Abreu
  Cc: kbuild-all, netdev, Jose Abreu, David S. Miller, Joao Pinto,
	Vitor Soares, Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <9f57f98ef35360619001882fdcf0d7ef0558863f.1525351447.git.joabreu@synopsys.com>

Hi Jose,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jose-Abreu/net-stmmac-Add-support-for-U32-TC-filter-using-Flexible-RX-Parser/20180504-164205
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:104:14: sparse: incorrect type in assignment (different base types) @@    expected unsigned int [unsigned] [usertype] data @@    got ed int [unsigned] [usertype] data @@
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:104:14:    expected unsigned int [unsigned] [usertype] data
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:104:14:    got restricted __be32 [usertype] val
>> drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:105:14: sparse: incorrect type in assignment (different base types) @@    expected unsigned int [unsigned] [usertype] mask @@    got ed int [unsigned] [usertype] mask @@
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:105:14:    expected unsigned int [unsigned] [usertype] mask
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:105:14:    got restricted __be32 [usertype] mask

vim +104 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c

    89	
    90	static int tc_fill_entry(struct stmmac_priv *priv,
    91			struct tc_cls_u32_offload *cls)
    92	{
    93		struct stmmac_tc_entry *entry, *frag = NULL;
    94		struct tc_u32_sel *sel = cls->knode.sel;
    95		u32 off, data, mask, real_off, rem;
    96		u32 prio = cls->common.prio;
    97		int ret;
    98	
    99		/* Only 1 match per entry */
   100		if (sel->nkeys <= 0 || sel->nkeys > 1)
   101			return -EINVAL;
   102	
   103		off = sel->keys[0].off << sel->offshift;
 > 104		data = sel->keys[0].val;
 > 105		mask = sel->keys[0].mask;
   106	
   107		switch (ntohs(cls->common.protocol)) {
   108		case ETH_P_ALL:
   109			break;
   110		case ETH_P_IP:
   111			off += ETH_HLEN;
   112			break;
   113		default:
   114			return -EINVAL;
   115		}
   116	
   117		if (off > priv->tc_off_max)
   118			return -EINVAL;
   119	
   120		real_off = off / 4;
   121		rem = off % 4;
   122	
   123		entry = tc_find_entry(priv, cls, true);
   124		if (!entry)
   125			return -EINVAL;
   126	
   127		if (rem) {
   128			frag = tc_find_entry(priv, cls, true);
   129			if (!frag) {
   130				ret = -EINVAL;
   131				goto err_unuse;
   132			}
   133	
   134			entry->frag_ptr = frag;
   135			entry->val.match_en = (mask << (rem * 8)) &
   136				GENMASK(31, rem * 8);
   137			entry->val.match_data = (data << (rem * 8)) &
   138				GENMASK(31, rem * 8);
   139			entry->val.frame_offset = real_off;
   140			entry->prio = prio;
   141	
   142			frag->val.match_en = (mask >> (rem * 8)) &
   143				GENMASK(rem * 8 - 1, 0);
   144			frag->val.match_data = (data >> (rem * 8)) &
   145				GENMASK(rem * 8 - 1, 0);
   146			frag->val.frame_offset = real_off + 1;
   147			frag->prio = prio;
   148			frag->is_frag = true;
   149		} else {
   150			entry->frag_ptr = NULL;
   151			entry->val.match_en = mask;
   152			entry->val.match_data = data;
   153			entry->val.frame_offset = real_off;
   154			entry->prio = prio;
   155		}
   156	
   157		ret = tc_fill_actions(entry, frag, cls);
   158		if (ret)
   159			goto err_unuse;
   160	
   161		return 0;
   162	
   163	err_unuse:
   164		if (frag)
   165			frag->in_use = false;
   166		entry->in_use = false;
   167		return ret;
   168	}
   169	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* Re: [PATCH bpf-next v3 00/15] Introducing AF_XDP support
From: Magnus Karlsson @ 2018-05-04 11:22 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Daniel Borkmann, Björn Töpel, Karlsson, Magnus,
	Alexander Duyck, Alexander Duyck, John Fastabend,
	Alexei Starovoitov, Jesper Dangaard Brouer, Willem de Bruijn,
	Michael S. Tsirkin, Network Development, Björn Töpel,
	michael.lundkvist, Brandeburg, Jesse, Singhai, Anjali,
	Zhang, Qi Z
In-Reply-To: <20180503233819.2tplfjd32auudys2@ast-mbp>

On Fri, May 4, 2018 at 1:38 AM, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> On Fri, May 04, 2018 at 12:49:09AM +0200, Daniel Borkmann wrote:
>> On 05/02/2018 01:01 PM, Björn Töpel wrote:
>> > From: Björn Töpel <bjorn.topel@intel.com>
>> >
>> > This patch set introduces a new address family called AF_XDP that is
>> > optimized for high performance packet processing and, in upcoming
>> > patch sets, zero-copy semantics. In this patch set, we have removed
>> > all zero-copy related code in order to make it smaller, simpler and
>> > hopefully more review friendly. This patch set only supports copy-mode
>> > for the generic XDP path (XDP_SKB) for both RX and TX and copy-mode
>> > for RX using the XDP_DRV path. Zero-copy support requires XDP and
>> > driver changes that Jesper Dangaard Brouer is working on. Some of his
>> > work has already been accepted. We will publish our zero-copy support
>> > for RX and TX on top of his patch sets at a later point in time.
>>
>> +1, would be great to see it land this cycle. Saw few minor nits here
>> and there but nothing to hold it up, for the series:
>>
>> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
>>
>> Thanks everyone!
>
> Great stuff!
>
> Applied to bpf-next, with one condition.
> Upcoming zero-copy patches for both RX and TX need to be posted
> and reviewed within this release window.
> If netdev community as a whole won't be able to agree on the zero-copy
> bits we'd need to revert this feature before the next merge window.

Thanks everyone for reviewing this. Highly appreciated.

Just so we understand the purpose correctly:

1: Do you want to see the ZC patches in order to verify that the user
space API holds? If so, we can produce an additional RFC  patch set
using a big chunk of code that we had in RFC V1. We are not proud of
this code since it is clunky, but it hopefully proves the point with
the uapi being the same.

2: And/Or are you worried about us all (the netdev community) not
agreeing on a way to implement ZC internally in the drivers and the
XDP infrastructure? This is not going to be possible to finish during
this cycle since we do not like the implementation we had in RFC V1.
Too intrusive and now we also have nicer abstractions from Jesper that
we can use and extend to provide a (hopefully) much cleaner and less
intrusive solution.

Just so that we focus on the right proof points.

> Few other minor nits:
> patch 3:
> +struct xdp_ring {
> +       __u32 producer __attribute__((aligned(64)));
> +       __u32 consumer __attribute__((aligned(64)));
> +};
> It kinda begs for ____cacheline_aligned_in_smp to be introduced for uapi headers.

Agreed.

> patch 5:
> +struct sockaddr_xdp {
> +       __u16 sxdp_family;
> +       __u32 sxdp_ifindex;
> Not great to have a hole in uapi struct. Please fix it in the follow up.

You are correct. Will fix.

> patch 7:
> Has a lot of synchronize_net(). I think udpate/delete side
> can be improved to avoid them. Otherwise users may unknowingly DoS.

OK. Could you please elaborate on what kind of DoS attacks can be
performed with this, so we can come up with the right solution here?

Thanks: Magnus

> As the next steps I suggest to prioritize the highest to ship
> zero-copy rx/tx patches and to add selftests.
>
> Thanks!
>

^ permalink raw reply

* Re: [PATCH] bpf: fix possible spectre-v1 in find_and_alloc_map()
From: Mark Rutland @ 2018-05-04 10:49 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: linux-kernel, Alexei Starovoitov, Dan Carpenter, Peter Zijlstra,
	netdev
In-Reply-To: <28363c0d-0b78-681c-d9ea-908671b0067e@iogearbox.net>

On Fri, May 04, 2018 at 02:16:31AM +0200, Daniel Borkmann wrote:
> On 05/03/2018 06:04 PM, Mark Rutland wrote:
> > It's possible for userspace to control attr->map_type. Sanitize it when
> > using it as an array index to prevent an out-of-bounds value being used
> > under speculation.
> > 
> > Found by smatch.
> > 
> > Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Dan Carpenter <dan.carpenter@oracle.com>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: netdev@vger.kernel.org
> 
> Applied to bpf tree, thanks Mark!

Cheers!

> I've also just submitted one for BPF progs
> (http://patchwork.ozlabs.org/patch/908385/) which is same situation.

That looks good to me. That case doesn't show up in my smatch results so
far, but I might just need a few more build iterations.

Thanks,
Mark.

^ permalink raw reply

* Re: [PATCH bpf-next v4 3/4] bpf: selftest additions for SOCKHASH
From: Quentin Monnet @ 2018-05-04 10:11 UTC (permalink / raw)
  To: John Fastabend, borkmann, ast; +Cc: netdev
In-Reply-To: <1525372108-8690-4-git-send-email-john.fastabend@gmail.com>

2018-05-03 11:28:27 UTC-0700 ~ John Fastabend <john.fastabend@gmail.com>
> This runs existing SOCKMAP tests with SOCKHASH map type. To do this
> we push programs into include file and build two BPF programs. One
> for SOCKHASH and one for SOCKMAP.
> 
> We then run the entire test suite with each type.
> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  tools/include/uapi/linux/bpf.h                     |  6 ++++-
>  tools/testing/selftests/bpf/Makefile               |  3 ++-
>  tools/testing/selftests/bpf/test_sockhash_kern.c   |  4 ++++
>  tools/testing/selftests/bpf/test_sockmap.c         | 27 ++++++++++++++++------
>  .../{test_sockmap_kern.c => test_sockmap_kern.h}   | 10 ++++----
>  5 files changed, 36 insertions(+), 14 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/test_sockhash_kern.c
>  rename tools/testing/selftests/bpf/{test_sockmap_kern.c => test_sockmap_kern.h} (97%)
> 
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index da77a93..5cb983d 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -116,6 +116,7 @@ enum bpf_map_type {
>  	BPF_MAP_TYPE_DEVMAP,
>  	BPF_MAP_TYPE_SOCKMAP,
>  	BPF_MAP_TYPE_CPUMAP,
> +	BPF_MAP_TYPE_SOCKHASH,
>  };
>  
>  enum bpf_prog_type {
> @@ -1835,7 +1836,10 @@ struct bpf_stack_build_id {
>  	FN(msg_pull_data),		\
>  	FN(bind),			\
>  	FN(xdp_adjust_tail),		\
> -	FN(skb_get_xfrm_state),
> +	FN(skb_get_xfrm_state),		\
> +	FN(sock_hash_update),		\
> +	FN(msg_redirect_hash),		\
> +	FN(sk_redirect_hash),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call

Thanks for documenting the helpers in include/uapi/linux/bpf.h! However
the doc is missing in the update to the bpf.h file under tools/ in this
patch, could you please fix it?

Best regards,
Quentin

^ permalink raw reply

* [PATCH REPOST] xen/9pfs: don't inclide rwlock.h directly.
From: Sebastian Andrzej Siewior @ 2018-05-04 10:03 UTC (permalink / raw)
  To: netdev
  Cc: tglx, Sebastian Andrzej Siewior, Eric Van Hensbergen, Ron Minnich,
	Latchesar Ionkov, David S. Miller, v9fs-developer

rwlock.h should not be included directly. Instead linux/splinlock.h
should be included. One thing it does is to break the RT build.

Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: v9fs-developer@lists.sourceforge.net
Cc: netdev@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 net/9p/trans_xen.c |    1 -
 1 file changed, 1 deletion(-)

--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -38,7 +38,6 @@
 
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <linux/rwlock.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>

^ permalink raw reply

* [PATCH net-next v2] net: core: rework basic flow dissection helper
From: Paolo Abeni @ 2018-05-04  9:32 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Eric Dumazet, Jason Wang

When the core networking needs to detect the transport offset in a given
packet and parse it explicitly, a full-blown flow_keys struct is used for
storage.
This patch introduces a smaller keys store, rework the basic flow dissect
helper to use it, and apply this new helper where possible - namely in
skb_probe_transport_header(). The used flow dissector data structures
are renamed to match more closely the new role.

The above gives ~50% performance improvement in micro benchmarking around
skb_probe_transport_header() and ~30% around eth_get_headlen(), mostly due
to the smaller memset. Small, but measurable improvement is measured also
in macro benchmarking.

v1 -> v2: use the new helper in eth_get_headlen() and skb_get_poff(),
  as per DaveM suggestion

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h       | 18 ++++++++++--------
 include/net/flow_dissector.h |  7 ++++++-
 net/core/flow_dissector.c    | 17 +++++++++--------
 net/ethernet/eth.c           |  6 +++---
 4 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 908d66e55b14..693564a9a979 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1171,7 +1171,7 @@ void __skb_get_hash(struct sk_buff *skb);
 u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen);
+		   const struct flow_keys_basic *keys, int hlen);
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 			    void *data, int hlen_proto);
 
@@ -1208,13 +1208,14 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
 				  NULL, 0, 0, 0, flags);
 }
 
-static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
-						  void *data, __be16 proto,
-						  int nhoff, int hlen,
-						  unsigned int flags)
+static inline bool
+skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+				 struct flow_keys_basic *flow, void *data,
+				 __be16 proto, int nhoff, int hlen,
+				 unsigned int flags)
 {
 	memset(flow, 0, sizeof(*flow));
-	return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
+	return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
 				  data, proto, nhoff, hlen, flags);
 }
 
@@ -2350,11 +2351,12 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
 static inline void skb_probe_transport_header(struct sk_buff *skb,
 					      const int offset_hint)
 {
-	struct flow_keys keys;
+	struct flow_keys_basic keys;
 
 	if (skb_transport_header_was_set(skb))
 		return;
-	else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+
+	if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 9a074776f70b..e2f6e5c928bb 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -226,6 +226,11 @@ struct flow_dissector {
 	unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
 };
 
+struct flow_keys_basic {
+	struct flow_dissector_key_control control;
+	struct flow_dissector_key_basic basic;
+};
+
 struct flow_keys {
 	struct flow_dissector_key_control control;
 #define FLOW_KEYS_HASH_START_FIELD basic
@@ -244,7 +249,7 @@ __be32 flow_get_u32_src(const struct flow_keys *flow);
 __be32 flow_get_u32_dst(const struct flow_keys *flow);
 
 extern struct flow_dissector flow_keys_dissector;
-extern struct flow_dissector flow_keys_buf_dissector;
+extern struct flow_dissector flow_keys_basic_dissector;
 
 /* struct flow_keys_digest:
  *
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d29f09bc5ff9..030d4ca177fb 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1253,7 +1253,7 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen)
+		   const struct flow_keys_basic *keys, int hlen)
 {
 	u32 poff = keys->control.thoff;
 
@@ -1314,9 +1314,9 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
  */
 u32 skb_get_poff(const struct sk_buff *skb)
 {
-	struct flow_keys keys;
+	struct flow_keys_basic keys;
 
-	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
+	if (!skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
 		return 0;
 
 	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
@@ -1403,7 +1403,7 @@ static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
 	},
 };
 
-static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = {
 	{
 		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
 		.offset = offsetof(struct flow_keys, control),
@@ -1417,7 +1417,8 @@ static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
 struct flow_dissector flow_keys_dissector __read_mostly;
 EXPORT_SYMBOL(flow_keys_dissector);
 
-struct flow_dissector flow_keys_buf_dissector __read_mostly;
+struct flow_dissector flow_keys_basic_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_basic_dissector);
 
 static int __init init_default_flow_dissectors(void)
 {
@@ -1427,9 +1428,9 @@ static int __init init_default_flow_dissectors(void)
 	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
 				flow_keys_dissector_symmetric_keys,
 				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
-	skb_flow_dissector_init(&flow_keys_buf_dissector,
-				flow_keys_buf_dissector_keys,
-				ARRAY_SIZE(flow_keys_buf_dissector_keys));
+	skb_flow_dissector_init(&flow_keys_basic_dissector,
+				flow_keys_basic_dissector_keys,
+				ARRAY_SIZE(flow_keys_basic_dissector_keys));
 	return 0;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index eaeba9b99a73..ee28440f57c5 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -128,15 +128,15 @@ u32 eth_get_headlen(void *data, unsigned int len)
 {
 	const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
 	const struct ethhdr *eth = (const struct ethhdr *)data;
-	struct flow_keys keys;
+	struct flow_keys_basic keys;
 
 	/* this should never happen, but better safe than sorry */
 	if (unlikely(len < sizeof(*eth)))
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
-	if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
-					    sizeof(*eth), len, flags))
+	if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto,
+					      sizeof(*eth), len, flags))
 		return max_t(u32, keys.control.thoff, sizeof(*eth));
 
 	/* parse for any L4 headers */
-- 
2.14.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox